logstash-input-s3-sns-sqs 2.0.9 → 2.1.3

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 2de335bdaa2405c6548eab14d6cf2d7e2432fa854b07da4b4a907878a0d3c2b6
- data.tar.gz: ee5c2054793e6fee1a8ba150ced23228d30615cc048ee52031a77d849fd0eeb8
+ metadata.gz: c42ccc1885923bc79a1f546a7be3b4ef2b1840c301905f0219e1cddf89266b9c
+ data.tar.gz: 755b037e0a94be66863bf05f6ee6ea20816663de671de879c1642c7ba4a4cec0
  SHA512:
- metadata.gz: cb42c6fe5d559594d96fe2191b79ce04135d9d6991495f2effc3364ccbc3b0b65f5725d4729625eddbfa14c119d8cc6bed5071972b5eb7a183119d5132efa376
- data.tar.gz: 2509fc5c56bd8dba8d2049809f49a5cbf91b69851b85178da8918c1e38133650ba0eb86901bd86ccb0913d85e089642b51cb45548195b7f30ffded113932123f
+ metadata.gz: 982cd8b60721b89f56e9e013126ce39fc95c5d0747ab73d98224af2626182f789a6dcde8317e3539612dfac7824d75ab149bda75e0cf75435a06bdd2291e7a0a
+ data.tar.gz: fbd220e56e392510d2bbe44b413afe458e7f0a8eb82c32cfa5024b8fc56629c61315e83ed31bb0ae581afe212a92f0cd4ccc6fd16bb32a27a996b09efd64bdd6
data/CHANGELOG.md CHANGED
@@ -1,3 +1,16 @@
+ ## 2.1.3
+ - SECURITY: raise the logstash-mixin-aws dependency to a newer version
+ ## 2.1.2
+ - FEATURE: queue URLs can now be used as well as queue names.
+ - FEATURE: add SQS long-polling config parameter: sqs_wait_time_seconds
+ - FIX: valid UTF-8 byte sequences in logs were munged
+ - CLEANUP: remove tests (as a starting point for clean testing)
+ ## 2.1.1
+ - FEATURE: enable multi-region support for the included S3 client.
+ - Add region-by-bucket feature
+ ## 2.1.0
+ - FEATURE: add S3 metadata -> config :include_object_properties
+ - FEATURE: watch for threads in an exception state and restart them
  ## 2.0.9
  - gzip detection should return false for files smaller than gzip_signiture_bytes
  ## 2.0.8
@@ -10,6 +10,7 @@ class S3ClientFactory
  @aws_options_hash.merge!(@s3_default_options) unless @s3_default_options.empty?
  @sts_client = Aws::STS::Client.new(region: options[:aws_region])
  @credentials_by_bucket = options[:s3_credentials_by_bucket]
+ @region_by_bucket = options[:s3_region_by_bucket]
  @logger.debug("Credentials by Bucket", :credentials => @credentials_by_bucket)
  @default_session_name = options[:s3_role_session_name]
  @clients_by_bucket = {}
@@ -24,6 +25,9 @@ class S3ClientFactory
  unless @credentials_by_bucket[bucket_name].nil?
  options.merge!(credentials: get_s3_auth(@credentials_by_bucket[bucket_name]))
  end
+ unless @region_by_bucket[bucket_name].nil?
+ options.merge!(region: @region_by_bucket[bucket_name])
+ end
  @clients_by_bucket[bucket_symbol] = Aws::S3::Client.new(options)
  @logger.debug("Created a new S3 Client", :bucket_name => bucket_name, :client => @clients_by_bucket[bucket_symbol], :used_options => options)
  end
@@ -48,7 +52,7 @@ class S3ClientFactory
  )
  elsif credentials.key?('access_key_id') && credentials.key?('secret_access_key')
  @logger.debug("Fetch credentials", :access_key => credentials['access_key_id'])
- return Aws::Credentials.new(credentials)
+ return Aws::Credentials.new(credentials['access_key_id'], credentials['secret_access_key'])
  end
  end
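The 2.0.x code passed the whole credentials hash to `Aws::Credentials.new`, whose initializer takes positional arguments (`access_key_id, secret_access_key, session_token = nil`), so static credentials could never be built. A minimal sketch of the difference (key values are placeholders):

```ruby
require 'aws-sdk-core'

credentials = { 'access_key_id' => 'AKIA...', 'secret_access_key' => 'wJal...' }

# 2.0.9: one hash passed where two positional arguments are expected
# Aws::Credentials.new(credentials)   # => ArgumentError

# 2.1.x: the hash is unpacked into positional arguments
creds = Aws::Credentials.new(credentials['access_key_id'],
                             credentials['secret_access_key'])
creds.access_key_id # => "AKIA..."
```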
@@ -9,6 +9,7 @@ class S3Downloader
  @stopped = stop_semaphore
  @factory = options[:s3_client_factory]
  @delete_on_success = options[:delete_on_success]
+ @include_object_properties = options[:include_object_properties]
  end

  def copy_s3object_to_disk(record)
@@ -21,6 +22,7 @@ class S3Downloader
  key: record[:key],
  response_target: record[:local_file]
  )
+ record[:s3_data] = response.to_h.keep_if { |key| @include_object_properties.include?(key) }
  end
  rescue Aws::S3::Errors::ServiceError => e
  @logger.error("Unable to download file. Requeuing the message", :error => e, :record => record)
@@ -26,17 +26,18 @@ module LogProcessor
  @logger.warn("[#{Thread.current[:name]}] Abort reading in the middle of the file, we will read it again when logstash is started")
  throw :skip_delete
  end
- line = line.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: "\u2370")
- # Potentially dangerous! See https://medium.com/@adamhooper/in-ruby-dont-use-timeout-77d9d4e5a001
- # Decoding a line must not last longer than a few seconds. Otherwise, the file is probably corrupt.
- codec.decode(line) do |event|
- event_count += 1
- decorate_event(event, metadata, type, record[:key], record[:bucket], folder)
- #event_time = Time.now #PROFILING
- #event.set("[@metadata][progress][begin]", start_time)
- #event.set("[@metadata][progress][index_time]", event_time)
- #event.set("[@metadata][progress][line]", line_count)
- logstash_event_queue << event
+ begin
+ codec.decode(line) do |event|
+ event_count += 1
+ decorate_event(event, metadata, type, record[:key], record[:bucket], record[:s3_data])
+ #event_time = Time.now #PROFILING
+ #event.set("[@metadata][progress][begin]", start_time)
+ #event.set("[@metadata][progress][index_time]", event_time)
+ #event.set("[@metadata][progress][line]", line_count)
+ logstash_event_queue << event
+ end
+ rescue Exception => e
+ @logger.error("[#{Thread.current[:name]}] Unable to decode line", :line => line, :error => e)
  end
  end
  file_t1 = Process.clock_gettime(Process::CLOCK_MONOTONIC) #PROFILING
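Dropping the `encode` call is what fixes the "valid UTF-8 byte sequences were munged" bug from the changelog: re-encoding from `'binary'` replaces every byte above 0x7F, even when the input is already valid UTF-8. For example:

```ruby
line = "résumé" # valid UTF-8 straight from the log file
line.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: "\u2370")
# => "r⍰⍰sum⍰⍰" — each multi-byte character is destroyed, one replacement per byte
```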
@@ -45,7 +46,7 @@ module LogProcessor
  # ensure any stateful codecs (such as multi-line) are flushed to the queue
  codec.flush do |event|
  event_count += 1
- decorate_event(event, metadata, type, record[:key], record[:bucket], folder)
+ decorate_event(event, metadata, type, record[:key], record[:bucket], record[:s3_data])
  @logger.debug("[#{Thread.current[:name]}] Flushing an incomplete event", :event => event.to_s)
  logstash_event_queue << event
  end
@@ -55,7 +56,7 @@ module LogProcessor
 
  private
 
- def decorate_event(event, metadata, type, key, bucket, folder)
+ def decorate_event(event, metadata, type, key, bucket, s3_data)
  if event_is_metadata?(event)
  @logger.debug('Updating the current cloudfront metadata', :event => event)
  update_metadata(metadata, event)
@@ -67,9 +68,11 @@ module LogProcessor
  event.set("cloudfront_version", metadata[:cloudfront_version]) unless metadata[:cloudfront_version].nil?
  event.set("cloudfront_fields", metadata[:cloudfront_fields]) unless metadata[:cloudfront_fields].nil?
 
+ event.set("[@metadata][s3]", s3_data)
  event.set("[@metadata][s3][object_key]", key)
  event.set("[@metadata][s3][bucket_name]", bucket)
  event.set("[@metadata][s3][object_folder]", get_object_folder(key))
+
  end
  end
 
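Because `[@metadata][s3]` is set first and the individual keys afterwards, the filtered object properties and the key/bucket/folder fields end up side by side. An approximate sketch of the resulting structure with the default `include_object_properties` (values are illustrative):

```ruby
# Approximate shape of [@metadata][s3] after decorate_event runs:
{
  "last_modified" => Time.utc(2021, 12, 20),
  "content_type"  => "application/x-gzip",
  "metadata"      => {},
  "object_key"    => "AWSLogs/123456789012/app.log.gz",
  "bucket_name"   => "my-log-bucket",
  "object_folder" => "AWSLogs/123456789012"
}
```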
@@ -161,6 +161,10 @@ class LogStash::Inputs::S3SNSSQS < LogStash::Inputs::Threadable
  # Session name to use when assuming an IAM role
  config :s3_role_session_name, :validate => :string, :default => "logstash"
  config :delete_on_success, :validate => :boolean, :default => false
+ # Whether or not to include the S3 object's properties (last_modified, content_type, metadata)
+ # in each Event at [@metadata][s3]. Regardless of this setting, [@metadata][s3][object_key] will
+ # always be present.
+ config :include_object_properties, :validate => :array, :default => [:last_modified, :content_type, :metadata]
 
  ### sqs
  # Name of the SQS Queue to pull messages from. Note that this is just the name of the queue, not the URL or ARN.
@@ -169,9 +173,11 @@ class LogStash::Inputs::S3SNSSQS < LogStash::Inputs::Threadable
  # Whether the event is processed through an SNS to SQS. (S3>SNS>SQS = true | S3>SQS = false)
  config :from_sns, :validate => :boolean, :default => true
  config :sqs_skip_delete, :validate => :boolean, :default => false
+ config :sqs_wait_time_seconds, :validate => :number, :required => false
+ config :sqs_delete_on_failure, :validate => :boolean, :default => true
+
  config :visibility_timeout, :validate => :number, :default => 120
  config :max_processing_time, :validate => :number, :default => 8000
-
  ### system
  config :temporary_directory, :validate => :string, :default => File.join(Dir.tmpdir, "logstash")
  # To run in multiple threads use this
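Taken together, a hypothetical instantiation using the new options might look like this (a sketch assuming a Logstash runtime; the queue URL and option values are illustrative, and a plain queue name still works):

```ruby
input = LogStash::Inputs::S3SNSSQS.new(
  "queue"                     => "https://sqs.eu-central-1.amazonaws.com/123456789012/my-queue",
  "from_sns"                  => true,
  "sqs_wait_time_seconds"     => 10,    # enable SQS long polling
  "sqs_delete_on_failure"     => false, # keep messages that exceed max_processing_time
  "include_object_properties" => ["last_modified", "content_type", "metadata"]
)
```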
@@ -188,6 +194,7 @@ class LogStash::Inputs::S3SNSSQS < LogStash::Inputs::Threadable
  FileUtils.mkdir_p(@temporary_directory) unless Dir.exist?(@temporary_directory)
  @id ||= "Unknown" # Use INPUT { id => name } for thread identifier
  @credentials_by_bucket = hash_key_is_regex({})
+ @region_by_bucket = hash_key_is_regex({})
  # create the bucket=>folder=>codec lookup from config options
  @codec_by_folder = hash_key_is_regex({})
  @type_by_folder = hash_key_is_regex({})
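`hash_key_is_regex` itself is not part of this diff; one plausible implementation (a sketch, not necessarily the plugin's actual code) treats every stored key as a regex and matches it against the lookup string:

```ruby
# Sketch: give a Hash a default_proc that matches stored keys as regexes.
def hash_key_is_regex(myhash)
  myhash.default_proc = lambda do |hash, lookup|
    pair = hash.each_pair.find { |key, _| Regexp.new(key).match?(lookup.to_s) }
    pair && pair.last
  end
  myhash
end

region_by_bucket = hash_key_is_regex({ "logs-eu-.*" => "eu-central-1" })
region_by_bucket["logs-eu-prod"] # => "eu-central-1"
region_by_bucket["other"]        # => nil
```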
@@ -222,6 +229,9 @@ class LogStash::Inputs::S3SNSSQS < LogStash::Inputs::Threadable
  if options.key?('credentials')
  @credentials_by_bucket[bucket] = options['credentials']
  end
+ if options.key?('region')
+ @region_by_bucket[bucket] = options['region']
+ end
  if options.key?('folders')
  # make these hashes do key lookups using regex matching
  folders = hash_key_is_regex({})
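Judging from the keys this loop checks, a single bucket entry can carry `credentials`, `region`, and `folders` sections; a hypothetical example (the exact option format is not shown in this diff):

```ruby
# Hypothetical per-bucket options as consumed by the loop above:
bucket_options = {
  "credentials" => { "role" => "arn:aws:iam::123456789012:role/ReadLogs" },
  "region"      => "eu-west-1", # stored into @region_by_bucket (new in 2.1.1)
  "folders"     => []           # per-folder codec/type mappings (structure not shown here)
}
```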
@@ -242,24 +252,28 @@ class LogStash::Inputs::S3SNSSQS < LogStash::Inputs::Threadable
  @sqs_poller = SqsPoller.new(@logger, @received_stop,
  {
  visibility_timeout: @visibility_timeout,
- skip_delete: @sqs_skip_delete
+ skip_delete: @sqs_skip_delete,
+ wait_time_seconds: @sqs_wait_time_seconds
  },
  {
  sqs_queue: @queue,
  queue_owner_aws_account_id: @queue_owner_aws_account_id,
  from_sns: @from_sns,
- max_processing_time: @max_processing_time
+ max_processing_time: @max_processing_time,
+ sqs_delete_on_failure: @sqs_delete_on_failure
  },
  aws_options_hash)
  @s3_client_factory = S3ClientFactory.new(@logger, {
  aws_region: @region,
  s3_default_options: @s3_default_options,
  s3_credentials_by_bucket: @credentials_by_bucket,
+ s3_region_by_bucket: @region_by_bucket,
  s3_role_session_name: @s3_role_session_name
  }, aws_options_hash)
  @s3_downloader = S3Downloader.new(@logger, @received_stop, {
  s3_client_factory: @s3_client_factory,
- delete_on_success: @delete_on_success
+ delete_on_success: @delete_on_success,
+ include_object_properties: @include_object_properties
  })
  @codec_factory = CodecFactory.new(@logger, {
  default_codec: @codec,
@@ -273,18 +287,21 @@ class LogStash::Inputs::S3SNSSQS < LogStash::Inputs::Threadable
 
  # startup
  def run(logstash_event_queue)
- #LogStash::ShutdownWatcher.abort_threshold(30)
- # start them
- @queue_mutex = Mutex.new
- #@consumer_threads = 1
- @worker_threads = @consumer_threads.times.map do |thread_id|
- t = run_worker_thread(logstash_event_queue, thread_id)
- # make thread start async to prevent polling the same message from sqs
- sleep 0.5
- t
+ @control_threads = @consumer_threads.times.map do |thread_id|
+ Thread.new do
+ restart_count = 0
+ while not stop?
+ # make thread start async to prevent polling the same message from sqs
+ sleep 0.5
+ worker_thread = run_worker_thread(logstash_event_queue, thread_id)
+ worker_thread.join
+ restart_count += 1
+ thread_id = "#{thread_id}_#{restart_count}"
+ @logger.info("[control_thread] restarting a thread #{thread_id}... ", :thread => worker_thread.inspect)
+ end
+ end
  end
- # and wait (possibly infinitely) for them to shut down
- @worker_threads.each { |t| t.join }
+ @control_threads.each { |t| t.join }
  end
 
  # shutdown
@@ -311,7 +328,6 @@ class LogStash::Inputs::S3SNSSQS < LogStash::Inputs::Threadable
  # --- END plugin interface ------------------------------------------#
 
  private
-
  def run_worker_thread(queue, thread_id)
  Thread.new do
  LogStash::Util.set_thread_name("Worker #{@id}/#{thread_id}")
@@ -41,17 +41,24 @@ class SqsPoller
  @queue = client_options[:sqs_queue]
  @from_sns = client_options[:from_sns]
  @max_processing_time = client_options[:max_processing_time]
+ @sqs_delete_on_failure = client_options[:sqs_delete_on_failure]
  @options = DEFAULT_OPTIONS.merge(poller_options)
  begin
  @logger.info("Registering SQS input", :queue => @queue)
  sqs_client = Aws::SQS::Client.new(aws_options_hash)
- queue_url = sqs_client.get_queue_url({
- queue_name: @queue,
- queue_owner_aws_account_id: client_options[:queue_owner_aws_account_id]
- }).queue_url # is a method according to docs. Was [:queue_url].
+ if uri?(@queue)
+ queue_url = @queue
+ else
+ queue_url = sqs_client.get_queue_url({
+ queue_name: @queue,
+ queue_owner_aws_account_id: client_options[:queue_owner_aws_account_id]
+ }).queue_url
+ end
+
  @poller = Aws::SQS::QueuePoller.new(queue_url,
  :client => sqs_client
  )
+ @logger.info("[#{Thread.current[:name]}] connected to queue.", :queue_url => queue_url)
  rescue Aws::SQS::Errors::ServiceError => e
  @logger.error("Cannot establish connection to Amazon SQS", :error => e)
  raise LogStash::ConfigurationError, "Verify the SQS queue name and your credentials"
@@ -86,6 +93,7 @@ class SqsPoller
  poller_thread = Thread.current
  extender = Thread.new do
  while new_visibility < @max_processing_time do
+
  sleep message_backoff
  begin
  @poller.change_message_visibility_timeout(message, new_visibility)
@@ -98,8 +106,8 @@ class SqsPoller
  end
  end
  @logger.error("[#{Thread.current[:name]}] Maximum visibility reached! We will delete this message from queue!")
- @poller.delete_message(message)
- poller_thread.raise "[#{poller_thread[:name]}] Maximum visibility reached...!".freeze
+ @poller.delete_message(message) if @sqs_delete_on_failure
+ poller_thread.kill
  end
  extender[:name] = "#{Thread.current[:name]}/extender" #PROFILING
  failed = false
@@ -183,6 +191,16 @@ class SqsPoller
  end
  end
 
+ def uri?(string)
+ uri = URI.parse(string)
+ %w( http https ).include?(uri.scheme)
+ rescue URI::BadURIError
+ false
+ rescue URI::InvalidURIError
+ false
+ end
+
+
  def get_object_path(key)
  folder = ::File.dirname(key)
  return '' if folder == '.'
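With `uri?` in place, the poller accepts either form of the `queue` setting (illustrative calls):

```ruby
uri?("https://sqs.eu-central-1.amazonaws.com/123456789012/my-queue")
# => true  — used as the queue URL directly
uri?("my-queue")
# => false — resolved via sqs_client.get_queue_url
```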
@@ -1,6 +1,6 @@
  Gem::Specification.new do |s|
  s.name = 'logstash-input-s3-sns-sqs'
- s.version = '2.0.9'
+ s.version = '2.1.3'
  s.licenses = ['Apache-2.0']
  s.summary = "Get logs from AWS s3 buckets as issued by an object-created event via sns -> sqs."
  s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program"
@@ -22,7 +22,7 @@ Gem::Specification.new do |s|
  s.add_runtime_dependency "logstash-core-plugin-api", ">= 2.1.12", "<= 2.99"
 
  s.add_runtime_dependency 'logstash-codec-json', '~> 3.0'
- s.add_runtime_dependency 'logstash-mixin-aws', '~> 4.3'
+ s.add_runtime_dependency 'logstash-mixin-aws', '>= 4.3'
  s.add_development_dependency 'logstash-codec-json_stream', '~> 1.0'
  s.add_development_dependency 'logstash-devutils', '~> 1.3'
  end
@@ -15,11 +15,13 @@ require 'rspec/expectations'
 
  describe LogStash::Inputs::S3SNSSQS do
  class LogStash::Inputs::S3SNSSQS
- public :process_local_log # use method without error logging for better visibility of errors
+ public :process # use method without error logging for better visibility of errors
  end
  let(:codec_options) { {} }
 
  let(:input) { LogStash::Inputs::S3SNSSQS.new(config) }
+
+ let(:codec_factory) { CodecFactory.new(@logger, { default_codec: @codec, codec_by_folder: @codec_by_folder }) }
  subject { input }
 
  context "default parser choice" do
@@ -28,11 +30,11 @@ describe LogStash::Inputs::S3SNSSQS do
  end
  end
 
- let(:compressed_log_file) { File.join(File.dirname(__FILE__), '..', '..', 'fixtures', 'log-stream.real-formatted') }
+ let(:record) { { "local_file" => File.join(File.dirname(__FILE__), '..', '..', 'fixtures', 'log-stream.real-formatted') } }
  let(:key) { "arn:aws:iam::123456789012:role/AuthorizedRole" }
  let(:folder) { "arn:aws:iam::123456789012:role/AuthorizedRole" }
  let(:instance_codec) { "json" }
- let(:queue) { "arn:aws:iam::123456789012:role/AuthorizedRole" }
+ let(:logstash_event_queue) { "arn:aws:iam::123456789012:role/AuthorizedRole" }
  let(:bucket) { "arn:aws:iam::123456789012:role/AuthorizedRole" }
  let(:message) { "arn:aws:iam::123456789012:role/AuthorizedRole" }
  let(:size) { "123344" }
@@ -43,18 +45,22 @@ describe LogStash::Inputs::S3SNSSQS do
  subject do
  LogStash::Inputs::S3SNSSQS.new(config)
  end
+ # end
  let(:queue) { [] }
  before do
  @codec = LogStash::Codecs::JSONStream.new
  @codec.charset = "UTF-8"
- expect( subject.process_local_log(compressed_log_file, key, folder, @codec.clone, queue, bucket, message, size) ).to be true
+ @codec_factory = CodecFactory.new(@logger, {
+ default_codec: @codec,
+ codec_by_folder: @codec_by_folder
+ })
+ expect( subject.process(record, logstash_event_queue) ).to be true
  $stderr.puts "method #{queue.to_s}"
  end
 
- it '.process_local_log => process compressed log file and verified logstash event queue with the correct number of events' do
- expect( queue.size ).to eq(38)
- expect( queue.clear ).to be_empty
- end
-
+ #it '.process_local_log => process compressed log file and verified logstash event queue with the correct number of events' do
+ # expect( queue.size ).to eq(38)
+ # expect( queue.clear ).to be_empty
+ #end
  end
  end
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: logstash-input-s3-sns-sqs
  version: !ruby/object:Gem::Version
- version: 2.0.9
+ version: 2.1.3
  platform: ruby
  authors:
  - Christian Herweg
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2019-10-21 00:00:00.000000000 Z
+ date: 2021-12-20 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  requirement: !ruby/object:Gem::Requirement
@@ -47,7 +47,7 @@ dependencies:
  - !ruby/object:Gem::Dependency
  requirement: !ruby/object:Gem::Requirement
  requirements:
- - - "~>"
+ - - ">="
  - !ruby/object:Gem::Version
  version: '4.3'
  name: logstash-mixin-aws
@@ -55,7 +55,7 @@ dependencies:
  type: :runtime
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
- - - "~>"
+ - - ">="
  - !ruby/object:Gem::Version
  version: '4.3'
  - !ruby/object:Gem::Dependency