logstash-input-s3-sns-sqs 2.0.7 → 2.1.2

This diff shows the changes between package versions as published to their public registries. It is provided for informational purposes only.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 129cf226f1cc14ddcd1b0ffa54dbc3f3472eafe574efeac163d15827e7226e9a
- data.tar.gz: eba5acd15e497fe5eb4ac6ee64a817a132ae4bb308601f4013e42da3e344e1cc
+ metadata.gz: 805cb772fda649de4273d9c86799255bc31b7f18b847983c1a1bacfee2eb0224
+ data.tar.gz: ef65ac96355cb246b493836323e7998e485ddf30ceb5df780d17b1f6d080451b
  SHA512:
- metadata.gz: 7293553ce571cd1347cd970852a83a88a1c51d2a29fe0e992df212cf4807a909e0ea8eb46663d2043cf3778a65c39006626b013308735f5e14cf1a1e1fcf69cc
- data.tar.gz: 4f4068a601e0f930167cd988b404f844a3420e3938db30d8c9b768216fca900f077e8658c2ad8bff54e4ff18b7217d4e0f316aaf6df0312176c3294bd047af10
+ metadata.gz: 338e687d52f65a484ca75c091cf003f263e8e80f00b545f3f6b36b7a3fc584ea29807233436c75a3189fdf075d7386aa1415f34a1996349b2a41f4b274cb6f75
+ data.tar.gz: abc6194b248640de99f8ed6f41304fff760b1667a08426dea2ee98d773f19d9b01010438c633452fc60e8c23b31c201b3000deab07e4cf4da630a7f4770cd0d1
CHANGELOG.md CHANGED
@@ -1,3 +1,18 @@
+ ##2.1.2
+ - FEATURE: Now it's possible to use queue URLs as well as queue names.
+ - FEATURE: Add SQS long-polling config parameter: sqs_wait_time_seconds
+ - FIX: Valid UTF-8 byte sequences in logs were munged
+ - CLEANUP: Remove tests. (as a starting point for clean testing)
+ ##2.1.1
+ - FEATURE: Enable multi-region support for the included S3 client.
+ - Add region-by-bucket feature
+ ##2.1.0
+ - FEATURE: Add S3 metadata -> config :include_object_properties
+ - FEATURE: Watch for threads in an exception state and restart them
+ ##2.0.9
+ - gzip detection should return false for files smaller than gzip_signiture_bytes
+ ##2.0.8
+ - fix nil class error
  ##2.0.7
  - fix gem error
  ##2.0.6
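Taken together, the 2.1.x changes surface as new plugin options. Below is a minimal, hedged sketch of an option hash, built the way the spec file at the bottom of this diff constructs one for LogStash::Inputs::S3SNSSQS.new; the queue URL and bucket values are purely illustrative, and the "s3_options_by_bucket"/"bucket_name" key names are assumptions inferred from the option parsing further down in this diff.

    # Hypothetical wiring of the 2.1.x options (all values illustrative):
    config = {
      "queue" => "https://sqs.eu-central-1.amazonaws.com/123456789012/example-queue", # full URL now accepted (2.1.2)
      "sqs_wait_time_seconds" => 10,                                             # SQS long polling (2.1.2)
      "include_object_properties" => [:last_modified, :content_type, :metadata], # S3 metadata (2.1.0)
      "s3_options_by_bucket" => [                                                # per-bucket region override (2.1.1)
        { "bucket_name" => "example-logs-eu", "region" => "eu-west-1" }
      ]
    }
    input = LogStash::Inputs::S3SNSSQS.new(config)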
@@ -11,7 +11,7 @@ class MagicGzipValidator
  # signature.
  if file.stat.size < minimum_bytes_for_determining_signature
  puts "File too small to calculate signature"
- throw :skip_delete
+ return false
  end
 
  @file = file
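Context for the change above: gzip detection of this kind compares the first bytes of a file with the two-byte gzip magic number 0x1f 0x8b, so a file shorter than the signature can never be a valid gzip stream; returning false is the correct answer, where the old throw :skip_delete escaped into unrelated control flow. A standalone sketch of the idea (not the plugin's exact code):

    # A gzip stream starts with the signature bytes 0x1f 0x8b.
    GZIP_SIGNATURE = "\x1f\x8b".b

    def gzip_file?(path)
      # Too small to even hold the signature => cannot be gzip.
      return false if File.size(path) < GZIP_SIGNATURE.bytesize
      File.open(path, "rb") { |f| f.read(GZIP_SIGNATURE.bytesize) } == GZIP_SIGNATURE
    end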
@@ -10,6 +10,7 @@ class S3ClientFactory
  @aws_options_hash.merge!(@s3_default_options) unless @s3_default_options.empty?
  @sts_client = Aws::STS::Client.new(region: options[:aws_region])
  @credentials_by_bucket = options[:s3_credentials_by_bucket]
+ @region_by_bucket = options[:s3_region_by_bucket]
  @logger.debug("Credentials by Bucket", :credentials => @credentials_by_bucket)
  @default_session_name = options[:s3_role_session_name]
  @clients_by_bucket = {}
@@ -24,6 +25,9 @@ class S3ClientFactory
  unless @credentials_by_bucket[bucket_name].nil?
  options.merge!(credentials: get_s3_auth(@credentials_by_bucket[bucket_name]))
  end
+ unless @region_by_bucket[bucket_name].nil?
+ options.merge!(region: @region_by_bucket[bucket_name])
+ end
  @clients_by_bucket[bucket_symbol] = Aws::S3::Client.new(options)
  @logger.debug("Created a new S3 Client", :bucket_name => bucket_name, :client => @clients_by_bucket[bucket_symbol], :used_options => options)
  end
@@ -48,7 +52,7 @@ class S3ClientFactory
  )
  elsif credentials.key?('access_key_id') && credentials.key?('secret_access_key')
  @logger.debug("Fetch credentials", :access_key => credentials['access_key_id'])
- return Aws::Credentials.new(credentials)
+ return Aws::Credentials.new(credentials['access_key_id'], credentials['secret_access_key'])
  end
  end
 
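The one-line change above fixes a real bug: Aws::Credentials.new takes positional arguments, so passing the whole hash made the hash itself the access key id and left the secret nil. A short sketch with illustrative values:

    require 'aws-sdk-core'

    credentials = { 'access_key_id' => 'AKIAEXAMPLE', 'secret_access_key' => 'example-secret' }

    # 2.0.7 (broken): the hash becomes the access key id, the secret stays nil.
    # Aws::Credentials.new(credentials)

    # 2.1.x (fixed): pass key id and secret positionally.
    creds = Aws::Credentials.new(credentials['access_key_id'], credentials['secret_access_key'])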
@@ -9,6 +9,7 @@ class S3Downloader
  @stopped = stop_semaphore
  @factory = options[:s3_client_factory]
  @delete_on_success = options[:delete_on_success]
+ @include_object_properties = options[:include_object_properties]
  end
 
  def copy_s3object_to_disk(record)
@@ -21,6 +22,7 @@ class S3Downloader
  key: record[:key],
  response_target: record[:local_file]
  )
+ record[:s3_data] = response.to_h.keep_if { |key| @include_object_properties.include?(key) }
  end
  rescue Aws::S3::Errors::ServiceError => e
  @logger.error("Unable to download file. Requeuing the message", :error => e, :record => record)
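The new record[:s3_data] line whitelists fields from the GetObject response before they travel with the record. A self-contained sketch of that filtering; the response hash is an illustrative subset of what Aws::S3::Types::GetObjectOutput#to_h returns (symbol keys):

    response_hash = {
      last_modified: Time.now,
      content_type:  "application/x-gzip",
      metadata:      { "uploader" => "example" },
      etag:          '"abc123"'
    }
    include_object_properties = [:last_modified, :content_type, :metadata]

    # keep_if mutates the hash in place, keeping only whitelisted keys (:etag is dropped);
    # the result is later attached to the event at [@metadata][s3] by decorate_event.
    s3_data = response_hash.keep_if { |key| include_object_properties.include?(key) }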
@@ -161,6 +161,10 @@ class LogStash::Inputs::S3SNSSQS < LogStash::Inputs::Threadable
  # Session name to use when assuming an IAM role
  config :s3_role_session_name, :validate => :string, :default => "logstash"
  config :delete_on_success, :validate => :boolean, :default => false
+ # Whether or not to include the S3 object's properties (last_modified, content_type, metadata)
+ # in each event at [@metadata][s3]. Regardless of this setting, [@metadata][s3][object_key] will
+ # always be present.
+ config :include_object_properties, :validate => :array, :default => [:last_modified, :content_type, :metadata]
 
  ### sqs
  # Name of the SQS Queue to pull messages from. Note that this can be the queue name or (since 2.1.2) the full queue URL, but not the ARN.
@@ -169,9 +173,11 @@ class LogStash::Inputs::S3SNSSQS < LogStash::Inputs::Threadable
  # Whether the event is processed through SNS to SQS. (S3>SNS>SQS = true | S3>SQS = false)
  config :from_sns, :validate => :boolean, :default => true
  config :sqs_skip_delete, :validate => :boolean, :default => false
+ config :sqs_wait_time_seconds, :validate => :number, :required => false
+ config :sqs_delete_on_failure, :validate => :boolean, :default => true
+
  config :visibility_timeout, :validate => :number, :default => 120
  config :max_processing_time, :validate => :number, :default => 8000
-
  ### system
  config :temporary_directory, :validate => :string, :default => File.join(Dir.tmpdir, "logstash")
  # To run in multiple threads use this
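The new sqs_wait_time_seconds is handed to the poller as wait_time_seconds (see the SqsPoller wiring below) and enables SQS long polling: the ReceiveMessage call blocks for up to that many seconds instead of returning empty immediately, which cuts down on empty receives. For reference, a minimal sketch of long polling with the AWS SDK's queue poller (queue URL illustrative):

    require 'aws-sdk-sqs'

    poller = Aws::SQS::QueuePoller.new("https://sqs.eu-central-1.amazonaws.com/123456789012/example-queue")
    poller.poll(wait_time_seconds: 10, idle_timeout: 60) do |message|
      puts message.body
    end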
@@ -188,6 +194,7 @@ class LogStash::Inputs::S3SNSSQS < LogStash::Inputs::Threadable
  FileUtils.mkdir_p(@temporary_directory) unless Dir.exist?(@temporary_directory)
  @id ||= "Unknown" # Use input { id => name } for the thread identifier
  @credentials_by_bucket = hash_key_is_regex({})
+ @region_by_bucket = hash_key_is_regex({})
  # create the bucket=>folder=>codec lookup from config options
  @codec_by_folder = hash_key_is_regex({})
  @type_by_folder = hash_key_is_regex({})
@@ -222,6 +229,9 @@ class LogStash::Inputs::S3SNSSQS < LogStash::Inputs::Threadable
  if options.key?('credentials')
  @credentials_by_bucket[bucket] = options['credentials']
  end
+ if options.key?('region')
+ @region_by_bucket[bucket] = options['region']
+ end
  if options.key?('folders')
  # make these hashes do key lookups using regex matching
  folders = hash_key_is_regex({})
@@ -242,24 +252,28 @@ class LogStash::Inputs::S3SNSSQS < LogStash::Inputs::Threadable
  @sqs_poller = SqsPoller.new(@logger, @received_stop,
  {
  visibility_timeout: @visibility_timeout,
- skip_delete: @sqs_skip_delete
+ skip_delete: @sqs_skip_delete,
+ wait_time_seconds: @sqs_wait_time_seconds
  },
  {
  sqs_queue: @queue,
  queue_owner_aws_account_id: @queue_owner_aws_account_id,
  from_sns: @from_sns,
- max_processing_time: @max_processing_time
+ max_processing_time: @max_processing_time,
+ sqs_delete_on_failure: @sqs_delete_on_failure
  },
  aws_options_hash)
  @s3_client_factory = S3ClientFactory.new(@logger, {
  aws_region: @region,
  s3_default_options: @s3_default_options,
  s3_credentials_by_bucket: @credentials_by_bucket,
+ s3_region_by_bucket: @region_by_bucket,
  s3_role_session_name: @s3_role_session_name
  }, aws_options_hash)
  @s3_downloader = S3Downloader.new(@logger, @received_stop, {
  s3_client_factory: @s3_client_factory,
- delete_on_success: @delete_on_success
+ delete_on_success: @delete_on_success,
+ include_object_properties: @include_object_properties
  })
  @codec_factory = CodecFactory.new(@logger, {
  default_codec: @codec,
@@ -273,18 +287,21 @@ class LogStash::Inputs::S3SNSSQS < LogStash::Inputs::Threadable
 
  # startup
  def run(logstash_event_queue)
- #LogStash::ShutdownWatcher.abort_threshold(30)
- # start them
- @queue_mutex = Mutex.new
- #@consumer_threads= 1
- @worker_threads = @consumer_threads.times.map do |thread_id|
- t = run_worker_thread(logstash_event_queue, thread_id)
- # make thread start async to prevent polling the same message from sqs
- sleep 0.5
- t
+ @control_threads = @consumer_threads.times.map do |thread_id|
+ Thread.new do
+ restart_count = 0
+ while not stop?
+ # make thread start async to prevent polling the same message from sqs
+ sleep 0.5
+ worker_thread = run_worker_thread(logstash_event_queue, thread_id)
+ worker_thread.join
+ restart_count += 1
+ thread_id = "#{thread_id}_#{restart_count}"
+ @logger.info("[control_thread] restarting a thread #{thread_id}... ", :thread => worker_thread.inspect)
+ end
+ end
  end
- # and wait (possibly infinitely) for them to shut down
- @worker_threads.each { |t| t.join }
+ @control_threads.each { |t| t.join }
  end
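The rewritten run no longer lets a dead worker stay dead: each control thread loops, spawning a worker, joining it, and respawning it when it terminates, until stop? becomes true. A minimal self-contained sketch of that supervisor pattern (names and the deliberately failing worker are illustrative):

    stop = false

    supervisor = Thread.new do
      restarts = 0
      until stop
        worker = Thread.new { sleep 0.1; raise "boom" } # a worker that dies
        worker.report_on_exception = false              # keep the sketch's output quiet
        worker.join rescue nil # join re-raises the worker's exception; swallow it here
        restarts += 1
        warn "worker died, restarting (##{restarts})"
      end
    end

    sleep 1
    stop = true
    supervisor.join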
 
  # shutdown
@@ -311,7 +328,6 @@ class LogStash::Inputs::S3SNSSQS < LogStash::Inputs::Threadable
  # --- END plugin interface ------------------------------------------#
 
  private
-
  def run_worker_thread(queue, thread_id)
  Thread.new do
  LogStash::Util.set_thread_name("Worker #{@id}/#{thread_id}")
@@ -14,7 +14,7 @@ module LogProcessor
  file = record[:local_file]
  codec = @codec_factory.get_codec(record)
  folder = record[:folder]
- type = @type_by_folder[record[:bucket]][folder]
+ type = @type_by_folder.fetch(record[:bucket],{})[folder]
  metadata = {}
  line_count = 0
  event_count = 0
@@ -26,17 +26,18 @@ module LogProcessor
  @logger.warn("[#{Thread.current[:name]}] Abort reading in the middle of the file, we will read it again when logstash is started")
  throw :skip_delete
  end
- line = line.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: "\u2370")
- # Potentially dangerous! See https://medium.com/@adamhooper/in-ruby-dont-use-timeout-77d9d4e5a001
- # Decoding a line must not last longer than a few seconds. Otherwise, the file is probably corrupt.
- codec.decode(line) do |event|
- event_count += 1
- decorate_event(event, metadata, type, record[:key], record[:bucket], folder)
- #event_time = Time.now #PROFILING
- #event.set("[@metadata][progress][begin]", start_time)
- #event.set("[@metadata][progress][index_time]", event_time)
- #event.set("[@metadata][progress][line]", line_count)
- logstash_event_queue << event
+ begin
+ codec.decode(line) do |event|
+ event_count += 1
+ decorate_event(event, metadata, type, record[:key], record[:bucket], record[:s3_data])
+ #event_time = Time.now #PROFILING
+ #event.set("[@metadata][progress][begin]", start_time)
+ #event.set("[@metadata][progress][index_time]", event_time)
+ #event.set("[@metadata][progress][line]", line_count)
+ logstash_event_queue << event
+ end
+ rescue Exception => e
+ @logger.error("[#{Thread.current[:name]}] Unable to decode line", :line => line, :error => e)
  end
  end
  file_t1 = Process.clock_gettime(Process::CLOCK_MONOTONIC) #PROFILING
@@ -45,7 +46,7 @@ module LogProcessor
  # ensure any stateful codecs (such as multi-line) are flushed to the queue
  codec.flush do |event|
  event_count += 1
- decorate_event(event, metadata, type, record[:key], record[:bucket], folder)
+ decorate_event(event, metadata, type, record[:key], record[:bucket], record[:s3_data])
  @logger.debug("[#{Thread.current[:name]}] Flushing an incomplete event", :event => event.to_s)
  logstash_event_queue << event
  end
@@ -55,7 +56,7 @@ module LogProcessor
 
  private
 
- def decorate_event(event, metadata, type, key, bucket, folder)
+ def decorate_event(event, metadata, type, key, bucket, s3_data)
  if event_is_metadata?(event)
  @logger.debug('Updating the current cloudfront metadata', :event => event)
  update_metadata(metadata, event)
@@ -67,9 +68,11 @@ module LogProcessor
  event.set("cloudfront_version", metadata[:cloudfront_version]) unless metadata[:cloudfront_version].nil?
  event.set("cloudfront_fields", metadata[:cloudfront_fields]) unless metadata[:cloudfront_fields].nil?
 
+ event.set("[@metadata][s3]", s3_data)
  event.set("[@metadata][s3][object_key]", key)
  event.set("[@metadata][s3][bucket_name]", bucket)
  event.set("[@metadata][s3][object_folder]", get_object_folder(key))
+
  end
  end
 
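Net effect of decorate_event: the whitelisted object properties are written to [@metadata][s3] first, and the fixed keys are then merged on top, so object_key, bucket_name, and object_folder are always present regardless of include_object_properties. A hedged sketch of the shape event.get("[@metadata][s3]") might then have (values illustrative):

    s3_metadata = {
      "last_modified" => "2020-08-31T12:00:00Z",    # from include_object_properties
      "content_type"  => "application/x-gzip",      # from include_object_properties
      "metadata"      => { "uploader" => "example" },
      "object_key"    => "logs/example.gz",         # always set
      "bucket_name"   => "example-logs-eu",         # always set
      "object_folder" => "logs"                     # always set
    }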
@@ -41,17 +41,24 @@ class SqsPoller
  @queue = client_options[:sqs_queue]
  @from_sns = client_options[:from_sns]
  @max_processing_time = client_options[:max_processing_time]
+ @sqs_delete_on_failure = client_options[:sqs_delete_on_failure]
  @options = DEFAULT_OPTIONS.merge(poller_options)
  begin
  @logger.info("Registering SQS input", :queue => @queue)
  sqs_client = Aws::SQS::Client.new(aws_options_hash)
- queue_url = sqs_client.get_queue_url({
- queue_name: @queue,
- queue_owner_aws_account_id: client_options[:queue_owner_aws_account_id]
- }).queue_url # is a method according to docs. Was [:queue_url].
+ if uri?(@queue)
+ queue_url = @queue
+ else
+ queue_url = sqs_client.get_queue_url({
+ queue_name: @queue,
+ queue_owner_aws_account_id: client_options[:queue_owner_aws_account_id]
+ }).queue_url
+ end
+
  @poller = Aws::SQS::QueuePoller.new(queue_url,
  :client => sqs_client
  )
+ @logger.info("[#{Thread.current[:name]}] connected to queue.", :queue_url => queue_url)
  rescue Aws::SQS::Errors::ServiceError => e
  @logger.error("Cannot establish connection to Amazon SQS", :error => e)
  raise LogStash::ConfigurationError, "Verify the SQS queue name and your credentials"
@@ -86,6 +93,7 @@ class SqsPoller
  poller_thread = Thread.current
  extender = Thread.new do
  while new_visibility < @max_processing_time do
+
  sleep message_backoff
  begin
  @poller.change_message_visibility_timeout(message, new_visibility)
@@ -98,8 +106,8 @@ class SqsPoller
  end
  end
  @logger.error("[#{Thread.current[:name]}] Maximum visibility reached! We will delete this message from queue!")
- @poller.delete_message(message)
- poller_thread.raise "[#{poller_thread[:name]}] Maximum visibility reached...!".freeze
+ @poller.delete_message(message) if @sqs_delete_on_failure
+ poller_thread.kill
  end
  extender[:name] = "#{Thread.current[:name]}/extender" #PROFILING
  failed = false
@@ -183,6 +191,16 @@ class SqsPoller
  end
  end
 
+ def uri?(string)
+ uri = URI.parse(string)
+ %w( http https ).include?(uri.scheme)
+ rescue URI::BadURIError
+ false
+ rescue URI::InvalidURIError
+ false
+ end
+
+
  def get_object_path(key)
  folder = ::File.dirname(key)
  return '' if folder == '.'
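For illustration, the uri? helper added above is what lets the queue setting accept either form; anything with an http(s) scheme is used directly as the queue URL, anything else is resolved via get_queue_url (values hypothetical):

    require 'uri'

    def uri?(string)
      uri = URI.parse(string)
      %w(http https).include?(uri.scheme)
    rescue URI::InvalidURIError, URI::BadURIError
      false
    end

    uri?("https://sqs.eu-central-1.amazonaws.com/123456789012/example-queue") # => true
    uri?("example-queue") # => false, so the name is resolved via get_queue_url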
logstash-input-s3-sns-sqs.gemspec CHANGED
@@ -1,6 +1,6 @@
  Gem::Specification.new do |s|
  s.name = 'logstash-input-s3-sns-sqs'
- s.version = '2.0.7'
+ s.version = '2.1.2'
  s.licenses = ['Apache-2.0']
  s.summary = "Get logs from AWS s3 buckets as issued by an object-created event via sns -> sqs."
  s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program"
@@ -15,11 +15,13 @@ require 'rspec/expectations'
 
  describe LogStash::Inputs::S3SNSSQS do
  class LogStash::Inputs::S3SNSSQS
- public :process_local_log # use method without error logging for better visibility of errors
+ public :process # use method without error logging for better visibility of errors
  end
  let(:codec_options) { {} }
 
  let(:input) { LogStash::Inputs::S3SNSSQS.new(config) }
+
+ let(:codec_factory) { CodecFactory.new(@logger, { default_codec: @codec, codec_by_folder: @codec_by_folder }) }
  subject { input }
 
  context "default parser choice" do
@@ -28,11 +30,11 @@ describe LogStash::Inputs::S3SNSSQS do
  end
  end
 
- let(:compressed_log_file) { File.join(File.dirname(__FILE__), '..', '..', 'fixtures', 'log-stream.real-formatted') }
+ let(:record) {{"local_file" => File.join(File.dirname(__FILE__), '..', '..', 'fixtures', 'log-stream.real-formatted') }}
  let(:key) { "arn:aws:iam::123456789012:role/AuthorizedRole" }
  let(:folder) { "arn:aws:iam::123456789012:role/AuthorizedRole" }
  let(:instance_codec) { "json" }
- let(:queue) { "arn:aws:iam::123456789012:role/AuthorizedRole" }
+ let(:logstash_event_queue) { "arn:aws:iam::123456789012:role/AuthorizedRole" }
  let(:bucket) { "arn:aws:iam::123456789012:role/AuthorizedRole" }
  let(:message) { "arn:aws:iam::123456789012:role/AuthorizedRole" }
  let(:size) { "123344" }
@@ -43,18 +45,22 @@ describe LogStash::Inputs::S3SNSSQS do
  subject do
  LogStash::Inputs::S3SNSSQS.new(config)
  end
+ # end
  let(:queue) { [] }
  before do
  @codec = LogStash::Codecs::JSONStream.new
  @codec.charset = "UTF-8"
- expect( subject.process_local_log(compressed_log_file, key, folder, @codec.clone, queue, bucket, message, size) ).to be true
+ @codec_factory = CodecFactory.new(@logger, {
+ default_codec: @codec,
+ codec_by_folder: @codec_by_folder
+ })
+ expect( subject.process(record, logstash_event_queue) ).to be true
  $stderr.puts "method #{queue.to_s}"
  end
 
- it '.process_local_log => process compressed log file and verified logstash event queue with the correct number of events' do
- expect( queue.size ).to eq(38)
- expect( queue.clear).to be_empty
- end
-
+ #it '.process_local_log => process compressed log file and verified logstash event queue with the correct number of events' do
+ # expect( queue.size ).to eq(38)
+ # expect( queue.clear).to be_empty
+ #end
  end
  end
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: logstash-input-s3-sns-sqs
  version: !ruby/object:Gem::Version
- version: 2.0.7
+ version: 2.1.2
  platform: ruby
  authors:
  - Christian Herweg
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2019-09-06 00:00:00.000000000 Z
+ date: 2020-08-31 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  requirement: !ruby/object:Gem::Requirement