logstash-input-s3-sns-sqs 1.6.1 → 2.0.0

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2ebb0c135e09afcb7d44c4388817dec2050668e4255c05c62b869aa845f50648
4
- data.tar.gz: 7b06d7101b9b6e5431b6014c71346b3f0edb7e12aff589f67f6f042786e87724
3
+ metadata.gz: 253c85cd1d1dfa22a59d282a0eeae4e5a71c5630473db65a768fe3e00131adc9
4
+ data.tar.gz: 5e98e0d9b47c7f9b47d6e11aefa2d5c14f59e4fe6c66a7bd0934a250bb8fbcfb
5
5
  SHA512:
6
- metadata.gz: fad96d095a81b60159a6097cbefd1abd3e3924f9e6d2f77699cd73dc1be9db0cf1641249adaa778f9a38a968643970fae90f6f32e22679f564a360c47f5cb033
7
- data.tar.gz: 404a3a25c01b391b85385f8aa427d80fc32aba34d0161b1a751f9bf7a0c19cf1bfcac132d3797a510132b100fba69b8eaa3aea7f6e802387f7ff7e52a4b9852b
6
+ metadata.gz: ee38fcc3de70af94b7de1570b054cdf435224c77b64bebd1db9b8eee3a4097b91f0320ce09d732fdefd8b1d8e0ca722c0a9b799e49dceda881e36bb7b26417e0
7
+ data.tar.gz: d6818a6bdead5aae583a09e2af6e1e869a7fee4902c6503def1150584afb16f415a825eb2a34b2e8a05130d8e6348bbda28f4885852e8eb92ec552e68382d15c
data/CHANGELOG.md CHANGED
@@ -1,3 +1,13 @@
1
+ ##2.0.0
2
+ Breaking Changes:
3
+ - s3_key_prefix was never fully functional and will be removed; it is currently used only for backward-compatible metadata.folder values.
4
+   Configured s3 path keys are treated as regular expressions unless they match exactly.
5
+ - s3_options_by_bucket replaces all s3_* options.
6
+   Deprecated options will be merged into the new structure for one more release.
7
+ Changes:
8
+ - Refactor plugin structure to be more modular
9
+ - Rework threading design
10
+ - Introduce s3_options_by_bucket to configure per-bucket settings (e.g. aws_options_hash or type)
1
11
  ##1.6.1
2
12
  - Fix typo in gzip error logging
3
13
  ##1.6.0
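
The changelog above centres on the new `s3_options_by_bucket` setting. As a hedged illustration of the structure that `register` in s3snssqs.rb consumes (bucket names, role ARN, folder keys and types below are invented, not taken from the release):

```ruby
# Hypothetical value for s3_options_by_bucket: an array of per-bucket hashes.
# Bucket names and folder keys are matched as regexes when no exact match exists.
s3_options_by_bucket = [
  {
    'bucket_name' => 'my-logs-bucket',
    'credentials' => { 'role' => 'arn:aws:iam::123456789012:role/logstash-read' },
    'folders'     => [
      { 'key' => 'cloudtrail', 'codec' => 'json',  'type' => 'cloudtrail' },
      { 'key' => 'elb',        'codec' => 'plain', 'type' => 'elb_logs' }
    ]
  },
  {
    'bucket_name' => 'my-other-bucket',
    'credentials' => {
      'access_key_id'     => 'AKIA...',
      'secret_access_key' => 'change-me'
    }
  }
]
```

For one release, the deprecated options (`s3_access_key_id`, `s3_secret_access_key`, `s3_role_arn`, `set_codec_by_folder`) are folded into an equivalent catch-all entry of this structure, as the `register` method further below shows.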
data/lib/logstash/inputs/codec_factory.rb ADDED
@@ -0,0 +1,37 @@
1
+ # CodecFactory:
2
+ # lazy-fetch codec plugins
3
+
4
+ class CodecFactory
5
+ def initialize(logger, options)
6
+ @logger = logger
7
+ @default_codec = options[:default_codec]
8
+ @codec_by_folder = options[:codec_by_folder]
9
+ @codecs = {
10
+ 'default' => @default_codec
11
+ }
12
+ end
13
+
14
+ def get_codec(record)
15
+ codec = find_codec(record)
16
+ if @codecs[codec].nil?
17
+ @codecs[codec] = get_codec_plugin(codec)
18
+ end
19
+ @logger.debug("Switching to codec #{codec}") if codec != 'default'
20
+ return @codecs[codec]
21
+ end
22
+
23
+ private
24
+
25
+ def find_codec(record)
26
+ bucket, key, folder = record[:bucket], record[:key], record[:folder]
27
+ unless @codec_by_folder[bucket].nil?
28
+ @logger.debug("trying to find codec for folder #{folder}", :codec => @codec_by_folder[bucket][folder])
29
+ return @codec_by_folder[bucket][folder] unless @codec_by_folder[bucket][folder].nil?
30
+ end
31
+ return 'default'
32
+ end
33
+
34
+ def get_codec_plugin(name, options = {})
35
+ LogStash::Plugin.lookup('codec', name).new(options)
36
+ end
37
+ end
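
A rough usage sketch of the factory, assuming it runs inside a Logstash pipeline (where `LogStash::Plugin.lookup` is available); `logger`, `plain_codec` and the bucket/folder names are placeholders standing in for the plugin's own objects:

```ruby
# codec_by_folder maps bucket => { folder => codec name }; in the plugin both
# levels are regex-keyed hashes built by hash_key_is_regex in s3snssqs.rb.
codec_by_folder = {
  'my-logs-bucket' => { 'cloudtrail' => 'json', 'elb' => 'plain' }
}

factory = CodecFactory.new(logger, {
  default_codec:   plain_codec,      # the plugin passes its configured @codec here
  codec_by_folder: codec_by_folder
})

record = { bucket: 'my-logs-bucket', key: 'cloudtrail/2019/01/log.gz', folder: 'cloudtrail' }
codec  = factory.get_codec(record)   # looks up 'json', instantiates it once, then caches it
```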
data/lib/logstash/inputs/s3/client_factory.rb ADDED
@@ -0,0 +1,61 @@
1
+ # not needed - Mutex is part of core lib:
2
+ #require 'thread'
3
+
4
+ class S3ClientFactory
5
+
6
+ def initialize(logger, options, aws_options_hash)
7
+ @logger = logger
8
+ @aws_options_hash = aws_options_hash
9
+ @s3_default_options = Hash[options[:s3_default_options].map { |k, v| [k.to_sym, v] }]
10
+ @aws_options_hash.merge!(@s3_default_options) unless @s3_default_options.empty?
11
+ @sts_client = Aws::STS::Client.new(region: options[:aws_region])
12
+ @credentials_by_bucket = options[:s3_credentials_by_bucket]
13
+ @logger.debug("Credentials by Bucket", :credentials => @credentials_by_bucket)
14
+ @default_session_name = options[:s3_role_session_name]
15
+ @clients_by_bucket = {}
16
+ #@mutexes_by_bucket = {}
17
+ @creation_mutex = Mutex.new
18
+ end
19
+
20
+ def get_s3_client(bucket_name)
21
+ bucket_symbol = bucket_name.to_sym
22
+ @creation_mutex.synchronize do
23
+ if @clients_by_bucket[bucket_symbol].nil?
24
+ options = @aws_options_hash.clone
25
+ unless @credentials_by_bucket[bucket_name].nil?
26
+ options.merge!(credentials: get_s3_auth(@credentials_by_bucket[bucket_name]))
27
+ end
28
+ @clients_by_bucket[bucket_symbol] = Aws::S3::Client.new(options)
29
+ @logger.debug("Created a new S3 Client", :bucket_name => bucket_name, :client => @clients_by_bucket[bucket_symbol], :used_options => options)
30
+ #@mutexes_by_bucket[bucket_symbol] = Mutex.new
31
+ end
32
+ end
33
+ # to be thread-safe, one uses this method like this:
34
+ # s3_client_factory.get_s3_client(my_s3_bucket) do
35
+ # ... do stuff ...
36
+ # end
37
+ # FIXME: this does not allow concurrent downloads from the same bucket!
38
+ #@mutexes_by_bucket[bucket_symbol].synchronize do
39
+ # So we are testing this without this mutex.
40
+ yield @clients_by_bucket[bucket_symbol]
41
+ #end
42
+ end
43
+
44
+ private
45
+
46
+ def get_s3_auth(credentials)
47
+ # reminder: these are auto-refreshing!
48
+ if credentials.key?('role')
49
+ @logger.debug("Assume Role", :role => credentials["role"])
50
+ return Aws::AssumeRoleCredentials.new(
51
+ client: @sts_client,
52
+ role_arn: credentials['role'],
53
+ role_session_name: @default_session_name
54
+ )
55
+ elsif credentials.key?('access_key_id') && credentials.key?('secret_access_key')
56
+ @logger.debug("Fetch credentials", :access_key => credentials['access_key_id'])
57
+ return Aws::Credentials.new(credentials)
58
+ end
59
+ end
60
+
61
+ end # class
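
A sketch of how the plugin wires this factory up (compare `register` in s3snssqs.rb); `logger`, `aws_options_hash`, the bucket name and the role ARN are placeholders:

```ruby
# Per-bucket credentials: a role to assume, or an access key/secret key pair.
# In the plugin this hash is wrapped by hash_key_is_regex for regex lookups.
credentials_by_bucket = {
  'cross-account-bucket' => { 'role' => 'arn:aws:iam::123456789012:role/logstash-read' }
}

factory = S3ClientFactory.new(logger, {
  aws_region:               'eu-central-1',
  s3_default_options:       {},                    # e.g. { 'force_path_style' => true }
  s3_credentials_by_bucket: credentials_by_bucket,
  s3_role_session_name:     'logstash'
}, aws_options_hash)

# Clients are created lazily, cached per bucket and handed out via a block:
factory.get_s3_client('cross-account-bucket') do |s3|
  s3.get_object(bucket: 'cross-account-bucket', key: 'some/key', response_target: '/tmp/some_key')
end
```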
data/lib/logstash/inputs/s3/downloader.rb ADDED
@@ -0,0 +1,55 @@
1
+ # encoding: utf-8
2
+ require 'fileutils'
3
+ require 'thread'
4
+
5
+ class S3Downloader
6
+
7
+ def initialize(logger, stop_semaphore, options)
8
+ @logger = logger
9
+ @stopped = stop_semaphore
10
+ @factory = options[:s3_client_factory]
11
+ @delete_on_success = options[:delete_on_success]
12
+ end
13
+
14
+ def copy_s3object_to_disk(record)
15
+ # (from docs) WARNING:
16
+ # yielding data to a block disables retries of networking errors!
17
+ begin
18
+ @factory.get_s3_client(record[:bucket]) do |s3|
19
+ response = s3.get_object(
20
+ bucket: record[:bucket],
21
+ key: record[:key],
22
+ response_target: record[:local_file]
23
+ )
24
+ end
25
+ rescue Aws::S3::Errors::ServiceError => e
26
+ @logger.error("Unable to download file. Requeuing the message", :error => e, :record => record)
27
+ # prevent sqs message deletion
28
+ throw :skip_delete
29
+ end
30
+ throw :skip_delete if stop?
31
+ return true
32
+ end
33
+
34
+ def cleanup_local_object(record)
35
+ FileUtils.remove_entry_secure(record[:local_file], true) if ::File.exists?(record[:local_file])
36
+ rescue Exception => e
37
+ @logger.warn("Could not delete file", :file => record[:local_file], :error => e)
38
+ end
39
+
40
+ def cleanup_s3object(record)
41
+ return unless @delete_on_success
42
+ begin
43
+ @factory.get_s3_client(record[:bucket]) do |s3|
44
+ s3.delete_object(bucket: record[:bucket], key: record[:key])
45
+ end
46
+ rescue Exception => e
47
+ @logger.warn("Failed to delete s3 object", :record => record, :error => e)
48
+ end
49
+ end
50
+
51
+ def stop?
52
+ @stopped.value
53
+ end
54
+
55
+ end # class
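
The downloader is driven from the worker thread in s3snssqs.rb; roughly, one processing cycle looks like the sketch below (the record contents and local path are illustrative):

```ruby
# record comes from the SQS poller with :bucket, :key and :size already set.
record = { bucket: 'my-logs-bucket', key: 'elb/2019/01/log.gz', size: 1234 }
record[:local_file] = File.join(Dir.tmpdir, File.basename(record[:key]))

completed = catch(:skip_delete) do
  downloader.copy_s3object_to_disk(record)   # throws :skip_delete on S3 errors or shutdown
  # ... decode the local file and push the resulting events to the queue ...
  true
end
downloader.cleanup_local_object(record)          # always remove the temporary file
downloader.cleanup_s3object(record) if completed # deletes from S3 only when delete_on_success is set
```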
data/lib/logstash/inputs/s3snssqs.rb CHANGED
@@ -1,15 +1,20 @@
1
1
  # encoding: utf-8
2
- #
3
2
  require "logstash/inputs/threadable"
4
3
  require "logstash/namespace"
5
4
  require "logstash/timestamp"
6
5
  require "logstash/plugin_mixins/aws_config"
6
+ require "logstash/shutdown_watcher"
7
7
  require "logstash/errors"
8
8
  require 'logstash/inputs/s3sqs/patch'
9
9
  require "aws-sdk"
10
- require "stud/interval"
11
- require 'cgi'
12
- require 'logstash/inputs/mime/MagicgzipValidator'
10
+ # "object-oriented interfaces on top of API clients"...
11
+ # => Overhead. FIXME: needed?
12
+ #require "aws-sdk-resources"
13
+ require "fileutils"
14
+ require "concurrent"
15
+ # unused in code:
16
+ #require "stud/interval"
17
+ #require "digest/md5"
13
18
 
14
19
  require 'java'
15
20
  java_import java.io.InputStream
@@ -19,6 +24,14 @@ java_import java.io.BufferedReader
19
24
  java_import java.util.zip.GZIPInputStream
20
25
  java_import java.util.zip.ZipException
21
26
 
27
+ # our helper classes
28
+ # these may go into this file for brevity...
29
+ require_relative 'sqs/poller'
30
+ require_relative 's3/client_factory'
31
+ require_relative 's3/downloader'
32
+ require_relative 'codec_factory'
33
+ require_relative 's3snssqs/log_processor'
34
+
22
35
  Aws.eager_autoload!
23
36
 
24
37
  # Get logs from AWS s3 buckets as issued by an object-created event via sqs.
@@ -89,472 +102,241 @@ Aws.eager_autoload!
89
102
  #
90
103
  class LogStash::Inputs::S3SNSSQS < LogStash::Inputs::Threadable
91
104
  include LogStash::PluginMixins::AwsConfig::V2
92
-
93
- BACKOFF_SLEEP_TIME = 1
94
- BACKOFF_FACTOR = 2
95
- MAX_TIME_BEFORE_GIVING_UP = 60
96
- EVENT_SOURCE = 'aws:s3'
97
- EVENT_TYPE = 'ObjectCreated'
105
+ include LogProcessor
98
106
 
99
107
  config_name "s3snssqs"
100
108
 
101
109
  default :codec, "plain"
102
110
 
111
+
112
+
113
+ # Future config might look somewhat like this:
114
+ #
115
+ # s3_options_by_bucket = [
116
+ # {
117
+ # "bucket_name": "my-beautiful-bucket",
118
+ # "credentials": { "role": "aws:role:arn:for:bucket:access" },
119
+ # "folders": [
120
+ # {
121
+ # "key": "my_folder",
122
+ # "codec": "json"
123
+ # "type": "my_lovely_index"
124
+ # },
125
+ # {
126
+ # "key": "my_other_folder",
127
+ # "codec": "json_stream"
128
+ # "type": ""
129
+ # }
130
+ # ]
131
+ # },
132
+ # {
133
+ # "bucket_name": "my-other-bucket"
134
+ # "credentials": {
135
+ # "access_key_id": "some-id",
136
+ # "secret_access_key": "some-secret-key"
137
+ # },
138
+ # "folders": [
139
+ # {
140
+ # "key": ""
141
+ # }
142
+ # ]
143
+ # }
144
+ # }
145
+
146
+ config :s3_key_prefix, :validate => :string, :default => '', :deprecated => true #, :obsolete => " Will be moved to s3_options_by_bucket/types"
147
+
148
+ config :s3_access_key_id, :validate => :string, :deprecated => true #, :obsolete => "Please migrate to :s3_options_by_bucket. We will remove this option in the next Version"
149
+ config :s3_secret_access_key, :validate => :string, :deprecated => true #, :obsolete => "Please migrate to :s3_options_by_bucket. We will remove this option in the next Version"
150
+ config :s3_role_arn, :validate => :string, :deprecated => true #, :obsolete => "Please migrate to :s3_options_by_bucket. We will remove this option in the next Version"
151
+
152
+ config :set_codec_by_folder, :validate => :hash, :default => {}, :deprecated => true #, :obsolete => "Please migrate to :s3_options_by_bucket. We will remove this option in the next Version"
153
+
154
+ # Default Options for the S3 clients
155
+ config :s3_default_options, :validate => :hash, :required => false, :default => {}
156
+ # We need a list of buckets, together with role arns and possible folder/codecs:
157
+ config :s3_options_by_bucket, :validate => :array, :required => false # TODO: true
158
+ # Session name to use when assuming an IAM role
159
+ config :s3_role_session_name, :validate => :string, :default => "logstash"
160
+
161
+ ### sqs
103
162
  # Name of the SQS Queue to pull messages from. Note that this is just the name of the queue, not the URL or ARN.
104
163
  config :queue, :validate => :string, :required => true
105
- config :s3_key_prefix, :validate => :string, :default => ''
106
- #Sometimes you need another key for s3. This is a first test...
107
- config :s3_access_key_id, :validate => :string
108
- config :s3_secret_access_key, :validate => :string
109
164
  config :queue_owner_aws_account_id, :validate => :string, :required => false
110
- #If you have different file-types in you s3 bucket, you could define codec by folder
111
- #set_codec_by_folder => {"My-ELB-logs" => "plain"}
112
- config :set_codec_by_folder, :validate => :hash, :default => {}
113
- config :delete_on_success, :validate => :boolean, :default => false
114
- config :sqs_explicit_delete, :validate => :boolean, :default => false
115
165
  # Whether the event is processed though an SNS to SQS. (S3>SNS>SQS = true |S3>SQS=false)
116
166
  config :from_sns, :validate => :boolean, :default => true
117
- # To run in multiple threads use this
118
- config :consumer_threads, :validate => :number
119
- config :temporary_directory, :validate => :string, :default => File.join(Dir.tmpdir, "logstash")
120
- # The AWS IAM Role to assume, if any.
121
- # This is used to generate temporary credentials typically for cross-account access.
122
- # See https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRole.html for more information.
123
- config :s3_role_arn, :validate => :string
124
- # Session name to use when assuming an IAM role
125
- config :s3_role_session_name, :validate => :string, :default => "logstash"
167
+ config :sqs_skip_delete, :validate => :boolean, :default => false
168
+ config :delete_on_success, :validate => :boolean, :default => false
126
169
  config :visibility_timeout, :validate => :number, :default => 600
127
170
 
171
+ ### system
172
+ config :temporary_directory, :validate => :string, :default => File.join(Dir.tmpdir, "logstash")
173
+ # To run in multiple threads use this
174
+ config :consumer_threads, :validate => :number, :default => 1
128
175
 
129
- attr_reader :poller
130
- attr_reader :s3
131
-
132
-
133
- def set_codec (folder)
134
- begin
135
- @logger.debug("Automatically switching from #{@codec.class.config_name} to #{set_codec_by_folder[folder]} codec", :plugin => self.class.config_name)
136
- LogStash::Plugin.lookup("codec", "#{set_codec_by_folder[folder]}").new("charset" => @codec.charset)
137
- rescue Exception => e
138
- @logger.error("Failed to set_codec with error", :error => e)
139
- end
140
- end
141
176
 
142
177
  public
143
- def register
144
- require "fileutils"
145
- require "digest/md5"
146
- require "aws-sdk-resources"
147
178
 
148
- @runner_threads = []
149
- #make this hash keys lookups match like regex
150
- hash_key_is_regex(set_codec_by_folder)
151
- @logger.info("Registering SQS input", :queue => @queue)
152
- setup_queue
179
+ # --- BEGIN plugin interface ----------------------------------------#
153
180
 
181
+ # initialisation
182
+ def register
183
+ # prepare system
154
184
  FileUtils.mkdir_p(@temporary_directory) unless Dir.exist?(@temporary_directory)
155
- end
156
-
157
- def setup_queue
158
- aws_sqs_client = Aws::SQS::Client.new(aws_options_hash)
159
- queue_url = aws_sqs_client.get_queue_url({ queue_name: @queue, queue_owner_aws_account_id: @queue_owner_aws_account_id})[:queue_url]
160
- @poller = Aws::SQS::QueuePoller.new(queue_url, :client => aws_sqs_client)
161
- get_s3client
162
- @s3_resource = get_s3object
163
- rescue Aws::SQS::Errors::ServiceError => e
164
- @logger.error("Cannot establish connection to Amazon SQS", :error => e)
165
- raise LogStash::ConfigurationError, "Verify the SQS queue name and your credentials"
166
- end
167
-
168
- def polling_options
169
- {
170
- # we will query 1 message at a time, so we can ensure correct error handling if we can't download a single file correctly
171
- # (we will throw :skip_delete if download size isn't correct to process the event again later
172
- # -> set a reasonable "Default Visibility Timeout" for your queue, so that there's enough time to process the log files)
173
- :max_number_of_messages => 1,
174
- # we will use the queue's setting, a good value is 10 seconds
175
- # (to ensure fast logstash shutdown on the one hand and few api calls on the other hand)
176
- :skip_delete => false,
177
- :visibility_timeout => @visibility_timeout,
178
- :wait_time_seconds => nil,
179
- }
180
- end
181
185
 
182
- def handle_message(message, queue, instance_codec)
183
- hash = JSON.parse message.body
184
- @logger.debug("handle_message", :hash => hash, :message => message)
185
- #If send via sns there is an additional JSON layer
186
- if @from_sns then
187
- hash = JSON.parse(hash['Message'])
188
- end
189
- # there may be test events sent from the s3 bucket which won't contain a Records array,
190
- # we will skip those events and remove them from queue
191
- if hash['Records'] then
192
- # typically there will be only 1 record per event, but since it is an array we will
193
- # treat it as if there could be more records
194
- hash['Records'].each do |record|
195
- @logger.debug("We found a record", :record => record)
196
- # in case there are any events with Records that aren't s3 object-created events and can't therefore be
197
- # processed by this plugin, we will skip them and remove them from queue
198
- if record['eventSource'] == EVENT_SOURCE and record['eventName'].start_with?(EVENT_TYPE) then
199
- @logger.debug("It is a valid record")
200
- bucket = CGI.unescape(record['s3']['bucket']['name'])
201
- key = CGI.unescape(record['s3']['object']['key'])
202
- size = record['s3']['object']['size']
203
- type_folder = get_object_folder(key)
204
- # Set input codec by :set_codec_by_folder
205
- instance_codec = set_codec(type_folder) unless set_codec_by_folder["#{type_folder}"].nil?
206
- # try download and :skip_delete if it fails
207
- #if record['s3']['object']['size'] < 10000000 then
208
- process_log(bucket, key, type_folder, instance_codec, queue, message, size)
209
- #else
210
- # @logger.info("Your file is too big")
211
- #end
186
+ @credentials_by_bucket = hash_key_is_regex({})
187
+ # create the bucket=>folder=>codec lookup from config options
188
+ @codec_by_folder = hash_key_is_regex({})
189
+ @type_by_folder = hash_key_is_regex({})
190
+
191
+ # use deprecated settings only if new config is missing:
192
+ if @s3_options_by_bucket.nil?
193
+ # We don't know any bucket name, so we must rely on a "catch-all" regex
194
+ s3_options = {
195
+ 'bucket_name' => '.*',
196
+ 'folders' => @set_codec_by_folder.map { |key, codec|
197
+ { 'key' => key, 'codec' => codec }
198
+ }
199
+ }
200
+ if @s3_role_arn.nil?
201
+ # access key/secret key pair needed
202
+ unless @s3_access_key_id.nil? or @s3_secret_access_key.nil?
203
+ s3_options['credentials'] = {
204
+ 'access_key_id' => @s3_access_key_id,
205
+ 'secret_access_key' => @s3_secret_access_key
206
+ }
212
207
  end
208
+ else
209
+ s3_options['credentials'] = {
210
+ 'role' => @s3_role_arn
211
+ }
213
212
  end
213
+ @s3_options_by_bucket = [s3_options]
214
214
  end
215
- end
216
-
217
- private
218
- def process_log(bucket , key, folder, instance_codec, queue, message, size)
219
- s3bucket = @s3_resource.bucket(bucket)
220
- @logger.debug("Lets go reading file", :bucket => bucket, :key => key)
221
- object = s3bucket.object(key)
222
- filename = File.join(temporary_directory, File.basename(key))
223
- if download_remote_file(object, filename)
224
- if process_local_log( filename, key, folder, instance_codec, queue, bucket, message, size)
225
- begin
226
- FileUtils.remove_entry_secure(filename, true) if File.exists? filename
227
- delete_file_from_bucket(object)
228
- rescue Exception => e
229
- @logger.debug("We had problems to delete your file", :file => filename, :error => e)
230
- end
231
- end
232
- else
233
- begin
234
- FileUtils.remove_entry_secure(filename, true) if File.exists? filename
235
- rescue Exception => e
236
- @logger.debug("We had problems clean up your tmp dir", :file => filename, :error => e)
237
- end
238
- end
239
- end
240
-
241
- private
242
- # Stream the remove file to the local disk
243
- #
244
- # @param [S3Object] Reference to the remove S3 objec to download
245
- # @param [String] The Temporary filename to stream to.
246
- # @return [Boolean] True if the file was completely downloaded
247
- def download_remote_file(remote_object, local_filename)
248
- completed = false
249
- @logger.debug("S3 input: Download remote file", :remote_key => remote_object.key, :local_filename => local_filename)
250
- File.open(local_filename, 'wb') do |s3file|
251
- return completed if stop?
252
- begin
253
- remote_object.get(:response_target => s3file)
254
- rescue Aws::S3::Errors::ServiceError => e
255
- @logger.error("Unable to download file. We´ll requeue the message", :file => remote_object.inspect)
256
- throw :skip_delete
257
- end
258
- end
259
- completed = true
260
215
 
261
- return completed
262
- end
263
-
264
- private
265
-
266
- # Read the content of the local file
267
- #
268
- # @param [Queue] Where to push the event
269
- # @param [String] Which file to read from
270
- # @return [Boolean] True if the file was completely read, false otherwise.
271
- def process_local_log(filename, key, folder, instance_codec, queue, bucket, message, size)
272
- @logger.debug('Processing file', :filename => filename)
273
- metadata = {}
274
- start_time = Time.now
275
- # Currently codecs operates on bytes instead of stream.
276
- # So all IO stuff: decompression, reading need to be done in the actual
277
- # input and send as bytes to the codecs.
278
- read_file(filename) do |line|
279
- if (Time.now - start_time) >= (@visibility_timeout.to_f / 100.0 * 90.to_f)
280
- @logger.info("Increasing the visibility_timeout ... ", :timeout => @visibility_timeout, :filename => filename, :filesize => size, :start => start_time )
281
- poller.change_message_visibility_timeout(message, @visibility_timeout)
282
- start_time = Time.now
216
+ @s3_options_by_bucket.each do |options|
217
+ bucket = options['bucket_name']
218
+ if options.key?('credentials')
219
+ @credentials_by_bucket[bucket] = options['credentials']
283
220
  end
284
- if stop?
285
- @logger.warn("Logstash S3 input, stop reading in the middle of the file, we will read it again when logstash is started")
286
- return false
287
- end
288
- line = line.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: "\u2370")
289
- #@logger.debug("read line", :line => line)
290
- instance_codec.decode(line) do |event|
291
- @logger.debug("decorate event")
292
- # We are making an assumption concerning cloudfront
293
- # log format, the user will use the plain or the line codec
294
- # and the message key will represent the actual line content.
295
- # If the event is only metadata the event will be drop.
296
- # This was the behavior of the pre 1.5 plugin.
297
- #
298
- # The line need to go through the codecs to replace
299
- # unknown bytes in the log stream before doing a regexp match or
300
- # you will get a `Error: invalid byte sequence in UTF-8'
301
- local_decorate_and_queue(event, queue, key, folder, metadata, bucket)
221
+ if options.key?('folders')
222
+ # make these hashes do key lookups using regex matching
223
+ folders = hash_key_is_regex({})
224
+ types = hash_key_is_regex({})
225
+ options['folders'].each do |entry|
226
+ @logger.debug("options for folder ", :folder => entry)
227
+ folders[entry['key']] = entry['codec'] if entry.key?('codec')
228
+ types[entry['key']] = entry['type'] if entry.key?('type')
229
+ end
230
+ @codec_by_folder[bucket] = folders unless folders.empty?
231
+ @type_by_folder[bucket] = types unless types.empty?
302
232
  end
303
233
  end
304
- @logger.debug("end if file #{filename}")
305
- #@logger.info("event pre flush", :event => event)
306
- # #ensure any stateful codecs (such as multi-line ) are flushed to the queue
307
- instance_codec.flush do |event|
308
- local_decorate_and_queue(event, queue, key, folder, metadata, bucket)
309
- @logger.debug("We´e to flush an incomplete event...", :event => event)
310
- end
311
-
312
- return true
313
- end # def process_local_log
314
-
315
- private
316
- def local_decorate_and_queue(event, queue, key, folder, metadata, bucket)
317
- @logger.debug('decorating event', :event => event.to_s)
318
- if event_is_metadata?(event)
319
- @logger.debug('Event is metadata, updating the current cloudfront metadata', :event => event)
320
- update_metadata(metadata, event)
321
- else
322
-
323
- decorate(event)
324
-
325
- event.set("cloudfront_version", metadata[:cloudfront_version]) unless metadata[:cloudfront_version].nil?
326
- event.set("cloudfront_fields", metadata[:cloudfront_fields]) unless metadata[:cloudfront_fields].nil?
327
-
328
- event.set("[@metadata][s3][object_key]", key)
329
- event.set("[@metadata][s3][bucket_name]", bucket)
330
- event.set("[@metadata][s3][object_folder]", folder)
331
- @logger.debug('add metadata', :object_key => key, :bucket => bucket, :folder => folder)
332
- queue << event
333
- end
334
- end
335
-
336
-
337
- private
338
- def get_object_folder(key)
339
- if match=/#{s3_key_prefix}\/?(?<type_folder>.*?)\/.*/.match(key)
340
- return match['type_folder']
341
- else
342
- return ""
343
- end
344
- end
345
-
346
- private
347
- def read_file(filename, &block)
348
- if gzip?(filename)
349
- read_gzip_file(filename, block)
350
- else
351
- read_plain_file(filename, block)
352
- end
353
- end
354
-
355
- def read_plain_file(filename, block)
356
- File.open(filename, 'rb') do |file|
357
- file.each(&block)
358
- end
359
- end
360
-
361
- private
362
- def read_gzip_file(filename, block)
363
- file_stream = FileInputStream.new(filename)
364
- gzip_stream = GZIPInputStream.new(file_stream)
365
- decoder = InputStreamReader.new(gzip_stream, "UTF-8")
366
- buffered = BufferedReader.new(decoder)
367
-
368
- while (line = buffered.readLine())
369
- block.call(line)
370
- end
371
- rescue ZipException => e
372
- @logger.error("Gzip codec: We cannot uncompress the gzip file", :filename => filename, :error => e)
373
- ensure
374
- buffered.close unless buffered.nil?
375
- decoder.close unless decoder.nil?
376
- gzip_stream.close unless gzip_stream.nil?
377
- file_stream.close unless file_stream.nil?
378
- end
379
-
380
- private
381
- def gzip?(filename)
382
- return true if filename.end_with?('.gz','.gzip')
383
- MagicGzipValidator.new(File.new(filename, 'r')).valid?
384
- rescue Exception => e
385
- @logger.debug("Problem while gzip detection", :error => e)
386
- end
387
-
388
- private
389
- def delete_file_from_bucket(object)
390
- if @delete_on_success
391
- object.delete()
392
- end
393
- end
394
-
395
-
396
- private
397
- def get_s3client
398
- if s3_access_key_id and s3_secret_access_key
399
- @logger.debug("Using S3 Credentials from config", :ID => aws_options_hash.merge(:access_key_id => s3_access_key_id, :secret_access_key => s3_secret_access_key) )
400
- @s3_client = Aws::S3::Client.new(aws_options_hash.merge(:access_key_id => s3_access_key_id, :secret_access_key => s3_secret_access_key))
401
- elsif @s3_role_arn
402
- @s3_client = Aws::S3::Client.new(aws_options_hash.merge!({ :credentials => s3_assume_role }))
403
- @logger.debug("Using S3 Credentials from role", :s3client => @s3_client.inspect, :options => aws_options_hash.merge!({ :credentials => s3_assume_role }))
404
- else
405
- @s3_client = Aws::S3::Client.new(aws_options_hash)
406
- end
407
- end
408
-
409
- private
410
- def get_s3object
411
- s3 = Aws::S3::Resource.new(client: @s3_client)
412
- end
413
-
414
- private
415
- def s3_assume_role()
416
- Aws::AssumeRoleCredentials.new(
417
- client: Aws::STS::Client.new(region: @region),
418
- role_arn: @s3_role_arn,
419
- role_session_name: @s3_role_session_name
420
- )
421
- end
422
-
423
- private
424
- def event_is_metadata?(event)
425
- return false unless event.get("message").class == String
426
- line = event.get("message")
427
- version_metadata?(line) || fields_metadata?(line)
428
- end
429
-
430
- private
431
- def version_metadata?(line)
432
- line.start_with?('#Version: ')
433
- end
434
-
435
- private
436
- def fields_metadata?(line)
437
- line.start_with?('#Fields: ')
438
- end
439
-
440
- private
441
- def update_metadata(metadata, event)
442
- line = event.get('message').strip
443
-
444
- if version_metadata?(line)
445
- metadata[:cloudfront_version] = line.split(/#Version: (.+)/).last
446
- end
447
234
 
448
- if fields_metadata?(line)
449
- metadata[:cloudfront_fields] = line.split(/#Fields: (.+)/).last
450
- end
235
+ @received_stop = Concurrent::AtomicBoolean.new(false)
236
+
237
+ # instantiate helpers
238
+ @sqs_poller = SqsPoller.new(@logger, @received_stop, @queue, {
239
+ queue_owner_aws_account_id: @queue_owner_aws_account_id,
240
+ from_sns: @from_sns,
241
+ sqs_explicit_delete: @sqs_explicit_delete,
242
+ visibility_timeout: @visibility_timeout
243
+ }, aws_options_hash)
244
+ @s3_client_factory = S3ClientFactory.new(@logger, {
245
+ aws_region: @region,
246
+ s3_default_options: @s3_default_options,
247
+ s3_credentials_by_bucket: @credentials_by_bucket,
248
+ s3_role_session_name: @s3_role_session_name
249
+ }, aws_options_hash)
250
+ @s3_downloader = S3Downloader.new(@logger, @received_stop, {
251
+ s3_client_factory: @s3_client_factory,
252
+ delete_on_success: @delete_on_success
253
+ })
254
+ @codec_factory = CodecFactory.new(@logger, {
255
+ default_codec: @codec,
256
+ codec_by_folder: @codec_by_folder
257
+ })
258
+ #@log_processor = LogProcessor.new(self)
259
+
260
+ # administrative stuff
261
+ @worker_threads = []
451
262
  end
452
263
 
453
- public
454
- def run(queue)
455
- if @consumer_threads
456
- # ensure we can stop logstash correctly
457
- @runner_threads = consumer_threads.times.map { |consumer| thread_runner(queue) }
458
- @runner_threads.each { |t| t.join }
459
- else
460
- #Fallback to simple single thread worker
461
- # ensure we can stop logstash correctly
462
- poller.before_request do |stats|
463
- if stop? then
464
- @logger.warn("issuing :stop_polling on stop?", :queue => @queue)
465
- # this can take up to "Receive Message Wait Time" (of the sqs queue) seconds to be recognized
466
- throw :stop_polling
467
- end
468
- end
469
- # poll a message and process it
470
- run_with_backoff do
471
- poller.poll(polling_options) do |message|
472
- begin
473
- handle_message(message, queue, @codec.clone)
474
- poller.delete_message(message)
475
- rescue Exception => e
476
- @logger.info("Error in poller block ... ", :error => e)
477
- end
478
- end
479
- end
264
+ # startup
265
+ def run(logstash_event_queue)
266
+ #LogStash::ShutdownWatcher.abort_threshold(30)
267
+ # start them
268
+ @worker_threads = @consumer_threads.times.map do |_|
269
+ run_worker_thread(logstash_event_queue)
480
270
  end
271
+ # and wait (possibly infinitely) for them to shut down
272
+ @worker_threads.each { |t| t.join }
481
273
  end
482
274
 
483
- public
275
+ # shutdown
484
276
  def stop
485
- if @consumer_threads
486
- @runner_threads.each do |c|
487
- begin
488
- @logger.info("Stopping thread ... ", :thread => c.inspect)
489
- c.wakeup
490
- rescue
491
- @logger.error("Cannot stop thread ... try to kill him", :thread => c.inspect)
492
- c.kill
493
- end
277
+ @received_stop.make_true
278
+ @worker_threads.each do |worker|
279
+ begin
280
+ @logger.info("Stopping thread ... ", :thread => worker.inspect)
281
+ worker.wakeup
282
+ rescue
283
+ @logger.error("Cannot stop thread ... try to kill him", :thread => worker.inspect)
284
+ worker.kill
494
285
  end
495
- else
496
- @logger.warn("Stopping all threads?", :queue => @queue)
497
286
  end
498
287
  end
499
288
 
289
+ # --- END plugin interface ------------------------------------------#
290
+
500
291
  private
501
- def thread_runner(queue)
292
+
293
+ def run_worker_thread(queue)
502
294
  Thread.new do
503
- @logger.info("Starting new thread")
504
- begin
505
- poller.before_request do |stats|
506
- if stop? then
507
- @logger.warn("issuing :stop_polling on stop?", :queue => @queue)
508
- # this can take up to "Receive Message Wait Time" (of the sqs queue) seconds to be recognized
509
- throw :stop_polling
510
- end
511
- end
512
- # poll a message and process it
513
- run_with_backoff do
514
- poller.poll(polling_options) do |message|
515
- begin
516
- handle_message(message, queue, @codec.clone)
517
- poller.delete_message(message) if @sqs_explicit_delete
518
- rescue Exception => e
519
- @logger.info("Error in poller block ... ", :error => e)
520
- end
295
+ @logger.info("Starting new worker thread")
296
+ @sqs_poller.run do |record|
297
+ throw :skip_delete if stop?
298
+ @logger.debug("Outside Poller: got a record", :record => record)
299
+ # record is a valid object with the keys ":bucket", ":key", ":size"
300
+ record[:local_file] = File.join(@temporary_directory, File.basename(record[:key]))
301
+ if @s3_downloader.copy_s3object_to_disk(record)
302
+ completed = catch(:skip_delete) do
303
+ process(record, queue)
521
304
  end
305
+ @s3_downloader.cleanup_local_object(record)
306
+ # re-throw if necessary:
307
+ throw :skip_delete unless completed
308
+ @s3_downloader.cleanup_s3object(record)
522
309
  end
523
310
  end
524
311
  end
525
312
  end
526
313
 
527
- private
528
- # Runs an AWS request inside a Ruby block with an exponential backoff in case
529
- # we experience a ServiceError.
530
- #
531
- # @param [Integer] max_time maximum amount of time to sleep before giving up.
532
- # @param [Integer] sleep_time the initial amount of time to sleep before retrying.
533
- # @param [Block] block Ruby code block to execute.
534
- def run_with_backoff(max_time = MAX_TIME_BEFORE_GIVING_UP, sleep_time = BACKOFF_SLEEP_TIME, &block)
535
- next_sleep = sleep_time
536
- begin
537
- block.call
538
- next_sleep = sleep_time
539
- rescue Aws::SQS::Errors::ServiceError => e
540
- @logger.warn("Aws::SQS::Errors::ServiceError ... retrying SQS request with exponential backoff", :queue => @queue, :sleep_time => sleep_time, :error => e)
541
- sleep(next_sleep)
542
- next_sleep = next_sleep > max_time ? sleep_time : sleep_time * BACKOFF_FACTOR
543
- retry
314
+ # Will be remove in further releases...
315
+ def get_object_folder(key)
316
+ if match=/#{s3_key_prefix}\/?(?<type_folder>.*?)\/.*/.match(key)
317
+ return match['type_folder']
318
+ else
319
+ return ""
544
320
  end
545
321
  end
546
322
 
547
- private
548
323
  def hash_key_is_regex(myhash)
549
324
  myhash.default_proc = lambda do |hash, lookup|
550
325
  result=nil
551
326
  hash.each_pair do |key, value|
552
327
  if %r[#{key}] =~ lookup
553
- result=value
328
+ result = value
554
329
  break
555
330
  end
556
331
  end
557
332
  result
558
333
  end
334
+ # return input hash (convenience)
335
+ return myhash
336
+ end
337
+
338
+ def stop?
339
+ @received_stop.value
559
340
  end
341
+
560
342
  end # class
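
For reference, the regex-keyed hashes produced by `hash_key_is_regex` behave as in this small sketch (keys and lookups are invented):

```ruby
# hash_key_is_regex installs a default_proc: when an exact lookup fails, the
# stored keys are tried as regular expressions against the lookup string.
folders = hash_key_is_regex({})
folders['cloudtrail/.*'] = 'json'

folders['cloudtrail/.*']               # => 'json' (exact hit)
folders['cloudtrail/2019/01/log.gz']   # => 'json' (regex match via default_proc)
folders['elb/2019/01/log.gz']          # => nil    (no key matches)
```

This is why `bucket_name` and folder `key` values in `s3_options_by_bucket` can be regular expressions as well as literal names.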