logstash-input-s3-sns-sqs 1.6.1 → 2.0.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 2ebb0c135e09afcb7d44c4388817dec2050668e4255c05c62b869aa845f50648
- data.tar.gz: 7b06d7101b9b6e5431b6014c71346b3f0edb7e12aff589f67f6f042786e87724
+ metadata.gz: 253c85cd1d1dfa22a59d282a0eeae4e5a71c5630473db65a768fe3e00131adc9
+ data.tar.gz: 5e98e0d9b47c7f9b47d6e11aefa2d5c14f59e4fe6c66a7bd0934a250bb8fbcfb
  SHA512:
- metadata.gz: fad96d095a81b60159a6097cbefd1abd3e3924f9e6d2f77699cd73dc1be9db0cf1641249adaa778f9a38a968643970fae90f6f32e22679f564a360c47f5cb033
- data.tar.gz: 404a3a25c01b391b85385f8aa427d80fc32aba34d0161b1a751f9bf7a0c19cf1bfcac132d3797a510132b100fba69b8eaa3aea7f6e802387f7ff7e52a4b9852b
+ metadata.gz: ee38fcc3de70af94b7de1570b054cdf435224c77b64bebd1db9b8eee3a4097b91f0320ce09d732fdefd8b1d8e0ca722c0a9b799e49dceda881e36bb7b26417e0
+ data.tar.gz: d6818a6bdead5aae583a09e2af6e1e869a7fee4902c6503def1150584afb16f415a825eb2a34b2e8a05130d8e6348bbda28f4885852e8eb92ec552e68382d15c
data/CHANGELOG.md CHANGED
@@ -1,3 +1,13 @@
+ ##2.0.0
+ Breaking Changes:
+ - s3_key_prefix was never fully functional and will be removed; it is now only used for metadata.folder backward compatibility.
+   Configured s3 paths are treated as regular expressions (unless they match exactly).
+ - s3_options_by_bucket replaces all s3_* options.
+   Deprecated options will be merged into the new structure for one more release.
+ Changes:
+ - Refactor plugin structure to be more modular
+ - Rework threading design
+ - Introduce s3_options_by_bucket to configure per-bucket settings (e.g. aws_options_hash or type)
  ##1.6.1
  - Fix typo in gzip error logging
  ##1.6.0
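
For readers upgrading, a pipeline snippet using the new structure might look roughly like the following sketch (bucket name, role ARN, folder keys and types are placeholders, not values shipped with this release); folder keys are matched as regular expressions when they are not an exact match:

    input {
      s3snssqs {
        queue            => "my-s3-event-queue"
        from_sns         => true
        consumer_threads => 4
        s3_options_by_bucket => [
          {
            "bucket_name" => "my-log-bucket"
            "credentials" => { "role" => "arn:aws:iam::123456789012:role/logstash-read" }
            "folders" => [
              { "key" => "elb/" "codec" => "plain" "type" => "elb-logs" },
              { "key" => "app/.*" "codec" => "json" "type" => "app-logs" }
            ]
          }
        ]
      }
    }

When s3_options_by_bucket is not set, the plugin builds an equivalent single entry with bucket_name '.*' out of the deprecated s3_access_key_id/s3_secret_access_key, s3_role_arn and set_codec_by_folder options (see the register method further down), so existing configurations keep working for one more release.
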
@@ -0,0 +1,37 @@
+ # CodecFactory:
+ # lazy-fetch codec plugins
+
+ class CodecFactory
+   def initialize(logger, options)
+     @logger = logger
+     @default_codec = options[:default_codec]
+     @codec_by_folder = options[:codec_by_folder]
+     @codecs = {
+       'default' => @default_codec
+     }
+   end
+
+   def get_codec(record)
+     codec = find_codec(record)
+     if @codecs[codec].nil?
+       @codecs[codec] = get_codec_plugin(codec)
+     end
+     @logger.debug("Switching to codec #{codec}") if codec != 'default'
+     return @codecs[codec]
+   end
+
+   private
+
+   def find_codec(record)
+     bucket, key, folder = record[:bucket], record[:key], record[:folder]
+     unless @codec_by_folder[bucket].nil?
+       @logger.debug("trying to find codec for folder #{folder}", :codec => @codec_by_folder[bucket][folder])
+       return @codec_by_folder[bucket][folder] unless @codec_by_folder[bucket][folder].nil?
+     end
+     return 'default'
+   end
+
+   def get_codec_plugin(name, options = {})
+     LogStash::Plugin.lookup('codec', name).new(options)
+   end
+ end
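
A minimal usage sketch of this factory (logger and the plain codec instance are placeholders for objects provided by the Logstash runtime; folder keys only regex-match if the inner hash was prepared with hash_key_is_regex, as the plugin does in register):

    # illustrative only - assumes a Logstash runtime where LogStash::Plugin is available
    factory = CodecFactory.new(logger, {
      default_codec: plain_codec,                                   # the plugin's configured @codec
      codec_by_folder: { 'my-log-bucket' => { 'app/' => 'json' } }  # bucket => { folder => codec name }
    })

    # record keys mirror what the poller hands over
    codec = factory.get_codec({ bucket: 'my-log-bucket', key: 'app/2019/01/x.log', folder: 'app/' })
    # => a lazily instantiated, cached "json" codec; unknown folders fall back to the default codec
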
@@ -0,0 +1,61 @@
+ # not needed - Mutex is part of core lib:
+ #require 'thread'
+
+ class S3ClientFactory
+
+   def initialize(logger, options, aws_options_hash)
+     @logger = logger
+     @aws_options_hash = aws_options_hash
+     @s3_default_options = Hash[options[:s3_default_options].map { |k, v| [k.to_sym, v] }]
+     @aws_options_hash.merge!(@s3_default_options) unless @s3_default_options.empty?
+     @sts_client = Aws::STS::Client.new(region: options[:aws_region])
+     @credentials_by_bucket = options[:s3_credentials_by_bucket]
+     @logger.debug("Credentials by Bucket", :credentials => @credentials_by_bucket)
+     @default_session_name = options[:s3_role_session_name]
+     @clients_by_bucket = {}
+     #@mutexes_by_bucket = {}
+     @creation_mutex = Mutex.new
+   end
+
+   def get_s3_client(bucket_name)
+     bucket_symbol = bucket_name.to_sym
+     @creation_mutex.synchronize do
+       if @clients_by_bucket[bucket_symbol].nil?
+         options = @aws_options_hash.clone
+         unless @credentials_by_bucket[bucket_name].nil?
+           options.merge!(credentials: get_s3_auth(@credentials_by_bucket[bucket_name]))
+         end
+         @clients_by_bucket[bucket_symbol] = Aws::S3::Client.new(options)
+         @logger.debug("Created a new S3 Client", :bucket_name => bucket_name, :client => @clients_by_bucket[bucket_symbol], :used_options => options)
+         #@mutexes_by_bucket[bucket_symbol] = Mutex.new
+       end
+     end
+     # to be thread-safe, one uses this method like this:
+     # s3_client_factory.get_s3_client(my_s3_bucket) do
+     #   ... do stuff ...
+     # end
+     # FIXME: this does not allow concurrent downloads from the same bucket!
+     #@mutexes_by_bucket[bucket_symbol].synchronize do
+     # So we are testing this without this mutex.
+     yield @clients_by_bucket[bucket_symbol]
+     #end
+   end
+
+   private
+
+   def get_s3_auth(credentials)
+     # reminder: these are auto-refreshing!
+     if credentials.key?('role')
+       @logger.debug("Assume Role", :role => credentials["role"])
+       return Aws::AssumeRoleCredentials.new(
+         client: @sts_client,
+         role_arn: credentials['role'],
+         role_session_name: @default_session_name
+       )
+     elsif credentials.key?('access_key_id') && credentials.key?('secret_access_key')
+       @logger.debug("Fetch credentials", :access_key => credentials['access_key_id'])
+       return Aws::Credentials.new(credentials)
+     end
+   end
+
+ end # class
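
Intended usage, per the comment inside get_s3_client (values are placeholders; the sketch assumes the aws-sdk gem is loaded and default AWS credentials are resolvable from the environment):

    factory = S3ClientFactory.new(logger, {
      aws_region: 'eu-central-1',
      s3_default_options: {},
      s3_credentials_by_bucket: {
        'secured-bucket' => { 'role' => 'arn:aws:iam::123456789012:role/bucket-reader' }
      },
      s3_role_session_name: 'logstash'
    }, { region: 'eu-central-1' })

    # clients are built lazily, cached per bucket and always handed out through a block:
    factory.get_s3_client('secured-bucket') do |s3|
      s3.head_object(bucket: 'secured-bucket', key: 'some/key')
    end
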
@@ -0,0 +1,55 @@
+ # encoding: utf-8
+ require 'fileutils'
+ require 'thread'
+
+ class S3Downloader
+
+   def initialize(logger, stop_semaphore, options)
+     @logger = logger
+     @stopped = stop_semaphore
+     @factory = options[:s3_client_factory]
+     @delete_on_success = options[:delete_on_success]
+   end
+
+   def copy_s3object_to_disk(record)
+     # (from docs) WARNING:
+     # yielding data to a block disables retries of networking errors!
+     begin
+       @factory.get_s3_client(record[:bucket]) do |s3|
+         response = s3.get_object(
+           bucket: record[:bucket],
+           key: record[:key],
+           response_target: record[:local_file]
+         )
+       end
+     rescue Aws::S3::Errors::ServiceError => e
+       @logger.error("Unable to download file. Requeuing the message", :error => e, :record => record)
+       # prevent sqs message deletion
+       throw :skip_delete
+     end
+     throw :skip_delete if stop?
+     return true
+   end
+
+   def cleanup_local_object(record)
+     FileUtils.remove_entry_secure(record[:local_file], true) if ::File.exists?(record[:local_file])
+   rescue Exception => e
+     @logger.warn("Could not delete file", :file => record[:local_file], :error => e)
+   end
+
+   def cleanup_s3object(record)
+     return unless @delete_on_success
+     begin
+       @factory.get_s3_client(record[:bucket]) do |s3|
+         s3.delete_object(bucket: record[:bucket], key: record[:key])
+       end
+     rescue Exception => e
+       @logger.warn("Failed to delete s3 object", :record => record, :error => e)
+     end
+   end
+
+   def stop?
+     @stopped.value
+   end
+
+ end # class
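
The downloader is designed to run inside a catch(:skip_delete) block: an S3 error or a shutdown request throws :skip_delete, which the caller uses to leave the SQS message in the queue so it becomes visible again after the visibility timeout (this is exactly how run_worker_thread drives it further down). A rough sketch with placeholder values:

    require 'concurrent'

    stop_flag  = Concurrent::AtomicBoolean.new(false)
    downloader = S3Downloader.new(logger, stop_flag, {
      s3_client_factory: factory,        # an S3ClientFactory as above
      delete_on_success: false
    })

    record = { bucket: 'my-log-bucket', key: 'app/x.log.gz',
               local_file: '/tmp/logstash/x.log.gz' }

    completed = catch(:skip_delete) do
      downloader.copy_s3object_to_disk(record)   # throws :skip_delete on failure or shutdown
      # ... decode the local file and push events here ...
      true
    end
    downloader.cleanup_local_object(record)
    downloader.cleanup_s3object(record) if completed   # no-op unless delete_on_success is set
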
@@ -1,15 +1,20 @@
  # encoding: utf-8
- #
  require "logstash/inputs/threadable"
  require "logstash/namespace"
  require "logstash/timestamp"
  require "logstash/plugin_mixins/aws_config"
+ require "logstash/shutdown_watcher"
  require "logstash/errors"
  require 'logstash/inputs/s3sqs/patch'
  require "aws-sdk"
- require "stud/interval"
- require 'cgi'
- require 'logstash/inputs/mime/MagicgzipValidator'
+ # "object-oriented interfaces on top of API clients"...
+ # => Overhead. FIXME: needed?
+ #require "aws-sdk-resources"
+ require "fileutils"
+ require "concurrent"
+ # unused in code:
+ #require "stud/interval"
+ #require "digest/md5"

  require 'java'
  java_import java.io.InputStream
@@ -19,6 +24,14 @@ java_import java.io.BufferedReader
  java_import java.util.zip.GZIPInputStream
  java_import java.util.zip.ZipException

+ # our helper classes
+ # these may go into this file for brevity...
+ require_relative 'sqs/poller'
+ require_relative 's3/client_factory'
+ require_relative 's3/downloader'
+ require_relative 'codec_factory'
+ require_relative 's3snssqs/log_processor'
+
  Aws.eager_autoload!

  # Get logs from AWS s3 buckets as issued by an object-created event via sqs.
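
Two of these helpers, sqs/poller and s3snssqs/log_processor, are not part of this diff excerpt. Judging from how they are used below (SqsPoller#run yields one record hash per S3 object notification, and LogProcessor is mixed into the plugin to provide process(record, queue)), their interfaces presumably look roughly like this; treat it as an assumed shape, not the shipped code:

    # assumed interfaces only - the real files are not shown in this diff
    class SqsPoller
      def initialize(logger, stop_semaphore, queue_name, options, aws_options_hash); end

      # polls SQS and yields { bucket:, key:, size: } for each S3 object-created event;
      # throwing :skip_delete inside the block keeps the message in the queue
      def run(&block); end
    end

    module LogProcessor
      # reads record[:local_file], decodes it with the codec chosen via CodecFactory
      # and pushes the resulting events onto the Logstash event queue
      def process(record, logstash_event_queue); end
    end
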
@@ -89,472 +102,241 @@ Aws.eager_autoload!
  #
  class LogStash::Inputs::S3SNSSQS < LogStash::Inputs::Threadable
    include LogStash::PluginMixins::AwsConfig::V2
-
-   BACKOFF_SLEEP_TIME = 1
-   BACKOFF_FACTOR = 2
-   MAX_TIME_BEFORE_GIVING_UP = 60
-   EVENT_SOURCE = 'aws:s3'
-   EVENT_TYPE = 'ObjectCreated'
+   include LogProcessor

    config_name "s3snssqs"

    default :codec, "plain"

+
+
+   # Future config might look somewhat like this:
+   #
+   # s3_options_by_bucket = [
+   #   {
+   #     "bucket_name": "my-beautiful-bucket",
+   #     "credentials": { "role": "aws:role:arn:for:bucket:access" },
+   #     "folders": [
+   #       {
+   #         "key": "my_folder",
+   #         "codec": "json"
+   #         "type": "my_lovely_index"
+   #       },
+   #       {
+   #         "key": "my_other_folder",
+   #         "codec": "json_stream"
+   #         "type": ""
+   #       }
+   #     ]
+   #   },
+   #   {
+   #     "bucket_name": "my-other-bucket"
+   #     "credentials": {
+   #       "access_key_id": "some-id",
+   #       "secret_access_key": "some-secret-key"
+   #     },
+   #     "folders": [
+   #       {
+   #         "key": ""
+   #       }
+   #     ]
+   #   }
+   # }
+
+   config :s3_key_prefix, :validate => :string, :default => '', :deprecated => true #, :obsolete => " Will be moved to s3_options_by_bucket/types"
+
+   config :s3_access_key_id, :validate => :string, :deprecated => true #, :obsolete => "Please migrate to :s3_options_by_bucket. We will remove this option in the next Version"
+   config :s3_secret_access_key, :validate => :string, :deprecated => true #, :obsolete => "Please migrate to :s3_options_by_bucket. We will remove this option in the next Version"
+   config :s3_role_arn, :validate => :string, :deprecated => true #, :obsolete => "Please migrate to :s3_options_by_bucket. We will remove this option in the next Version"
+
+   config :set_codec_by_folder, :validate => :hash, :default => {}, :deprecated => true #, :obsolete => "Please migrate to :s3_options_by_bucket. We will remove this option in the next Version"
+
+   # Default Options for the S3 clients
+   config :s3_default_options, :validate => :hash, :required => false, :default => {}
+   # We need a list of buckets, together with role arns and possible folder/codecs:
+   config :s3_options_by_bucket, :validate => :array, :required => false # TODO: true
+   # Session name to use when assuming an IAM role
+   config :s3_role_session_name, :validate => :string, :default => "logstash"
+
+   ### sqs
    # Name of the SQS Queue to pull messages from. Note that this is just the name of the queue, not the URL or ARN.
    config :queue, :validate => :string, :required => true
-   config :s3_key_prefix, :validate => :string, :default => ''
-   #Sometimes you need another key for s3. This is a first test...
-   config :s3_access_key_id, :validate => :string
-   config :s3_secret_access_key, :validate => :string
    config :queue_owner_aws_account_id, :validate => :string, :required => false
-   #If you have different file-types in you s3 bucket, you could define codec by folder
-   #set_codec_by_folder => {"My-ELB-logs" => "plain"}
-   config :set_codec_by_folder, :validate => :hash, :default => {}
-   config :delete_on_success, :validate => :boolean, :default => false
-   config :sqs_explicit_delete, :validate => :boolean, :default => false
    # Whether the event is processed though an SNS to SQS. (S3>SNS>SQS = true |S3>SQS=false)
    config :from_sns, :validate => :boolean, :default => true
-   # To run in multiple threads use this
-   config :consumer_threads, :validate => :number
-   config :temporary_directory, :validate => :string, :default => File.join(Dir.tmpdir, "logstash")
-   # The AWS IAM Role to assume, if any.
-   # This is used to generate temporary credentials typically for cross-account access.
-   # See https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRole.html for more information.
-   config :s3_role_arn, :validate => :string
-   # Session name to use when assuming an IAM role
-   config :s3_role_session_name, :validate => :string, :default => "logstash"
+   config :sqs_skip_delete, :validate => :boolean, :default => false
+   config :delete_on_success, :validate => :boolean, :default => false
    config :visibility_timeout, :validate => :number, :default => 600

+   ### system
+   config :temporary_directory, :validate => :string, :default => File.join(Dir.tmpdir, "logstash")
+   # To run in multiple threads use this
+   config :consumer_threads, :validate => :number, :default => 1

-   attr_reader :poller
-   attr_reader :s3
-
-
-   def set_codec (folder)
-     begin
-       @logger.debug("Automatically switching from #{@codec.class.config_name} to #{set_codec_by_folder[folder]} codec", :plugin => self.class.config_name)
-       LogStash::Plugin.lookup("codec", "#{set_codec_by_folder[folder]}").new("charset" => @codec.charset)
-     rescue Exception => e
-       @logger.error("Failed to set_codec with error", :error => e)
-     end
-   end

    public
-   def register
-     require "fileutils"
-     require "digest/md5"
-     require "aws-sdk-resources"

-     @runner_threads = []
-     #make this hash keys lookups match like regex
-     hash_key_is_regex(set_codec_by_folder)
-     @logger.info("Registering SQS input", :queue => @queue)
-     setup_queue
+   # --- BEGIN plugin interface ----------------------------------------#

+   # initialisation
+   def register
+     # prepare system
      FileUtils.mkdir_p(@temporary_directory) unless Dir.exist?(@temporary_directory)
-   end
-
-   def setup_queue
-     aws_sqs_client = Aws::SQS::Client.new(aws_options_hash)
-     queue_url = aws_sqs_client.get_queue_url({ queue_name: @queue, queue_owner_aws_account_id: @queue_owner_aws_account_id})[:queue_url]
-     @poller = Aws::SQS::QueuePoller.new(queue_url, :client => aws_sqs_client)
-     get_s3client
-     @s3_resource = get_s3object
-   rescue Aws::SQS::Errors::ServiceError => e
-     @logger.error("Cannot establish connection to Amazon SQS", :error => e)
-     raise LogStash::ConfigurationError, "Verify the SQS queue name and your credentials"
-   end
-
-   def polling_options
-     {
-       # we will query 1 message at a time, so we can ensure correct error handling if we can't download a single file correctly
-       # (we will throw :skip_delete if download size isn't correct to process the event again later
-       # -> set a reasonable "Default Visibility Timeout" for your queue, so that there's enough time to process the log files)
-       :max_number_of_messages => 1,
-       # we will use the queue's setting, a good value is 10 seconds
-       # (to ensure fast logstash shutdown on the one hand and few api calls on the other hand)
-       :skip_delete => false,
-       :visibility_timeout => @visibility_timeout,
-       :wait_time_seconds => nil,
-     }
-   end

-   def handle_message(message, queue, instance_codec)
-     hash = JSON.parse message.body
-     @logger.debug("handle_message", :hash => hash, :message => message)
-     #If send via sns there is an additional JSON layer
-     if @from_sns then
-       hash = JSON.parse(hash['Message'])
-     end
-     # there may be test events sent from the s3 bucket which won't contain a Records array,
-     # we will skip those events and remove them from queue
-     if hash['Records'] then
-       # typically there will be only 1 record per event, but since it is an array we will
-       # treat it as if there could be more records
-       hash['Records'].each do |record|
-         @logger.debug("We found a record", :record => record)
-         # in case there are any events with Records that aren't s3 object-created events and can't therefore be
-         # processed by this plugin, we will skip them and remove them from queue
-         if record['eventSource'] == EVENT_SOURCE and record['eventName'].start_with?(EVENT_TYPE) then
-           @logger.debug("It is a valid record")
-           bucket = CGI.unescape(record['s3']['bucket']['name'])
-           key = CGI.unescape(record['s3']['object']['key'])
-           size = record['s3']['object']['size']
-           type_folder = get_object_folder(key)
-           # Set input codec by :set_codec_by_folder
-           instance_codec = set_codec(type_folder) unless set_codec_by_folder["#{type_folder}"].nil?
-           # try download and :skip_delete if it fails
-           #if record['s3']['object']['size'] < 10000000 then
-           process_log(bucket, key, type_folder, instance_codec, queue, message, size)
-           #else
-           # @logger.info("Your file is too big")
-           #end
+     @credentials_by_bucket = hash_key_is_regex({})
+     # create the bucket=>folder=>codec lookup from config options
+     @codec_by_folder = hash_key_is_regex({})
+     @type_by_folder = hash_key_is_regex({})
+
+     # use deprecated settings only if new config is missing:
+     if @s3_options_by_bucket.nil?
+       # We don't know any bucket name, so we must rely on a "catch-all" regex
+       s3_options = {
+         'bucket_name' => '.*',
+         'folders' => @set_codec_by_folder.map { |key, codec|
+           { 'key' => key, 'codec' => codec }
+         }
+       }
+       if @s3_role_arn.nil?
+         # access key/secret key pair needed
+         unless @s3_access_key_id.nil? or @s3_secret_access_key.nil?
+           s3_options['credentials'] = {
+             'access_key_id' => @s3_access_key_id,
+             'secret_access_key' => @s3_secret_access_key
+           }
          end
+       else
+         s3_options['credentials'] = {
+           'role' => @s3_role_arn
+         }
        end
+       @s3_options_by_bucket = [s3_options]
      end
-   end
-
-   private
-   def process_log(bucket , key, folder, instance_codec, queue, message, size)
-     s3bucket = @s3_resource.bucket(bucket)
-     @logger.debug("Lets go reading file", :bucket => bucket, :key => key)
-     object = s3bucket.object(key)
-     filename = File.join(temporary_directory, File.basename(key))
-     if download_remote_file(object, filename)
-       if process_local_log( filename, key, folder, instance_codec, queue, bucket, message, size)
-         begin
-           FileUtils.remove_entry_secure(filename, true) if File.exists? filename
-           delete_file_from_bucket(object)
-         rescue Exception => e
-           @logger.debug("We had problems to delete your file", :file => filename, :error => e)
-         end
-       end
-     else
-       begin
-         FileUtils.remove_entry_secure(filename, true) if File.exists? filename
-       rescue Exception => e
-         @logger.debug("We had problems clean up your tmp dir", :file => filename, :error => e)
-       end
-     end
-   end
-
-   private
-   # Stream the remove file to the local disk
-   #
-   # @param [S3Object] Reference to the remove S3 objec to download
-   # @param [String] The Temporary filename to stream to.
-   # @return [Boolean] True if the file was completely downloaded
-   def download_remote_file(remote_object, local_filename)
-     completed = false
-     @logger.debug("S3 input: Download remote file", :remote_key => remote_object.key, :local_filename => local_filename)
-     File.open(local_filename, 'wb') do |s3file|
-       return completed if stop?
-       begin
-         remote_object.get(:response_target => s3file)
-       rescue Aws::S3::Errors::ServiceError => e
-         @logger.error("Unable to download file. We´ll requeue the message", :file => remote_object.inspect)
-         throw :skip_delete
-       end
-     end
-     completed = true

-     return completed
-   end
-
-   private
-
-   # Read the content of the local file
-   #
-   # @param [Queue] Where to push the event
-   # @param [String] Which file to read from
-   # @return [Boolean] True if the file was completely read, false otherwise.
-   def process_local_log(filename, key, folder, instance_codec, queue, bucket, message, size)
-     @logger.debug('Processing file', :filename => filename)
-     metadata = {}
-     start_time = Time.now
-     # Currently codecs operates on bytes instead of stream.
-     # So all IO stuff: decompression, reading need to be done in the actual
-     # input and send as bytes to the codecs.
-     read_file(filename) do |line|
-       if (Time.now - start_time) >= (@visibility_timeout.to_f / 100.0 * 90.to_f)
-         @logger.info("Increasing the visibility_timeout ... ", :timeout => @visibility_timeout, :filename => filename, :filesize => size, :start => start_time )
-         poller.change_message_visibility_timeout(message, @visibility_timeout)
-         start_time = Time.now
+     @s3_options_by_bucket.each do |options|
+       bucket = options['bucket_name']
+       if options.key?('credentials')
+         @credentials_by_bucket[bucket] = options['credentials']
       end
-       if stop?
-         @logger.warn("Logstash S3 input, stop reading in the middle of the file, we will read it again when logstash is started")
-         return false
-       end
-       line = line.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: "\u2370")
-       #@logger.debug("read line", :line => line)
-       instance_codec.decode(line) do |event|
-         @logger.debug("decorate event")
-         # We are making an assumption concerning cloudfront
-         # log format, the user will use the plain or the line codec
-         # and the message key will represent the actual line content.
-         # If the event is only metadata the event will be drop.
-         # This was the behavior of the pre 1.5 plugin.
-         #
-         # The line need to go through the codecs to replace
-         # unknown bytes in the log stream before doing a regexp match or
-         # you will get a `Error: invalid byte sequence in UTF-8'
-         local_decorate_and_queue(event, queue, key, folder, metadata, bucket)
+       if options.key?('folders')
+         # make these hashes do key lookups using regex matching
+         folders = hash_key_is_regex({})
+         types = hash_key_is_regex({})
+         options['folders'].each do |entry|
+           @logger.debug("options for folder ", :folder => entry)
+           folders[entry['key']] = entry['codec'] if entry.key?('codec')
+           types[entry['key']] = entry['type'] if entry.key?('type')
+         end
+         @codec_by_folder[bucket] = folders unless folders.empty?
+         @type_by_folder[bucket] = types unless types.empty?
       end
     end
-     @logger.debug("end if file #{filename}")
-     #@logger.info("event pre flush", :event => event)
-     # #ensure any stateful codecs (such as multi-line ) are flushed to the queue
-     instance_codec.flush do |event|
-       local_decorate_and_queue(event, queue, key, folder, metadata, bucket)
-       @logger.debug("We´e to flush an incomplete event...", :event => event)
-     end
-
-     return true
-   end # def process_local_log
-
-   private
-   def local_decorate_and_queue(event, queue, key, folder, metadata, bucket)
-     @logger.debug('decorating event', :event => event.to_s)
-     if event_is_metadata?(event)
-       @logger.debug('Event is metadata, updating the current cloudfront metadata', :event => event)
-       update_metadata(metadata, event)
-     else
-
-       decorate(event)
-
-       event.set("cloudfront_version", metadata[:cloudfront_version]) unless metadata[:cloudfront_version].nil?
-       event.set("cloudfront_fields", metadata[:cloudfront_fields]) unless metadata[:cloudfront_fields].nil?
-
-       event.set("[@metadata][s3][object_key]", key)
-       event.set("[@metadata][s3][bucket_name]", bucket)
-       event.set("[@metadata][s3][object_folder]", folder)
-       @logger.debug('add metadata', :object_key => key, :bucket => bucket, :folder => folder)
-       queue << event
-     end
-   end
-
-
-   private
-   def get_object_folder(key)
-     if match=/#{s3_key_prefix}\/?(?<type_folder>.*?)\/.*/.match(key)
-       return match['type_folder']
-     else
-       return ""
-     end
-   end
-
-   private
-   def read_file(filename, &block)
-     if gzip?(filename)
-       read_gzip_file(filename, block)
-     else
-       read_plain_file(filename, block)
-     end
-   end
-
-   def read_plain_file(filename, block)
-     File.open(filename, 'rb') do |file|
-       file.each(&block)
-     end
-   end
-
-   private
-   def read_gzip_file(filename, block)
-     file_stream = FileInputStream.new(filename)
-     gzip_stream = GZIPInputStream.new(file_stream)
-     decoder = InputStreamReader.new(gzip_stream, "UTF-8")
-     buffered = BufferedReader.new(decoder)
-
-     while (line = buffered.readLine())
-       block.call(line)
-     end
-   rescue ZipException => e
-     @logger.error("Gzip codec: We cannot uncompress the gzip file", :filename => filename, :error => e)
-   ensure
-     buffered.close unless buffered.nil?
-     decoder.close unless decoder.nil?
-     gzip_stream.close unless gzip_stream.nil?
-     file_stream.close unless file_stream.nil?
-   end
-
-   private
-   def gzip?(filename)
-     return true if filename.end_with?('.gz','.gzip')
-     MagicGzipValidator.new(File.new(filename, 'r')).valid?
-   rescue Exception => e
-     @logger.debug("Problem while gzip detection", :error => e)
-   end
-
-   private
-   def delete_file_from_bucket(object)
-     if @delete_on_success
-       object.delete()
-     end
-   end
-
-
-   private
-   def get_s3client
-     if s3_access_key_id and s3_secret_access_key
-       @logger.debug("Using S3 Credentials from config", :ID => aws_options_hash.merge(:access_key_id => s3_access_key_id, :secret_access_key => s3_secret_access_key) )
-       @s3_client = Aws::S3::Client.new(aws_options_hash.merge(:access_key_id => s3_access_key_id, :secret_access_key => s3_secret_access_key))
-     elsif @s3_role_arn
-       @s3_client = Aws::S3::Client.new(aws_options_hash.merge!({ :credentials => s3_assume_role }))
-       @logger.debug("Using S3 Credentials from role", :s3client => @s3_client.inspect, :options => aws_options_hash.merge!({ :credentials => s3_assume_role }))
-     else
-       @s3_client = Aws::S3::Client.new(aws_options_hash)
-     end
-   end
-
-   private
-   def get_s3object
-     s3 = Aws::S3::Resource.new(client: @s3_client)
-   end
-
-   private
-   def s3_assume_role()
-     Aws::AssumeRoleCredentials.new(
-       client: Aws::STS::Client.new(region: @region),
-       role_arn: @s3_role_arn,
-       role_session_name: @s3_role_session_name
-     )
-   end
-
-   private
-   def event_is_metadata?(event)
-     return false unless event.get("message").class == String
-     line = event.get("message")
-     version_metadata?(line) || fields_metadata?(line)
-   end
-
-   private
-   def version_metadata?(line)
-     line.start_with?('#Version: ')
-   end
-
-   private
-   def fields_metadata?(line)
-     line.start_with?('#Fields: ')
-   end
-
-   private
-   def update_metadata(metadata, event)
-     line = event.get('message').strip
-
-     if version_metadata?(line)
-       metadata[:cloudfront_version] = line.split(/#Version: (.+)/).last
-     end

-     if fields_metadata?(line)
-       metadata[:cloudfront_fields] = line.split(/#Fields: (.+)/).last
-     end
+     @received_stop = Concurrent::AtomicBoolean.new(false)
+
+     # instantiate helpers
+     @sqs_poller = SqsPoller.new(@logger, @received_stop, @queue, {
+       queue_owner_aws_account_id: @queue_owner_aws_account_id,
+       from_sns: @from_sns,
+       sqs_explicit_delete: @sqs_explicit_delete,
+       visibility_timeout: @visibility_timeout
+     }, aws_options_hash)
+     @s3_client_factory = S3ClientFactory.new(@logger, {
+       aws_region: @region,
+       s3_default_options: @s3_default_options,
+       s3_credentials_by_bucket: @credentials_by_bucket,
+       s3_role_session_name: @s3_role_session_name
+     }, aws_options_hash)
+     @s3_downloader = S3Downloader.new(@logger, @received_stop, {
+       s3_client_factory: @s3_client_factory,
+       delete_on_success: @delete_on_success
+     })
+     @codec_factory = CodecFactory.new(@logger, {
+       default_codec: @codec,
+       codec_by_folder: @codec_by_folder
+     })
+     #@log_processor = LogProcessor.new(self)
+
+     # administrative stuff
+     @worker_threads = []
    end

-   public
-   def run(queue)
-     if @consumer_threads
-       # ensure we can stop logstash correctly
-       @runner_threads = consumer_threads.times.map { |consumer| thread_runner(queue) }
-       @runner_threads.each { |t| t.join }
-     else
-       #Fallback to simple single thread worker
-       # ensure we can stop logstash correctly
-       poller.before_request do |stats|
-         if stop? then
-           @logger.warn("issuing :stop_polling on stop?", :queue => @queue)
-           # this can take up to "Receive Message Wait Time" (of the sqs queue) seconds to be recognized
-           throw :stop_polling
-         end
-       end
-       # poll a message and process it
-       run_with_backoff do
-         poller.poll(polling_options) do |message|
-           begin
-             handle_message(message, queue, @codec.clone)
-             poller.delete_message(message)
-           rescue Exception => e
-             @logger.info("Error in poller block ... ", :error => e)
-           end
-         end
-       end
+   # startup
+   def run(logstash_event_queue)
+     #LogStash::ShutdownWatcher.abort_threshold(30)
+     # start them
+     @worker_threads = @consumer_threads.times.map do |_|
+       run_worker_thread(logstash_event_queue)
      end
+     # and wait (possibly infinitely) for them to shut down
+     @worker_threads.each { |t| t.join }
    end

-   public
+   # shutdown
    def stop
-     if @consumer_threads
-       @runner_threads.each do |c|
-         begin
-           @logger.info("Stopping thread ... ", :thread => c.inspect)
-           c.wakeup
-         rescue
-           @logger.error("Cannot stop thread ... try to kill him", :thread => c.inspect)
-           c.kill
-         end
+     @received_stop.make_true
+     @worker_threads.each do |worker|
+       begin
+         @logger.info("Stopping thread ... ", :thread => worker.inspect)
+         worker.wakeup
+       rescue
+         @logger.error("Cannot stop thread ... try to kill him", :thread => worker.inspect)
+         worker.kill
        end
-     else
-       @logger.warn("Stopping all threads?", :queue => @queue)
      end
    end

+   # --- END plugin interface ------------------------------------------#
+
    private
-   def thread_runner(queue)
+
+   def run_worker_thread(queue)
      Thread.new do
-       @logger.info("Starting new thread")
-       begin
-         poller.before_request do |stats|
-           if stop? then
-             @logger.warn("issuing :stop_polling on stop?", :queue => @queue)
-             # this can take up to "Receive Message Wait Time" (of the sqs queue) seconds to be recognized
-             throw :stop_polling
-           end
-         end
-         # poll a message and process it
-         run_with_backoff do
-           poller.poll(polling_options) do |message|
-             begin
-               handle_message(message, queue, @codec.clone)
-               poller.delete_message(message) if @sqs_explicit_delete
-             rescue Exception => e
-               @logger.info("Error in poller block ... ", :error => e)
-             end
+       @logger.info("Starting new worker thread")
+       @sqs_poller.run do |record|
+         throw :skip_delete if stop?
+         @logger.debug("Outside Poller: got a record", :record => record)
+         # record is a valid object with the keys ":bucket", ":key", ":size"
+         record[:local_file] = File.join(@temporary_directory, File.basename(record[:key]))
+         if @s3_downloader.copy_s3object_to_disk(record)
+           completed = catch(:skip_delete) do
+             process(record, queue)
            end
+           @s3_downloader.cleanup_local_object(record)
+           # re-throw if necessary:
+           throw :skip_delete unless completed
+           @s3_downloader.cleanup_s3object(record)
         end
       end
     end
   end

-   private
-   # Runs an AWS request inside a Ruby block with an exponential backoff in case
-   # we experience a ServiceError.
-   #
-   # @param [Integer] max_time maximum amount of time to sleep before giving up.
-   # @param [Integer] sleep_time the initial amount of time to sleep before retrying.
-   # @param [Block] block Ruby code block to execute.
-   def run_with_backoff(max_time = MAX_TIME_BEFORE_GIVING_UP, sleep_time = BACKOFF_SLEEP_TIME, &block)
-     next_sleep = sleep_time
-     begin
-       block.call
-       next_sleep = sleep_time
-     rescue Aws::SQS::Errors::ServiceError => e
-       @logger.warn("Aws::SQS::Errors::ServiceError ... retrying SQS request with exponential backoff", :queue => @queue, :sleep_time => sleep_time, :error => e)
-       sleep(next_sleep)
-       next_sleep = next_sleep > max_time ? sleep_time : sleep_time * BACKOFF_FACTOR
-       retry
+   # Will be remove in further releases...
+   def get_object_folder(key)
+     if match=/#{s3_key_prefix}\/?(?<type_folder>.*?)\/.*/.match(key)
+       return match['type_folder']
+     else
+       return ""
     end
   end

-   private
    def hash_key_is_regex(myhash)
      myhash.default_proc = lambda do |hash, lookup|
        result=nil
        hash.each_pair do |key, value|
          if %r[#{key}] =~ lookup
-           result=value
+           result = value
            break
          end
        end
        result
      end
+     # return input hash (convenience)
+     return myhash
+   end
+
+   def stop?
+     @received_stop.value
    end
+
  end # class
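
A worked example of the regex lookup above (illustrative values): once a hash has been wrapped by hash_key_is_regex, a miss on the literal key falls back to matching every stored key as an (unanchored) regular expression against the lookup string:

    codecs = hash_key_is_regex({ 'cloudfront' => 'plain', 'app/.*' => 'json' })

    codecs['cloudfront']   # => "plain"  (exact key, default_proc not consulted)
    codecs['app/2019/01']  # => "json"   ('app/.*' matches via the default_proc)
    codecs['something']    # => nil      (no key matches)

Because the patterns are unanchored, a short key such as 'cloudfront' will also match longer lookups like 'my-cloudfront-logs', which is worth keeping in mind when choosing folder keys.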