logstash-integration-aws 0.1.0.pre

Files changed (89)
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.PRE.MERGE.md +658 -0
  3. data/CHANGELOG.md +15 -0
  4. data/CONTRIBUTORS +40 -0
  5. data/Gemfile +11 -0
  6. data/LICENSE +202 -0
  7. data/NOTICE.TXT +5 -0
  8. data/README.md +205 -0
  9. data/docs/codec-cloudfront.asciidoc +53 -0
  10. data/docs/codec-cloudtrail.asciidoc +45 -0
  11. data/docs/index.asciidoc +38 -0
  12. data/docs/input-cloudwatch.asciidoc +320 -0
  13. data/docs/input-s3.asciidoc +346 -0
  14. data/docs/input-sqs.asciidoc +287 -0
  15. data/docs/output-cloudwatch.asciidoc +321 -0
  16. data/docs/output-s3.asciidoc +442 -0
  17. data/docs/output-sns.asciidoc +166 -0
  18. data/docs/output-sqs.asciidoc +242 -0
  19. data/lib/logstash/codecs/cloudfront.rb +84 -0
  20. data/lib/logstash/codecs/cloudtrail.rb +47 -0
  21. data/lib/logstash/inputs/cloudwatch.rb +338 -0
  22. data/lib/logstash/inputs/s3.rb +466 -0
  23. data/lib/logstash/inputs/sqs.rb +196 -0
  24. data/lib/logstash/outputs/cloudwatch.rb +346 -0
  25. data/lib/logstash/outputs/s3/file_repository.rb +121 -0
  26. data/lib/logstash/outputs/s3/path_validator.rb +18 -0
  27. data/lib/logstash/outputs/s3/size_and_time_rotation_policy.rb +24 -0
  28. data/lib/logstash/outputs/s3/size_rotation_policy.rb +26 -0
  29. data/lib/logstash/outputs/s3/temporary_file.rb +71 -0
  30. data/lib/logstash/outputs/s3/temporary_file_factory.rb +129 -0
  31. data/lib/logstash/outputs/s3/time_rotation_policy.rb +26 -0
  32. data/lib/logstash/outputs/s3/uploader.rb +74 -0
  33. data/lib/logstash/outputs/s3/writable_directory_validator.rb +17 -0
  34. data/lib/logstash/outputs/s3/write_bucket_permission_validator.rb +60 -0
  35. data/lib/logstash/outputs/s3.rb +405 -0
  36. data/lib/logstash/outputs/sns.rb +133 -0
  37. data/lib/logstash/outputs/sqs.rb +167 -0
  38. data/lib/logstash/plugin_mixins/aws_config/generic.rb +54 -0
  39. data/lib/logstash/plugin_mixins/aws_config/v2.rb +93 -0
  40. data/lib/logstash/plugin_mixins/aws_config.rb +8 -0
  41. data/logstash-integration-aws.gemspec +52 -0
  42. data/spec/codecs/cloudfront_spec.rb +92 -0
  43. data/spec/codecs/cloudtrail_spec.rb +56 -0
  44. data/spec/fixtures/aws_credentials_file_sample_test.yml +2 -0
  45. data/spec/fixtures/aws_temporary_credentials_file_sample_test.yml +3 -0
  46. data/spec/fixtures/cloudfront.log +4 -0
  47. data/spec/fixtures/compressed.log.gee.zip +0 -0
  48. data/spec/fixtures/compressed.log.gz +0 -0
  49. data/spec/fixtures/compressed.log.gzip +0 -0
  50. data/spec/fixtures/invalid_utf8.gbk.log +2 -0
  51. data/spec/fixtures/json.log +2 -0
  52. data/spec/fixtures/json_with_message.log +2 -0
  53. data/spec/fixtures/multiline.log +6 -0
  54. data/spec/fixtures/multiple_compressed_streams.gz +0 -0
  55. data/spec/fixtures/uncompressed.log +2 -0
  56. data/spec/inputs/cloudwatch_spec.rb +85 -0
  57. data/spec/inputs/s3_spec.rb +610 -0
  58. data/spec/inputs/sincedb_spec.rb +17 -0
  59. data/spec/inputs/sqs_spec.rb +324 -0
  60. data/spec/integration/cloudwatch_spec.rb +25 -0
  61. data/spec/integration/dynamic_prefix_spec.rb +92 -0
  62. data/spec/integration/gzip_file_spec.rb +62 -0
  63. data/spec/integration/gzip_size_rotation_spec.rb +63 -0
  64. data/spec/integration/outputs/sqs_spec.rb +98 -0
  65. data/spec/integration/restore_from_crash_spec.rb +67 -0
  66. data/spec/integration/s3_spec.rb +66 -0
  67. data/spec/integration/size_rotation_spec.rb +59 -0
  68. data/spec/integration/sqs_spec.rb +110 -0
  69. data/spec/integration/stress_test_spec.rb +60 -0
  70. data/spec/integration/time_based_rotation_with_constant_write_spec.rb +60 -0
  71. data/spec/integration/time_based_rotation_with_stale_write_spec.rb +64 -0
  72. data/spec/integration/upload_current_file_on_shutdown_spec.rb +51 -0
  73. data/spec/outputs/cloudwatch_spec.rb +38 -0
  74. data/spec/outputs/s3/file_repository_spec.rb +143 -0
  75. data/spec/outputs/s3/size_and_time_rotation_policy_spec.rb +77 -0
  76. data/spec/outputs/s3/size_rotation_policy_spec.rb +41 -0
  77. data/spec/outputs/s3/temporary_file_factory_spec.rb +89 -0
  78. data/spec/outputs/s3/temporary_file_spec.rb +47 -0
  79. data/spec/outputs/s3/time_rotation_policy_spec.rb +60 -0
  80. data/spec/outputs/s3/uploader_spec.rb +69 -0
  81. data/spec/outputs/s3/writable_directory_validator_spec.rb +40 -0
  82. data/spec/outputs/s3/write_bucket_permission_validator_spec.rb +49 -0
  83. data/spec/outputs/s3_spec.rb +232 -0
  84. data/spec/outputs/sns_spec.rb +160 -0
  85. data/spec/plugin_mixin/aws_config_spec.rb +217 -0
  86. data/spec/spec_helper.rb +8 -0
  87. data/spec/support/helpers.rb +119 -0
  88. data/spec/unit/outputs/sqs_spec.rb +247 -0
  89. metadata +467 -0
data/lib/logstash/inputs/s3.rb
@@ -0,0 +1,466 @@
+ # encoding: utf-8
+ require "logstash/inputs/base"
+ require "logstash/namespace"
+ require "logstash/plugin_mixins/aws_config"
+ require "time"
+ require "date"
+ require "tmpdir"
+ require "stud/interval"
+ require "stud/temporary"
+ require "aws-sdk-s3"
+ require "logstash/plugin_mixins/ecs_compatibility_support"
+
+ require 'java'
+
+ # Stream events from files in an S3 bucket.
+ #
+ # Each line from each file generates an event.
+ # Files ending in `.gz` are handled as gzipped files.
+ class LogStash::Inputs::S3 < LogStash::Inputs::Base
+
+ java_import java.io.InputStream
+ java_import java.io.InputStreamReader
+ java_import java.io.FileInputStream
+ java_import java.io.BufferedReader
+ java_import java.util.zip.GZIPInputStream
+ java_import java.util.zip.ZipException
+
+ include LogStash::PluginMixins::AwsConfig::V2
+ include LogStash::PluginMixins::ECSCompatibilitySupport(:disabled, :v1, :v8 => :v1)
+
+ config_name "s3"
+
+ default :codec, "plain"
+
+ # The name of the S3 bucket.
+ config :bucket, :validate => :string, :required => true
+
+ # If specified, only keys whose names start with this prefix (not a regexp) are processed.
+ config :prefix, :validate => :string, :default => nil
+
+ config :additional_settings, :validate => :hash, :default => {}
+
+ # The path to use for writing state. The state stored by this plugin is
+ # a memory of files already processed by this plugin.
+ #
+ # If not specified, the default is in `{path.data}/plugins/inputs/s3/...`
+ #
+ # Should be a path including a filename, not just a directory.
+ config :sincedb_path, :validate => :string, :default => nil
+
+ # Name of an S3 bucket to back up processed files to.
+ config :backup_to_bucket, :validate => :string, :default => nil
+
+ # Append a prefix to the key (full path including file name in S3) after processing.
+ # If backing up to another (or the same) bucket, this effectively lets you
+ # choose a new 'folder' to place the files in.
+ config :backup_add_prefix, :validate => :string, :default => nil
+
+ # Path of a local directory to back up processed files to.
+ config :backup_to_dir, :validate => :string, :default => nil
+
+ # Whether to delete processed files from the original bucket.
+ config :delete, :validate => :boolean, :default => false
+
+ # Interval, in seconds, to wait before checking the file list again after a run is finished.
+ config :interval, :validate => :number, :default => 60
+
+ # Whether to keep watching for new files at each interval.
+ # If false, any interval is ignored and the S3 bucket is listed only once.
+ config :watch_for_new_files, :validate => :boolean, :default => true
+
+ # Ruby-style regexp of keys to exclude from the bucket.
+ config :exclude_pattern, :validate => :string, :default => nil
+
+ # Set the directory where Logstash stores the temporary files before processing them.
+ # Defaults to a "logstash" directory under the OS temporary directory, e.g. /tmp/logstash on Linux.
+ config :temporary_directory, :validate => :string, :default => File.join(Dir.tmpdir, "logstash")
+
+ # Whether or not to include the S3 object's properties (last_modified, content_type, metadata)
+ # in each event at [@metadata][s3]. Regardless of this setting, [@metadata][s3][key] will always
+ # be present.
+ config :include_object_properties, :validate => :boolean, :default => false
+
+ # Regular expression used to determine whether an input file is in gzip format.
+ # Defaults to an expression that matches *.gz and *.gzip file extensions.
+ config :gzip_pattern, :validate => :string, :default => "\.gz(ip)?$"
+
+ CUTOFF_SECOND = 3
+
+ def initialize(*params)
+ super
+ @cloudfront_fields_key = ecs_select[disabled: 'cloudfront_fields', v1: '[@metadata][s3][cloudfront][fields]']
+ @cloudfront_version_key = ecs_select[disabled: 'cloudfront_version', v1: '[@metadata][s3][cloudfront][version]']
+ end
+
+ def register
+ require "fileutils"
+ require "digest/md5"
+
+ @logger.info("Registering", :bucket => @bucket, :region => @region)
+
+ s3 = get_s3object
+
+ @s3bucket = s3.bucket(@bucket)
+
+ unless @backup_to_bucket.nil?
+ @backup_bucket = s3.bucket(@backup_to_bucket)
+ begin
+ s3.client.head_bucket({ :bucket => @backup_to_bucket})
+ rescue Aws::S3::Errors::NoSuchBucket
+ s3.create_bucket({ :bucket => @backup_to_bucket})
+ end
+ end
+
+ unless @backup_to_dir.nil?
+ Dir.mkdir(@backup_to_dir, 0700) unless File.exists?(@backup_to_dir)
+ end
+
+ FileUtils.mkdir_p(@temporary_directory) unless Dir.exist?(@temporary_directory)
+
+ if !@watch_for_new_files && original_params.include?('interval')
+ logger.warn("`watch_for_new_files` has been disabled; `interval` directive will be ignored.")
+ end
+ end
+
+ def run(queue)
+ @current_thread = Thread.current
+ Stud.interval(@interval) do
+ process_files(queue)
+ stop unless @watch_for_new_files
+ end
+ end # def run
+
+ def list_new_files
+ objects = []
+ found = false
+ current_time = Time.now
+ sincedb_time = sincedb.read
+ begin
+ @s3bucket.objects(:prefix => @prefix).each do |log|
+ found = true
+ @logger.debug('Found key', :key => log.key)
+ if ignore_filename?(log.key)
+ @logger.debug('Ignoring', :key => log.key)
+ elsif log.content_length <= 0
+ @logger.debug('Object Zero Length', :key => log.key)
+ elsif log.last_modified <= sincedb_time
+ @logger.debug('Object Not Modified', :key => log.key)
+ elsif log.last_modified > (current_time - CUTOFF_SECOND).utc # files modified within the last CUTOFF_SECOND seconds will be processed in the next cycle
+ @logger.debug('Object Modified After Cutoff Time', :key => log.key)
+ elsif (log.storage_class == 'GLACIER' || log.storage_class == 'DEEP_ARCHIVE') && !file_restored?(log.object)
+ @logger.debug('Object Archived to Glacier', :key => log.key)
+ else
+ objects << log
+ @logger.debug("Added to objects[]", :key => log.key, :length => objects.length)
+ end
+ end
+ @logger.info('No files found in bucket', :prefix => prefix) unless found
+ rescue Aws::Errors::ServiceError => e
+ @logger.error("Unable to list objects in bucket", :exception => e.class, :message => e.message, :backtrace => e.backtrace, :prefix => prefix)
+ end
+ objects.sort_by { |log| log.last_modified }
+ end # def list_new_files
+
+ def backup_to_bucket(object)
+ unless @backup_to_bucket.nil?
+ backup_key = "#{@backup_add_prefix}#{object.key}"
+ @backup_bucket.object(backup_key).copy_from(:copy_source => "#{object.bucket_name}/#{object.key}")
+ if @delete
+ object.delete()
+ end
+ end
+ end
+
+ def backup_to_dir(filename)
+ unless @backup_to_dir.nil?
+ FileUtils.cp(filename, @backup_to_dir)
+ end
+ end
+
+ def process_files(queue)
+ objects = list_new_files
+
+ objects.each do |log|
+ if stop?
+ break
+ else
+ process_log(queue, log)
+ end
+ end
+ end # def process_files
+
+ def stop
+ # @current_thread is initialized in the `#run` method.
+ # It is needed because `#stop` is called from a different thread
+ # than `#run`, requiring us to call `Stud.stop!` with an explicit thread.
+ Stud.stop!(@current_thread)
+ end
+
+ private
+
+ # Read the content of the local file
+ #
+ # @param [Queue] Where to push the event
+ # @param [String] Which file to read from
+ # @param [S3Object] Source s3 object
+ # @return [Boolean] True if the file was completely read, false otherwise.
+ def process_local_log(queue, filename, object)
+ @logger.debug('Processing file', :filename => filename)
+ metadata = {}
+ # Codecs currently operate on bytes instead of streams,
+ # so all IO work (decompression, reading) needs to be done in the
+ # input itself and sent as bytes to the codecs.
+ read_file(filename) do |line|
+ if stop?
+ @logger.warn("Logstash S3 input, stop reading in the middle of the file, we will read it again when logstash is started")
+ return false
+ end
+
+ @codec.decode(line) do |event|
+ # We are making an assumption about the CloudFront
+ # log format: the user will use the plain or the line codec,
+ # and the message key will hold the actual line content.
+ # If the event is only metadata, the event will be dropped.
+ # This was the behavior of the pre-1.5 plugin.
+ #
+ # The line needs to go through the codecs to replace
+ # unknown bytes in the log stream before doing a regexp match, or
+ # you will get an `Error: invalid byte sequence in UTF-8`.
+ if event_is_metadata?(event)
+ @logger.debug('Event is metadata, updating the current cloudfront metadata', :event => event)
+ update_metadata(metadata, event)
+ else
+ push_decoded_event(queue, metadata, object, event)
+ end
+ end
+ end
+ # Ensure any stateful codecs (such as multiline) are flushed to the queue.
+ @codec.flush do |event|
+ push_decoded_event(queue, metadata, object, event)
+ end
+
+ return true
+ end # def process_local_log
+
+ def push_decoded_event(queue, metadata, object, event)
+ decorate(event)
+
+ if @include_object_properties
+ event.set("[@metadata][s3]", object.data.to_h)
+ else
+ event.set("[@metadata][s3]", {})
+ end
+
+ event.set("[@metadata][s3][key]", object.key)
+ event.set(@cloudfront_version_key, metadata[:cloudfront_version]) unless metadata[:cloudfront_version].nil?
+ event.set(@cloudfront_fields_key, metadata[:cloudfront_fields]) unless metadata[:cloudfront_fields].nil?
+
+ queue << event
+ end
+
+ def event_is_metadata?(event)
+ return false unless event.get("message").class == String
+ line = event.get("message")
+ version_metadata?(line) || fields_metadata?(line)
+ end
+
+ def version_metadata?(line)
+ line.start_with?('#Version: ')
+ end
+
+ def fields_metadata?(line)
+ line.start_with?('#Fields: ')
+ end
+
+ def update_metadata(metadata, event)
+ line = event.get('message').strip
+
+ if version_metadata?(line)
+ metadata[:cloudfront_version] = line.split(/#Version: (.+)/).last
+ end
+
+ if fields_metadata?(line)
+ metadata[:cloudfront_fields] = line.split(/#Fields: (.+)/).last
+ end
+ end
+
+ def read_file(filename, &block)
+ if gzip?(filename)
+ read_gzip_file(filename, block)
+ else
+ read_plain_file(filename, block)
+ end
+ rescue => e
+ # Skip any broken file.
+ @logger.error("Failed to read file, processing skipped", :exception => e.class, :message => e.message, :filename => filename)
+ end
+
+ def read_plain_file(filename, block)
+ File.open(filename, 'rb') do |file|
+ file.each(&block)
+ end
+ end
+
+ def read_gzip_file(filename, block)
+ file_stream = FileInputStream.new(filename)
+ gzip_stream = GZIPInputStream.new(file_stream)
+ decoder = InputStreamReader.new(gzip_stream, "UTF-8")
+ buffered = BufferedReader.new(decoder)
+
+ while (line = buffered.readLine())
+ block.call(line)
+ end
+ ensure
+ buffered.close unless buffered.nil?
+ decoder.close unless decoder.nil?
+ gzip_stream.close unless gzip_stream.nil?
+ file_stream.close unless file_stream.nil?
+ end
+
+ def gzip?(filename)
+ Regexp.new(@gzip_pattern).match(filename)
+ end
+
+ def sincedb
+ @sincedb ||= if @sincedb_path.nil?
+ @logger.info("Using default generated file for the sincedb", :filename => sincedb_file)
+ SinceDB::File.new(sincedb_file)
+ else
+ @logger.info("Using the provided sincedb_path", :sincedb_path => @sincedb_path)
+ SinceDB::File.new(@sincedb_path)
+ end
+ end
+
+ def sincedb_file
+ digest = Digest::MD5.hexdigest("#{@bucket}+#{@prefix}")
+ dir = File.join(LogStash::SETTINGS.get_value("path.data"), "plugins", "inputs", "s3")
+ FileUtils::mkdir_p(dir)
+ path = File.join(dir, "sincedb_#{digest}")
+
+ # Migrate the old default sincedb path to the new one.
+ if ENV["HOME"]
+ # This is the old file path including the old digest mechanism.
+ # It remains as a way to automatically upgrade users with the old default ($HOME)
+ # to the new default (path.data).
+ old = File.join(ENV["HOME"], ".sincedb_" + Digest::MD5.hexdigest("#{@bucket}+#{@prefix}"))
+ if File.exist?(old)
+ logger.info("Migrating old sincedb in $HOME to {path.data}")
+ FileUtils.mv(old, path)
+ end
+ end
+
+ path
+ end
+
+ def ignore_filename?(filename)
+ if @prefix == filename
+ return true
+ elsif filename.end_with?("/")
+ return true
+ elsif (@backup_add_prefix && @backup_to_bucket == @bucket && filename =~ /^#{backup_add_prefix}/)
+ return true
+ elsif @exclude_pattern.nil?
+ return false
+ elsif filename =~ Regexp.new(@exclude_pattern)
+ return true
+ else
+ return false
+ end
+ end
+
+ def process_log(queue, log)
+ @logger.debug("Processing", :bucket => @bucket, :key => log.key)
+ object = @s3bucket.object(log.key)
+
+ filename = File.join(temporary_directory, File.basename(log.key))
+ if download_remote_file(object, filename)
+ if process_local_log(queue, filename, object)
+ if object.last_modified == log.last_modified
+ backup_to_bucket(object)
+ backup_to_dir(filename)
+ delete_file_from_bucket(object)
+ FileUtils.remove_entry_secure(filename, true)
+ sincedb.write(log.last_modified)
+ else
+ @logger.info("#{log.key} is updated at #{object.last_modified} and will process in the next cycle")
+ end
+ end
+ else
+ FileUtils.remove_entry_secure(filename, true)
+ end
+ end
+
+ # Stream the remote file to the local disk
+ #
+ # @param [S3Object] Reference to the remote S3 object to download
+ # @param [String] The temporary filename to stream to.
+ # @return [Boolean] True if the file was completely downloaded
+ def download_remote_file(remote_object, local_filename)
+ completed = false
+ @logger.debug("Downloading remote file", :remote_key => remote_object.key, :local_filename => local_filename)
+ File.open(local_filename, 'wb') do |s3file|
+ return completed if stop?
+ begin
+ remote_object.get(:response_target => s3file)
+ completed = true
+ rescue Aws::Errors::ServiceError => e
+ @logger.warn("Unable to download remote file", :exception => e.class, :message => e.message, :remote_key => remote_object.key)
+ end
+ end
+ completed
+ end
+
+ def delete_file_from_bucket(object)
+ if @delete and @backup_to_bucket.nil?
+ object.delete()
+ end
+ end
+
+ def get_s3object
+ s3 = Aws::S3::Resource.new(aws_options_hash || {})
+ end
+
+ def file_restored?(object)
+ begin
+ restore = object.data.restore
+ if restore && restore.match(/ongoing-request\s?=\s?["']false["']/)
+ if restore = restore.match(/expiry-date\s?=\s?["'](.*?)["']/)
+ expiry_date = DateTime.parse(restore[1])
+ return true if DateTime.now < expiry_date # restored
+ else
+ @logger.debug("No expiry-date header for restore request: #{object.data.restore}")
+ return nil # no expiry-date found for ongoing request
+ end
+ end
+ rescue => e
+ @logger.debug("Could not determine Glacier restore status", :exception => e.class, :message => e.message)
+ end
+ return false
+ end
+
+ module SinceDB
+ class File
+ def initialize(file)
+ @sincedb_path = file
+ end
+
+ # @return [Time]
+ def read
+ if ::File.exists?(@sincedb_path)
+ content = ::File.read(@sincedb_path).chomp.strip
+ # The file may exist but be empty if we did not yet get the chance to write to it.
+ return content.empty? ? Time.new(0) : Time.parse(content)
+ else
+ return Time.new(0)
+ end
+ end
+
+ def write(since = nil)
+ since = Time.now if since.nil?
+ ::File.open(@sincedb_path, 'w') { |file| file.write(since.to_s) }
+ end
+ end
+ end
+ end # class LogStash::Inputs::S3
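
For reference, a minimal pipeline configuration exercising the options defined above might look like the sketch below. The bucket name, prefix, and sincedb path are hypothetical, and `region` plus the credential settings come from the shared AWS config mixin (plugin_mixins/aws_config), not from this file.

    input {
      s3 {
        bucket        => "my-example-logs"                  # hypothetical bucket name
        prefix        => "cloudfront/"                      # only keys starting with this prefix
        region        => "us-east-1"                        # supplied by the AWS config mixin
        interval      => 60                                 # seconds between bucket listings
        delete        => false                              # keep objects after processing
        sincedb_path  => "/var/lib/logstash/s3_sincedb"     # hypothetical state file (path plus filename)
        include_object_properties => true                   # copy object properties into [@metadata][s3]
      }
    }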
data/lib/logstash/inputs/sqs.rb
@@ -0,0 +1,196 @@
+ # encoding: utf-8
+ #
+ require "logstash/inputs/threadable"
+ require "logstash/namespace"
+ require "logstash/timestamp"
+ require "logstash/plugin_mixins/aws_config"
+ require "logstash/errors"
+
+ # Pull events from an Amazon Web Services Simple Queue Service (SQS) queue.
+ #
+ # SQS is a simple, scalable queue system that is part of the
+ # Amazon Web Services suite of tools.
+ #
+ # Although SQS is similar to other queuing systems like AMQP, it
+ # uses a custom API and requires that you have an AWS account.
+ # See http://aws.amazon.com/sqs/ for more details on how SQS works,
+ # what the pricing schedule looks like and how to set up a queue.
+ #
+ # To use this plugin, you *must*:
+ #
+ # * Have an AWS account
+ # * Set up an SQS queue
+ # * Create an identity that has access to consume messages from the queue.
+ #
+ # The "consumer" identity must have the following permissions on the queue:
+ #
+ # * `sqs:ChangeMessageVisibility`
+ # * `sqs:ChangeMessageVisibilityBatch`
+ # * `sqs:DeleteMessage`
+ # * `sqs:DeleteMessageBatch`
+ # * `sqs:GetQueueAttributes`
+ # * `sqs:GetQueueUrl`
+ # * `sqs:ListQueues`
+ # * `sqs:ReceiveMessage`
+ #
+ # Typically, you should set up an IAM policy, create a user and apply the IAM policy to the user.
+ # A sample policy granting the consumer permissions listed above is as follows:
+ # [source,json]
+ # {
+ # "Statement": [
+ # {
+ # "Action": [
+ # "sqs:ChangeMessageVisibility",
+ # "sqs:ChangeMessageVisibilityBatch",
+ # "sqs:DeleteMessage",
+ # "sqs:DeleteMessageBatch",
+ # "sqs:GetQueueAttributes",
+ # "sqs:GetQueueUrl",
+ # "sqs:ListQueues",
+ # "sqs:ReceiveMessage"
+ # ],
+ # "Effect": "Allow",
+ # "Resource": [
+ # "arn:aws:sqs:us-east-1:123456789012:Logstash"
+ # ]
+ # }
+ # ]
+ # }
+ #
+ # See http://aws.amazon.com/iam/ for more details on setting up AWS identities.
+ #
+ class LogStash::Inputs::SQS < LogStash::Inputs::Threadable
+ include LogStash::PluginMixins::AwsConfig::V2
+
+ MAX_TIME_BEFORE_GIVING_UP = 60
+ MAX_MESSAGES_TO_FETCH = 10 # Between 1 and 10 per the AWS SDK docs
+ SENT_TIMESTAMP = "SentTimestamp"
+ SQS_ATTRIBUTES = [SENT_TIMESTAMP]
+ BACKOFF_SLEEP_TIME = 1
+ BACKOFF_FACTOR = 2
+ DEFAULT_POLLING_FREQUENCY = 20
+
+ config_name "sqs"
+
+ default :codec, "json"
+
+ config :additional_settings, :validate => :hash, :default => {}
+
+ # Name of the SQS queue to pull messages from. Note that this is just the name of the queue, not the URL or ARN.
+ config :queue, :validate => :string, :required => true
+
+ # Account ID of the AWS account which owns the queue.
+ config :queue_owner_aws_account_id, :validate => :string, :required => false
+
+ # Name of the event field in which to store the SQS message ID
+ config :id_field, :validate => :string
+
+ # Name of the event field in which to store the SQS message MD5 checksum
+ config :md5_field, :validate => :string
+
+ # Name of the event field in which to store the SQS message Sent Timestamp
+ config :sent_timestamp_field, :validate => :string
+
+ # Polling frequency, default is 20 seconds
+ config :polling_frequency, :validate => :number, :default => DEFAULT_POLLING_FREQUENCY
+
+ attr_reader :poller
+
+ def register
+ require "aws-sdk-sqs"
+ @logger.info("Registering SQS input", :queue => @queue, :queue_owner_aws_account_id => @queue_owner_aws_account_id)
+
+ setup_queue
+ end
+
+ def queue_url(aws_sqs_client)
+ if @queue_owner_aws_account_id
+ return aws_sqs_client.get_queue_url({:queue_name => @queue, :queue_owner_aws_account_id => @queue_owner_aws_account_id})[:queue_url]
+ else
+ return aws_sqs_client.get_queue_url(:queue_name => @queue)[:queue_url]
+ end
+ end
+
+ def setup_queue
+ aws_sqs_client = Aws::SQS::Client.new(aws_options_hash || {})
+ poller = Aws::SQS::QueuePoller.new(queue_url(aws_sqs_client), :client => aws_sqs_client)
+ poller.before_request { |stats| throw :stop_polling if stop? }
+
+ @poller = poller
+ rescue Aws::SQS::Errors::ServiceError, Seahorse::Client::NetworkingError => e
+ @logger.error("Cannot establish connection to Amazon SQS", exception_details(e))
+ raise LogStash::ConfigurationError, "Verify the SQS queue name and your credentials"
+ end
+
+ def polling_options
+ {
+ :max_number_of_messages => MAX_MESSAGES_TO_FETCH,
+ :attribute_names => SQS_ATTRIBUTES,
+ :wait_time_seconds => @polling_frequency
+ }
+ end
+
+ def add_sqs_data(event, message)
+ event.set(@id_field, message.message_id) if @id_field
+ event.set(@md5_field, message.md5_of_body) if @md5_field
+ event.set(@sent_timestamp_field, convert_epoch_to_timestamp(message.attributes[SENT_TIMESTAMP])) if @sent_timestamp_field
+ event
+ end
+
+ def handle_message(message, output_queue)
+ @codec.decode(message.body) do |event|
+ add_sqs_data(event, message)
+ decorate(event)
+ output_queue << event
+ end
+ end
+
+ def run(output_queue)
+ @logger.debug("Polling SQS queue", :polling_options => polling_options)
+
+ run_with_backoff do
+ poller.poll(polling_options) do |messages, stats|
+ break if stop?
+ messages.each {|message| handle_message(message, output_queue) }
+ @logger.debug("SQS Stats:", :request_count => stats.request_count,
+ :received_message_count => stats.received_message_count,
+ :last_message_received_at => stats.last_message_received_at) if @logger.debug?
+ end
+ end
+ end
+
+ private
+
+ # Runs an AWS request inside a Ruby block with an exponential backoff in case
+ # we experience a ServiceError.
+ #
+ # @param [Block] block Ruby code block to execute.
+ def run_with_backoff(&block)
+ sleep_time = BACKOFF_SLEEP_TIME
+ begin
+ block.call
+ rescue Aws::SQS::Errors::ServiceError, Seahorse::Client::NetworkingError => e
+ @logger.warn("SQS error ... retrying with exponential backoff", exception_details(e, sleep_time))
+ sleep_time = backoff_sleep(sleep_time)
+ retry
+ end
+ end
+
+ def backoff_sleep(sleep_time)
+ sleep(sleep_time)
+ sleep_time > MAX_TIME_BEFORE_GIVING_UP ? sleep_time : sleep_time * BACKOFF_FACTOR
+ end
+
+ def convert_epoch_to_timestamp(time)
+ LogStash::Timestamp.at(time.to_i / 1000)
+ end
+
+ def exception_details(e, sleep_time = nil)
+ details = { :queue => @queue, :exception => e.class, :message => e.message }
+ details[:code] = e.code if e.is_a?(Aws::SQS::Errors::ServiceError) && e.code
+ details[:cause] = e.original_error if e.respond_to?(:original_error) && e.original_error # Seahorse::Client::NetworkingError
+ details[:sleep_time] = sleep_time if sleep_time
+ details[:backtrace] = e.backtrace if @logger.debug?
+ details
+ end
+
+ end # class LogStash::Inputs::SQS
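
Similarly, a minimal sketch of wiring this input into a pipeline, assuming the queue name and target field names below are placeholders; `region` and credentials again come from the shared AWS config mixin rather than from this file.

    input {
      sqs {
        queue                => "Logstash"                         # queue name only, not the URL or ARN
        region               => "us-east-1"                         # supplied by the AWS config mixin
        polling_frequency    => 20                                  # long-poll wait time in seconds
        id_field             => "[@metadata][sqs][message_id]"      # hypothetical target field
        sent_timestamp_field => "[@metadata][sqs][sent_timestamp]"  # hypothetical target field
      }
    }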