logstash-integration-aws 0.1.0.pre

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89)
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.PRE.MERGE.md +658 -0
  3. data/CHANGELOG.md +15 -0
  4. data/CONTRIBUTORS +40 -0
  5. data/Gemfile +11 -0
  6. data/LICENSE +202 -0
  7. data/NOTICE.TXT +5 -0
  8. data/README.md +205 -0
  9. data/docs/codec-cloudfront.asciidoc +53 -0
  10. data/docs/codec-cloudtrail.asciidoc +45 -0
  11. data/docs/index.asciidoc +38 -0
  12. data/docs/input-cloudwatch.asciidoc +320 -0
  13. data/docs/input-s3.asciidoc +346 -0
  14. data/docs/input-sqs.asciidoc +287 -0
  15. data/docs/output-cloudwatch.asciidoc +321 -0
  16. data/docs/output-s3.asciidoc +442 -0
  17. data/docs/output-sns.asciidoc +166 -0
  18. data/docs/output-sqs.asciidoc +242 -0
  19. data/lib/logstash/codecs/cloudfront.rb +84 -0
  20. data/lib/logstash/codecs/cloudtrail.rb +47 -0
  21. data/lib/logstash/inputs/cloudwatch.rb +338 -0
  22. data/lib/logstash/inputs/s3.rb +466 -0
  23. data/lib/logstash/inputs/sqs.rb +196 -0
  24. data/lib/logstash/outputs/cloudwatch.rb +346 -0
  25. data/lib/logstash/outputs/s3/file_repository.rb +121 -0
  26. data/lib/logstash/outputs/s3/path_validator.rb +18 -0
  27. data/lib/logstash/outputs/s3/size_and_time_rotation_policy.rb +24 -0
  28. data/lib/logstash/outputs/s3/size_rotation_policy.rb +26 -0
  29. data/lib/logstash/outputs/s3/temporary_file.rb +71 -0
  30. data/lib/logstash/outputs/s3/temporary_file_factory.rb +129 -0
  31. data/lib/logstash/outputs/s3/time_rotation_policy.rb +26 -0
  32. data/lib/logstash/outputs/s3/uploader.rb +74 -0
  33. data/lib/logstash/outputs/s3/writable_directory_validator.rb +17 -0
  34. data/lib/logstash/outputs/s3/write_bucket_permission_validator.rb +60 -0
  35. data/lib/logstash/outputs/s3.rb +405 -0
  36. data/lib/logstash/outputs/sns.rb +133 -0
  37. data/lib/logstash/outputs/sqs.rb +167 -0
  38. data/lib/logstash/plugin_mixins/aws_config/generic.rb +54 -0
  39. data/lib/logstash/plugin_mixins/aws_config/v2.rb +93 -0
  40. data/lib/logstash/plugin_mixins/aws_config.rb +8 -0
  41. data/logstash-integration-aws.gemspec +52 -0
  42. data/spec/codecs/cloudfront_spec.rb +92 -0
  43. data/spec/codecs/cloudtrail_spec.rb +56 -0
  44. data/spec/fixtures/aws_credentials_file_sample_test.yml +2 -0
  45. data/spec/fixtures/aws_temporary_credentials_file_sample_test.yml +3 -0
  46. data/spec/fixtures/cloudfront.log +4 -0
  47. data/spec/fixtures/compressed.log.gee.zip +0 -0
  48. data/spec/fixtures/compressed.log.gz +0 -0
  49. data/spec/fixtures/compressed.log.gzip +0 -0
  50. data/spec/fixtures/invalid_utf8.gbk.log +2 -0
  51. data/spec/fixtures/json.log +2 -0
  52. data/spec/fixtures/json_with_message.log +2 -0
  53. data/spec/fixtures/multiline.log +6 -0
  54. data/spec/fixtures/multiple_compressed_streams.gz +0 -0
  55. data/spec/fixtures/uncompressed.log +2 -0
  56. data/spec/inputs/cloudwatch_spec.rb +85 -0
  57. data/spec/inputs/s3_spec.rb +610 -0
  58. data/spec/inputs/sincedb_spec.rb +17 -0
  59. data/spec/inputs/sqs_spec.rb +324 -0
  60. data/spec/integration/cloudwatch_spec.rb +25 -0
  61. data/spec/integration/dynamic_prefix_spec.rb +92 -0
  62. data/spec/integration/gzip_file_spec.rb +62 -0
  63. data/spec/integration/gzip_size_rotation_spec.rb +63 -0
  64. data/spec/integration/outputs/sqs_spec.rb +98 -0
  65. data/spec/integration/restore_from_crash_spec.rb +67 -0
  66. data/spec/integration/s3_spec.rb +66 -0
  67. data/spec/integration/size_rotation_spec.rb +59 -0
  68. data/spec/integration/sqs_spec.rb +110 -0
  69. data/spec/integration/stress_test_spec.rb +60 -0
  70. data/spec/integration/time_based_rotation_with_constant_write_spec.rb +60 -0
  71. data/spec/integration/time_based_rotation_with_stale_write_spec.rb +64 -0
  72. data/spec/integration/upload_current_file_on_shutdown_spec.rb +51 -0
  73. data/spec/outputs/cloudwatch_spec.rb +38 -0
  74. data/spec/outputs/s3/file_repository_spec.rb +143 -0
  75. data/spec/outputs/s3/size_and_time_rotation_policy_spec.rb +77 -0
  76. data/spec/outputs/s3/size_rotation_policy_spec.rb +41 -0
  77. data/spec/outputs/s3/temporary_file_factory_spec.rb +89 -0
  78. data/spec/outputs/s3/temporary_file_spec.rb +47 -0
  79. data/spec/outputs/s3/time_rotation_policy_spec.rb +60 -0
  80. data/spec/outputs/s3/uploader_spec.rb +69 -0
  81. data/spec/outputs/s3/writable_directory_validator_spec.rb +40 -0
  82. data/spec/outputs/s3/write_bucket_permission_validator_spec.rb +49 -0
  83. data/spec/outputs/s3_spec.rb +232 -0
  84. data/spec/outputs/sns_spec.rb +160 -0
  85. data/spec/plugin_mixin/aws_config_spec.rb +217 -0
  86. data/spec/spec_helper.rb +8 -0
  87. data/spec/support/helpers.rb +119 -0
  88. data/spec/unit/outputs/sqs_spec.rb +247 -0
  89. metadata +467 -0
data/lib/logstash/inputs/s3.rb
@@ -0,0 +1,466 @@
+# encoding: utf-8
+require "logstash/inputs/base"
+require "logstash/namespace"
+require "logstash/plugin_mixins/aws_config"
+require "time"
+require "date"
+require "tmpdir"
+require "stud/interval"
+require "stud/temporary"
+require "aws-sdk-s3"
+require "logstash/plugin_mixins/ecs_compatibility_support"
+
+require 'java'
+
+# Stream events from files from an S3 bucket.
+#
+# Each line from each file generates an event.
+# Files ending in `.gz` are handled as gzip'ed files.
+class LogStash::Inputs::S3 < LogStash::Inputs::Base
+
+  java_import java.io.InputStream
+  java_import java.io.InputStreamReader
+  java_import java.io.FileInputStream
+  java_import java.io.BufferedReader
+  java_import java.util.zip.GZIPInputStream
+  java_import java.util.zip.ZipException
+
+  include LogStash::PluginMixins::AwsConfig::V2
+  include LogStash::PluginMixins::ECSCompatibilitySupport(:disabled, :v1, :v8 => :v1)
+
+  config_name "s3"
+
+  default :codec, "plain"
+
+  # The name of the S3 bucket.
+  config :bucket, :validate => :string, :required => true
+
+  # If specified, the prefix of filenames in the bucket must match (not a regexp)
+  config :prefix, :validate => :string, :default => nil
+
+  config :additional_settings, :validate => :hash, :default => {}
+
+  # The path to use for writing state. The state stored by this plugin is
+  # a memory of files already processed by this plugin.
+  #
+  # If not specified, the default is in `{path.data}/plugins/inputs/s3/...`
+  #
+  # Should be a path with filename not just a directory.
+  config :sincedb_path, :validate => :string, :default => nil
+
+  # Name of an S3 bucket to back up processed files to.
+  config :backup_to_bucket, :validate => :string, :default => nil
+
+  # Append a prefix to the key (full path including file name in s3) after processing.
+  # If backing up to another (or the same) bucket, this effectively lets you
+  # choose a new 'folder' to place the files in
+  config :backup_add_prefix, :validate => :string, :default => nil
+
+  # Path of a local directory to back up processed files to.
+  config :backup_to_dir, :validate => :string, :default => nil
+
+  # Whether to delete processed files from the original bucket.
+  config :delete, :validate => :boolean, :default => false
+
+  # Interval to wait before checking the file list again after a run is finished.
+  # Value is in seconds.
+  config :interval, :validate => :number, :default => 60
+
+  # Whether to keep watching for new files at the configured interval.
+  # If false, overrides any interval and only lists the s3 bucket once.
+  config :watch_for_new_files, :validate => :boolean, :default => true
+
+  # Ruby style regexp of keys to exclude from the bucket
+  config :exclude_pattern, :validate => :string, :default => nil
+
+  # Set the directory where logstash will store the tmp files before processing them.
+  # Defaults to a "logstash" directory under the OS temporary directory (e.g. /tmp/logstash on Linux).
+  config :temporary_directory, :validate => :string, :default => File.join(Dir.tmpdir, "logstash")
+
+  # Whether or not to include the S3 object's properties (last_modified, content_type, metadata)
+  # in each Event at [@metadata][s3]. Regardless of this setting, [@metadata][s3][key] will always
+  # be present.
+  config :include_object_properties, :validate => :boolean, :default => false
+
+  # Regular expression used to determine whether an input file is in gzip format.
+  # Defaults to an expression that matches *.gz and *.gzip file extensions.
+  config :gzip_pattern, :validate => :string, :default => "\.gz(ip)?$"
+
+  CUTOFF_SECOND = 3
+
+  def initialize(*params)
+    super
+    @cloudfront_fields_key = ecs_select[disabled: 'cloudfront_fields', v1: '[@metadata][s3][cloudfront][fields]']
+    @cloudfront_version_key = ecs_select[disabled: 'cloudfront_version', v1: '[@metadata][s3][cloudfront][version]']
+  end
+
+  def register
+    require "fileutils"
+    require "digest/md5"
+
+    @logger.info("Registering", :bucket => @bucket, :region => @region)
+
+    s3 = get_s3object
+
+    @s3bucket = s3.bucket(@bucket)
+
+    unless @backup_to_bucket.nil?
+      @backup_bucket = s3.bucket(@backup_to_bucket)
+      begin
+        s3.client.head_bucket({ :bucket => @backup_to_bucket})
+      rescue Aws::S3::Errors::NoSuchBucket
+        s3.create_bucket({ :bucket => @backup_to_bucket})
+      end
+    end
+
+    unless @backup_to_dir.nil?
+      Dir.mkdir(@backup_to_dir, 0700) unless File.exists?(@backup_to_dir)
+    end
+
+    FileUtils.mkdir_p(@temporary_directory) unless Dir.exist?(@temporary_directory)
+
+    if !@watch_for_new_files && original_params.include?('interval')
+      logger.warn("`watch_for_new_files` has been disabled; `interval` directive will be ignored.")
+    end
+  end
+
+  def run(queue)
+    @current_thread = Thread.current
+    Stud.interval(@interval) do
+      process_files(queue)
+      stop unless @watch_for_new_files
+    end
+  end # def run
+
+  def list_new_files
+    objects = []
+    found = false
+    current_time = Time.now
+    sincedb_time = sincedb.read
+    begin
+      @s3bucket.objects(:prefix => @prefix).each do |log|
+        found = true
+        @logger.debug('Found key', :key => log.key)
+        if ignore_filename?(log.key)
+          @logger.debug('Ignoring', :key => log.key)
+        elsif log.content_length <= 0
+          @logger.debug('Object Zero Length', :key => log.key)
+        elsif log.last_modified <= sincedb_time
+          @logger.debug('Object Not Modified', :key => log.key)
+        elsif log.last_modified > (current_time - CUTOFF_SECOND).utc # files modified within the cutoff window are processed in the next cycle
+          @logger.debug('Object Modified After Cutoff Time', :key => log.key)
+        elsif (log.storage_class == 'GLACIER' || log.storage_class == 'DEEP_ARCHIVE') && !file_restored?(log.object)
+          @logger.debug('Object Archived to Glacier', :key => log.key)
+        else
+          objects << log
+          @logger.debug("Added to objects[]", :key => log.key, :length => objects.length)
+        end
+      end
+      @logger.info('No files found in bucket', :prefix => prefix) unless found
+    rescue Aws::Errors::ServiceError => e
+      @logger.error("Unable to list objects in bucket", :exception => e.class, :message => e.message, :backtrace => e.backtrace, :prefix => prefix)
+    end
+    objects.sort_by { |log| log.last_modified }
+  end # def list_new_files
+
+  def backup_to_bucket(object)
+    unless @backup_to_bucket.nil?
+      backup_key = "#{@backup_add_prefix}#{object.key}"
+      @backup_bucket.object(backup_key).copy_from(:copy_source => "#{object.bucket_name}/#{object.key}")
+      if @delete
+        object.delete()
+      end
+    end
+  end
+
+  def backup_to_dir(filename)
+    unless @backup_to_dir.nil?
+      FileUtils.cp(filename, @backup_to_dir)
+    end
+  end
+
+  def process_files(queue)
+    objects = list_new_files
+
+    objects.each do |log|
+      if stop?
+        break
+      else
+        process_log(queue, log)
+      end
+    end
+  end # def process_files
+
+  def stop
+    # @current_thread is initialized in the `#run` method.
+    # This variable is needed because `#stop` is called from a different thread
+    # than `#run`, requiring us to call `Stud.stop!` with an explicit thread.
+    Stud.stop!(@current_thread)
+  end
+
+  private
+
+  # Read the content of the local file
+  #
+  # @param [Queue] queue Where to push the events
+  # @param [String] filename Which file to read from
+  # @param [S3Object] object Source S3 object
+  # @return [Boolean] True if the file was completely read, false otherwise.
+  def process_local_log(queue, filename, object)
+    @logger.debug('Processing file', :filename => filename)
+    metadata = {}
+    # Currently codecs operate on bytes instead of streams,
+    # so all IO work (decompression, reading) needs to be done in the actual
+    # input and sent as bytes to the codecs.
+    read_file(filename) do |line|
+      if stop?
+        @logger.warn("Logstash S3 input, stop reading in the middle of the file, we will read it again when logstash is started")
+        return false
+      end
+
+      @codec.decode(line) do |event|
+        # We are making an assumption concerning cloudfront
+        # log format: the user will use the plain or the line codec
+        # and the message key will represent the actual line content.
+        # If the event is only metadata, the event will be dropped.
+        # This was the behavior of the pre 1.5 plugin.
+        #
+        # The line needs to go through the codecs to replace
+        # unknown bytes in the log stream before doing a regexp match, or
+        # you will get an `Error: invalid byte sequence in UTF-8'
+        if event_is_metadata?(event)
+          @logger.debug('Event is metadata, updating the current cloudfront metadata', :event => event)
+          update_metadata(metadata, event)
+        else
+          push_decoded_event(queue, metadata, object, event)
+        end
+      end
+    end
+    # Ensure any stateful codecs (such as multiline) are flushed to the queue
+    @codec.flush do |event|
+      push_decoded_event(queue, metadata, object, event)
+    end
+
+    return true
+  end # def process_local_log
+
+  def push_decoded_event(queue, metadata, object, event)
+    decorate(event)
+
+    if @include_object_properties
+      event.set("[@metadata][s3]", object.data.to_h)
+    else
+      event.set("[@metadata][s3]", {})
+    end
+
+    event.set("[@metadata][s3][key]", object.key)
+    event.set(@cloudfront_version_key, metadata[:cloudfront_version]) unless metadata[:cloudfront_version].nil?
+    event.set(@cloudfront_fields_key, metadata[:cloudfront_fields]) unless metadata[:cloudfront_fields].nil?
+
+    queue << event
+  end
+
+  def event_is_metadata?(event)
+    return false unless event.get("message").class == String
+    line = event.get("message")
+    version_metadata?(line) || fields_metadata?(line)
+  end
+
+  def version_metadata?(line)
+    line.start_with?('#Version: ')
+  end
+
+  def fields_metadata?(line)
+    line.start_with?('#Fields: ')
+  end
+
+  def update_metadata(metadata, event)
+    line = event.get('message').strip
+
+    if version_metadata?(line)
+      metadata[:cloudfront_version] = line.split(/#Version: (.+)/).last
+    end
+
+    if fields_metadata?(line)
+      metadata[:cloudfront_fields] = line.split(/#Fields: (.+)/).last
+    end
+  end
+
+  def read_file(filename, &block)
+    if gzip?(filename)
+      read_gzip_file(filename, block)
+    else
+      read_plain_file(filename, block)
+    end
+  rescue => e
+    # skip any broken file
+    @logger.error("Failed to read file, processing skipped", :exception => e.class, :message => e.message, :filename => filename)
+  end
+
+  def read_plain_file(filename, block)
+    File.open(filename, 'rb') do |file|
+      file.each(&block)
+    end
+  end
+
+  def read_gzip_file(filename, block)
+    file_stream = FileInputStream.new(filename)
+    gzip_stream = GZIPInputStream.new(file_stream)
+    decoder = InputStreamReader.new(gzip_stream, "UTF-8")
+    buffered = BufferedReader.new(decoder)
+
+    while (line = buffered.readLine())
+      block.call(line)
+    end
+  ensure
+    buffered.close unless buffered.nil?
+    decoder.close unless decoder.nil?
+    gzip_stream.close unless gzip_stream.nil?
+    file_stream.close unless file_stream.nil?
+  end
+
+  def gzip?(filename)
+    Regexp.new(@gzip_pattern).match(filename)
+  end
+
+  def sincedb
+    @sincedb ||= if @sincedb_path.nil?
+                   @logger.info("Using default generated file for the sincedb", :filename => sincedb_file)
+                   SinceDB::File.new(sincedb_file)
+                 else
+                   @logger.info("Using the provided sincedb_path", :sincedb_path => @sincedb_path)
+                   SinceDB::File.new(@sincedb_path)
+                 end
+  end
+
+  def sincedb_file
+    digest = Digest::MD5.hexdigest("#{@bucket}+#{@prefix}")
+    dir = File.join(LogStash::SETTINGS.get_value("path.data"), "plugins", "inputs", "s3")
+    FileUtils::mkdir_p(dir)
+    path = File.join(dir, "sincedb_#{digest}")
+
+    # Migrate old default sincedb path to new one.
+    if ENV["HOME"]
+      # This is the old file path including the old digest mechanism.
+      # It remains as a way to automatically upgrade users with the old default ($HOME)
+      # to the new default (path.data)
+      old = File.join(ENV["HOME"], ".sincedb_" + Digest::MD5.hexdigest("#{@bucket}+#{@prefix}"))
+      if File.exist?(old)
+        logger.info("Migrating old sincedb in $HOME to {path.data}")
+        FileUtils.mv(old, path)
+      end
+    end
+
+    path
+  end
+
+  def ignore_filename?(filename)
+    if @prefix == filename
+      return true
+    elsif filename.end_with?("/")
+      return true
+    elsif (@backup_add_prefix && @backup_to_bucket == @bucket && filename =~ /^#{backup_add_prefix}/)
+      return true
+    elsif @exclude_pattern.nil?
+      return false
+    elsif filename =~ Regexp.new(@exclude_pattern)
+      return true
+    else
+      return false
+    end
+  end
+
+  def process_log(queue, log)
+    @logger.debug("Processing", :bucket => @bucket, :key => log.key)
+    object = @s3bucket.object(log.key)
+
+    filename = File.join(temporary_directory, File.basename(log.key))
+    if download_remote_file(object, filename)
+      if process_local_log(queue, filename, object)
+        if object.last_modified == log.last_modified
+          backup_to_bucket(object)
+          backup_to_dir(filename)
+          delete_file_from_bucket(object)
+          FileUtils.remove_entry_secure(filename, true)
+          sincedb.write(log.last_modified)
+        else
+          @logger.info("#{log.key} is updated at #{object.last_modified} and will process in the next cycle")
+        end
+      end
+    else
+      FileUtils.remove_entry_secure(filename, true)
+    end
+  end
+
+  # Stream the remote file to the local disk
+  #
+  # @param [S3Object] remote_object Reference to the remote S3 object to download
+  # @param [String] local_filename The temporary filename to stream to.
+  # @return [Boolean] True if the file was completely downloaded
+  def download_remote_file(remote_object, local_filename)
+    completed = false
+    @logger.debug("Downloading remote file", :remote_key => remote_object.key, :local_filename => local_filename)
+    File.open(local_filename, 'wb') do |s3file|
+      return completed if stop?
+      begin
+        remote_object.get(:response_target => s3file)
+        completed = true
+      rescue Aws::Errors::ServiceError => e
+        @logger.warn("Unable to download remote file", :exception => e.class, :message => e.message, :remote_key => remote_object.key)
+      end
+    end
+    completed
+  end
+
+  def delete_file_from_bucket(object)
+    if @delete and @backup_to_bucket.nil?
+      object.delete()
+    end
+  end
+
+  def get_s3object
+    s3 = Aws::S3::Resource.new(aws_options_hash || {})
+  end
+
+  def file_restored?(object)
+    begin
+      restore = object.data.restore
+      if restore && restore.match(/ongoing-request\s?=\s?["']false["']/)
+        if restore = restore.match(/expiry-date\s?=\s?["'](.*?)["']/)
+          expiry_date = DateTime.parse(restore[1])
+          return true if DateTime.now < expiry_date # restored
+        else
+          @logger.debug("No expiry-date header for restore request: #{object.data.restore}")
+          return nil # no expiry-date found for ongoing request
+        end
+      end
+    rescue => e
+      @logger.debug("Could not determine Glacier restore status", :exception => e.class, :message => e.message)
+    end
+    return false
+  end
+
+  module SinceDB
+    class File
+      def initialize(file)
+        @sincedb_path = file
+      end
+
+      # @return [Time]
+      def read
+        if ::File.exists?(@sincedb_path)
+          content = ::File.read(@sincedb_path).chomp.strip
+          # If the file was created but we didn't have the time to write to it
+          return content.empty? ? Time.new(0) : Time.parse(content)
+        else
+          return Time.new(0)
+        end
+      end
+
+      def write(since = nil)
+        since = Time.now if since.nil?
+        ::File.open(@sincedb_path, 'w') { |file| file.write(since.to_s) }
+      end
+    end
+  end
+end # class LogStash::Inputs::S3
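Note: the sincedb timestamp and the CUTOFF_SECOND constant above together decide which objects a polling cycle picks up. As a rough illustration, here is a minimal plain-Ruby sketch (not part of the gem; the candidate? helper name is hypothetical) of the selection predicate that list_new_files applies to each object's last_modified time:

    require "time"

    CUTOFF_SECOND = 3  # same cutoff as the plugin above

    # Return true when an object should be processed in the current cycle:
    # it must be newer than the sincedb timestamp, but older than "now minus
    # the cutoff", so objects possibly still being written are deferred.
    def candidate?(last_modified, sincedb_time, now = Time.now)
      return false if last_modified <= sincedb_time        # already processed
      return false if last_modified > now - CUTOFF_SECOND  # too fresh, next cycle
      true
    end

    sincedb_time = Time.utc(2022, 1, 1)
    candidate?(Time.utc(2022, 6, 1), sincedb_time)  # => true (new enough, outside the cutoff)
    candidate?(Time.now, sincedb_time)              # => false (inside the cutoff window)

In the plugin itself this check is interleaved with the prefix, zero-length, exclude-pattern, and Glacier checks, and sincedb.write(log.last_modified) only advances the timestamp after a file has been fully processed.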
data/lib/logstash/inputs/sqs.rb
@@ -0,0 +1,196 @@
+# encoding: utf-8
+#
+require "logstash/inputs/threadable"
+require "logstash/namespace"
+require "logstash/timestamp"
+require "logstash/plugin_mixins/aws_config"
+require "logstash/errors"
+
+# Pull events from an Amazon Web Services Simple Queue Service (SQS) queue.
+#
+# SQS is a simple, scalable queue system that is part of the
+# Amazon Web Services suite of tools.
+#
+# Although SQS is similar to other queuing systems like AMQP, it
+# uses a custom API and requires that you have an AWS account.
+# See http://aws.amazon.com/sqs/ for more details on how SQS works,
+# what the pricing schedule looks like and how to set up a queue.
+#
+# To use this plugin, you *must*:
+#
+#  * Have an AWS account
+#  * Set up an SQS queue
+#  * Create an identity that has access to consume messages from the queue.
+#
+# The "consumer" identity must have the following permissions on the queue:
+#
+#  * `sqs:ChangeMessageVisibility`
+#  * `sqs:ChangeMessageVisibilityBatch`
+#  * `sqs:DeleteMessage`
+#  * `sqs:DeleteMessageBatch`
+#  * `sqs:GetQueueAttributes`
+#  * `sqs:GetQueueUrl`
+#  * `sqs:ListQueues`
+#  * `sqs:ReceiveMessage`
+#
+# Typically, you should set up an IAM policy, create a user and apply the IAM policy to the user.
+# A sample policy is as follows:
+# [source,json]
+#     {
+#       "Statement": [
+#         {
+#           "Action": [
+#             "sqs:ChangeMessageVisibility",
+#             "sqs:ChangeMessageVisibilityBatch",
+#             "sqs:GetQueueAttributes",
+#             "sqs:GetQueueUrl",
+#             "sqs:ListQueues",
+#             "sqs:SendMessage",
+#             "sqs:SendMessageBatch"
+#           ],
+#           "Effect": "Allow",
+#           "Resource": [
+#             "arn:aws:sqs:us-east-1:123456789012:Logstash"
+#           ]
+#         }
+#       ]
+#     }
+#
+# See http://aws.amazon.com/iam/ for more details on setting up AWS identities.
+#
+class LogStash::Inputs::SQS < LogStash::Inputs::Threadable
+  include LogStash::PluginMixins::AwsConfig::V2
+
+  MAX_TIME_BEFORE_GIVING_UP = 60
+  MAX_MESSAGES_TO_FETCH = 10 # Between 1-10 in the AWS-SDK doc
+  SENT_TIMESTAMP = "SentTimestamp"
+  SQS_ATTRIBUTES = [SENT_TIMESTAMP]
+  BACKOFF_SLEEP_TIME = 1
+  BACKOFF_FACTOR = 2
+  DEFAULT_POLLING_FREQUENCY = 20
+
+  config_name "sqs"
+
+  default :codec, "json"
+
+  config :additional_settings, :validate => :hash, :default => {}
+
+  # Name of the SQS queue to pull messages from. Note that this is just the name of the queue, not the URL or ARN.
+  config :queue, :validate => :string, :required => true
+
+  # Account ID of the AWS account which owns the queue.
+  config :queue_owner_aws_account_id, :validate => :string, :required => false
+
+  # Name of the event field in which to store the SQS message ID
+  config :id_field, :validate => :string
+
+  # Name of the event field in which to store the SQS message MD5 checksum
+  config :md5_field, :validate => :string
+
+  # Name of the event field in which to store the SQS message Sent Timestamp
+  config :sent_timestamp_field, :validate => :string
+
+  # Polling frequency, default is 20 seconds
+  config :polling_frequency, :validate => :number, :default => DEFAULT_POLLING_FREQUENCY
+
+  attr_reader :poller
+
+  def register
+    require "aws-sdk-sqs"
+    @logger.info("Registering SQS input", :queue => @queue, :queue_owner_aws_account_id => @queue_owner_aws_account_id)
+
+    setup_queue
+  end
+
+  def queue_url(aws_sqs_client)
+    if @queue_owner_aws_account_id
+      return aws_sqs_client.get_queue_url({:queue_name => @queue, :queue_owner_aws_account_id => @queue_owner_aws_account_id})[:queue_url]
+    else
+      return aws_sqs_client.get_queue_url(:queue_name => @queue)[:queue_url]
+    end
+  end
+
+  def setup_queue
+    aws_sqs_client = Aws::SQS::Client.new(aws_options_hash || {})
+    poller = Aws::SQS::QueuePoller.new(queue_url(aws_sqs_client), :client => aws_sqs_client)
+    poller.before_request { |stats| throw :stop_polling if stop? }
+
+    @poller = poller
+  rescue Aws::SQS::Errors::ServiceError, Seahorse::Client::NetworkingError => e
+    @logger.error("Cannot establish connection to Amazon SQS", exception_details(e))
+    raise LogStash::ConfigurationError, "Verify the SQS queue name and your credentials"
+  end
+
+  def polling_options
+    {
+      :max_number_of_messages => MAX_MESSAGES_TO_FETCH,
+      :attribute_names => SQS_ATTRIBUTES,
+      :wait_time_seconds => @polling_frequency
+    }
+  end
+
+  def add_sqs_data(event, message)
+    event.set(@id_field, message.message_id) if @id_field
+    event.set(@md5_field, message.md5_of_body) if @md5_field
+    event.set(@sent_timestamp_field, convert_epoch_to_timestamp(message.attributes[SENT_TIMESTAMP])) if @sent_timestamp_field
+    event
+  end
+
+  def handle_message(message, output_queue)
+    @codec.decode(message.body) do |event|
+      add_sqs_data(event, message)
+      decorate(event)
+      output_queue << event
+    end
+  end
+
+  def run(output_queue)
+    @logger.debug("Polling SQS queue", :polling_options => polling_options)
+
+    run_with_backoff do
+      poller.poll(polling_options) do |messages, stats|
+        break if stop?
+        messages.each {|message| handle_message(message, output_queue) }
+        @logger.debug("SQS Stats:", :request_count => stats.request_count,
+                      :received_message_count => stats.received_message_count,
+                      :last_message_received_at => stats.last_message_received_at) if @logger.debug?
+      end
+    end
+  end
+
+  private
+
+  # Runs an AWS request inside a Ruby block with an exponential backoff in case
+  # we experience a ServiceError.
+  #
+  # @param [Block] block Ruby code block to execute.
+  def run_with_backoff(&block)
+    sleep_time = BACKOFF_SLEEP_TIME
+    begin
+      block.call
+    rescue Aws::SQS::Errors::ServiceError, Seahorse::Client::NetworkingError => e
+      @logger.warn("SQS error ... retrying with exponential backoff", exception_details(e, sleep_time))
+      sleep_time = backoff_sleep(sleep_time)
+      retry
+    end
+  end
+
+  def backoff_sleep(sleep_time)
+    sleep(sleep_time)
+    sleep_time > MAX_TIME_BEFORE_GIVING_UP ? sleep_time : sleep_time * BACKOFF_FACTOR
+  end
+
+  def convert_epoch_to_timestamp(time)
+    LogStash::Timestamp.at(time.to_i / 1000)
+  end
+
+  def exception_details(e, sleep_time = nil)
+    details = { :queue => @queue, :exception => e.class, :message => e.message }
+    details[:code] = e.code if e.is_a?(Aws::SQS::Errors::ServiceError) && e.code
+    details[:cause] = e.original_error if e.respond_to?(:original_error) && e.original_error # Seahorse::Client::NetworkingError
+    details[:sleep_time] = sleep_time if sleep_time
+    details[:backtrace] = e.backtrace if @logger.debug?
+    details
+  end
+
+end # class LogStash::Inputs::SQS
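Note: despite the name MAX_TIME_BEFORE_GIVING_UP, run_with_backoff above retries indefinitely; the constant only caps how far the delay grows. A minimal standalone Ruby sketch (not part of the gem) of the resulting sleep schedule:

    BACKOFF_SLEEP_TIME = 1
    BACKOFF_FACTOR = 2
    MAX_TIME_BEFORE_GIVING_UP = 60

    # Reproduce the growth rule from backoff_sleep: double the delay until it
    # exceeds the cap, then keep it constant (the poller never stops retrying).
    sleep_time = BACKOFF_SLEEP_TIME
    schedule = Array.new(10) do
      current = sleep_time
      sleep_time = sleep_time > MAX_TIME_BEFORE_GIVING_UP ? sleep_time : sleep_time * BACKOFF_FACTOR
      current
    end
    p schedule  # => [1, 2, 4, 8, 16, 32, 64, 64, 64, 64]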