logstash-integration-aws 7.1.1-java
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.PRE.MERGE.md +658 -0
- data/CHANGELOG.md +33 -0
- data/CONTRIBUTORS +40 -0
- data/Gemfile +11 -0
- data/LICENSE +202 -0
- data/NOTICE.TXT +5 -0
- data/README.md +205 -0
- data/VERSION +1 -0
- data/docs/codec-cloudfront.asciidoc +53 -0
- data/docs/codec-cloudtrail.asciidoc +45 -0
- data/docs/index.asciidoc +36 -0
- data/docs/input-cloudwatch.asciidoc +320 -0
- data/docs/input-s3.asciidoc +346 -0
- data/docs/input-sqs.asciidoc +287 -0
- data/docs/output-cloudwatch.asciidoc +321 -0
- data/docs/output-s3.asciidoc +442 -0
- data/docs/output-sns.asciidoc +166 -0
- data/docs/output-sqs.asciidoc +242 -0
- data/lib/logstash/codecs/cloudfront.rb +84 -0
- data/lib/logstash/codecs/cloudtrail.rb +47 -0
- data/lib/logstash/inputs/cloudwatch.rb +338 -0
- data/lib/logstash/inputs/s3.rb +466 -0
- data/lib/logstash/inputs/sqs.rb +196 -0
- data/lib/logstash/outputs/cloudwatch.rb +346 -0
- data/lib/logstash/outputs/s3/file_repository.rb +193 -0
- data/lib/logstash/outputs/s3/path_validator.rb +18 -0
- data/lib/logstash/outputs/s3/size_and_time_rotation_policy.rb +24 -0
- data/lib/logstash/outputs/s3/size_rotation_policy.rb +26 -0
- data/lib/logstash/outputs/s3/temporary_file.rb +114 -0
- data/lib/logstash/outputs/s3/temporary_file_factory.rb +126 -0
- data/lib/logstash/outputs/s3/time_rotation_policy.rb +26 -0
- data/lib/logstash/outputs/s3/uploader.rb +76 -0
- data/lib/logstash/outputs/s3/writable_directory_validator.rb +17 -0
- data/lib/logstash/outputs/s3/write_bucket_permission_validator.rb +60 -0
- data/lib/logstash/outputs/s3.rb +442 -0
- data/lib/logstash/outputs/sns.rb +133 -0
- data/lib/logstash/outputs/sqs.rb +167 -0
- data/lib/logstash/plugin_mixins/aws_config/generic.rb +54 -0
- data/lib/logstash/plugin_mixins/aws_config/v2.rb +93 -0
- data/lib/logstash/plugin_mixins/aws_config.rb +8 -0
- data/lib/logstash-integration-aws_jars.rb +4 -0
- data/lib/tasks/build.rake +15 -0
- data/logstash-integration-aws.gemspec +55 -0
- data/spec/codecs/cloudfront_spec.rb +92 -0
- data/spec/codecs/cloudtrail_spec.rb +56 -0
- data/spec/fixtures/aws_credentials_file_sample_test.yml +2 -0
- data/spec/fixtures/aws_temporary_credentials_file_sample_test.yml +3 -0
- data/spec/fixtures/cloudfront.log +4 -0
- data/spec/fixtures/compressed.log.gee.zip +0 -0
- data/spec/fixtures/compressed.log.gz +0 -0
- data/spec/fixtures/compressed.log.gzip +0 -0
- data/spec/fixtures/invalid_utf8.gbk.log +2 -0
- data/spec/fixtures/json.log +2 -0
- data/spec/fixtures/json_with_message.log +2 -0
- data/spec/fixtures/multiline.log +6 -0
- data/spec/fixtures/multiple_compressed_streams.gz +0 -0
- data/spec/fixtures/uncompressed.log +2 -0
- data/spec/inputs/cloudwatch_spec.rb +85 -0
- data/spec/inputs/s3_spec.rb +610 -0
- data/spec/inputs/sincedb_spec.rb +17 -0
- data/spec/inputs/sqs_spec.rb +324 -0
- data/spec/integration/cloudwatch_spec.rb +25 -0
- data/spec/integration/dynamic_prefix_spec.rb +92 -0
- data/spec/integration/gzip_file_spec.rb +62 -0
- data/spec/integration/gzip_size_rotation_spec.rb +63 -0
- data/spec/integration/outputs/sqs_spec.rb +98 -0
- data/spec/integration/restore_from_crash_spec.rb +133 -0
- data/spec/integration/s3_spec.rb +66 -0
- data/spec/integration/size_rotation_spec.rb +59 -0
- data/spec/integration/sqs_spec.rb +110 -0
- data/spec/integration/stress_test_spec.rb +60 -0
- data/spec/integration/time_based_rotation_with_constant_write_spec.rb +60 -0
- data/spec/integration/time_based_rotation_with_stale_write_spec.rb +64 -0
- data/spec/integration/upload_current_file_on_shutdown_spec.rb +51 -0
- data/spec/outputs/cloudwatch_spec.rb +38 -0
- data/spec/outputs/s3/file_repository_spec.rb +143 -0
- data/spec/outputs/s3/size_and_time_rotation_policy_spec.rb +77 -0
- data/spec/outputs/s3/size_rotation_policy_spec.rb +41 -0
- data/spec/outputs/s3/temporary_file_factory_spec.rb +89 -0
- data/spec/outputs/s3/temporary_file_spec.rb +47 -0
- data/spec/outputs/s3/time_rotation_policy_spec.rb +60 -0
- data/spec/outputs/s3/uploader_spec.rb +69 -0
- data/spec/outputs/s3/writable_directory_validator_spec.rb +40 -0
- data/spec/outputs/s3/write_bucket_permission_validator_spec.rb +49 -0
- data/spec/outputs/s3_spec.rb +232 -0
- data/spec/outputs/sns_spec.rb +160 -0
- data/spec/plugin_mixin/aws_config_spec.rb +217 -0
- data/spec/spec_helper.rb +8 -0
- data/spec/support/helpers.rb +121 -0
- data/spec/unit/outputs/sqs_spec.rb +247 -0
- data/vendor/jar-dependencies/org/logstash/plugins/integration/aws/logstash-integration-aws/7.1.1/logstash-integration-aws-7.1.1.jar +0 -0
- metadata +472 -0
@@ -0,0 +1,466 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require "logstash/inputs/base"
|
3
|
+
require "logstash/namespace"
|
4
|
+
require "logstash/plugin_mixins/aws_config"
|
5
|
+
require "time"
|
6
|
+
require "date"
|
7
|
+
require "tmpdir"
|
8
|
+
require "stud/interval"
|
9
|
+
require "stud/temporary"
|
10
|
+
require "aws-sdk-s3"
|
11
|
+
require "logstash/plugin_mixins/ecs_compatibility_support"
|
12
|
+
|
13
|
+
require 'java'
|
14
|
+
|
15
|
+
# Stream events from files from a S3 bucket.
|
16
|
+
#
|
17
|
+
# Each line from each file generates an event.
|
18
|
+
# Files ending in `.gz` are handled as gzip'ed files.
|
19
|
+
class LogStash::Inputs::S3 < LogStash::Inputs::Base
|
20
|
+
|
21
|
+
java_import java.io.InputStream
|
22
|
+
java_import java.io.InputStreamReader
|
23
|
+
java_import java.io.FileInputStream
|
24
|
+
java_import java.io.BufferedReader
|
25
|
+
java_import java.util.zip.GZIPInputStream
|
26
|
+
java_import java.util.zip.ZipException
|
27
|
+
|
28
|
+
include LogStash::PluginMixins::AwsConfig::V2
|
29
|
+
include LogStash::PluginMixins::ECSCompatibilitySupport(:disabled, :v1, :v8 => :v1)
|
30
|
+
|
31
|
+
config_name "s3"
|
32
|
+
|
33
|
+
default :codec, "plain"
|
34
|
+
|
35
|
+
# The name of the S3 bucket.
|
36
|
+
config :bucket, :validate => :string, :required => true
|
37
|
+
|
38
|
+
# If specified, the prefix of filenames in the bucket must match (not a regexp)
|
39
|
+
config :prefix, :validate => :string, :default => nil
|
40
|
+
|
41
|
+
config :additional_settings, :validate => :hash, :default => {}
|
42
|
+
|
43
|
+
# The path to use for writing state. The state stored by this plugin is
|
44
|
+
# a memory of files already processed by this plugin.
|
45
|
+
#
|
46
|
+
# If not specified, the default is in `{path.data}/plugins/inputs/s3/...`
|
47
|
+
#
|
48
|
+
# Should be a path with filename not just a directory.
|
49
|
+
config :sincedb_path, :validate => :string, :default => nil
|
50
|
+
|
51
|
+
# Name of a S3 bucket to backup processed files to.
|
52
|
+
config :backup_to_bucket, :validate => :string, :default => nil
|
53
|
+
|
54
|
+
# Append a prefix to the key (full path including file name in s3) after processing.
|
55
|
+
# If backing up to another (or the same) bucket, this effectively lets you
|
56
|
+
# choose a new 'folder' to place the files in
|
57
|
+
config :backup_add_prefix, :validate => :string, :default => nil
|
58
|
+
|
59
|
+
# Path of a local directory to backup processed files to.
|
60
|
+
config :backup_to_dir, :validate => :string, :default => nil
|
61
|
+
|
62
|
+
# Whether to delete processed files from the original bucket.
|
63
|
+
config :delete, :validate => :boolean, :default => false
|
64
|
+
|
65
|
+
# Interval to wait before checking the file list again after a run is finished.
|
66
|
+
# Value is in seconds.
|
67
|
+
config :interval, :validate => :number, :default => 60
|
68
|
+
|
69
|
+
# Whether to watch for new files with the interval.
|
70
|
+
# If false, overrides any interval and only lists the s3 bucket once.
|
71
|
+
config :watch_for_new_files, :validate => :boolean, :default => true
|
72
|
+
|
73
|
+
# Ruby style regexp of keys to exclude from the bucket
|
74
|
+
config :exclude_pattern, :validate => :string, :default => nil
|
75
|
+
|
76
|
+
# Set the directory where logstash will store the tmp files before processing them.
|
77
|
+
# Defaults to a `logstash` directory inside the OS temporary directory (e.g. `/tmp/logstash` on Linux).
|
78
|
+
config :temporary_directory, :validate => :string, :default => File.join(Dir.tmpdir, "logstash")
|
79
|
+
|
80
|
+
# Whether or not to include the S3 object's properties (last_modified, content_type, metadata)
|
81
|
+
# into each Event at [@metadata][s3]. Regardless of this setting, [@metadata][s3][key] will always
|
82
|
+
# be present.
|
83
|
+
config :include_object_properties, :validate => :boolean, :default => false
|
84
|
+
|
85
|
+
# Regular expression used to determine whether an input file is in gzip format.
|
86
|
+
# default to an expression that matches *.gz and *.gzip file extensions
|
87
|
+
# NOTE: the default is single-quoted on purpose. Inside a double-quoted Ruby
# string "\." collapses to ".", which would make the regexp match ANY character
# followed by "gz"/"gzip" instead of a literal dot.
config :gzip_pattern, :validate => :string, :default => '\.gz(ip)?$'
|
88
|
+
|
89
|
+
CUTOFF_SECOND = 3
|
90
|
+
|
91
|
+
# Builds the plugin and picks, through `ecs_select`, the event field names
# under which CloudFront header metadata (version / fields) will be stored.
#
# @param params [Array] constructor arguments, forwarded untouched to the superclass
def initialize(*params)
  super
  # Top-level field names when ECS compatibility is disabled, nested under
  # [@metadata][s3][cloudfront] for ECS v1.
  @cloudfront_version_key = ecs_select[disabled: 'cloudfront_version', v1: '[@metadata][s3][cloudfront][version]']
  @cloudfront_fields_key = ecs_select[disabled: 'cloudfront_fields', v1: '[@metadata][s3][cloudfront][fields]']
end
|
96
|
+
|
97
|
+
# Prepares the plugin before `run` is called: resolves the source bucket,
# makes sure the optional backup bucket and backup directory exist, and
# creates the temporary download directory.
#
# Side effects: may create an S3 bucket (`backup_to_bucket`), a local
# directory (`backup_to_dir`) and the temporary directory.
def register
  # Loaded lazily here; both are used by other methods of this plugin.
  require "fileutils"
  require "digest/md5"

  @logger.info("Registering", :bucket => @bucket, :region => @region)

  s3 = get_s3object

  @s3bucket = s3.bucket(@bucket)

  unless @backup_to_bucket.nil?
    @backup_bucket = s3.bucket(@backup_to_bucket)
    begin
      s3.client.head_bucket({ :bucket => @backup_to_bucket})
    rescue Aws::S3::Errors::NoSuchBucket, Aws::S3::Errors::NotFound
      # A HEAD response carries no error body, so a missing bucket is reported
      # by the SDK as a bare 404 (NotFound) rather than NoSuchBucket.
      s3.create_bucket({ :bucket => @backup_to_bucket})
    end
  end

  unless @backup_to_dir.nil?
    # File.exist? replaces the deprecated File.exists? alias (removed in Ruby 3.2).
    Dir.mkdir(@backup_to_dir, 0700) unless File.exist?(@backup_to_dir)
  end

  FileUtils.mkdir_p(@temporary_directory) unless Dir.exist?(@temporary_directory)

  if !@watch_for_new_files && original_params.include?('interval')
    logger.warn("`watch_for_new_files` has been disabled; `interval` directive will be ignored.")
  end
end
|
126
|
+
|
127
|
+
# Main loop of the input: processes the bucket every `@interval` seconds,
# or exactly once when `watch_for_new_files` is disabled.
#
# @param queue [Queue] destination for the generated events
def run(queue)
  # Remembered so that `stop`, which runs on another thread, can interrupt this one.
  @current_thread = Thread.current
  Stud.interval(@interval) do
    process_files(queue)
    # One-shot mode: request shutdown right after the first listing.
    stop if !@watch_for_new_files
  end
end
|
134
|
+
|
135
|
+
# Lists the objects under the configured prefix that should be processed in
# this cycle.
#
# An object is skipped when its key is ignored, it is empty, it is not newer
# than the sincedb timestamp, it was modified within the last CUTOFF_SECOND
# seconds, or it is archived (GLACIER / DEEP_ARCHIVE) and not restored.
# A listing failure is logged and whatever was collected so far is returned.
#
# @return [Array] selected object summaries, ordered by ascending last_modified
def list_new_files
  candidates = []
  seen_any = false
  listing_started_at = Time.now
  last_processed_at = sincedb.read
  begin
    @s3bucket.objects(:prefix => @prefix).each do |summary|
      seen_any = true
      @logger.debug('Found key', :key => summary.key)

      # The checks run in this exact order; the first one that applies wins.
      skip_reason =
        if ignore_filename?(summary.key)
          'Ignoring'
        elsif summary.content_length <= 0
          'Object Zero Length'
        elsif summary.last_modified <= last_processed_at
          'Object Not Modified'
        elsif summary.last_modified > (listing_started_at - CUTOFF_SECOND).utc
          # Modified within the cutoff window: left for the next cycle.
          'Object Modified After Cutoff Time'
        elsif (summary.storage_class == 'GLACIER' || summary.storage_class == 'DEEP_ARCHIVE') && !file_restored?(summary.object)
          'Object Archived to Glacier'
        end

      if skip_reason
        @logger.debug(skip_reason, :key => summary.key)
      else
        candidates << summary
        @logger.debug("Added to objects[]", :key => summary.key, :length => candidates.length)
      end
    end
    @logger.info('No files found in bucket', :prefix => prefix) unless seen_any
  rescue Aws::Errors::ServiceError => e
    @logger.error("Unable to list objects in bucket", :exception => e.class, :message => e.message, :backtrace => e.backtrace, :prefix => prefix)
  end
  candidates.sort_by { |summary| summary.last_modified }
end # def list_new_files
|
165
|
+
|
166
|
+
# Copies a processed object into the backup bucket, when one is configured,
# and deletes the original if `delete` is enabled.
#
# @param object [Object] source S3 object (responds to key, bucket_name, delete)
def backup_to_bucket(object)
  return if @backup_to_bucket.nil?

  # `backup_add_prefix` may be nil, in which case the key is kept as is.
  destination_key = "#{@backup_add_prefix}#{object.key}"
  source = "#{object.bucket_name}/#{object.key}"
  @backup_bucket.object(destination_key).copy_from(:copy_source => source)
  object.delete() if @delete
end
|
175
|
+
|
176
|
+
# Copies the downloaded file into the local backup directory, when one is configured.
#
# @param filename [String] path of the local file to copy
def backup_to_dir(filename)
  FileUtils.cp(filename, @backup_to_dir) unless @backup_to_dir.nil?
end
|
181
|
+
|
182
|
+
# Processes, in listing order, every object selected by `list_new_files`,
# bailing out as soon as a shutdown has been requested.
#
# @param queue [Queue] destination for the generated events
def process_files(queue)
  list_new_files.each do |log|
    break if stop?

    process_log(queue, log)
  end
end # def process_files
|
193
|
+
|
194
|
+
# Interrupts the `Stud.interval` loop started by `run`.
#
# `stop` is called from a different thread than `run`, so the worker thread
# captured in `@current_thread` has to be passed to `Stud.stop!` explicitly.
def stop
  # `@current_thread` is only set once `run` has started; without this guard a
  # stop request arriving earlier would hand nil to Stud.stop!.
  Stud.stop!(@current_thread) if @current_thread
end
|
200
|
+
|
201
|
+
private
|
202
|
+
|
203
|
+
# Read the content of the local file
|
204
|
+
#
|
205
|
+
# @param [Queue] Where to push the event
|
206
|
+
# @param [String] Which file to read from
|
207
|
+
# @param [S3Object] Source s3 object
|
208
|
+
# @return [Boolean] True if the file was completely read, false otherwise.
|
209
|
+
def process_local_log(queue, filename, object)
  @logger.debug('Processing file', :filename => filename)
  # CloudFront header values seen so far in this file; attached to later events.
  cloudfront_metadata = {}

  # Codecs currently operate on bytes rather than on streams, so all the IO
  # (decompression, reading) is done here and each line is handed to the codec.
  read_file(filename) do |line|
    if stop?
      @logger.warn("Logstash S3 input, stop reading in the middle of the file, we will read it again when logstash is started")
      return false
    end

    # Assumption about the CloudFront log format: the user relies on the plain
    # or line codec, so `message` holds the raw line. Events that only carry
    # header metadata are dropped (behaviour of the pre 1.5 plugin).
    #
    # The line goes through the codec first so that unknown bytes are replaced
    # before any string matching, which would otherwise fail with
    # `Error: invalid byte sequence in UTF-8'.
    @codec.decode(line) do |event|
      unless event_is_metadata?(event)
        push_decoded_event(queue, cloudfront_metadata, object, event)
        next
      end

      @logger.debug('Event is metadata, updating the current cloudfront metadata', :event => event)
      update_metadata(cloudfront_metadata, event)
    end
  end

  # Stateful codecs (such as multiline) may still hold buffered data: flush it to the queue.
  @codec.flush { |event| push_decoded_event(queue, cloudfront_metadata, object, event) }

  true
end # def process_local_log
|
246
|
+
|
247
|
+
# Decorates a decoded event, attaches the S3 (and any CloudFront) metadata
# and pushes it to the queue.
#
# @param queue [Queue] destination for the event
# @param metadata [Hash] CloudFront header values (:cloudfront_version, :cloudfront_fields)
# @param object [Object] source S3 object
# @param event [Object] decoded event
def push_decoded_event(queue, metadata, object, event)
  decorate(event)

  # Full object properties only on request; the key is always recorded.
  s3_properties = @include_object_properties ? object.data.to_h : {}
  event.set("[@metadata][s3]", s3_properties)
  event.set("[@metadata][s3][key]", object.key)

  version = metadata[:cloudfront_version]
  fields = metadata[:cloudfront_fields]
  event.set(@cloudfront_version_key, version) unless version.nil?
  event.set(@cloudfront_fields_key, fields) unless fields.nil?

  queue << event
end
|
262
|
+
|
263
|
+
# Tells whether an event is a CloudFront header line (`#Version: ` or
# `#Fields: `) rather than actual log content.
#
# @param event [Object] decoded event
# @return [Boolean] false when `message` is not a String
def event_is_metadata?(event)
  message = event.get("message")
  return false unless message.instance_of?(String)

  version_metadata?(message) || fields_metadata?(message)
end
|
268
|
+
|
269
|
+
# @param line [String]
# @return [Boolean] true when the line is a CloudFront `#Version: ` header
def version_metadata?(line)
  marker = '#Version: '
  line.start_with?(marker)
end
|
272
|
+
|
273
|
+
# @param line [String]
# @return [Boolean] true when the line is a CloudFront `#Fields: ` header
def fields_metadata?(line)
  marker = '#Fields: '
  line.start_with?(marker)
end
|
276
|
+
|
277
|
+
# Records the value of a CloudFront header line in `metadata`.
#
# @param metadata [Hash] updated in place (:cloudfront_version / :cloudfront_fields)
# @param event [Object] event whose `message` is the header line
def update_metadata(metadata, event)
  header = event.get('message').strip

  # Splitting on the capturing pattern leaves the header value as the last element.
  metadata[:cloudfront_version] = header.split(/#Version: (.+)/).last if version_metadata?(header)
  metadata[:cloudfront_fields] = header.split(/#Fields: (.+)/).last if fields_metadata?(header)
end
|
288
|
+
|
289
|
+
# Feeds every line of a local file to the block, transparently handling
# files whose name matches the gzip pattern. A file that cannot be read is
# logged and skipped rather than aborting the run.
#
# @param filename [String] local file to read
# @yield [String] each line of the file
def read_file(filename, &block)
  reader = gzip?(filename) ? :read_gzip_file : :read_plain_file
  send(reader, filename, block)
rescue => e
  # skip any broken file
  @logger.error("Failed to read file, processing skipped", :exception => e.class, :message => e.message, :filename => filename)
end
|
299
|
+
|
300
|
+
# Reads an uncompressed file in binary mode, passing each line to `block`.
#
# @param filename [String] local file to read
# @param block [Proc] called with every line
def read_plain_file(filename, block)
  File.open(filename, 'rb') { |io| io.each_line(&block) }
end
|
305
|
+
|
306
|
+
# Reads a gzip-compressed file through the Java stream classes, decoding it
# as UTF-8 and passing each line (without its line terminator) to `block`.
#
# @param filename [String] local gzip file to read
# @param block [Proc] called with every line
def read_gzip_file(filename, block)
  raw_stream = FileInputStream.new(filename)
  inflated_stream = GZIPInputStream.new(raw_stream)
  utf8_reader = InputStreamReader.new(inflated_stream, "UTF-8")
  line_reader = BufferedReader.new(utf8_reader)

  # readLine returns nil once the end of the stream is reached.
  until (line = line_reader.readLine()).nil?
    block.call(line)
  end
ensure
  # Any of these may be nil if a constructor above raised; close outermost first.
  [line_reader, utf8_reader, inflated_stream, raw_stream].each do |stream|
    stream.close unless stream.nil?
  end
end
|
321
|
+
|
322
|
+
# Matches the file name against the configured `gzip_pattern`.
#
# @param filename [String]
# @return [MatchData, nil] truthy when the file should be read as gzip
def gzip?(filename)
  pattern = Regexp.new(@gzip_pattern)
  pattern.match(filename)
end
|
325
|
+
|
326
|
+
# Returns the (memoized) sincedb store, backed either by the user supplied
# `sincedb_path` or by the default generated file.
#
# @return [SinceDB::File]
def sincedb
  return @sincedb if @sincedb

  if @sincedb_path.nil?
    # Resolve the default path only once: `sincedb_file` creates directories
    # and may migrate a legacy file, so it should not be run twice.
    path = sincedb_file
    @logger.info("Using default generated file for the sincedb", :filename => path)
  else
    path = @sincedb_path
    @logger.info("Using the provided sincedb_path", :sincedb_path => path)
  end

  @sincedb = SinceDB::File.new(path)
end
|
335
|
+
|
336
|
+
# Computes the default sincedb path under `{path.data}/plugins/inputs/s3`,
# creating that directory, and moves a sincedb left in $HOME by older
# versions to the new location.
#
# @return [String] path of the default sincedb file
def sincedb_file
  digest = Digest::MD5.hexdigest("#{@bucket}+#{@prefix}")
  data_dir = File.join(LogStash::SETTINGS.get_value("path.data"), "plugins", "inputs", "s3")
  FileUtils::mkdir_p(data_dir)
  current_path = File.join(data_dir, "sincedb_#{digest}")

  # Migrate old default sincedb path to new one.
  home = ENV["HOME"]
  return current_path unless home

  # The old file lived in $HOME and used the same digest; it is kept as a way
  # to automatically upgrade users from the old default to path.data.
  legacy_path = File.join(home, ".sincedb_" + digest)
  if File.exist?(legacy_path)
    logger.info("Migrating old sincedb in $HOME to {path.data}")
    FileUtils.mv(legacy_path, current_path)
  end

  current_path
end
|
356
|
+
|
357
|
+
# Decides whether a key must be skipped without being downloaded.
#
# A key is ignored when it is the prefix itself, denotes a "folder" (ends
# with "/"), is one of our own backups (same bucket and starts with
# `backup_add_prefix`), or matches `exclude_pattern`.
#
# @param filename [String] S3 object key
# @return [Boolean]
def ignore_filename?(filename)
  return true if @prefix == filename
  return true if filename.end_with?("/")

  # Literal prefix comparison: the configured prefix is plain text, so it must
  # not be interpreted as a regular expression (think "old+/" or "bak.(1)/").
  if @backup_add_prefix && @backup_to_bucket == @bucket && filename.start_with?(@backup_add_prefix)
    return true
  end

  return false if @exclude_pattern.nil?

  Regexp.new(@exclude_pattern).match?(filename)
end
|
372
|
+
|
373
|
+
# Downloads one object, turns its content into events and, when the object
# did not change in the meantime, performs the post-processing steps
# (backup, delete, sincedb update).
#
# The temporary local copy is always removed on the way out, including when
# reading was interrupted by a shutdown, when the object was modified during
# processing, or when an error is raised.
#
# @param queue [Queue] destination for the generated events
# @param log [Object] object summary from the listing (responds to key, last_modified)
def process_log(queue, log)
  @logger.debug("Processing", :bucket => @bucket, :key => log.key)
  object = @s3bucket.object(log.key)

  filename = File.join(temporary_directory, File.basename(log.key))
  begin
    return unless download_remote_file(object, filename)
    return unless process_local_log(queue, filename, object)

    if object.last_modified == log.last_modified
      backup_to_bucket(object)
      backup_to_dir(filename)
      delete_file_from_bucket(object)
      sincedb.write(log.last_modified)
    else
      # Modified while we were reading it: do not record it, so it is picked up again.
      @logger.info("#{log.key} is updated at #{object.last_modified} and will process in the next cycle")
    end
  ensure
    # force = true: a file that is already gone is not an error.
    FileUtils.remove_entry_secure(filename, true)
  end
end
|
394
|
+
|
395
|
+
# Stream the remote file to the local disk
|
396
|
+
#
|
397
|
+
# @param [S3Object] Reference to the remote S3 object to download
|
398
|
+
# @param [String] The Temporary filename to stream to.
|
399
|
+
# @return [Boolean] True if the file was completely downloaded
|
400
|
+
def download_remote_file(remote_object, local_filename)
  @logger.debug("Downloading remote file", :remote_key => remote_object.key, :local_filename => local_filename)
  # The block's value becomes the return value of File.open, and thus of this method.
  File.open(local_filename, 'wb') do |local_io|
    # Shutdown requested: give up before transferring anything.
    next false if stop?

    begin
      remote_object.get(:response_target => local_io)
      true
    rescue Aws::Errors::ServiceError => e
      @logger.warn("Unable to download remote file", :exception => e.class, :message => e.message, :remote_key => remote_object.key)
      false
    end
  end
end
|
414
|
+
|
415
|
+
# Deletes the processed object from the source bucket when `delete` is
# enabled and no backup bucket is configured (with a backup bucket the
# deletion is done by `backup_to_bucket`).
#
# @param object [Object] S3 object that was processed
def delete_file_from_bucket(object)
  object.delete() if @delete && @backup_to_bucket.nil?
end
|
420
|
+
|
421
|
+
# Builds the S3 resource from the options produced by `aws_options_hash`
# (an empty hash when that returns nil).
#
# @return [Aws::S3::Resource]
def get_s3object
  Aws::S3::Resource.new(aws_options_hash || {})
end
|
424
|
+
|
425
|
+
# Tells whether an archived object currently has a usable restored copy,
# based on the object's `restore` value.
#
# @param object [Object] S3 object (responds to data.restore)
# @return [Boolean, nil] true when the restore is finished and its
#   expiry-date lies in the future; nil when the restore is finished but no
#   expiry-date is present; false otherwise, including on any error
def file_restored?(object)
  restore_header = object.data.restore
  return false unless restore_header && restore_header.match(/ongoing-request\s?=\s?["']false["']/)

  expiry = restore_header.match(/expiry-date\s?=\s?["'](.*?)["']/)
  if expiry.nil?
    @logger.debug("No expiry-date header for restore request: #{object.data.restore}")
    return nil # finished restore without an expiry-date
  end

  # Still restored only while the expiry date has not been reached.
  DateTime.now < DateTime.parse(expiry[1])
rescue => e
  @logger.debug("Could not determine Glacier restore status", :exception => e.class, :message => e.message)
  false
end
|
442
|
+
|
443
|
+
# Minimal file-backed store for the timestamp of the last processed object.
module SinceDB
  class File
    # @param file [String] path of the file holding the timestamp
    def initialize(file)
      @sincedb_path = file
    end

    # Reads the stored timestamp.
    #
    # @return [Time] the stored time, or `Time.new(0)` when the file is
    #   missing or empty
    def read
      # File.exist? replaces the deprecated File.exists? alias (removed in Ruby 3.2).
      return Time.new(0) unless ::File.exist?(@sincedb_path)

      content = ::File.read(@sincedb_path).chomp.strip
      # If the file was created but we didn't have the time to write to it
      content.empty? ? Time.new(0) : Time.parse(content)
    end

    # Overwrites the stored timestamp.
    #
    # @param since [Time, nil] time to store; defaults to the current time
    def write(since = nil)
      since = Time.now if since.nil?
      ::File.open(@sincedb_path, 'w') { |file| file.write(since.to_s) }
    end
  end
end
|
466
|
+
end # class LogStash::Inputs::S3
|
@@ -0,0 +1,196 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
#
|
3
|
+
require "logstash/inputs/threadable"
|
4
|
+
require "logstash/namespace"
|
5
|
+
require "logstash/timestamp"
|
6
|
+
require "logstash/plugin_mixins/aws_config"
|
7
|
+
require "logstash/errors"
|
8
|
+
|
9
|
+
# Pull events from an Amazon Web Services Simple Queue Service (SQS) queue.
|
10
|
+
#
|
11
|
+
# SQS is a simple, scalable queue system that is part of the
|
12
|
+
# Amazon Web Services suite of tools.
|
13
|
+
#
|
14
|
+
# Although SQS is similar to other queuing systems like AMQP, it
|
15
|
+
# uses a custom API and requires that you have an AWS account.
|
16
|
+
# See http://aws.amazon.com/sqs/ for more details on how SQS works,
|
17
|
+
# what the pricing schedule looks like and how to setup a queue.
|
18
|
+
#
|
19
|
+
# To use this plugin, you *must*:
|
20
|
+
#
|
21
|
+
# * Have an AWS account
|
22
|
+
# * Setup an SQS queue
|
23
|
+
# * Create an identity that has access to consume messages from the queue.
|
24
|
+
#
|
25
|
+
# The "consumer" identity must have the following permissions on the queue:
|
26
|
+
#
|
27
|
+
# * `sqs:ChangeMessageVisibility`
|
28
|
+
# * `sqs:ChangeMessageVisibilityBatch`
|
29
|
+
# * `sqs:DeleteMessage`
|
30
|
+
# * `sqs:DeleteMessageBatch`
|
31
|
+
# * `sqs:GetQueueAttributes`
|
32
|
+
# * `sqs:GetQueueUrl`
|
33
|
+
# * `sqs:ListQueues`
|
34
|
+
# * `sqs:ReceiveMessage`
|
35
|
+
#
|
36
|
+
# Typically, you should setup an IAM policy, create a user and apply the IAM policy to the user.
|
37
|
+
# A sample policy is as follows:
|
38
|
+
# [source,json]
|
39
|
+
# {
|
40
|
+
# "Statement": [
|
41
|
+
# {
|
42
|
+
# "Action": [
|
43
|
+
# "sqs:ChangeMessageVisibility",
|
44
|
+
# "sqs:ChangeMessageVisibilityBatch",
|
45
|
+
# "sqs:GetQueueAttributes",
|
46
|
+
# "sqs:GetQueueUrl",
|
47
|
+
# "sqs:ListQueues",
|
48
|
+
# "sqs:DeleteMessage",
# "sqs:DeleteMessageBatch",
|
49
|
+
# "sqs:ReceiveMessage"
|
50
|
+
# ],
|
51
|
+
# "Effect": "Allow",
|
52
|
+
# "Resource": [
|
53
|
+
# "arn:aws:sqs:us-east-1:123456789012:Logstash"
|
54
|
+
# ]
|
55
|
+
# }
|
56
|
+
# ]
|
57
|
+
# }
|
58
|
+
#
|
59
|
+
# See http://aws.amazon.com/iam/ for more details on setting up AWS identities.
|
60
|
+
#
|
61
|
+
class LogStash::Inputs::SQS < LogStash::Inputs::Threadable
|
62
|
+
include LogStash::PluginMixins::AwsConfig::V2
|
63
|
+
|
64
|
+
MAX_TIME_BEFORE_GIVING_UP = 60
|
65
|
+
MAX_MESSAGES_TO_FETCH = 10 # Between 1-10 in the AWS-SDK doc
|
66
|
+
SENT_TIMESTAMP = "SentTimestamp"
|
67
|
+
SQS_ATTRIBUTES = [SENT_TIMESTAMP]
|
68
|
+
BACKOFF_SLEEP_TIME = 1
|
69
|
+
BACKOFF_FACTOR = 2
|
70
|
+
DEFAULT_POLLING_FREQUENCY = 20
|
71
|
+
|
72
|
+
config_name "sqs"
|
73
|
+
|
74
|
+
default :codec, "json"
|
75
|
+
|
76
|
+
config :additional_settings, :validate => :hash, :default => {}
|
77
|
+
|
78
|
+
# Name of the SQS queue to pull messages from. Note that this is just the name of the queue, not the URL or ARN.
|
79
|
+
config :queue, :validate => :string, :required => true
|
80
|
+
|
81
|
+
# Account ID of the AWS account which owns the queue.
|
82
|
+
config :queue_owner_aws_account_id, :validate => :string, :required => false
|
83
|
+
|
84
|
+
# Name of the event field in which to store the SQS message ID
|
85
|
+
config :id_field, :validate => :string
|
86
|
+
|
87
|
+
# Name of the event field in which to store the SQS message MD5 checksum
|
88
|
+
config :md5_field, :validate => :string
|
89
|
+
|
90
|
+
# Name of the event field in which to store the SQS message Sent Timestamp
|
91
|
+
config :sent_timestamp_field, :validate => :string
|
92
|
+
|
93
|
+
# Polling frequency, default is 20 seconds
|
94
|
+
config :polling_frequency, :validate => :number, :default => DEFAULT_POLLING_FREQUENCY
|
95
|
+
|
96
|
+
attr_reader :poller
|
97
|
+
|
98
|
+
# Loads the SQS SDK, logs the queue being registered and builds the poller.
def register
  require "aws-sdk-sqs"
  registration_details = { :queue => @queue, :queue_owner_aws_account_id => @queue_owner_aws_account_id }
  @logger.info("Registering SQS input", registration_details)

  setup_queue
end
|
104
|
+
|
105
|
+
# Resolves the URL of the configured queue, scoping the lookup to the owning
# account when `queue_owner_aws_account_id` is set.
#
# @param aws_sqs_client [Object] SQS client (responds to get_queue_url)
# @return [String] the queue URL
def queue_url(aws_sqs_client)
  request = { :queue_name => @queue }
  request[:queue_owner_aws_account_id] = @queue_owner_aws_account_id if @queue_owner_aws_account_id
  aws_sqs_client.get_queue_url(request)[:queue_url]
end
|
112
|
+
|
113
|
+
# Creates the SQS client and the queue poller, stored in `@poller`.
#
# @raise [LogStash::ConfigurationError] when the queue cannot be reached
#   (service or networking error)
def setup_queue
  client = Aws::SQS::Client.new(aws_options_hash || {})
  @poller = Aws::SQS::QueuePoller.new(queue_url(client), :client => client).tap do |queue_poller|
    # Checked before every request so that polling ends once a stop is requested.
    queue_poller.before_request { |_stats| throw :stop_polling if stop? }
  end
rescue Aws::SQS::Errors::ServiceError, Seahorse::Client::NetworkingError => e
  @logger.error("Cannot establish connection to Amazon SQS", exception_details(e))
  raise LogStash::ConfigurationError, "Verify the SQS queue name and your credentials"
end
|
123
|
+
|
124
|
+
# Options passed to the poller on every `poll` call.
#
# @return [Hash] batch size, requested message attributes and wait time
def polling_options
  {
    max_number_of_messages: MAX_MESSAGES_TO_FETCH,
    attribute_names: SQS_ATTRIBUTES,
    wait_time_seconds: @polling_frequency
  }
end
|
131
|
+
|
132
|
+
# Copies the SQS message id, body checksum and sent timestamp into the
# event, each one only when its target field is configured.
#
# @param event [Object] event to enrich
# @param message [Object] received SQS message
# @return [Object] the same event
def add_sqs_data(event, message)
  # Values are wrapped in lambdas so they are only read when the field is configured.
  [
    [@id_field, -> { message.message_id }],
    [@md5_field, -> { message.md5_of_body }],
    [@sent_timestamp_field, -> { convert_epoch_to_timestamp(message.attributes[SENT_TIMESTAMP]) }]
  ].each do |field, value|
    event.set(field, value.call) if field
  end
  event
end
|
138
|
+
|
139
|
+
# Decodes the body of one SQS message and pushes every resulting event,
# enriched with the SQS data and decorated, to the output queue.
#
# @param message [Object] received SQS message
# @param output_queue [Queue] destination for the events
def handle_message(message, output_queue)
  @codec.decode(message.body) do |decoded|
    add_sqs_data(decoded, message)
    decorate(decoded)
    output_queue << decoded
  end
end
|
146
|
+
|
147
|
+
# Polls the queue, retrying with backoff on service or networking errors,
# and pushes the decoded messages to the output queue.
#
# @param output_queue [Queue] destination for the events
def run(output_queue)
  @logger.debug("Polling SQS queue", :polling_options => polling_options)

  run_with_backoff do
    poller.poll(polling_options) do |messages, stats|
      # Leave the poll loop as soon as a shutdown has been requested.
      break if stop?

      messages.each { |message| handle_message(message, output_queue) }

      if @logger.debug?
        @logger.debug("SQS Stats:", :request_count => stats.request_count,
                      :received_message_count => stats.received_message_count,
                      :last_message_received_at => stats.last_message_received_at)
      end
    end
  end
end
|
160
|
+
|
161
|
+
private
|
162
|
+
|
163
|
+
# Runs an AWS request inside a Ruby block with an exponential backoff in case
|
164
|
+
# we experience a ServiceError.
|
165
|
+
#
|
166
|
+
# @param [Block] block Ruby code block to execute.
|
167
|
+
def run_with_backoff(&block)
  # Kept outside the begin block so the delay keeps growing across retries.
  delay = BACKOFF_SLEEP_TIME
  begin
    block.call
  rescue Aws::SQS::Errors::ServiceError, Seahorse::Client::NetworkingError => error
    @logger.warn("SQS error ... retrying with exponential backoff", exception_details(error, delay))
    # Sleeps for the current delay and yields the delay to use next time.
    delay = backoff_sleep(delay)
    retry
  end
end
|
177
|
+
|
178
|
+
# Sleeps for `sleep_time` seconds and computes the delay to use for the
# next retry.
#
# @param sleep_time [Numeric] current delay, in seconds
# @return [Numeric] the current delay multiplied by BACKOFF_FACTOR, capped
#   at MAX_TIME_BEFORE_GIVING_UP
def backoff_sleep(sleep_time)
  sleep(sleep_time)
  # Clamp after multiplying so the returned delay never exceeds the maximum.
  [sleep_time * BACKOFF_FACTOR, MAX_TIME_BEFORE_GIVING_UP].min
end
|
182
|
+
|
183
|
+
# Converts an epoch value expressed in milliseconds into a
# LogStash::Timestamp with whole-second resolution (the millisecond part is
# dropped by the integer division).
#
# @param time [#to_i] epoch in milliseconds
# @return [LogStash::Timestamp]
def convert_epoch_to_timestamp(time)
  epoch_seconds = time.to_i / 1000
  LogStash::Timestamp.at(epoch_seconds)
end
|
186
|
+
|
187
|
+
# Builds the structured payload logged when an SQS call fails.
#
# @param e [Exception] the error being reported
# @param sleep_time [Numeric, nil] delay before the next retry, when retrying
# @return [Hash] queue, exception class and message, plus the service error
#   code, underlying cause, sleep time and (at debug level) backtrace when available
def exception_details(e, sleep_time = nil)
  { :queue => @queue, :exception => e.class, :message => e.message }.tap do |details|
    details[:code] = e.code if e.is_a?(Aws::SQS::Errors::ServiceError) && e.code
    # Seahorse::Client::NetworkingError exposes the wrapped error as original_error.
    details[:cause] = e.original_error if e.respond_to?(:original_error) && e.original_error
    details[:sleep_time] = sleep_time if sleep_time
    details[:backtrace] = e.backtrace if @logger.debug?
  end
end
|
195
|
+
|
196
|
+
end # class LogStash::Inputs::SQS
|