logstash-input-s3-sns-sqs 2.0.7 → 2.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +15 -0
- data/lib/logstash/inputs/mime/magic_gzip_validator.rb +1 -1
- data/lib/logstash/inputs/s3/client_factory.rb +5 -1
- data/lib/logstash/inputs/s3/downloader.rb +2 -0
- data/lib/logstash/inputs/s3snssqs.rb +32 -16
- data/lib/logstash/inputs/s3snssqs/log_processor.rb +17 -14
- data/lib/logstash/inputs/sqs/poller.rb +24 -6
- data/logstash-input-s3-sns-sqs.gemspec +1 -1
- data/spec/inputs/s3sqs_spec.rb +15 -9
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 805cb772fda649de4273d9c86799255bc31b7f18b847983c1a1bacfee2eb0224
|
4
|
+
data.tar.gz: ef65ac96355cb246b493836323e7998e485ddf30ceb5df780d17b1f6d080451b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 338e687d52f65a484ca75c091cf003f263e8e80f00b545f3f6b36b7a3fc584ea29807233436c75a3189fdf075d7386aa1415f34a1996349b2a41f4b274cb6f75
|
7
|
+
data.tar.gz: abc6194b248640de99f8ed6f41304fff760b1667a08426dea2ee98d773f19d9b01010438c633452fc60e8c23b31c201b3000deab07e4cf4da630a7f4770cd0d1
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,18 @@
|
|
1
|
+
##2.1.2
|
2
|
+
- FEATURE: Now it's possible to use queue URLs and names.
|
3
|
+
- FEATURE: Add sqs long polling config parameter: sqs_wait_time_seconds
|
4
|
+
- FIX: Valid UTF-8 byte sequences in logs are munged
|
5
|
+
- CLEANUP: Remove tests (as a starting point for clean testing).
|
6
|
+
##2.1.1
|
7
|
+
- FEATURE: Enable Multiregion Support for included S3 client.
|
8
|
+
- Add region by bucket feature
|
9
|
+
##2.1.0
|
10
|
+
- FEATURE: Add S3 metadata -> config :include_object_properties
|
11
|
+
- FEATURE: Watch for threads in exception state and restart...
|
12
|
+
##2.0.9
|
13
|
+
-gzip detection should return false for files smaller than gzip_signiture_bytes
|
14
|
+
##2.0.8
|
15
|
+
-fix nil class error
|
1
16
|
##2.0.7
|
2
17
|
-fix gem error
|
3
18
|
##2.0.6
|
@@ -10,6 +10,7 @@ class S3ClientFactory
|
|
10
10
|
@aws_options_hash.merge!(@s3_default_options) unless @s3_default_options.empty?
|
11
11
|
@sts_client = Aws::STS::Client.new(region: options[:aws_region])
|
12
12
|
@credentials_by_bucket = options[:s3_credentials_by_bucket]
|
13
|
+
@region_by_bucket = options[:s3_region_by_bucket]
|
13
14
|
@logger.debug("Credentials by Bucket", :credentials => @credentials_by_bucket)
|
14
15
|
@default_session_name = options[:s3_role_session_name]
|
15
16
|
@clients_by_bucket = {}
|
@@ -24,6 +25,9 @@ class S3ClientFactory
|
|
24
25
|
unless @credentials_by_bucket[bucket_name].nil?
|
25
26
|
options.merge!(credentials: get_s3_auth(@credentials_by_bucket[bucket_name]))
|
26
27
|
end
|
28
|
+
unless @region_by_bucket[bucket_name].nil?
|
29
|
+
options.merge!(region: @region_by_bucket[bucket_name])
|
30
|
+
end
|
27
31
|
@clients_by_bucket[bucket_symbol] = Aws::S3::Client.new(options)
|
28
32
|
@logger.debug("Created a new S3 Client", :bucket_name => bucket_name, :client => @clients_by_bucket[bucket_symbol], :used_options => options)
|
29
33
|
end
|
@@ -48,7 +52,7 @@ class S3ClientFactory
|
|
48
52
|
)
|
49
53
|
elsif credentials.key?('access_key_id') && credentials.key?('secret_access_key')
|
50
54
|
@logger.debug("Fetch credentials", :access_key => credentials['access_key_id'])
|
51
|
-
return Aws::Credentials.new(credentials)
|
55
|
+
return Aws::Credentials.new(credentials['access_key_id'], credentials['secret_access_key'])
|
52
56
|
end
|
53
57
|
end
|
54
58
|
|
@@ -9,6 +9,7 @@ class S3Downloader
|
|
9
9
|
@stopped = stop_semaphore
|
10
10
|
@factory = options[:s3_client_factory]
|
11
11
|
@delete_on_success = options[:delete_on_success]
|
12
|
+
@include_object_properties = options[:include_object_properties]
|
12
13
|
end
|
13
14
|
|
14
15
|
def copy_s3object_to_disk(record)
|
@@ -21,6 +22,7 @@ class S3Downloader
|
|
21
22
|
key: record[:key],
|
22
23
|
response_target: record[:local_file]
|
23
24
|
)
|
25
|
+
record[:s3_data] = response.to_h.keep_if { |key| @include_object_properties.include?(key) }
|
24
26
|
end
|
25
27
|
rescue Aws::S3::Errors::ServiceError => e
|
26
28
|
@logger.error("Unable to download file. Requeuing the message", :error => e, :record => record)
|
@@ -161,6 +161,10 @@ class LogStash::Inputs::S3SNSSQS < LogStash::Inputs::Threadable
|
|
161
161
|
# Session name to use when assuming an IAM role
|
162
162
|
config :s3_role_session_name, :validate => :string, :default => "logstash"
|
163
163
|
config :delete_on_success, :validate => :boolean, :default => false
|
164
|
+
# Whether or not to include the S3 object's properties (last_modified, content_type, metadata)
|
165
|
+
# into each Event at [@metadata][s3]. Regardless of this setting, [@metadata][s3][key] will always
|
166
|
+
# be present.
|
167
|
+
config :include_object_properties, :validate => :array, :default => [:last_modified, :content_type, :metadata]
|
164
168
|
|
165
169
|
### sqs
|
166
170
|
# Name or URL of the SQS Queue to pull messages from. An http/https URL is used as the queue URL as-is; any other value is looked up as a queue name. An ARN is not accepted.
|
@@ -169,9 +173,11 @@ class LogStash::Inputs::S3SNSSQS < LogStash::Inputs::Threadable
|
|
169
173
|
# Whether the event is processed through an SNS to SQS. (S3>SNS>SQS = true |S3>SQS=false)
|
170
174
|
config :from_sns, :validate => :boolean, :default => true
|
171
175
|
config :sqs_skip_delete, :validate => :boolean, :default => false
|
176
|
+
config :sqs_wait_time_seconds, :validate => :number, :required => false
|
177
|
+
config :sqs_delete_on_failure, :validate => :boolean, :default => true
|
178
|
+
|
172
179
|
config :visibility_timeout, :validate => :number, :default => 120
|
173
180
|
config :max_processing_time, :validate => :number, :default => 8000
|
174
|
-
|
175
181
|
### system
|
176
182
|
config :temporary_directory, :validate => :string, :default => File.join(Dir.tmpdir, "logstash")
|
177
183
|
# To run in multiple threads use this
|
@@ -188,6 +194,7 @@ class LogStash::Inputs::S3SNSSQS < LogStash::Inputs::Threadable
|
|
188
194
|
FileUtils.mkdir_p(@temporary_directory) unless Dir.exist?(@temporary_directory)
|
189
195
|
@id ||= "Unknown" #Use INPUT{ id => name} for thread identifier
|
190
196
|
@credentials_by_bucket = hash_key_is_regex({})
|
197
|
+
@region_by_bucket = hash_key_is_regex({})
|
191
198
|
# create the bucket=>folder=>codec lookup from config options
|
192
199
|
@codec_by_folder = hash_key_is_regex({})
|
193
200
|
@type_by_folder = hash_key_is_regex({})
|
@@ -222,6 +229,9 @@ class LogStash::Inputs::S3SNSSQS < LogStash::Inputs::Threadable
|
|
222
229
|
if options.key?('credentials')
|
223
230
|
@credentials_by_bucket[bucket] = options['credentials']
|
224
231
|
end
|
232
|
+
if options.key?('region')
|
233
|
+
@region_by_bucket[bucket] = options['region']
|
234
|
+
end
|
225
235
|
if options.key?('folders')
|
226
236
|
# make these hashes do key lookups using regex matching
|
227
237
|
folders = hash_key_is_regex({})
|
@@ -242,24 +252,28 @@ class LogStash::Inputs::S3SNSSQS < LogStash::Inputs::Threadable
|
|
242
252
|
@sqs_poller = SqsPoller.new(@logger, @received_stop,
|
243
253
|
{
|
244
254
|
visibility_timeout: @visibility_timeout,
|
245
|
-
skip_delete: @sqs_skip_delete
|
255
|
+
skip_delete: @sqs_skip_delete,
|
256
|
+
wait_time_seconds: @sqs_wait_time_seconds
|
246
257
|
},
|
247
258
|
{
|
248
259
|
sqs_queue: @queue,
|
249
260
|
queue_owner_aws_account_id: @queue_owner_aws_account_id,
|
250
261
|
from_sns: @from_sns,
|
251
|
-
max_processing_time: @max_processing_time
|
262
|
+
max_processing_time: @max_processing_time,
|
263
|
+
sqs_delete_on_failure: @sqs_delete_on_failure
|
252
264
|
},
|
253
265
|
aws_options_hash)
|
254
266
|
@s3_client_factory = S3ClientFactory.new(@logger, {
|
255
267
|
aws_region: @region,
|
256
268
|
s3_default_options: @s3_default_options,
|
257
269
|
s3_credentials_by_bucket: @credentials_by_bucket,
|
270
|
+
s3_region_by_bucket: @region_by_bucket,
|
258
271
|
s3_role_session_name: @s3_role_session_name
|
259
272
|
}, aws_options_hash)
|
260
273
|
@s3_downloader = S3Downloader.new(@logger, @received_stop, {
|
261
274
|
s3_client_factory: @s3_client_factory,
|
262
|
-
delete_on_success: @delete_on_success
|
275
|
+
delete_on_success: @delete_on_success,
|
276
|
+
include_object_properties: @include_object_properties
|
263
277
|
})
|
264
278
|
@codec_factory = CodecFactory.new(@logger, {
|
265
279
|
default_codec: @codec,
|
@@ -273,18 +287,21 @@ class LogStash::Inputs::S3SNSSQS < LogStash::Inputs::Threadable
|
|
273
287
|
|
274
288
|
# startup
|
275
289
|
def run(logstash_event_queue)
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
290
|
+
@control_threads = @consumer_threads.times.map do |thread_id|
|
291
|
+
Thread.new do
|
292
|
+
restart_count = 0
|
293
|
+
while not stop?
|
294
|
+
#make thread start async to prevent polling the same message from sqs
|
295
|
+
sleep 0.5
|
296
|
+
worker_thread = run_worker_thread(logstash_event_queue, thread_id)
|
297
|
+
worker_thread.join
|
298
|
+
restart_count += 1
|
299
|
+
thread_id = "#{thread_id}_#{restart_count}"
|
300
|
+
@logger.info("[control_thread] restarting a thread #{thread_id}... ", :thread => worker_thread.inspect)
|
301
|
+
end
|
302
|
+
end
|
285
303
|
end
|
286
|
-
|
287
|
-
@worker_threads.each { |t| t.join }
|
304
|
+
@control_threads.each { |t| t.join }
|
288
305
|
end
|
289
306
|
|
290
307
|
# shutdown
|
@@ -311,7 +328,6 @@ class LogStash::Inputs::S3SNSSQS < LogStash::Inputs::Threadable
|
|
311
328
|
# --- END plugin interface ------------------------------------------#
|
312
329
|
|
313
330
|
private
|
314
|
-
|
315
331
|
def run_worker_thread(queue, thread_id)
|
316
332
|
Thread.new do
|
317
333
|
LogStash::Util.set_thread_name("Worker #{@id}/#{thread_id}")
|
@@ -14,7 +14,7 @@ module LogProcessor
|
|
14
14
|
file = record[:local_file]
|
15
15
|
codec = @codec_factory.get_codec(record)
|
16
16
|
folder = record[:folder]
|
17
|
-
type = @type_by_folder
|
17
|
+
type = @type_by_folder.fetch(record[:bucket],{})[folder]
|
18
18
|
metadata = {}
|
19
19
|
line_count = 0
|
20
20
|
event_count = 0
|
@@ -26,17 +26,18 @@ module LogProcessor
|
|
26
26
|
@logger.warn("[#{Thread.current[:name]}] Abort reading in the middle of the file, we will read it again when logstash is started")
|
27
27
|
throw :skip_delete
|
28
28
|
end
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
29
|
+
begin
|
30
|
+
codec.decode(line) do |event|
|
31
|
+
event_count += 1
|
32
|
+
decorate_event(event, metadata, type, record[:key], record[:bucket], record[:s3_data])
|
33
|
+
#event_time = Time.now #PROFILING
|
34
|
+
#event.set("[@metadata][progress][begin]", start_time)
|
35
|
+
#event.set("[@metadata][progress][index_time]", event_time)
|
36
|
+
#event.set("[@metadata][progress][line]", line_count)
|
37
|
+
logstash_event_queue << event
|
38
|
+
end
|
39
|
+
rescue Exception => e
|
40
|
+
@logger.error("[#{Thread.current[:name]}] Unable to decode line", :line => line, :error => e)
|
40
41
|
end
|
41
42
|
end
|
42
43
|
file_t1 = Process.clock_gettime(Process::CLOCK_MONOTONIC) #PROFILING
|
@@ -45,7 +46,7 @@ module LogProcessor
|
|
45
46
|
# ensure any stateful codecs (such as multi-line ) are flushed to the queue
|
46
47
|
codec.flush do |event|
|
47
48
|
event_count += 1
|
48
|
-
decorate_event(event, metadata, type, record[:key], record[:bucket],
|
49
|
+
decorate_event(event, metadata, type, record[:key], record[:bucket], record[:s3_data])
|
49
50
|
@logger.debug("[#{Thread.current[:name]}] Flushing an incomplete event", :event => event.to_s)
|
50
51
|
logstash_event_queue << event
|
51
52
|
end
|
@@ -55,7 +56,7 @@ module LogProcessor
|
|
55
56
|
|
56
57
|
private
|
57
58
|
|
58
|
-
def decorate_event(event, metadata, type, key, bucket,
|
59
|
+
def decorate_event(event, metadata, type, key, bucket, s3_data)
|
59
60
|
if event_is_metadata?(event)
|
60
61
|
@logger.debug('Updating the current cloudfront metadata', :event => event)
|
61
62
|
update_metadata(metadata, event)
|
@@ -67,9 +68,11 @@ module LogProcessor
|
|
67
68
|
event.set("cloudfront_version", metadata[:cloudfront_version]) unless metadata[:cloudfront_version].nil?
|
68
69
|
event.set("cloudfront_fields", metadata[:cloudfront_fields]) unless metadata[:cloudfront_fields].nil?
|
69
70
|
|
71
|
+
event.set("[@metadata][s3]", s3_data)
|
70
72
|
event.set("[@metadata][s3][object_key]", key)
|
71
73
|
event.set("[@metadata][s3][bucket_name]", bucket)
|
72
74
|
event.set("[@metadata][s3][object_folder]", get_object_folder(key))
|
75
|
+
|
73
76
|
end
|
74
77
|
end
|
75
78
|
|
@@ -41,17 +41,24 @@ class SqsPoller
|
|
41
41
|
@queue = client_options[:sqs_queue]
|
42
42
|
@from_sns = client_options[:from_sns]
|
43
43
|
@max_processing_time = client_options[:max_processing_time]
|
44
|
+
@sqs_delete_on_failure = client_options[:sqs_delete_on_failure]
|
44
45
|
@options = DEFAULT_OPTIONS.merge(poller_options)
|
45
46
|
begin
|
46
47
|
@logger.info("Registering SQS input", :queue => @queue)
|
47
48
|
sqs_client = Aws::SQS::Client.new(aws_options_hash)
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
49
|
+
if uri?(@queue)
|
50
|
+
queue_url = @queue
|
51
|
+
else
|
52
|
+
queue_url = sqs_client.get_queue_url({
|
53
|
+
queue_name: @queue,
|
54
|
+
queue_owner_aws_account_id: client_options[:queue_owner_aws_account_id]
|
55
|
+
}).queue_url
|
56
|
+
end
|
57
|
+
|
52
58
|
@poller = Aws::SQS::QueuePoller.new(queue_url,
|
53
59
|
:client => sqs_client
|
54
60
|
)
|
61
|
+
@logger.info("[#{Thread.current[:name]}] connected to queue.", :queue_url => queue_url)
|
55
62
|
rescue Aws::SQS::Errors::ServiceError => e
|
56
63
|
@logger.error("Cannot establish connection to Amazon SQS", :error => e)
|
57
64
|
raise LogStash::ConfigurationError, "Verify the SQS queue name and your credentials"
|
@@ -86,6 +93,7 @@ class SqsPoller
|
|
86
93
|
poller_thread = Thread.current
|
87
94
|
extender = Thread.new do
|
88
95
|
while new_visibility < @max_processing_time do
|
96
|
+
|
89
97
|
sleep message_backoff
|
90
98
|
begin
|
91
99
|
@poller.change_message_visibility_timeout(message, new_visibility)
|
@@ -98,8 +106,8 @@ class SqsPoller
|
|
98
106
|
end
|
99
107
|
end
|
100
108
|
@logger.error("[#{Thread.current[:name]}] Maximum visibility reached! We will delete this message from queue!")
|
101
|
-
@poller.delete_message(message)
|
102
|
-
poller_thread.
|
109
|
+
@poller.delete_message(message) if @sqs_delete_on_failure
|
110
|
+
poller_thread.kill
|
103
111
|
end
|
104
112
|
extender[:name] = "#{Thread.current[:name]}/extender" #PROFILING
|
105
113
|
failed = false
|
@@ -183,6 +191,16 @@ class SqsPoller
|
|
183
191
|
end
|
184
192
|
end
|
185
193
|
|
194
|
+
def uri?(string)
|
195
|
+
uri = URI.parse(string)
|
196
|
+
%w( http https ).include?(uri.scheme)
|
197
|
+
rescue URI::BadURIError
|
198
|
+
false
|
199
|
+
rescue URI::InvalidURIError
|
200
|
+
false
|
201
|
+
end
|
202
|
+
|
203
|
+
|
186
204
|
def get_object_path(key)
|
187
205
|
folder = ::File.dirname(key)
|
188
206
|
return '' if folder == '.'
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = 'logstash-input-s3-sns-sqs'
|
3
|
-
s.version = '2.
|
3
|
+
s.version = '2.1.2'
|
4
4
|
s.licenses = ['Apache-2.0']
|
5
5
|
s.summary = "Get logs from AWS s3 buckets as issued by an object-created event via sns -> sqs."
|
6
6
|
s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program"
|
data/spec/inputs/s3sqs_spec.rb
CHANGED
@@ -15,11 +15,13 @@ require 'rspec/expectations'
|
|
15
15
|
|
16
16
|
describe LogStash::Inputs::S3SNSSQS do
|
17
17
|
class LogStash::Inputs::S3SNSSQS
|
18
|
-
public :
|
18
|
+
public :process # use method without error logging for better visibility of errors
|
19
19
|
end
|
20
20
|
let(:codec_options) { {} }
|
21
21
|
|
22
22
|
let(:input) { LogStash::Inputs::S3SNSSQS.new(config) }
|
23
|
+
|
24
|
+
let(:codec_factory) { CodecFactory.new(@logger, { default_codec: @codec, codec_by_folder: @codec_by_folder }) }
|
23
25
|
subject { input }
|
24
26
|
|
25
27
|
context "default parser choice" do
|
@@ -28,11 +30,11 @@ describe LogStash::Inputs::S3SNSSQS do
|
|
28
30
|
end
|
29
31
|
end
|
30
32
|
|
31
|
-
let(:
|
33
|
+
let(:record) {{"local_file" => File.join(File.dirname(__FILE__), '..', '..', 'fixtures', 'log-stream.real-formatted') }}
|
32
34
|
let(:key) { "arn:aws:iam::123456789012:role/AuthorizedRole" }
|
33
35
|
let(:folder) { "arn:aws:iam::123456789012:role/AuthorizedRole" }
|
34
36
|
let(:instance_codec) { "json" }
|
35
|
-
let(:
|
37
|
+
let(:logstash_event_queue) { "arn:aws:iam::123456789012:role/AuthorizedRole" }
|
36
38
|
let(:bucket) { "arn:aws:iam::123456789012:role/AuthorizedRole" }
|
37
39
|
let(:message) { "arn:aws:iam::123456789012:role/AuthorizedRole" }
|
38
40
|
let(:size) { "123344" }
|
@@ -43,18 +45,22 @@ describe LogStash::Inputs::S3SNSSQS do
|
|
43
45
|
subject do
|
44
46
|
LogStash::Inputs::S3SNSSQS.new(config)
|
45
47
|
end
|
48
|
+
# end
|
46
49
|
let(:queue) { [] }
|
47
50
|
before do
|
48
51
|
@codec = LogStash::Codecs::JSONStream.new
|
49
52
|
@codec.charset = "UTF-8"
|
50
|
-
|
53
|
+
@codec_factory = CodecFactory.new(@logger, {
|
54
|
+
default_codec: @codec,
|
55
|
+
codec_by_folder: @codec_by_folder
|
56
|
+
})
|
57
|
+
expect( subject.process(record, logstash_event_queue) ).to be true
|
51
58
|
$stderr.puts "method #{queue.to_s}"
|
52
59
|
end
|
53
60
|
|
54
|
-
it '.process_local_log => process compressed log file and verfied logstash event queue with the correct number of events' do
|
55
|
-
|
56
|
-
|
57
|
-
end
|
58
|
-
|
61
|
+
#it '.process_local_log => process compressed log file and verfied logstash event queue with the correct number of events' do
|
62
|
+
# expect( queue.size ).to eq(38)
|
63
|
+
# expect( queue.clear).to be_empty
|
64
|
+
#end
|
59
65
|
end
|
60
66
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: logstash-input-s3-sns-sqs
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.
|
4
|
+
version: 2.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Christian Herweg
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-08-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|