logstash-input-sqs_s3 1.1.1 → 1.1.2
- checksums.yaml +4 -4
- data/lib/logstash/inputs/sqs_s3.rb +29 -6
- data/lib/logstash/inputs/sqs_s3.rb.save +226 -0
- data/logstash-input-sqs_s3.gemspec +1 -1
- metadata +2 -3
- data/spec/inputs/s3sqs_spec.rb +0 -9
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: b2c1d88096f1454205e11131c1a7ea980190c45f8d49cd2f99fc8ff3f7652819
+  data.tar.gz: 5a9a5b9258cedd4d5b187b08faec6f97db84c8b68762d76da97e828aad9ba9c8
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: a8d547ff18844c52c89480623c56dcbd787906fedff1fd2fecb09c5e3d3a9ae034ae7026b03af20a9a52b4451228b1f102524bfecaa89390862709cf1b73744d
+  data.tar.gz: 4f0ca6ab34c61efe0a1e650e48b44b84e16138a82ae92a3585c45936aecd6c059170d392bf4a7667398de49e5bd1e913b67b43f8771a5fede44ec2c4cfc219dd
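These are the registry's published digests for the two archives inside the repackaged .gem (metadata.gz and data.tar.gz). As a hedged illustration (not part of the package), a local copy of either archive could be checked against the published SHA256 like this; the file path is an assumption about where the gem was unpacked:

    require "digest"

    # SHA256 for metadata.gz, copied from the checksums.yaml above;
    # "metadata.gz" is a hypothetical local path to the unpacked archive.
    expected = "b2c1d88096f1454205e11131c1a7ea980190c45f8d49cd2f99fc8ff3f7652819"
    actual   = Digest::SHA256.file("metadata.gz").hexdigest
    puts(actual == expected ? "checksum OK" : "checksum mismatch")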
data/lib/logstash/inputs/sqs_s3.rb CHANGED
@@ -6,6 +6,12 @@ require "logstash/timestamp"
 require "logstash/plugin_mixins/aws_config"
 require "logstash/errors"
 
+# Forcibly load all modules marked to be lazily loaded.
+#
+# It is recommended that this is called prior to launching threads. See
+# https://aws.amazon.com/blogs/developer/threading-with-the-aws-sdk-for-ruby/.
+Aws.eager_autoload!
+
 # Get logs from AWS s3 buckets as issued by an object-created event via sqs.
 #
 # This plugin is based on the logstash-input-sqs plugin but doesn't log the sqs event itself.
@@ -79,6 +85,9 @@ class LogStash::Inputs::SQSS3 < LogStash::Inputs::Threadable
   MAX_TIME_BEFORE_GIVING_UP = 60
   EVENT_SOURCE = 'aws:s3'
   EVENT_TYPE = 'ObjectCreated'
+  MAX_MESSAGES_TO_FETCH = 10 # Between 1-10 in the AWS-SDK doc
+  SENT_TIMESTAMP = "SentTimestamp"
+  SQS_ATTRIBUTES = [SENT_TIMESTAMP]
 
   config_name "sqs_s3"
 
@@ -87,6 +96,15 @@ class LogStash::Inputs::SQSS3 < LogStash::Inputs::Threadable
   # Name of the SQS Queue to pull messages from. Note that this is just the name of the queue, not the URL or ARN.
   config :queue, :validate => :string, :required => true
 
+  # Name of the event field in which to store the SQS message ID
+  config :id_field, :validate => :string
+
+  # Name of the event field in which to store the SQS message Sent Timestamp
+  config :sent_timestamp_field, :validate => :string
+
+  # Max messages to fetch, default is 10
+  config :max_messages_to_fetch, :validate => :number, :default => MAX_MESSAGES_TO_FETCH
+
   attr_reader :poller
   attr_reader :s3
 
@@ -108,10 +126,9 @@ class LogStash::Inputs::SQSS3 < LogStash::Inputs::Threadable
 
   def polling_options
     {
-      # we will query 1 message at a time, so we can ensure correct error handling if we can't download a single file correctly
-      # (we will throw :skip_delete if download size isn't correct to process the event again later
-      # -> set a reasonable "Default Visibility Timeout" for your queue, so that there's enough time to process the log files)
-      :max_number_of_messages => 1,
+      # the number of messages to fetch in a single api call
+      :max_number_of_messages => MAX_MESSAGES_TO_FETCH,
+      :attribute_names => SQS_ATTRIBUTES,
       # we will use the queue's setting, a good value is 10 seconds
       # (to ensure fast logstash shutdown on the one hand and few api calls on the other hand)
       :wait_time_seconds => nil,
@@ -161,6 +178,8 @@ class LogStash::Inputs::SQSS3 < LogStash::Inputs::Threadable
 
                   event.set('[@metadata][s3_bucket_name]', record['s3']['bucket']['name'])
                   event.set('[@metadata][s3_object_key]', record['s3']['object']['key'])
+                  event.set(@id_field, message.message_id) if @id_field
+                  event.set(@sent_timestamp_field, convert_epoch_to_timestamp(message.attributes[SENT_TIMESTAMP])) if @sent_timestamp_field
 
                   queue << event
                 end
@@ -191,8 +210,9 @@ class LogStash::Inputs::SQSS3 < LogStash::Inputs::Threadable
     end
     # poll a message and process it
     run_with_backoff do
-      poller.poll(polling_options) do |message|
-        handle_message(message, queue)
+      poller.poll(polling_options) do |messages|
+        messages.each do |message|
+          handle_message(message, queue)
       end
     end
   end
@@ -217,4 +237,7 @@ class LogStash::Inputs::SQSS3 < LogStash::Inputs::Threadable
     end
   end
 
+  def convert_epoch_to_timestamp(time)
+    LogStash::Timestamp.at(time.to_i / 1000)
+  end
 end # class
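The functional heart of this release is the switch from single-message to batch polling. In the AWS SDK for Ruby, Aws::SQS::QueuePoller yields one message per block call when :max_number_of_messages is 1, but yields an array of messages when it is greater than 1, which is why run now unpacks its block argument with messages.each. A minimal sketch of both modes, assuming queue_url has already been resolved via get_queue_url:

    require "aws-sdk"

    poller = Aws::SQS::QueuePoller.new(queue_url)

    # 1.1.1 behaviour: one message per yield
    poller.poll(:max_number_of_messages => 1) do |message|
      puts message.message_id
    end

    # 1.1.2 behaviour: up to 10 messages per ReceiveMessage call, plus the
    # SentTimestamp attribute requested via :attribute_names
    poller.poll(:max_number_of_messages => 10, :attribute_names => ["SentTimestamp"]) do |messages|
      messages.each do |message|
        # SentTimestamp arrives as a string of epoch *milliseconds*, which is
        # why the new convert_epoch_to_timestamp helper divides by 1000
        puts Time.at(message.attributes["SentTimestamp"].to_i / 1000)
      end
    end

Fetching up to ten messages per API call cuts the number of SQS requests (and their cost) by as much as a factor of ten on busy queues.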
data/lib/logstash/inputs/sqs_s3.rb.save ADDED
@@ -0,0 +1,226 @@
+# encoding: utf-8
+#
+require "logstash/inputs/threadable"
+require "logstash/namespace"
+require "logstash/timestamp"
+require "logstash/plugin_mixins/aws_config"
+require "logstash/errors"
+
+# Forcibly load all modules marked to be lazily loaded.
+#
+# It is recommended that this is called prior to launching threads. See
+# https://aws.amazon.com/blogs/developer/threading-with-the-aws-sdk-for-ruby/.
+Aws.eager_autoload!
+
+# Get logs from AWS s3 buckets as issued by an object-created event via sqs.
+#
+# This plugin is based on the logstash-input-sqs plugin but doesn't log the sqs event itself.
+# Instead it assumes, that the event is an s3 object-created event and will then download
+# and process the given file.
+#
+# Some issues of logstash-input-sqs, like logstash not shutting down properly, have been
+# fixed for this plugin.
+#
+# In contrast to logstash-input-sqs this plugin uses the "Receive Message Wait Time"
+# configured for the sqs queue in question, a good value will be something like 10 seconds
+# to ensure a reasonable shutdown time of logstash.
+# Also use a "Default Visibility Timeout" that is high enough for log files to be downloaded
+# and processed (I think a good value should be 5-10 minutes for most use cases), the plugin will
+# avoid removing the event from the queue if the associated log file couldn't be correctly
+# passed to the processing level of logstash (e.g. downloaded content size doesn't match sqs event).
+#
+# This plugin is meant for high availability setups, in contrast to logstash-input-s3 you can safely
+# use multiple logstash nodes, since the usage of sqs will ensure that each logfile is processed
+# only once and no file will get lost on node failure or downscaling for auto-scaling groups.
+# (You should use a "Message Retention Period" >= 4 days for your sqs to ensure you can survive
+# a weekend of faulty log file processing)
+# The plugin will not delete objects from s3 buckets, so make sure to have a reasonable "Lifecycle"
+# configured for your buckets, which should keep the files at least "Message Retention Period" days.
+#
+# A typical setup will contain some s3 buckets containing elb, cloudtrail or other log files.
+# These will be configured to send object-created events to a sqs queue, which will be configured
+# as the source queue for this plugin.
+# (The plugin supports gzipped content if it is marked with "contend-encoding: gzip" as it is the
+# case for cloudtrail logs)
+#
+# The logstash node therefore must have sqs permissions + the permissions to download objects
+# from the s3 buckets that send events to the queue.
+# (If logstash nodes are running on EC2 you should use a ServerRole to provide permissions)
+# [source,json]
+#   {
+#       "Version": "2012-10-17",
+#       "Statement": [
+#           {
+#               "Effect": "Allow",
+#               "Action": [
+#                   "sqs:Get*",
+#                   "sqs:List*",
+#                   "sqs:ReceiveMessage",
+#                   "sqs:ChangeMessageVisibility*",
+#                   "sqs:DeleteMessage*"
+#               ],
+#               "Resource": [
+#                   "arn:aws:sqs:us-east-1:123456789012:my-elb-log-queue"
+#               ]
+#           },
+#           {
+#               "Effect": "Allow",
+#               "Action": [
+#                   "s3:Get*",
+#                   "s3:List*"
+#               ],
+#               "Resource": [
+#                   "arn:aws:s3:::my-elb-logs",
+#                   "arn:aws:s3:::my-elb-logs/*"
+#               ]
+#           }
+#       ]
+#   }
+#
+class LogStash::Inputs::SQSS3 < LogStash::Inputs::Threadable
+  include LogStash::PluginMixins::AwsConfig::V2
+
+  BACKOFF_SLEEP_TIME = 1
+  BACKOFF_FACTOR = 2
+  MAX_TIME_BEFORE_GIVING_UP = 60
+  EVENT_SOURCE = 'aws:s3'
+  EVENT_TYPE = 'ObjectCreated'
+
+  config_name "sqs_s3"
+
+  default :codec, "plain"
+
+  # Name of the SQS Queue to pull messages from. Note that this is just the name of the queue, not the URL or ARN.
+  config :queue, :validate => :string, :required => true
+
+  attr_reader :poller
+  attr_reader :s3
+
+  def register
+    require "aws-sdk"
+    @logger.info("Registering SQS input", :queue => @queue)
+    setup_queue
+  end
+
+  def setup_queue
+    aws_sqs_client = Aws::SQS::Client.new(aws_options_hash)
+    queue_url = aws_sqs_client.get_queue_url(:queue_name => @queue)[:queue_url]
+    @poller = Aws::SQS::QueuePoller.new(queue_url, :client => aws_sqs_client)
+    @s3 = Aws::S3::Client.new(aws_options_hash)
+  rescue Aws::SQS::Errors::ServiceError => e
+    @logger.error("Cannot establish connection to Amazon SQS", :error => e)
+    raise LogStash::ConfigurationError, "Verify the SQS queue name and your credentials"
+  end
+
+  def polling_options
+    {
+      # we will query 1 message at a time, so we can ensure correct error handling if we can't download a single file correctly
+      # (we will throw :skip_delete if download size isn't correct to process the event again later
+      # -> set a reasonable "Default Visibility Timeout" for your queue, so that there's enough time to process the log files)
+      :max_number_of_messages => 1,
+      # we will use the queue's setting, a good value is 10 seconds
+      # (to ensure fast logstash shutdown on the one hand and few api calls on the other hand)
+      :wait_time_seconds => nil,
+    }
+  end
+
+  def handle_message(message, queue)
+    hash = JSON.parse message.body
+    # there may be test events sent from the s3 bucket which won't contain a Records array,
+    # we will skip those events and remove them from queue
+    if hash['Records'] then
+      # typically there will be only 1 record per event, but since it is an array we will
+      # treat it as if there could be more records
+      hash['Records'].each do |record|
+        # in case there are any events with Records that aren't s3 object-created events and can't therefore be
+        # processed by this plugin, we will skip them and remove them from queue
+        if record['eventSource'] == EVENT_SOURCE and record['eventName'].start_with?(EVENT_TYPE) then
+          # try download and :skip_delete if it fails
+          begin
+            response = @s3.get_object(
+              bucket: record['s3']['bucket']['name'],
+              key: record['s3']['object']['key']
+            )
+          rescue => e
+            @logger.warn("issuing :skip_delete on failed download", :bucket => record['s3']['bucket']['name'], :object => record['s3']['object']['key'], :error => e)
+            throw :skip_delete
+          end
+          # verify downloaded content size
+          if response.content_length == record['s3']['object']['size'] then
+            body = response.body
+            # if necessary unzip
+            if response.content_encoding == "gzip" or record['s3']['object']['key'].end_with?(".gz") then
+              begin
+                temp = Zlib::GzipReader.new(body)
+              rescue => e
+                @logger.warn("content is marked to be gzipped but can't unzip it, assuming plain text", :bucket => record['s3']['bucket']['name'], :object => record['s3']['object']['key'], :error => e)
+                temp = body
+              end
+              body = temp
+            end
+            # process the plain text content
+            begin
+              lines = body.read.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: "\u2370").split(/\n/)
+              lines.each do |line|
+                @codec.decode(line) do |event|
+                  decorate(event)
+
+                  event.set('[@metadata][s3_bucket_name]', record['s3']['bucket']['name'])
+                  event.set('[@metadata][s3_object_key]', record['s3']['object']['key'])
+
+                  queue << event
+                end
+              end
+            rescue => e
+              @logger.warn("issuing :skip_delete on failed plain text processing", :bucket => record['s3']['bucket']['name'], :object => record['s3']['object']['key'], :error => e)
+              throw :skip_delete
+            end
+          # otherwise try again later
+          else
+            @logger.warn("issuing :skip_delete on wrong download content size", :bucket => record['s3']['bucket']['name'], :object => record['s3']['object']['key'],
+              :download_size => response.content_length, :expected => record['s3']['object']['size'])
+            throw :skip_delete
+          end
+        end
+      end
+    end
+  end
+
+  def run(queue)
+    # ensure we can stop logstash correctly
+    poller.before_request do |stats|
+      if stop? then
+        @logger.warn("issuing :stop_polling on stop?", :queue => @queue)
+        # this can take up to "Receive Message Wait Time" (of the sqs queue) seconds to be recognized
+        throw :stop_polling
+      end
+    end
+    # poll a message and process it
+    run_with_backoff do
+      poller.poll(polling_options) do |message|
+        handle_message(message, queue)
+      end
+    end
+  end
+
+  private
+  # Runs an AWS request inside a Ruby block with an exponential backoff in case
+  # we experience a ServiceError.
+  #
+  # @param [Integer] max_time maximum amount of time to sleep before giving up.
+  # @param [Integer] sleep_time the initial amount of time to sleep before retrying.
+  # @param [Block] block Ruby code block to execute.
+  def run_with_backoff(max_time = MAX_TIME_BEFORE_GIVING_UP, sleep_time = BACKOFF_SLEEP_TIME, &block)
+    next_sleep = sleep_time
+    begin
+      block.call
+      next_sleep = sleep_time
+    rescue Aws::SQS::Errors::ServiceError => e
+      @logger.warn("Aws::SQS::Errors::ServiceError ... retrying SQS request with exponential backoff", :queue => @queue, :sleep_time => sleep_time, :error => e)
+      sleep(next_sleep)
+      next_sleep = next_sleep > max_time ? sleep_time : sleep_time * BACKOFF_FACTOR
+      retry
+    end
+  end
+
+end # class
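The .rb.save file appears to be an editor backup of an intermediate working copy that slipped into the gem's file list (the metadata diff below adds it to files:): it already contains the Aws.eager_autoload! block but still polls one message at a time. Its error handling leans on a QueuePoller feature worth spelling out: throwing :skip_delete inside the poll block stops the poller from deleting the message, so it becomes visible again after the queue's Default Visibility Timeout and is redelivered. A hedged sketch of that pattern, with process_s3_record standing in for the plugin's download-and-decode step:

    require "aws-sdk"

    poller = Aws::SQS::QueuePoller.new(queue_url)
    poller.poll do |message|
      begin
        process_s3_record(message)   # hypothetical processing step
      rescue StandardError
        # leave the message in the queue; SQS redelivers it once the
        # visibility timeout expires, so the log file is not lost
        throw :skip_delete
      end
    end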
data/logstash-input-sqs_s3.gemspec CHANGED
@@ -1,6 +1,6 @@
 Gem::Specification.new do |s|
   s.name = 'logstash-input-sqs_s3'
-  s.version = '1.1.1'
+  s.version = '1.1.2'
   s.licenses = ['Apache License (2.0)']
   s.summary = "Get logs from AWS s3 buckets as issued by an object-created event via sqs."
   s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program. Full credit goes to Heiko Finzel. Republishing this gem to support Logstash 5."
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: logstash-input-sqs_s3
 version: !ruby/object:Gem::Version
-  version: 1.1.1
+  version: 1.1.2
 platform: ruby
 authors:
 - Heiko Finzel
@@ -88,8 +88,8 @@ files:
 - NOTICE.TXT
 - README.md
 - lib/logstash/inputs/sqs_s3.rb
+- lib/logstash/inputs/sqs_s3.rb.save
 - logstash-input-sqs_s3.gemspec
-- spec/inputs/s3sqs_spec.rb
 - spec/inputs/sqs_s3_spec.rb
 - spec/spec_helper.rb
 homepage: https://www.boreus.de
@@ -119,6 +119,5 @@ signing_key:
 specification_version: 4
 summary: Get logs from AWS s3 buckets as issued by an object-created event via sqs.
 test_files:
-- spec/inputs/s3sqs_spec.rb
 - spec/inputs/sqs_s3_spec.rb
 - spec/spec_helper.rb