logstash-input-sqs_s3 1.1.1 → 1.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/logstash/inputs/sqs_s3.rb +29 -6
- data/lib/logstash/inputs/sqs_s3.rb.save +226 -0
- data/logstash-input-sqs_s3.gemspec +1 -1
- metadata +2 -3
- data/spec/inputs/s3sqs_spec.rb +0 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b2c1d88096f1454205e11131c1a7ea980190c45f8d49cd2f99fc8ff3f7652819
|
4
|
+
data.tar.gz: 5a9a5b9258cedd4d5b187b08faec6f97db84c8b68762d76da97e828aad9ba9c8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a8d547ff18844c52c89480623c56dcbd787906fedff1fd2fecb09c5e3d3a9ae034ae7026b03af20a9a52b4451228b1f102524bfecaa89390862709cf1b73744d
|
7
|
+
data.tar.gz: 4f0ca6ab34c61efe0a1e650e48b44b84e16138a82ae92a3585c45936aecd6c059170d392bf4a7667398de49e5bd1e913b67b43f8771a5fede44ec2c4cfc219dd
|
@@ -6,6 +6,12 @@ require "logstash/timestamp"
|
|
6
6
|
require "logstash/plugin_mixins/aws_config"
|
7
7
|
require "logstash/errors"
|
8
8
|
|
9
|
+
# Forcibly load all modules marked to be lazily loaded.
|
10
|
+
#
|
11
|
+
# It is recommended that this is called prior to launching threads. See
|
12
|
+
# https://aws.amazon.com/blogs/developer/threading-with-the-aws-sdk-for-ruby/.
|
13
|
+
Aws.eager_autoload!
|
14
|
+
|
9
15
|
# Get logs from AWS s3 buckets as issued by an object-created event via sqs.
|
10
16
|
#
|
11
17
|
# This plugin is based on the logstash-input-sqs plugin but doesn't log the sqs event itself.
|
@@ -79,6 +85,9 @@ class LogStash::Inputs::SQSS3 < LogStash::Inputs::Threadable
|
|
79
85
|
MAX_TIME_BEFORE_GIVING_UP = 60
|
80
86
|
EVENT_SOURCE = 'aws:s3'
|
81
87
|
EVENT_TYPE = 'ObjectCreated'
|
88
|
+
MAX_MESSAGES_TO_FETCH = 10 # Between 1-10 in the AWS-SDK doc
|
89
|
+
SENT_TIMESTAMP = "SentTimestamp"
|
90
|
+
SQS_ATTRIBUTES = [SENT_TIMESTAMP]
|
82
91
|
|
83
92
|
config_name "sqs_s3"
|
84
93
|
|
@@ -87,6 +96,15 @@ class LogStash::Inputs::SQSS3 < LogStash::Inputs::Threadable
|
|
87
96
|
# Name of the SQS Queue to pull messages from. Note that this is just the name of the queue, not the URL or ARN.
|
88
97
|
config :queue, :validate => :string, :required => true
|
89
98
|
|
99
|
+
# Name of the event field in which to store the SQS message ID
|
100
|
+
config :id_field, :validate => :string
|
101
|
+
|
102
|
+
# Name of the event field in which to store the SQS message Sent Timestamp
|
103
|
+
config :sent_timestamp_field, :validate => :string
|
104
|
+
|
105
|
+
# Max messages to fetch, default is 10
|
106
|
+
config :max_messages_to_fetch, :validate => :number, :default => MAX_MESSAGES_TO_FETCH
|
107
|
+
|
90
108
|
attr_reader :poller
|
91
109
|
attr_reader :s3
|
92
110
|
|
@@ -108,10 +126,9 @@ class LogStash::Inputs::SQSS3 < LogStash::Inputs::Threadable
|
|
108
126
|
|
109
127
|
def polling_options
|
110
128
|
{
|
111
|
-
#
|
112
|
-
|
113
|
-
|
114
|
-
:max_number_of_messages => 1,
|
129
|
+
# the number of messages to fetch in a single api call
|
130
|
+
:max_number_of_messages => MAX_MESSAGES_TO_FETCH,
|
131
|
+
:attribute_names => SQS_ATTRIBUTES,
|
115
132
|
# we will use the queue's setting, a good value is 10 seconds
|
116
133
|
# (to ensure fast logstash shutdown on the one hand and few api calls on the other hand)
|
117
134
|
:wait_time_seconds => nil,
|
@@ -161,6 +178,8 @@ class LogStash::Inputs::SQSS3 < LogStash::Inputs::Threadable
|
|
161
178
|
|
162
179
|
event.set('[@metadata][s3_bucket_name]', record['s3']['bucket']['name'])
|
163
180
|
event.set('[@metadata][s3_object_key]', record['s3']['object']['key'])
|
181
|
+
event.set(@id_field, message.message_id) if @id_field
|
182
|
+
event.set(@sent_timestamp_field, convert_epoch_to_timestamp(message.attributes[SENT_TIMESTAMP])) if @sent_timestamp_field
|
164
183
|
|
165
184
|
queue << event
|
166
185
|
end
|
@@ -191,8 +210,9 @@ class LogStash::Inputs::SQSS3 < LogStash::Inputs::Threadable
|
|
191
210
|
end
|
192
211
|
# poll a message and process it
|
193
212
|
run_with_backoff do
|
194
|
-
poller.poll(polling_options) do |message|
|
195
|
-
|
213
|
+
poller.poll(polling_options) do |messages|
|
214
|
+
messages.each do |message|
|
215
|
+
handle_message(message, queue)
|
196
216
|
end
|
197
217
|
end
|
198
218
|
end
|
@@ -217,4 +237,7 @@ class LogStash::Inputs::SQSS3 < LogStash::Inputs::Threadable
|
|
217
237
|
end
|
218
238
|
end
|
219
239
|
|
240
|
+
# Converts an epoch value given in milliseconds into a LogStash::Timestamp.
#
# The value is split into whole seconds and the millisecond remainder so the
# sub-second part is kept instead of being discarded by integer division.
#
# @param time [#to_i] milliseconds since the Unix epoch; anything that
#   converts to 0 via +to_i+ (e.g. nil) yields the epoch itself.
# @return [LogStash::Timestamp] the corresponding timestamp
def convert_epoch_to_timestamp(time)
  seconds, millis = time.to_i.divmod(1000)
  # Timestamp.at takes the fractional part as microseconds.
  LogStash::Timestamp.at(seconds, millis * 1000)
end
|
220
243
|
end # class
|
@@ -0,0 +1,226 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
#
|
3
|
+
require "logstash/inputs/threadable"
|
4
|
+
require "logstash/namespace"
|
5
|
+
require "logstash/timestamp"
|
6
|
+
require "logstash/plugin_mixins/aws_config"
|
7
|
+
require "logstash/errors"
|
8
|
+
|
9
|
+
# Forcibly load all modules marked to be lazily loaded.
|
10
|
+
#
|
11
|
+
# It is recommended that this is called prior to launching threads. See
|
12
|
+
# https://aws.amazon.com/blogs/developer/threading-with-the-aws-sdk-for-ruby/.
|
13
|
+
Aws.eager_autoload!
|
14
|
+
|
15
|
+
# Get logs from AWS s3 buckets as issued by an object-created event via sqs.
|
16
|
+
#
|
17
|
+
# This plugin is based on the logstash-input-sqs plugin but doesn't log the sqs event itself.
|
18
|
+
# Instead it assumes, that the event is an s3 object-created event and will then download
|
19
|
+
# and process the given file.
|
20
|
+
#
|
21
|
+
# Some issues of logstash-input-sqs, like logstash not shutting down properly, have been
|
22
|
+
# fixed for this plugin.
|
23
|
+
#
|
24
|
+
# In contrast to logstash-input-sqs this plugin uses the "Receive Message Wait Time"
|
25
|
+
# configured for the sqs queue in question, a good value will be something like 10 seconds
|
26
|
+
# to ensure a reasonable shutdown time of logstash.
|
27
|
+
# Also use a "Default Visibility Timeout" that is high enough for log files to be downloaded
|
28
|
+
# and processed (I think a good value should be 5-10 minutes for most use cases), the plugin will
|
29
|
+
# avoid removing the event from the queue if the associated log file couldn't be correctly
|
30
|
+
# passed to the processing level of logstash (e.g. downloaded content size doesn't match sqs event).
|
31
|
+
#
|
32
|
+
# This plugin is meant for high availability setups, in contrast to logstash-input-s3 you can safely
|
33
|
+
# use multiple logstash nodes, since the usage of sqs will ensure that each logfile is processed
|
34
|
+
# only once and no file will get lost on node failure or downscaling for auto-scaling groups.
|
35
|
+
# (You should use a "Message Retention Period" >= 4 days for your sqs to ensure you can survive
|
36
|
+
# a weekend of faulty log file processing)
|
37
|
+
# The plugin will not delete objects from s3 buckets, so make sure to have a reasonable "Lifecycle"
|
38
|
+
# configured for your buckets, which should keep the files at least "Message Retention Period" days.
|
39
|
+
#
|
40
|
+
# A typical setup will contain some s3 buckets containing elb, cloudtrail or other log files.
|
41
|
+
# These will be configured to send object-created events to a sqs queue, which will be configured
|
42
|
+
# as the source queue for this plugin.
|
43
|
+
# (The plugin supports gzipped content if it is marked with "content-encoding: gzip" as it is the
|
44
|
+
# case for cloudtrail logs)
|
45
|
+
#
|
46
|
+
# The logstash node therefore must have sqs permissions + the permissions to download objects
|
47
|
+
# from the s3 buckets that send events to the queue.
|
48
|
+
# (If logstash nodes are running on EC2 you should use a ServerRole to provide permissions)
|
49
|
+
# [source,json]
|
50
|
+
# {
|
51
|
+
# "Version": "2012-10-17",
|
52
|
+
# "Statement": [
|
53
|
+
# {
|
54
|
+
# "Effect": "Allow",
|
55
|
+
# "Action": [
|
56
|
+
# "sqs:Get*",
|
57
|
+
# "sqs:List*",
|
58
|
+
# "sqs:ReceiveMessage",
|
59
|
+
# "sqs:ChangeMessageVisibility*",
|
60
|
+
# "sqs:DeleteMessage*"
|
61
|
+
# ],
|
62
|
+
# "Resource": [
|
63
|
+
# "arn:aws:sqs:us-east-1:123456789012:my-elb-log-queue"
|
64
|
+
# ]
|
65
|
+
# },
|
66
|
+
# {
|
67
|
+
# "Effect": "Allow",
|
68
|
+
# "Action": [
|
69
|
+
# "s3:Get*",
|
70
|
+
# "s3:List*"
|
71
|
+
# ],
|
72
|
+
# "Resource": [
|
73
|
+
# "arn:aws:s3:::my-elb-logs",
|
74
|
+
# "arn:aws:s3:::my-elb-logs/*"
|
75
|
+
# ]
|
76
|
+
# }
|
77
|
+
# ]
|
78
|
+
# }
|
79
|
+
#
|
80
|
+
# Logstash input that polls an SQS queue for S3 object-created notifications,
# downloads every referenced object and pushes one event per decoded line.
class LogStash::Inputs::SQSS3 < LogStash::Inputs::Threadable
  include LogStash::PluginMixins::AwsConfig::V2

  # Delay in seconds before the first retry of a failed SQS request.
  BACKOFF_SLEEP_TIME = 1
  # Factor by which the retry delay grows after each consecutive failure.
  BACKOFF_FACTOR = 2
  # Longest single retry delay in seconds; beyond it the delay starts over.
  MAX_TIME_BEFORE_GIVING_UP = 60
  # Only records carrying this eventSource are processed.
  EVENT_SOURCE = 'aws:s3'
  # Only records whose eventName starts with this prefix are processed.
  EVENT_TYPE = 'ObjectCreated'

  config_name "sqs_s3"

  default :codec, "plain"

  # Name of the SQS Queue to pull messages from. Note that this is just the name of the queue, not the URL or ARN.
  config :queue, :validate => :string, :required => true

  attr_reader :poller
  attr_reader :s3

  # Loads the AWS SDK and connects to the configured queue.
  def register
    require "aws-sdk"
    @logger.info("Registering SQS input", :queue => @queue)
    setup_queue
  end

  # Builds the SQS client, resolves the queue URL from the configured queue
  # name and creates the queue poller and the S3 client.
  #
  # @raise [LogStash::ConfigurationError] if SQS answers with a service error
  def setup_queue
    aws_sqs_client = Aws::SQS::Client.new(aws_options_hash)
    queue_url = aws_sqs_client.get_queue_url(:queue_name => @queue)[:queue_url]
    @poller = Aws::SQS::QueuePoller.new(queue_url, :client => aws_sqs_client)
    @s3 = Aws::S3::Client.new(aws_options_hash)
  rescue Aws::SQS::Errors::ServiceError => e
    @logger.error("Cannot establish connection to Amazon SQS", :error => e)
    raise LogStash::ConfigurationError, "Verify the SQS queue name and your credentials"
  end

  # Options handed to the queue poller on every poll.
  #
  # @return [Hash] poller options
  def polling_options
    {
      # we will query 1 message at a time, so we can ensure correct error handling if we can't download a single file correctly
      # (we will throw :skip_delete if download size isn't correct to process the event again later
      # -> set a reasonable "Default Visibility Timeout" for your queue, so that there's enough time to process the log files)
      :max_number_of_messages => 1,
      # we will use the queue's setting, a good value is 10 seconds
      # (to ensure fast logstash shutdown on the one hand and few api calls on the other hand)
      :wait_time_seconds => nil,
    }
  end

  # Processes one SQS message: every S3 object-created record it contains is
  # downloaded and turned into events. Messages without a Records array (e.g.
  # S3 test events) and records of other kinds are skipped, so the message is
  # still removed from the queue.
  #
  # Throws :skip_delete when an object cannot be downloaded or processed, so
  # the message is handled again later.
  #
  # @param message [#body] SQS message whose body is a JSON document
  # @param queue [#<<] Logstash queue receiving the events
  def handle_message(message, queue)
    hash = JSON.parse message.body
    records = hash['Records']
    return unless records

    # typically there will be only 1 record per event, but since it is an array
    # we will treat it as if there could be more records
    records.each do |record|
      next unless record['eventSource'] == EVENT_SOURCE && record['eventName'].start_with?(EVENT_TYPE)
      process_record(record, queue)
    end
  end

  # Registers the shutdown hook on the poller and polls until stopped.
  #
  # @param queue [#<<] Logstash queue receiving the events
  def run(queue)
    # ensure we can stop logstash correctly
    poller.before_request do |stats|
      if stop?
        @logger.warn("issuing :stop_polling on stop?", :queue => @queue)
        # this can take up to "Receive Message Wait Time" (of the sqs queue) seconds to be recognized
        throw :stop_polling
      end
    end
    # poll a message and process it
    run_with_backoff do
      poller.poll(polling_options) do |message|
        handle_message(message, queue)
      end
    end
  end

  private

  # Downloads the object named by an S3 record, verifies its size, unzips it
  # if necessary and emits its lines as events.
  def process_record(record, queue)
    bucket = record['s3']['bucket']['name']
    key = record['s3']['object']['key']

    # try download and :skip_delete if it fails
    begin
      response = @s3.get_object(bucket: bucket, key: key)
    rescue => e
      @logger.warn("issuing :skip_delete on failed download", :bucket => bucket, :object => key, :error => e)
      throw :skip_delete
    end

    # verify downloaded content size, otherwise try again later
    unless response.content_length == record['s3']['object']['size']
      @logger.warn("issuing :skip_delete on wrong download content size", :bucket => bucket, :object => key,
        :download_size => response.content_length, :expected => record['s3']['object']['size'])
      throw :skip_delete
    end

    body = plain_text_stream(response, bucket, key)
    emit_events(body, bucket, key, queue)
  end

  # Returns a readable stream for the downloaded content, wrapped in a gzip
  # reader when the response or the object key marks it as gzipped. If the
  # content turns out not to be gzip, the raw body is returned instead.
  def plain_text_stream(response, bucket, key)
    body = response.body
    return body unless response.content_encoding == "gzip" || key.end_with?(".gz")

    begin
      Zlib::GzipReader.new(body)
    rescue => e
      @logger.warn("content is marked to be gzipped but can't unzip it, assuming plain text", :bucket => bucket, :object => key, :error => e)
      # The failed gzip header probe already read from the stream; start over
      # so the plain-text fallback does not lose the beginning of the object.
      body.rewind if body.respond_to?(:rewind)
      body
    end
  end

  # Decodes the content line by line and pushes the decorated events, tagged
  # with bucket name and object key in @metadata, to the queue.
  def emit_events(body, bucket, key, queue)
    text = body.read.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: "\u2370")
    text.split(/\n/).each do |line|
      @codec.decode(line) do |event|
        decorate(event)

        event.set('[@metadata][s3_bucket_name]', bucket)
        event.set('[@metadata][s3_object_key]', key)

        queue << event
      end
    end
  rescue => e
    @logger.warn("issuing :skip_delete on failed plain text processing", :bucket => bucket, :object => key, :error => e)
    throw :skip_delete
  end

  # Runs an AWS request inside a Ruby block with an exponential backoff in case
  # we experience a ServiceError. The block is retried until it succeeds.
  #
  # @param [Integer] max_time longest delay in seconds between two attempts; when
  #   the next delay would exceed it, the delay starts over at sleep_time.
  # @param [Integer] sleep_time the initial amount of time to sleep before retrying.
  # @param [Block] block Ruby code block to execute.
  def run_with_backoff(max_time = MAX_TIME_BEFORE_GIVING_UP, sleep_time = BACKOFF_SLEEP_TIME, &block)
    next_sleep = sleep_time
    begin
      block.call
    rescue Aws::SQS::Errors::ServiceError => e
      @logger.warn("Aws::SQS::Errors::ServiceError ... retrying SQS request with exponential backoff", :queue => @queue, :sleep_time => next_sleep, :error => e)
      sleep(next_sleep)
      # grow from the current delay (not the initial one) so the backoff is really exponential
      grown = next_sleep * BACKOFF_FACTOR
      next_sleep = grown > max_time ? sleep_time : grown
      retry
    end
  end

end # class
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = 'logstash-input-sqs_s3'
|
3
|
-
s.version = '1.1.1'
|
3
|
+
s.version = '1.1.2'
|
4
4
|
s.licenses = ['Apache License (2.0)']
|
5
5
|
s.summary = "Get logs from AWS s3 buckets as issued by an object-created event via sqs."
|
6
6
|
s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program. Full credit goes to Heiko Finzel. Republishing this gem to support Logstash 5."
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: logstash-input-sqs_s3
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.1
|
4
|
+
version: 1.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Heiko Finzel
|
@@ -88,8 +88,8 @@ files:
|
|
88
88
|
- NOTICE.TXT
|
89
89
|
- README.md
|
90
90
|
- lib/logstash/inputs/sqs_s3.rb
|
91
|
+
- lib/logstash/inputs/sqs_s3.rb.save
|
91
92
|
- logstash-input-sqs_s3.gemspec
|
92
|
-
- spec/inputs/s3sqs_spec.rb
|
93
93
|
- spec/inputs/sqs_s3_spec.rb
|
94
94
|
- spec/spec_helper.rb
|
95
95
|
homepage: https://www.boreus.de
|
@@ -119,6 +119,5 @@ signing_key:
|
|
119
119
|
specification_version: 4
|
120
120
|
summary: Get logs from AWS s3 buckets as issued by an object-created event via sqs.
|
121
121
|
test_files:
|
122
|
-
- spec/inputs/s3sqs_spec.rb
|
123
122
|
- spec/inputs/sqs_s3_spec.rb
|
124
123
|
- spec/spec_helper.rb
|