logstash-input-sqs_s3 1.1.1 → 1.1.2

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 92aedc8907953b85a96cfba10afe5c5a0b498a85b822e97d799582fb001d1e39
-  data.tar.gz: 1c485cde83ad7d59d931663c4331ed40742328569de7498efc547f769e8b502f
+  metadata.gz: b2c1d88096f1454205e11131c1a7ea980190c45f8d49cd2f99fc8ff3f7652819
+  data.tar.gz: 5a9a5b9258cedd4d5b187b08faec6f97db84c8b68762d76da97e828aad9ba9c8
 SHA512:
-  metadata.gz: c9eef9ed7153832d3e534ec71763a77de4275133882da1f7ccd044dde470c9caeaecc5f667f9b08d6932f28d1e91bb7901e2db8bc9a893522f244e9c76c40761
-  data.tar.gz: 61ebd9633618d97867684135361eeaeb6288f87100fb2654b8cb3bfc8813b8bef1be7a1798f0349ec6c31671c92cd6d8e18f651d8bce9d5cd23e24e838c85d33
+  metadata.gz: a8d547ff18844c52c89480623c56dcbd787906fedff1fd2fecb09c5e3d3a9ae034ae7026b03af20a9a52b4451228b1f102524bfecaa89390862709cf1b73744d
+  data.tar.gz: 4f0ca6ab34c61efe0a1e650e48b44b84e16138a82ae92a3585c45936aecd6c059170d392bf4a7667398de49e5bd1e913b67b43f8771a5fede44ec2c4cfc219dd
lib/logstash/inputs/sqs_s3.rb CHANGED
@@ -6,6 +6,12 @@ require "logstash/timestamp"
 require "logstash/plugin_mixins/aws_config"
 require "logstash/errors"
 
+# Forcibly load all modules marked to be lazily loaded.
+#
+# It is recommended that this is called prior to launching threads. See
+# https://aws.amazon.com/blogs/developer/threading-with-the-aws-sdk-for-ruby/.
+Aws.eager_autoload!
+
 # Get logs from AWS s3 buckets as issued by an object-created event via sqs.
 #
 # This plugin is based on the logstash-input-sqs plugin but doesn't log the sqs event itself.
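
The `Aws.eager_autoload!` call added above follows the threading advice in the linked AWS blog post: the Ruby SDK autoloads most of its modules lazily, and that lazy loading can race when several threads touch the SDK at once. A minimal sketch of the pattern, assuming aws-sdk v2 and an illustrative region:

    require "aws-sdk"

    # Load every lazily-autoloaded SDK module up front, before any threads
    # exist, so concurrent pollers don't race on Ruby's autoload.
    Aws.eager_autoload!

    workers = 2.times.map do
      Thread.new do
        # safe to construct clients concurrently now ("us-east-1" is an assumption)
        Aws::SQS::Client.new(:region => "us-east-1")
      end
    end
    workers.each(&:join)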
@@ -79,6 +85,9 @@ class LogStash::Inputs::SQSS3 < LogStash::Inputs::Threadable
   MAX_TIME_BEFORE_GIVING_UP = 60
   EVENT_SOURCE = 'aws:s3'
   EVENT_TYPE = 'ObjectCreated'
+  MAX_MESSAGES_TO_FETCH = 10 # SQS allows between 1 and 10 per the AWS SDK docs
+  SENT_TIMESTAMP = "SentTimestamp"
+  SQS_ATTRIBUTES = [SENT_TIMESTAMP]
 
   config_name "sqs_s3"
 
@@ -87,6 +96,15 @@ class LogStash::Inputs::SQSS3 < LogStash::Inputs::Threadable
   # Name of the SQS Queue to pull messages from. Note that this is just the name of the queue, not the URL or ARN.
   config :queue, :validate => :string, :required => true
 
+  # Name of the event field in which to store the SQS message ID
+  config :id_field, :validate => :string
+
+  # Name of the event field in which to store the SQS message's SentTimestamp
+  config :sent_timestamp_field, :validate => :string
+
+  # Maximum number of messages to fetch per API call (1-10), default is 10
+  config :max_messages_to_fetch, :validate => :number, :default => MAX_MESSAGES_TO_FETCH
+
   attr_reader :poller
   attr_reader :s3
 
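
Together, the three new options surface SQS metadata on each event and control the receive batch size. A hypothetical, spec-style construction showing them in use (the field names are examples, not defaults; `id_field` and `sent_timestamp_field` are opt-in and unset unless configured):

    require "logstash/inputs/sqs_s3"

    # Hypothetical sketch; only "queue" is required.
    input = LogStash::Inputs::SQSS3.new(
      "queue"                 => "my-elb-log-queue",   # queue name only, not URL or ARN
      "id_field"              => "sqs_message_id",     # receives message.message_id
      "sent_timestamp_field"  => "sqs_sent_timestamp", # receives SentTimestamp as a LogStash::Timestamp
      "max_messages_to_fetch" => 10                    # SQS allows 1-10 per receive call
    )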
@@ -108,10 +126,9 @@ class LogStash::Inputs::SQSS3 < LogStash::Inputs::Threadable
 
   def polling_options
     {
-      # we will query 1 message at a time, so we can ensure correct error handling if we can't download a single file correctly
-      # (we will throw :skip_delete if download size isn't correct to process the event again later
-      # -> set a reasonable "Default Visibility Timeout" for your queue, so that there's enough time to process the log files)
-      :max_number_of_messages => 1,
+      # the number of messages to fetch in a single API call
+      :max_number_of_messages => MAX_MESSAGES_TO_FETCH,
+      :attribute_names => SQS_ATTRIBUTES,
       # we will use the queue's setting, a good value is 10 seconds
       # (to ensure fast logstash shutdown on the one hand and few api calls on the other hand)
      :wait_time_seconds => nil,
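
Two things change in the options above: the batch size rises from 1 to `MAX_MESSAGES_TO_FETCH`, and `SentTimestamp` is requested via `:attribute_names` so that `message.attributes` is actually populated for `sent_timestamp_field`. Raising the batch size also changes what the poller yields: `Aws::SQS::QueuePoller#poll` passes a single message to its block when `:max_number_of_messages` is 1, but an array of messages for larger batches, which is why the `run` loop further down now iterates. A standalone sketch (the queue URL is an assumption):

    poller = Aws::SQS::QueuePoller.new(
      "https://sqs.us-east-1.amazonaws.com/123456789012/my-elb-log-queue"
    )
    # With a batch size > 1 the block receives an Array of messages.
    poller.poll(:max_number_of_messages => 10, :attribute_names => ["SentTimestamp"]) do |messages|
      messages.each { |message| puts message.message_id }
    end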
@@ -161,6 +178,8 @@ class LogStash::Inputs::SQSS3 < LogStash::Inputs::Threadable
 
                   event.set('[@metadata][s3_bucket_name]', record['s3']['bucket']['name'])
                   event.set('[@metadata][s3_object_key]', record['s3']['object']['key'])
+                  event.set(@id_field, message.message_id) if @id_field
+                  event.set(@sent_timestamp_field, convert_epoch_to_timestamp(message.attributes[SENT_TIMESTAMP])) if @sent_timestamp_field
 
                   queue << event
                 end
@@ -191,8 +210,9 @@ class LogStash::Inputs::SQSS3 < LogStash::Inputs::Threadable
     end
     # poll a message and process it
     run_with_backoff do
-      poller.poll(polling_options) do |message|
-        handle_message(message, queue)
+      poller.poll(polling_options) do |messages|
+        messages.each do |message|
+          handle_message(message, queue)
       end
     end
   end
@@ -217,4 +237,7 @@ class LogStash::Inputs::SQSS3 < LogStash::Inputs::Threadable
     end
   end
 
+  def convert_epoch_to_timestamp(time)
+    LogStash::Timestamp.at(time.to_i / 1000)
+  end
 end # class
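
The helper exists because SQS reports `SentTimestamp` as a string of epoch milliseconds, while `LogStash::Timestamp.at` expects seconds, hence the division by 1000 (integer division, so any sub-second remainder is dropped). A quick worked example with an assumed sample value:

    sent = "1514764800000"                    # 2018-01-01T00:00:00Z, in milliseconds
    LogStash::Timestamp.at(sent.to_i / 1000)  # => 2018-01-01T00:00:00.000Z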
lib/logstash/inputs/sqs_s3.rb.save ADDED
@@ -0,0 +1,226 @@
+# encoding: utf-8
+#
+require "logstash/inputs/threadable"
+require "logstash/namespace"
+require "logstash/timestamp"
+require "logstash/plugin_mixins/aws_config"
+require "logstash/errors"
+
+# Forcibly load all modules marked to be lazily loaded.
+#
+# It is recommended that this is called prior to launching threads. See
+# https://aws.amazon.com/blogs/developer/threading-with-the-aws-sdk-for-ruby/.
+Aws.eager_autoload!
+
+# Get logs from AWS s3 buckets as issued by an object-created event via sqs.
+#
+# This plugin is based on the logstash-input-sqs plugin but doesn't log the sqs event itself.
+# Instead it assumes that the event is an s3 object-created event and will then download
+# and process the given file.
+#
+# Some issues of logstash-input-sqs, like logstash not shutting down properly, have been
+# fixed for this plugin.
+#
+# In contrast to logstash-input-sqs this plugin uses the "Receive Message Wait Time"
+# configured for the sqs queue in question; a good value will be something like 10 seconds
+# to ensure a reasonable shutdown time of logstash.
+# Also use a "Default Visibility Timeout" that is high enough for log files to be downloaded
+# and processed (a good value is 5-10 minutes for most use cases); the plugin will
+# avoid removing the event from the queue if the associated log file couldn't be correctly
+# passed to the processing level of logstash (e.g. downloaded content size doesn't match sqs event).
+#
+# This plugin is meant for high availability setups; in contrast to logstash-input-s3 you can safely
+# use multiple logstash nodes, since the usage of sqs will ensure that each logfile is processed
+# only once and no file will get lost on node failure or downscaling for auto-scaling groups.
+# (You should use a "Message Retention Period" >= 4 days for your sqs to ensure you can survive
+# a weekend of faulty log file processing.)
+# The plugin will not delete objects from s3 buckets, so make sure to have a reasonable "Lifecycle"
+# configured for your buckets, which should keep the files at least "Message Retention Period" days.
+#
+# A typical setup will contain some s3 buckets containing elb, cloudtrail or other log files.
+# These will be configured to send object-created events to a sqs queue, which will be configured
+# as the source queue for this plugin.
+# (The plugin supports gzipped content if it is marked with "content-encoding: gzip" as is the
+# case for cloudtrail logs.)
+#
+# The logstash node therefore must have sqs permissions plus the permissions to download objects
+# from the s3 buckets that send events to the queue.
+# (If logstash nodes are running on EC2 you should use an IAM instance role to provide permissions.)
+# [source,json]
+#   {
+#       "Version": "2012-10-17",
+#       "Statement": [
+#           {
+#               "Effect": "Allow",
+#               "Action": [
+#                   "sqs:Get*",
+#                   "sqs:List*",
+#                   "sqs:ReceiveMessage",
+#                   "sqs:ChangeMessageVisibility*",
+#                   "sqs:DeleteMessage*"
+#               ],
+#               "Resource": [
+#                   "arn:aws:sqs:us-east-1:123456789012:my-elb-log-queue"
+#               ]
+#           },
+#           {
+#               "Effect": "Allow",
+#               "Action": [
+#                   "s3:Get*",
+#                   "s3:List*"
+#               ],
+#               "Resource": [
+#                   "arn:aws:s3:::my-elb-logs",
+#                   "arn:aws:s3:::my-elb-logs/*"
+#               ]
+#           }
+#       ]
+#   }
+#
+class LogStash::Inputs::SQSS3 < LogStash::Inputs::Threadable
+  include LogStash::PluginMixins::AwsConfig::V2
+
+  BACKOFF_SLEEP_TIME = 1
+  BACKOFF_FACTOR = 2
+  MAX_TIME_BEFORE_GIVING_UP = 60
+  EVENT_SOURCE = 'aws:s3'
+  EVENT_TYPE = 'ObjectCreated'
+
+  config_name "sqs_s3"
+
+  default :codec, "plain"
+
+  # Name of the SQS Queue to pull messages from. Note that this is just the name of the queue, not the URL or ARN.
+  config :queue, :validate => :string, :required => true
+
+  attr_reader :poller
+  attr_reader :s3
+
+  def register
+    require "aws-sdk"
+    @logger.info("Registering SQS input", :queue => @queue)
+    setup_queue
+  end
+
+  def setup_queue
+    aws_sqs_client = Aws::SQS::Client.new(aws_options_hash)
+    queue_url = aws_sqs_client.get_queue_url(:queue_name => @queue)[:queue_url]
+    @poller = Aws::SQS::QueuePoller.new(queue_url, :client => aws_sqs_client)
+    @s3 = Aws::S3::Client.new(aws_options_hash)
+  rescue Aws::SQS::Errors::ServiceError => e
+    @logger.error("Cannot establish connection to Amazon SQS", :error => e)
+    raise LogStash::ConfigurationError, "Verify the SQS queue name and your credentials"
+  end
+
+  def polling_options
+    {
+      # we will query 1 message at a time, so we can ensure correct error handling if we can't download a single file correctly
+      # (we will throw :skip_delete if the download size isn't correct, so the event is processed again later
+      # -> set a reasonable "Default Visibility Timeout" for your queue, so that there's enough time to process the log files)
+      :max_number_of_messages => 1,
+      # we will use the queue's setting, a good value is 10 seconds
+      # (to ensure fast logstash shutdown on the one hand and few api calls on the other hand)
+      :wait_time_seconds => nil,
+    }
+  end
+
+  def handle_message(message, queue)
+    hash = JSON.parse message.body
+    # there may be test events sent from the s3 bucket which won't contain a Records array;
+    # we will skip those events and remove them from the queue
+    if hash['Records'] then
+      # typically there will be only 1 record per event, but since it is an array we will
+      # treat it as if there could be more records
+      hash['Records'].each do |record|
+        # in case there are any events with Records that aren't s3 object-created events and therefore can't be
+        # processed by this plugin, we will skip them and remove them from the queue
+        if record['eventSource'] == EVENT_SOURCE and record['eventName'].start_with?(EVENT_TYPE) then
+          # try the download and :skip_delete if it fails
+          begin
+            response = @s3.get_object(
+              bucket: record['s3']['bucket']['name'],
+              key: record['s3']['object']['key']
+            )
+          rescue => e
+            @logger.warn("issuing :skip_delete on failed download", :bucket => record['s3']['bucket']['name'], :object => record['s3']['object']['key'], :error => e)
+            throw :skip_delete
+          end
+          # verify downloaded content size
+          if response.content_length == record['s3']['object']['size'] then
+            body = response.body
+            # if necessary unzip
+            if response.content_encoding == "gzip" or record['s3']['object']['key'].end_with?(".gz") then
+              begin
+                temp = Zlib::GzipReader.new(body)
+              rescue => e
+                @logger.warn("content is marked to be gzipped but can't unzip it, assuming plain text", :bucket => record['s3']['bucket']['name'], :object => record['s3']['object']['key'], :error => e)
+                temp = body
+              end
+              body = temp
+            end
+            # process the plain text content
+            begin
+              lines = body.read.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: "\u2370").split(/\n/)
+              lines.each do |line|
+                @codec.decode(line) do |event|
+                  decorate(event)

+                  event.set('[@metadata][s3_bucket_name]', record['s3']['bucket']['name'])
+                  event.set('[@metadata][s3_object_key]', record['s3']['object']['key'])

+                  queue << event
+                end
+              end
+            rescue => e
+              @logger.warn("issuing :skip_delete on failed plain text processing", :bucket => record['s3']['bucket']['name'], :object => record['s3']['object']['key'], :error => e)
+              throw :skip_delete
+            end
+          # otherwise try again later
+          else
+            @logger.warn("issuing :skip_delete on wrong download content size", :bucket => record['s3']['bucket']['name'], :object => record['s3']['object']['key'],
+              :download_size => response.content_length, :expected => record['s3']['object']['size'])
+            throw :skip_delete
+          end
+        end
+      end
+    end
+  end
+
+  def run(queue)
+    # ensure we can stop logstash correctly
+    poller.before_request do |stats|
+      if stop? then
+        @logger.warn("issuing :stop_polling on stop?", :queue => @queue)
+        # this can take up to "Receive Message Wait Time" (of the sqs queue) seconds to be recognized
+        throw :stop_polling
+      end
+    end
+    # poll a message and process it
+    run_with_backoff do
+      poller.poll(polling_options) do |message|
+        handle_message(message, queue)
+      end
+    end
+  end
+
+  private
+  # Runs an AWS request inside a Ruby block with an exponential backoff in case
+  # we experience a ServiceError.
+  #
+  # @param [Integer] max_time maximum amount of time to sleep before giving up.
+  # @param [Integer] sleep_time the initial amount of time to sleep before retrying.
+  # @param [Block] block Ruby code block to execute.
+  def run_with_backoff(max_time = MAX_TIME_BEFORE_GIVING_UP, sleep_time = BACKOFF_SLEEP_TIME, &block)
+    next_sleep = sleep_time
+    begin
+      block.call
+      next_sleep = sleep_time
+    rescue Aws::SQS::Errors::ServiceError => e
+      @logger.warn("Aws::SQS::Errors::ServiceError ... retrying SQS request with exponential backoff", :queue => @queue, :sleep_time => sleep_time, :error => e)
+      sleep(next_sleep)
+      next_sleep = next_sleep > max_time ? sleep_time : sleep_time * BACKOFF_FACTOR
+      retry
+    end
+  end
+
+end # class
logstash-input-sqs_s3.gemspec CHANGED
@@ -1,6 +1,6 @@
 Gem::Specification.new do |s|
   s.name = 'logstash-input-sqs_s3'
-  s.version = '1.1.1'
+  s.version = '1.1.2'
   s.licenses = ['Apache License (2.0)']
   s.summary = "Get logs from AWS s3 buckets as issued by an object-created event via sqs."
   s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program. Full credit goes to Heiko Finzel. Republishing this gem to support Logstash 5."
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: logstash-input-sqs_s3
 version: !ruby/object:Gem::Version
-  version: 1.1.1
+  version: 1.1.2
 platform: ruby
 authors:
 - Heiko Finzel
@@ -88,8 +88,8 @@ files:
 - NOTICE.TXT
 - README.md
 - lib/logstash/inputs/sqs_s3.rb
+- lib/logstash/inputs/sqs_s3.rb.save
 - logstash-input-sqs_s3.gemspec
-- spec/inputs/s3sqs_spec.rb
 - spec/inputs/sqs_s3_spec.rb
 - spec/spec_helper.rb
 homepage: https://www.boreus.de
@@ -119,6 +119,5 @@ signing_key:
 specification_version: 4
 summary: Get logs from AWS s3 buckets as issued by an object-created event via sqs.
 test_files:
-- spec/inputs/s3sqs_spec.rb
 - spec/inputs/sqs_s3_spec.rb
 - spec/spec_helper.rb
spec/inputs/s3sqs_spec.rb DELETED
@@ -1,9 +0,0 @@
-# encoding: utf-8
-require "logstash/devutils/rspec/spec_helper"
-require "logstash/inputs/s3sqs"
-
-describe LogStash::Inputs::S3SQS do
-
-  true.should be_true
-
-end