logstash-input-sqs_s3 1.1.1 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 92aedc8907953b85a96cfba10afe5c5a0b498a85b822e97d799582fb001d1e39
-  data.tar.gz: 1c485cde83ad7d59d931663c4331ed40742328569de7498efc547f769e8b502f
+  metadata.gz: b2c1d88096f1454205e11131c1a7ea980190c45f8d49cd2f99fc8ff3f7652819
+  data.tar.gz: 5a9a5b9258cedd4d5b187b08faec6f97db84c8b68762d76da97e828aad9ba9c8
 SHA512:
-  metadata.gz: c9eef9ed7153832d3e534ec71763a77de4275133882da1f7ccd044dde470c9caeaecc5f667f9b08d6932f28d1e91bb7901e2db8bc9a893522f244e9c76c40761
-  data.tar.gz: 61ebd9633618d97867684135361eeaeb6288f87100fb2654b8cb3bfc8813b8bef1be7a1798f0349ec6c31671c92cd6d8e18f651d8bce9d5cd23e24e838c85d33
+  metadata.gz: a8d547ff18844c52c89480623c56dcbd787906fedff1fd2fecb09c5e3d3a9ae034ae7026b03af20a9a52b4451228b1f102524bfecaa89390862709cf1b73744d
+  data.tar.gz: 4f0ca6ab34c61efe0a1e650e48b44b84e16138a82ae92a3585c45936aecd6c059170d392bf4a7667398de49e5bd1e913b67b43f8771a5fede44ec2c4cfc219dd
lib/logstash/inputs/sqs_s3.rb CHANGED
@@ -6,6 +6,12 @@ require "logstash/timestamp"
 require "logstash/plugin_mixins/aws_config"
 require "logstash/errors"
 
+# Forcibly load all modules marked to be lazily loaded.
+#
+# It is recommended that this is called prior to launching threads. See
+# https://aws.amazon.com/blogs/developer/threading-with-the-aws-sdk-for-ruby/.
+Aws.eager_autoload!
+
 # Get logs from AWS s3 buckets as issued by an object-created event via sqs.
 #
 # This plugin is based on the logstash-input-sqs plugin but doesn't log the sqs event itself.
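For context, `Aws.eager_autoload!` front-loads the SDK classes that aws-sdk v2 would otherwise require lazily, avoiding autoload races once the input spawns polling threads. A minimal sketch; the `services:` narrowing is an SDK option described in the linked AWS blog post and is an assumption here, not something this release uses:

```ruby
require "aws-sdk" # aws-sdk v2, as required by this plugin

# Load every lazily-autoloaded SDK module up front, before any threads start.
Aws.eager_autoload!

# Assumption: the v2 SDK also accepts a services list to limit the preload,
# e.g. to the two services this plugin talks to.
Aws.eager_autoload!(services: %w(S3 SQS))
```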
@@ -79,6 +85,9 @@ class LogStash::Inputs::SQSS3 < LogStash::Inputs::Threadable
   MAX_TIME_BEFORE_GIVING_UP = 60
   EVENT_SOURCE = 'aws:s3'
   EVENT_TYPE = 'ObjectCreated'
+  MAX_MESSAGES_TO_FETCH = 10 # Between 1-10 in the AWS-SDK doc
+  SENT_TIMESTAMP = "SentTimestamp"
+  SQS_ATTRIBUTES = [SENT_TIMESTAMP]
 
   config_name "sqs_s3"
 
@@ -87,6 +96,15 @@ class LogStash::Inputs::SQSS3 < LogStash::Inputs::Threadable
   # Name of the SQS Queue to pull messages from. Note that this is just the name of the queue, not the URL or ARN.
   config :queue, :validate => :string, :required => true
 
+  # Name of the event field in which to store the SQS message ID
+  config :id_field, :validate => :string
+
+  # Name of the event field in which to store the SQS message Sent Timestamp
+  config :sent_timestamp_field, :validate => :string
+
+  # Max messages to fetch, default is 10
+  config :max_messages_to_fetch, :validate => :number, :default => MAX_MESSAGES_TO_FETCH
+
   attr_reader :poller
   attr_reader :s3
 
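The two field options stay unset by default, and `max_messages_to_fetch` defaults to the SQS per-request maximum. A hedged sketch of how the new options might be exercised programmatically; the queue name and target field names are placeholders, and in a real deployment they would be set in the pipeline configuration instead:

```ruby
require "logstash/inputs/sqs_s3"

# Hypothetical wiring of the new options; none of these values come from this release.
input = LogStash::Inputs::SQSS3.new(
  "queue"                 => "my-elb-log-queue",   # placeholder queue name
  "id_field"              => "sqs_message_id",     # receives message.message_id
  "sent_timestamp_field"  => "sqs_sent_timestamp", # receives SentTimestamp as a LogStash::Timestamp
  "max_messages_to_fetch" => 10                    # SQS allows 1-10 messages per ReceiveMessage call
)
input.register # register resolves the queue URL, so it needs AWS credentials
```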
@@ -108,10 +126,9 @@ class LogStash::Inputs::SQSS3 < LogStash::Inputs::Threadable
 
   def polling_options
     {
-      # we will query 1 message at a time, so we can ensure correct error handling if we can't download a single file correctly
-      # (we will throw :skip_delete if download size isn't correct to process the event again later
-      # -> set a reasonable "Default Visibility Timeout" for your queue, so that there's enough time to process the log files)
-      :max_number_of_messages => 1,
+      # the number of messages to fetch in a single api call
+      :max_number_of_messages => MAX_MESSAGES_TO_FETCH,
+      :attribute_names => SQS_ATTRIBUTES,
       # we will use the queue's setting, a good value is 10 seconds
       # (to ensure fast logstash shutdown on the one hand and few api calls on the other hand)
       :wait_time_seconds => nil,
@@ -161,6 +178,8 @@ class LogStash::Inputs::SQSS3 < LogStash::Inputs::Threadable
 
                   event.set('[@metadata][s3_bucket_name]', record['s3']['bucket']['name'])
                   event.set('[@metadata][s3_object_key]', record['s3']['object']['key'])
+                  event.set(@id_field, message.message_id) if @id_field
+                  event.set(@sent_timestamp_field, convert_epoch_to_timestamp(message.attributes[SENT_TIMESTAMP])) if @sent_timestamp_field
 
                   queue << event
                 end
@@ -191,8 +210,9 @@ class LogStash::Inputs::SQSS3 < LogStash::Inputs::Threadable
     end
     # poll a message and process it
     run_with_backoff do
-      poller.poll(polling_options) do |message|
-        handle_message(message, queue)
+      poller.poll(polling_options) do |messages|
+        messages.each do |message|
+          handle_message(message, queue)
       end
     end
   end
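The block parameter is renamed because the AWS SDK's `Aws::SQS::QueuePoller` yields a single message when `:max_number_of_messages` is 1 but an array for any larger batch size, hence the added `messages.each` loop. A standalone sketch against aws-sdk v2; the queue URL is a placeholder:

```ruby
require "aws-sdk" # aws-sdk v2

# Placeholder URL; the plugin resolves it via Aws::SQS::Client#get_queue_url.
poller = Aws::SQS::QueuePoller.new("https://sqs.us-east-1.amazonaws.com/123456789012/my-queue")

# With a batch size above 1, the block receives an Array of messages.
poller.poll(max_number_of_messages: 10, attribute_names: ["SentTimestamp"]) do |messages|
  messages.each do |message|
    puts message.message_id
    puts message.attributes["SentTimestamp"] # epoch milliseconds, as a String
  end
end
```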
@@ -217,4 +237,7 @@ class LogStash::Inputs::SQSS3 < LogStash::Inputs::Threadable
     end
   end
 
+  def convert_epoch_to_timestamp(time)
+    LogStash::Timestamp.at(time.to_i / 1000)
+  end
 end # class
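SQS reports `SentTimestamp` as a string of epoch milliseconds, so the helper divides by 1000 before handing the value to `LogStash::Timestamp.at`; being integer division, any sub-second precision is dropped. A worked example with a hypothetical timestamp:

```ruby
require "logstash/timestamp"

sent_timestamp = "1483228800000" # hypothetical SentTimestamp: 2017-01-01T00:00:00Z in ms
LogStash::Timestamp.at(sent_timestamp.to_i / 1000)
# => 2017-01-01T00:00:00.000Z
```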
lib/logstash/inputs/sqs_s3.rb.save ADDED
@@ -0,0 +1,226 @@
+# encoding: utf-8
+#
+require "logstash/inputs/threadable"
+require "logstash/namespace"
+require "logstash/timestamp"
+require "logstash/plugin_mixins/aws_config"
+require "logstash/errors"
+
+# Forcibly load all modules marked to be lazily loaded.
+#
+# It is recommended that this is called prior to launching threads. See
+# https://aws.amazon.com/blogs/developer/threading-with-the-aws-sdk-for-ruby/.
+Aws.eager_autoload!
+
+# Get logs from AWS s3 buckets as issued by an object-created event via sqs.
+#
+# This plugin is based on the logstash-input-sqs plugin but doesn't log the sqs event itself.
+# Instead it assumes, that the event is an s3 object-created event and will then download
+# and process the given file.
+#
+# Some issues of logstash-input-sqs, like logstash not shutting down properly, have been
+# fixed for this plugin.
+#
+# In contrast to logstash-input-sqs this plugin uses the "Receive Message Wait Time"
+# configured for the sqs queue in question, a good value will be something like 10 seconds
+# to ensure a reasonable shutdown time of logstash.
+# Also use a "Default Visibility Timeout" that is high enough for log files to be downloaded
+# and processed (I think a good value should be 5-10 minutes for most use cases), the plugin will
+# avoid removing the event from the queue if the associated log file couldn't be correctly
+# passed to the processing level of logstash (e.g. downloaded content size doesn't match sqs event).
+#
+# This plugin is meant for high availability setups, in contrast to logstash-input-s3 you can safely
+# use multiple logstash nodes, since the usage of sqs will ensure that each logfile is processed
+# only once and no file will get lost on node failure or downscaling for auto-scaling groups.
+# (You should use a "Message Retention Period" >= 4 days for your sqs to ensure you can survive
+# a weekend of faulty log file processing)
+# The plugin will not delete objects from s3 buckets, so make sure to have a reasonable "Lifecycle"
+# configured for your buckets, which should keep the files at least "Message Retention Period" days.
+#
+# A typical setup will contain some s3 buckets containing elb, cloudtrail or other log files.
+# These will be configured to send object-created events to a sqs queue, which will be configured
+# as the source queue for this plugin.
+# (The plugin supports gzipped content if it is marked with "contend-encoding: gzip" as it is the
+# case for cloudtrail logs)
+#
+# The logstash node therefore must have sqs permissions + the permissions to download objects
+# from the s3 buckets that send events to the queue.
+# (If logstash nodes are running on EC2 you should use a ServerRole to provide permissions)
+# [source,json]
+#   {
+#       "Version": "2012-10-17",
+#       "Statement": [
+#           {
+#               "Effect": "Allow",
+#               "Action": [
+#                   "sqs:Get*",
+#                   "sqs:List*",
+#                   "sqs:ReceiveMessage",
+#                   "sqs:ChangeMessageVisibility*",
+#                   "sqs:DeleteMessage*"
+#               ],
+#               "Resource": [
+#                   "arn:aws:sqs:us-east-1:123456789012:my-elb-log-queue"
+#               ]
+#           },
+#           {
+#               "Effect": "Allow",
+#               "Action": [
+#                   "s3:Get*",
+#                   "s3:List*"
+#               ],
+#               "Resource": [
+#                   "arn:aws:s3:::my-elb-logs",
+#                   "arn:aws:s3:::my-elb-logs/*"
+#               ]
+#           }
+#       ]
+#   }
+#
+class LogStash::Inputs::SQSS3 < LogStash::Inputs::Threadable
+  include LogStash::PluginMixins::AwsConfig::V2
+
+  BACKOFF_SLEEP_TIME = 1
+  BACKOFF_FACTOR = 2
+  MAX_TIME_BEFORE_GIVING_UP = 60
+  EVENT_SOURCE = 'aws:s3'
+  EVENT_TYPE = 'ObjectCreated'
+
+  config_name "sqs_s3"
+
+  default :codec, "plain"
+
+  # Name of the SQS Queue to pull messages from. Note that this is just the name of the queue, not the URL or ARN.
+  config :queue, :validate => :string, :required => true
+
+  attr_reader :poller
+  attr_reader :s3
+
+  def register
+    require "aws-sdk"
+    @logger.info("Registering SQS input", :queue => @queue)
+    setup_queue
+  end
+
+  def setup_queue
+    aws_sqs_client = Aws::SQS::Client.new(aws_options_hash)
+    queue_url = aws_sqs_client.get_queue_url(:queue_name => @queue)[:queue_url]
+    @poller = Aws::SQS::QueuePoller.new(queue_url, :client => aws_sqs_client)
+    @s3 = Aws::S3::Client.new(aws_options_hash)
+  rescue Aws::SQS::Errors::ServiceError => e
+    @logger.error("Cannot establish connection to Amazon SQS", :error => e)
+    raise LogStash::ConfigurationError, "Verify the SQS queue name and your credentials"
+  end
+
+  def polling_options
+    {
+      # we will query 1 message at a time, so we can ensure correct error handling if we can't download a single file correctly
+      # (we will throw :skip_delete if download size isn't correct to process the event again later
+      # -> set a reasonable "Default Visibility Timeout" for your queue, so that there's enough time to process the log files)
+      :max_number_of_messages => 1,
+      # we will use the queue's setting, a good value is 10 seconds
+      # (to ensure fast logstash shutdown on the one hand and few api calls on the other hand)
+      :wait_time_seconds => nil,
+    }
+  end
+
+  def handle_message(message, queue)
+    hash = JSON.parse message.body
+    # there may be test events sent from the s3 bucket which won't contain a Records array,
+    # we will skip those events and remove them from queue
+    if hash['Records'] then
+      # typically there will be only 1 record per event, but since it is an array we will
+      # treat it as if there could be more records
+      hash['Records'].each do |record|
+        # in case there are any events with Records that aren't s3 object-created events and can't therefore be
+        # processed by this plugin, we will skip them and remove them from queue
+        if record['eventSource'] == EVENT_SOURCE and record['eventName'].start_with?(EVENT_TYPE) then
+          # try download and :skip_delete if it fails
+          begin
+            response = @s3.get_object(
+              bucket: record['s3']['bucket']['name'],
+              key: record['s3']['object']['key']
+            )
+          rescue => e
+            @logger.warn("issuing :skip_delete on failed download", :bucket => record['s3']['bucket']['name'], :object => record['s3']['object']['key'], :error => e)
+            throw :skip_delete
+          end
+          # verify downloaded content size
+          if response.content_length == record['s3']['object']['size'] then
+            body = response.body
+            # if necessary unzip
+            if response.content_encoding == "gzip" or record['s3']['object']['key'].end_with?(".gz") then
+              begin
+                temp = Zlib::GzipReader.new(body)
+              rescue => e
+                @logger.warn("content is marked to be gzipped but can't unzip it, assuming plain text", :bucket => record['s3']['bucket']['name'], :object => record['s3']['object']['key'], :error => e)
+                temp = body
+              end
+              body = temp
+            end
+            # process the plain text content
+            begin
+              lines = body.read.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: "\u2370").split(/\n/)
+              lines.each do |line|
+                @codec.decode(line) do |event|
+                  decorate(event)
+
+                  event.set('[@metadata][s3_bucket_name]', record['s3']['bucket']['name'])
+                  event.set('[@metadata][s3_object_key]', record['s3']['object']['key'])
+
+                  queue << event
+                end
+              end
+            rescue => e
+              @logger.warn("issuing :skip_delete on failed plain text processing", :bucket => record['s3']['bucket']['name'], :object => record['s3']['object']['key'], :error => e)
+              throw :skip_delete
+            end
+          # otherwise try again later
+          else
+            @logger.warn("issuing :skip_delete on wrong download content size", :bucket => record['s3']['bucket']['name'], :object => record['s3']['object']['key'],
+              :download_size => response.content_length, :expected => record['s3']['object']['size'])
+            throw :skip_delete
+          end
+        end
+      end
+    end
+  end
+
+  def run(queue)
+    # ensure we can stop logstash correctly
+    poller.before_request do |stats|
+      if stop? then
+        @logger.warn("issuing :stop_polling on stop?", :queue => @queue)
+        # this can take up to "Receive Message Wait Time" (of the sqs queue) seconds to be recognized
+        throw :stop_polling
+      end
+    end
+    # poll a message and process it
+    run_with_backoff do
+      poller.poll(polling_options) do |message|
+        handle_message(message, queue)
+      end
+    end
+  end
+
+  private
+  # Runs an AWS request inside a Ruby block with an exponential backoff in case
+  # we experience a ServiceError.
+  #
+  # @param [Integer] max_time maximum amount of time to sleep before giving up.
+  # @param [Integer] sleep_time the initial amount of time to sleep before retrying.
+  # @param [Block] block Ruby code block to execute.
+  def run_with_backoff(max_time = MAX_TIME_BEFORE_GIVING_UP, sleep_time = BACKOFF_SLEEP_TIME, &block)
+    next_sleep = sleep_time
+    begin
+      block.call
+      next_sleep = sleep_time
+    rescue Aws::SQS::Errors::ServiceError => e
+      @logger.warn("Aws::SQS::Errors::ServiceError ... retrying SQS request with exponential backoff", :queue => @queue, :sleep_time => sleep_time, :error => e)
+      sleep(next_sleep)
+      next_sleep = next_sleep > max_time ? sleep_time : sleep_time * BACKOFF_FACTOR
+      retry
+    end
+  end
+
+end # class
logstash-input-sqs_s3.gemspec CHANGED
@@ -1,6 +1,6 @@
 Gem::Specification.new do |s|
   s.name = 'logstash-input-sqs_s3'
-  s.version = '1.1.1'
+  s.version = '1.1.2'
   s.licenses = ['Apache License (2.0)']
   s.summary = "Get logs from AWS s3 buckets as issued by an object-created event via sqs."
   s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program. Full credit goes to Heiko Finzel. Republishing this gem to support Logstash 5."
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: logstash-input-sqs_s3
 version: !ruby/object:Gem::Version
-  version: 1.1.1
+  version: 1.1.2
 platform: ruby
 authors:
 - Heiko Finzel
@@ -88,8 +88,8 @@ files:
 - NOTICE.TXT
 - README.md
 - lib/logstash/inputs/sqs_s3.rb
+- lib/logstash/inputs/sqs_s3.rb.save
 - logstash-input-sqs_s3.gemspec
-- spec/inputs/s3sqs_spec.rb
 - spec/inputs/sqs_s3_spec.rb
 - spec/spec_helper.rb
 homepage: https://www.boreus.de
@@ -119,6 +119,5 @@ signing_key:
 specification_version: 4
 summary: Get logs from AWS s3 buckets as issued by an object-created event via sqs.
 test_files:
-- spec/inputs/s3sqs_spec.rb
 - spec/inputs/sqs_s3_spec.rb
 - spec/spec_helper.rb
spec/inputs/s3sqs_spec.rb DELETED
@@ -1,9 +0,0 @@
-# encoding: utf-8
-require "logstash/devutils/rspec/spec_helper"
-require "logstash/inputs/s3sqs"
-
-describe LogStash::Inputs::S3SQS do
-
-  true.should be_true
-
-end