logstash-input-s3-sns-sqs 1.6.1 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/lib/logstash/inputs/codec_factory.rb +37 -0
- data/lib/logstash/inputs/mime/{MagicgzipValidator.rb → magic_gzip_validator.rb} +0 -0
- data/lib/logstash/inputs/s3/client_factory.rb +61 -0
- data/lib/logstash/inputs/s3/downloader.rb +55 -0
- data/lib/logstash/inputs/s3snssqs.rb +195 -413
- data/lib/logstash/inputs/s3snssqs/log_processor.rb +119 -0
- data/lib/logstash/inputs/sqs/poller.rb +183 -0
- data/logstash-input-s3-sns-sqs.gemspec +6 -7
- data/spec/inputs/s3sqs_spec.rb +55 -4
- metadata +35 -18
- data/spec/spec_helper.rb +0 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 253c85cd1d1dfa22a59d282a0eeae4e5a71c5630473db65a768fe3e00131adc9
+  data.tar.gz: 5e98e0d9b47c7f9b47d6e11aefa2d5c14f59e4fe6c66a7bd0934a250bb8fbcfb
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: ee38fcc3de70af94b7de1570b054cdf435224c77b64bebd1db9b8eee3a4097b91f0320ce09d732fdefd8b1d8e0ca722c0a9b799e49dceda881e36bb7b26417e0
+  data.tar.gz: d6818a6bdead5aae583a09e2af6e1e869a7fee4902c6503def1150584afb16f415a825eb2a34b2e8a05130d8e6348bbda28f4885852e8eb92ec552e68382d15c
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,13 @@
+##2.0.0
+Breaking Changes:
+- s3_key_prefix was never functional and will be removed. Actually only used for metadata.folder backward compatibility.
+  config for s3 paths are regex (if not exact match)
+- s3_options_by_bucket substitutes all s3_* options
+  We will merge deprecated options into the new structure for one release
+Changes:
+- Refactor plugin structure to be more modular
+- Rework threadding design
+- introduce s3_options_by_bucket to configure settings (e.g aws_options_hash or type)
 ##1.6.1
 - Fix typo in gzip error logging
 ##1.6.0
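
The headline change is s3_options_by_bucket, which bundles per-bucket credentials and per-folder codec/type settings into one array-of-hashes option. A minimal sketch of the structure, reusing the placeholder bucket, role and folder names from the commented example further down in this diff (only the keys are significant):

    # Hypothetical value for the new s3_options_by_bucket option; names are placeholders.
    s3_options_by_bucket = [
      {
        'bucket_name' => 'my-beautiful-bucket',
        'credentials' => { 'role' => 'aws:role:arn:for:bucket:access' },
        'folders'     => [
          { 'key' => 'my_folder',       'codec' => 'json',        'type' => 'my_lovely_index' },
          { 'key' => 'my_other_folder', 'codec' => 'json_stream', 'type' => '' }
        ]
      },
      {
        'bucket_name' => 'my-other-bucket',
        'credentials' => {
          'access_key_id'     => 'some-id',
          'secret_access_key' => 'some-secret-key'
        },
        'folders' => [{ 'key' => '' }]
      }
    ]

For one transition release, register (see the s3snssqs.rb diff below) builds this same structure from the deprecated s3_* and set_codec_by_folder options when s3_options_by_bucket is not set.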
data/lib/logstash/inputs/codec_factory.rb
ADDED
@@ -0,0 +1,37 @@
+# CodecFactory:
+# lazy-fetch codec plugins
+
+class CodecFactory
+  def initialize(logger, options)
+    @logger = logger
+    @default_codec = options[:default_codec]
+    @codec_by_folder = options[:codec_by_folder]
+    @codecs = {
+      'default' => @default_codec
+    }
+  end
+
+  def get_codec(record)
+    codec = find_codec(record)
+    if @codecs[codec].nil?
+      @codecs[codec] = get_codec_plugin(codec)
+    end
+    @logger.debug("Switching to codec #{codec}") if codec != 'default'
+    return @codecs[codec]
+  end
+
+  private
+
+  def find_codec(record)
+    bucket, key, folder = record[:bucket], record[:key], record[:folder]
+    unless @codec_by_folder[bucket].nil?
+      @logger.debug("trying to find codec for folder #{folder}", :codec => @codec_by_folder[bucket][folder])
+      return @codec_by_folder[bucket][folder] unless @codec_by_folder[bucket][folder].nil?
+    end
+    return 'default'
+  end
+
+  def get_codec_plugin(name, options = {})
+    LogStash::Plugin.lookup('codec', name).new(options)
+  end
+end
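
CodecFactory resolves a codec per record and caches the instantiated plugin. The lookup behind find_codec is just bucket => folder => codec name, with a fall-back to 'default'. A minimal, plain-Ruby sketch of that lookup (bucket, folder and codec names are placeholders; in the plugin the hashes are regex-keyed via hash_key_is_regex):

    # Sketch of the bucket => { folder => codec } lookup that find_codec performs.
    codec_by_folder = { 'my-beautiful-bucket' => { 'my_folder' => 'json' } }
    record = { bucket: 'my-beautiful-bucket', key: 'my_folder/2019/12/31/app.log.gz', folder: 'my_folder' }
    codec_name = codec_by_folder.dig(record[:bucket], record[:folder]) || 'default'
    # => "json"; get_codec would then instantiate and cache that codec plugin via LogStash::Plugin.lookup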
data/lib/logstash/inputs/mime/{MagicgzipValidator.rb → magic_gzip_validator.rb}
RENAMED
File without changes
data/lib/logstash/inputs/s3/client_factory.rb
ADDED
@@ -0,0 +1,61 @@
+# not needed - Mutex is part of core lib:
+#require 'thread'
+
+class S3ClientFactory
+
+  def initialize(logger, options, aws_options_hash)
+    @logger = logger
+    @aws_options_hash = aws_options_hash
+    @s3_default_options = Hash[options[:s3_default_options].map { |k, v| [k.to_sym, v] }]
+    @aws_options_hash.merge!(@s3_default_options) unless @s3_default_options.empty?
+    @sts_client = Aws::STS::Client.new(region: options[:aws_region])
+    @credentials_by_bucket = options[:s3_credentials_by_bucket]
+    @logger.debug("Credentials by Bucket", :credentials => @credentials_by_bucket)
+    @default_session_name = options[:s3_role_session_name]
+    @clients_by_bucket = {}
+    #@mutexes_by_bucket = {}
+    @creation_mutex = Mutex.new
+  end
+
+  def get_s3_client(bucket_name)
+    bucket_symbol = bucket_name.to_sym
+    @creation_mutex.synchronize do
+      if @clients_by_bucket[bucket_symbol].nil?
+        options = @aws_options_hash.clone
+        unless @credentials_by_bucket[bucket_name].nil?
+          options.merge!(credentials: get_s3_auth(@credentials_by_bucket[bucket_name]))
+        end
+        @clients_by_bucket[bucket_symbol] = Aws::S3::Client.new(options)
+        @logger.debug("Created a new S3 Client", :bucket_name => bucket_name, :client => @clients_by_bucket[bucket_symbol], :used_options => options)
+        #@mutexes_by_bucket[bucket_symbol] = Mutex.new
+      end
+    end
+    # to be thread-safe, one uses this method like this:
+    # s3_client_factory.get_s3_client(my_s3_bucket) do
+    #   ... do stuff ...
+    # end
+    # FIXME: this does not allow concurrent downloads from the same bucket!
+    #@mutexes_by_bucket[bucket_symbol].synchronize do
+    # So we are testing this without this mutex.
+    yield @clients_by_bucket[bucket_symbol]
+    #end
+  end
+
+  private
+
+  def get_s3_auth(credentials)
+    # reminder: these are auto-refreshing!
+    if credentials.key?('role')
+      @logger.debug("Assume Role", :role => credentials["role"])
+      return Aws::AssumeRoleCredentials.new(
+        client: @sts_client,
+        role_arn: credentials['role'],
+        role_session_name: @default_session_name
+      )
+    elsif credentials.key?('access_key_id') && credentials.key?('secret_access_key')
+      @logger.debug("Fetch credentials", :access_key => credentials['access_key_id'])
+      return Aws::Credentials.new(credentials)
+    end
+  end
+
+end # class
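
S3ClientFactory hands out one cached Aws::S3::Client per bucket and, as the comment in get_s3_client notes, is meant to be called with a block. A sketch of that calling convention, mirroring how S3Downloader below uses it; logger, aws_options_hash and all names here are stand-ins, not values from this diff:

    # Hypothetical wiring; assumes a logger and a valid aws_options_hash are available.
    factory = S3ClientFactory.new(logger, {
      aws_region: 'eu-central-1',
      s3_default_options: {},
      s3_credentials_by_bucket: {
        'my-beautiful-bucket' => { 'role' => 'aws:role:arn:for:bucket:access' }
      },
      s3_role_session_name: 'logstash'
    }, aws_options_hash)

    factory.get_s3_client('my-beautiful-bucket') do |s3|
      # the yielded object is a plain Aws::S3::Client, created once per bucket and reused
      s3.get_object(bucket: 'my-beautiful-bucket', key: 'my_folder/app.log.gz', response_target: '/tmp/app.log.gz')
    end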
data/lib/logstash/inputs/s3/downloader.rb
ADDED
@@ -0,0 +1,55 @@
+# encoding: utf-8
+require 'fileutils'
+require 'thread'
+
+class S3Downloader
+
+  def initialize(logger, stop_semaphore, options)
+    @logger = logger
+    @stopped = stop_semaphore
+    @factory = options[:s3_client_factory]
+    @delete_on_success = options[:delete_on_success]
+  end
+
+  def copy_s3object_to_disk(record)
+    # (from docs) WARNING:
+    # yielding data to a block disables retries of networking errors!
+    begin
+      @factory.get_s3_client(record[:bucket]) do |s3|
+        response = s3.get_object(
+          bucket: record[:bucket],
+          key: record[:key],
+          response_target: record[:local_file]
+        )
+      end
+    rescue Aws::S3::Errors::ServiceError => e
+      @logger.error("Unable to download file. Requeuing the message", :error => e, :record => record)
+      # prevent sqs message deletion
+      throw :skip_delete
+    end
+    throw :skip_delete if stop?
+    return true
+  end
+
+  def cleanup_local_object(record)
+    FileUtils.remove_entry_secure(record[:local_file], true) if ::File.exists?(record[:local_file])
+  rescue Exception => e
+    @logger.warn("Could not delete file", :file => record[:local_file], :error => e)
+  end
+
+  def cleanup_s3object(record)
+    return unless @delete_on_success
+    begin
+      @factory.get_s3_client(record[:bucket]) do |s3|
+        s3.delete_object(bucket: record[:bucket], key: record[:key])
+      end
+    rescue Exception => e
+      @logger.warn("Failed to delete s3 object", :record => record, :error => e)
+    end
+  end
+
+  def stop?
+    @stopped.value
+  end
+
+end # class
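
The downloader signals failure with throw :skip_delete rather than a return value, so the SQS message is not deleted and becomes visible again for a retry. A pure-Ruby sketch of that catch/throw flow as the worker thread (in the s3snssqs.rb diff below) uses it; handle and fail_download are made-up names for illustration:

    # Sketch of the :skip_delete flow; the real code throws from inside
    # copy_s3object_to_disk / process when a download or decode fails.
    def handle(fail_download)
      completed = catch(:skip_delete) do
        throw :skip_delete if fail_download
        true  # processed successfully
      end
      completed ? "delete SQS message" : "leave message for retry"
    end

    puts handle(false)  # => delete SQS message
    puts handle(true)   # => leave message for retry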
data/lib/logstash/inputs/s3snssqs.rb
CHANGED
@@ -1,15 +1,20 @@
 # encoding: utf-8
-#
 require "logstash/inputs/threadable"
 require "logstash/namespace"
 require "logstash/timestamp"
 require "logstash/plugin_mixins/aws_config"
+require "logstash/shutdown_watcher"
 require "logstash/errors"
 require 'logstash/inputs/s3sqs/patch'
 require "aws-sdk"
-
-
-require
+# "object-oriented interfaces on top of API clients"...
+# => Overhead. FIXME: needed?
+#require "aws-sdk-resources"
+require "fileutils"
+require "concurrent"
+# unused in code:
+#require "stud/interval"
+#require "digest/md5"
 
 require 'java'
 java_import java.io.InputStream
@@ -19,6 +24,14 @@ java_import java.io.BufferedReader
 java_import java.util.zip.GZIPInputStream
 java_import java.util.zip.ZipException
 
+# our helper classes
+# these may go into this file for brevity...
+require_relative 'sqs/poller'
+require_relative 's3/client_factory'
+require_relative 's3/downloader'
+require_relative 'codec_factory'
+require_relative 's3snssqs/log_processor'
+
 Aws.eager_autoload!
 
 # Get logs from AWS s3 buckets as issued by an object-created event via sqs.
@@ -89,472 +102,241 @@ Aws.eager_autoload!
 #
 class LogStash::Inputs::S3SNSSQS < LogStash::Inputs::Threadable
   include LogStash::PluginMixins::AwsConfig::V2
-
-  BACKOFF_SLEEP_TIME = 1
-  BACKOFF_FACTOR = 2
-  MAX_TIME_BEFORE_GIVING_UP = 60
-  EVENT_SOURCE = 'aws:s3'
-  EVENT_TYPE = 'ObjectCreated'
+  include LogProcessor
 
   config_name "s3snssqs"
 
   default :codec, "plain"
 
+
+
+  # Future config might look somewhat like this:
+  #
+  # s3_options_by_bucket = [
+  #   {
+  #     "bucket_name": "my-beautiful-bucket",
+  #     "credentials": { "role": "aws:role:arn:for:bucket:access" },
+  #     "folders": [
+  #       {
+  #         "key": "my_folder",
+  #         "codec": "json"
+  #         "type": "my_lovely_index"
+  #       },
+  #       {
+  #         "key": "my_other_folder",
+  #         "codec": "json_stream"
+  #         "type": ""
+  #       }
+  #     ]
+  #   },
+  #   {
+  #     "bucket_name": "my-other-bucket"
+  #     "credentials": {
+  #       "access_key_id": "some-id",
+  #       "secret_access_key": "some-secret-key"
+  #     },
+  #     "folders": [
+  #       {
+  #         "key": ""
+  #       }
+  #     ]
+  #   }
+  # }
+
+  config :s3_key_prefix, :validate => :string, :default => '', :deprecated => true #, :obsolete => " Will be moved to s3_options_by_bucket/types"
+
+  config :s3_access_key_id, :validate => :string, :deprecated => true #, :obsolete => "Please migrate to :s3_options_by_bucket. We will remove this option in the next Version"
+  config :s3_secret_access_key, :validate => :string, :deprecated => true #, :obsolete => "Please migrate to :s3_options_by_bucket. We will remove this option in the next Version"
+  config :s3_role_arn, :validate => :string, :deprecated => true #, :obsolete => "Please migrate to :s3_options_by_bucket. We will remove this option in the next Version"
+
+  config :set_codec_by_folder, :validate => :hash, :default => {}, :deprecated => true #, :obsolete => "Please migrate to :s3_options_by_bucket. We will remove this option in the next Version"
+
+  # Default Options for the S3 clients
+  config :s3_default_options, :validate => :hash, :required => false, :default => {}
+  # We need a list of buckets, together with role arns and possible folder/codecs:
+  config :s3_options_by_bucket, :validate => :array, :required => false # TODO: true
+  # Session name to use when assuming an IAM role
+  config :s3_role_session_name, :validate => :string, :default => "logstash"
+
+  ### sqs
   # Name of the SQS Queue to pull messages from. Note that this is just the name of the queue, not the URL or ARN.
   config :queue, :validate => :string, :required => true
-  config :s3_key_prefix, :validate => :string, :default => ''
-  #Sometimes you need another key for s3. This is a first test...
-  config :s3_access_key_id, :validate => :string
-  config :s3_secret_access_key, :validate => :string
   config :queue_owner_aws_account_id, :validate => :string, :required => false
-  #If you have different file-types in you s3 bucket, you could define codec by folder
-  #set_codec_by_folder => {"My-ELB-logs" => "plain"}
-  config :set_codec_by_folder, :validate => :hash, :default => {}
-  config :delete_on_success, :validate => :boolean, :default => false
-  config :sqs_explicit_delete, :validate => :boolean, :default => false
   # Whether the event is processed though an SNS to SQS. (S3>SNS>SQS = true |S3>SQS=false)
   config :from_sns, :validate => :boolean, :default => true
-
-  config :
-  config :temporary_directory, :validate => :string, :default => File.join(Dir.tmpdir, "logstash")
-  # The AWS IAM Role to assume, if any.
-  # This is used to generate temporary credentials typically for cross-account access.
-  # See https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRole.html for more information.
-  config :s3_role_arn, :validate => :string
-  # Session name to use when assuming an IAM role
-  config :s3_role_session_name, :validate => :string, :default => "logstash"
+  config :sqs_skip_delete, :validate => :boolean, :default => false
+  config :delete_on_success, :validate => :boolean, :default => false
   config :visibility_timeout, :validate => :number, :default => 600
 
+  ### system
+  config :temporary_directory, :validate => :string, :default => File.join(Dir.tmpdir, "logstash")
+  # To run in multiple threads use this
+  config :consumer_threads, :validate => :number, :default => 1
 
-  attr_reader :poller
-  attr_reader :s3
-
-
-  def set_codec (folder)
-    begin
-      @logger.debug("Automatically switching from #{@codec.class.config_name} to #{set_codec_by_folder[folder]} codec", :plugin => self.class.config_name)
-      LogStash::Plugin.lookup("codec", "#{set_codec_by_folder[folder]}").new("charset" => @codec.charset)
-    rescue Exception => e
-      @logger.error("Failed to set_codec with error", :error => e)
-    end
-  end
 
   public
-  def register
-    require "fileutils"
-    require "digest/md5"
-    require "aws-sdk-resources"
 
-
-    #make this hash keys lookups match like regex
-    hash_key_is_regex(set_codec_by_folder)
-    @logger.info("Registering SQS input", :queue => @queue)
-    setup_queue
+  # --- BEGIN plugin interface ----------------------------------------#
 
+  # initialisation
+  def register
+    # prepare system
     FileUtils.mkdir_p(@temporary_directory) unless Dir.exist?(@temporary_directory)
-  end
-
-  def setup_queue
-    aws_sqs_client = Aws::SQS::Client.new(aws_options_hash)
-    queue_url = aws_sqs_client.get_queue_url({ queue_name: @queue, queue_owner_aws_account_id: @queue_owner_aws_account_id})[:queue_url]
-    @poller = Aws::SQS::QueuePoller.new(queue_url, :client => aws_sqs_client)
-    get_s3client
-    @s3_resource = get_s3object
-  rescue Aws::SQS::Errors::ServiceError => e
-    @logger.error("Cannot establish connection to Amazon SQS", :error => e)
-    raise LogStash::ConfigurationError, "Verify the SQS queue name and your credentials"
-  end
-
-  def polling_options
-    {
-      # we will query 1 message at a time, so we can ensure correct error handling if we can't download a single file correctly
-      # (we will throw :skip_delete if download size isn't correct to process the event again later
-      # -> set a reasonable "Default Visibility Timeout" for your queue, so that there's enough time to process the log files)
-      :max_number_of_messages => 1,
-      # we will use the queue's setting, a good value is 10 seconds
-      # (to ensure fast logstash shutdown on the one hand and few api calls on the other hand)
-      :skip_delete => false,
-      :visibility_timeout => @visibility_timeout,
-      :wait_time_seconds => nil,
-    }
-  end
 
-
-
-  @
-
-
-
-
-
-
-
-
-
-
-
-
-  #
-
-
-
-
-
-    type_folder = get_object_folder(key)
-    # Set input codec by :set_codec_by_folder
-    instance_codec = set_codec(type_folder) unless set_codec_by_folder["#{type_folder}"].nil?
-    # try download and :skip_delete if it fails
-    #if record['s3']['object']['size'] < 10000000 then
-    process_log(bucket, key, type_folder, instance_codec, queue, message, size)
-    #else
-    #  @logger.info("Your file is too big")
-    #end
+    @credentials_by_bucket = hash_key_is_regex({})
+    # create the bucket=>folder=>codec lookup from config options
+    @codec_by_folder = hash_key_is_regex({})
+    @type_by_folder = hash_key_is_regex({})
+
+    # use deprecated settings only if new config is missing:
+    if @s3_options_by_bucket.nil?
+      # We don't know any bucket name, so we must rely on a "catch-all" regex
+      s3_options = {
+        'bucket_name' => '.*',
+        'folders' => @set_codec_by_folder.map { |key, codec|
+          { 'key' => key, 'codec' => codec }
+        }
+      }
+      if @s3_role_arn.nil?
+        # access key/secret key pair needed
+        unless @s3_access_key_id.nil? or @s3_secret_access_key.nil?
+          s3_options['credentials'] = {
+            'access_key_id' => @s3_access_key_id,
+            'secret_access_key' => @s3_secret_access_key
+          }
|
end
|
208
|
+
else
|
209
|
+
s3_options['credentials'] = {
|
210
|
+
'role' => @s3_role_arn
|
211
|
+
}
|
213
212
|
end
|
213
|
+
@s3_options_by_bucket = [s3_options]
|
214
214
|
end
|
215
|
-
end
|
216
|
-
|
217
|
-
private
|
218
|
-
def process_log(bucket , key, folder, instance_codec, queue, message, size)
|
219
|
-
s3bucket = @s3_resource.bucket(bucket)
|
220
|
-
@logger.debug("Lets go reading file", :bucket => bucket, :key => key)
|
221
|
-
object = s3bucket.object(key)
|
222
|
-
filename = File.join(temporary_directory, File.basename(key))
|
223
|
-
if download_remote_file(object, filename)
|
224
|
-
if process_local_log( filename, key, folder, instance_codec, queue, bucket, message, size)
|
225
|
-
begin
|
226
|
-
FileUtils.remove_entry_secure(filename, true) if File.exists? filename
|
227
|
-
delete_file_from_bucket(object)
|
228
|
-
rescue Exception => e
|
229
|
-
@logger.debug("We had problems to delete your file", :file => filename, :error => e)
|
230
|
-
end
|
231
|
-
end
|
232
|
-
else
|
233
|
-
begin
|
234
|
-
FileUtils.remove_entry_secure(filename, true) if File.exists? filename
|
235
|
-
rescue Exception => e
|
236
|
-
@logger.debug("We had problems clean up your tmp dir", :file => filename, :error => e)
|
237
|
-
end
|
238
|
-
end
|
239
|
-
end
|
240
|
-
|
241
|
-
private
|
242
|
-
# Stream the remove file to the local disk
|
243
|
-
#
|
244
|
-
# @param [S3Object] Reference to the remove S3 objec to download
|
245
|
-
# @param [String] The Temporary filename to stream to.
|
246
|
-
# @return [Boolean] True if the file was completely downloaded
|
247
|
-
def download_remote_file(remote_object, local_filename)
|
248
|
-
completed = false
|
249
|
-
@logger.debug("S3 input: Download remote file", :remote_key => remote_object.key, :local_filename => local_filename)
|
250
|
-
File.open(local_filename, 'wb') do |s3file|
|
251
|
-
return completed if stop?
|
252
|
-
begin
|
253
|
-
remote_object.get(:response_target => s3file)
|
254
|
-
rescue Aws::S3::Errors::ServiceError => e
|
255
|
-
@logger.error("Unable to download file. We´ll requeue the message", :file => remote_object.inspect)
|
256
|
-
throw :skip_delete
|
257
|
-
end
|
258
|
-
end
|
259
|
-
completed = true
|
260
215
|
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
# Read the content of the local file
|
267
|
-
#
|
268
|
-
# @param [Queue] Where to push the event
|
269
|
-
# @param [String] Which file to read from
|
270
|
-
# @return [Boolean] True if the file was completely read, false otherwise.
|
271
|
-
def process_local_log(filename, key, folder, instance_codec, queue, bucket, message, size)
|
272
|
-
@logger.debug('Processing file', :filename => filename)
|
273
|
-
metadata = {}
|
274
|
-
start_time = Time.now
|
275
|
-
# Currently codecs operates on bytes instead of stream.
|
276
|
-
# So all IO stuff: decompression, reading need to be done in the actual
|
277
|
-
# input and send as bytes to the codecs.
|
278
|
-
read_file(filename) do |line|
|
279
|
-
if (Time.now - start_time) >= (@visibility_timeout.to_f / 100.0 * 90.to_f)
|
280
|
-
@logger.info("Increasing the visibility_timeout ... ", :timeout => @visibility_timeout, :filename => filename, :filesize => size, :start => start_time )
|
281
|
-
poller.change_message_visibility_timeout(message, @visibility_timeout)
|
282
|
-
start_time = Time.now
|
216
|
+
@s3_options_by_bucket.each do |options|
|
217
|
+
bucket = options['bucket_name']
|
218
|
+
if options.key?('credentials')
|
219
|
+
@credentials_by_bucket[bucket] = options['credentials']
|
283
220
|
end
|
284
|
-
if
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
# If the event is only metadata the event will be drop.
|
296
|
-
# This was the behavior of the pre 1.5 plugin.
|
297
|
-
#
|
298
|
-
# The line need to go through the codecs to replace
|
299
|
-
# unknown bytes in the log stream before doing a regexp match or
|
300
|
-
# you will get a `Error: invalid byte sequence in UTF-8'
|
301
|
-
local_decorate_and_queue(event, queue, key, folder, metadata, bucket)
|
221
|
+
+      if options.key?('folders')
+        # make these hashes do key lookups using regex matching
+        folders = hash_key_is_regex({})
+        types = hash_key_is_regex({})
+        options['folders'].each do |entry|
+          @logger.debug("options for folder ", :folder => entry)
+          folders[entry['key']] = entry['codec'] if entry.key?('codec')
+          types[entry['key']] = entry['type'] if entry.key?('type')
+        end
+        @codec_by_folder[bucket] = folders unless folders.empty?
+        @type_by_folder[bucket] = types unless types.empty?
       end
     end
-    @logger.debug("end if file #{filename}")
-    #@logger.info("event pre flush", :event => event)
-    # #ensure any stateful codecs (such as multi-line ) are flushed to the queue
-    instance_codec.flush do |event|
-      local_decorate_and_queue(event, queue, key, folder, metadata, bucket)
-      @logger.debug("We´e to flush an incomplete event...", :event => event)
-    end
-
-    return true
-  end # def process_local_log
-
-  private
-  def local_decorate_and_queue(event, queue, key, folder, metadata, bucket)
-    @logger.debug('decorating event', :event => event.to_s)
-    if event_is_metadata?(event)
-      @logger.debug('Event is metadata, updating the current cloudfront metadata', :event => event)
-      update_metadata(metadata, event)
-    else
-
-      decorate(event)
-
-      event.set("cloudfront_version", metadata[:cloudfront_version]) unless metadata[:cloudfront_version].nil?
-      event.set("cloudfront_fields", metadata[:cloudfront_fields]) unless metadata[:cloudfront_fields].nil?
-
-      event.set("[@metadata][s3][object_key]", key)
-      event.set("[@metadata][s3][bucket_name]", bucket)
-      event.set("[@metadata][s3][object_folder]", folder)
-      @logger.debug('add metadata', :object_key => key, :bucket => bucket, :folder => folder)
-      queue << event
-    end
-  end
-
-
-  private
-  def get_object_folder(key)
-    if match=/#{s3_key_prefix}\/?(?<type_folder>.*?)\/.*/.match(key)
-      return match['type_folder']
-    else
-      return ""
-    end
-  end
-
-  private
-  def read_file(filename, &block)
-    if gzip?(filename)
-      read_gzip_file(filename, block)
-    else
-      read_plain_file(filename, block)
-    end
-  end
-
-  def read_plain_file(filename, block)
-    File.open(filename, 'rb') do |file|
-      file.each(&block)
-    end
-  end
-
-  private
-  def read_gzip_file(filename, block)
-    file_stream = FileInputStream.new(filename)
-    gzip_stream = GZIPInputStream.new(file_stream)
-    decoder = InputStreamReader.new(gzip_stream, "UTF-8")
-    buffered = BufferedReader.new(decoder)
-
-    while (line = buffered.readLine())
-      block.call(line)
-    end
-  rescue ZipException => e
-    @logger.error("Gzip codec: We cannot uncompress the gzip file", :filename => filename, :error => e)
-  ensure
-    buffered.close unless buffered.nil?
-    decoder.close unless decoder.nil?
-    gzip_stream.close unless gzip_stream.nil?
-    file_stream.close unless file_stream.nil?
-  end
-
-  private
-  def gzip?(filename)
-    return true if filename.end_with?('.gz','.gzip')
-    MagicGzipValidator.new(File.new(filename, 'r')).valid?
-  rescue Exception => e
-    @logger.debug("Problem while gzip detection", :error => e)
-  end
-
-  private
-  def delete_file_from_bucket(object)
-    if @delete_on_success
-      object.delete()
-    end
-  end
-
-
-  private
-  def get_s3client
-    if s3_access_key_id and s3_secret_access_key
-      @logger.debug("Using S3 Credentials from config", :ID => aws_options_hash.merge(:access_key_id => s3_access_key_id, :secret_access_key => s3_secret_access_key) )
-      @s3_client = Aws::S3::Client.new(aws_options_hash.merge(:access_key_id => s3_access_key_id, :secret_access_key => s3_secret_access_key))
-    elsif @s3_role_arn
-      @s3_client = Aws::S3::Client.new(aws_options_hash.merge!({ :credentials => s3_assume_role }))
-      @logger.debug("Using S3 Credentials from role", :s3client => @s3_client.inspect, :options => aws_options_hash.merge!({ :credentials => s3_assume_role }))
-    else
-      @s3_client = Aws::S3::Client.new(aws_options_hash)
-    end
-  end
-
-  private
-  def get_s3object
-    s3 = Aws::S3::Resource.new(client: @s3_client)
-  end
-
-  private
-  def s3_assume_role()
-    Aws::AssumeRoleCredentials.new(
-      client: Aws::STS::Client.new(region: @region),
-      role_arn: @s3_role_arn,
-      role_session_name: @s3_role_session_name
-    )
-  end
-
-  private
-  def event_is_metadata?(event)
-    return false unless event.get("message").class == String
-    line = event.get("message")
-    version_metadata?(line) || fields_metadata?(line)
-  end
-
-  private
-  def version_metadata?(line)
-    line.start_with?('#Version: ')
-  end
-
-  private
-  def fields_metadata?(line)
-    line.start_with?('#Fields: ')
-  end
-
-  private
-  def update_metadata(metadata, event)
-    line = event.get('message').strip
-
-    if version_metadata?(line)
-      metadata[:cloudfront_version] = line.split(/#Version: (.+)/).last
-    end
 
-
-
-
+    @received_stop = Concurrent::AtomicBoolean.new(false)
+
+    # instantiate helpers
+    @sqs_poller = SqsPoller.new(@logger, @received_stop, @queue, {
+      queue_owner_aws_account_id: @queue_owner_aws_account_id,
+      from_sns: @from_sns,
+      sqs_explicit_delete: @sqs_explicit_delete,
+      visibility_timeout: @visibility_timeout
+    }, aws_options_hash)
+    @s3_client_factory = S3ClientFactory.new(@logger, {
+      aws_region: @region,
+      s3_default_options: @s3_default_options,
+      s3_credentials_by_bucket: @credentials_by_bucket,
+      s3_role_session_name: @s3_role_session_name
+    }, aws_options_hash)
+    @s3_downloader = S3Downloader.new(@logger, @received_stop, {
+      s3_client_factory: @s3_client_factory,
+      delete_on_success: @delete_on_success
+    })
+    @codec_factory = CodecFactory.new(@logger, {
+      default_codec: @codec,
+      codec_by_folder: @codec_by_folder
+    })
+    #@log_processor = LogProcessor.new(self)
+
+    # administrative stuff
+    @worker_threads = []
   end
 
-
-  def run(
-
-
-
-
-  else
-    #Fallback to simple single thread worker
-    # ensure we can stop logstash correctly
-    poller.before_request do |stats|
-      if stop? then
-        @logger.warn("issuing :stop_polling on stop?", :queue => @queue)
-        # this can take up to "Receive Message Wait Time" (of the sqs queue) seconds to be recognized
-        throw :stop_polling
-      end
-    end
-    # poll a message and process it
-    run_with_backoff do
-      poller.poll(polling_options) do |message|
-        begin
-          handle_message(message, queue, @codec.clone)
-          poller.delete_message(message)
-        rescue Exception => e
-          @logger.info("Error in poller block ... ", :error => e)
-        end
-      end
-    end
+  # startup
+  def run(logstash_event_queue)
+    #LogStash::ShutdownWatcher.abort_threshold(30)
+    # start them
+    @worker_threads = @consumer_threads.times.map do |_|
+      run_worker_thread(logstash_event_queue)
     end
+    # and wait (possibly infinitely) for them to shut down
+    @worker_threads.each { |t| t.join }
   end
 
-
+  # shutdown
   def stop
-
-
-
-
-
-
-
-
-  end
+    @received_stop.make_true
+    @worker_threads.each do |worker|
+      begin
+        @logger.info("Stopping thread ... ", :thread => worker.inspect)
+        worker.wakeup
+      rescue
+        @logger.error("Cannot stop thread ... try to kill him", :thread => worker.inspect)
+        worker.kill
       end
-      else
-        @logger.warn("Stopping all threads?", :queue => @queue)
     end
   end
 
+  # --- END plugin interface ------------------------------------------#
+
   private
-
+
+  def run_worker_thread(queue)
     Thread.new do
-      @logger.info("Starting new thread")
-
-
-
-
-
-
-
-
-      # poll a message and process it
-      run_with_backoff do
-        poller.poll(polling_options) do |message|
-          begin
-            handle_message(message, queue, @codec.clone)
-            poller.delete_message(message) if @sqs_explicit_delete
-          rescue Exception => e
-            @logger.info("Error in poller block ... ", :error => e)
-          end
+      @logger.info("Starting new worker thread")
+      @sqs_poller.run do |record|
+        throw :skip_delete if stop?
+        @logger.debug("Outside Poller: got a record", :record => record)
+        # record is a valid object with the keys ":bucket", ":key", ":size"
+        record[:local_file] = File.join(@temporary_directory, File.basename(record[:key]))
+        if @s3_downloader.copy_s3object_to_disk(record)
+          completed = catch(:skip_delete) do
+            process(record, queue)
          end
+          @s3_downloader.cleanup_local_object(record)
+          # re-throw if necessary:
+          throw :skip_delete unless completed
+          @s3_downloader.cleanup_s3object(record)
        end
      end
    end
  end
 
-
-
-
-
-
-
-  # @param [Block] block Ruby code block to execute.
-  def run_with_backoff(max_time = MAX_TIME_BEFORE_GIVING_UP, sleep_time = BACKOFF_SLEEP_TIME, &block)
-    next_sleep = sleep_time
-    begin
-      block.call
-      next_sleep = sleep_time
-    rescue Aws::SQS::Errors::ServiceError => e
-      @logger.warn("Aws::SQS::Errors::ServiceError ... retrying SQS request with exponential backoff", :queue => @queue, :sleep_time => sleep_time, :error => e)
-      sleep(next_sleep)
-      next_sleep = next_sleep > max_time ? sleep_time : sleep_time * BACKOFF_FACTOR
-      retry
+  # Will be remove in further releases...
+  def get_object_folder(key)
+    if match=/#{s3_key_prefix}\/?(?<type_folder>.*?)\/.*/.match(key)
+      return match['type_folder']
+    else
+      return ""
     end
   end
 
-  private
   def hash_key_is_regex(myhash)
     myhash.default_proc = lambda do |hash, lookup|
       result=nil
       hash.each_pair do |key, value|
         if %r[#{key}] =~ lookup
-          result=value
+          result = value
           break
         end
       end
       result
     end
+    # return input hash (convenience)
+    return myhash
+  end
+
+  def stop?
+    @received_stop.value
   end
+
 end # class
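
The deprecated folder and credential hashes keep working because hash_key_is_regex (kept above, now also returning the hash) treats every key as a regex when an exact lookup misses. A runnable sketch of that behavior with placeholder keys:

    # Keys act as regexes on missing-key lookups; the first matching key wins.
    def hash_key_is_regex(myhash)
      myhash.default_proc = lambda do |hash, lookup|
        result = nil
        hash.each_pair do |key, value|
          if %r[#{key}] =~ lookup
            result = value
            break
          end
        end
        result
      end
      myhash
    end

    codecs = hash_key_is_regex({ 'cloudtrail/.*' => 'json_stream', '.*' => 'plain' })
    puts codecs['cloudtrail/2019/12/31/app.json.gz']  # => json_stream
    puts codecs['elb/access.log']                     # => plain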