logstash-input-s3-sns-sqs 1.6.1 → 2.0.0
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/lib/logstash/inputs/codec_factory.rb +37 -0
- data/lib/logstash/inputs/mime/{MagicgzipValidator.rb → magic_gzip_validator.rb} +0 -0
- data/lib/logstash/inputs/s3/client_factory.rb +61 -0
- data/lib/logstash/inputs/s3/downloader.rb +55 -0
- data/lib/logstash/inputs/s3snssqs.rb +195 -413
- data/lib/logstash/inputs/s3snssqs/log_processor.rb +119 -0
- data/lib/logstash/inputs/sqs/poller.rb +183 -0
- data/logstash-input-s3-sns-sqs.gemspec +6 -7
- data/spec/inputs/s3sqs_spec.rb +55 -4
- metadata +35 -18
- data/spec/spec_helper.rb +0 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 253c85cd1d1dfa22a59d282a0eeae4e5a71c5630473db65a768fe3e00131adc9
+  data.tar.gz: 5e98e0d9b47c7f9b47d6e11aefa2d5c14f59e4fe6c66a7bd0934a250bb8fbcfb
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: ee38fcc3de70af94b7de1570b054cdf435224c77b64bebd1db9b8eee3a4097b91f0320ce09d732fdefd8b1d8e0ca722c0a9b799e49dceda881e36bb7b26417e0
+  data.tar.gz: d6818a6bdead5aae583a09e2af6e1e869a7fee4902c6503def1150584afb16f415a825eb2a34b2e8a05130d8e6348bbda28f4885852e8eb92ec552e68382d15c
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,13 @@
+##2.0.0
+Breaking Changes:
+- s3_key_prefix was never functional and will be removed. Actually only used for metadata.folder backward compatibility.
+  config for s3 paths are regex (if not exact match)
+- s3_options_by_bucket substitutes all s3_* options
+  We will merge deprecated options into the new structure for one release
+Changes:
+- Refactor plugin structure to be more modular
+- Rework threadding design
+- introduce s3_options_by_bucket to configure settings (e.g aws_options_hash or type)
 ##1.6.1
 - Fix typo in gzip error logging
 ##1.6.0
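For reference, the structure that the new s3_options_by_bucket option expects (taken from the comment block inside s3snssqs.rb further down in this diff) looks roughly like this as a Ruby literal; the bucket, role ARN and folder names here are invented placeholders:

    s3_options_by_bucket = [
      {
        'bucket_name' => 'my-beautiful-bucket',
        'credentials' => { 'role' => 'arn:aws:iam::123456789012:role/logstash-read' },
        'folders'     => [
          { 'key' => 'my_folder',       'codec' => 'json',        'type' => 'my_lovely_index' },
          { 'key' => 'my_other_folder', 'codec' => 'json_stream', 'type' => '' }
        ]
      }
    ]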
data/lib/logstash/inputs/codec_factory.rb ADDED
@@ -0,0 +1,37 @@
+# CodecFactory:
+# lazy-fetch codec plugins
+
+class CodecFactory
+  def initialize(logger, options)
+    @logger = logger
+    @default_codec = options[:default_codec]
+    @codec_by_folder = options[:codec_by_folder]
+    @codecs = {
+      'default' => @default_codec
+    }
+  end
+
+  def get_codec(record)
+    codec = find_codec(record)
+    if @codecs[codec].nil?
+      @codecs[codec] = get_codec_plugin(codec)
+    end
+    @logger.debug("Switching to codec #{codec}") if codec != 'default'
+    return @codecs[codec]
+  end
+
+  private
+
+  def find_codec(record)
+    bucket, key, folder = record[:bucket], record[:key], record[:folder]
+    unless @codec_by_folder[bucket].nil?
+      @logger.debug("trying to find codec for folder #{folder}", :codec => @codec_by_folder[bucket][folder])
+      return @codec_by_folder[bucket][folder] unless @codec_by_folder[bucket][folder].nil?
+    end
+    return 'default'
+  end
+
+  def get_codec_plugin(name, options = {})
+    LogStash::Plugin.lookup('codec', name).new(options)
+  end
+end
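A minimal sketch of how this factory is meant to be driven, mirroring the registration and processing code further down in this diff; the logger, bucket and folder names are placeholders:

    codec_factory = CodecFactory.new(logger, {
      default_codec: LogStash::Plugin.lookup('codec', 'plain').new,
      codec_by_folder: { 'my-bucket' => { 'cloudfront/' => 'json_stream' } }
    })
    # records follow the {:bucket, :key, :folder} convention used above
    codec = codec_factory.get_codec(bucket: 'my-bucket', key: 'cloudfront/E1/file.gz', folder: 'cloudfront/')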
data/lib/logstash/inputs/mime/{MagicgzipValidator.rb → magic_gzip_validator.rb} RENAMED
File without changes
data/lib/logstash/inputs/s3/client_factory.rb ADDED
@@ -0,0 +1,61 @@
+# not needed - Mutex is part of core lib:
+#require 'thread'
+
+class S3ClientFactory
+
+  def initialize(logger, options, aws_options_hash)
+    @logger = logger
+    @aws_options_hash = aws_options_hash
+    @s3_default_options = Hash[options[:s3_default_options].map { |k, v| [k.to_sym, v] }]
+    @aws_options_hash.merge!(@s3_default_options) unless @s3_default_options.empty?
+    @sts_client = Aws::STS::Client.new(region: options[:aws_region])
+    @credentials_by_bucket = options[:s3_credentials_by_bucket]
+    @logger.debug("Credentials by Bucket", :credentials => @credentials_by_bucket)
+    @default_session_name = options[:s3_role_session_name]
+    @clients_by_bucket = {}
+    #@mutexes_by_bucket = {}
+    @creation_mutex = Mutex.new
+  end
+
+  def get_s3_client(bucket_name)
+    bucket_symbol = bucket_name.to_sym
+    @creation_mutex.synchronize do
+      if @clients_by_bucket[bucket_symbol].nil?
+        options = @aws_options_hash.clone
+        unless @credentials_by_bucket[bucket_name].nil?
+          options.merge!(credentials: get_s3_auth(@credentials_by_bucket[bucket_name]))
+        end
+        @clients_by_bucket[bucket_symbol] = Aws::S3::Client.new(options)
+        @logger.debug("Created a new S3 Client", :bucket_name => bucket_name, :client => @clients_by_bucket[bucket_symbol], :used_options => options)
+        #@mutexes_by_bucket[bucket_symbol] = Mutex.new
+      end
+    end
+    # to be thread-safe, one uses this method like this:
+    # s3_client_factory.get_s3_client(my_s3_bucket) do
+    #   ... do stuff ...
+    # end
+    # FIXME: this does not allow concurrent downloads from the same bucket!
+    #@mutexes_by_bucket[bucket_symbol].synchronize do
+    # So we are testing this without this mutex.
+    yield @clients_by_bucket[bucket_symbol]
+    #end
+  end
+
+  private
+
+  def get_s3_auth(credentials)
+    # reminder: these are auto-refreshing!
+    if credentials.key?('role')
+      @logger.debug("Assume Role", :role => credentials["role"])
+      return Aws::AssumeRoleCredentials.new(
+        client: @sts_client,
+        role_arn: credentials['role'],
+        role_session_name: @default_session_name
+      )
+    elsif credentials.key?('access_key_id') && credentials.key?('secret_access_key')
+      @logger.debug("Fetch credentials", :access_key => credentials['access_key_id'])
+      return Aws::Credentials.new(credentials)
+    end
+  end
+
+end # class
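A minimal usage sketch, assuming the aws-sdk is loaded and default AWS credentials are available; bucket name, role ARN and region are invented:

    aws_options_hash = { region: 'eu-central-1' }
    factory = S3ClientFactory.new(logger, {
      aws_region: 'eu-central-1',
      s3_default_options: {},
      s3_credentials_by_bucket: {
        'cross-account-bucket' => { 'role' => 'arn:aws:iam::123456789012:role/logstash-read' }
      },
      s3_role_session_name: 'logstash'
    }, aws_options_hash)

    # clients are created lazily (one per bucket) and handed to the block:
    factory.get_s3_client('cross-account-bucket') do |s3|
      s3.head_object(bucket: 'cross-account-bucket', key: 'some/key.gz')
    end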
data/lib/logstash/inputs/s3/downloader.rb ADDED
@@ -0,0 +1,55 @@
+# encoding: utf-8
+require 'fileutils'
+require 'thread'
+
+class S3Downloader
+
+  def initialize(logger, stop_semaphore, options)
+    @logger = logger
+    @stopped = stop_semaphore
+    @factory = options[:s3_client_factory]
+    @delete_on_success = options[:delete_on_success]
+  end
+
+  def copy_s3object_to_disk(record)
+    # (from docs) WARNING:
+    # yielding data to a block disables retries of networking errors!
+    begin
+      @factory.get_s3_client(record[:bucket]) do |s3|
+        response = s3.get_object(
+          bucket: record[:bucket],
+          key: record[:key],
+          response_target: record[:local_file]
+        )
+      end
+    rescue Aws::S3::Errors::ServiceError => e
+      @logger.error("Unable to download file. Requeuing the message", :error => e, :record => record)
+      # prevent sqs message deletion
+      throw :skip_delete
+    end
+    throw :skip_delete if stop?
+    return true
+  end
+
+  def cleanup_local_object(record)
+    FileUtils.remove_entry_secure(record[:local_file], true) if ::File.exists?(record[:local_file])
+  rescue Exception => e
+    @logger.warn("Could not delete file", :file => record[:local_file], :error => e)
+  end
+
+  def cleanup_s3object(record)
+    return unless @delete_on_success
+    begin
+      @factory.get_s3_client(record[:bucket]) do |s3|
+        s3.delete_object(bucket: record[:bucket], key: record[:key])
+      end
+    rescue Exception => e
+      @logger.warn("Failed to delete s3 object", :record => record, :error => e)
+    end
+  end
+
+  def stop?
+    @stopped.value
+  end
+
+end # class
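A sketch of the intended calling pattern (the worker thread in s3snssqs.rb further down does essentially this); the record contents and the factory object are placeholders:

    downloader = S3Downloader.new(logger, Concurrent::AtomicBoolean.new(false), {
      s3_client_factory: s3_client_factory,   # an S3ClientFactory instance
      delete_on_success: false
    })
    record = { bucket: 'my-bucket', key: 'logs/file.gz', local_file: '/tmp/logstash/file.gz' }
    completed = catch(:skip_delete) do
      downloader.copy_s3object_to_disk(record) # throws :skip_delete on S3 errors or shutdown
    end
    downloader.cleanup_local_object(record)
    downloader.cleanup_s3object(record) if completed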
data/lib/logstash/inputs/s3snssqs.rb CHANGED
@@ -1,15 +1,20 @@
 # encoding: utf-8
-#
 require "logstash/inputs/threadable"
 require "logstash/namespace"
 require "logstash/timestamp"
 require "logstash/plugin_mixins/aws_config"
+require "logstash/shutdown_watcher"
 require "logstash/errors"
 require 'logstash/inputs/s3sqs/patch'
 require "aws-sdk"
-
-
-require
+# "object-oriented interfaces on top of API clients"...
+# => Overhead. FIXME: needed?
+#require "aws-sdk-resources"
+require "fileutils"
+require "concurrent"
+# unused in code:
+#require "stud/interval"
+#require "digest/md5"
 
 require 'java'
 java_import java.io.InputStream
@@ -19,6 +24,14 @@ java_import java.io.BufferedReader
 java_import java.util.zip.GZIPInputStream
 java_import java.util.zip.ZipException
 
+# our helper classes
+# these may go into this file for brevity...
+require_relative 'sqs/poller'
+require_relative 's3/client_factory'
+require_relative 's3/downloader'
+require_relative 'codec_factory'
+require_relative 's3snssqs/log_processor'
+
 Aws.eager_autoload!
 
 # Get logs from AWS s3 buckets as issued by an object-created event via sqs.
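The first hunk adds require "concurrent"; the rewritten register/run/stop methods in the next hunk share a single Concurrent::AtomicBoolean between the plugin and its helper objects as the stop flag. A minimal sketch of that pattern, with invented worker logic:

    require 'concurrent'

    stop_flag = Concurrent::AtomicBoolean.new(false)

    worker = Thread.new do
      until stop_flag.value      # helpers check the same flag via their own stop? methods
        # ... poll SQS / download / process ...
        sleep 0.1
      end
    end

    stop_flag.make_true          # what the plugin's stop method does
    worker.join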
@@ -89,472 +102,241 @@ Aws.eager_autoload!
 #
 class LogStash::Inputs::S3SNSSQS < LogStash::Inputs::Threadable
   include LogStash::PluginMixins::AwsConfig::V2
-
-  BACKOFF_SLEEP_TIME = 1
-  BACKOFF_FACTOR = 2
-  MAX_TIME_BEFORE_GIVING_UP = 60
-  EVENT_SOURCE = 'aws:s3'
-  EVENT_TYPE = 'ObjectCreated'
+  include LogProcessor
 
   config_name "s3snssqs"
 
   default :codec, "plain"
 
+
+
+  # Future config might look somewhat like this:
+  #
+  # s3_options_by_bucket = [
+  #   {
+  #     "bucket_name": "my-beautiful-bucket",
+  #     "credentials": { "role": "aws:role:arn:for:bucket:access" },
+  #     "folders": [
+  #       {
+  #         "key": "my_folder",
+  #         "codec": "json"
+  #         "type": "my_lovely_index"
+  #       },
+  #       {
+  #         "key": "my_other_folder",
+  #         "codec": "json_stream"
+  #         "type": ""
+  #       }
+  #     ]
+  #   },
+  #   {
+  #     "bucket_name": "my-other-bucket"
+  #     "credentials": {
+  #       "access_key_id": "some-id",
+  #       "secret_access_key": "some-secret-key"
+  #     },
+  #     "folders": [
+  #       {
+  #         "key": ""
+  #       }
+  #     ]
+  #   }
+  # }
+
+  config :s3_key_prefix, :validate => :string, :default => '', :deprecated => true #, :obsolete => " Will be moved to s3_options_by_bucket/types"
+
+  config :s3_access_key_id, :validate => :string, :deprecated => true #, :obsolete => "Please migrate to :s3_options_by_bucket. We will remove this option in the next Version"
+  config :s3_secret_access_key, :validate => :string, :deprecated => true #, :obsolete => "Please migrate to :s3_options_by_bucket. We will remove this option in the next Version"
+  config :s3_role_arn, :validate => :string, :deprecated => true #, :obsolete => "Please migrate to :s3_options_by_bucket. We will remove this option in the next Version"
+
+  config :set_codec_by_folder, :validate => :hash, :default => {}, :deprecated => true #, :obsolete => "Please migrate to :s3_options_by_bucket. We will remove this option in the next Version"
+
+  # Default Options for the S3 clients
+  config :s3_default_options, :validate => :hash, :required => false, :default => {}
+  # We need a list of buckets, together with role arns and possible folder/codecs:
+  config :s3_options_by_bucket, :validate => :array, :required => false # TODO: true
+  # Session name to use when assuming an IAM role
+  config :s3_role_session_name, :validate => :string, :default => "logstash"
+
+  ### sqs
   # Name of the SQS Queue to pull messages from. Note that this is just the name of the queue, not the URL or ARN.
   config :queue, :validate => :string, :required => true
-  config :s3_key_prefix, :validate => :string, :default => ''
-  #Sometimes you need another key for s3. This is a first test...
-  config :s3_access_key_id, :validate => :string
-  config :s3_secret_access_key, :validate => :string
   config :queue_owner_aws_account_id, :validate => :string, :required => false
-  #If you have different file-types in you s3 bucket, you could define codec by folder
-  #set_codec_by_folder => {"My-ELB-logs" => "plain"}
-  config :set_codec_by_folder, :validate => :hash, :default => {}
-  config :delete_on_success, :validate => :boolean, :default => false
-  config :sqs_explicit_delete, :validate => :boolean, :default => false
   # Whether the event is processed though an SNS to SQS. (S3>SNS>SQS = true |S3>SQS=false)
   config :from_sns, :validate => :boolean, :default => true
-
-  config :
-  config :temporary_directory, :validate => :string, :default => File.join(Dir.tmpdir, "logstash")
-  # The AWS IAM Role to assume, if any.
-  # This is used to generate temporary credentials typically for cross-account access.
-  # See https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRole.html for more information.
-  config :s3_role_arn, :validate => :string
-  # Session name to use when assuming an IAM role
-  config :s3_role_session_name, :validate => :string, :default => "logstash"
+  config :sqs_skip_delete, :validate => :boolean, :default => false
+  config :delete_on_success, :validate => :boolean, :default => false
   config :visibility_timeout, :validate => :number, :default => 600
 
+  ### system
+  config :temporary_directory, :validate => :string, :default => File.join(Dir.tmpdir, "logstash")
+  # To run in multiple threads use this
+  config :consumer_threads, :validate => :number, :default => 1
 
-  attr_reader :poller
-  attr_reader :s3
-
-
-  def set_codec (folder)
-    begin
-      @logger.debug("Automatically switching from #{@codec.class.config_name} to #{set_codec_by_folder[folder]} codec", :plugin => self.class.config_name)
-      LogStash::Plugin.lookup("codec", "#{set_codec_by_folder[folder]}").new("charset" => @codec.charset)
-    rescue Exception => e
-      @logger.error("Failed to set_codec with error", :error => e)
-    end
-  end
 
   public
-  def register
-    require "fileutils"
-    require "digest/md5"
-    require "aws-sdk-resources"
 
-
-    #make this hash keys lookups match like regex
-    hash_key_is_regex(set_codec_by_folder)
-    @logger.info("Registering SQS input", :queue => @queue)
-    setup_queue
+  # --- BEGIN plugin interface ----------------------------------------#
 
+  # initialisation
+  def register
+    # prepare system
     FileUtils.mkdir_p(@temporary_directory) unless Dir.exist?(@temporary_directory)
-  end
-
-  def setup_queue
-    aws_sqs_client = Aws::SQS::Client.new(aws_options_hash)
-    queue_url = aws_sqs_client.get_queue_url({ queue_name: @queue, queue_owner_aws_account_id: @queue_owner_aws_account_id})[:queue_url]
-    @poller = Aws::SQS::QueuePoller.new(queue_url, :client => aws_sqs_client)
-    get_s3client
-    @s3_resource = get_s3object
-  rescue Aws::SQS::Errors::ServiceError => e
-    @logger.error("Cannot establish connection to Amazon SQS", :error => e)
-    raise LogStash::ConfigurationError, "Verify the SQS queue name and your credentials"
-  end
-
-  def polling_options
-    {
-      # we will query 1 message at a time, so we can ensure correct error handling if we can't download a single file correctly
-      # (we will throw :skip_delete if download size isn't correct to process the event again later
-      # -> set a reasonable "Default Visibility Timeout" for your queue, so that there's enough time to process the log files)
-      :max_number_of_messages => 1,
-      # we will use the queue's setting, a good value is 10 seconds
-      # (to ensure fast logstash shutdown on the one hand and few api calls on the other hand)
-      :skip_delete => false,
-      :visibility_timeout => @visibility_timeout,
-      :wait_time_seconds => nil,
-    }
-  end
 
-
-
-  @
-
-
-
-
-
-
-
-
-
-
-
-
-  #
-
-
-
-
-
-      type_folder = get_object_folder(key)
-      # Set input codec by :set_codec_by_folder
-      instance_codec = set_codec(type_folder) unless set_codec_by_folder["#{type_folder}"].nil?
-      # try download and :skip_delete if it fails
-      #if record['s3']['object']['size'] < 10000000 then
-      process_log(bucket, key, type_folder, instance_codec, queue, message, size)
-      #else
-      #  @logger.info("Your file is too big")
-      #end
+    @credentials_by_bucket = hash_key_is_regex({})
+    # create the bucket=>folder=>codec lookup from config options
+    @codec_by_folder = hash_key_is_regex({})
+    @type_by_folder = hash_key_is_regex({})
+
+    # use deprecated settings only if new config is missing:
+    if @s3_options_by_bucket.nil?
+      # We don't know any bucket name, so we must rely on a "catch-all" regex
+      s3_options = {
+        'bucket_name' => '.*',
+        'folders' => @set_codec_by_folder.map { |key, codec|
+          { 'key' => key, 'codec' => codec }
+        }
+      }
+      if @s3_role_arn.nil?
+        # access key/secret key pair needed
+        unless @s3_access_key_id.nil? or @s3_secret_access_key.nil?
+          s3_options['credentials'] = {
+            'access_key_id' => @s3_access_key_id,
+            'secret_access_key' => @s3_secret_access_key
+          }
         end
+      else
+        s3_options['credentials'] = {
+          'role' => @s3_role_arn
+        }
       end
+      @s3_options_by_bucket = [s3_options]
     end
-  end
-
-  private
-  def process_log(bucket , key, folder, instance_codec, queue, message, size)
-    s3bucket = @s3_resource.bucket(bucket)
-    @logger.debug("Lets go reading file", :bucket => bucket, :key => key)
-    object = s3bucket.object(key)
-    filename = File.join(temporary_directory, File.basename(key))
-    if download_remote_file(object, filename)
-      if process_local_log( filename, key, folder, instance_codec, queue, bucket, message, size)
-        begin
-          FileUtils.remove_entry_secure(filename, true) if File.exists? filename
-          delete_file_from_bucket(object)
-        rescue Exception => e
-          @logger.debug("We had problems to delete your file", :file => filename, :error => e)
-        end
-      end
-    else
-      begin
-        FileUtils.remove_entry_secure(filename, true) if File.exists? filename
-      rescue Exception => e
-        @logger.debug("We had problems clean up your tmp dir", :file => filename, :error => e)
-      end
-    end
-  end
-
-  private
-  # Stream the remove file to the local disk
-  #
-  # @param [S3Object] Reference to the remove S3 objec to download
-  # @param [String] The Temporary filename to stream to.
-  # @return [Boolean] True if the file was completely downloaded
-  def download_remote_file(remote_object, local_filename)
-    completed = false
-    @logger.debug("S3 input: Download remote file", :remote_key => remote_object.key, :local_filename => local_filename)
-    File.open(local_filename, 'wb') do |s3file|
-      return completed if stop?
-      begin
-        remote_object.get(:response_target => s3file)
-      rescue Aws::S3::Errors::ServiceError => e
-        @logger.error("Unable to download file. We´ll requeue the message", :file => remote_object.inspect)
-        throw :skip_delete
-      end
-    end
-    completed = true
 
-
-
-
-
-
-  # Read the content of the local file
-  #
-  # @param [Queue] Where to push the event
-  # @param [String] Which file to read from
-  # @return [Boolean] True if the file was completely read, false otherwise.
-  def process_local_log(filename, key, folder, instance_codec, queue, bucket, message, size)
-    @logger.debug('Processing file', :filename => filename)
-    metadata = {}
-    start_time = Time.now
-    # Currently codecs operates on bytes instead of stream.
-    # So all IO stuff: decompression, reading need to be done in the actual
-    # input and send as bytes to the codecs.
-    read_file(filename) do |line|
-      if (Time.now - start_time) >= (@visibility_timeout.to_f / 100.0 * 90.to_f)
-        @logger.info("Increasing the visibility_timeout ... ", :timeout => @visibility_timeout, :filename => filename, :filesize => size, :start => start_time )
-        poller.change_message_visibility_timeout(message, @visibility_timeout)
-        start_time = Time.now
+    @s3_options_by_bucket.each do |options|
+      bucket = options['bucket_name']
+      if options.key?('credentials')
+        @credentials_by_bucket[bucket] = options['credentials']
       end
-      if
-
-
-
-
-
-
-
-
-
-
-      # If the event is only metadata the event will be drop.
-      # This was the behavior of the pre 1.5 plugin.
-      #
-      # The line need to go through the codecs to replace
-      # unknown bytes in the log stream before doing a regexp match or
-      # you will get a `Error: invalid byte sequence in UTF-8'
-      local_decorate_and_queue(event, queue, key, folder, metadata, bucket)
+      if options.key?('folders')
+        # make these hashes do key lookups using regex matching
+        folders = hash_key_is_regex({})
+        types = hash_key_is_regex({})
+        options['folders'].each do |entry|
+          @logger.debug("options for folder ", :folder => entry)
+          folders[entry['key']] = entry['codec'] if entry.key?('codec')
+          types[entry['key']] = entry['type'] if entry.key?('type')
+        end
+        @codec_by_folder[bucket] = folders unless folders.empty?
+        @type_by_folder[bucket] = types unless types.empty?
       end
     end
-    @logger.debug("end if file #{filename}")
-    #@logger.info("event pre flush", :event => event)
-    # #ensure any stateful codecs (such as multi-line ) are flushed to the queue
-    instance_codec.flush do |event|
-      local_decorate_and_queue(event, queue, key, folder, metadata, bucket)
-      @logger.debug("We´e to flush an incomplete event...", :event => event)
-    end
-
-    return true
-  end # def process_local_log
-
-  private
-  def local_decorate_and_queue(event, queue, key, folder, metadata, bucket)
-    @logger.debug('decorating event', :event => event.to_s)
-    if event_is_metadata?(event)
-      @logger.debug('Event is metadata, updating the current cloudfront metadata', :event => event)
-      update_metadata(metadata, event)
-    else
-
-      decorate(event)
-
-      event.set("cloudfront_version", metadata[:cloudfront_version]) unless metadata[:cloudfront_version].nil?
-      event.set("cloudfront_fields", metadata[:cloudfront_fields]) unless metadata[:cloudfront_fields].nil?
-
-      event.set("[@metadata][s3][object_key]", key)
-      event.set("[@metadata][s3][bucket_name]", bucket)
-      event.set("[@metadata][s3][object_folder]", folder)
-      @logger.debug('add metadata', :object_key => key, :bucket => bucket, :folder => folder)
-      queue << event
-    end
-  end
-
-
-  private
-  def get_object_folder(key)
-    if match=/#{s3_key_prefix}\/?(?<type_folder>.*?)\/.*/.match(key)
-      return match['type_folder']
-    else
-      return ""
-    end
-  end
-
-  private
-  def read_file(filename, &block)
-    if gzip?(filename)
-      read_gzip_file(filename, block)
-    else
-      read_plain_file(filename, block)
-    end
-  end
-
-  def read_plain_file(filename, block)
-    File.open(filename, 'rb') do |file|
-      file.each(&block)
-    end
-  end
-
-  private
-  def read_gzip_file(filename, block)
-    file_stream = FileInputStream.new(filename)
-    gzip_stream = GZIPInputStream.new(file_stream)
-    decoder = InputStreamReader.new(gzip_stream, "UTF-8")
-    buffered = BufferedReader.new(decoder)
-
-    while (line = buffered.readLine())
-      block.call(line)
-    end
-  rescue ZipException => e
-    @logger.error("Gzip codec: We cannot uncompress the gzip file", :filename => filename, :error => e)
-  ensure
-    buffered.close unless buffered.nil?
-    decoder.close unless decoder.nil?
-    gzip_stream.close unless gzip_stream.nil?
-    file_stream.close unless file_stream.nil?
-  end
-
-  private
-  def gzip?(filename)
-    return true if filename.end_with?('.gz','.gzip')
-    MagicGzipValidator.new(File.new(filename, 'r')).valid?
-  rescue Exception => e
-    @logger.debug("Problem while gzip detection", :error => e)
-  end
-
-  private
-  def delete_file_from_bucket(object)
-    if @delete_on_success
-      object.delete()
-    end
-  end
-
-
-  private
-  def get_s3client
-    if s3_access_key_id and s3_secret_access_key
-      @logger.debug("Using S3 Credentials from config", :ID => aws_options_hash.merge(:access_key_id => s3_access_key_id, :secret_access_key => s3_secret_access_key) )
-      @s3_client = Aws::S3::Client.new(aws_options_hash.merge(:access_key_id => s3_access_key_id, :secret_access_key => s3_secret_access_key))
-    elsif @s3_role_arn
-      @s3_client = Aws::S3::Client.new(aws_options_hash.merge!({ :credentials => s3_assume_role }))
-      @logger.debug("Using S3 Credentials from role", :s3client => @s3_client.inspect, :options => aws_options_hash.merge!({ :credentials => s3_assume_role }))
-    else
-      @s3_client = Aws::S3::Client.new(aws_options_hash)
-    end
-  end
-
-  private
-  def get_s3object
-    s3 = Aws::S3::Resource.new(client: @s3_client)
-  end
-
-  private
-  def s3_assume_role()
-    Aws::AssumeRoleCredentials.new(
-      client: Aws::STS::Client.new(region: @region),
-      role_arn: @s3_role_arn,
-      role_session_name: @s3_role_session_name
-    )
-  end
-
-  private
-  def event_is_metadata?(event)
-    return false unless event.get("message").class == String
-    line = event.get("message")
-    version_metadata?(line) || fields_metadata?(line)
-  end
-
-  private
-  def version_metadata?(line)
-    line.start_with?('#Version: ')
-  end
-
-  private
-  def fields_metadata?(line)
-    line.start_with?('#Fields: ')
-  end
-
-  private
-  def update_metadata(metadata, event)
-    line = event.get('message').strip
-
-    if version_metadata?(line)
-      metadata[:cloudfront_version] = line.split(/#Version: (.+)/).last
-    end
 
-
-
-
+    @received_stop = Concurrent::AtomicBoolean.new(false)
+
+    # instantiate helpers
+    @sqs_poller = SqsPoller.new(@logger, @received_stop, @queue, {
+      queue_owner_aws_account_id: @queue_owner_aws_account_id,
+      from_sns: @from_sns,
+      sqs_explicit_delete: @sqs_explicit_delete,
+      visibility_timeout: @visibility_timeout
+    }, aws_options_hash)
+    @s3_client_factory = S3ClientFactory.new(@logger, {
+      aws_region: @region,
+      s3_default_options: @s3_default_options,
+      s3_credentials_by_bucket: @credentials_by_bucket,
+      s3_role_session_name: @s3_role_session_name
+    }, aws_options_hash)
+    @s3_downloader = S3Downloader.new(@logger, @received_stop, {
+      s3_client_factory: @s3_client_factory,
+      delete_on_success: @delete_on_success
+    })
+    @codec_factory = CodecFactory.new(@logger, {
+      default_codec: @codec,
+      codec_by_folder: @codec_by_folder
+    })
+    #@log_processor = LogProcessor.new(self)
+
+    # administrative stuff
+    @worker_threads = []
   end
 
-
-  def run(
-
-
-
-
-    else
-      #Fallback to simple single thread worker
-      # ensure we can stop logstash correctly
-      poller.before_request do |stats|
-        if stop? then
-          @logger.warn("issuing :stop_polling on stop?", :queue => @queue)
-          # this can take up to "Receive Message Wait Time" (of the sqs queue) seconds to be recognized
-          throw :stop_polling
-        end
-      end
-      # poll a message and process it
-      run_with_backoff do
-        poller.poll(polling_options) do |message|
-          begin
-            handle_message(message, queue, @codec.clone)
-            poller.delete_message(message)
-          rescue Exception => e
-            @logger.info("Error in poller block ... ", :error => e)
-          end
-        end
-      end
+  # startup
+  def run(logstash_event_queue)
+    #LogStash::ShutdownWatcher.abort_threshold(30)
+    # start them
+    @worker_threads = @consumer_threads.times.map do |_|
+      run_worker_thread(logstash_event_queue)
     end
+    # and wait (possibly infinitely) for them to shut down
+    @worker_threads.each { |t| t.join }
   end
 
-
+  # shutdown
   def stop
-
-
-
-
-
-
-
-
-    end
-    else
-      @logger.warn("Stopping all threads?", :queue => @queue)
+    @received_stop.make_true
+    @worker_threads.each do |worker|
+      begin
+        @logger.info("Stopping thread ... ", :thread => worker.inspect)
+        worker.wakeup
+      rescue
+        @logger.error("Cannot stop thread ... try to kill him", :thread => worker.inspect)
+        worker.kill
+      end
     end
   end
 
+  # --- END plugin interface ------------------------------------------#
+
   private
-
+
+  def run_worker_thread(queue)
     Thread.new do
-      @logger.info("Starting new thread")
-
-
-
-
-
-
-
-
-      # poll a message and process it
-      run_with_backoff do
-        poller.poll(polling_options) do |message|
-          begin
-            handle_message(message, queue, @codec.clone)
-            poller.delete_message(message) if @sqs_explicit_delete
-          rescue Exception => e
-            @logger.info("Error in poller block ... ", :error => e)
-          end
+      @logger.info("Starting new worker thread")
+      @sqs_poller.run do |record|
+        throw :skip_delete if stop?
+        @logger.debug("Outside Poller: got a record", :record => record)
+        # record is a valid object with the keys ":bucket", ":key", ":size"
+        record[:local_file] = File.join(@temporary_directory, File.basename(record[:key]))
+        if @s3_downloader.copy_s3object_to_disk(record)
+          completed = catch(:skip_delete) do
+            process(record, queue)
           end
+          @s3_downloader.cleanup_local_object(record)
+          # re-throw if necessary:
+          throw :skip_delete unless completed
+          @s3_downloader.cleanup_s3object(record)
         end
       end
     end
   end
 
-
-
-
-
-
-
-  # @param [Block] block Ruby code block to execute.
-  def run_with_backoff(max_time = MAX_TIME_BEFORE_GIVING_UP, sleep_time = BACKOFF_SLEEP_TIME, &block)
-    next_sleep = sleep_time
-    begin
-      block.call
-      next_sleep = sleep_time
-    rescue Aws::SQS::Errors::ServiceError => e
-      @logger.warn("Aws::SQS::Errors::ServiceError ... retrying SQS request with exponential backoff", :queue => @queue, :sleep_time => sleep_time, :error => e)
-      sleep(next_sleep)
-      next_sleep = next_sleep > max_time ? sleep_time : sleep_time * BACKOFF_FACTOR
-      retry
+  # Will be remove in further releases...
+  def get_object_folder(key)
+    if match=/#{s3_key_prefix}\/?(?<type_folder>.*?)\/.*/.match(key)
+      return match['type_folder']
+    else
+      return ""
     end
   end
 
-  private
   def hash_key_is_regex(myhash)
     myhash.default_proc = lambda do |hash, lookup|
       result=nil
       hash.each_pair do |key, value|
         if %r[#{key}] =~ lookup
-          result=value
+          result = value
           break
         end
       end
       result
     end
+    # return input hash (convenience)
+    return myhash
+  end
+
+  def stop?
+    @received_stop.value
   end
+
 end # class
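The hash_key_is_regex helper kept at the end (now also returning the hash it modifies) makes lookups fall back to regex matching against the keys; a standalone sketch of the same default_proc trick, with invented folder names:

    codecs = { '^cloudfront/' => 'json_stream', 'elb-logs' => 'plain' }
    codecs.default_proc = lambda do |hash, lookup|
      result = nil
      hash.each_pair do |key, value|
        if %r[#{key}] =~ lookup
          result = value
          break
        end
      end
      result
    end

    codecs['cloudfront/E1ABC/part-0.gz']  # => "json_stream" (regex match, not exact key)
    codecs['unknown/folder']              # => nil, so callers fall back to the default codec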