fluent-plugin-s3 0.6.9 → 0.7.0
- checksums.yaml +4 -4
- data/.travis.yml +1 -0
- data/ChangeLog +5 -0
- data/README.md +91 -1
- data/VERSION +1 -1
- data/lib/fluent/plugin/in_s3.rb +316 -0
- data/lib/fluent/plugin/out_s3.rb +2 -0
- data/lib/fluent/plugin/s3_extractor_gzip_command.rb +46 -0
- data/lib/fluent/plugin/s3_extractor_lzma2.rb +40 -0
- data/lib/fluent/plugin/s3_extractor_lzo.rb +40 -0
- data/test/test_in_s3.rb +222 -0
- metadata +8 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 6cff4e256a511905f01a1f6ceeb2b29654fd3e11
+  data.tar.gz: 1dc609cd3e8c6097854174cc03469b4e04f1639c
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: d94cd2e1b7ac41f7c9f17ce303f2fa63773e4b5000dbf59dd96cecbd67f3c139dd17a69eda8c4ec2b2c103fdbd576832a8064e1aad090855d4388a6c3ab5cebb
+  data.tar.gz: 0deee51a8774c9aa7a98f961d6be3a9497ca8aa82b38bf8479f811b5453840e888dc58d7c6d195c52337942092d66a5bb1f6419c87083eaa7973aa434b2b19f0
data/.travis.yml
CHANGED
data/ChangeLog
CHANGED
data/README.md
CHANGED
@@ -16,13 +16,17 @@ reached, and then another log '2011-01-03 message B' is reached in this order,
 the former one is stored in "20110102.gz" file, and latter one in
 "20110103.gz" file.
 
+The **s3** input plugin reads data from S3 periodically. It uses an SQS queue
+in the same region as the S3 bucket.
+You must set up the SQS queue and an S3 event notification before using this plugin.
+
 ## Installation
 
 Simply use RubyGems:
 
     gem install fluent-plugin-s3
 
-## Configuration
+## Output: Configuration
 
     <match pattern>
       @type s3
@@ -430,6 +434,92 @@ Path to the shared file. Defaults to "#{Dir.home}/.aws/credentials".
 
 Defaults to 'default' or `[ENV]('AWS_PROFILE')`.
 
+## Input: Setup
+
+1. Create a new [SQS](https://aws.amazon.com/documentation/sqs/) queue (in the same region as S3)
+2. Set proper permissions on the new queue
+3. [Configure S3 event notification](http://docs.aws.amazon.com/AmazonS3/latest/dev/NotificationHowTo.html)
+4. Write a configuration file such as fluent.conf
+5. Run fluentd
+
+## Input: Configuration
+
+    <source>
+      type s3
+
+      aws_key_id YOUR_AWS_KEY_ID
+      aws_sec_key YOUR_AWS_SECRET_KEY
+      s3_bucket YOUR_S3_BUCKET_NAME
+      s3_region ap-northeast-1
+
+      <sqs>
+        queue_name YOUR_SQS_QUEUE_NAME
+      </sqs>
+    </source>
+
+**aws_key_id**
+
+AWS access key id. This parameter is required when your agent is not running on an EC2 instance with an IAM Role.
+
+**aws_sec_key**
+
+AWS secret key. This parameter is required when your agent is not running on an EC2 instance with an IAM Role.
+
+**aws_iam_retries**
+
+The number of attempts to make (with exponential backoff) when loading instance profile credentials from the EC2 metadata
+service using an IAM role. Defaults to 5 retries.
+
+**s3_bucket (required)**
+
+S3 bucket name.
+
+**s3_region**
+
+S3 region name. For example, the US West (Oregon) Region is
+"us-west-2". The full list of regions is available at
+http://docs.aws.amazon.com/general/latest/gr/rande.html#s3_region. We
+recommend using `s3_region` instead of `s3_endpoint`.
+
+**store_as**
+
+Archive format on S3. You can use several formats:
+
+* gzip (default)
+* json
+* text
+* lzo (needs the lzop command)
+* lzma2 (needs the xz command)
+* gzip_command (needs the gzip command)
+  * This compressor uses an external gzip command, so it utilizes CPU cores better than `gzip`
+
+See the 'Use your compression algorithm' section for adding another format.
+
+**format**
+
+Parse each line of the S3 object as this format. Supported formats are
+"apache_error", "apache2", "syslog", "json", "tsv", "ltsv", "csv",
+"nginx" and "none".
+
+**check_apikey_on_start**
+
+Check the AWS key on start. Default is true.
+
+**proxy_uri**
+
+URI of the proxy environment.
+
+**sqs/queue_name (required)**
+
+SQS queue name. The SQS queue must be created in the same region as the S3 bucket.
+
+**sqs/skip_delete**
+
+When true, messages are not deleted after the polling block. Default is false.
+
+**sqs/wait_time_seconds**
+
+The long polling interval. Default is 20.
 
 ## IAM Policy
 
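The "Input: Setup" steps above can also be scripted with the aws-sdk gem this plugin already depends on. The following is a minimal sketch, not part of this release: the region, bucket and queue names are placeholders, credentials are assumed to come from the environment, and the queue policy shown is deliberately broad and should be tightened for real deployments.

    # Sketch of the Input: Setup steps using the aws-sdk v2 Ruby API.
    require 'aws-sdk-resources'
    require 'json'

    region = 'ap-northeast-1'            # placeholder
    bucket = 'YOUR_S3_BUCKET_NAME'       # placeholder
    queue  = 'YOUR_SQS_QUEUE_NAME'       # placeholder

    sqs = Aws::SQS::Client.new(region: region)
    s3  = Aws::S3::Client.new(region: region)

    # 1. Create the SQS queue in the same region as the bucket
    queue_url = sqs.create_queue(queue_name: queue).queue_url
    queue_arn = sqs.get_queue_attributes(
      queue_url: queue_url, attribute_names: ['QueueArn']
    ).attributes['QueueArn']

    # 2. Allow the bucket to send event notifications to the queue
    #    (illustrative policy; restrict it further in production)
    policy = {
      'Version' => '2012-10-17',
      'Statement' => [{
        'Effect'    => 'Allow',
        'Principal' => '*',
        'Action'    => 'sqs:SendMessage',
        'Resource'  => queue_arn,
        'Condition' => { 'ArnLike' => { 'aws:SourceArn' => "arn:aws:s3:::#{bucket}" } }
      }]
    }
    sqs.set_queue_attributes(queue_url: queue_url,
                             attributes: { 'Policy' => JSON.dump(policy) })

    # 3. Deliver ObjectCreated events from the bucket to the queue
    s3.put_bucket_notification_configuration(
      bucket: bucket,
      notification_configuration: {
        queue_configurations: [{ queue_arn: queue_arn, events: ['s3:ObjectCreated:*'] }]
      }
    )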
data/VERSION
CHANGED
@@ -1 +1 @@
-0.6.9
+0.7.0
data/lib/fluent/plugin/in_s3.rb
ADDED
@@ -0,0 +1,316 @@
+require 'fluent/input'
+
+module Fluent
+  class S3Input < Input
+    Fluent::Plugin.register_input('s3', self)
+
+    def initialize
+      super
+      require 'aws-sdk-resources'
+      require 'zlib'
+      require 'time'
+      require 'tempfile'
+
+      @extractor = nil
+    end
+
+    # For fluentd v0.12.16 or earlier
+    class << self
+      unless method_defined?(:desc)
+        def desc(description)
+        end
+      end
+    end
+    unless Fluent::Config::ConfigureProxy.method_defined?(:desc)
+      Fluent::Config::ConfigureProxy.class_eval do
+        def desc(description)
+        end
+      end
+    end
+
+    desc "AWS access key id"
+    config_param :aws_key_id, :string, :default => nil, :secret => true
+    desc "AWS secret key."
+    config_param :aws_sec_key, :string, :default => nil, :secret => true
+    config_section :assume_role_credentials, :multi => false do
+      desc "The Amazon Resource Name (ARN) of the role to assume"
+      config_param :role_arn, :string
+      desc "An identifier for the assumed role session"
+      config_param :role_session_name, :string
+      desc "An IAM policy in JSON format"
+      config_param :policy, :string, :default => nil
+      desc "The duration, in seconds, of the role session (900-3600)"
+      config_param :duration_seconds, :integer, :default => nil
+      desc "A unique identifier that is used by third parties when assuming roles in their customers' accounts."
+      config_param :external_id, :string, :default => nil
+    end
+    config_section :instance_profile_credentials, :multi => false do
+      desc "Number of times to retry when retrieving credentials"
+      config_param :retries, :integer, :default => nil
+      desc "IP address (default:169.254.169.254)"
+      config_param :ip_address, :string, :default => nil
+      desc "Port number (default:80)"
+      config_param :port, :integer, :default => nil
+      desc "Number of seconds to wait for the connection to open"
+      config_param :http_open_timeout, :float, :default => nil
+      desc "Number of seconds to wait for one block to be read"
+      config_param :http_read_timeout, :float, :default => nil
+      # config_param :delay, :integer or :proc, :default => nil
+      # config_param :http_debug_output, :io, :default => nil
+    end
+    config_section :shared_credentials, :multi => false do
+      desc "Path to the shared file. (default: $HOME/.aws/credentials)"
+      config_param :path, :string, :default => nil
+      desc "Profile name. Default to 'default' or ENV['AWS_PROFILE']"
+      config_param :profile_name, :string, :default => nil
+    end
+    desc "S3 bucket name"
+    config_param :s3_bucket, :string
+    desc "S3 region name"
+    config_param :s3_region, :string, :default => ENV["AWS_REGION"] || "us-east-1"
+    desc "Archive format on S3"
+    config_param :store_as, :string, :default => "gzip"
+    desc "Check AWS key on start"
+    config_param :check_apikey_on_start, :bool, :default => true
+    desc "URI of proxy environment"
+    config_param :proxy_uri, :string, :default => nil
+    desc "Change one line format in the S3 object (none,json,ltsv,single_value)"
+    config_param :format, :string, :default => 'none'
+
+    config_section :sqs, :required => true, :multi => false do
+      desc "SQS queue name"
+      config_param :queue_name, :string, :default => nil
+      desc "Skip message deletion"
+      config_param :skip_delete, :bool, :default => false
+      desc "The long polling interval."
+      config_param :wait_time_seconds, :integer, :default => 20
+    end
+
+    desc "Tag string"
+    config_param :tag, :string, :default => "input.s3"
+
+    attr_reader :bucket
+
+    def configure(conf)
+      super
+
+      unless @sqs.queue_name
+        raise ConfigError, "sqs/queue_name is required"
+      end
+
+      @extractor = EXTRACTOR_REGISTRY.lookup(@store_as).new(log: log)
+      @extractor.configure(conf)
+
+      @parser = Plugin.new_parser(@format)
+      @parser.configure(conf)
+    end
+
+    def start
+      super
+
+      s3_client = create_s3_client
+      @s3 = Aws::S3::Resource.new(:client => s3_client)
+      @bucket = @s3.bucket(@s3_bucket)
+
+      raise "#{@bucket.name} is not found." unless @bucket.exists?
+
+      check_apikeys if @check_apikey_on_start
+
+      sqs_client = create_sqs_client
+      response = sqs_client.get_queue_url(queue_name: @sqs.queue_name)
+      sqs_queue_url = response.queue_url
+
+      @poller = Aws::SQS::QueuePoller.new(sqs_queue_url, client: sqs_client)
+
+      @running = true
+      @thread = Thread.new(&method(:run))
+    end
+
+    def shutdown
+      @running = false
+      @thread.join
+      super
+    end
+
+    private
+
+    def run
+      options = {}
+      options[:wait_time_seconds] = @sqs.wait_time_seconds
+      options[:skip_delete] = @sqs.skip_delete
+      @poller.before_request do |stats|
+        throw :stop_polling unless @running
+      end
+      @poller.poll(options) do |message|
+        begin
+          body = Yajl.load(message.body)
+          next unless body["Records"] # skip test queue
+
+          process(body)
+        rescue => e
+          log.warn "#{e.message}\n#{e.backtrace.join("\n")}"
+          @running = false
+          throw :skip_delete
+        end
+      end
+    end
+
+    def setup_credentials
+      options = {}
+      credentials_options = {}
+      case
+      when @aws_key_id && @aws_sec_key
+        options[:access_key_id] = @aws_key_id
+        options[:secret_access_key] = @aws_sec_key
+      when @assume_role_credentials
+        c = @assume_role_credentials
+        credentials_options[:role_arn] = c.role_arn
+        credentials_options[:role_session_name] = c.role_session_name
+        credentials_options[:policy] = c.policy if c.policy
+        credentials_options[:duration_seconds] = c.duration_seconds if c.duration_seconds
+        credentials_options[:external_id] = c.external_id if c.external_id
+        options[:credentials] = Aws::AssumeRoleCredentials.new(credentials_options)
+      when @instance_profile_credentials
+        c = @instance_profile_credentials
+        credentials_options[:retries] = c.retries if c.retries
+        credentials_options[:ip_address] = c.ip_address if c.ip_address
+        credentials_options[:port] = c.port if c.port
+        credentials_options[:http_open_timeout] = c.http_open_timeout if c.http_open_timeout
+        credentials_options[:http_read_timeout] = c.http_read_timeout if c.http_read_timeout
+        options[:credentials] = Aws::InstanceProfileCredentials.new(credentials_options)
+      when @shared_credentials
+        c = @shared_credentials
+        credentials_options[:path] = c.path if c.path
+        credentials_options[:profile_name] = c.profile_name if c.profile_name
+        options[:credentials] = Aws::SharedCredentials.new(credentials_options)
+      else
+        # Use default credentials
+        # See http://docs.aws.amazon.com/sdkforruby/api/Aws/S3/Client.html
+      end
+      options
+    end
+
+    def create_s3_client
+      options = setup_credentials
+      options[:region] = @s3_region if @s3_region
+      options[:proxy_uri] = @proxy_uri if @proxy_uri
+
+      Aws::S3::Client.new(options)
+    end
+
+    def create_sqs_client
+      options = setup_credentials
+      options[:region] = @s3_region if @s3_region
+
+      Aws::SQS::Client.new(options)
+    end
+
+    def check_apikeys
+      @bucket.objects.first
+    rescue => e
+      raise "can't call S3 API. Please check your aws_key_id / aws_sec_key or s3_region configuration. error = #{e.inspect}"
+    end
+
+    def process(body)
+      s3 = body["Records"].first["s3"]
+      key = s3["object"]["key"]
+
+      io = @bucket.object(key).get.body
+      content = @extractor.extract(io)
+      content.each_line do |line|
+        time, record = @parser.parse(line)
+        router.emit(@tag, time, record)
+      end
+    end
+
+    class Extractor
+      include Configurable
+
+      attr_reader :log
+
+      def initialize(log: $log, **options)
+        super()
+        @log = log
+      end
+
+      def configure(conf)
+        super
+      end
+
+      def ext
+      end
+
+      def content_type
+      end
+
+      def extract(io)
+      end
+
+      private
+
+      def check_command(command, algo = nil)
+        require 'open3'
+
+        algo = command if algo.nil?
+        begin
+          Open3.capture3("#{command} -V")
+        rescue Errno::ENOENT
+          raise ConfigError, "'#{command}' utility must be in PATH for #{algo} compression"
+        end
+      end
+    end
+
+    class GzipExtractor < Extractor
+      def ext
+        'gz'.freeze
+      end
+
+      def content_type
+        'application/x-gzip'.freeze
+      end
+
+      def extract(io)
+        Zlib::GzipReader.wrap(io) do |gz|
+          gz.read
+        end
+      end
+    end
+
+    class TextExtractor < Extractor
+      def ext
+        'txt'.freeze
+      end
+
+      def content_type
+        'text/plain'.freeze
+      end
+
+      def extract(io)
+        io.read
+      end
+    end
+
+    class JsonExtractor < TextExtractor
+      def ext
+        'json'.freeze
+      end
+
+      def content_type
+        'application/json'.freeze
+      end
+    end
+
+    EXTRACTOR_REGISTRY = Registry.new(:s3_extractor_type, 'fluent/plugin/s3_extractor_')
+    {
+      'gzip' => GzipExtractor,
+      'text' => TextExtractor,
+      'json' => JsonExtractor
+    }.each do |name, extractor|
+      EXTRACTOR_REGISTRY.register(name, extractor)
+    end
+
+    def self.register_extractor(name, extractor)
+      EXTRACTOR_REGISTRY.register(name, extractor)
+    end
+  end
+end
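The EXTRACTOR_REGISTRY and `register_extractor` hook above are the extension point the README's 'Use your compression algorithm' note refers to. The following is a hypothetical sketch, not shipped with this release, of a zstd extractor that mirrors the bundled command-based extractors; the 'zstd' name, file path, CLI flags and content type are illustrative assumptions.

    # Save as lib/fluent/plugin/s3_extractor_zstd.rb so the registry can
    # lazy-load it via its 'fluent/plugin/s3_extractor_' search prefix.
    module Fluent
      class S3Input
        class ZstdExtractor < Extractor
          S3Input.register_extractor('zstd', self)

          # Flags passed to the external zstd CLI (decompress to stdout, quiet)
          config_param :command_parameter, :string, :default => '-qdc'

          def configure(conf)
            super
            check_command('zstd')   # requires the zstd binary on PATH
          end

          def ext
            'zst'.freeze
          end

          def content_type
            'application/zstd'.freeze
          end

          def extract(io)
            # Write the S3 body to a temp file if it is not already file-backed,
            # then shell out, as the bundled lzo/lzma2 extractors do.
            path = if io.respond_to?(:path)
                     io.path
                   else
                     temp = Tempfile.new("zstd-temp")
                     temp.write(io.read)
                     temp.close
                     temp.path
                   end

            stdout, status = Open3.capture2("zstd #{@command_parameter} #{path}")
            raise "Failed to extract #{path} with zstd command." unless status.success?
            stdout
          end
        end
      end
    end

With such a file on the load path, setting `store_as zstd` in the `<source>` section would select this extractor.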
data/lib/fluent/plugin/out_s3.rb
CHANGED
data/lib/fluent/plugin/s3_extractor_gzip_command.rb
ADDED
@@ -0,0 +1,46 @@
+module Fluent
+  class S3Input
+    class GzipCommandExtractor < Extractor
+      S3Input.register_extractor('gzip_command', self)
+
+      config_param :command_parameter, :string, :default => '-dc'
+
+      def configure(conf)
+        super
+        check_command('gzip')
+      end
+
+      def ext
+        'gz'.freeze
+      end
+
+      def content_type
+        'application/x-gzip'.freeze
+      end
+
+      def extract(io)
+        path = if io.respond_to?(:path)
+                 io.path
+               else
+                 temp = Tempfile.new("gzip-temp")
+                 temp.write(io.read)
+                 temp.close
+                 temp.path
+               end
+
+        stdout, succeeded = Open3.capture2("gzip #{@command_parameter} #{path}")
+        if succeeded.success?
+          stdout
+        else
+          log.warn "failed to execute gzip command. Fallback to GzipReader. status = #{succeeded}"
+          begin
+            io.rewind
+            Zlib::GzipReader.wrap(io) do |gz|
+              gz.read
+            end
+          end
+        end
+      end
+    end
+  end
+end
data/lib/fluent/plugin/s3_extractor_lzma2.rb
ADDED
@@ -0,0 +1,40 @@
+module Fluent
+  class S3Input
+    class LZMA2Extractor < Extractor
+      S3Input.register_extractor('lzma2', self)
+
+      config_param :command_parameter, :string, :default => '-qdc'
+
+      def configure(conf)
+        super
+        check_command('xz', 'LZMA')
+      end
+
+      def ext
+        'xz'.freeze
+      end
+
+      def content_type
+        'application/x-xz'.freeze
+      end
+
+      def extract(io)
+        path = if io.respond_to?(:path)
+                 io.path
+               else
+                 temp = Tempfile.new("xz-temp")
+                 temp.write(io.read)
+                 temp.close
+                 temp.path
+               end
+
+        stdout, succeeded = Open3.capture2("xz #{@command_parameter} #{path}")
+        if succeeded.success?
+          stdout
+        else
+          raise "Failed to extract #{path} with xz command."
+        end
+      end
+    end
+  end
+end
data/lib/fluent/plugin/s3_extractor_lzo.rb
ADDED
@@ -0,0 +1,40 @@
+module Fluent
+  class S3Input
+    class LZOExtractor < Extractor
+      S3Input.register_extractor('lzo', self)
+
+      config_param :command_parameter, :string, :default => '-qdc'
+
+      def configure(conf)
+        super
+        check_command('lzop', 'LZO')
+      end
+
+      def ext
+        'lzo'.freeze
+      end
+
+      def content_type
+        'application/x-lzop'.freeze
+      end
+
+      def extract(io)
+        path = if io.respond_to?(:path)
+                 io.path
+               else
+                 temp = Tempfile.new("lzop-temp")
+                 temp.write(io.read)
+                 temp.close
+                 temp.path
+               end
+
+        stdout, succeeded = Open3.capture2("lzop #{@command_parameter} #{path}")
+        if succeeded.success?
+          stdout
+        else
+          raise "Failed to extract #{path} with lzop command."
+        end
+      end
+    end
+  end
+end
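For reference, `S3Input#process` only reads `Records[0]["s3"]["object"]["key"]` from each SQS message, i.e. the standard S3 event notification payload that the tests below also stub out. A minimal sketch of that shape (the object key is illustrative):

    require 'yajl'

    # Illustrative payload; a real notification from S3 carries many more
    # fields (eventName, bucket, object size, etc.), which the input ignores.
    json = '{"Records":[{"s3":{"object":{"key":"logs/20110102.gz"}}}]}'
    body = Yajl.load(json)

    body["Records"].first["s3"]["object"]["key"]  # => "logs/20110102.gz"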
data/test/test_in_s3.rb
ADDED
@@ -0,0 +1,222 @@
+require 'aws-sdk-resources'
+
+require 'fluent/test'
+require 'fluent/plugin/in_s3'
+
+require 'test/unit/rr'
+require 'zlib'
+require 'fileutils'
+
+class S3InputTest < Test::Unit::TestCase
+  def setup
+    Fluent::Test.setup
+    @time = Time.parse("2015-09-30 13:14:15 UTC").to_i
+    Fluent::Engine.now = @time
+  end
+
+  CONFIG = %[
+    aws_key_id test_key_id
+    aws_sec_key test_sec_key
+    s3_bucket test_bucket
+    utc
+    buffer_type memory
+    <sqs>
+      queue_name test_queue
+    </sqs>
+  ]
+
+  def create_driver(conf = CONFIG)
+    d = Fluent::Test::InputTestDriver.new(Fluent::S3Input)
+    d.configure(conf)
+    d
+  end
+
+  class ConfigTest < self
+    def test_default
+      d = create_driver
+      extractor = d.instance.instance_variable_get(:@extractor)
+      actual = {
+        aws_key_id: d.instance.aws_key_id,
+        aws_sec_key: d.instance.aws_sec_key,
+        s3_bucket: d.instance.s3_bucket,
+        s3_region: d.instance.s3_region,
+        sqs_queue_name: d.instance.sqs.queue_name,
+        extractor_ext: extractor.ext,
+        extractor_content_type: extractor.content_type
+      }
+      expected = {
+        aws_key_id: "test_key_id",
+        aws_sec_key: "test_sec_key",
+        s3_bucket: "test_bucket",
+        s3_region: "us-east-1",
+        sqs_queue_name: "test_queue",
+        extractor_ext: "gz",
+        extractor_content_type: "application/x-gzip"
+      }
+      assert_equal(expected, actual)
+    end
+
+    def test_empty
+      assert_raise(Fluent::ConfigError) do
+        create_driver("")
+      end
+    end
+
+    def test_without_sqs_section
+      conf = %[
+        aws_key_id test_key_id
+        aws_sec_key test_sec_key
+        s3_bucket test_bucket
+        utc
+      ]
+      assert_raise_message("'<sqs>' sections are required") do
+        create_driver(conf)
+      end
+    end
+
+    def test_unknown_store_as
+      config = CONFIG + "\nstore_as unknown"
+      assert_raise(Fluent::ConfigError) do
+        create_driver(config)
+      end
+    end
+
+    data("json" => ["json", "json", "application/json"],
+         "text" => ["text", "txt", "text/plain"],
+         "gzip_command" => ["gzip_command", "gz", "application/x-gzip"],
+         "lzo" => ["lzo", "lzo", "application/x-lzop"],
+         "lzma2" => ["lzma2", "xz", "application/x-xz"])
+    def test_extractor(data)
+      store_type, ext, content_type = data
+      config = CONFIG + "\nstore_as #{store_type}\n"
+      d = create_driver(config)
+      extractor = d.instance.instance_variable_get(:@extractor)
+      expected = {
+        ext: ext,
+        content_type: content_type
+      }
+      actual = {
+        ext: extractor.ext,
+        content_type: extractor.content_type
+      }
+      assert_equal(expected, actual)
+    rescue Fluent::ConfigError => e
+      pend(e.message)
+    end
+  end
+
+  Struct.new("StubResponse", :queue_url)
+  Struct.new("StubMessage", :message_id, :receipt_handle, :body)
+
+  def setup_mocks
+    @s3_client = stub(Aws::S3::Client.new(:stub_responses => true))
+    mock(Aws::S3::Client).new(anything).at_least(0) { @s3_client }
+    @s3_resource = mock(Aws::S3::Resource.new(:client => @s3_client))
+    mock(Aws::S3::Resource).new(:client => @s3_client) { @s3_resource }
+    @s3_bucket = mock(Aws::S3::Bucket.new(:name => "test",
+                                          :client => @s3_client))
+    @s3_resource.bucket(anything) { @s3_bucket }
+
+    test_queue_url = "http://example.com/test_queue"
+    @sqs_client = stub(Aws::SQS::Client.new(:stub_responses => true))
+    @sqs_response = stub(Struct::StubResponse.new(test_queue_url))
+    @sqs_client.get_queue_url(queue_name: "test_queue"){ @sqs_response }
+    mock(Aws::SQS::Client).new(anything).at_least(0) { @sqs_client }
+    @real_poller = Aws::SQS::QueuePoller.new(test_queue_url, client: @sqs_client)
+    @sqs_poller = stub(@real_poller)
+    mock(Aws::SQS::QueuePoller).new(anything, client: @sqs_client) { @sqs_poller }
+  end
+
+  def test_no_records
+    setup_mocks
+    d = create_driver(CONFIG + "\ncheck_apikey_on_start false\n")
+    mock(d.instance).process(anything).never
+
+    message = Struct::StubMessage.new(1, 1, "{}")
+    @sqs_poller.get_messages {|config, stats|
+      config.before_request.call(stats) if config.before_request
+      stats.request_count += 1
+      if stats.request_count > 1
+        d.instance.instance_variable_set(:@running, false)
+      end
+      [message]
+    }
+    assert_nothing_raised do
+      d.run
+    end
+  end
+
+  def test_one_record
+    setup_mocks
+    d = create_driver(CONFIG + "\ncheck_apikey_on_start false\nstore_as text\nformat none\n")
+    d.expect_emit("input.s3", @time, { "message" => "aaa" })
+
+    s3_object = stub(Object.new)
+    s3_response = stub(Object.new)
+    s3_response.body { StringIO.new("aaa") }
+    s3_object.get { s3_response }
+    @s3_bucket.object(anything).at_least(1) { s3_object }
+
+    body = {
+      "Records" => [
+        {
+          "s3" => {
+            "object" => {
+              "key" => "test_key"
+            }
+          }
+        }
+      ]
+    }
+    message = Struct::StubMessage.new(1, 1, Yajl.dump(body))
+    @sqs_poller.get_messages {|config, stats|
+      config.before_request.call(stats) if config.before_request
+      stats.request_count += 1
+      if stats.request_count > 1
+        d.instance.instance_variable_set(:@running, false)
+      end
+      [message]
+    }
+    assert_nothing_raised do
+      d.run
+    end
+  end
+
+  def test_one_record_multi_line
+    setup_mocks
+    d = create_driver(CONFIG + "\ncheck_apikey_on_start false\nstore_as text\nformat none\n")
+    d.expect_emit("input.s3", @time, { "message" => "aaa\n" })
+    d.expect_emit("input.s3", @time, { "message" => "bbb\n" })
+    d.expect_emit("input.s3", @time, { "message" => "ccc\n" })
+
+    s3_object = stub(Object.new)
+    s3_response = stub(Object.new)
+    s3_response.body { StringIO.new("aaa\nbbb\nccc\n") }
+    s3_object.get { s3_response }
+    @s3_bucket.object(anything).at_least(1) { s3_object }
+
+    body = {
+      "Records" => [
+        {
+          "s3" => {
+            "object" => {
+              "key" => "test_key"
+            }
+          }
+        }
+      ]
+    }
+    message = Struct::StubMessage.new(1, 1, Yajl.dump(body))
+    @sqs_poller.get_messages {|config, stats|
+      config.before_request.call(stats) if config.before_request
+      stats.request_count += 1
+      if stats.request_count > 1
+        d.instance.instance_variable_set(:@running, false)
+      end
+      [message]
+    }
+    assert_nothing_raised do
+      d.run
+    end
+  end
+end
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: fluent-plugin-s3
 version: !ruby/object:Gem::Version
-  version: 0.6.9
+  version: 0.7.0
 platform: ruby
 authors:
 - Sadayuki Furuhashi
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2016-
+date: 2016-08-09 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: fluentd
@@ -160,10 +160,15 @@ files:
 - VERSION
 - appveyor.yml
 - fluent-plugin-s3.gemspec
+- lib/fluent/plugin/in_s3.rb
 - lib/fluent/plugin/out_s3.rb
 - lib/fluent/plugin/s3_compressor_gzip_command.rb
 - lib/fluent/plugin/s3_compressor_lzma2.rb
 - lib/fluent/plugin/s3_compressor_lzo.rb
+- lib/fluent/plugin/s3_extractor_gzip_command.rb
+- lib/fluent/plugin/s3_extractor_lzma2.rb
+- lib/fluent/plugin/s3_extractor_lzo.rb
+- test/test_in_s3.rb
 - test/test_out_s3.rb
 homepage: https://github.com/fluent/fluent-plugin-s3
 licenses:
@@ -190,4 +195,5 @@ signing_key:
 specification_version: 4
 summary: Amazon S3 output plugin for Fluentd event collector
 test_files:
+- test/test_in_s3.rb
 - test/test_out_s3.rb