fluent-plugin-s3-file-inclusion 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,14 @@
1
+
2
# Gem packaging tasks are installed by Bundler's gem helper.
require 'bundler'
Bundler::GemHelper.install_tasks

require 'rake/testtask'

# `rake test`: runs every test/test_*.rb file with lib/ and test/ on the
# load path, in verbose mode.
Rake::TestTask.new(:test) do |t|
  t.libs.push('lib', 'test')
  t.test_files = FileList['test/test_*.rb']
  t.verbose = true
end

# Plain `rake` runs the :build task.
task default: [:build]
14
+
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 1.4.1
@@ -0,0 +1,25 @@
1
+ version: '{build}'
2
+
3
+ install:
4
+ - SET PATH=C:\Ruby%ruby_version%\bin;%PATH%
5
+ - "%devkit%\\devkitvars.bat"
6
+ - ruby --version
7
+ - gem --version
8
+ - bundle install
9
+ build: off
10
+ test_script:
11
+ - bundle exec rake test TESTOPTS=-v
12
+
13
+ environment:
14
+ matrix:
15
+ - ruby_version: "22-x64"
16
+ devkit: C:\Ruby21-x64\DevKit
17
+ - ruby_version: "22"
18
+ devkit: C:\Ruby21\DevKit
19
+ - ruby_version: "21-x64"
20
+ devkit: C:\Ruby21-x64\DevKit
21
+ - ruby_version: "21"
22
+ devkit: C:\Ruby21\DevKit
23
+ matrix:
24
+ allow_failures:
25
+ - ruby_version: "21"
@@ -0,0 +1,26 @@
1
+ # encoding: utf-8
2
+ $:.push File.expand_path('../lib', __FILE__)
3
+
4
# Gem specification for fluent-plugin-s3-file-inclusion.
Gem::Specification.new do |gem|
  gem.name        = "fluent-plugin-s3-file-inclusion"
  gem.description = "Amazon S3 output plugin for Fluentd event collector"
  gem.license     = "Apache-2.0"
  gem.homepage    = "https://github.com/fluent/fluent-plugin-s3"
  gem.summary     = gem.description
  # The VERSION file is expected next to this gemspec. Resolve it from this
  # file's location so that reading the version does not depend on the
  # process's current working directory.
  gem.version     = File.read(File.expand_path('../VERSION', __FILE__)).strip
  gem.authors     = ["Sadayuki Furuhashi", "Masahiro Nakagawa"]
  gem.email       = "frsyuki@gmail.com"
  #gem.platform    = Gem::Platform::RUBY
  # The file lists come from git, so they still require running inside the
  # repository checkout.
  gem.files       = `git ls-files`.split("\n")
  gem.test_files  = `git ls-files -- {test,spec,features}/*`.split("\n")
  gem.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  gem.require_paths = ['lib']

  # Runtime dependencies
  gem.add_dependency "fluentd", [">= 0.14.22", "< 2"]
  gem.add_dependency "aws-sdk-s3", "~> 1.60"
  gem.add_dependency "aws-sdk-sqs", "~> 1.23"

  # Development / test dependencies
  gem.add_development_dependency "rake", ">= 0.9.2"
  gem.add_development_dependency "test-unit", ">= 3.0.8"
  gem.add_development_dependency "test-unit-rr", ">= 1.0.3"
  gem.add_development_dependency "timecop"
end
@@ -0,0 +1,12 @@
1
+ require 'fluent/log'
2
+ # For Fluentd v0.14.13 or earlier
3
+ # logger for Aws::S3::Client and Aws::SQS::Client required `#<<` method
4
module Fluent
  class Log
    # Compatibility shim: give the logger a `#<<` method that forwards the
    # message to `#write`. It is only defined when the class does not
    # already provide `#<<`, so a native implementation is never replaced.
    define_method(:<<) { |message| write(message) } unless method_defined?(:<<)
  end
end
@@ -0,0 +1,419 @@
1
+ require 'fluent/plugin/input'
2
+ require 'fluent/log-ext'
3
+
4
+ require 'aws-sdk-s3'
5
+ require 'aws-sdk-sqs'
6
+ require 'aws-sdk-sqs/queue_poller'
7
+ require 'cgi/util'
8
+ require 'zlib'
9
+ require 'time'
10
+ require 'tempfile'
11
+
12
+ module Fluent::Plugin
13
+ class S3Input < Input
14
+ Fluent::Plugin.register_input('s3', self)
15
+
16
+ helpers :compat_parameters, :parser, :thread
17
+
18
# Creates the plugin instance. No extractor exists yet; #configure assigns
# one after looking up the `store_as` setting.
def initialize
  super
  @extractor = nil
end
22
+
23
# Parser type applied when the <parse> section does not name one.
DEFAULT_PARSE_TYPE = "none"

# ---- General / static credentials ---------------------------------------
desc "Use aws-sdk-ruby bundled cert"
config_param :use_bundled_cert, :bool, default: false
desc "Add object metadata to the records parsed out of a given object"
config_param :add_object_metadata, :bool, default: false
desc "AWS access key id"
config_param :aws_key_id, :string, default: nil, secret: true
desc "AWS secret key."
config_param :aws_sec_key, :string, default: nil, secret: true

# ---- <assume_role_credentials> -------------------------------------------
config_section :assume_role_credentials, multi: false do
  desc "The Amazon Resource Name (ARN) of the role to assume"
  config_param :role_arn, :string
  desc "An identifier for the assumed role session"
  config_param :role_session_name, :string
  desc "An IAM policy in JSON format"
  config_param :policy, :string, default: nil
  desc "The duration, in seconds, of the role session (900-3600)"
  config_param :duration_seconds, :integer, default: nil
  desc "A unique identifier that is used by third parties when assuming roles in their customers' accounts."
  config_param :external_id, :string, default: nil
end

# ---- <web_identity_credentials> ------------------------------------------
# Further parameters that could be exposed are listed at:
# https://docs.aws.amazon.com/sdk-for-ruby/v3/api/Aws/STS/Client.html#assume_role_with_web_identity-instance_method
config_section :web_identity_credentials, multi: false do
  desc "The Amazon Resource Name (ARN) of the role to assume"
  config_param :role_arn, :string                 # required
  desc "An identifier for the assumed role session"
  config_param :role_session_name, :string        # required
  desc "The absolute path to the file on disk containing the OIDC token"
  config_param :web_identity_token_file, :string  # required
  desc "An IAM policy in JSON format"
  config_param :policy, :string, default: nil
  desc "The duration, in seconds, of the role session (900-43200)"
  config_param :duration_seconds, :integer, default: nil
end

# ---- <instance_profile_credentials> --------------------------------------
config_section :instance_profile_credentials, multi: false do
  desc "Number of times to retry when retrieving credentials"
  config_param :retries, :integer, default: nil
  desc "IP address (default:169.254.169.254)"
  config_param :ip_address, :string, default: nil
  desc "Port number (default:80)"
  config_param :port, :integer, default: nil
  desc "Number of seconds to wait for the connection to open"
  config_param :http_open_timeout, :float, default: nil
  desc "Number of seconds to wait for one block to be read"
  config_param :http_read_timeout, :float, default: nil
  # Not exposed (kept for reference):
  # config_param :delay, :integer or :proc, :default => nil
  # config_param :http_debug_output, :io, :default => nil
end

# ---- <shared_credentials> ------------------------------------------------
config_section :shared_credentials, multi: false do
  desc "Path to the shared file. (default: $HOME/.aws/credentials)"
  config_param :path, :string, default: nil
  desc "Profile name. Default to 'default' or ENV['AWS_PROFILE']"
  config_param :profile_name, :string, default: nil
end

# ---- S3 ------------------------------------------------------------------
desc "S3 bucket name"
config_param :s3_bucket, :string
desc "S3 region name"
config_param :s3_region, :string, default: ENV["AWS_REGION"] || "us-east-1"
desc "Use 's3_region' instead"
config_param :s3_endpoint, :string, default: nil
desc "If true, the bucket name is always left in the request URI and never moved to the host as a sub-domain"
config_param :force_path_style, :bool, default: false
desc "Archive format on S3"
config_param :store_as, :string, default: "gzip"
desc "Check AWS key on start"
config_param :check_apikey_on_start, :bool, default: true
desc "URI of proxy environment"
config_param :proxy_uri, :string, default: nil

# ---- <sqs> (mandatory section) -------------------------------------------
config_section :sqs, required: true, multi: false do
  desc "SQS queue name"
  config_param :queue_name, :string, default: nil
  desc "SQS Owner Account ID"
  config_param :queue_owner_aws_account_id, :string, default: nil
  desc "Use 's3_region' instead"
  config_param :endpoint, :string, default: nil
  desc "Skip message deletion"
  config_param :skip_delete, :bool, default: false
  desc "The long polling interval."
  config_param :wait_time_seconds, :integer, default: 20
  desc "Polling error retry interval."
  config_param :retry_error_interval, :integer, default: 300
  desc "regex to only process files"
  config_param :include_file_regex, :string, default: ".*"
end

# ---- Emission / parsing --------------------------------------------------
desc "Tag string"
config_param :tag, :string, default: "input.s3"

config_section :parse do
  config_set_default :@type, DEFAULT_PARSE_TYPE
end

# The bucket resource assigned in #start.
attr_reader :bucket
119
+
120
# Validates the configuration and builds the extractor and the parser.
#
# conf - the plugin's configuration element.
#
# Raises Fluent::ConfigError when:
# * s3_endpoint or sqs/endpoint ends with 'amazonaws.com' and contains
#   neither 'fips' nor 'gov',
# * sqs/queue_name is missing,
# * sqs/include_file_regex is not a valid regular expression.
def configure(conf)
  super

  if @s3_endpoint && (@s3_endpoint.end_with?('amazonaws.com') && !['fips', 'gov'].any? { |e| @s3_endpoint.include?(e) })
    raise Fluent::ConfigError, "s3_endpoint parameter is not supported for S3, use s3_region instead. This parameter is for S3 compatible services"
  end

  if @sqs.endpoint && (@sqs.endpoint.end_with?('amazonaws.com') && !['fips', 'gov'].any? { |e| @sqs.endpoint.include?(e) })
    raise Fluent::ConfigError, "sqs/endpoint parameter is not supported for SQS, use s3_region instead. This parameter is for SQS compatible services"
  end

  parser_config = conf.elements("parse").first
  unless @sqs.queue_name
    raise Fluent::ConfigError, "sqs/queue_name is required"
  end

  # Compile the key filter now so that a malformed pattern is reported as a
  # configuration error instead of a RegexpError once the plugin starts.
  begin
    @include_file_regex = Regexp.new(@sqs.include_file_regex)
  rescue RegexpError => e
    raise Fluent::ConfigError, "sqs/include_file_regex is not a valid regular expression: #{e.message}"
  end

  Aws.use_bundled_cert! if @use_bundled_cert

  # The extractor class is looked up by the `store_as` value.
  @extractor = EXTRACTOR_REGISTRY.lookup(@store_as).new(log: log)
  @extractor.configure(conf)

  @parser = parser_create(conf: parser_config, default_type: DEFAULT_PARSE_TYPE)
end
143
+
144
# Connects to S3 and SQS, then launches the polling thread.
#
# Raises RuntimeError when the configured bucket does not exist, or (via
# #check_apikeys, when check_apikey_on_start is enabled) when the S3 API
# cannot be called.
def start
  super

  client = create_s3_client
  log.debug("Succeeded to create S3 client")
  @s3 = Aws::S3::Resource.new(client: client)
  @bucket = @s3.bucket(@s3_bucket)
  raise "#{@bucket.name} is not found." unless @bucket.exists?
  check_apikeys if @check_apikey_on_start

  sqs = create_sqs_client
  log.debug("Succeeded to create SQS client")
  queue_url = sqs.get_queue_url(
    queue_name: @sqs.queue_name,
    queue_owner_aws_account_id: @sqs.queue_owner_aws_account_id
  ).queue_url
  log.debug("Succeeded to get SQS queue URL")

  # Object keys are matched against this pattern in #process.
  @include_file_regex = Regexp.new(@sqs.include_file_regex)
  @poller = Aws::SQS::QueuePoller.new(queue_url, client: sqs)

  # #run checks this flag before every SQS request.
  @running = true
  thread_create(:in_s3) { run }
end
168
+
169
# Clears the running flag, which makes the before_request hook installed
# by #run stop polling, and then continues with the regular shutdown.
def shutdown
  @running = false
  super
end
173
+
174
+ private
175
+
176
# Polling loop executed on the plugin's worker thread.
#
# Each SQS message body is parsed as JSON; bodies without a "Records" key
# are skipped, all others are handed to #process. When handling a message
# raises, the error is logged and :skip_delete is thrown so the message is
# not deleted from the queue.
#
# When polling itself raises, the loop sleeps for sqs/retry_error_interval
# seconds and polls again. If the plugin is already shutting down, it
# returns immediately instead of sleeping through another interval.
def run
  options = {
    wait_time_seconds: @sqs.wait_time_seconds,
    skip_delete: @sqs.skip_delete
  }

  # Checked before every request so that #shutdown can end the loop.
  @poller.before_request do |_stats|
    throw :stop_polling unless @running
  end

  begin
    @poller.poll(options) do |message|
      begin
        body = Yajl.load(message.body)
        log.debug(body)
        next unless body["Records"] # skip test queue

        process(body)
      rescue => e
        log.warn(error: e)
        log.warn_backtrace(e.backtrace)
        throw :skip_delete
      end
    end
  rescue => e
    unless @running
      log.warn("SQS Polling Failed. Not retrying because the plugin is shutting down", error: e)
      return
    end

    log.warn("SQS Polling Failed. Retry in #{@sqs.retry_error_interval} seconds", error: e)
    sleep(@sqs.retry_error_interval)
    retry
  end
end
203
+
204
# Builds the credential-related part of the AWS client options.
#
# The first configured source wins, in this order: static key pair,
# assume-role, web identity, instance profile (ECS credentials when
# AWS_CONTAINER_CREDENTIALS_RELATIVE_URI is set), shared credentials file.
# With none configured an empty Hash is returned.
#
# Returns a Hash of client options.
def setup_credentials
  client_opts = {}
  cred_opts = {}

  if @aws_key_id && @aws_sec_key
    client_opts[:access_key_id] = @aws_key_id
    client_opts[:secret_access_key] = @aws_sec_key
  elsif @assume_role_credentials
    section = @assume_role_credentials
    cred_opts[:role_arn] = section.role_arn
    cred_opts[:role_session_name] = section.role_session_name
    cred_opts[:policy] = section.policy if section.policy
    cred_opts[:duration_seconds] = section.duration_seconds if section.duration_seconds
    cred_opts[:external_id] = section.external_id if section.external_id
    cred_opts[:client] = Aws::STS::Client.new(:region => @s3_region) if @s3_region
    client_opts[:credentials] = Aws::AssumeRoleCredentials.new(cred_opts)
  elsif @web_identity_credentials
    section = @web_identity_credentials
    cred_opts[:role_arn] = section.role_arn
    cred_opts[:role_session_name] = section.role_session_name
    cred_opts[:web_identity_token_file] = section.web_identity_token_file
    cred_opts[:policy] = section.policy if section.policy
    cred_opts[:duration_seconds] = section.duration_seconds if section.duration_seconds
    cred_opts[:client] = Aws::STS::Client.new(:region => @s3_region) if @s3_region
    client_opts[:credentials] = Aws::AssumeRoleWebIdentityCredentials.new(cred_opts)
  elsif @instance_profile_credentials
    section = @instance_profile_credentials
    cred_opts[:retries] = section.retries if section.retries
    cred_opts[:ip_address] = section.ip_address if section.ip_address
    cred_opts[:port] = section.port if section.port
    cred_opts[:http_open_timeout] = section.http_open_timeout if section.http_open_timeout
    cred_opts[:http_read_timeout] = section.http_read_timeout if section.http_read_timeout
    provider =
      if ENV["AWS_CONTAINER_CREDENTIALS_RELATIVE_URI"]
        Aws::ECSCredentials
      else
        Aws::InstanceProfileCredentials
      end
    client_opts[:credentials] = provider.new(cred_opts)
  elsif @shared_credentials
    section = @shared_credentials
    cred_opts[:path] = section.path if section.path
    cred_opts[:profile_name] = section.profile_name if section.profile_name
    client_opts[:credentials] = Aws::SharedCredentials.new(cred_opts)
  end
  # Otherwise: leave the options empty and use the default credentials.
  # See http://docs.aws.amazon.com/sdkforruby/api/Aws/S3/Client.html

  client_opts
end
256
+
257
# Builds the S3 client from the credential options plus the region,
# endpoint, path-style and proxy settings. Wire tracing through the plugin
# logger is switched on inside the logger's on_trace block.
#
# Returns an Aws::S3::Client.
def create_s3_client
  client_opts = setup_credentials
  { region: @s3_region, endpoint: @s3_endpoint }.each do |name, value|
    client_opts[name] = value if value
  end
  client_opts[:force_path_style] = @force_path_style
  client_opts[:http_proxy] = @proxy_uri if @proxy_uri
  log.on_trace { client_opts.update(http_wire_trace: true, logger: log) }

  Aws::S3::Client.new(client_opts)
end
270
+
271
# Builds the SQS client from the credential options plus the region,
# sqs/endpoint and proxy settings. Wire tracing through the plugin logger
# is switched on inside the logger's on_trace block.
#
# Returns an Aws::SQS::Client.
def create_sqs_client
  settings = setup_credentials.tap do |opts|
    opts[:region] = @s3_region if @s3_region
    opts[:endpoint] = @sqs.endpoint if @sqs.endpoint
    opts[:http_proxy] = @proxy_uri if @proxy_uri
  end
  log.on_trace { settings.update(http_wire_trace: true, logger: log) }

  Aws::SQS::Client.new(settings)
end
283
+
284
# Probes the S3 API by asking the bucket for its first object.
#
# Raises RuntimeError, with the underlying error's inspect output in the
# message, when that call fails for any StandardError.
def check_apikeys
  begin
    @bucket.objects.first
    log.debug("Succeeded to verify API keys")
  rescue => e
    raise "can't call S3 API. Please check your credentials or s3_region configuration. error = #{e.inspect}"
  end
end
290
+
291
# Turns one S3 event notification into Fluentd events.
#
# body - the decoded notification; body["Records"] is an Array whose
#        entries carry the object key under ["s3"]["object"]["key"].
#
# Every entry of "Records" is handled (previously only the first one was,
# so any further entries were silently dropped). For each entry the
# URL-decoded key is matched against sqs/include_file_regex; non-matching
# keys are logged and skipped. Matching objects are downloaded, extracted
# and parsed line by line, and all resulting events are emitted together
# as one stream. Nothing is emitted when no key matched.
def process(body)
  es = Fluent::MultiEventStream.new
  matched = false

  body["Records"].each do |notification|
    raw_key = notification["s3"]["object"]["key"]
    key = CGI.unescape(raw_key)

    unless key =~ @include_file_regex
      log.info("#{key} doesn't match regex. skipping")
      next
    end

    matched = true
    io = @bucket.object(key).get.body
    content = @extractor.extract(io)
    content.each_line do |line|
      @parser.parse(line) do |time, record|
        if @add_object_metadata
          record['s3_bucket'] = @s3_bucket
          # The key is recorded exactly as it appeared in the notification.
          record['s3_key'] = raw_key
        end
        es.add(time, record)
      end
    end
  end

  router.emit_stream(@tag, es) if matched
end
315
+
316
# Base class for extractors. #ext, #content_type and #extract are empty
# placeholders (they return nil) that the concrete extractors override.
class Extractor
  include Fluent::Configurable

  attr_reader :log

  # log - logger to expose through #log (defaults to the global $log).
  # Any other keyword options are accepted and ignored.
  def initialize(log: $log, **options)
    super()
    @log = log
  end

  # Delegates to the Fluent::Configurable implementation.
  def configure(conf)
    super
  end

  def ext; end

  def content_type; end

  def extract(io); end

  private

  # Runs "<command> -V" to confirm the executable can be launched.
  #
  # command - name of the external utility.
  # algo    - algorithm name used in the error message (defaults to command).
  #
  # Raises Fluent::ConfigError when the utility cannot be found.
  def check_command(command, algo = nil)
    require 'open3'

    label = algo.nil? ? command : algo
    Open3.capture3("#{command} -V")
  rescue Errno::ENOENT
    raise Fluent::ConfigError, "'#{command}' utility must be in PATH for #{label} compression"
  end
end
352
+
353
+ class GzipExtractor < Extractor
354
# File extension used for gzip archives.
def ext
  'gz'.freeze
end
357
+
358
# MIME type used for gzip archives.
def content_type
  'application/x-gzip'.freeze
end
361
+
362
+ # https://bugs.ruby-lang.org/issues/9790
363
+ # https://bugs.ruby-lang.org/issues/11180
364
+ # https://github.com/exAspArk/multiple_files_gzip_reader
365
# Decompresses a gzip stream that may consist of several concatenated gzip
# members and returns the joined, decompressed text.
#
# Zlib::GzipReader stops after the first member (see
# https://bugs.ruby-lang.org/issues/9790 and
# https://bugs.ruby-lang.org/issues/11180), so the stream is re-opened at
# the start of each following member until it is exhausted.
#
# io - a seekable IO positioned at the start of the gzip data.
#
# The io is closed before returning, including when Zlib rejects the data
# (previously it stayed open in that case).
#
# Raises Zlib::GzipFile::Error when the data is not valid gzip.
def extract(io)
  parts = []
  loop do
    unused = nil
    Zlib::GzipReader.wrap(io) do |gz|
      parts << gz.read
      unused = gz.unused
      # finish (not close) so the underlying io stays open for the next member.
      gz.finish
    end
    # The reader may have buffered bytes belonging to the next member;
    # rewind by that amount so the next reader starts on its header.
    io.pos -= unused.length if unused
    break if io.eof?
  end
  parts.join
ensure
  io.close if io.respond_to?(:close) && !io.closed?
end
380
+ end
381
+
382
# Extractor for uncompressed text: the object body is returned as read.
class TextExtractor < Extractor
  def ext
    'txt'.freeze
  end

  def content_type
    'text/plain'.freeze
  end

  # Returns whatever io.read yields; the io is not closed here.
  def extract(io)
    io.read
  end
end
395
+
396
# Extractor for JSON objects. Only the extension and content type differ
# from TextExtractor; #extract is inherited.
class JsonExtractor < TextExtractor
  def ext
    'json'.freeze
  end

  def content_type
    'application/json'.freeze
  end
end
405
+
406
# Registry that #configure consults with the `store_as` value; the three
# built-in extractors are registered here.
EXTRACTOR_REGISTRY = Fluent::Registry.new(:s3_extractor_type, 'fluent/plugin/s3_extractor_')
[
  ['gzip', GzipExtractor],
  ['text', TextExtractor],
  ['json', JsonExtractor]
].each { |name, klass| EXTRACTOR_REGISTRY.register(name, klass) }
414
+
415
# Adds an extractor class to EXTRACTOR_REGISTRY under the given name.
#
# name      - the registry key.
# extractor - the extractor class.
def self.register_extractor(name, extractor)
  EXTRACTOR_REGISTRY.register(name, extractor)
end
418
+ end
419
+ end