fluent-plugin-s3-file-inclusion 1.4.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,14 @@
1
+
2
# Rakefile: gem packaging tasks plus a unit-test task.
require 'bundler'
require 'rake/testtask'

# Adds the tasks provided by Bundler's gem helper.
Bundler::GemHelper.install_tasks

# `rake test` runs every test/test_*.rb file with lib/ and test/ on the load path.
Rake::TestTask.new(:test) do |t|
  t.libs.push('lib', 'test')
  t.test_files = FileList['test/test_*.rb']
  t.verbose = true
end

# Running plain `rake` invokes the :build task.
task default: [:build]
14
+
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 1.4.1
@@ -0,0 +1,25 @@
1
# AppVeyor CI configuration: installs the bundle and runs the test suite
# once per Ruby build listed in the environment matrix.
version: "{build}"

environment:
  matrix:
    # The Ruby 2.2 entries point at the Ruby 2.1 DevKit directories.
    - ruby_version: "22-x64"
      devkit: 'C:\Ruby21-x64\DevKit'
    - ruby_version: "22"
      devkit: 'C:\Ruby21\DevKit'
    - ruby_version: "21-x64"
      devkit: 'C:\Ruby21-x64\DevKit'
    - ruby_version: "21"
      devkit: 'C:\Ruby21\DevKit'

matrix:
  # Failures of this job are tolerated.
  allow_failures:
    - ruby_version: "21"

install:
  # Put the selected Ruby first on PATH, then load the DevKit environment.
  - SET PATH=C:\Ruby%ruby_version%\bin;%PATH%
  - '%devkit%\devkitvars.bat'
  - ruby --version
  - gem --version
  - bundle install

# No separate build step; only the test script runs.
build: off

test_script:
  - bundle exec rake test TESTOPTS=-v
@@ -0,0 +1,26 @@
1
# encoding: utf-8
$:.push File.expand_path('../lib', __FILE__)

# Gem specification for fluent-plugin-s3-file-inclusion.
Gem::Specification.new do |gem|
  gem.name        = "fluent-plugin-s3-file-inclusion"
  gem.description = "Amazon S3 output plugin for Fluentd event collector"
  gem.license     = "Apache-2.0"
  gem.homepage    = "https://github.com/fluent/fluent-plugin-s3"
  gem.summary     = gem.description
  # Read VERSION relative to this gemspec instead of the process working
  # directory, so loading the spec from another directory does not fail with
  # Errno::ENOENT.
  gem.version     = File.read(File.expand_path('../VERSION', __FILE__)).strip
  gem.authors     = ["Sadayuki Furuhashi", "Masahiro Nakagawa"]
  gem.email       = "frsyuki@gmail.com"
  #gem.platform    = Gem::Platform::RUBY

  # File lists come from git, so they reflect the files tracked in the
  # repository the command runs in.
  gem.files         = `git ls-files`.split("\n")
  gem.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  gem.executables   = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  gem.require_paths = ['lib']

  # Runtime dependencies.
  gem.add_dependency "fluentd", [">= 0.14.22", "< 2"]
  gem.add_dependency "aws-sdk-s3", "~> 1.60"
  gem.add_dependency "aws-sdk-sqs", "~> 1.23"

  # Development / test dependencies.
  gem.add_development_dependency "rake", ">= 0.9.2"
  gem.add_development_dependency "test-unit", ">= 3.0.8"
  gem.add_development_dependency "test-unit-rr", ">= 1.0.3"
  gem.add_development_dependency "timecop"
end
@@ -0,0 +1,12 @@
1
+ require 'fluent/log'
2
+ # For Fluentd v0.14.13 or earlier
3
+ # The logger passed to Aws::S3::Client and Aws::SQS::Client must respond to `#<<`
4
module Fluent
  class Log
    # Compatibility shim: when Log does not already provide `#<<`, add one
    # that forwards the message to #write.
    define_method(:<<) { |message| write(message) } unless method_defined?(:<<)
  end
end
@@ -0,0 +1,419 @@
1
+ require 'fluent/plugin/input'
2
+ require 'fluent/log-ext'
3
+
4
+ require 'aws-sdk-s3'
5
+ require 'aws-sdk-sqs'
6
+ require 'aws-sdk-sqs/queue_poller'
7
+ require 'cgi/util'
8
+ require 'zlib'
9
+ require 'time'
10
+ require 'tempfile'
11
+
12
+ module Fluent::Plugin
13
+ class S3Input < Input
14
+ Fluent::Plugin.register_input('s3', self)
15
+
16
+ helpers :compat_parameters, :parser, :thread
17
+
18
# Runs the base-class initializer and leaves the extractor unset;
# #configure assigns the real extractor.
def initialize
  super()
  @extractor = nil
end
22
+
23
# Parser type applied when the <parse> section does not name one.
DEFAULT_PARSE_TYPE = 'none'

# ---- General options and static credentials --------------------------------
desc 'Use aws-sdk-ruby bundled cert'
config_param :use_bundled_cert, :bool, default: false
desc 'Add object metadata to the records parsed out of a given object'
config_param :add_object_metadata, :bool, default: false
desc 'AWS access key id'
config_param :aws_key_id, :string, default: nil, secret: true
desc 'AWS secret key.'
config_param :aws_sec_key, :string, default: nil, secret: true

# ---- Credentials obtained by assuming a role --------------------------------
config_section :assume_role_credentials, multi: false do
  desc 'The Amazon Resource Name (ARN) of the role to assume'
  config_param :role_arn, :string
  desc 'An identifier for the assumed role session'
  config_param :role_session_name, :string
  desc 'An IAM policy in JSON format'
  config_param :policy, :string, default: nil
  desc 'The duration, in seconds, of the role session (900-3600)'
  config_param :duration_seconds, :integer, default: nil
  desc "A unique identifier that is used by third parties when assuming roles in their customers' accounts."
  config_param :external_id, :string, default: nil
end

# ---- Credentials obtained through a web identity (OIDC) token ----------------
# See the following link for additional params that could be added:
# https://docs.aws.amazon.com/sdk-for-ruby/v3/api/Aws/STS/Client.html#assume_role_with_web_identity-instance_method
config_section :web_identity_credentials, multi: false do
  desc 'The Amazon Resource Name (ARN) of the role to assume'
  config_param :role_arn, :string # required
  desc 'An identifier for the assumed role session'
  config_param :role_session_name, :string # required
  desc 'The absolute path to the file on disk containing the OIDC token'
  config_param :web_identity_token_file, :string # required
  desc 'An IAM policy in JSON format'
  config_param :policy, :string, default: nil
  desc 'The duration, in seconds, of the role session (900-43200)'
  config_param :duration_seconds, :integer, default: nil
end

# ---- Credentials obtained from the instance profile --------------------------
config_section :instance_profile_credentials, multi: false do
  desc 'Number of times to retry when retrieving credentials'
  config_param :retries, :integer, default: nil
  desc 'IP address (default:169.254.169.254)'
  config_param :ip_address, :string, default: nil
  desc 'Port number (default:80)'
  config_param :port, :integer, default: nil
  desc 'Number of seconds to wait for the connection to open'
  config_param :http_open_timeout, :float, default: nil
  desc 'Number of seconds to wait for one block to be read'
  config_param :http_read_timeout, :float, default: nil
  # Not exposed (kept here as notes):
  # config_param :delay, :integer or :proc, :default => nil
  # config_param :http_debug_output, :io, :default => nil
end

# ---- Credentials read from a shared credentials file -------------------------
config_section :shared_credentials, multi: false do
  desc 'Path to the shared file. (default: $HOME/.aws/credentials)'
  config_param :path, :string, default: nil
  desc "Profile name. Default to 'default' or ENV['AWS_PROFILE']"
  config_param :profile_name, :string, default: nil
end

# ---- S3 bucket / endpoint options --------------------------------------------
desc 'S3 bucket name'
config_param :s3_bucket, :string
desc 'S3 region name'
config_param :s3_region, :string, default: ENV['AWS_REGION'] || 'us-east-1'
desc "Use 's3_region' instead"
config_param :s3_endpoint, :string, default: nil
desc 'If true, the bucket name is always left in the request URI and never moved to the host as a sub-domain'
config_param :force_path_style, :bool, default: false
desc 'Archive format on S3'
config_param :store_as, :string, default: 'gzip'
desc 'Check AWS key on start'
config_param :check_apikey_on_start, :bool, default: true
desc 'URI of proxy environment'
config_param :proxy_uri, :string, default: nil

# ---- SQS queue that delivers the S3 event notifications (mandatory) ----------
config_section :sqs, required: true, multi: false do
  desc 'SQS queue name'
  config_param :queue_name, :string, default: nil
  desc 'SQS Owner Account ID'
  config_param :queue_owner_aws_account_id, :string, default: nil
  desc "Use 's3_region' instead"
  config_param :endpoint, :string, default: nil
  desc 'Skip message deletion'
  config_param :skip_delete, :bool, default: false
  desc 'The long polling interval.'
  config_param :wait_time_seconds, :integer, default: 20
  desc 'Polling error retry interval.'
  config_param :retry_error_interval, :integer, default: 300
  desc 'regex to only process files'
  config_param :include_file_regex, :string, default: '.*'
end

# ---- Emission / parsing ------------------------------------------------------
desc 'Tag string'
config_param :tag, :string, default: 'input.s3'

config_section :parse do
  config_set_default :@type, DEFAULT_PARSE_TYPE
end

# The bucket resource assigned in #start.
attr_reader :bucket
119
+
120
# Validates the configuration and builds the extractor and parser.
#
# @param conf the plugin configuration element handed over by Fluentd
# @raise [Fluent::ConfigError] when an endpoint points at a regular AWS host
#   or when sqs/queue_name is missing
def configure(conf)
  super

  # An endpoint is rejected when it ends with "amazonaws.com" and contains
  # neither "fips" nor "gov".
  plain_aws_host = lambda do |endpoint|
    endpoint &&
      endpoint.end_with?('amazonaws.com') &&
      ['fips', 'gov'].none? { |marker| endpoint.include?(marker) }
  end

  if plain_aws_host.call(@s3_endpoint)
    raise Fluent::ConfigError, "s3_endpoint parameter is not supported for S3, use s3_region instead. This parameter is for S3 compatible services"
  end

  if plain_aws_host.call(@sqs.endpoint)
    raise Fluent::ConfigError, "sqs/endpoint parameter is not supported for SQS, use s3_region instead. This parameter is for SQS compatible services"
  end

  parse_section = conf.elements("parse").first
  raise Fluent::ConfigError, "sqs/queue_name is required" unless @sqs.queue_name

  Aws.use_bundled_cert! if @use_bundled_cert

  # The extractor class is selected by the store_as parameter.
  extractor_class = EXTRACTOR_REGISTRY.lookup(@store_as)
  @extractor = extractor_class.new(log: log)
  @extractor.configure(conf)

  @parser = parser_create(conf: parse_section, default_type: DEFAULT_PARSE_TYPE)
end
143
+
144
# Creates the S3 and SQS clients, checks the bucket, resolves the queue URL
# and starts the polling thread.
#
# @raise [RuntimeError] when the configured bucket does not exist
def start
  super

  client_for_s3 = create_s3_client
  log.debug("Succeeded to create S3 client")
  @s3 = Aws::S3::Resource.new(client: client_for_s3)
  @bucket = @s3.bucket(@s3_bucket)

  unless @bucket.exists?
    raise "#{@bucket.name} is not found."
  end

  check_apikeys if @check_apikey_on_start

  client_for_sqs = create_sqs_client
  log.debug("Succeeded to create SQS client")
  queue_url = client_for_sqs.get_queue_url(
    queue_name: @sqs.queue_name,
    queue_owner_aws_account_id: @sqs.queue_owner_aws_account_id
  ).queue_url
  log.debug("Succeeded to get SQS queue URL")

  # Compiled once here; #process matches object keys against it.
  @include_file_regex = Regexp.new(@sqs.include_file_regex)

  @poller = Aws::SQS::QueuePoller.new(queue_url, client: client_for_sqs)

  @running = true
  thread_create(:in_s3) { run }
end
168
+
169
# Clears the running flag (checked by the poller's before_request hook in
# #run) and then runs the base-class shutdown.
def shutdown
  @running = false
  super
end
173
+
174
+ private
175
+
176
# Polling loop executed in the plugin thread.
#
# Each received SQS message is parsed as JSON and handed to #process. A
# failure inside the handler is logged and the message is kept in the queue
# (:skip_delete). A failure of the polling itself is logged, followed by a
# sleep of sqs/retry_error_interval seconds and another polling attempt.
def run
  poll_options = {
    wait_time_seconds: @sqs.wait_time_seconds,
    skip_delete: @sqs.skip_delete
  }

  # Leave the poll loop once #shutdown has cleared the running flag.
  @poller.before_request { |_stats| throw :stop_polling unless @running }

  begin
    @poller.poll(poll_options) do |message|
      begin
        payload = Yajl.load(message.body)
        log.debug(payload)
        # Messages without "Records" are skipped (original note: test queue).
        process(payload) if payload["Records"]
      rescue => e
        log.warn(error: e)
        log.warn_backtrace(e.backtrace)
        throw :skip_delete
      end
    end
  rescue => e
    log.warn("SQS Polling Failed. Retry in #{@sqs.retry_error_interval} seconds", error: e)
    sleep(@sqs.retry_error_interval)
    retry
  end
end
203
+
204
# Builds the credential-related part of the AWS client options.
#
# The first configured source wins, in this order: static key pair,
# assume-role, web identity, instance profile (ECS credentials when
# AWS_CONTAINER_CREDENTIALS_RELATIVE_URI is set), shared credentials file.
# With none configured the hash stays empty and the SDK defaults apply.
#
# @return [Hash] options to merge into the client constructor arguments
def setup_credentials
  options = {}
  credentials_options = {}

  if @aws_key_id && @aws_sec_key
    options[:access_key_id] = @aws_key_id
    options[:secret_access_key] = @aws_sec_key
  elsif @assume_role_credentials
    section = @assume_role_credentials
    credentials_options[:role_arn] = section.role_arn
    credentials_options[:role_session_name] = section.role_session_name
    credentials_options[:policy] = section.policy if section.policy
    credentials_options[:duration_seconds] = section.duration_seconds if section.duration_seconds
    credentials_options[:external_id] = section.external_id if section.external_id
    credentials_options[:client] = Aws::STS::Client.new(region: @s3_region) if @s3_region
    options[:credentials] = Aws::AssumeRoleCredentials.new(credentials_options)
  elsif @web_identity_credentials
    section = @web_identity_credentials
    credentials_options[:role_arn] = section.role_arn
    credentials_options[:role_session_name] = section.role_session_name
    credentials_options[:web_identity_token_file] = section.web_identity_token_file
    credentials_options[:policy] = section.policy if section.policy
    credentials_options[:duration_seconds] = section.duration_seconds if section.duration_seconds
    credentials_options[:client] = Aws::STS::Client.new(region: @s3_region) if @s3_region
    options[:credentials] = Aws::AssumeRoleWebIdentityCredentials.new(credentials_options)
  elsif @instance_profile_credentials
    section = @instance_profile_credentials
    credentials_options[:retries] = section.retries if section.retries
    credentials_options[:ip_address] = section.ip_address if section.ip_address
    credentials_options[:port] = section.port if section.port
    credentials_options[:http_open_timeout] = section.http_open_timeout if section.http_open_timeout
    credentials_options[:http_read_timeout] = section.http_read_timeout if section.http_read_timeout
    provider = ENV["AWS_CONTAINER_CREDENTIALS_RELATIVE_URI"] ? Aws::ECSCredentials : Aws::InstanceProfileCredentials
    options[:credentials] = provider.new(credentials_options)
  elsif @shared_credentials
    section = @shared_credentials
    credentials_options[:path] = section.path if section.path
    credentials_options[:profile_name] = section.profile_name if section.profile_name
    options[:credentials] = Aws::SharedCredentials.new(credentials_options)
  end
  # Otherwise: use default credentials.
  # See http://docs.aws.amazon.com/sdkforruby/api/Aws/S3/Client.html

  options
end
256
+
257
# Builds the S3 client from the credential options plus region, endpoint,
# path-style and proxy settings. Wire tracing and the plugin logger are added
# inside the logger's on_trace block.
#
# @return [Aws::S3::Client]
def create_s3_client
  client_options = setup_credentials
  client_options[:region] = @s3_region if @s3_region
  client_options[:endpoint] = @s3_endpoint if @s3_endpoint
  client_options[:force_path_style] = @force_path_style
  client_options[:http_proxy] = @proxy_uri if @proxy_uri

  log.on_trace do
    client_options[:http_wire_trace] = true
    client_options[:logger] = log
  end

  Aws::S3::Client.new(client_options)
end
270
+
271
# Builds the SQS client from the credential options plus region, the
# sqs/endpoint override and proxy settings. Wire tracing and the plugin
# logger are added inside the logger's on_trace block.
#
# @return [Aws::SQS::Client]
def create_sqs_client
  client_options = setup_credentials
  client_options[:region] = @s3_region if @s3_region
  client_options[:endpoint] = @sqs.endpoint if @sqs.endpoint
  client_options[:http_proxy] = @proxy_uri if @proxy_uri

  log.on_trace do
    client_options[:http_wire_trace] = true
    client_options[:logger] = log
  end

  Aws::SQS::Client.new(client_options)
end
283
+
284
# Verifies that the S3 API can be called by requesting the first object of
# the bucket listing.
#
# @raise [RuntimeError] wrapping any StandardError raised by the check
def check_apikeys
  begin
    @bucket.objects.first
    log.debug("Succeeded to verify API keys")
  rescue => e
    raise "can't call S3 API. Please check your credentials or s3_region configuration. error = #{e.inspect}"
  end
end
290
+
291
# Fetches every S3 object referenced by an event notification and emits its
# parsed lines under the configured tag.
#
# Every entry of body["Records"] is handled, not just the first one, so a
# notification that carries several records no longer loses the later ones.
# A key that does not match the include regex is logged and skipped; the
# remaining records are still processed. One event stream is emitted per
# downloaded object.
#
# @param body [Hash] decoded SQS message body; must contain a "Records" array
#   whose entries hold the object key under ["s3"]["object"]["key"]
# @return [void]
def process(body)
  body["Records"].each do |notification|
    raw_key = notification["s3"]["object"]["key"]
    # Keys arrive URL-encoded in the notification; decode before use.
    key = CGI.unescape(raw_key)

    unless key =~ @include_file_regex
      log.info("#{key} doesn't match regex. skipping")
      next
    end

    io = @bucket.object(key).get.body
    content = @extractor.extract(io)
    es = Fluent::MultiEventStream.new
    content.each_line do |line|
      @parser.parse(line) do |time, record|
        if @add_object_metadata
          record['s3_bucket'] = @s3_bucket
          # The metadata carries the key exactly as received (still encoded).
          record['s3_key'] = raw_key
        end
        es.add(time, record)
      end
    end
    router.emit_stream(@tag, es)
  end
end
315
+
316
# Base class for extractors. The ext, content_type and extract methods are
# empty here and return nil; subclasses override them.
class Extractor
  include Fluent::Configurable

  attr_reader :log

  # @param log logger to use; falls back to the global $log
  # @param options additional keyword arguments (accepted, not used here)
  def initialize(log: $log, **options)
    super()
    @log = log
  end

  def configure(conf)
    super
  end

  def ext; end

  def content_type; end

  def extract(io); end

  private

  # Runs "<command> -V" to check that an external utility can be started.
  #
  # @param command [String] executable name
  # @param algo [String, nil] name used in the error message; the command
  #   name is used when nil
  # @raise [Fluent::ConfigError] when starting the command raises Errno::ENOENT
  def check_command(command, algo = nil)
    require 'open3'

    label = algo.nil? ? command : algo
    begin
      Open3.capture3("#{command} -V")
    rescue Errno::ENOENT
      raise Fluent::ConfigError, "'#{command}' utility must be in PATH for #{label} compression"
    end
  end
end
352
+
353
# Extractor for gzip data, including input made of several gzip members
# written back to back.
class GzipExtractor < Extractor
  def ext
    'gz'.freeze
  end

  def content_type
    'application/x-gzip'.freeze
  end

  # Decompresses every gzip member found in +io+ and returns the joined text.
  # The io is closed afterwards.
  #
  # Background on reading multi-member input with GzipReader:
  # https://bugs.ruby-lang.org/issues/9790
  # https://bugs.ruby-lang.org/issues/11180
  # https://github.com/exAspArk/multiple_files_gzip_reader
  def extract(io)
    chunks = []
    loop do
      leftover = nil
      Zlib::GzipReader.wrap(io) do |reader|
        chunks.push(reader.read)
        leftover = reader.unused
        reader.finish
      end
      # Step back over the bytes the reader reported as unused so the next
      # pass starts at them.
      rewind_by = leftover ? leftover.length : 0
      io.pos -= rewind_by
      break if io.eof?
    end
    io.close
    chunks.join
  end
end
381
+
382
# Extractor for uncompressed text: the content is returned as read.
class TextExtractor < Extractor
  def ext
    'txt'.freeze
  end

  def content_type
    'text/plain'.freeze
  end

  # @param io readable object
  # @return whatever io.read returns
  def extract(io)
    io.read
  end
end
395
+
396
# Same extraction as TextExtractor; only the extension and content type differ.
class JsonExtractor < TextExtractor
  def ext
    'json'.freeze
  end

  def content_type
    'application/json'.freeze
  end
end
405
+
406
# Registry mapping a store_as name to an extractor class.
EXTRACTOR_REGISTRY = Fluent::Registry.new(:s3_extractor_type, 'fluent/plugin/s3_extractor_')

# Built-in extractors.
[
  ['gzip', GzipExtractor],
  ['text', TextExtractor],
  ['json', JsonExtractor]
].each { |name, klass| EXTRACTOR_REGISTRY.register(name, klass) }

# Registers an additional extractor class under the given name.
def self.register_extractor(name, extractor)
  EXTRACTOR_REGISTRY.register(name, extractor)
end
418
+ end
419
+ end