fluent-plugin-s3 1.3.3 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/docs/v0.12.md ADDED
@@ -0,0 +1,52 @@
1
+ # Configuration: Output (v0.12 style)
2
+
3
+ Here is a sample configuration for the old fluentd v0.12. It also works with fluentd v1, but it is not recommended for it.
4
+
5
+ <match pattern>
6
+ @type s3
7
+
8
+ aws_key_id YOUR_AWS_KEY_ID
9
+ aws_sec_key YOUR_AWS_SECRET_KEY
10
+ s3_bucket YOUR_S3_BUCKET_NAME
11
+ s3_region ap-northeast-1
12
+
13
+ path logs/
14
+ s3_object_key_format %{path}%{time_slice}_%{index}.%{file_extension}
15
+ buffer_path /var/log/fluent/s3
16
+ time_slice_format %Y%m%d-%H
17
+ time_slice_wait 10m
18
+ utc
19
+ format json
20
+ </match>
21
+
22
+ If you want to embed the tag in [`path`](output.md#path) / [`s3_object_key_format`](output.md#s3_object_key_format), you need to use the `fluent-plugin-forest` plugin.
23
+
24
+ The following sections describe only the differences from v1. The other parameters are the same as in v1; see [Configuration: Output](output.md) for them.
25
+
26
+ ## format (for v0.12)
27
+
28
+ @format json
29
+ include_time_key true
30
+ time_key log_time # default is time
31
+
32
+ This parameter is for v0.12. Use [`<format>`](https://docs.fluentd.org/configuration/format-section) and [`<inject>`](https://docs.fluentd.org/configuration/inject-section) for v1.
33
+
34
+ ## buffer_path (for v0.12)
35
+
36
+ Path prefix of the files used to buffer logs.
37
+
38
+ This parameter is for v0.12. Use [`<buffer>`](https://docs.fluentd.org/configuration/buffer-section)'s `path` in v1.
39
+
40
+ ## time_slice_format (for v0.12)
41
+
42
+ Format of the time used as the file name. Default is '%Y%m%d'. Use
43
+ '%Y%m%d%H' to split files hourly.
44
+
45
+ This parameter is for v0.12. Use [buffer placeholder](https://docs.fluentd.org/configuration/buffer-section#placeholders) for [`path`](output.md#path) / [`s3_object_key_format`](output.md#s3_object_key_format) in v1.
46
+
47
+ ## time_slice_wait (for v0.12)
48
+
49
+ The time to wait for old logs. Default is 10 minutes. Specify a larger value if
50
+ old logs may arrive late.
51
+
52
+ This parameter is for v0.12. Use [`<buffer>`](https://docs.fluentd.org/configuration/buffer-section)'s `timekey_wait` in v1.
@@ -192,7 +192,7 @@ module Fluent::Plugin
192
192
  end
193
193
  end
194
194
  rescue => e
195
- log.warn("SQS Polling Failed. Retry in #{@sqs.retry_error_interval} seconds")
195
+ log.warn("SQS Polling Failed. Retry in #{@sqs.retry_error_interval} seconds", error: e)
196
196
  sleep(@sqs.retry_error_interval)
197
197
  retry
198
198
  end
@@ -5,6 +5,7 @@ require 'aws-sdk-s3'
5
5
  require 'zlib'
6
6
  require 'time'
7
7
  require 'tempfile'
8
+ require 'securerandom'
8
9
 
9
10
  module Fluent::Plugin
10
11
  class S3Output < Output
@@ -41,6 +42,10 @@ module Fluent::Plugin
41
42
  config_param :external_id, :string, default: nil, secret: true
42
43
  desc "The region of the STS endpoint to use."
43
44
  config_param :sts_region, :string, default: nil
45
+ desc "A http proxy url for requests to aws sts service"
46
+ config_param :sts_http_proxy, :string, default: nil, secret: true
47
+ desc "A url for a regional sts api endpoint, the default is global"
48
+ config_param :sts_endpoint_url, :string, default: nil
44
49
  end
45
50
  # See the following link for additional params that could be added:
46
51
  # https://docs.aws.amazon.com/sdk-for-ruby/v3/api/Aws/STS/Client.html#assume_role_with_web_identity-instance_method
@@ -380,7 +385,7 @@ module Fluent::Plugin
380
385
  end
381
386
 
382
387
  def uuid_random
383
- ::UUIDTools::UUID.random_create.to_s
388
+ SecureRandom.uuid
384
389
  end
385
390
 
386
391
  # This is stolen from Fluentd
@@ -437,17 +442,6 @@ module Fluent::Plugin
437
442
  }
438
443
 
439
444
  if @s3_object_key_format.include?('%{uuid_flush}')
440
- # test uuidtools works or not
441
- begin
442
- require 'uuidtools'
443
- rescue LoadError
444
- raise Fluent::ConfigError, "uuidtools gem not found. Install uuidtools gem first"
445
- end
446
- begin
447
- uuid_random
448
- rescue => e
449
- raise Fluent::ConfigError, "Generating uuid doesn't work. Can't use %{uuid_flush} on this environment. #{e}"
450
- end
451
445
  @uuid_flush_enabled = true
452
446
  end
453
447
 
@@ -479,22 +473,47 @@ module Fluent::Plugin
479
473
  options = {}
480
474
  credentials_options = {}
481
475
  case
482
- when @aws_key_id && @aws_sec_key
483
- options[:access_key_id] = @aws_key_id
484
- options[:secret_access_key] = @aws_sec_key
485
476
  when @assume_role_credentials
486
477
  c = @assume_role_credentials
478
+ iam_user_credentials = @aws_key_id && @aws_sec_key ? Aws::Credentials.new(@aws_key_id, @aws_sec_key) : nil
479
+ region = c.sts_region || @s3_region
487
480
  credentials_options[:role_arn] = c.role_arn
488
481
  credentials_options[:role_session_name] = c.role_session_name
489
482
  credentials_options[:policy] = c.policy if c.policy
490
483
  credentials_options[:duration_seconds] = c.duration_seconds if c.duration_seconds
491
484
  credentials_options[:external_id] = c.external_id if c.external_id
492
- if c.sts_region
493
- credentials_options[:client] = Aws::STS::Client.new(region: c.sts_region)
494
- elsif @s3_region
495
- credentials_options[:client] = Aws::STS::Client.new(region: @s3_region)
485
+ credentials_options[:sts_endpoint_url] = c.sts_endpoint_url if c.sts_endpoint_url
486
+ credentials_options[:sts_http_proxy] = c.sts_http_proxy if c.sts_http_proxy
487
+ if c.sts_http_proxy && c.sts_endpoint_url
488
+ credentials_options[:client] = if iam_user_credentials
489
+ Aws::STS::Client.new(region: region, http_proxy: c.sts_http_proxy, endpoint: c.sts_endpoint_url, credentials: iam_user_credentials)
490
+ else
491
+ Aws::STS::Client.new(region: region, http_proxy: c.sts_http_proxy, endpoint: c.sts_endpoint_url)
492
+ end
493
+ elsif c.sts_http_proxy
494
+ credentials_options[:client] = if iam_user_credentials
495
+ Aws::STS::Client.new(region: region, http_proxy: c.sts_http_proxy, credentials: iam_user_credentials)
496
+ else
497
+ Aws::STS::Client.new(region: region, http_proxy: c.sts_http_proxy)
498
+ end
499
+ elsif c.sts_endpoint_url
500
+ credentials_options[:client] = if iam_user_credentials
501
+ Aws::STS::Client.new(region: region, endpoint: c.sts_endpoint_url, credentials: iam_user_credentials)
502
+ else
503
+ Aws::STS::Client.new(region: region, endpoint: c.sts_endpoint_url)
504
+ end
505
+ else
506
+ credentials_options[:client] = if iam_user_credentials
507
+ Aws::STS::Client.new(region: region, credentials: iam_user_credentials)
508
+ else
509
+ Aws::STS::Client.new(region: region)
510
+ end
496
511
  end
512
+
497
513
  options[:credentials] = Aws::AssumeRoleCredentials.new(credentials_options)
514
+ when @aws_key_id && @aws_sec_key
515
+ options[:access_key_id] = @aws_key_id
516
+ options[:secret_access_key] = @aws_sec_key
498
517
  when @web_identity_credentials
499
518
  c = @web_identity_credentials
500
519
  credentials_options[:role_arn] = c.role_arn
@@ -0,0 +1,83 @@
1
+ require "open3"
2
+
3
+ module Fluent::Plugin
4
+ class S3Output
5
+ class ParquetCompressor < Compressor
6
+ S3Output.register_compressor("parquet", self)
7
+
8
+ config_section :compress, multi: false do
9
+ desc "parquet compression codec"
10
+ config_param :parquet_compression_codec, :enum, list: [:uncompressed, :snappy, :gzip, :lzo, :brotli, :lz4, :zstd], default: :snappy
11
+ desc "parquet file page size"
12
+ config_param :parquet_page_size, :size, default: 8192
13
+ desc "parquet file row group size"
14
+ config_param :parquet_row_group_size, :size, default: 128 * 1024 * 1024
15
+ desc "record data format type"
16
+ config_param :record_type, :enum, list: [:avro, :csv, :jsonl, :msgpack, :tsv, :json], default: :msgpack
17
+ desc "schema type"
18
+ config_param :schema_type, :enum, list: [:avro, :bigquery], default: :avro
19
+ desc "path to schema file"
20
+ config_param :schema_file, :string
21
+ end
22
+
23
+ def configure(conf)
24
+ super
25
+ check_command("columnify", "-h")
26
+
27
+ if [:lzo, :brotli, :lz4].include?(@compress.parquet_compression_codec)
28
+ raise Fluent::ConfigError, "unsupported compression codec: #{@compress.parquet_compression_codec}"
29
+ end
30
+
31
+ @parquet_compression_codec = @compress.parquet_compression_codec.to_s.upcase
32
+ if @compress.record_type == :json
33
+ @record_type = :jsonl
34
+ else
35
+ @record_type = @compress.record_type
36
+ end
37
+ end
38
+
39
+ def ext
40
+ "parquet".freeze
41
+ end
42
+
43
+ def content_type
44
+ "application/octet-stream".freeze
45
+ end
46
+
47
+ def compress(chunk, tmp)
48
+ chunk_is_file = @buffer_type == "file"
49
+ path = if chunk_is_file
50
+ chunk.path
51
+ else
52
+ w = Tempfile.new("chunk-parquet-tmp")
53
+ w.binmode
54
+ chunk.write_to(w)
55
+ w.close
56
+ w.path
57
+ end
58
+ stdout, stderr, status = columnify(path, tmp.path)
59
+ unless status.success?
60
+ raise Fluent::UnrecoverableError, "failed to execute columnify command. stdout=#{stdout} stderr=#{stderr} status=#{status.inspect}"
61
+ end
62
+ ensure
63
+ unless chunk_is_file
64
+ w.close(true) rescue nil
65
+ end
66
+ end
67
+
68
+ private
69
+
70
+ def columnify(src_path, dst_path)
71
+ Open3.capture3("columnify",
72
+ "-parquetCompressionCodec", @parquet_compression_codec,
73
+ "-parquetPageSize", @compress.parquet_page_size.to_s,
74
+ "-parquetRowGroupSize", @compress.parquet_row_group_size.to_s,
75
+ "-recordType", @record_type.to_s,
76
+ "-schemaType", @compress.schema_type.to_s,
77
+ "-schemaFile", @compress.schema_file,
78
+ "-output", dst_path,
79
+ src_path)
80
+ end
81
+ end
82
+ end
83
+ end
data/test/test_out_s3.rb CHANGED
@@ -9,7 +9,6 @@ require 'test/unit/rr'
9
9
  require 'zlib'
10
10
  require 'fileutils'
11
11
  require 'timecop'
12
- require 'uuidtools'
13
12
  require 'ostruct'
14
13
 
15
14
  include Fluent::Test::Helpers
@@ -349,17 +348,11 @@ EOC
349
348
 
350
349
  def test_write_with_custom_s3_object_key_format_containing_uuid_flush_placeholder
351
350
 
352
- begin
353
- require 'uuidtools'
354
- rescue LoadError
355
- pend("uuidtools not found. skip this test")
356
- end
357
-
358
351
  # Partial mock the S3Bucket, not to make an actual connection to Amazon S3
359
352
  setup_mocks(true)
360
353
 
361
354
  uuid = "5755e23f-9b54-42d8-8818-2ea38c6f279e"
362
- stub(::UUIDTools::UUID).random_create{ uuid }
355
+ stub(::SecureRandom).uuid{ uuid }
363
356
 
364
357
  s3_local_file_path = "/tmp/s3-test.txt"
365
358
  s3path = "log/events/ts=20110102-13/events_0-#{uuid}.gz"
@@ -567,6 +560,137 @@ EOC
567
560
  assert_equal(expected_credentials, credentials)
568
561
  end
569
562
 
563
+ def test_assume_role_with_iam_credentials
564
+ expected_credentials = Aws::Credentials.new("test_key_id", "test_sec_key")
565
+ sts_client = Aws::STS::Client.new(region: 'ap-northeast-1', credentials: expected_credentials)
566
+ mock(Aws::Credentials).new("test_key_id", "test_sec_key") { expected_credentials }
567
+ mock(Aws::STS::Client).new(region: 'ap-northeast-1', credentials: expected_credentials){ sts_client }
568
+ mock(Aws::AssumeRoleCredentials).new(role_arn: "test_arn",
569
+ role_session_name: "test_session",
570
+ client: sts_client){
571
+ expected_credentials
572
+ }
573
+ config = CONFIG_TIME_SLICE
574
+ config += %[
575
+ s3_region ap-northeast-1
576
+
577
+ <assume_role_credentials>
578
+ role_arn test_arn
579
+ role_session_name test_session
580
+ </assume_role_credentials>
581
+ ]
582
+ d = create_time_sliced_driver(config)
583
+ assert_nothing_raised { d.run {} }
584
+ client = d.instance.instance_variable_get(:@s3).client
585
+ credentials = client.config.credentials
586
+ assert_equal(expected_credentials, credentials)
587
+ end
588
+
589
+ def test_assume_role_credentials_with_region_and_sts_http_proxy
590
+ expected_credentials = Aws::Credentials.new("test_key", "test_secret")
591
+ expected_region = "ap-northeast-1"
592
+ expected_sts_http_proxy = 'http://example.com'
593
+ sts_client = Aws::STS::Client.new(region: expected_region, http_proxy: expected_sts_http_proxy)
594
+ mock(Aws::STS::Client).new(region:expected_region, http_proxy: expected_sts_http_proxy){ sts_client }
595
+ mock(Aws::AssumeRoleCredentials).new(role_arn: "test_arn",
596
+ role_session_name: "test_session",
597
+ client: sts_client,
598
+ sts_http_proxy: expected_sts_http_proxy){
599
+ expected_credentials
600
+ }
601
+ config = CONFIG_TIME_SLICE.split("\n").reject{|x| x =~ /.+aws_.+/}.join("\n")
602
+ config += %[
603
+ s3_region #{expected_region}
604
+ <assume_role_credentials>
605
+ role_arn test_arn
606
+ role_session_name test_session
607
+ sts_http_proxy #{expected_sts_http_proxy}
608
+ </assume_role_credentials>
609
+ ]
610
+ d = create_time_sliced_driver(config)
611
+ assert_nothing_raised { d.run {} }
612
+ client = d.instance.instance_variable_get(:@s3).client
613
+ credentials = client.config.credentials
614
+ assert_equal(expected_credentials, credentials)
615
+ end
616
+
617
+ def test_assume_role_credentials_with_sts_http_proxy
618
+ expected_credentials = Aws::Credentials.new("test_key", "test_secret")
619
+ expected_sts_http_proxy = 'http://example.com'
620
+ sts_client = Aws::STS::Client.new(region: "us-east-1", http_proxy: expected_sts_http_proxy)
621
+ mock(Aws::STS::Client).new(region: "us-east-1", http_proxy: expected_sts_http_proxy){ sts_client }
622
+ mock(Aws::AssumeRoleCredentials).new(role_arn: "test_arn",
623
+ role_session_name: "test_session",
624
+ client: sts_client,
625
+ sts_http_proxy: expected_sts_http_proxy){
626
+ expected_credentials
627
+ }
628
+ config = CONFIG_TIME_SLICE.split("\n").reject{|x| x =~ /.+aws_.+/}.join("\n")
629
+ config += %[
630
+ <assume_role_credentials>
631
+ role_arn test_arn
632
+ role_session_name test_session
633
+ sts_http_proxy #{expected_sts_http_proxy}
634
+ </assume_role_credentials>
635
+ ]
636
+ d = create_time_sliced_driver(config)
637
+ assert_nothing_raised { d.run {} }
638
+ client = d.instance.instance_variable_get(:@s3).client
639
+ credentials = client.config.credentials
640
+ assert_equal(expected_credentials, credentials)
641
+ end
642
+
643
+ def test_assume_role_credentials_with_sts_endpoint_url
644
+ expected_credentials = Aws::Credentials.new("test_key", "test_secret")
645
+ expected_sts_endpoint_url = 'http://example.com'
646
+ sts_client = Aws::STS::Client.new(region: "us-east-1", endpoint: expected_sts_endpoint_url)
647
+ mock(Aws::STS::Client).new(region: "us-east-1", endpoint: expected_sts_endpoint_url){ sts_client }
648
+ mock(Aws::AssumeRoleCredentials).new(role_arn: "test_arn",
649
+ role_session_name: "test_session",
650
+ client: sts_client,
651
+ sts_endpoint_url: expected_sts_endpoint_url){
652
+ expected_credentials
653
+ }
654
+ config = CONFIG_TIME_SLICE.split("\n").reject{|x| x =~ /.+aws_.+/}.join("\n")
655
+ config += %[
656
+ <assume_role_credentials>
657
+ role_arn test_arn
658
+ role_session_name test_session
659
+ sts_endpoint_url #{expected_sts_endpoint_url}
660
+ </assume_role_credentials>
661
+ ]
662
+ d = create_time_sliced_driver(config)
663
+ assert_nothing_raised { d.run {} }
664
+ client = d.instance.instance_variable_get(:@s3).client
665
+ credentials = client.config.credentials
666
+ assert_equal(expected_credentials, credentials)
667
+ end
668
+
669
+ def test_assume_role_credentials_with_sts_region
670
+ expected_credentials = Aws::Credentials.new("test_key", "test_secret")
671
+ expected_sts_region = 'ap-south-1'
672
+ sts_client = Aws::STS::Client.new(region: expected_sts_region)
673
+ mock(Aws::STS::Client).new(region: expected_sts_region){ sts_client }
674
+ mock(Aws::AssumeRoleCredentials).new(role_arn: "test_arn",
675
+ role_session_name: "test_session",
676
+ client: sts_client){
677
+ expected_credentials
678
+ }
679
+ config = CONFIG_TIME_SLICE.split("\n").reject{|x| x =~ /.+aws_.+/}.join("\n")
680
+ config += %[
681
+ <assume_role_credentials>
682
+ role_arn test_arn
683
+ role_session_name test_session
684
+ sts_region #{expected_sts_region}
685
+ </assume_role_credentials>
686
+ ]
687
+ d = create_time_sliced_driver(config)
688
+ assert_nothing_raised { d.run {} }
689
+ client = d.instance.instance_variable_get(:@s3).client
690
+ credentials = client.config.credentials
691
+ assert_equal(expected_credentials, credentials)
692
+ end
693
+
570
694
  def test_web_identity_credentials
571
695
  expected_credentials = Aws::Credentials.new("test_key", "test_secret")
572
696
  mock(Aws::AssumeRoleWebIdentityCredentials).new(
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-s3
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.3
4
+ version: 1.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sadayuki Furuhashi
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2020-06-25 00:00:00.000000000 Z
12
+ date: 2021-04-09 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: fluentd
@@ -121,8 +121,8 @@ executables: []
121
121
  extensions: []
122
122
  extra_rdoc_files: []
123
123
  files:
124
+ - ".github/workflows/linux.yml"
124
125
  - ".gitignore"
125
- - ".travis.yml"
126
126
  - AUTHORS
127
127
  - ChangeLog
128
128
  - Gemfile
@@ -131,6 +131,11 @@ files:
131
131
  - Rakefile
132
132
  - VERSION
133
133
  - appveyor.yml
134
+ - docs/credentials.md
135
+ - docs/howto.md
136
+ - docs/input.md
137
+ - docs/output.md
138
+ - docs/v0.12.md
134
139
  - fluent-plugin-s3.gemspec
135
140
  - lib/fluent/log-ext.rb
136
141
  - lib/fluent/plugin/in_s3.rb
@@ -138,6 +143,7 @@ files:
138
143
  - lib/fluent/plugin/s3_compressor_gzip_command.rb
139
144
  - lib/fluent/plugin/s3_compressor_lzma2.rb
140
145
  - lib/fluent/plugin/s3_compressor_lzo.rb
146
+ - lib/fluent/plugin/s3_compressor_parquet.rb
141
147
  - lib/fluent/plugin/s3_extractor_gzip_command.rb
142
148
  - lib/fluent/plugin/s3_extractor_lzma2.rb
143
149
  - lib/fluent/plugin/s3_extractor_lzo.rb
@@ -162,10 +168,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
162
168
  - !ruby/object:Gem::Version
163
169
  version: '0'
164
170
  requirements: []
165
- rubygems_version: 3.0.3
171
+ rubygems_version: 3.1.2
166
172
  signing_key:
167
173
  specification_version: 4
168
174
  summary: Amazon S3 output plugin for Fluentd event collector
169
- test_files:
170
- - test/test_in_s3.rb
171
- - test/test_out_s3.rb
175
+ test_files: []