fluent-plugin-s3 1.4.0 → 1.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/docs/v0.12.md ADDED
@@ -0,0 +1,52 @@
1
+ # Configuration: Output (v0.12 style)
2
+
3
+ Here is a sample configuration for the old fluentd v0.12. It also works with fluentd v1, but it is not recommended there.
4
+
5
+ <match pattern>
6
+ @type s3
7
+
8
+ aws_key_id YOUR_AWS_KEY_ID
9
+ aws_sec_key YOUR_AWS_SECRET_KEY
10
+ s3_bucket YOUR_S3_BUCKET_NAME
11
+ s3_region ap-northeast-1
12
+
13
+ path logs/
14
+ s3_object_key_format %{path}%{time_slice}_%{index}.%{file_extension}
15
+ buffer_path /var/log/fluent/s3
16
+ time_slice_format %Y%m%d-%H
17
+ time_slice_wait 10m
18
+ utc
19
+ format json
20
+ </match>
21
+
22
+ If you want to embed the tag in [`path`](output.md#path) / [`s3_object_key_format`](output.md#s3_object_key_format), you need to use the `fluent-plugin-forest` plugin.
23
+
24
+ The following sections explain only the differences from v1. All other parameters are the same as in v1; see [Configuration: Output](output.md) for them.
25
+
26
+ ## format (for v0.12)
27
+
28
+ @format json
29
+ include_time_key true
30
+ time_key log_time # default is time
31
+
32
+ This parameter is for v0.12. Use [`<format>`](https://docs.fluentd.org/configuration/format-section) and [`<inject>`](https://docs.fluentd.org/configuration/inject-section) for v1.
33
+
34
+ ## buffer_path (for v0.12)
35
+
36
+ Path prefix of the files used to buffer logs.
37
+
38
+ This parameter is for v0.12. Use [`<buffer>`](https://docs.fluentd.org/configuration/buffer-section)'s `path` in v1.
39
+
40
+ ## time_slice_format (for v0.12)
41
+
42
+ Format of the time used as the file name. Default is '%Y%m%d'. Use
43
+ '%Y%m%d%H' to split files hourly.
44
+
45
+ This parameter is for v0.12. Use [buffer placeholder](https://docs.fluentd.org/configuration/buffer-section#placeholders) for [`path`](output.md#path) / [`s3_object_key_format`](output.md#s3_object_key_format) in v1.
46
+
47
+ ## time_slice_wait (for v0.12)
48
+
49
+ The time to wait for old logs. Default is 10 minutes. Specify a larger value if
50
+ old logs may arrive late.
51
+
52
+ This parameter is for v0.12. Use [`<buffer>`](https://docs.fluentd.org/configuration/buffer-section)'s `timekey_wait` in v1.
@@ -115,10 +115,15 @@ module Fluent::Plugin
115
115
 
116
116
  attr_reader :bucket
117
117
 
118
+ def reject_s3_endpoint?
119
+ @s3_endpoint && !@s3_endpoint.end_with?('vpce.amazonaws.com') &&
120
+ @s3_endpoint.end_with?('amazonaws.com') && !['fips', 'gov'].any? { |e| @s3_endpoint.include?(e) }
121
+ end
122
+
118
123
  def configure(conf)
119
124
  super
120
125
 
121
- if @s3_endpoint && (@s3_endpoint.end_with?('amazonaws.com') && !['fips', 'gov'].any? { |e| @s3_endpoint.include?(e) })
126
+ if reject_s3_endpoint?
122
127
  raise Fluent::ConfigError, "s3_endpoint parameter is not supported for S3, use s3_region instead. This parameter is for S3 compatible services"
123
128
  end
124
129
 
@@ -173,6 +173,11 @@ module Fluent::Plugin
173
173
 
174
174
  MAX_HEX_RANDOM_LENGTH = 16
175
175
 
176
+ def reject_s3_endpoint?
177
+ @s3_endpoint && !@s3_endpoint.end_with?('vpce.amazonaws.com') &&
178
+ @s3_endpoint.end_with?('amazonaws.com') && !['fips', 'gov'].any? { |e| @s3_endpoint.include?(e) }
179
+ end
180
+
176
181
  def configure(conf)
177
182
  compat_parameters_convert(conf, :buffer, :formatter, :inject)
178
183
 
@@ -180,7 +185,7 @@ module Fluent::Plugin
180
185
 
181
186
  Aws.use_bundled_cert! if @use_bundled_cert
182
187
 
183
- if @s3_endpoint && (@s3_endpoint.end_with?('amazonaws.com') && !['fips', 'gov'].any? { |e| @s3_endpoint.include?(e) })
188
+ if reject_s3_endpoint?
184
189
  raise Fluent::ConfigError, "s3_endpoint parameter is not supported for S3, use s3_region instead. This parameter is for S3 compatible services"
185
190
  end
186
191
 
@@ -473,11 +478,10 @@ module Fluent::Plugin
473
478
  options = {}
474
479
  credentials_options = {}
475
480
  case
476
- when @aws_key_id && @aws_sec_key
477
- options[:access_key_id] = @aws_key_id
478
- options[:secret_access_key] = @aws_sec_key
479
481
  when @assume_role_credentials
480
482
  c = @assume_role_credentials
483
+ iam_user_credentials = @aws_key_id && @aws_sec_key ? Aws::Credentials.new(@aws_key_id, @aws_sec_key) : nil
484
+ region = c.sts_region || @s3_region
481
485
  credentials_options[:role_arn] = c.role_arn
482
486
  credentials_options[:role_session_name] = c.role_session_name
483
487
  credentials_options[:policy] = c.policy if c.policy
@@ -486,21 +490,35 @@ module Fluent::Plugin
486
490
  credentials_options[:sts_endpoint_url] = c.sts_endpoint_url if c.sts_endpoint_url
487
491
  credentials_options[:sts_http_proxy] = c.sts_http_proxy if c.sts_http_proxy
488
492
  if c.sts_http_proxy && c.sts_endpoint_url
489
- credentials_options[:client] = Aws::STS::Client.new(http_proxy: c.sts_http_proxy, endpoint: c.sts_endpoint_url)
490
- elsif @region && c.sts_http_proxy
491
- credentials_options[:client] = Aws::STS::Client.new(region: @region, http_proxy: c.sts_http_proxy)
492
- elsif @region && c.sts_endpoint_url
493
- credentials_options[:client] = Aws::STS::Client.new(region: @region, endpoint: c.sts_endpoint_url)
493
+ credentials_options[:client] = if iam_user_credentials
494
+ Aws::STS::Client.new(region: region, http_proxy: c.sts_http_proxy, endpoint: c.sts_endpoint_url, credentials: iam_user_credentials)
495
+ else
496
+ Aws::STS::Client.new(region: region, http_proxy: c.sts_http_proxy, endpoint: c.sts_endpoint_url)
497
+ end
494
498
  elsif c.sts_http_proxy
495
- credentials_options[:client] = Aws::STS::Client.new(http_proxy: c.sts_http_proxy)
499
+ credentials_options[:client] = if iam_user_credentials
500
+ Aws::STS::Client.new(region: region, http_proxy: c.sts_http_proxy, credentials: iam_user_credentials)
501
+ else
502
+ Aws::STS::Client.new(region: region, http_proxy: c.sts_http_proxy)
503
+ end
496
504
  elsif c.sts_endpoint_url
497
- credentials_options[:client] = Aws::STS::Client.new(endpoint: c.sts_endpoint_url)
498
- elsif c.sts_region
499
- credentials_options[:client] = Aws::STS::Client.new(region: c.sts_region)
500
- elsif @s3_region
501
- credentials_options[:client] = Aws::STS::Client.new(region: @s3_region)
505
+ credentials_options[:client] = if iam_user_credentials
506
+ Aws::STS::Client.new(region: region, endpoint: c.sts_endpoint_url, credentials: iam_user_credentials)
507
+ else
508
+ Aws::STS::Client.new(region: region, endpoint: c.sts_endpoint_url)
509
+ end
510
+ else
511
+ credentials_options[:client] = if iam_user_credentials
512
+ Aws::STS::Client.new(region: region, credentials: iam_user_credentials)
513
+ else
514
+ Aws::STS::Client.new(region: region)
515
+ end
502
516
  end
517
+
503
518
  options[:credentials] = Aws::AssumeRoleCredentials.new(credentials_options)
519
+ when @aws_key_id && @aws_sec_key
520
+ options[:access_key_id] = @aws_key_id
521
+ options[:secret_access_key] = @aws_sec_key
504
522
  when @web_identity_credentials
505
523
  c = @web_identity_credentials
506
524
  credentials_options[:role_arn] = c.role_arn
@@ -0,0 +1,83 @@
1
+ require "open3"
2
+
3
+ module Fluent::Plugin
4
+ class S3Output
5
+ class ParquetCompressor < Compressor
6
+ S3Output.register_compressor("parquet", self)
7
+
8
+ config_section :compress, multi: false do
9
+ desc "parquet compression codec"
10
+ config_param :parquet_compression_codec, :enum, list: [:uncompressed, :snappy, :gzip, :lzo, :brotli, :lz4, :zstd], default: :snappy
11
+ desc "parquet file page size"
12
+ config_param :parquet_page_size, :size, default: 8192
13
+ desc "parquet file row group size"
14
+ config_param :parquet_row_group_size, :size, default: 128 * 1024 * 1024
15
+ desc "record data format type"
16
+ config_param :record_type, :enum, list: [:avro, :csv, :jsonl, :msgpack, :tsv, :json], default: :msgpack
17
+ desc "schema type"
18
+ config_param :schema_type, :enum, list: [:avro, :bigquery], default: :avro
19
+ desc "path to schema file"
20
+ config_param :schema_file, :string
21
+ end
22
+
23
+ def configure(conf)
24
+ super
25
+ check_command("columnify", "-h")
26
+
27
+ if [:lzo, :brotli, :lz4].include?(@compress.parquet_compression_codec)
28
+ raise Fluent::ConfigError, "unsupported compression codec: #{@compress.parquet_compression_codec}"
29
+ end
30
+
31
+ @parquet_compression_codec = @compress.parquet_compression_codec.to_s.upcase
32
+ if @compress.record_type == :json
33
+ @record_type = :jsonl
34
+ else
35
+ @record_type = @compress.record_type
36
+ end
37
+ end
38
+
39
+ def ext
40
+ "parquet".freeze
41
+ end
42
+
43
+ def content_type
44
+ "application/octet-stream".freeze
45
+ end
46
+
47
+ def compress(chunk, tmp)
48
+ chunk_is_file = @buffer_type == "file"
49
+ path = if chunk_is_file
50
+ chunk.path
51
+ else
52
+ w = Tempfile.new("chunk-parquet-tmp")
53
+ w.binmode
54
+ chunk.write_to(w)
55
+ w.close
56
+ w.path
57
+ end
58
+ stdout, stderr, status = columnify(path, tmp.path)
59
+ unless status.success?
60
+ raise Fluent::UnrecoverableError, "failed to execute columnify command. stdout=#{stdout} stderr=#{stderr} status=#{status.inspect}"
61
+ end
62
+ ensure
63
+ unless chunk_is_file
64
+ w.close(true) rescue nil
65
+ end
66
+ end
67
+
68
+ private
69
+
70
+ def columnify(src_path, dst_path)
71
+ Open3.capture3("columnify",
72
+ "-parquetCompressionCodec", @parquet_compression_codec,
73
+ "-parquetPageSize", @compress.parquet_page_size.to_s,
74
+ "-parquetRowGroupSize", @compress.parquet_row_group_size.to_s,
75
+ "-recordType", @record_type.to_s,
76
+ "-schemaType", @compress.schema_type.to_s,
77
+ "-schemaFile", @compress.schema_file,
78
+ "-output", dst_path,
79
+ src_path)
80
+ end
81
+ end
82
+ end
83
+ end
data/test/test_in_s3.rb CHANGED
@@ -115,14 +115,19 @@ class S3InputTest < Test::Unit::TestCase
115
115
  end
116
116
 
117
117
 
118
- def test_s3_endpoint_with_valid_endpoint
119
- d = create_driver(CONFIG + 's3_endpoint riak-cs.example.com')
120
- assert_equal 'riak-cs.example.com', d.instance.s3_endpoint
118
+ data('Normal endpoint' => 'riak-cs.example.com',
119
+ 'VPCE endpoint' => 'vpce.amazonaws.com',
120
+ 'FIPS endpoint' => 'fips.xxx.amazonaws.com',
121
+ 'GOV endpoint' => 'gov.xxx.amazonaws.com')
122
+ def test_s3_endpoint_with_valid_endpoint(endpoint)
123
+ d = create_driver(CONFIG + "s3_endpoint #{endpoint}")
124
+ assert_equal endpoint, d.instance.s3_endpoint
121
125
  end
122
126
 
123
127
  data('US West (Oregon)' => 's3-us-west-2.amazonaws.com',
124
128
  'EU (Frankfurt)' => 's3.eu-central-1.amazonaws.com',
125
- 'Asia Pacific (Tokyo)' => 's3-ap-northeast-1.amazonaws.com')
129
+ 'Asia Pacific (Tokyo)' => 's3-ap-northeast-1.amazonaws.com',
130
+ 'Invalid VPCE' => 'vpce.xxx.amazonaws.com')
126
131
  def test_s3_endpoint_with_invalid_endpoint(endpoint)
127
132
  assert_raise(Fluent::ConfigError, "s3_endpoint parameter is not supported, use s3_region instead. This parameter is for S3 compatible services") {
128
133
  create_driver(CONFIG + "s3_endpoint #{endpoint}")
data/test/test_out_s3.rb CHANGED
@@ -560,6 +560,137 @@ EOC
560
560
  assert_equal(expected_credentials, credentials)
561
561
  end
562
562
 
563
+ def test_assume_role_with_iam_credentials
564
+ expected_credentials = Aws::Credentials.new("test_key_id", "test_sec_key")
565
+ sts_client = Aws::STS::Client.new(region: 'ap-northeast-1', credentials: expected_credentials)
566
+ mock(Aws::Credentials).new("test_key_id", "test_sec_key") { expected_credentials }
567
+ mock(Aws::STS::Client).new(region: 'ap-northeast-1', credentials: expected_credentials){ sts_client }
568
+ mock(Aws::AssumeRoleCredentials).new(role_arn: "test_arn",
569
+ role_session_name: "test_session",
570
+ client: sts_client){
571
+ expected_credentials
572
+ }
573
+ config = CONFIG_TIME_SLICE
574
+ config += %[
575
+ s3_region ap-northeast-1
576
+
577
+ <assume_role_credentials>
578
+ role_arn test_arn
579
+ role_session_name test_session
580
+ </assume_role_credentials>
581
+ ]
582
+ d = create_time_sliced_driver(config)
583
+ assert_nothing_raised { d.run {} }
584
+ client = d.instance.instance_variable_get(:@s3).client
585
+ credentials = client.config.credentials
586
+ assert_equal(expected_credentials, credentials)
587
+ end
588
+
589
+ def test_assume_role_credentials_with_region_and_sts_http_proxy
590
+ expected_credentials = Aws::Credentials.new("test_key", "test_secret")
591
+ expected_region = "ap-northeast-1"
592
+ expected_sts_http_proxy = 'http://example.com'
593
+ sts_client = Aws::STS::Client.new(region: expected_region, http_proxy: expected_sts_http_proxy)
594
+ mock(Aws::STS::Client).new(region:expected_region, http_proxy: expected_sts_http_proxy){ sts_client }
595
+ mock(Aws::AssumeRoleCredentials).new(role_arn: "test_arn",
596
+ role_session_name: "test_session",
597
+ client: sts_client,
598
+ sts_http_proxy: expected_sts_http_proxy){
599
+ expected_credentials
600
+ }
601
+ config = CONFIG_TIME_SLICE.split("\n").reject{|x| x =~ /.+aws_.+/}.join("\n")
602
+ config += %[
603
+ s3_region #{expected_region}
604
+ <assume_role_credentials>
605
+ role_arn test_arn
606
+ role_session_name test_session
607
+ sts_http_proxy #{expected_sts_http_proxy}
608
+ </assume_role_credentials>
609
+ ]
610
+ d = create_time_sliced_driver(config)
611
+ assert_nothing_raised { d.run {} }
612
+ client = d.instance.instance_variable_get(:@s3).client
613
+ credentials = client.config.credentials
614
+ assert_equal(expected_credentials, credentials)
615
+ end
616
+
617
+ def test_assume_role_credentials_with_sts_http_proxy
618
+ expected_credentials = Aws::Credentials.new("test_key", "test_secret")
619
+ expected_sts_http_proxy = 'http://example.com'
620
+ sts_client = Aws::STS::Client.new(region: "us-east-1", http_proxy: expected_sts_http_proxy)
621
+ mock(Aws::STS::Client).new(region: "us-east-1", http_proxy: expected_sts_http_proxy){ sts_client }
622
+ mock(Aws::AssumeRoleCredentials).new(role_arn: "test_arn",
623
+ role_session_name: "test_session",
624
+ client: sts_client,
625
+ sts_http_proxy: expected_sts_http_proxy){
626
+ expected_credentials
627
+ }
628
+ config = CONFIG_TIME_SLICE.split("\n").reject{|x| x =~ /.+aws_.+/}.join("\n")
629
+ config += %[
630
+ <assume_role_credentials>
631
+ role_arn test_arn
632
+ role_session_name test_session
633
+ sts_http_proxy #{expected_sts_http_proxy}
634
+ </assume_role_credentials>
635
+ ]
636
+ d = create_time_sliced_driver(config)
637
+ assert_nothing_raised { d.run {} }
638
+ client = d.instance.instance_variable_get(:@s3).client
639
+ credentials = client.config.credentials
640
+ assert_equal(expected_credentials, credentials)
641
+ end
642
+
643
+ def test_assume_role_credentials_with_sts_endpoint_url
644
+ expected_credentials = Aws::Credentials.new("test_key", "test_secret")
645
+ expected_sts_endpoint_url = 'http://example.com'
646
+ sts_client = Aws::STS::Client.new(region: "us-east-1", endpoint: expected_sts_endpoint_url)
647
+ mock(Aws::STS::Client).new(region: "us-east-1", endpoint: expected_sts_endpoint_url){ sts_client }
648
+ mock(Aws::AssumeRoleCredentials).new(role_arn: "test_arn",
649
+ role_session_name: "test_session",
650
+ client: sts_client,
651
+ sts_endpoint_url: expected_sts_endpoint_url){
652
+ expected_credentials
653
+ }
654
+ config = CONFIG_TIME_SLICE.split("\n").reject{|x| x =~ /.+aws_.+/}.join("\n")
655
+ config += %[
656
+ <assume_role_credentials>
657
+ role_arn test_arn
658
+ role_session_name test_session
659
+ sts_endpoint_url #{expected_sts_endpoint_url}
660
+ </assume_role_credentials>
661
+ ]
662
+ d = create_time_sliced_driver(config)
663
+ assert_nothing_raised { d.run {} }
664
+ client = d.instance.instance_variable_get(:@s3).client
665
+ credentials = client.config.credentials
666
+ assert_equal(expected_credentials, credentials)
667
+ end
668
+
669
+ def test_assume_role_credentials_with_sts_region
670
+ expected_credentials = Aws::Credentials.new("test_key", "test_secret")
671
+ expected_sts_region = 'ap-south-1'
672
+ sts_client = Aws::STS::Client.new(region: expected_sts_region)
673
+ mock(Aws::STS::Client).new(region: expected_sts_region){ sts_client }
674
+ mock(Aws::AssumeRoleCredentials).new(role_arn: "test_arn",
675
+ role_session_name: "test_session",
676
+ client: sts_client){
677
+ expected_credentials
678
+ }
679
+ config = CONFIG_TIME_SLICE.split("\n").reject{|x| x =~ /.+aws_.+/}.join("\n")
680
+ config += %[
681
+ <assume_role_credentials>
682
+ role_arn test_arn
683
+ role_session_name test_session
684
+ sts_region #{expected_sts_region}
685
+ </assume_role_credentials>
686
+ ]
687
+ d = create_time_sliced_driver(config)
688
+ assert_nothing_raised { d.run {} }
689
+ client = d.instance.instance_variable_get(:@s3).client
690
+ credentials = client.config.credentials
691
+ assert_equal(expected_credentials, credentials)
692
+ end
693
+
563
694
  def test_web_identity_credentials
564
695
  expected_credentials = Aws::Credentials.new("test_key", "test_secret")
565
696
  mock(Aws::AssumeRoleWebIdentityCredentials).new(
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-s3
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.0
4
+ version: 1.6.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sadayuki Furuhashi
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2020-08-03 00:00:00.000000000 Z
12
+ date: 2021-08-19 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: fluentd
@@ -121,8 +121,12 @@ executables: []
121
121
  extensions: []
122
122
  extra_rdoc_files: []
123
123
  files:
124
+ - ".github/ISSUE_TEMPLATE/bug_report.yaml"
125
+ - ".github/ISSUE_TEMPLATE/config.yml"
126
+ - ".github/ISSUE_TEMPLATE/feature_request.yaml"
127
+ - ".github/workflows/linux.yml"
128
+ - ".github/workflows/stale-actions.yml"
124
129
  - ".gitignore"
125
- - ".travis.yml"
126
130
  - AUTHORS
127
131
  - ChangeLog
128
132
  - Gemfile
@@ -131,6 +135,11 @@ files:
131
135
  - Rakefile
132
136
  - VERSION
133
137
  - appveyor.yml
138
+ - docs/credentials.md
139
+ - docs/howto.md
140
+ - docs/input.md
141
+ - docs/output.md
142
+ - docs/v0.12.md
134
143
  - fluent-plugin-s3.gemspec
135
144
  - lib/fluent/log-ext.rb
136
145
  - lib/fluent/plugin/in_s3.rb
@@ -138,6 +147,7 @@ files:
138
147
  - lib/fluent/plugin/s3_compressor_gzip_command.rb
139
148
  - lib/fluent/plugin/s3_compressor_lzma2.rb
140
149
  - lib/fluent/plugin/s3_compressor_lzo.rb
150
+ - lib/fluent/plugin/s3_compressor_parquet.rb
141
151
  - lib/fluent/plugin/s3_extractor_gzip_command.rb
142
152
  - lib/fluent/plugin/s3_extractor_lzma2.rb
143
153
  - lib/fluent/plugin/s3_extractor_lzo.rb
@@ -162,10 +172,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
162
172
  - !ruby/object:Gem::Version
163
173
  version: '0'
164
174
  requirements: []
165
- rubygems_version: 3.0.3
175
+ rubygems_version: 3.1.6
166
176
  signing_key:
167
177
  specification_version: 4
168
178
  summary: Amazon S3 output plugin for Fluentd event collector
169
- test_files:
170
- - test/test_in_s3.rb
171
- - test/test_out_s3.rb
179
+ test_files: []