fluent-plugin-s3 1.8.4 → 1.8.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e3de70d5f20b42bd86ce425d436b2af2fabcb12bcd11756d6ccb311d32f117e7
4
- data.tar.gz: 8896f6755b9c7cb6c726950493164bf4275cf0776b16cd37dd660ffe511e2f16
3
+ metadata.gz: 4ad2143d493bcb5b5805df10225cdb068bffc8ae24a1a6838bde343b18a845ed
4
+ data.tar.gz: faf67f2a0e65e73b385944113a485f509365b3b9f1e8059ef1b06df8c6719607
5
5
  SHA512:
6
- metadata.gz: d9bd5499f054de826654d44208858684917d226f229fb4eb3e9f04cfe09dad089345fa16333ed05848499e4dd4b5ab321b36e7dc6dec733f0f2110f280b07512
7
- data.tar.gz: f21db8ac19f9a3e05502f39f0e32d3740a903431800df70871539404fb3fdb4226b0c974c0f65d2e5f7e32f177a78df90a8529314fc84a9c1de4d051bac577e0
6
+ metadata.gz: 51c6cd59240d0d2055260b779e9887ab5a8fc0839246cf9154566271de72e30a73ba0587ae2945dd275c1b16b71c43e165a77e43419e73279fbb938904790a48
7
+ data.tar.gz: ff8aa7a1c32cf273527165eee2dec5df4df217d20e5047c53908469bf13fd8baf6b803781e30d09126d83e0ba4658bf8b28ae2b034e403f627ae9a9e98b312a3
@@ -4,3 +4,20 @@ updates:
4
4
  directory: '/'
5
5
  schedule:
6
6
  interval: 'monthly'
7
+ groups:
8
+ # PR: "Security update [package] from [old] to [new]"
9
+ # This PR should be merged in a hurry
10
+ security-updates:
11
+ applies-to: security-updates
12
+ patterns:
13
+ - '*'
14
+
15
+ # PR: "Bump [package] from [old] to [new]"
16
+ # There is no need to merge this PR in a hurry. It is enough to merge
17
+ # once a month.
18
+ monthly-updates:
19
+ applies-to: version-updates
20
+ patterns:
21
+ - '*'
22
+ # Allow a PR to be created for both security updates and normal updates.
23
+ open-pull-requests-limit: 1
@@ -4,25 +4,34 @@ on:
4
4
  branches: [master]
5
5
  pull_request:
6
6
  branches: [master]
7
+ schedule:
8
+ - cron: '0 0 1 * *'
7
9
  jobs:
10
+ ruby-versions:
11
+ uses: ruby/actions/.github/workflows/ruby_versions.yml@master
12
+ with:
13
+ engine: cruby
14
+ min_version: 2.7
8
15
  build:
16
+ needs: ruby-versions
9
17
  runs-on: ${{ matrix.os }}
10
18
  strategy:
11
19
  fail-fast: false
12
20
  matrix:
13
- ruby: [ '4.0', '3.4', '3.3', '3.2', '3.1', '3.0', '2.7' ]
21
+ ruby: ${{ fromJson(needs.ruby-versions.outputs.versions) }}
22
+ exclude:
23
+ - ruby: head
14
24
  os:
15
25
  - ubuntu-latest
16
26
  name: Ruby ${{ matrix.ruby }} unit testing on ${{ matrix.os }}
17
27
  steps:
18
- - uses: actions/checkout@v6
19
- - uses: ruby/setup-ruby@v1
28
+ - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
29
+ - uses: ruby/setup-ruby@afeafc3d1ab54a631816aba4c914a0081c12ff2f # v1.310.0
20
30
  with:
21
31
  ruby-version: ${{ matrix.ruby }}
22
32
  - name: unit testing
23
33
  env:
24
34
  CI: true
25
35
  run: |
26
- gem install rake
27
36
  bundle install --jobs 4 --retry 3
28
37
  bundle exec rake test
@@ -6,8 +6,11 @@ on:
6
6
  jobs:
7
7
  stale:
8
8
  runs-on: ubuntu-latest
9
+ permissions:
10
+ issues: write
11
+ pull-requests: write
9
12
  steps:
10
- - uses: actions/stale@v10
13
+ - uses: actions/stale@eb5cf3af3ac0a1aa4c9c45633dd1ae542a27a899 # v10.3.0
11
14
  with:
12
15
  repo-token: ${{ secrets.GITHUB_TOKEN }}
13
16
  days-before-stale: 30
data/ChangeLog CHANGED
@@ -1,3 +1,7 @@
1
+ Release 1.8.5 - 2026/06/25
2
+
3
+ * in_s3: enforce size limits on decompressed payloads
4
+
1
5
  Release 1.8.4 - 2026/03/04
2
6
 
3
7
  * in_s3: add aws_profile / aws_credential_process parameters for credentials (GitHub: #464)
data/README.md CHANGED
@@ -1,7 +1,6 @@
1
1
  # Amazon S3 plugin for [Fluentd](http://github.com/fluent/fluentd)
2
2
 
3
- [<img src="https://travis-ci.org/fluent/fluent-plugin-s3.svg?branch=master"
4
- alt="Build Status" />](https://travis-ci.org/fluent/fluent-plugin-s3) [<img
3
+ [![linux](https://github.com/fluent/fluent-plugin-s3/actions/workflows/linux.yml/badge.svg)](https://github.com/fluent/fluent-plugin-s3/actions/workflows/linux.yml) [<img
5
4
  src="https://codeclimate.com/github/fluent/fluent-plugin-s3/badges/gpa.svg"
6
5
  />](https://codeclimate.com/github/fluent/fluent-plugin-s3)
7
6
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.8.4
1
+ 1.8.5
data/docs/input.md CHANGED
@@ -29,6 +29,11 @@ See also [Configuration: credentials](credentials.md) for common comprehensive p
29
29
 
30
30
  Whether or not object metadata should be added to the record. Defaults to `false`. See below for details.
31
31
 
32
+ ## decompression_size_limit
33
+
34
+ The size limit of the decompressed data. The default is `256m` (256 MiB).
35
+ This parameter is designed to prevent memory exhaustion when extracting highly compressed objects from S3.
36
+
32
37
  ## match_regexp
33
38
 
34
39
  If provided, process the S3 object only if its key matches the regular expression
data/docs/output.md CHANGED
@@ -209,7 +209,7 @@ parquet file page size. default: 8192 bytes
209
209
 
210
210
  ### parquet_row_group_size
211
211
 
212
- parquet file row group size. default: 128 MB
212
+ parquet file row group size. default: 128 MiB
213
213
 
214
214
  ### record_type
215
215
 
@@ -22,11 +22,14 @@ module Fluent::Plugin
22
22
  end
23
23
 
24
24
  DEFAULT_PARSE_TYPE = "none"
25
+ DECOMPRESSION_SIZE_LIMIT = 256 * 1024 * 1024
25
26
 
26
27
  desc "Use aws-sdk-ruby bundled cert"
27
28
  config_param :use_bundled_cert, :bool, default: false
28
29
  desc "Add object metadata to the records parsed out of a given object"
29
30
  config_param :add_object_metadata, :bool, default: false
31
+ desc 'The size limit of the extracted element.'
32
+ config_param :decompression_size_limit, :size, default: DECOMPRESSION_SIZE_LIMIT
30
33
  desc "AWS access key id"
31
34
  config_param :aws_key_id, :string, default: nil, secret: true
32
35
  desc "AWS secret key."
@@ -159,7 +162,7 @@ module Fluent::Plugin
159
162
 
160
163
  Aws.use_bundled_cert! if @use_bundled_cert
161
164
 
162
- @extractor = EXTRACTOR_REGISTRY.lookup(@store_as).new(log: log)
165
+ @extractor = EXTRACTOR_REGISTRY.lookup(@store_as).new(log: log, decompression_size_limit: @decompression_size_limit)
163
166
  @extractor.configure(conf)
164
167
 
165
168
  @parser = parser_create(conf: parser_config, default_type: DEFAULT_PARSE_TYPE)
@@ -210,7 +213,7 @@ module Fluent::Plugin
210
213
  begin
211
214
  @poller.poll(options) do |message|
212
215
  begin
213
- body = Yajl.load(message.body)
216
+ body = JSON.parse(message.body)
214
217
  log.debug(body)
215
218
  next unless is_valid_queue(body) # skip test queue
216
219
  if @match_regexp
@@ -365,13 +368,18 @@ module Fluent::Plugin
365
368
  end
366
369
 
367
370
  class Extractor
371
+ class SizeLimitError < StandardError; end
372
+
368
373
  include Fluent::Configurable
369
374
 
370
375
  attr_reader :log
371
376
 
372
- def initialize(log: $log, **options)
377
+ BYTES_TO_READ = 64 * 1024
378
+
379
+ def initialize(log: $log, decompression_size_limit: DECOMPRESSION_SIZE_LIMIT, **options)
373
380
  super()
374
381
  @log = log
382
+ @decompression_size_limit = decompression_size_limit
375
383
  end
376
384
 
377
385
  def configure(conf)
@@ -399,6 +407,38 @@ module Fluent::Plugin
399
407
  raise Fluent::ConfigError, "'#{command}' utility must be in PATH for #{algo} compression"
400
408
  end
401
409
  end
410
+
411
+ def extract_with_command(command, io, tempfile_basename = "s3-extractor-tmp")
412
+ path = if io.respond_to?(:path)
413
+ io.path
414
+ else
415
+ extractor = TextExtractor.new(log: log, decompression_size_limit: @decompression_size_limit)
416
+ temp = Tempfile.new(tempfile_basename)
417
+ temp.write(extractor.extract(io))
418
+ temp.close
419
+ temp.path
420
+ end
421
+
422
+ out = ''
423
+ begin
424
+ Open3.popen3("#{command} #{path}") do |stdin, stdout, stderr, wait_thr|
425
+ stdin.close
426
+ while (chunk = stdout.read(BYTES_TO_READ))
427
+ out << chunk
428
+ if out.bytesize > @decompression_size_limit
429
+ Process.kill("TERM", wait_thr.pid) rescue nil
430
+ raise SizeLimitError, "Extracted data exceeds limit of #{@decompression_size_limit} bytes"
431
+ end
432
+ end
433
+
434
+ if wait_thr.value.success?
435
+ out
436
+ else
437
+ raise "Command execution failed: #{command} (status: #{wait_thr.value})"
438
+ end
439
+ end
440
+ end
441
+ end
402
442
  end
403
443
 
404
444
  class GzipExtractor < Extractor
@@ -414,19 +454,25 @@ module Fluent::Plugin
414
454
  # https://bugs.ruby-lang.org/issues/11180
415
455
  # https://github.com/exAspArk/multiple_files_gzip_reader
416
456
  def extract(io)
417
- parts = []
457
+ out = ''
418
458
  loop do
419
459
  unused = nil
420
460
  Zlib::GzipReader.wrap(io) do |gz|
421
- parts << gz.read
461
+ while (chunk = gz.read(BYTES_TO_READ))
462
+ out << chunk
463
+ if out.bytesize > @decompression_size_limit
464
+ raise SizeLimitError, "Extracted data exceeds limit of #{@decompression_size_limit} bytes"
465
+ end
466
+ end
422
467
  unused = gz.unused
423
468
  gz.finish
424
469
  end
425
470
  io.pos -= unused ? unused.length : 0
426
471
  break if io.eof?
427
472
  end
428
- io.close
429
- parts.join
473
+ out
474
+ ensure
475
+ io.close unless io.closed?
430
476
  end
431
477
  end
432
478
 
@@ -440,7 +486,14 @@ module Fluent::Plugin
440
486
  end
441
487
 
442
488
  def extract(io)
443
- io.read
489
+ out = ''
490
+ while (chunk = io.read(BYTES_TO_READ))
491
+ out << chunk
492
+ if out.bytesize > @decompression_size_limit
493
+ raise SizeLimitError, "Extracted data exceeds limit of #{@decompression_size_limit} bytes"
494
+ end
495
+ end
496
+ out
444
497
  end
445
498
  end
446
499
 
@@ -19,26 +19,15 @@ module Fluent::Plugin
19
19
  end
20
20
 
21
21
  def extract(io)
22
- path = if io.respond_to?(:path)
23
- io.path
24
- else
25
- temp = Tempfile.new("gzip-temp")
26
- temp.write(io.read)
27
- temp.close
28
- temp.path
29
- end
30
-
31
- stdout, succeeded = Open3.capture2("gzip #{@command_parameter} #{path}")
32
- if succeeded
33
- stdout
34
- else
35
- log.warn "failed to execute gzip command. Fallback to GzipReader. status = #{succeeded}"
36
- begin
37
- io.rewind
38
- Zlib::GzipReader.wrap(io) do |gz|
39
- gz.read
40
- end
41
- end
22
+ begin
23
+ extract_with_command("gzip #{@command_parameter}", io, "gzip-temp")
24
+ rescue SizeLimitError
25
+ raise
26
+ rescue => e
27
+ log.warn "gzip command execution failed: #{e.message}. Fallback to GzipExtractor."
28
+ io.rewind
29
+ extractor = GzipExtractor.new(log: log, decompression_size_limit: @decompression_size_limit)
30
+ extractor.extract(io)
42
31
  end
43
32
  end
44
33
  end
@@ -19,19 +19,11 @@ module Fluent::Plugin
19
19
  end
20
20
 
21
21
  def extract(io)
22
- path = if io.respond_to?(path)
23
- io.path
24
- else
25
- temp = Tempfile.new("xz-temp")
26
- temp.write(io.read)
27
- temp.close
28
- temp.path
29
- end
30
-
31
- stdout, succeeded = Open3.capture2("xz #{@command_parameter} #{path}")
32
- if succeeded
33
- stdout
34
- else
22
+ begin
23
+ extract_with_command("xz #{@command_parameter}", io, "xz-temp")
24
+ rescue SizeLimitError
25
+ raise
26
+ rescue
35
27
  raise "Failed to extract #{path} with xz command."
36
28
  end
37
29
  end
@@ -19,19 +19,11 @@ module Fluent::Plugin
19
19
  end
20
20
 
21
21
  def extract(io)
22
- path = if io.respond_to?(path)
23
- io.path
24
- else
25
- temp = Tempfile.new("lzop-temp")
26
- temp.write(io.read)
27
- temp.close
28
- temp.path
29
- end
30
-
31
- stdout, succeeded = Open3.capture2("lzop #{@command_parameter} #{path}")
32
- if succeeded
33
- stdout
34
- else
22
+ begin
23
+ extract_with_command("lzop #{@command_parameter}", io, "lzop-temp")
24
+ rescue SizeLimitError
25
+ raise
26
+ rescue
35
27
  raise "Failed to extract #{path} with lzop command."
36
28
  end
37
29
  end
data/test/test_in_s3.rb CHANGED
@@ -341,7 +341,7 @@ EOS
341
341
  }
342
342
  ]
343
343
  }
344
- message = Struct::StubMessage.new(1, 1, Yajl.dump(body))
344
+ message = Struct::StubMessage.new(1, 1, JSON.generate(body))
345
345
  @sqs_poller.get_messages(anything, anything) do |config, stats|
346
346
  config.before_request.call(stats) if config.before_request
347
347
  stats.request_count += 1
@@ -376,7 +376,7 @@ EOS
376
376
  }
377
377
  ]
378
378
  }
379
- message = Struct::StubMessage.new(1, 1, Yajl.dump(body))
379
+ message = Struct::StubMessage.new(1, 1, JSON.generate(body))
380
380
  @sqs_poller.get_messages(anything, anything) do |config, stats|
381
381
  config.before_request.call(stats) if config.before_request
382
382
  stats.request_count += 1
@@ -411,7 +411,7 @@ EOS
411
411
  }
412
412
  ]
413
413
  }
414
- message = Struct::StubMessage.new(1, 1, Yajl.dump(body))
414
+ message = Struct::StubMessage.new(1, 1, JSON.generate(body))
415
415
  @sqs_poller.get_messages(anything, anything) do |config, stats|
416
416
  config.before_request.call(stats) if config.before_request
417
417
  stats.request_count += 1
@@ -446,7 +446,7 @@ EOS
446
446
  }
447
447
  ]
448
448
  }
449
- message = Struct::StubMessage.new(1, 1, Yajl.dump(body))
449
+ message = Struct::StubMessage.new(1, 1, JSON.generate(body))
450
450
  @sqs_poller.get_messages(anything, anything) do |config, stats|
451
451
  config.before_request.call(stats) if config.before_request
452
452
  stats.request_count += 1
@@ -481,7 +481,7 @@ EOS
481
481
  }
482
482
  ]
483
483
  }
484
- message = Struct::StubMessage.new(1, 1, Yajl.dump(body))
484
+ message = Struct::StubMessage.new(1, 1, JSON.generate(body))
485
485
  @sqs_poller.get_messages(anything, anything) do |config, stats|
486
486
  config.before_request.call(stats) if config.before_request
487
487
  stats.request_count += 1
@@ -521,7 +521,7 @@ EOS
521
521
  }
522
522
  ]
523
523
  }
524
- message = Struct::StubMessage.new(1, 1, Yajl.dump(body))
524
+ message = Struct::StubMessage.new(1, 1, JSON.generate(body))
525
525
  @sqs_poller.get_messages(anything, anything) do |config, stats|
526
526
  config.before_request.call(stats) if config.before_request
527
527
  stats.request_count += 1
@@ -569,7 +569,7 @@ EOS
569
569
  }
570
570
  ]
571
571
  }
572
- message = Struct::StubMessage.new(1, 1, Yajl.dump(body))
572
+ message = Struct::StubMessage.new(1, 1, JSON.generate(body))
573
573
  @sqs_poller.get_messages(anything, anything) do |config, stats|
574
574
  config.before_request.call(stats) if config.before_request
575
575
  stats.request_count += 1
@@ -620,7 +620,7 @@ EOS
620
620
  }
621
621
  ]
622
622
  }
623
- message = Struct::StubMessage.new(1, 1, Yajl.dump(body))
623
+ message = Struct::StubMessage.new(1, 1, JSON.generate(body))
624
624
  @sqs_poller.get_messages(anything, anything) do |config, stats|
625
625
  config.before_request.call(stats) if config.before_request
626
626
  stats.request_count += 1
@@ -640,6 +640,91 @@ EOS
640
640
  assert_equal(expected_records, events.map {|_tag, _time, record| record })
641
641
  end
642
642
 
643
+ data(
644
+ "limit_gzip" => { type: "gzip", input: "StringIO", limit: 10, expected_error: true },
645
+ "limit_text" => { type: "text", input: "StringIO", limit: 10, expected_error: true },
646
+ "limit_gzip_command1" => { type: "gzip_command", input: "StringIO", limit: 10, expected_error: true },
647
+ "limit_gzip_command2" => { type: "gzip_command", input: "Tempfile", limit: 10, expected_error: true },
648
+ "normal_gzip_command" => { type: "gzip_command", input: "Tempfile", limit: 100, expected_error: false },
649
+ )
650
+ def test_decompression_size_limit(data)
651
+ store_type = data[:type]
652
+ input_type = data[:input]
653
+ limit = data[:limit]
654
+ setup_mocks
655
+
656
+ config = <<~CONF
657
+ #{CONFIG}
658
+ check_apikey_on_start false
659
+ store_as #{store_type}
660
+ format none
661
+ decompression_size_limit #{limit}
662
+ CONF
663
+
664
+ d = create_driver(config)
665
+
666
+ s3_object = stub(Object.new)
667
+ s3_response = stub(Object.new)
668
+ s3_response.body {
669
+ content = "#{'a'*10}\n#{'b'*10}\n"
670
+
671
+ # Switching between Tempfile and StringIO to cover both branches of the
672
+ # `io.respond_to?(:path)` condition in `extract_with_command`.
673
+ # This ensures that:
674
+ # 1. The StringIO route correctly uses TextExtractor to create a protected temporary file.
675
+ # 2. The Tempfile route correctly limits the output size during Open3.popen3 execution.
676
+ io = (input_type == "Tempfile") ? Tempfile.new : StringIO.new
677
+
678
+ case store_type
679
+ when "gzip", "gzip_command"
680
+ io.binmode
681
+ Zlib::GzipWriter.wrap(io) { |gz|
682
+ gz.write content
683
+ gz.finish
684
+ }
685
+ when "text"
686
+ io.write content
687
+ end
688
+
689
+ io.rewind
690
+ io
691
+ }
692
+ s3_object.get { s3_response }
693
+ @s3_bucket.object(anything).at_least(1) { s3_object }
694
+
695
+ body = {
696
+ "Records" => [
697
+ {
698
+ "s3" => {
699
+ "object" => {
700
+ "key" => "test_key"
701
+ }
702
+ }
703
+ }
704
+ ]
705
+ }
706
+ message = Struct::StubMessage.new(1, 1, Yajl.dump(body))
707
+ @sqs_poller.get_messages(anything, anything) do |config, stats|
708
+ config.before_request.call(stats) if config.before_request
709
+ stats.request_count += 1
710
+ if stats.request_count >= 1
711
+ d.instance.instance_variable_set(:@running, false)
712
+ end
713
+ [message]
714
+ end
715
+ d.run
716
+
717
+ if data[:expected_error]
718
+ # Verify the protection mechanism: ensure SizeLimitError is logged.
719
+ assert_true d.logs.any? { |l| l.include?("Extracted data exceeds limit of #{limit} bytes") }
720
+ else
721
+ # Verify the normal execution path: ensure data is correctly extracted via Open3.popen3.
722
+ expected_records = [{ "message" => "#{'a'*10}\n" }, { "message" => "#{'b'*10}\n" }]
723
+ assert_equal(expected_records, d.events.map {|_tag, _time, record| record })
724
+ assert_false d.logs.any? { |l| l.include?("error_class") }
725
+ end
726
+ end
727
+
643
728
  def test_regexp_matching
644
729
  setup_mocks
645
730
  d = create_driver(CONFIG + "\ncheck_apikey_on_start false\nstore_as text\nformat none\nmatch_regexp .*_key?")
@@ -661,7 +746,7 @@ EOS
661
746
  }
662
747
  ]
663
748
  }
664
- message = Struct::StubMessage.new(1, 1, Yajl.dump(body))
749
+ message = Struct::StubMessage.new(1, 1, JSON.generate(body))
665
750
  @sqs_poller.get_messages(anything, anything) do |config, stats|
666
751
  config.before_request.call(stats) if config.before_request
667
752
  stats.request_count += 1
@@ -690,7 +775,7 @@ EOS
690
775
  }
691
776
  ]
692
777
  }
693
- message = Struct::StubMessage.new(1, 1, Yajl.dump(body))
778
+ message = Struct::StubMessage.new(1, 1, JSON.generate(body))
694
779
  @sqs_poller.get_messages(anything, anything) do |config, stats|
695
780
  config.before_request.call(stats) if config.before_request
696
781
  stats.request_count += 1
@@ -735,7 +820,7 @@ EOS
735
820
  }
736
821
  }
737
822
 
738
- message = Struct::StubMessage.new(1, 1, Yajl.dump(body))
823
+ message = Struct::StubMessage.new(1, 1, JSON.generate(body))
739
824
  @sqs_poller.get_messages(anything, anything) do |config, stats|
740
825
  config.before_request.call(stats) if config.before_request
741
826
  stats.request_count += 1
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-s3
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.8.4
4
+ version: 1.8.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sadayuki Furuhashi
@@ -214,7 +214,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
214
214
  - !ruby/object:Gem::Version
215
215
  version: '0'
216
216
  requirements: []
217
- rubygems_version: 4.0.6
217
+ rubygems_version: 4.0.10
218
218
  specification_version: 4
219
219
  summary: Amazon S3 output plugin for Fluentd event collector
220
220
  test_files: