RubyGems - fluent-plugin-s3 - Versions diffs - 1.8.4 → 1.8.5 - Mend

fluent-plugin-s3 1.8.4 → 1.8.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

checksums.yaml +4 -4
data/.github/dependabot.yml +17 -0
data/.github/workflows/linux.yml +13 -4
data/.github/workflows/stale-actions.yml +4 -1
data/ChangeLog +4 -0
data/README.md +1 -2
data/VERSION +1 -1
data/docs/input.md +5 -0
data/docs/output.md +1 -1
data/lib/fluent/plugin/in_s3.rb +61 -8
data/lib/fluent/plugin/s3_extractor_gzip_command.rb +9 -20
data/lib/fluent/plugin/s3_extractor_lzma2.rb +5 -13
data/lib/fluent/plugin/s3_extractor_lzo.rb +5 -13
data/test/test_in_s3.rb +96 -11
metadata +2 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: e3de70d5f20b42bd86ce425d436b2af2fabcb12bcd11756d6ccb311d32f117e7
-  data.tar.gz: 8896f6755b9c7cb6c726950493164bf4275cf0776b16cd37dd660ffe511e2f16
+  metadata.gz: 4ad2143d493bcb5b5805df10225cdb068bffc8ae24a1a6838bde343b18a845ed
+  data.tar.gz: faf67f2a0e65e73b385944113a485f509365b3b9f1e8059ef1b06df8c6719607
 SHA512:
-  metadata.gz: d9bd5499f054de826654d44208858684917d226f229fb4eb3e9f04cfe09dad089345fa16333ed05848499e4dd4b5ab321b36e7dc6dec733f0f2110f280b07512
-  data.tar.gz: f21db8ac19f9a3e05502f39f0e32d3740a903431800df70871539404fb3fdb4226b0c974c0f65d2e5f7e32f177a78df90a8529314fc84a9c1de4d051bac577e0
+  metadata.gz: 51c6cd59240d0d2055260b779e9887ab5a8fc0839246cf9154566271de72e30a73ba0587ae2945dd275c1b16b71c43e165a77e43419e73279fbb938904790a48
+  data.tar.gz: ff8aa7a1c32cf273527165eee2dec5df4df217d20e5047c53908469bf13fd8baf6b803781e30d09126d83e0ba4658bf8b28ae2b034e403f627ae9a9e98b312a3

data/.github/dependabot.yml CHANGED Viewed

@@ -4,3 +4,20 @@ updates:
     directory: '/'
     schedule:
       interval: 'monthly'
+    groups:
+      # PR: "Security update [package] from [old] to [new]"
+      # This PR should be merged in a hurry.
+      security-updates:
+        applies-to: security-updates
+        patterns:
+          - '*'
+      # PR: "Bump [package] from [old] to [new]"
+      # There is no need to merge this PR in a hurry. It is enough to merge
+      # once a month.
+      monthly-updates:
+        applies-to: version-updates
+        patterns:
+          - '*'
+    # Allow PRs to be created for both security and normal updates.
+    open-pull-requests-limit: 1

data/.github/workflows/linux.yml CHANGED Viewed

@@ -4,25 +4,34 @@ on:
     branches: [master]
   pull_request:
     branches: [master]
+  schedule:
+    - cron: '0 0 1 * *'
 jobs:
+  ruby-versions:
+    uses: ruby/actions/.github/workflows/ruby_versions.yml@master
+    with:
+      engine: cruby
+      min_version: 2.7
   build:
+    needs: ruby-versions
     runs-on: ${{ matrix.os }}
     strategy:
       fail-fast: false
       matrix:
-        ruby: [ '4.0', '3.4', '3.3', '3.2', '3.1', '3.0', '2.7' ]
+        ruby: ${{ fromJson(needs.ruby-versions.outputs.versions) }}
+        exclude:
+          - ruby: head
         os:
           - ubuntu-latest
     name: Ruby ${{ matrix.ruby }} unit testing on ${{ matrix.os }}
     steps:
-    - uses: actions/checkout@v6
-    - uses: ruby/setup-ruby@v1
+    - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
+    - uses: ruby/setup-ruby@afeafc3d1ab54a631816aba4c914a0081c12ff2f # v1.310.0
       with:
         ruby-version: ${{ matrix.ruby }}
     - name: unit testing
       env:
         CI: true
       run: |
-        gem install rake
         bundle install --jobs 4 --retry 3
         bundle exec rake test

data/.github/workflows/stale-actions.yml CHANGED Viewed

@@ -6,8 +6,11 @@ on:
 jobs:
   stale:
     runs-on: ubuntu-latest
+    permissions:
+      issues: write
+      pull-requests: write
     steps:
-    - uses: actions/stale@v10
+    - uses: actions/stale@eb5cf3af3ac0a1aa4c9c45633dd1ae542a27a899 # v10.3.0
       with:
         repo-token: ${{ secrets.GITHUB_TOKEN }}
         days-before-stale: 30

data/ChangeLog CHANGED Viewed

@@ -1,3 +1,7 @@
+Release 1.8.5 - 2026/06/25
+  * in_s3: enforce size limits on decompressed payloads
 Release 1.8.4 - 2026/03/04
   * in_s3: add aws_profile / aws_credential_process parameters for credentials (GitHub: #464)

data/README.md CHANGED Viewed

@@ -1,7 +1,6 @@
 # Amazon S3 plugin for [Fluentd](http://github.com/fluent/fluentd)
-[<img src="https://travis-ci.org/fluent/fluent-plugin-s3.svg?branch=master"
-alt="Build Status" />](https://travis-ci.org/fluent/fluent-plugin-s3) [<img
+[![linux](https://github.com/fluent/fluent-plugin-s3/actions/workflows/linux.yml/badge.svg)](https://github.com/fluent/fluent-plugin-s3/actions/workflows/linux.yml) [<img
 src="https://codeclimate.com/github/fluent/fluent-plugin-s3/badges/gpa.svg"
 />](https://codeclimate.com/github/fluent/fluent-plugin-s3)

data/VERSION CHANGED Viewed

@@ -1 +1 @@
-1.8.4
+1.8.5

data/docs/input.md CHANGED Viewed

@@ -29,6 +29,11 @@ See also [Configuration: credentials](credentials.md) for common comprehensive p
 Whether or not object metadata should be added to the record. Defaults to `false`. See below for details.
+## decompression_size_limit
+The size limit of the decompressed data. The default is `256m` (256 MiB).
+This parameter is designed to prevent memory exhaustion when extracting highly compressed objects from S3.
 ## match_regexp
 If provided, process the S3 object only if its key matches the regular expression

data/docs/output.md CHANGED Viewed

@@ -209,7 +209,7 @@ parquet file page size. default: 8192 bytes
 ### parquet_row_group_size
-parquet file row group size. default: 128 MB
+parquet file row group size. default: 128 MiB
 ### record_type

data/lib/fluent/plugin/in_s3.rb CHANGED Viewed

@@ -22,11 +22,14 @@ module Fluent::Plugin
     end
     DEFAULT_PARSE_TYPE = "none"
+    DECOMPRESSION_SIZE_LIMIT = 256 * 1024 * 1024
     desc "Use aws-sdk-ruby bundled cert"
     config_param :use_bundled_cert, :bool, default: false
     desc "Add object metadata to the records parsed out of a given object"
     config_param :add_object_metadata, :bool, default: false
+    desc 'The size limit of the extracted element.'
+    config_param :decompression_size_limit, :size, default: DECOMPRESSION_SIZE_LIMIT
     desc "AWS access key id"
     config_param :aws_key_id, :string, default: nil, secret: true
     desc "AWS secret key."
@@ -159,7 +162,7 @@ module Fluent::Plugin
       Aws.use_bundled_cert! if @use_bundled_cert
-      @extractor = EXTRACTOR_REGISTRY.lookup(@store_as).new(log: log)
+      @extractor = EXTRACTOR_REGISTRY.lookup(@store_as).new(log: log, decompression_size_limit: @decompression_size_limit)
       @extractor.configure(conf)
       @parser = parser_create(conf: parser_config, default_type: DEFAULT_PARSE_TYPE)
@@ -210,7 +213,7 @@ module Fluent::Plugin
       begin
         @poller.poll(options) do |message|
           begin
-            body = Yajl.load(message.body)
+            body = JSON.parse(message.body)
             log.debug(body)
             next unless is_valid_queue(body) # skip test queue
             if @match_regexp
@@ -365,13 +368,18 @@ module Fluent::Plugin
     end
     class Extractor
+      class SizeLimitError < StandardError; end
       include Fluent::Configurable
       attr_reader :log
-      def initialize(log: $log, **options)
+      BYTES_TO_READ = 64 * 1024
+      def initialize(log: $log, decompression_size_limit: DECOMPRESSION_SIZE_LIMIT, **options)
         super()
         @log = log
+        @decompression_size_limit = decompression_size_limit
       end
       def configure(conf)
@@ -399,6 +407,38 @@ module Fluent::Plugin
           raise Fluent::ConfigError, "'#{command}' utility must be in PATH for #{algo} compression"
         end
       end
+      def extract_with_command(command, io, tempfile_basename = "s3-extractor-tmp")
+        path = if io.respond_to?(:path)
+                 io.path
+               else
+                 extractor = TextExtractor.new(log: log, decompression_size_limit: @decompression_size_limit)
+                 temp = Tempfile.new(tempfile_basename)
+                 temp.write(extractor.extract(io))
+                 temp.close
+                 temp.path
+               end
+        out = ''
+        begin
+          Open3.popen3("#{command} #{path}") do |stdin, stdout, stderr, wait_thr|
+            stdin.close
+            while (chunk = stdout.read(BYTES_TO_READ))
+              out << chunk
+              if out.bytesize > @decompression_size_limit
+                Process.kill("TERM", wait_thr.pid) rescue nil
+                raise SizeLimitError, "Extracted data exceeds limit of #{@decompression_size_limit} bytes"
+              end
+            end
+            if wait_thr.value.success?
+              out
+            else
+              raise "Command execution failed: #{command} (status: #{wait_thr.value})"
+            end
+          end
+        end
+      end
     end
     class GzipExtractor < Extractor
@@ -414,19 +454,25 @@ module Fluent::Plugin
       # https://bugs.ruby-lang.org/issues/11180
       # https://github.com/exAspArk/multiple_files_gzip_reader
       def extract(io)
-        parts = []
+        out = ''
         loop do
           unused = nil
           Zlib::GzipReader.wrap(io) do |gz|
-            parts << gz.read
+            while (chunk = gz.read(BYTES_TO_READ))
+              out << chunk
+              if out.bytesize > @decompression_size_limit
+                raise SizeLimitError, "Extracted data exceeds limit of #{@decompression_size_limit} bytes"
+              end
+            end
             unused = gz.unused
             gz.finish
           end
           io.pos -= unused ? unused.length : 0
           break if io.eof?
         end
-        io.close
-        parts.join
+        out
+      ensure
+        io.close unless io.closed?
       end
     end
@@ -440,7 +486,14 @@ module Fluent::Plugin
       end
       def extract(io)
-        io.read
+        out = ''
+        while (chunk = io.read(BYTES_TO_READ))
+          out << chunk
+          if out.bytesize > @decompression_size_limit
+            raise SizeLimitError, "Extracted data exceeds limit of #{@decompression_size_limit} bytes"
+          end
+        end
+        out
       end
     end

data/lib/fluent/plugin/s3_extractor_gzip_command.rb CHANGED Viewed

@@ -19,26 +19,15 @@ module Fluent::Plugin
       end
       def extract(io)
-        path = if io.respond_to?(:path)
-                 io.path
-               else
-                 temp = Tempfile.new("gzip-temp")
-                 temp.write(io.read)
-                 temp.close
-                 temp.path
-               end
-        stdout, succeeded = Open3.capture2("gzip #{@command_parameter} #{path}")
-        if succeeded
-          stdout
-        else
-          log.warn "failed to execute gzip command. Fallback to GzipReader. status = #{succeeded}"
-          begin
-            io.rewind
-            Zlib::GzipReader.wrap(io) do |gz|
-              gz.read
-            end
-          end
+        begin
+          extract_with_command("gzip #{@command_parameter}", io, "gzip-temp")
+        rescue SizeLimitError
+          raise
+        rescue => e
+          log.warn "gzip command execution failed: #{e.message}. Fallback to GzipExtractor."
+          io.rewind
+          extractor = GzipExtractor.new(log: log, decompression_size_limit: @decompression_size_limit)
+          extractor.extract(io)
         end
       end
     end

data/lib/fluent/plugin/s3_extractor_lzma2.rb CHANGED Viewed

@@ -19,19 +19,11 @@ module Fluent::Plugin
       end
       def extract(io)
-        path = if io.respond_to?(path)
-                 io.path
-               else
-                 temp = Tempfile.new("xz-temp")
-                 temp.write(io.read)
-                 temp.close
-                 temp.path
-               end
-        stdout, succeeded = Open3.capture2("xz #{@command_parameter} #{path}")
-        if succeeded
-          stdout
-        else
+        begin
+          extract_with_command("xz #{@command_parameter}", io, "xz-temp")
+        rescue SizeLimitError
+          raise
+        rescue
           raise "Failed to extract #{path} with xz command."
         end
       end

data/lib/fluent/plugin/s3_extractor_lzo.rb CHANGED Viewed

@@ -19,19 +19,11 @@ module Fluent::Plugin
       end
       def extract(io)
-        path = if io.respond_to?(path)
-                 io.path
-               else
-                 temp = Tempfile.new("lzop-temp")
-                 temp.write(io.read)
-                 temp.close
-                 temp.path
-               end
-        stdout, succeeded = Open3.capture2("lzop #{@command_parameter} #{path}")
-        if succeeded
-          stdout
-        else
+        begin
+          extract_with_command("lzop #{@command_parameter}", io, "lzop-temp")
+        rescue SizeLimitError
+          raise
+        rescue
           raise "Failed to extract #{path} with lzop command."
         end
       end

data/test/test_in_s3.rb CHANGED Viewed

@@ -341,7 +341,7 @@ EOS
         }
       ]
     }
-    message = Struct::StubMessage.new(1, 1, Yajl.dump(body))
+    message = Struct::StubMessage.new(1, 1, JSON.generate(body))
     @sqs_poller.get_messages(anything, anything) do |config, stats|
       config.before_request.call(stats) if config.before_request
       stats.request_count += 1
@@ -376,7 +376,7 @@ EOS
         }
       ]
     }
-    message = Struct::StubMessage.new(1, 1, Yajl.dump(body))
+    message = Struct::StubMessage.new(1, 1, JSON.generate(body))
     @sqs_poller.get_messages(anything, anything) do |config, stats|
       config.before_request.call(stats) if config.before_request
       stats.request_count += 1
@@ -411,7 +411,7 @@ EOS
         }
       ]
     }
-    message = Struct::StubMessage.new(1, 1, Yajl.dump(body))
+    message = Struct::StubMessage.new(1, 1, JSON.generate(body))
     @sqs_poller.get_messages(anything, anything) do |config, stats|
       config.before_request.call(stats) if config.before_request
       stats.request_count += 1
@@ -446,7 +446,7 @@ EOS
         }
       ]
     }
-    message = Struct::StubMessage.new(1, 1, Yajl.dump(body))
+    message = Struct::StubMessage.new(1, 1, JSON.generate(body))
     @sqs_poller.get_messages(anything, anything) do |config, stats|
       config.before_request.call(stats) if config.before_request
       stats.request_count += 1
@@ -481,7 +481,7 @@ EOS
         }
       ]
     }
-    message = Struct::StubMessage.new(1, 1, Yajl.dump(body))
+    message = Struct::StubMessage.new(1, 1, JSON.generate(body))
     @sqs_poller.get_messages(anything, anything) do |config, stats|
       config.before_request.call(stats) if config.before_request
       stats.request_count += 1
@@ -521,7 +521,7 @@ EOS
         }
       ]
     }
-    message = Struct::StubMessage.new(1, 1, Yajl.dump(body))
+    message = Struct::StubMessage.new(1, 1, JSON.generate(body))
     @sqs_poller.get_messages(anything, anything) do |config, stats|
       config.before_request.call(stats) if config.before_request
       stats.request_count += 1
@@ -569,7 +569,7 @@ EOS
         }
       ]
     }
-    message = Struct::StubMessage.new(1, 1, Yajl.dump(body))
+    message = Struct::StubMessage.new(1, 1, JSON.generate(body))
     @sqs_poller.get_messages(anything, anything) do |config, stats|
       config.before_request.call(stats) if config.before_request
       stats.request_count += 1
@@ -620,7 +620,7 @@ EOS
         }
       ]
     }
-    message = Struct::StubMessage.new(1, 1, Yajl.dump(body))
+    message = Struct::StubMessage.new(1, 1, JSON.generate(body))
     @sqs_poller.get_messages(anything, anything) do |config, stats|
       config.before_request.call(stats) if config.before_request
       stats.request_count += 1
@@ -640,6 +640,91 @@ EOS
     assert_equal(expected_records, events.map {|_tag, _time, record| record })
   end
+  data(
+    "limit_gzip"          => { type: "gzip",         input: "StringIO", limit: 10,  expected_error: true },
+    "limit_text"          => { type: "text",         input: "StringIO", limit: 10,  expected_error: true },
+    "limit_gzip_command1" => { type: "gzip_command", input: "StringIO", limit: 10,  expected_error: true },
+    "limit_gzip_command2" => { type: "gzip_command", input: "Tempfile", limit: 10,  expected_error: true },
+    "normal_gzip_command" => { type: "gzip_command", input: "Tempfile", limit: 100, expected_error: false },
+    )
+  def test_decompression_size_limit(data)
+    store_type = data[:type]
+    input_type = data[:input]
+    limit = data[:limit]
+    setup_mocks
+    config = <<~CONF
+      #{CONFIG}
+      check_apikey_on_start false
+      store_as #{store_type}
+      format none
+      decompression_size_limit #{limit}
+    CONF
+    d = create_driver(config)
+    s3_object = stub(Object.new)
+    s3_response = stub(Object.new)
+    s3_response.body {
+      content = "#{'a'*10}\n#{'b'*10}\n"
+      # Switching between Tempfile and StringIO to cover both branches of the
+      # `io.respond_to?(:path)` condition in `extract_with_command`.
+      # This ensures that:
+      # 1. The StringIO route correctly uses TextExtractor to create a protected temporary file.
+      # 2. The Tempfile route correctly limits the output size during Open3.popen3 execution.
+      io = (input_type == "Tempfile") ? Tempfile.new : StringIO.new
+      case store_type
+      when "gzip", "gzip_command"
+        io.binmode
+        Zlib::GzipWriter.wrap(io) { |gz|
+          gz.write content
+          gz.finish
+        }
+      when "text"
+        io.write content
+      end
+      io.rewind
+      io
+    }
+    s3_object.get { s3_response }
+    @s3_bucket.object(anything).at_least(1) { s3_object }
+    body = {
+      "Records" => [
+        {
+          "s3" => {
+            "object" => {
+              "key" => "test_key"
+            }
+          }
+        }
+      ]
+    }
+    message = Struct::StubMessage.new(1, 1, Yajl.dump(body))
+    @sqs_poller.get_messages(anything, anything) do |config, stats|
+      config.before_request.call(stats) if config.before_request
+      stats.request_count += 1
+      if stats.request_count >= 1
+        d.instance.instance_variable_set(:@running, false)
+      end
+      [message]
+    end
+    d.run
+    if data[:expected_error]
+      # Verify the protection mechanism: ensure SizeLimitError is logged.
+      assert_true d.logs.any? { |l| l.include?("Extracted data exceeds limit of #{limit} bytes") }
+    else
+      # Verify the normal execution path: ensure data is correctly extracted via Open3.popen3.
+      expected_records = [{ "message" => "#{'a'*10}\n" }, { "message" => "#{'b'*10}\n" }]
+      assert_equal(expected_records, d.events.map {|_tag, _time, record| record })
+      assert_false d.logs.any? { |l| l.include?("error_class") }
+    end
+  end
   def test_regexp_matching
     setup_mocks
     d = create_driver(CONFIG + "\ncheck_apikey_on_start false\nstore_as text\nformat none\nmatch_regexp .*_key?")
@@ -661,7 +746,7 @@ EOS
         }
       ]
     }
-    message = Struct::StubMessage.new(1, 1, Yajl.dump(body))
+    message = Struct::StubMessage.new(1, 1, JSON.generate(body))
     @sqs_poller.get_messages(anything, anything) do |config, stats|
       config.before_request.call(stats) if config.before_request
       stats.request_count += 1
@@ -690,7 +775,7 @@ EOS
         }
       ]
     }
-    message = Struct::StubMessage.new(1, 1, Yajl.dump(body))
+    message = Struct::StubMessage.new(1, 1, JSON.generate(body))
     @sqs_poller.get_messages(anything, anything) do |config, stats|
       config.before_request.call(stats) if config.before_request
       stats.request_count += 1
@@ -735,7 +820,7 @@ EOS
       }
     }
-    message = Struct::StubMessage.new(1, 1, Yajl.dump(body))
+    message = Struct::StubMessage.new(1, 1, JSON.generate(body))
     @sqs_poller.get_messages(anything, anything) do |config, stats|
       config.before_request.call(stats) if config.before_request
       stats.request_count += 1

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: fluent-plugin-s3
 version: !ruby/object:Gem::Version
-  version: 1.8.4
+  version: 1.8.5
 platform: ruby
 authors:
 - Sadayuki Furuhashi
@@ -214,7 +214,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 4.0.6
+rubygems_version: 4.0.10
 specification_version: 4
 summary: Amazon S3 output plugin for Fluentd event collector
 test_files: