fluentd 1.16.4 → 1.16.5

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: c5692dc727fe7cfb5bc8067382585203321fe93e9cdd8d081c97d31239579a5f
-  data.tar.gz: 067f6e0ade694fc438d600c1d2c7b18f251c6b64e26b4ac4164a9d706ee4ab29
+  metadata.gz: 6557ce41f087cb6d7e0b7e8fa76d10259b8aa3538b917cb0b51bd4836cbdc01b
+  data.tar.gz: 062f08fb0fd8c9b6a8d0e5ff0f2619a94bc7b07de4d94d466c0a20c03459d8f3
 SHA512:
-  metadata.gz: d991817aa42a3a773f58150c160cf4a328020ed64c8f50a5be6f68064bbbcfbea741d54ed692c50a6dec94244c03a60337508ac7b3af9ce022068cb5a4fe2a48
-  data.tar.gz: 9e05ad8b3558a377cba7abe379032a2e6aa8070b85d84c6f3f7e6d9a4a7594015d79868a67a8fe36e16e48e38bb87a3cfc0c936c7936e6a038257b2d9588064c
+  metadata.gz: 42c8786c9abbcb0144fc755492b41b2d6b1eb1dc77b5ffa3bce568554be262733659be42972b6b676dda52581924f29fc5976087368f865a629085a06264be83
+  data.tar.gz: ab0b41ca7db17f7968c512433efe185fb3228856fb53730af1888ed7ee6459962298607c2a87f1d3b722c51fe637a0638f119cf0722fb0a3087c0cf45e91ced8
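The digests above cover the two members of the `.gem` archive (`metadata.gz` and `data.tar.gz`), not the archive itself. As a quick way to check a downloaded gem against them, here is a small Ruby sketch; the filename `fluentd-1.16.5.gem` is illustrative and assumes the gem was fetched locally (e.g. with `gem fetch fluentd -v 1.16.5`):

```ruby
# A .gem file is a tar archive whose members include metadata.gz and
# data.tar.gz; checksums.yaml records the digests of those members.
require 'digest'
require 'rubygems/package'

File.open('fluentd-1.16.5.gem', 'rb') do |io|
  Gem::Package::TarReader.new(io).each do |entry|
    next unless %w[metadata.gz data.tar.gz].include?(entry.full_name)
    body = entry.read
    puts "#{entry.full_name} SHA256: #{Digest::SHA256.hexdigest(body)}"
    puts "#{entry.full_name} SHA512: #{Digest::SHA512.hexdigest(body)}"
  end
end
```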
data/CHANGELOG.md CHANGED
@@ -1,5 +1,13 @@
 # v1.16
 
+## Release v1.16.5 - 2024/03/27
+
+### Bug Fix
+
+* Buffer: Fix emit error of v1.16.4 sometimes failing to process large data
+  exceeding chunk size limit
+  https://github.com/fluent/fluentd/pull/4447
+
 ## Release v1.16.4 - 2024/03/14
 
 ### Bug Fix
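The fix addresses a regression introduced by v1.16.4's buffer locking changes. A rough sketch of the write shape that could trigger it, adapted from the regression test added further down in this diff (`buffer` stands in for an already-configured `Fluent::Plugin::Buffer` instance and is hypothetical here):

```ruby
# Adapted from the new test in this release; `buffer` is a hypothetical,
# already-configured Fluent::Plugin::Buffer instance.
metadata  = buffer.metadata(timekey: Time.now.to_i)
small_row = "x" * 1024 * 400        # 400 KiB record
big_row   = "x" * 1024 * 1024 * 8   # 8 MiB record, the default chunk_limit_size

# One event stream whose total size far exceeds a single chunk, forcing
# Buffer#write_step_by_step to split and retry; v1.16.4 could fail to emit
# this, v1.16.5 processes it correctly.
buffer.write({metadata => [small_row] * 40 + [big_row] + ["x"]})
```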
data/lib/fluent/plugin/buffer.rb CHANGED
@@ -764,94 +764,95 @@ module Fluent
       while writing_splits_index < splits.size
         chunk = get_next_chunk.call
         errors = []
+        # The chunk must be locked until being passed to &block.
+        chunk.mon_enter
         modified_chunks << {chunk: chunk, adding_bytesize: 0, errors: errors}
-        chunk.synchronize do
-          raise ShouldRetry unless chunk.writable?
-          staged_chunk_used = true if chunk.staged?
-
-          original_bytesize = committed_bytesize = chunk.bytesize
-          begin
-            while writing_splits_index < splits.size
-              split = splits[writing_splits_index]
-              formatted_split = format ? format.call(split) : nil
 
-              if split.size == 1 # Check BufferChunkOverflowError
-                determined_bytesize = nil
-                if @compress != :text
-                  determined_bytesize = nil
-                elsif formatted_split
-                  determined_bytesize = formatted_split.bytesize
-                elsif split.first.respond_to?(:bytesize)
-                  determined_bytesize = split.first.bytesize
-                end
+        raise ShouldRetry unless chunk.writable?
+        staged_chunk_used = true if chunk.staged?
 
-                if determined_bytesize && determined_bytesize > @chunk_limit_size
-                  # It is a obvious case that BufferChunkOverflowError should be raised here.
-                  # But if it raises here, already processed 'split' or
-                  # the proceeding 'split' will be lost completely.
-                  # So it is a last resort to delay raising such a exception
-                  errors << "a #{determined_bytesize} bytes record (nth: #{writing_splits_index}) is larger than buffer chunk limit size (#{@chunk_limit_size})"
-                  writing_splits_index += 1
-                  next
-                end
+        original_bytesize = committed_bytesize = chunk.bytesize
+        begin
+          while writing_splits_index < splits.size
+            split = splits[writing_splits_index]
+            formatted_split = format ? format.call(split) : nil
 
-                if determined_bytesize.nil? || chunk.bytesize + determined_bytesize > @chunk_limit_size
-                  # The split will (might) cause size over so keep already processed
-                  # 'split' content here (allow performance regression a bit).
-                  chunk.commit
-                  committed_bytesize = chunk.bytesize
-                end
+            if split.size == 1 # Check BufferChunkOverflowError
+              determined_bytesize = nil
+              if @compress != :text
+                determined_bytesize = nil
+              elsif formatted_split
+                determined_bytesize = formatted_split.bytesize
+              elsif split.first.respond_to?(:bytesize)
+                determined_bytesize = split.first.bytesize
               end
 
-              if format
-                chunk.concat(formatted_split, split.size)
-              else
-                chunk.append(split, compress: @compress)
+              if determined_bytesize && determined_bytesize > @chunk_limit_size
+                # It is a obvious case that BufferChunkOverflowError should be raised here.
+                # But if it raises here, already processed 'split' or
+                # the proceeding 'split' will be lost completely.
+                # So it is a last resort to delay raising such a exception
+                errors << "a #{determined_bytesize} bytes record (nth: #{writing_splits_index}) is larger than buffer chunk limit size (#{@chunk_limit_size})"
+                writing_splits_index += 1
+                next
              end
-              adding_bytes = chunk.bytesize - committed_bytesize
 
-              if chunk_size_over?(chunk) # split size is larger than difference between size_full? and size_over?
-                chunk.rollback
+              if determined_bytesize.nil? || chunk.bytesize + determined_bytesize > @chunk_limit_size
+                # The split will (might) cause size over so keep already processed
+                # 'split' content here (allow performance regression a bit).
+                chunk.commit
                 committed_bytesize = chunk.bytesize
+              end
+            end
 
-                if split.size == 1 # Check BufferChunkOverflowError again
-                  if adding_bytes > @chunk_limit_size
-                    errors << "concatenated/appended a #{adding_bytes} bytes record (nth: #{writing_splits_index}) is larger than buffer chunk limit size (#{@chunk_limit_size})"
-                    writing_splits_index += 1
-                    next
-                  else
-                    # As already processed content is kept after rollback, then unstaged chunk should be queued.
-                    # After that, re-process current split again.
-                    # New chunk should be allocated, to do it, modify @stage and so on.
-                    synchronize { @stage.delete(modified_metadata) }
-                    staged_chunk_used = false
-                    chunk.unstaged!
-                    break
-                  end
-                end
+            if format
+              chunk.concat(formatted_split, split.size)
+            else
+              chunk.append(split, compress: @compress)
+            end
+            adding_bytes = chunk.bytesize - committed_bytesize
 
-                if chunk_size_full?(chunk) || split.size == 1
-                  enqueue_chunk_before_retry = true
+            if chunk_size_over?(chunk) # split size is larger than difference between size_full? and size_over?
+              chunk.rollback
+              committed_bytesize = chunk.bytesize
+
+              if split.size == 1 # Check BufferChunkOverflowError again
+                if adding_bytes > @chunk_limit_size
+                  errors << "concatenated/appended a #{adding_bytes} bytes record (nth: #{writing_splits_index}) is larger than buffer chunk limit size (#{@chunk_limit_size})"
+                  writing_splits_index += 1
+                  next
                 else
-                  splits_count *= 10
+                  # As already processed content is kept after rollback, then unstaged chunk should be queued.
+                  # After that, re-process current split again.
+                  # New chunk should be allocated, to do it, modify @stage and so on.
+                  synchronize { @stage.delete(modified_metadata) }
+                  staged_chunk_used = false
+                  chunk.unstaged!
+                  break
                 end
+              end
 
-                raise ShouldRetry
+              if chunk_size_full?(chunk) || split.size == 1
+                enqueue_chunk_before_retry = true
+              else
+                splits_count *= 10
              end
 
-              writing_splits_index += 1
+              raise ShouldRetry
+            end
 
-              if chunk_size_full?(chunk)
-                break
-              end
+            writing_splits_index += 1
+
+            if chunk_size_full?(chunk)
+              break
            end
-          rescue
-            chunk.purge if chunk.unstaged? # unstaged chunk will leak unless purge it
-            raise
          end
-
-          modified_chunks.last[:adding_bytesize] = chunk.bytesize - original_bytesize
+        rescue
+          chunk.purge if chunk.unstaged? # unstaged chunk will leak unless purge it
+          raise
        end
+
+        modified_chunks.last[:adding_bytesize] = chunk.bytesize - original_bytesize
      end
 
      modified_chunks.each do |data|
        block.call(data[:chunk], data[:adding_bytesize], data[:errors])
@@ -863,9 +864,15 @@ module Fluent
         if chunk.unstaged?
           chunk.purge rescue nil
         end
+        chunk.mon_exit rescue nil
       end
       enqueue_chunk(metadata) if enqueue_chunk_before_retry
       retry
+    ensure
+      modified_chunks.each do |data|
+        chunk = data[:chunk]
+        chunk.mon_exit
+      end
     end
 
     STATS_KEYS = [
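The core of the change above: `chunk.synchronize do ... end` scoped the lock to the block, so each chunk was unlocked before `write_step_by_step` handed it to `&block`. The patch switches to explicit `mon_enter`/`mon_exit` (both from Ruby's stdlib `MonitorMixin`) so the lock is taken when the chunk is picked and released only in the method-level `ensure`. A minimal standalone sketch of the pattern, with hypothetical names (`Chunk`, `fill_chunks`), not fluentd's actual code:

```ruby
require 'monitor'

# Hypothetical stand-in for a buffer chunk. MonitorMixin provides
# mon_enter/mon_exit; its synchronize is just mon_enter + ensure mon_exit,
# so a synchronize-based lock can never outlive its block.
class Chunk
  include MonitorMixin
end

def fill_chunks(chunks, &block)
  locked = []
  chunks.each do |chunk|
    chunk.mon_enter        # lock now; the lock survives past this iteration
    locked << chunk
    # ... write records into chunk while holding its lock ...
  end
  # Every chunk is still locked when handed to the caller's block.
  locked.each { |chunk| block.call(chunk) }
ensure
  # Balanced release on success and on exceptions alike.
  locked.each { |chunk| chunk.mon_exit }
end

fill_chunks([Chunk.new, Chunk.new]) { |chunk| puts chunk.inspect }
```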
data/lib/fluent/version.rb CHANGED
@@ -16,6 +16,6 @@
 
 module Fluent
 
-  VERSION = '1.16.4'
+  VERSION = '1.16.5'
 
 end
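A quick runtime check that the upgrade took effect (`Fluent::VERSION` is the constant bumped above, defined in `lib/fluent/version.rb`):

```ruby
require 'fluent/version'
puts Fluent::VERSION   # => "1.16.5" after the upgrade
```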
data/test/plugin/test_buffer.rb CHANGED
@@ -901,6 +901,65 @@ class BufferTest < Test::Unit::TestCase
 
       assert_equal 2, purge_count
     end
+
+    # https://github.com/fluent/fluentd/issues/4446
+    test "#write_step_by_step keeps chunks kept in locked in entire #write process" do
+      assert_equal 8 * 1024 * 1024, @p.chunk_limit_size
+      assert_equal 0.95, @p.chunk_full_threshold
+
+      mon_enter_counts_by_chunk = {}
+      mon_exit_counts_by_chunk = {}
+
+      stub.proxy(@p).generate_chunk(anything) do |chunk|
+        stub(chunk).mon_enter do
+          enter_count = 1 + mon_enter_counts_by_chunk.fetch(chunk, 0)
+          exit_count = mon_exit_counts_by_chunk.fetch(chunk, 0)
+          mon_enter_counts_by_chunk[chunk] = enter_count
+
+          # Assert that chunk is passed to &block of write_step_by_step before exiting the lock.
+          # (i.e. The lock count must be 2 greater than the exit count).
+          # Since ShouldRetry occurs once, the staged chunk takes the lock 3 times when calling the block.
+          if chunk.staged?
+            lock_in_block = enter_count == 3
+            assert_equal(enter_count - 2, exit_count) if lock_in_block
+          else
+            lock_in_block = enter_count == 2
+            assert_equal(enter_count - 2, exit_count) if lock_in_block
+          end
+        end
+        stub(chunk).mon_exit do
+          exit_count = 1 + mon_exit_counts_by_chunk.fetch(chunk, 0)
+          mon_exit_counts_by_chunk[chunk] = exit_count
+        end
+        chunk
+      end
+
+      m = @p.metadata(timekey: Time.parse('2016-04-11 16:40:00 +0000').to_i)
+      small_row = "x" * 1024 * 400
+      big_row = "x" * 1024 * 1024 * 8 # just `chunk_size_limit`, it does't cause BufferOverFlowError.
+
+      # Write 42 events in 1 event stream, last one is for triggering `ShouldRetry`
+      @p.write({m => [small_row] * 40 + [big_row] + ["x"]})
+
+      # Above event strem will be splitted twice by `Buffer#write_step_by_step`
+      #
+      # 1. `write_once`: 42 [events] * 1 [stream]
+      # 2. `write_step_by_step`: 4 [events]* 10 [streams] + 2 [events] * 1 [stream]
+      # 3. `write_step_by_step` (by `ShouldRetry`): 1 [event] * 42 [streams]
+      #
+      # Example of staged chunk lock behavior:
+      #
+      # 1. mon_enter in write_step_by_step
+      # 2. ShouldRetry occurs
+      # 3. mon_exit in write_step_by_step
+      # 4. mon_enter again in write_step_by_step (retry)
+      # 5. passed to &block of write_step_by_step
+      # 6. mon_enter in the block (write)
+      # 7. mon_exit in write_step_by_step
+      # 8. mon_exit in write
+
+      assert_equal(mon_enter_counts_by_chunk.values, mon_exit_counts_by_chunk.values)
+    end
   end
 
   sub_test_case 'standard format with configuration for test with lower chunk limit size' do
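The split counts in the test comment above (step 2: 4 events × 10 streams + 2 events × 1 stream; step 3: 1 event × 42 streams) can be reproduced with a back-of-the-envelope sketch. This is the arithmetic only, not fluentd's exact source: the stream is cut into roughly `splits_count` slices, and `splits_count` grows by 10× on each `ShouldRetry` until it is capped at the event count.

```ruby
# Rough sketch of the slicing arithmetic only (assumed, not fluentd source).
def sketch_split_sizes(total_events, splits_count)
  splits_count = total_events if splits_count > total_events
  slice = if total_events % splits_count == 0
            total_events / splits_count
          else
            total_events / (splits_count - 1)
          end
  (1..total_events).each_slice(slice).map(&:size)
end

p sketch_split_sizes(42, 10)   # => [4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2]
p sketch_split_sizes(42, 100)  # => 42 single-event splits after ShouldRetry
```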
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: fluentd
 version: !ruby/object:Gem::Version
-  version: 1.16.4
+  version: 1.16.5
 platform: ruby
 authors:
 - Sadayuki Furuhashi
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2024-03-14 00:00:00.000000000 Z
+date: 2024-03-27 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler