fluentd 1.15.3-x86-mingw32 → 1.16.1-x86-mingw32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. checksums.yaml +4 -4
  2. data/.github/ISSUE_TEMPLATE/bug_report.yaml +1 -0
  3. data/.github/ISSUE_TEMPLATE/feature_request.yaml +1 -0
  4. data/.github/workflows/linux-test.yaml +2 -2
  5. data/.github/workflows/macos-test.yaml +2 -2
  6. data/.github/workflows/stale-actions.yml +24 -0
  7. data/.github/workflows/windows-test.yaml +2 -2
  8. data/CHANGELOG.md +114 -0
  9. data/CONTRIBUTING.md +1 -1
  10. data/MAINTAINERS.md +5 -3
  11. data/README.md +0 -1
  12. data/SECURITY.md +5 -9
  13. data/fluentd.gemspec +2 -2
  14. data/lib/fluent/command/fluentd.rb +55 -53
  15. data/lib/fluent/daemon.rb +2 -4
  16. data/lib/fluent/event.rb +2 -2
  17. data/lib/fluent/log/console_adapter.rb +66 -0
  18. data/lib/fluent/log.rb +35 -5
  19. data/lib/fluent/plugin/base.rb +5 -7
  20. data/lib/fluent/plugin/buf_file.rb +32 -3
  21. data/lib/fluent/plugin/buf_file_single.rb +32 -3
  22. data/lib/fluent/plugin/buffer/file_chunk.rb +1 -1
  23. data/lib/fluent/plugin/buffer.rb +21 -0
  24. data/lib/fluent/plugin/in_tcp.rb +47 -2
  25. data/lib/fluent/plugin/out_forward/ack_handler.rb +19 -4
  26. data/lib/fluent/plugin/out_forward.rb +2 -2
  27. data/lib/fluent/plugin/out_secondary_file.rb +39 -22
  28. data/lib/fluent/plugin/output.rb +49 -12
  29. data/lib/fluent/plugin_helper/http_server/server.rb +2 -1
  30. data/lib/fluent/plugin_helper/server.rb +8 -0
  31. data/lib/fluent/supervisor.rb +157 -251
  32. data/lib/fluent/test/driver/base.rb +11 -5
  33. data/lib/fluent/test/driver/filter.rb +4 -0
  34. data/lib/fluent/test/startup_shutdown.rb +6 -8
  35. data/lib/fluent/version.rb +1 -1
  36. data/templates/new_gem/test/helper.rb.erb +0 -1
  37. data/test/command/test_ctl.rb +1 -1
  38. data/test/command/test_fluentd.rb +137 -6
  39. data/test/command/test_plugin_config_formatter.rb +0 -1
  40. data/test/compat/test_parser.rb +5 -5
  41. data/test/config/test_system_config.rb +0 -8
  42. data/test/log/test_console_adapter.rb +110 -0
  43. data/test/plugin/out_forward/test_ack_handler.rb +39 -0
  44. data/test/plugin/test_base.rb +98 -0
  45. data/test/plugin/test_buf_file.rb +62 -23
  46. data/test/plugin/test_buf_file_single.rb +65 -0
  47. data/test/plugin/test_in_http.rb +2 -3
  48. data/test/plugin/test_in_monitor_agent.rb +2 -3
  49. data/test/plugin/test_in_tcp.rb +87 -2
  50. data/test/plugin/test_in_udp.rb +28 -0
  51. data/test/plugin/test_out_forward.rb +14 -18
  52. data/test/plugin/test_out_http.rb +1 -0
  53. data/test/plugin/test_output.rb +269 -0
  54. data/test/plugin/test_output_as_buffered_compress.rb +32 -18
  55. data/test/plugin/test_parser_regexp.rb +1 -6
  56. data/test/plugin_helper/test_http_server_helper.rb +1 -1
  57. data/test/plugin_helper/test_server.rb +59 -5
  58. data/test/test_config.rb +0 -21
  59. data/test/test_formatter.rb +23 -20
  60. data/test/test_log.rb +71 -36
  61. data/test/test_supervisor.rb +277 -282
  62. metadata +13 -19
  63. data/.drone.yml +0 -35
  64. data/.gitlab-ci.yml +0 -103
  65. data/test/test_logger_initializer.rb +0 -46
@@ -0,0 +1,66 @@
1
+ #
2
+ # Fluentd
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ #
16
+
17
+ require 'console/terminal/logger'
18
+
19
+ module Fluent
20
+ class Log
21
+ # Async gem which is used by http_server helper switched logger mechanism to
22
+ # Console gem which isn't complatible with Ruby's standard Logger (since
23
+ # v1.17). This class adapts it to Fluentd's logger mechanism.
24
+ class ConsoleAdapter < Console::Terminal::Logger
25
+ def self.wrap(logger)
26
+ _, level = Console::Logger::LEVELS.find { |key, value|
27
+ if logger.level <= 0
28
+ key == :debug
29
+ else
30
+ value == logger.level - 1
31
+ end
32
+ }
33
+ Console::Logger.new(ConsoleAdapter.new(logger), level: level)
34
+ end
35
+
36
+ def initialize(logger)
37
+ @logger = logger
38
+ # When `verbose` is `true`, following items will be added as a prefix or
39
+ # suffix of the subject:
40
+ # * Severity
41
+ # * Object ID
42
+ # * PID
43
+ # * Time
44
+ # Severity and Time are added by Fluentd::Log too so they are redundant.
45
+ # PID is the worker's PID so it's also redundant.
46
+ # Object ID will be too verbose in usual cases.
47
+ # So set it as `false` here to suppress redundant items.
48
+ super(StringIO.new, verbose: false)
49
+ end
50
+
51
+ def call(subject = nil, *arguments, name: nil, severity: 'info', **options, &block)
52
+ if LEVEL_TEXT.include?(severity.to_s)
53
+ level = severity
54
+ else
55
+ @logger.warn("Unknown severity: #{severity}")
56
+ level = 'warn'
57
+ end
58
+
59
+ @io.seek(0)
60
+ @io.truncate(0)
61
+ super
62
+ @logger.send(level, @io.string.chomp)
63
+ end
64
+ end
65
+ end
66
+ end
data/lib/fluent/log.rb CHANGED
@@ -67,8 +67,29 @@ module Fluent
67
67
  LEVEL_TEXT.map{|t| "#{LOG_EVENT_TAG_PREFIX}.#{t}" }
68
68
  end
69
69
 
70
+ # Create a unique path for each process.
71
+ #
72
+ # >>> per_process_path("C:/tmp/test.log", :worker, 1)
73
+ # C:/tmp/test-1.log
74
+ # >>> per_process_path("C:/tmp/test.log", :supervisor, 0)
75
+ # C:/tmp/test-supervisor-0.log
76
+ def self.per_process_path(path, process_type, worker_id)
77
+ path = Pathname(path)
78
+ ext = path.extname
79
+
80
+ if process_type == :supervisor
81
+ suffix = "-#{process_type}-0#{ext}" # "-0" for backword compatibility.
82
+ else
83
+ suffix = "-#{worker_id}#{ext}"
84
+ end
85
+ return path.sub_ext(suffix).to_s
86
+ end
87
+
70
88
  def initialize(logger, opts={})
71
- # overwrites logger.level= so that config reloading resets level of Fluentd::Log
89
+ # When ServerEngine changes the logger.level, the Fluentd logger level should also change.
90
+ # So overwrites logger.level= below.
91
+ # However, currently Fluentd doesn't use the ServerEngine's reloading feature,
92
+ # so maybe we don't need this overwriting anymore.
72
93
  orig_logger_level_setter = logger.class.public_instance_method(:level=).bind(logger)
73
94
  me = self
74
95
  # The original ruby logger sets the number as each log level like below.
@@ -92,6 +113,7 @@ module Fluent
92
113
  # So if serverengine's logger level is changed, fluentd's log level will be changed to that + 1.
93
114
  logger.define_singleton_method(:level=) {|level| orig_logger_level_setter.call(level); me.level = self.level + 1 }
94
115
 
116
+ @path = opts[:path]
95
117
  @logger = logger
96
118
  @out = logger.instance_variable_get(:@logdev)
97
119
  @level = logger.level + 1
@@ -102,7 +124,8 @@ module Fluent
102
124
  @time_format = nil
103
125
  @formatter = nil
104
126
 
105
- self.format = :text
127
+ self.format = opts.fetch(:format, :text)
128
+ self.time_format = opts[:time_format] if opts.key?(:time_format)
106
129
  enable_color out.tty?
107
130
  # TODO: This variable name is unclear so we should change to better name.
108
131
  @threads_exclude_events = []
@@ -154,8 +177,12 @@ module Fluent
154
177
 
155
178
  attr_reader :format
156
179
  attr_reader :time_format
157
- attr_accessor :log_event_enabled, :ignore_repeated_log_interval, :ignore_same_log_interval
180
+ attr_accessor :log_event_enabled, :ignore_repeated_log_interval, :ignore_same_log_interval, :suppress_repeated_stacktrace
158
181
  attr_accessor :out
182
+ # Strictly speaking, we should also change @logger.level when the setter of @level is called.
183
+ # Currently, we don't need to do it, since Fluentd::Log doesn't use ServerEngine::DaemonLogger.level.
184
+ # Since We overwrites logger.level= so that @logger.level is applied to @level,
185
+ # we need to find a good way to do this, otherwise we will end up in an endless loop.
159
186
  attr_accessor :level
160
187
  attr_accessor :optional_header, :optional_attrs
161
188
 
@@ -202,9 +229,12 @@ module Fluent
202
229
  @time_formatter = Strftime.new(@time_format) rescue nil
203
230
  end
204
231
 
232
+ def stdout?
233
+ @out == $stdout
234
+ end
235
+
205
236
  def reopen!
206
- # do nothing in @logger.reopen! because it's already reopened in Supervisor.load_config
207
- @logger.reopen! if @logger
237
+ @out.reopen(@path, "a") if @path && @path != "-"
208
238
  nil
209
239
  end
210
240
 
@@ -53,14 +53,12 @@ module Fluent
53
53
  end
54
54
 
55
55
  def configure(conf)
56
- if Fluent::Engine.supervisor_mode || (conf.respond_to?(:for_this_worker?) && conf.for_this_worker?)
57
- workers = if conf.target_worker_ids && !conf.target_worker_ids.empty?
58
- conf.target_worker_ids.size
59
- else
60
- 1
61
- end
62
- system_config_override(workers: workers)
56
+ raise ArgumentError, "BUG: type of conf must be Fluent::Config::Element, but #{conf.class} is passed." unless conf.is_a?(Fluent::Config::Element)
57
+
58
+ if conf.for_this_worker? || (Fluent::Engine.supervisor_mode && !conf.for_every_workers?)
59
+ system_config_override(workers: conf.target_worker_ids.size)
63
60
  end
61
+
64
62
  super(conf, system_config.strict_config_value)
65
63
  @_state ||= State.new(false, false, false, false, false, false, false, false, false)
66
64
  @_state.configure = true
@@ -139,13 +139,20 @@ module Fluent
139
139
  def resume
140
140
  stage = {}
141
141
  queue = []
142
+ exist_broken_file = false
142
143
 
143
144
  patterns = [@path]
144
145
  patterns.unshift @additional_resume_path if @additional_resume_path
145
146
  Dir.glob(escaped_patterns(patterns)) do |path|
146
147
  next unless File.file?(path)
147
148
 
148
- log.debug { "restoring buffer file: path = #{path}" }
149
+ if owner.respond_to?(:buffer_config) && owner.buffer_config&.flush_at_shutdown
150
+ # When `flush_at_shutdown` is `true`, the remaining chunk files during resuming are possibly broken
151
+ # since there may be a power failure or similar failure.
152
+ log.warn { "restoring buffer file: path = #{path}" }
153
+ else
154
+ log.debug { "restoring buffer file: path = #{path}" }
155
+ end
149
156
 
150
157
  m = new_metadata() # this metadata will be overwritten by resuming .meta file content
151
158
  # so it should not added into @metadata_list for now
@@ -158,6 +165,7 @@ module Fluent
158
165
  begin
159
166
  chunk = Fluent::Plugin::Buffer::FileChunk.new(m, path, mode, compress: @compress) # file chunk resumes contents of metadata
160
167
  rescue Fluent::Plugin::Buffer::FileChunk::FileChunkError => e
168
+ exist_broken_file = true
161
169
  handle_broken_files(path, mode, e)
162
170
  next
163
171
  end
@@ -182,6 +190,15 @@ module Fluent
182
190
 
183
191
  queue.sort_by!{ |chunk| chunk.modified_at }
184
192
 
193
+ # If one of the files is corrupted, other files may also be corrupted and be undetected.
194
+ # The time priods of each chunk are helpful to check the data.
195
+ if exist_broken_file
196
+ log.info "Since a broken chunk file was found, it is possible that other files remaining at the time of resuming were also broken. Here is the list of the files."
197
+ (stage.values + queue).each { |chunk|
198
+ log.info " #{chunk.path}:", :created_at => chunk.created_at, :modified_at => chunk.modified_at
199
+ }
200
+ end
201
+
185
202
  return stage, queue
186
203
  end
187
204
 
@@ -195,8 +212,20 @@ module Fluent
195
212
  end
196
213
 
197
214
  def handle_broken_files(path, mode, e)
198
- log.error "found broken chunk file during resume. Deleted corresponding files:", :path => path, :mode => mode, :err_msg => e.message
199
- # After support 'backup_dir' feature, these files are moved to backup_dir instead of unlink.
215
+ log.error "found broken chunk file during resume.", :path => path, :mode => mode, :err_msg => e.message
216
+ unique_id = Fluent::Plugin::Buffer::FileChunk.unique_id_from_path(path)
217
+ backup(unique_id) { |f|
218
+ File.open(path, 'rb') { |chunk|
219
+ chunk.set_encoding(Encoding::ASCII_8BIT)
220
+ chunk.sync = true
221
+ chunk.binmode
222
+ IO.copy_stream(chunk, f)
223
+ }
224
+ }
225
+ rescue => error
226
+ log.error "backup failed. Delete corresponding files.", :err_msg => error.message
227
+ ensure
228
+ log.warn "disable_chunk_backup is true. #{dump_unique_id_hex(unique_id)} chunk is thrown away." if @disable_chunk_backup
200
229
  File.unlink(path, path + '.meta') rescue nil
201
230
  end
202
231
 
@@ -160,13 +160,20 @@ module Fluent
160
160
  def resume
161
161
  stage = {}
162
162
  queue = []
163
+ exist_broken_file = false
163
164
 
164
165
  patterns = [@path]
165
166
  patterns.unshift @additional_resume_path if @additional_resume_path
166
167
  Dir.glob(escaped_patterns(patterns)) do |path|
167
168
  next unless File.file?(path)
168
169
 
169
- log.debug { "restoring buffer file: path = #{path}" }
170
+ if owner.respond_to?(:buffer_config) && owner.buffer_config&.flush_at_shutdown
171
+ # When `flush_at_shutdown` is `true`, the remaining chunk files during resuming are possibly broken
172
+ # since there may be a power failure or similar failure.
173
+ log.warn { "restoring buffer file: path = #{path}" }
174
+ else
175
+ log.debug { "restoring buffer file: path = #{path}" }
176
+ end
170
177
 
171
178
  m = new_metadata() # this metadata will be updated in FileSingleChunk.new
172
179
  mode = Fluent::Plugin::Buffer::FileSingleChunk.assume_chunk_state(path)
@@ -179,6 +186,7 @@ module Fluent
179
186
  chunk = Fluent::Plugin::Buffer::FileSingleChunk.new(m, path, mode, @key_in_path, compress: @compress)
180
187
  chunk.restore_size(@chunk_format) if @calc_num_records
181
188
  rescue Fluent::Plugin::Buffer::FileSingleChunk::FileChunkError => e
189
+ exist_broken_file = true
182
190
  handle_broken_files(path, mode, e)
183
191
  next
184
192
  end
@@ -193,6 +201,15 @@ module Fluent
193
201
 
194
202
  queue.sort_by!(&:modified_at)
195
203
 
204
+ # If one of the files is corrupted, other files may also be corrupted and be undetected.
205
+ # The time priods of each chunk are helpful to check the data.
206
+ if exist_broken_file
207
+ log.info "Since a broken chunk file was found, it is possible that other files remaining at the time of resuming were also broken. Here is the list of the files."
208
+ (stage.values + queue).each { |chunk|
209
+ log.info " #{chunk.path}:", :created_at => chunk.created_at, :modified_at => chunk.modified_at
210
+ }
211
+ end
212
+
196
213
  return stage, queue
197
214
  end
198
215
 
@@ -207,8 +224,20 @@ module Fluent
207
224
  end
208
225
 
209
226
  def handle_broken_files(path, mode, e)
210
- log.error "found broken chunk file during resume. Delete corresponding files:", path: path, mode: mode, err_msg: e.message
211
- # After support 'backup_dir' feature, these files are moved to backup_dir instead of unlink.
227
+ log.error "found broken chunk file during resume.", :path => path, :mode => mode, :err_msg => e.message
228
+ unique_id, _ = Fluent::Plugin::Buffer::FileSingleChunk.unique_id_and_key_from_path(path)
229
+ backup(unique_id) { |f|
230
+ File.open(path, 'rb') { |chunk|
231
+ chunk.set_encoding(Encoding::ASCII_8BIT)
232
+ chunk.sync = true
233
+ chunk.binmode
234
+ IO.copy_stream(chunk, f)
235
+ }
236
+ }
237
+ rescue => error
238
+ log.error "backup failed. Delete corresponding files.", :err_msg => error.message
239
+ ensure
240
+ log.warn "disable_chunk_backup is true. #{dump_unique_id_hex(unique_id)} chunk is thrown away." if @disable_chunk_backup
212
241
  File.unlink(path) rescue nil
213
242
  end
214
243
 
@@ -204,7 +204,7 @@ module Fluent
204
204
  end
205
205
  end
206
206
 
207
- # used only for queued v0.12 buffer path
207
+ # used only for queued v0.12 buffer path or broken files
208
208
  def self.unique_id_from_path(path)
209
209
  if /\.(b|q)([0-9a-f]+)\.[^\/]*\Z/n =~ path # //n switch means explicit 'ASCII-8BIT' pattern
210
210
  return $2.scan(/../).map{|x| x.to_i(16) }.pack('C*')
@@ -66,6 +66,9 @@ module Fluent
66
66
  desc 'Compress buffered data.'
67
67
  config_param :compress, :enum, list: [:text, :gzip], default: :text
68
68
 
69
+ desc 'If true, chunks are thrown away when unrecoverable error happens'
70
+ config_param :disable_chunk_backup, :bool, default: false
71
+
69
72
  Metadata = Struct.new(:timekey, :tag, :variables, :seq) do
70
73
  def initialize(timekey, tag, variables)
71
74
  super(timekey, tag, variables, 0)
@@ -903,6 +906,24 @@ module Fluent
903
906
  { 'buffer' => stats }
904
907
  end
905
908
 
909
+ def backup(chunk_unique_id)
910
+ unique_id = dump_unique_id_hex(chunk_unique_id)
911
+
912
+ if @disable_chunk_backup
913
+ log.warn "disable_chunk_backup is true. #{unique_id} chunk is not backed up."
914
+ return
915
+ end
916
+
917
+ safe_owner_id = owner.plugin_id.gsub(/[ "\/\\:;|*<>?]/, '_')
918
+ backup_base_dir = system_config.root_dir || DEFAULT_BACKUP_DIR
919
+ backup_file = File.join(backup_base_dir, 'backup', "worker#{fluentd_worker_id}", safe_owner_id, "#{unique_id}.log")
920
+ backup_dir = File.dirname(backup_file)
921
+
922
+ log.warn "bad chunk is moved to #{backup_file}"
923
+ FileUtils.mkdir_p(backup_dir, mode: system_config.dir_permission || Fluent::DEFAULT_DIR_PERMISSION) unless Dir.exist?(backup_dir)
924
+ File.open(backup_file, 'ab', system_config.file_permission || Fluent::DEFAULT_FILE_PERMISSION) { |f| yield f }
925
+ end
926
+
906
927
  private
907
928
 
908
929
  def optimistic_queued?(metadata = nil)
@@ -36,10 +36,16 @@ module Fluent::Plugin
36
36
  desc "The field name of the client's address."
37
37
  config_param :source_address_key, :string, default: nil
38
38
 
39
+ # Setting default to nil for backward compatibility
40
+ desc "The max bytes of message."
41
+ config_param :message_length_limit, :size, default: nil
42
+
39
43
  config_param :blocking_timeout, :time, default: 0.5
40
44
 
41
45
  desc 'The payload is read up to this character.'
42
46
  config_param :delimiter, :string, default: "\n" # syslog family add "\n" to each message and this seems only way to split messages in tcp stream
47
+ desc 'Check the remote connection is still available by sending a keepalive packet if this value is true.'
48
+ config_param :send_keepalive_packet, :bool, default: false
43
49
 
44
50
  # in_forward like host/network restriction
45
51
  config_section :security, required: false, multi: false do
@@ -100,8 +106,9 @@ module Fluent::Plugin
100
106
 
101
107
  log.info "listening tcp socket", bind: @bind, port: @port
102
108
  del_size = @delimiter.length
109
+ discard_till_next_delimiter = false
103
110
  if @_extract_enabled && @_extract_tag_key
104
- server_create(:in_tcp_server_single_emit, @port, bind: @bind, resolve_name: !!@source_hostname_key) do |data, conn|
111
+ server_create(:in_tcp_server_single_emit, @port, bind: @bind, resolve_name: !!@source_hostname_key, send_keepalive_packet: @send_keepalive_packet) do |data, conn|
105
112
  unless check_client(conn)
106
113
  conn.close
107
114
  next
@@ -114,6 +121,16 @@ module Fluent::Plugin
114
121
  msg = buf[pos...i]
115
122
  pos = i + del_size
116
123
 
124
+ if discard_till_next_delimiter
125
+ discard_till_next_delimiter = false
126
+ next
127
+ end
128
+
129
+ if !@message_length_limit.nil? && @message_length_limit < msg.bytesize
130
+ log.info "The received data is larger than 'message_length_limit', dropped:", limit: @message_length_limit, size: msg.bytesize, head: msg[...32]
131
+ next
132
+ end
133
+
117
134
  @parser.parse(msg) do |time, record|
118
135
  unless time && record
119
136
  log.warn "pattern not matched", message: msg
@@ -129,9 +146,18 @@ module Fluent::Plugin
129
146
  end
130
147
  end
131
148
  buf.slice!(0, pos) if pos > 0
149
+ # If the buffer size exceeds the limit here, it means that the next message will definitely exceed the limit.
150
+ # So we should clear the buffer here. Otherwise, it will keep storing useless data until the next delimiter comes.
151
+ if !@message_length_limit.nil? && @message_length_limit < buf.bytesize
152
+ log.info "The buffer size exceeds 'message_length_limit', cleared:", limit: @message_length_limit, size: buf.bytesize, head: buf[...32]
153
+ buf.clear
154
+ # We should discard the subsequent data until the next delimiter comes.
155
+ discard_till_next_delimiter = true
156
+ next
157
+ end
132
158
  end
133
159
  else
134
- server_create(:in_tcp_server_batch_emit, @port, bind: @bind, resolve_name: !!@source_hostname_key) do |data, conn|
160
+ server_create(:in_tcp_server_batch_emit, @port, bind: @bind, resolve_name: !!@source_hostname_key, send_keepalive_packet: @send_keepalive_packet) do |data, conn|
135
161
  unless check_client(conn)
136
162
  conn.close
137
163
  next
@@ -145,6 +171,16 @@ module Fluent::Plugin
145
171
  msg = buf[pos...i]
146
172
  pos = i + del_size
147
173
 
174
+ if discard_till_next_delimiter
175
+ discard_till_next_delimiter = false
176
+ next
177
+ end
178
+
179
+ if !@message_length_limit.nil? && @message_length_limit < msg.bytesize
180
+ log.info "The received data is larger than 'message_length_limit', dropped:", limit: @message_length_limit, size: msg.bytesize, head: msg[...32]
181
+ next
182
+ end
183
+
148
184
  @parser.parse(msg) do |time, record|
149
185
  unless time && record
150
186
  log.warn "pattern not matched", message: msg
@@ -159,6 +195,15 @@ module Fluent::Plugin
159
195
  end
160
196
  router.emit_stream(@tag, es)
161
197
  buf.slice!(0, pos) if pos > 0
198
+ # If the buffer size exceeds the limit here, it means that the next message will definitely exceed the limit.
199
+ # So we should clear the buffer here. Otherwise, it will keep storing useless data until the next delimiter comes.
200
+ if !@message_length_limit.nil? && @message_length_limit < buf.bytesize
201
+ log.info "The buffer size exceeds 'message_length_limit', cleared:", limit: @message_length_limit, size: buf.bytesize, head: buf[...32]
202
+ buf.clear
203
+ # We should discard the subsequent data until the next delimiter comes.
204
+ discard_till_next_delimiter = true
205
+ next
206
+ end
162
207
  end
163
208
  end
164
209
  end
@@ -59,7 +59,13 @@ module Fluent::Plugin
59
59
  @ack_waitings = new_list
60
60
  end
61
61
 
62
- readable_sockets, _, _ = IO.select(sockets, nil, nil, select_interval)
62
+ begin
63
+ readable_sockets, _, _ = IO.select(sockets, nil, nil, select_interval)
64
+ rescue IOError
65
+ @log.info "connection closed while waiting for readable sockets"
66
+ readable_sockets = nil
67
+ end
68
+
63
69
  if readable_sockets
64
70
  readable_sockets.each do |sock|
65
71
  results << read_ack_from_sock(sock)
@@ -109,13 +115,22 @@ module Fluent::Plugin
109
115
  raw_data = sock.instance_of?(Fluent::PluginHelper::Socket::WrappedSocket::TLS) ? sock.readpartial(@read_length) : sock.recv(@read_length)
110
116
  rescue Errno::ECONNRESET, EOFError # ECONNRESET for #recv, #EOFError for #readpartial
111
117
  raw_data = ''
118
+ rescue IOError
119
+ @log.info "socket closed while receiving ack response"
120
+ return nil, Result::FAILED
112
121
  end
113
122
 
114
123
  info = find(sock)
115
124
 
116
- # When connection is closed by remote host, socket is ready to read and #recv returns an empty string that means EOF.
117
- # If this happens we assume the data wasn't delivered and retry it.
118
- if raw_data.empty?
125
+ if info.nil?
126
+ # The info can be deleted by another thread during `sock.recv()` and `find()`.
127
+ # This is OK since another thread has completed to process the ack, so we can skip this.
128
+ # Note: exclusion mechanism about `collect_response()` may need to be considered.
129
+ @log.debug "could not find the ack info. this ack may be processed by another thread."
130
+ return nil, Result::FAILED
131
+ elsif raw_data.empty?
132
+ # When connection is closed by remote host, socket is ready to read and #recv returns an empty string that means EOF.
133
+ # If this happens we assume the data wasn't delivered and retry it.
119
134
  @log.warn 'destination node closed the connection. regard it as unavailable.', host: info.node.host, port: info.node.port
120
135
  # info.node.disable!
121
136
  return info, Result::FAILED
@@ -521,8 +521,8 @@ module Fluent::Plugin
521
521
  when AckHandler::Result::SUCCESS
522
522
  commit_write(chunk_id)
523
523
  when AckHandler::Result::FAILED
524
- node.disable!
525
- rollback_write(chunk_id, update_retry: false)
524
+ node&.disable!
525
+ rollback_write(chunk_id, update_retry: false) if chunk_id
526
526
  when AckHandler::Result::CHUNKID_UNMATCHED
527
527
  rollback_write(chunk_id, update_retry: false)
528
528
  else
@@ -64,31 +64,29 @@ module Fluent::Plugin
64
64
  end
65
65
 
66
66
  def multi_workers_ready?
67
- ### TODO: add hack to synchronize for multi workers
68
67
  true
69
68
  end
70
69
 
71
70
  def write(chunk)
72
71
  path_without_suffix = extract_placeholders(@path_without_suffix, chunk)
73
- path = generate_path(path_without_suffix)
74
- FileUtils.mkdir_p File.dirname(path), mode: @dir_perm
75
-
76
- case @compress
77
- when :text
78
- File.open(path, "ab", @file_perm) {|f|
79
- f.flock(File::LOCK_EX)
80
- chunk.write_to(f)
81
- }
82
- when :gzip
83
- File.open(path, "ab", @file_perm) {|f|
84
- f.flock(File::LOCK_EX)
85
- gz = Zlib::GzipWriter.new(f)
86
- chunk.write_to(gz)
87
- gz.close
88
- }
72
+ generate_path(path_without_suffix) do |path|
73
+ FileUtils.mkdir_p File.dirname(path), mode: @dir_perm
74
+
75
+ case @compress
76
+ when :text
77
+ File.open(path, "ab", @file_perm) {|f|
78
+ f.flock(File::LOCK_EX)
79
+ chunk.write_to(f)
80
+ }
81
+ when :gzip
82
+ File.open(path, "ab", @file_perm) {|f|
83
+ f.flock(File::LOCK_EX)
84
+ gz = Zlib::GzipWriter.new(f)
85
+ chunk.write_to(gz)
86
+ gz.close
87
+ }
88
+ end
89
89
  end
90
-
91
- path
92
90
  end
93
91
 
94
92
  private
@@ -117,15 +115,34 @@ module Fluent::Plugin
117
115
 
118
116
  def generate_path(path_without_suffix)
119
117
  if @append
120
- "#{path_without_suffix}#{@suffix}"
121
- else
118
+ path = "#{path_without_suffix}#{@suffix}"
119
+ synchronize_path(path) do
120
+ yield path
121
+ end
122
+ return path
123
+ end
124
+
125
+ begin
122
126
  i = 0
123
127
  loop do
124
128
  path = "#{path_without_suffix}.#{i}#{@suffix}"
125
- return path unless File.exist?(path)
129
+ break unless File.exist?(path)
126
130
  i += 1
127
131
  end
132
+ synchronize_path(path) do
133
+ # If multiple processes or threads select the same path and another
134
+ # one entered this locking block first, the file should already
135
+ # exist and this one should retry to find new path.
136
+ raise FileAlreadyExist if File.exist?(path)
137
+ yield path
138
+ end
139
+ rescue FileAlreadyExist
140
+ retry
128
141
  end
142
+ path
143
+ end
144
+
145
+ class FileAlreadyExist < StandardError
129
146
  end
130
147
  end
131
148
  end