fluentd 1.15.3-x86-mingw32 → 1.16.1-x86-mingw32

Sign up to get free protection for your applications and to get access to all the features.
Files changed (65) hide show
  1. checksums.yaml +4 -4
  2. data/.github/ISSUE_TEMPLATE/bug_report.yaml +1 -0
  3. data/.github/ISSUE_TEMPLATE/feature_request.yaml +1 -0
  4. data/.github/workflows/linux-test.yaml +2 -2
  5. data/.github/workflows/macos-test.yaml +2 -2
  6. data/.github/workflows/stale-actions.yml +24 -0
  7. data/.github/workflows/windows-test.yaml +2 -2
  8. data/CHANGELOG.md +114 -0
  9. data/CONTRIBUTING.md +1 -1
  10. data/MAINTAINERS.md +5 -3
  11. data/README.md +0 -1
  12. data/SECURITY.md +5 -9
  13. data/fluentd.gemspec +2 -2
  14. data/lib/fluent/command/fluentd.rb +55 -53
  15. data/lib/fluent/daemon.rb +2 -4
  16. data/lib/fluent/event.rb +2 -2
  17. data/lib/fluent/log/console_adapter.rb +66 -0
  18. data/lib/fluent/log.rb +35 -5
  19. data/lib/fluent/plugin/base.rb +5 -7
  20. data/lib/fluent/plugin/buf_file.rb +32 -3
  21. data/lib/fluent/plugin/buf_file_single.rb +32 -3
  22. data/lib/fluent/plugin/buffer/file_chunk.rb +1 -1
  23. data/lib/fluent/plugin/buffer.rb +21 -0
  24. data/lib/fluent/plugin/in_tcp.rb +47 -2
  25. data/lib/fluent/plugin/out_forward/ack_handler.rb +19 -4
  26. data/lib/fluent/plugin/out_forward.rb +2 -2
  27. data/lib/fluent/plugin/out_secondary_file.rb +39 -22
  28. data/lib/fluent/plugin/output.rb +49 -12
  29. data/lib/fluent/plugin_helper/http_server/server.rb +2 -1
  30. data/lib/fluent/plugin_helper/server.rb +8 -0
  31. data/lib/fluent/supervisor.rb +157 -251
  32. data/lib/fluent/test/driver/base.rb +11 -5
  33. data/lib/fluent/test/driver/filter.rb +4 -0
  34. data/lib/fluent/test/startup_shutdown.rb +6 -8
  35. data/lib/fluent/version.rb +1 -1
  36. data/templates/new_gem/test/helper.rb.erb +0 -1
  37. data/test/command/test_ctl.rb +1 -1
  38. data/test/command/test_fluentd.rb +137 -6
  39. data/test/command/test_plugin_config_formatter.rb +0 -1
  40. data/test/compat/test_parser.rb +5 -5
  41. data/test/config/test_system_config.rb +0 -8
  42. data/test/log/test_console_adapter.rb +110 -0
  43. data/test/plugin/out_forward/test_ack_handler.rb +39 -0
  44. data/test/plugin/test_base.rb +98 -0
  45. data/test/plugin/test_buf_file.rb +62 -23
  46. data/test/plugin/test_buf_file_single.rb +65 -0
  47. data/test/plugin/test_in_http.rb +2 -3
  48. data/test/plugin/test_in_monitor_agent.rb +2 -3
  49. data/test/plugin/test_in_tcp.rb +87 -2
  50. data/test/plugin/test_in_udp.rb +28 -0
  51. data/test/plugin/test_out_forward.rb +14 -18
  52. data/test/plugin/test_out_http.rb +1 -0
  53. data/test/plugin/test_output.rb +269 -0
  54. data/test/plugin/test_output_as_buffered_compress.rb +32 -18
  55. data/test/plugin/test_parser_regexp.rb +1 -6
  56. data/test/plugin_helper/test_http_server_helper.rb +1 -1
  57. data/test/plugin_helper/test_server.rb +59 -5
  58. data/test/test_config.rb +0 -21
  59. data/test/test_formatter.rb +23 -20
  60. data/test/test_log.rb +71 -36
  61. data/test/test_supervisor.rb +277 -282
  62. metadata +13 -19
  63. data/.drone.yml +0 -35
  64. data/.gitlab-ci.yml +0 -103
  65. data/test/test_logger_initializer.rb +0 -46
@@ -0,0 +1,66 @@
1
+ #
2
+ # Fluentd
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ #
16
+
17
+ require 'console/terminal/logger'
18
+
19
+ module Fluent
20
+ class Log
21
+ # Async gem which is used by http_server helper switched logger mechanism to
22
+ # Console gem which isn't complatible with Ruby's standard Logger (since
23
+ # v1.17). This class adapts it to Fluentd's logger mechanism.
24
+ class ConsoleAdapter < Console::Terminal::Logger
25
+ def self.wrap(logger)
26
+ _, level = Console::Logger::LEVELS.find { |key, value|
27
+ if logger.level <= 0
28
+ key == :debug
29
+ else
30
+ value == logger.level - 1
31
+ end
32
+ }
33
+ Console::Logger.new(ConsoleAdapter.new(logger), level: level)
34
+ end
35
+
36
+ def initialize(logger)
37
+ @logger = logger
38
+ # When `verbose` is `true`, following items will be added as a prefix or
39
+ # suffix of the subject:
40
+ # * Severity
41
+ # * Object ID
42
+ # * PID
43
+ # * Time
44
+ # Severity and Time are added by Fluentd::Log too so they are redundant.
45
+ # PID is the worker's PID so it's also redundant.
46
+ # Object ID will be too verbose in usual cases.
47
+ # So set it as `false` here to suppress redundant items.
48
+ super(StringIO.new, verbose: false)
49
+ end
50
+
51
+ def call(subject = nil, *arguments, name: nil, severity: 'info', **options, &block)
52
+ if LEVEL_TEXT.include?(severity.to_s)
53
+ level = severity
54
+ else
55
+ @logger.warn("Unknown severity: #{severity}")
56
+ level = 'warn'
57
+ end
58
+
59
+ @io.seek(0)
60
+ @io.truncate(0)
61
+ super
62
+ @logger.send(level, @io.string.chomp)
63
+ end
64
+ end
65
+ end
66
+ end
data/lib/fluent/log.rb CHANGED
@@ -67,8 +67,29 @@ module Fluent
67
67
  LEVEL_TEXT.map{|t| "#{LOG_EVENT_TAG_PREFIX}.#{t}" }
68
68
  end
69
69
 
70
+ # Create a unique path for each process.
71
+ #
72
+ # >>> per_process_path("C:/tmp/test.log", :worker, 1)
73
+ # C:/tmp/test-1.log
74
+ # >>> per_process_path("C:/tmp/test.log", :supervisor, 0)
75
+ # C:/tmp/test-supervisor-0.log
76
+ def self.per_process_path(path, process_type, worker_id)
77
+ path = Pathname(path)
78
+ ext = path.extname
79
+
80
+ if process_type == :supervisor
81
+ suffix = "-#{process_type}-0#{ext}" # "-0" for backword compatibility.
82
+ else
83
+ suffix = "-#{worker_id}#{ext}"
84
+ end
85
+ return path.sub_ext(suffix).to_s
86
+ end
87
+
70
88
  def initialize(logger, opts={})
71
- # overwrites logger.level= so that config reloading resets level of Fluentd::Log
89
+ # When ServerEngine changes the logger.level, the Fluentd logger level should also change.
90
+ # So overwrites logger.level= below.
91
+ # However, currently Fluentd doesn't use the ServerEngine's reloading feature,
92
+ # so maybe we don't need this overwriting anymore.
72
93
  orig_logger_level_setter = logger.class.public_instance_method(:level=).bind(logger)
73
94
  me = self
74
95
  # The original ruby logger sets the number as each log level like below.
@@ -92,6 +113,7 @@ module Fluent
92
113
  # So if serverengine's logger level is changed, fluentd's log level will be changed to that + 1.
93
114
  logger.define_singleton_method(:level=) {|level| orig_logger_level_setter.call(level); me.level = self.level + 1 }
94
115
 
116
+ @path = opts[:path]
95
117
  @logger = logger
96
118
  @out = logger.instance_variable_get(:@logdev)
97
119
  @level = logger.level + 1
@@ -102,7 +124,8 @@ module Fluent
102
124
  @time_format = nil
103
125
  @formatter = nil
104
126
 
105
- self.format = :text
127
+ self.format = opts.fetch(:format, :text)
128
+ self.time_format = opts[:time_format] if opts.key?(:time_format)
106
129
  enable_color out.tty?
107
130
  # TODO: This variable name is unclear so we should change to better name.
108
131
  @threads_exclude_events = []
@@ -154,8 +177,12 @@ module Fluent
154
177
 
155
178
  attr_reader :format
156
179
  attr_reader :time_format
157
- attr_accessor :log_event_enabled, :ignore_repeated_log_interval, :ignore_same_log_interval
180
+ attr_accessor :log_event_enabled, :ignore_repeated_log_interval, :ignore_same_log_interval, :suppress_repeated_stacktrace
158
181
  attr_accessor :out
182
+ # Strictly speaking, we should also change @logger.level when the setter of @level is called.
183
+ # Currently, we don't need to do it, since Fluentd::Log doesn't use ServerEngine::DaemonLogger.level.
184
+ # Since We overwrites logger.level= so that @logger.level is applied to @level,
185
+ # we need to find a good way to do this, otherwise we will end up in an endless loop.
159
186
  attr_accessor :level
160
187
  attr_accessor :optional_header, :optional_attrs
161
188
 
@@ -202,9 +229,12 @@ module Fluent
202
229
  @time_formatter = Strftime.new(@time_format) rescue nil
203
230
  end
204
231
 
232
+ def stdout?
233
+ @out == $stdout
234
+ end
235
+
205
236
  def reopen!
206
- # do nothing in @logger.reopen! because it's already reopened in Supervisor.load_config
207
- @logger.reopen! if @logger
237
+ @out.reopen(@path, "a") if @path && @path != "-"
208
238
  nil
209
239
  end
210
240
 
@@ -53,14 +53,12 @@ module Fluent
53
53
  end
54
54
 
55
55
  def configure(conf)
56
- if Fluent::Engine.supervisor_mode || (conf.respond_to?(:for_this_worker?) && conf.for_this_worker?)
57
- workers = if conf.target_worker_ids && !conf.target_worker_ids.empty?
58
- conf.target_worker_ids.size
59
- else
60
- 1
61
- end
62
- system_config_override(workers: workers)
56
+ raise ArgumentError, "BUG: type of conf must be Fluent::Config::Element, but #{conf.class} is passed." unless conf.is_a?(Fluent::Config::Element)
57
+
58
+ if conf.for_this_worker? || (Fluent::Engine.supervisor_mode && !conf.for_every_workers?)
59
+ system_config_override(workers: conf.target_worker_ids.size)
63
60
  end
61
+
64
62
  super(conf, system_config.strict_config_value)
65
63
  @_state ||= State.new(false, false, false, false, false, false, false, false, false)
66
64
  @_state.configure = true
@@ -139,13 +139,20 @@ module Fluent
139
139
  def resume
140
140
  stage = {}
141
141
  queue = []
142
+ exist_broken_file = false
142
143
 
143
144
  patterns = [@path]
144
145
  patterns.unshift @additional_resume_path if @additional_resume_path
145
146
  Dir.glob(escaped_patterns(patterns)) do |path|
146
147
  next unless File.file?(path)
147
148
 
148
- log.debug { "restoring buffer file: path = #{path}" }
149
+ if owner.respond_to?(:buffer_config) && owner.buffer_config&.flush_at_shutdown
150
+ # When `flush_at_shutdown` is `true`, the remaining chunk files during resuming are possibly broken
151
+ # since there may be a power failure or similar failure.
152
+ log.warn { "restoring buffer file: path = #{path}" }
153
+ else
154
+ log.debug { "restoring buffer file: path = #{path}" }
155
+ end
149
156
 
150
157
  m = new_metadata() # this metadata will be overwritten by resuming .meta file content
151
158
  # so it should not added into @metadata_list for now
@@ -158,6 +165,7 @@ module Fluent
158
165
  begin
159
166
  chunk = Fluent::Plugin::Buffer::FileChunk.new(m, path, mode, compress: @compress) # file chunk resumes contents of metadata
160
167
  rescue Fluent::Plugin::Buffer::FileChunk::FileChunkError => e
168
+ exist_broken_file = true
161
169
  handle_broken_files(path, mode, e)
162
170
  next
163
171
  end
@@ -182,6 +190,15 @@ module Fluent
182
190
 
183
191
  queue.sort_by!{ |chunk| chunk.modified_at }
184
192
 
193
+ # If one of the files is corrupted, other files may also be corrupted and be undetected.
194
+ # The time priods of each chunk are helpful to check the data.
195
+ if exist_broken_file
196
+ log.info "Since a broken chunk file was found, it is possible that other files remaining at the time of resuming were also broken. Here is the list of the files."
197
+ (stage.values + queue).each { |chunk|
198
+ log.info " #{chunk.path}:", :created_at => chunk.created_at, :modified_at => chunk.modified_at
199
+ }
200
+ end
201
+
185
202
  return stage, queue
186
203
  end
187
204
 
@@ -195,8 +212,20 @@ module Fluent
195
212
  end
196
213
 
197
214
  def handle_broken_files(path, mode, e)
198
- log.error "found broken chunk file during resume. Deleted corresponding files:", :path => path, :mode => mode, :err_msg => e.message
199
- # After support 'backup_dir' feature, these files are moved to backup_dir instead of unlink.
215
+ log.error "found broken chunk file during resume.", :path => path, :mode => mode, :err_msg => e.message
216
+ unique_id = Fluent::Plugin::Buffer::FileChunk.unique_id_from_path(path)
217
+ backup(unique_id) { |f|
218
+ File.open(path, 'rb') { |chunk|
219
+ chunk.set_encoding(Encoding::ASCII_8BIT)
220
+ chunk.sync = true
221
+ chunk.binmode
222
+ IO.copy_stream(chunk, f)
223
+ }
224
+ }
225
+ rescue => error
226
+ log.error "backup failed. Delete corresponding files.", :err_msg => error.message
227
+ ensure
228
+ log.warn "disable_chunk_backup is true. #{dump_unique_id_hex(unique_id)} chunk is thrown away." if @disable_chunk_backup
200
229
  File.unlink(path, path + '.meta') rescue nil
201
230
  end
202
231
 
@@ -160,13 +160,20 @@ module Fluent
160
160
  def resume
161
161
  stage = {}
162
162
  queue = []
163
+ exist_broken_file = false
163
164
 
164
165
  patterns = [@path]
165
166
  patterns.unshift @additional_resume_path if @additional_resume_path
166
167
  Dir.glob(escaped_patterns(patterns)) do |path|
167
168
  next unless File.file?(path)
168
169
 
169
- log.debug { "restoring buffer file: path = #{path}" }
170
+ if owner.respond_to?(:buffer_config) && owner.buffer_config&.flush_at_shutdown
171
+ # When `flush_at_shutdown` is `true`, the remaining chunk files during resuming are possibly broken
172
+ # since there may be a power failure or similar failure.
173
+ log.warn { "restoring buffer file: path = #{path}" }
174
+ else
175
+ log.debug { "restoring buffer file: path = #{path}" }
176
+ end
170
177
 
171
178
  m = new_metadata() # this metadata will be updated in FileSingleChunk.new
172
179
  mode = Fluent::Plugin::Buffer::FileSingleChunk.assume_chunk_state(path)
@@ -179,6 +186,7 @@ module Fluent
179
186
  chunk = Fluent::Plugin::Buffer::FileSingleChunk.new(m, path, mode, @key_in_path, compress: @compress)
180
187
  chunk.restore_size(@chunk_format) if @calc_num_records
181
188
  rescue Fluent::Plugin::Buffer::FileSingleChunk::FileChunkError => e
189
+ exist_broken_file = true
182
190
  handle_broken_files(path, mode, e)
183
191
  next
184
192
  end
@@ -193,6 +201,15 @@ module Fluent
193
201
 
194
202
  queue.sort_by!(&:modified_at)
195
203
 
204
+ # If one of the files is corrupted, other files may also be corrupted and be undetected.
205
+ # The time priods of each chunk are helpful to check the data.
206
+ if exist_broken_file
207
+ log.info "Since a broken chunk file was found, it is possible that other files remaining at the time of resuming were also broken. Here is the list of the files."
208
+ (stage.values + queue).each { |chunk|
209
+ log.info " #{chunk.path}:", :created_at => chunk.created_at, :modified_at => chunk.modified_at
210
+ }
211
+ end
212
+
196
213
  return stage, queue
197
214
  end
198
215
 
@@ -207,8 +224,20 @@ module Fluent
207
224
  end
208
225
 
209
226
  def handle_broken_files(path, mode, e)
210
- log.error "found broken chunk file during resume. Delete corresponding files:", path: path, mode: mode, err_msg: e.message
211
- # After support 'backup_dir' feature, these files are moved to backup_dir instead of unlink.
227
+ log.error "found broken chunk file during resume.", :path => path, :mode => mode, :err_msg => e.message
228
+ unique_id, _ = Fluent::Plugin::Buffer::FileSingleChunk.unique_id_and_key_from_path(path)
229
+ backup(unique_id) { |f|
230
+ File.open(path, 'rb') { |chunk|
231
+ chunk.set_encoding(Encoding::ASCII_8BIT)
232
+ chunk.sync = true
233
+ chunk.binmode
234
+ IO.copy_stream(chunk, f)
235
+ }
236
+ }
237
+ rescue => error
238
+ log.error "backup failed. Delete corresponding files.", :err_msg => error.message
239
+ ensure
240
+ log.warn "disable_chunk_backup is true. #{dump_unique_id_hex(unique_id)} chunk is thrown away." if @disable_chunk_backup
212
241
  File.unlink(path) rescue nil
213
242
  end
214
243
 
@@ -204,7 +204,7 @@ module Fluent
204
204
  end
205
205
  end
206
206
 
207
- # used only for queued v0.12 buffer path
207
+ # used only for queued v0.12 buffer path or broken files
208
208
  def self.unique_id_from_path(path)
209
209
  if /\.(b|q)([0-9a-f]+)\.[^\/]*\Z/n =~ path # //n switch means explicit 'ASCII-8BIT' pattern
210
210
  return $2.scan(/../).map{|x| x.to_i(16) }.pack('C*')
@@ -66,6 +66,9 @@ module Fluent
66
66
  desc 'Compress buffered data.'
67
67
  config_param :compress, :enum, list: [:text, :gzip], default: :text
68
68
 
69
+ desc 'If true, chunks are thrown away when unrecoverable error happens'
70
+ config_param :disable_chunk_backup, :bool, default: false
71
+
69
72
  Metadata = Struct.new(:timekey, :tag, :variables, :seq) do
70
73
  def initialize(timekey, tag, variables)
71
74
  super(timekey, tag, variables, 0)
@@ -903,6 +906,24 @@ module Fluent
903
906
  { 'buffer' => stats }
904
907
  end
905
908
 
909
+ def backup(chunk_unique_id)
910
+ unique_id = dump_unique_id_hex(chunk_unique_id)
911
+
912
+ if @disable_chunk_backup
913
+ log.warn "disable_chunk_backup is true. #{unique_id} chunk is not backed up."
914
+ return
915
+ end
916
+
917
+ safe_owner_id = owner.plugin_id.gsub(/[ "\/\\:;|*<>?]/, '_')
918
+ backup_base_dir = system_config.root_dir || DEFAULT_BACKUP_DIR
919
+ backup_file = File.join(backup_base_dir, 'backup', "worker#{fluentd_worker_id}", safe_owner_id, "#{unique_id}.log")
920
+ backup_dir = File.dirname(backup_file)
921
+
922
+ log.warn "bad chunk is moved to #{backup_file}"
923
+ FileUtils.mkdir_p(backup_dir, mode: system_config.dir_permission || Fluent::DEFAULT_DIR_PERMISSION) unless Dir.exist?(backup_dir)
924
+ File.open(backup_file, 'ab', system_config.file_permission || Fluent::DEFAULT_FILE_PERMISSION) { |f| yield f }
925
+ end
926
+
906
927
  private
907
928
 
908
929
  def optimistic_queued?(metadata = nil)
@@ -36,10 +36,16 @@ module Fluent::Plugin
36
36
  desc "The field name of the client's address."
37
37
  config_param :source_address_key, :string, default: nil
38
38
 
39
+ # Setting default to nil for backward compatibility
40
+ desc "The max bytes of message."
41
+ config_param :message_length_limit, :size, default: nil
42
+
39
43
  config_param :blocking_timeout, :time, default: 0.5
40
44
 
41
45
  desc 'The payload is read up to this character.'
42
46
  config_param :delimiter, :string, default: "\n" # syslog family add "\n" to each message and this seems only way to split messages in tcp stream
47
+ desc 'Check the remote connection is still available by sending a keepalive packet if this value is true.'
48
+ config_param :send_keepalive_packet, :bool, default: false
43
49
 
44
50
  # in_forward like host/network restriction
45
51
  config_section :security, required: false, multi: false do
@@ -100,8 +106,9 @@ module Fluent::Plugin
100
106
 
101
107
  log.info "listening tcp socket", bind: @bind, port: @port
102
108
  del_size = @delimiter.length
109
+ discard_till_next_delimiter = false
103
110
  if @_extract_enabled && @_extract_tag_key
104
- server_create(:in_tcp_server_single_emit, @port, bind: @bind, resolve_name: !!@source_hostname_key) do |data, conn|
111
+ server_create(:in_tcp_server_single_emit, @port, bind: @bind, resolve_name: !!@source_hostname_key, send_keepalive_packet: @send_keepalive_packet) do |data, conn|
105
112
  unless check_client(conn)
106
113
  conn.close
107
114
  next
@@ -114,6 +121,16 @@ module Fluent::Plugin
114
121
  msg = buf[pos...i]
115
122
  pos = i + del_size
116
123
 
124
+ if discard_till_next_delimiter
125
+ discard_till_next_delimiter = false
126
+ next
127
+ end
128
+
129
+ if !@message_length_limit.nil? && @message_length_limit < msg.bytesize
130
+ log.info "The received data is larger than 'message_length_limit', dropped:", limit: @message_length_limit, size: msg.bytesize, head: msg[...32]
131
+ next
132
+ end
133
+
117
134
  @parser.parse(msg) do |time, record|
118
135
  unless time && record
119
136
  log.warn "pattern not matched", message: msg
@@ -129,9 +146,18 @@ module Fluent::Plugin
129
146
  end
130
147
  end
131
148
  buf.slice!(0, pos) if pos > 0
149
+ # If the buffer size exceeds the limit here, it means that the next message will definitely exceed the limit.
150
+ # So we should clear the buffer here. Otherwise, it will keep storing useless data until the next delimiter comes.
151
+ if !@message_length_limit.nil? && @message_length_limit < buf.bytesize
152
+ log.info "The buffer size exceeds 'message_length_limit', cleared:", limit: @message_length_limit, size: buf.bytesize, head: buf[...32]
153
+ buf.clear
154
+ # We should discard the subsequent data until the next delimiter comes.
155
+ discard_till_next_delimiter = true
156
+ next
157
+ end
132
158
  end
133
159
  else
134
- server_create(:in_tcp_server_batch_emit, @port, bind: @bind, resolve_name: !!@source_hostname_key) do |data, conn|
160
+ server_create(:in_tcp_server_batch_emit, @port, bind: @bind, resolve_name: !!@source_hostname_key, send_keepalive_packet: @send_keepalive_packet) do |data, conn|
135
161
  unless check_client(conn)
136
162
  conn.close
137
163
  next
@@ -145,6 +171,16 @@ module Fluent::Plugin
145
171
  msg = buf[pos...i]
146
172
  pos = i + del_size
147
173
 
174
+ if discard_till_next_delimiter
175
+ discard_till_next_delimiter = false
176
+ next
177
+ end
178
+
179
+ if !@message_length_limit.nil? && @message_length_limit < msg.bytesize
180
+ log.info "The received data is larger than 'message_length_limit', dropped:", limit: @message_length_limit, size: msg.bytesize, head: msg[...32]
181
+ next
182
+ end
183
+
148
184
  @parser.parse(msg) do |time, record|
149
185
  unless time && record
150
186
  log.warn "pattern not matched", message: msg
@@ -159,6 +195,15 @@ module Fluent::Plugin
159
195
  end
160
196
  router.emit_stream(@tag, es)
161
197
  buf.slice!(0, pos) if pos > 0
198
+ # If the buffer size exceeds the limit here, it means that the next message will definitely exceed the limit.
199
+ # So we should clear the buffer here. Otherwise, it will keep storing useless data until the next delimiter comes.
200
+ if !@message_length_limit.nil? && @message_length_limit < buf.bytesize
201
+ log.info "The buffer size exceeds 'message_length_limit', cleared:", limit: @message_length_limit, size: buf.bytesize, head: buf[...32]
202
+ buf.clear
203
+ # We should discard the subsequent data until the next delimiter comes.
204
+ discard_till_next_delimiter = true
205
+ next
206
+ end
162
207
  end
163
208
  end
164
209
  end
@@ -59,7 +59,13 @@ module Fluent::Plugin
59
59
  @ack_waitings = new_list
60
60
  end
61
61
 
62
- readable_sockets, _, _ = IO.select(sockets, nil, nil, select_interval)
62
+ begin
63
+ readable_sockets, _, _ = IO.select(sockets, nil, nil, select_interval)
64
+ rescue IOError
65
+ @log.info "connection closed while waiting for readable sockets"
66
+ readable_sockets = nil
67
+ end
68
+
63
69
  if readable_sockets
64
70
  readable_sockets.each do |sock|
65
71
  results << read_ack_from_sock(sock)
@@ -109,13 +115,22 @@ module Fluent::Plugin
109
115
  raw_data = sock.instance_of?(Fluent::PluginHelper::Socket::WrappedSocket::TLS) ? sock.readpartial(@read_length) : sock.recv(@read_length)
110
116
  rescue Errno::ECONNRESET, EOFError # ECONNRESET for #recv, #EOFError for #readpartial
111
117
  raw_data = ''
118
+ rescue IOError
119
+ @log.info "socket closed while receiving ack response"
120
+ return nil, Result::FAILED
112
121
  end
113
122
 
114
123
  info = find(sock)
115
124
 
116
- # When connection is closed by remote host, socket is ready to read and #recv returns an empty string that means EOF.
117
- # If this happens we assume the data wasn't delivered and retry it.
118
- if raw_data.empty?
125
+ if info.nil?
126
+ # The info can be deleted by another thread during `sock.recv()` and `find()`.
127
+ # This is OK since another thread has completed to process the ack, so we can skip this.
128
+ # Note: exclusion mechanism about `collect_response()` may need to be considered.
129
+ @log.debug "could not find the ack info. this ack may be processed by another thread."
130
+ return nil, Result::FAILED
131
+ elsif raw_data.empty?
132
+ # When connection is closed by remote host, socket is ready to read and #recv returns an empty string that means EOF.
133
+ # If this happens we assume the data wasn't delivered and retry it.
119
134
  @log.warn 'destination node closed the connection. regard it as unavailable.', host: info.node.host, port: info.node.port
120
135
  # info.node.disable!
121
136
  return info, Result::FAILED
@@ -521,8 +521,8 @@ module Fluent::Plugin
521
521
  when AckHandler::Result::SUCCESS
522
522
  commit_write(chunk_id)
523
523
  when AckHandler::Result::FAILED
524
- node.disable!
525
- rollback_write(chunk_id, update_retry: false)
524
+ node&.disable!
525
+ rollback_write(chunk_id, update_retry: false) if chunk_id
526
526
  when AckHandler::Result::CHUNKID_UNMATCHED
527
527
  rollback_write(chunk_id, update_retry: false)
528
528
  else
@@ -64,31 +64,29 @@ module Fluent::Plugin
64
64
  end
65
65
 
66
66
  def multi_workers_ready?
67
- ### TODO: add hack to synchronize for multi workers
68
67
  true
69
68
  end
70
69
 
71
70
  def write(chunk)
72
71
  path_without_suffix = extract_placeholders(@path_without_suffix, chunk)
73
- path = generate_path(path_without_suffix)
74
- FileUtils.mkdir_p File.dirname(path), mode: @dir_perm
75
-
76
- case @compress
77
- when :text
78
- File.open(path, "ab", @file_perm) {|f|
79
- f.flock(File::LOCK_EX)
80
- chunk.write_to(f)
81
- }
82
- when :gzip
83
- File.open(path, "ab", @file_perm) {|f|
84
- f.flock(File::LOCK_EX)
85
- gz = Zlib::GzipWriter.new(f)
86
- chunk.write_to(gz)
87
- gz.close
88
- }
72
+ generate_path(path_without_suffix) do |path|
73
+ FileUtils.mkdir_p File.dirname(path), mode: @dir_perm
74
+
75
+ case @compress
76
+ when :text
77
+ File.open(path, "ab", @file_perm) {|f|
78
+ f.flock(File::LOCK_EX)
79
+ chunk.write_to(f)
80
+ }
81
+ when :gzip
82
+ File.open(path, "ab", @file_perm) {|f|
83
+ f.flock(File::LOCK_EX)
84
+ gz = Zlib::GzipWriter.new(f)
85
+ chunk.write_to(gz)
86
+ gz.close
87
+ }
88
+ end
89
89
  end
90
-
91
- path
92
90
  end
93
91
 
94
92
  private
@@ -117,15 +115,34 @@ module Fluent::Plugin
117
115
 
118
116
  def generate_path(path_without_suffix)
119
117
  if @append
120
- "#{path_without_suffix}#{@suffix}"
121
- else
118
+ path = "#{path_without_suffix}#{@suffix}"
119
+ synchronize_path(path) do
120
+ yield path
121
+ end
122
+ return path
123
+ end
124
+
125
+ begin
122
126
  i = 0
123
127
  loop do
124
128
  path = "#{path_without_suffix}.#{i}#{@suffix}"
125
- return path unless File.exist?(path)
129
+ break unless File.exist?(path)
126
130
  i += 1
127
131
  end
132
+ synchronize_path(path) do
133
+ # If multiple processes or threads select the same path and another
134
+ # one entered this locking block first, the file should already
135
+ # exist and this one should retry to find new path.
136
+ raise FileAlreadyExist if File.exist?(path)
137
+ yield path
138
+ end
139
+ rescue FileAlreadyExist
140
+ retry
128
141
  end
142
+ path
143
+ end
144
+
145
+ class FileAlreadyExist < StandardError
129
146
  end
130
147
  end
131
148
  end