fluentd 1.15.2-x64-mingw-ucrt → 1.16.0-x64-mingw-ucrt

Sign up to get free protection for your applications and to get access to all the features.
Files changed (66) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/linux-test.yaml +2 -2
  3. data/.github/workflows/macos-test.yaml +2 -2
  4. data/.github/workflows/windows-test.yaml +2 -2
  5. data/CHANGELOG.md +96 -0
  6. data/MAINTAINERS.md +2 -0
  7. data/README.md +0 -1
  8. data/fluentd.gemspec +2 -2
  9. data/lib/fluent/command/fluentd.rb +55 -64
  10. data/lib/fluent/config/yaml_parser/loader.rb +18 -1
  11. data/lib/fluent/daemon.rb +2 -4
  12. data/lib/fluent/file_wrapper.rb +137 -0
  13. data/lib/fluent/log/console_adapter.rb +66 -0
  14. data/lib/fluent/log.rb +35 -5
  15. data/lib/fluent/oj_options.rb +1 -2
  16. data/lib/fluent/plugin/base.rb +5 -7
  17. data/lib/fluent/plugin/buf_file.rb +32 -3
  18. data/lib/fluent/plugin/buf_file_single.rb +32 -3
  19. data/lib/fluent/plugin/buffer/file_chunk.rb +1 -1
  20. data/lib/fluent/plugin/buffer.rb +21 -0
  21. data/lib/fluent/plugin/in_tail.rb +1 -6
  22. data/lib/fluent/plugin/in_tcp.rb +4 -2
  23. data/lib/fluent/plugin/out_file.rb +0 -4
  24. data/lib/fluent/plugin/out_forward/ack_handler.rb +19 -4
  25. data/lib/fluent/plugin/out_forward.rb +2 -2
  26. data/lib/fluent/plugin/out_secondary_file.rb +39 -22
  27. data/lib/fluent/plugin/output.rb +49 -12
  28. data/lib/fluent/plugin_helper/http_server/server.rb +2 -1
  29. data/lib/fluent/supervisor.rb +157 -232
  30. data/lib/fluent/test/driver/base.rb +11 -5
  31. data/lib/fluent/test/driver/filter.rb +4 -0
  32. data/lib/fluent/test/startup_shutdown.rb +6 -8
  33. data/lib/fluent/version.rb +1 -1
  34. data/test/command/test_ctl.rb +1 -1
  35. data/test/command/test_fluentd.rb +168 -22
  36. data/test/command/test_plugin_config_formatter.rb +0 -1
  37. data/test/compat/test_parser.rb +5 -5
  38. data/test/config/test_system_config.rb +0 -8
  39. data/test/log/test_console_adapter.rb +110 -0
  40. data/test/plugin/out_forward/test_ack_handler.rb +39 -0
  41. data/test/plugin/test_base.rb +98 -0
  42. data/test/plugin/test_buf_file.rb +62 -23
  43. data/test/plugin/test_buf_file_single.rb +65 -0
  44. data/test/plugin/test_in_http.rb +2 -3
  45. data/test/plugin/test_in_monitor_agent.rb +2 -3
  46. data/test/plugin/test_in_tail.rb +105 -103
  47. data/test/plugin/test_in_tcp.rb +15 -0
  48. data/test/plugin/test_out_file.rb +3 -2
  49. data/test/plugin/test_out_forward.rb +14 -18
  50. data/test/plugin/test_out_http.rb +1 -0
  51. data/test/plugin/test_output.rb +269 -0
  52. data/test/plugin/test_parser_regexp.rb +1 -6
  53. data/test/plugin_helper/test_http_server_helper.rb +1 -1
  54. data/test/plugin_helper/test_server.rb +10 -5
  55. data/test/test_config.rb +57 -21
  56. data/test/{plugin/test_file_wrapper.rb → test_file_wrapper.rb} +2 -2
  57. data/test/test_formatter.rb +23 -20
  58. data/test/test_log.rb +85 -40
  59. data/test/test_supervisor.rb +300 -283
  60. metadata +15 -24
  61. data/.drone.yml +0 -35
  62. data/.github/workflows/issue-auto-closer.yml +0 -12
  63. data/.github/workflows/stale-actions.yml +0 -22
  64. data/.gitlab-ci.yml +0 -103
  65. data/lib/fluent/plugin/file_wrapper.rb +0 -131
  66. data/test/test_logger_initializer.rb +0 -46
@@ -0,0 +1,137 @@
1
+ #
2
+ # Fluentd
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ #
16
+
17
+ unless Fluent.windows?
18
+ Fluent::FileWrapper = File
19
+ else
20
+ require 'fluent/win32api'
21
+
22
+ module Fluent
23
+ module FileWrapper
24
+ def self.open(path, mode='r')
25
+ io = WindowsFile.new(path, mode).io
26
+ if block_given?
27
+ v = yield io
28
+ io.close
29
+ v
30
+ else
31
+ io
32
+ end
33
+ end
34
+
35
+ def self.stat(path)
36
+ f = WindowsFile.new(path)
37
+ s = f.stat
38
+ f.close
39
+ s
40
+ end
41
+ end
42
+
43
+ class WindowsFile
44
+ include File::Constants
45
+
46
+ attr_reader :io
47
+
48
+ INVALID_HANDLE_VALUE = -1
49
+
50
+ def initialize(path, mode_enc='r')
51
+ @path = path
52
+ mode, enc = mode_enc.split(":", 2)
53
+ @io = File.open(path, mode2flags(mode))
54
+ @io.set_encoding(enc) if enc
55
+ @file_handle = Win32API._get_osfhandle(@io.to_i)
56
+ @io.instance_variable_set(:@file_index, self.ino)
57
+ def @io.ino
58
+ @file_index
59
+ end
60
+ end
61
+
62
+ def close
63
+ @io.close
64
+ @file_handle = INVALID_HANDLE_VALUE
65
+ end
66
+
67
+ # To keep backward compatibility, we continue to use GetFileInformationByHandle()
68
+ # to get file id.
69
+ # Note that Ruby's File.stat uses GetFileInformationByHandleEx() with FileIdInfo
70
+ # and the returned value differs from the one above: the former is 64 bit while
71
+ # the latter is 128 bit.
72
+ def ino
73
+ by_handle_file_information = '\0'*(4+8+8+8+4+4+4+4+4+4) #72bytes
74
+
75
+ unless Win32API.GetFileInformationByHandle(@file_handle, by_handle_file_information)
76
+ return 0
77
+ end
78
+
79
+ by_handle_file_information.unpack("I11Q1")[11] # fileindex
80
+ end
81
+
82
+ def stat
83
+ raise Errno::ENOENT if delete_pending
84
+ s = File.stat(@path)
85
+ s.instance_variable_set :@ino, self.ino
86
+ def s.ino; @ino; end
87
+ s
88
+ end
89
+
90
+ private
91
+
92
+ def mode2flags(mode)
93
+ # Always inject File::Constants::SHARE_DELETE
94
+ # https://github.com/fluent/fluentd/pull/3585#issuecomment-1101502617
95
+ # To enable SHARE_DELETE, BINARY is also required.
96
+ # https://bugs.ruby-lang.org/issues/11218
97
+ # https://github.com/ruby/ruby/blob/d6684f063bc53e3cab025bd39526eca3b480b5e7/win32/win32.c#L6332-L6345
98
+ flags = BINARY | SHARE_DELETE
99
+ case mode.delete("b")
100
+ when "r"
101
+ flags |= RDONLY
102
+ when "r+"
103
+ flags |= RDWR
104
+ when "w"
105
+ flags |= WRONLY | CREAT | TRUNC
106
+ when "w+"
107
+ flags |= RDWR | CREAT | TRUNC
108
+ when "a"
109
+ flags |= WRONLY | CREAT | APPEND
110
+ when "a+"
111
+ flags |= RDWR | CREAT | APPEND
112
+ else
113
+ raise Errno::EINVAL.new("Unsupported mode by Fluent::FileWrapper: #{mode}")
114
+ end
115
+ end
116
+
117
+ # DeletePending is a Windows-specific file state that roughly means
118
+ # "this file is queued for deletion, so close any open handles"
119
+ #
120
+ # This flag can be retrieved via GetFileInformationByHandleEx().
121
+ #
122
+ # https://docs.microsoft.com/en-us/windows/win32/api/winbase/nf-winbase-getfileinformationbyhandleex
123
+ #
124
+ def delete_pending
125
+ file_standard_info = 0x01
126
+ bufsize = 1024
127
+ buf = '\0' * bufsize
128
+
129
+ unless Win32API.GetFileInformationByHandleEx(@file_handle, file_standard_info, buf, bufsize)
130
+ return false
131
+ end
132
+
133
+ return buf.unpack("QQICC")[3] != 0
134
+ end
135
+ end
136
+ end
137
+ end
@@ -0,0 +1,66 @@
1
+ #
2
+ # Fluentd
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ #
16
+
17
+ require 'console/terminal/logger'
18
+
19
+ module Fluent
20
+ class Log
21
+ # Async gem which is used by http_server helper switched logger mechanism to
22
+ # Console gem which isn't compatible with Ruby's standard Logger (since
23
+ # v1.17). This class adapts it to Fluentd's logger mechanism.
24
+ class ConsoleAdapter < Console::Terminal::Logger
25
+ def self.wrap(logger)
26
+ _, level = Console::Logger::LEVELS.find { |key, value|
27
+ if logger.level <= 0
28
+ key == :debug
29
+ else
30
+ value == logger.level - 1
31
+ end
32
+ }
33
+ Console::Logger.new(ConsoleAdapter.new(logger), level: level)
34
+ end
35
+
36
+ def initialize(logger)
37
+ @logger = logger
38
+ # When `verbose` is `true`, following items will be added as a prefix or
39
+ # suffix of the subject:
40
+ # * Severity
41
+ # * Object ID
42
+ # * PID
43
+ # * Time
44
+ # Severity and Time are added by Fluentd::Log too so they are redundant.
45
+ # PID is the worker's PID so it's also redundant.
46
+ # Object ID will be too verbose in usual cases.
47
+ # So set it as `false` here to suppress redundant items.
48
+ super(StringIO.new, verbose: false)
49
+ end
50
+
51
+ def call(subject = nil, *arguments, name: nil, severity: 'info', **options, &block)
52
+ if LEVEL_TEXT.include?(severity.to_s)
53
+ level = severity
54
+ else
55
+ @logger.warn("Unknown severity: #{severity}")
56
+ level = 'warn'
57
+ end
58
+
59
+ @io.seek(0)
60
+ @io.truncate(0)
61
+ super
62
+ @logger.send(level, @io.string.chomp)
63
+ end
64
+ end
65
+ end
66
+ end
data/lib/fluent/log.rb CHANGED
@@ -67,8 +67,29 @@ module Fluent
67
67
  LEVEL_TEXT.map{|t| "#{LOG_EVENT_TAG_PREFIX}.#{t}" }
68
68
  end
69
69
 
70
+ # Create a unique path for each process.
71
+ #
72
+ # >>> per_process_path("C:/tmp/test.log", :worker, 1)
73
+ # C:/tmp/test-1.log
74
+ # >>> per_process_path("C:/tmp/test.log", :supervisor, 0)
75
+ # C:/tmp/test-supervisor-0.log
76
+ def self.per_process_path(path, process_type, worker_id)
77
+ path = Pathname(path)
78
+ ext = path.extname
79
+
80
+ if process_type == :supervisor
81
+ suffix = "-#{process_type}-0#{ext}" # "-0" for backword compatibility.
82
+ else
83
+ suffix = "-#{worker_id}#{ext}"
84
+ end
85
+ return path.sub_ext(suffix).to_s
86
+ end
87
+
70
88
  def initialize(logger, opts={})
71
- # overwrites logger.level= so that config reloading resets level of Fluentd::Log
89
+ # When ServerEngine changes the logger.level, the Fluentd logger level should also change.
90
+ # So we overwrite logger.level= below.
91
+ # However, currently Fluentd doesn't use the ServerEngine's reloading feature,
92
+ # so maybe we don't need this overwriting anymore.
72
93
  orig_logger_level_setter = logger.class.public_instance_method(:level=).bind(logger)
73
94
  me = self
74
95
  # The original ruby logger sets the number as each log level like below.
@@ -92,6 +113,7 @@ module Fluent
92
113
  # So if serverengine's logger level is changed, fluentd's log level will be changed to that + 1.
93
114
  logger.define_singleton_method(:level=) {|level| orig_logger_level_setter.call(level); me.level = self.level + 1 }
94
115
 
116
+ @path = opts[:path]
95
117
  @logger = logger
96
118
  @out = logger.instance_variable_get(:@logdev)
97
119
  @level = logger.level + 1
@@ -102,7 +124,8 @@ module Fluent
102
124
  @time_format = nil
103
125
  @formatter = nil
104
126
 
105
- self.format = :text
127
+ self.format = opts.fetch(:format, :text)
128
+ self.time_format = opts[:time_format] if opts.key?(:time_format)
106
129
  enable_color out.tty?
107
130
  # TODO: This variable name is unclear so we should change to better name.
108
131
  @threads_exclude_events = []
@@ -154,8 +177,12 @@ module Fluent
154
177
 
155
178
  attr_reader :format
156
179
  attr_reader :time_format
157
- attr_accessor :log_event_enabled, :ignore_repeated_log_interval, :ignore_same_log_interval
180
+ attr_accessor :log_event_enabled, :ignore_repeated_log_interval, :ignore_same_log_interval, :suppress_repeated_stacktrace
158
181
  attr_accessor :out
182
+ # Strictly speaking, we should also change @logger.level when the setter of @level is called.
183
+ # Currently, we don't need to do it, since Fluentd::Log doesn't use ServerEngine::DaemonLogger.level.
184
+ # Since we overwrite logger.level= so that @logger.level is applied to @level,
185
+ # we need to find a good way to do this, otherwise we will end up in an endless loop.
159
186
  attr_accessor :level
160
187
  attr_accessor :optional_header, :optional_attrs
161
188
 
@@ -202,9 +229,12 @@ module Fluent
202
229
  @time_formatter = Strftime.new(@time_format) rescue nil
203
230
  end
204
231
 
232
+ def stdout?
233
+ @out == $stdout
234
+ end
235
+
205
236
  def reopen!
206
- # do nothing in @logger.reopen! because it's already reopened in Supervisor.load_config
207
- @logger.reopen! if @logger
237
+ @out.reopen(@path, "a") if @path && @path != "-"
208
238
  nil
209
239
  end
210
240
 
@@ -4,14 +4,13 @@ module Fluent
4
4
  class OjOptions
5
5
  OPTIONS = {
6
6
  'bigdecimal_load': :symbol,
7
- 'max_nesting': :integer,
8
7
  'mode': :symbol,
9
8
  'use_to_json': :bool
10
9
  }
11
10
 
12
11
  ALLOWED_VALUES = {
13
12
  'bigdecimal_load': %i[bigdecimal float auto],
14
- 'mode': %i[strict null compat json rails object custom]
13
+ 'mode': %i[strict null compat json rails custom]
15
14
  }
16
15
 
17
16
  DEFAULTS = {
@@ -53,14 +53,12 @@ module Fluent
53
53
  end
54
54
 
55
55
  def configure(conf)
56
- if Fluent::Engine.supervisor_mode || (conf.respond_to?(:for_this_worker?) && conf.for_this_worker?)
57
- workers = if conf.target_worker_ids && !conf.target_worker_ids.empty?
58
- conf.target_worker_ids.size
59
- else
60
- 1
61
- end
62
- system_config_override(workers: workers)
56
+ raise ArgumentError, "BUG: type of conf must be Fluent::Config::Element, but #{conf.class} is passed." unless conf.is_a?(Fluent::Config::Element)
57
+
58
+ if conf.for_this_worker? || (Fluent::Engine.supervisor_mode && !conf.for_every_workers?)
59
+ system_config_override(workers: conf.target_worker_ids.size)
63
60
  end
61
+
64
62
  super(conf, system_config.strict_config_value)
65
63
  @_state ||= State.new(false, false, false, false, false, false, false, false, false)
66
64
  @_state.configure = true
@@ -139,13 +139,20 @@ module Fluent
139
139
  def resume
140
140
  stage = {}
141
141
  queue = []
142
+ exist_broken_file = false
142
143
 
143
144
  patterns = [@path]
144
145
  patterns.unshift @additional_resume_path if @additional_resume_path
145
146
  Dir.glob(escaped_patterns(patterns)) do |path|
146
147
  next unless File.file?(path)
147
148
 
148
- log.debug { "restoring buffer file: path = #{path}" }
149
+ if owner.respond_to?(:buffer_config) && owner.buffer_config&.flush_at_shutdown
150
+ # When `flush_at_shutdown` is `true`, the remaining chunk files during resuming are possibly broken
151
+ # since there may be a power failure or similar failure.
152
+ log.warn { "restoring buffer file: path = #{path}" }
153
+ else
154
+ log.debug { "restoring buffer file: path = #{path}" }
155
+ end
149
156
 
150
157
  m = new_metadata() # this metadata will be overwritten by resuming .meta file content
151
158
  # so it should not added into @metadata_list for now
@@ -158,6 +165,7 @@ module Fluent
158
165
  begin
159
166
  chunk = Fluent::Plugin::Buffer::FileChunk.new(m, path, mode, compress: @compress) # file chunk resumes contents of metadata
160
167
  rescue Fluent::Plugin::Buffer::FileChunk::FileChunkError => e
168
+ exist_broken_file = true
161
169
  handle_broken_files(path, mode, e)
162
170
  next
163
171
  end
@@ -182,6 +190,15 @@ module Fluent
182
190
 
183
191
  queue.sort_by!{ |chunk| chunk.modified_at }
184
192
 
193
+ # If one of the files is corrupted, other files may also be corrupted and be undetected.
194
+ # The time periods of each chunk are helpful to check the data.
195
+ if exist_broken_file
196
+ log.info "Since a broken chunk file was found, it is possible that other files remaining at the time of resuming were also broken. Here is the list of the files."
197
+ (stage.values + queue).each { |chunk|
198
+ log.info " #{chunk.path}:", :created_at => chunk.created_at, :modified_at => chunk.modified_at
199
+ }
200
+ end
201
+
185
202
  return stage, queue
186
203
  end
187
204
 
@@ -195,8 +212,20 @@ module Fluent
195
212
  end
196
213
 
197
214
  def handle_broken_files(path, mode, e)
198
- log.error "found broken chunk file during resume. Deleted corresponding files:", :path => path, :mode => mode, :err_msg => e.message
199
- # After support 'backup_dir' feature, these files are moved to backup_dir instead of unlink.
215
+ log.error "found broken chunk file during resume.", :path => path, :mode => mode, :err_msg => e.message
216
+ unique_id = Fluent::Plugin::Buffer::FileChunk.unique_id_from_path(path)
217
+ backup(unique_id) { |f|
218
+ File.open(path, 'rb') { |chunk|
219
+ chunk.set_encoding(Encoding::ASCII_8BIT)
220
+ chunk.sync = true
221
+ chunk.binmode
222
+ IO.copy_stream(chunk, f)
223
+ }
224
+ }
225
+ rescue => error
226
+ log.error "backup failed. Delete corresponding files.", :err_msg => error.message
227
+ ensure
228
+ log.warn "disable_chunk_backup is true. #{dump_unique_id_hex(unique_id)} chunk is thrown away." if @disable_chunk_backup
200
229
  File.unlink(path, path + '.meta') rescue nil
201
230
  end
202
231
 
@@ -160,13 +160,20 @@ module Fluent
160
160
  def resume
161
161
  stage = {}
162
162
  queue = []
163
+ exist_broken_file = false
163
164
 
164
165
  patterns = [@path]
165
166
  patterns.unshift @additional_resume_path if @additional_resume_path
166
167
  Dir.glob(escaped_patterns(patterns)) do |path|
167
168
  next unless File.file?(path)
168
169
 
169
- log.debug { "restoring buffer file: path = #{path}" }
170
+ if owner.respond_to?(:buffer_config) && owner.buffer_config&.flush_at_shutdown
171
+ # When `flush_at_shutdown` is `true`, the remaining chunk files during resuming are possibly broken
172
+ # since there may be a power failure or similar failure.
173
+ log.warn { "restoring buffer file: path = #{path}" }
174
+ else
175
+ log.debug { "restoring buffer file: path = #{path}" }
176
+ end
170
177
 
171
178
  m = new_metadata() # this metadata will be updated in FileSingleChunk.new
172
179
  mode = Fluent::Plugin::Buffer::FileSingleChunk.assume_chunk_state(path)
@@ -179,6 +186,7 @@ module Fluent
179
186
  chunk = Fluent::Plugin::Buffer::FileSingleChunk.new(m, path, mode, @key_in_path, compress: @compress)
180
187
  chunk.restore_size(@chunk_format) if @calc_num_records
181
188
  rescue Fluent::Plugin::Buffer::FileSingleChunk::FileChunkError => e
189
+ exist_broken_file = true
182
190
  handle_broken_files(path, mode, e)
183
191
  next
184
192
  end
@@ -193,6 +201,15 @@ module Fluent
193
201
 
194
202
  queue.sort_by!(&:modified_at)
195
203
 
204
+ # If one of the files is corrupted, other files may also be corrupted and be undetected.
205
+ # The time periods of each chunk are helpful to check the data.
206
+ if exist_broken_file
207
+ log.info "Since a broken chunk file was found, it is possible that other files remaining at the time of resuming were also broken. Here is the list of the files."
208
+ (stage.values + queue).each { |chunk|
209
+ log.info " #{chunk.path}:", :created_at => chunk.created_at, :modified_at => chunk.modified_at
210
+ }
211
+ end
212
+
196
213
  return stage, queue
197
214
  end
198
215
 
@@ -207,8 +224,20 @@ module Fluent
207
224
  end
208
225
 
209
226
  def handle_broken_files(path, mode, e)
210
- log.error "found broken chunk file during resume. Delete corresponding files:", path: path, mode: mode, err_msg: e.message
211
- # After support 'backup_dir' feature, these files are moved to backup_dir instead of unlink.
227
+ log.error "found broken chunk file during resume.", :path => path, :mode => mode, :err_msg => e.message
228
+ unique_id, _ = Fluent::Plugin::Buffer::FileSingleChunk.unique_id_and_key_from_path(path)
229
+ backup(unique_id) { |f|
230
+ File.open(path, 'rb') { |chunk|
231
+ chunk.set_encoding(Encoding::ASCII_8BIT)
232
+ chunk.sync = true
233
+ chunk.binmode
234
+ IO.copy_stream(chunk, f)
235
+ }
236
+ }
237
+ rescue => error
238
+ log.error "backup failed. Delete corresponding files.", :err_msg => error.message
239
+ ensure
240
+ log.warn "disable_chunk_backup is true. #{dump_unique_id_hex(unique_id)} chunk is thrown away." if @disable_chunk_backup
212
241
  File.unlink(path) rescue nil
213
242
  end
214
243
 
@@ -204,7 +204,7 @@ module Fluent
204
204
  end
205
205
  end
206
206
 
207
- # used only for queued v0.12 buffer path
207
+ # used only for queued v0.12 buffer path or broken files
208
208
  def self.unique_id_from_path(path)
209
209
  if /\.(b|q)([0-9a-f]+)\.[^\/]*\Z/n =~ path # //n switch means explicit 'ASCII-8BIT' pattern
210
210
  return $2.scan(/../).map{|x| x.to_i(16) }.pack('C*')
@@ -66,6 +66,9 @@ module Fluent
66
66
  desc 'Compress buffered data.'
67
67
  config_param :compress, :enum, list: [:text, :gzip], default: :text
68
68
 
69
+ desc 'If true, chunks are thrown away when unrecoverable error happens'
70
+ config_param :disable_chunk_backup, :bool, default: false
71
+
69
72
  Metadata = Struct.new(:timekey, :tag, :variables, :seq) do
70
73
  def initialize(timekey, tag, variables)
71
74
  super(timekey, tag, variables, 0)
@@ -903,6 +906,24 @@ module Fluent
903
906
  { 'buffer' => stats }
904
907
  end
905
908
 
909
+ def backup(chunk_unique_id)
910
+ unique_id = dump_unique_id_hex(chunk_unique_id)
911
+
912
+ if @disable_chunk_backup
913
+ log.warn "disable_chunk_backup is true. #{unique_id} chunk is not backed up."
914
+ return
915
+ end
916
+
917
+ safe_owner_id = owner.plugin_id.gsub(/[ "\/\\:;|*<>?]/, '_')
918
+ backup_base_dir = system_config.root_dir || DEFAULT_BACKUP_DIR
919
+ backup_file = File.join(backup_base_dir, 'backup', "worker#{fluentd_worker_id}", safe_owner_id, "#{unique_id}.log")
920
+ backup_dir = File.dirname(backup_file)
921
+
922
+ log.warn "bad chunk is moved to #{backup_file}"
923
+ FileUtils.mkdir_p(backup_dir, mode: system_config.dir_permission || Fluent::DEFAULT_DIR_PERMISSION) unless Dir.exist?(backup_dir)
924
+ File.open(backup_file, 'ab', system_config.file_permission || Fluent::DEFAULT_FILE_PERMISSION) { |f| yield f }
925
+ end
926
+
906
927
  private
907
928
 
908
929
  def optimistic_queued?(metadata = nil)
@@ -25,12 +25,7 @@ require 'fluent/variable_store'
25
25
  require 'fluent/capability'
26
26
  require 'fluent/plugin/in_tail/position_file'
27
27
  require 'fluent/plugin/in_tail/group_watch'
28
-
29
- if Fluent.windows?
30
- require_relative 'file_wrapper'
31
- else
32
- Fluent::FileWrapper = File
33
- end
28
+ require 'fluent/file_wrapper'
34
29
 
35
30
  module Fluent::Plugin
36
31
  class TailInput < Fluent::Plugin::Input
@@ -40,6 +40,8 @@ module Fluent::Plugin
40
40
 
41
41
  desc 'The payload is read up to this character.'
42
42
  config_param :delimiter, :string, default: "\n" # syslog family add "\n" to each message and this seems only way to split messages in tcp stream
43
+ desc 'Check the remote connection is still available by sending a keepalive packet if this value is true.'
44
+ config_param :send_keepalive_packet, :bool, default: false
43
45
 
44
46
  # in_forward like host/network restriction
45
47
  config_section :security, required: false, multi: false do
@@ -101,7 +103,7 @@ module Fluent::Plugin
101
103
  log.info "listening tcp socket", bind: @bind, port: @port
102
104
  del_size = @delimiter.length
103
105
  if @_extract_enabled && @_extract_tag_key
104
- server_create(:in_tcp_server_single_emit, @port, bind: @bind, resolve_name: !!@source_hostname_key) do |data, conn|
106
+ server_create(:in_tcp_server_single_emit, @port, bind: @bind, resolve_name: !!@source_hostname_key, send_keepalive_packet: @send_keepalive_packet) do |data, conn|
105
107
  unless check_client(conn)
106
108
  conn.close
107
109
  next
@@ -131,7 +133,7 @@ module Fluent::Plugin
131
133
  buf.slice!(0, pos) if pos > 0
132
134
  end
133
135
  else
134
- server_create(:in_tcp_server_batch_emit, @port, bind: @bind, resolve_name: !!@source_hostname_key) do |data, conn|
136
+ server_create(:in_tcp_server_batch_emit, @port, bind: @bind, resolve_name: !!@source_hostname_key, send_keepalive_packet: @send_keepalive_packet) do |data, conn|
135
137
  unless check_client(conn)
136
138
  conn.close
137
139
  next
@@ -188,10 +188,6 @@ module Fluent::Plugin
188
188
  condition = Gem::Dependency.new('', [">= 2.7.0", "< 3.1.0"])
189
189
  @need_ruby_on_macos_workaround = true if condition.match?('', RUBY_VERSION)
190
190
  end
191
-
192
- if @need_lock && @append && @fluentd_lock_dir.nil?
193
- raise Fluent::InvalidLockDirectory, "must set FLUENTD_LOCK_DIR on multi-worker append mode"
194
- end
195
191
  end
196
192
 
197
193
  def multi_workers_ready?
@@ -59,7 +59,13 @@ module Fluent::Plugin
59
59
  @ack_waitings = new_list
60
60
  end
61
61
 
62
- readable_sockets, _, _ = IO.select(sockets, nil, nil, select_interval)
62
+ begin
63
+ readable_sockets, _, _ = IO.select(sockets, nil, nil, select_interval)
64
+ rescue IOError
65
+ @log.info "connection closed while waiting for readable sockets"
66
+ readable_sockets = nil
67
+ end
68
+
63
69
  if readable_sockets
64
70
  readable_sockets.each do |sock|
65
71
  results << read_ack_from_sock(sock)
@@ -109,13 +115,22 @@ module Fluent::Plugin
109
115
  raw_data = sock.instance_of?(Fluent::PluginHelper::Socket::WrappedSocket::TLS) ? sock.readpartial(@read_length) : sock.recv(@read_length)
110
116
  rescue Errno::ECONNRESET, EOFError # ECONNRESET for #recv, #EOFError for #readpartial
111
117
  raw_data = ''
118
+ rescue IOError
119
+ @log.info "socket closed while receiving ack response"
120
+ return nil, Result::FAILED
112
121
  end
113
122
 
114
123
  info = find(sock)
115
124
 
116
- # When connection is closed by remote host, socket is ready to read and #recv returns an empty string that means EOF.
117
- # If this happens we assume the data wasn't delivered and retry it.
118
- if raw_data.empty?
125
+ if info.nil?
126
+ # The info can be deleted by another thread during `sock.recv()` and `find()`.
127
+ # This is OK since another thread has finished processing the ack, so we can skip this.
128
+ # Note: exclusion mechanism about `collect_response()` may need to be considered.
129
+ @log.debug "could not find the ack info. this ack may be processed by another thread."
130
+ return nil, Result::FAILED
131
+ elsif raw_data.empty?
132
+ # When connection is closed by remote host, socket is ready to read and #recv returns an empty string that means EOF.
133
+ # If this happens we assume the data wasn't delivered and retry it.
119
134
  @log.warn 'destination node closed the connection. regard it as unavailable.', host: info.node.host, port: info.node.port
120
135
  # info.node.disable!
121
136
  return info, Result::FAILED
@@ -521,8 +521,8 @@ module Fluent::Plugin
521
521
  when AckHandler::Result::SUCCESS
522
522
  commit_write(chunk_id)
523
523
  when AckHandler::Result::FAILED
524
- node.disable!
525
- rollback_write(chunk_id, update_retry: false)
524
+ node&.disable!
525
+ rollback_write(chunk_id, update_retry: false) if chunk_id
526
526
  when AckHandler::Result::CHUNKID_UNMATCHED
527
527
  rollback_write(chunk_id, update_retry: false)
528
528
  else