fluentd 1.13.3 → 1.16.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (179) hide show
  1. checksums.yaml +4 -4
  2. data/.github/ISSUE_TEMPLATE/{bug_report.yaml → bug_report.yml} +2 -0
  3. data/.github/ISSUE_TEMPLATE/config.yml +2 -2
  4. data/.github/ISSUE_TEMPLATE/{feature_request.yaml → feature_request.yml} +1 -0
  5. data/.github/workflows/stale-actions.yml +11 -9
  6. data/.github/workflows/test.yml +32 -0
  7. data/CHANGELOG.md +490 -10
  8. data/CONTRIBUTING.md +2 -2
  9. data/MAINTAINERS.md +7 -5
  10. data/README.md +3 -23
  11. data/Rakefile +1 -1
  12. data/SECURITY.md +14 -0
  13. data/fluentd.gemspec +7 -8
  14. data/lib/fluent/command/cat.rb +13 -3
  15. data/lib/fluent/command/ctl.rb +6 -3
  16. data/lib/fluent/command/fluentd.rb +73 -65
  17. data/lib/fluent/command/plugin_config_formatter.rb +1 -1
  18. data/lib/fluent/compat/output.rb +9 -6
  19. data/lib/fluent/config/dsl.rb +1 -1
  20. data/lib/fluent/config/error.rb +12 -0
  21. data/lib/fluent/config/literal_parser.rb +2 -2
  22. data/lib/fluent/config/parser.rb +1 -1
  23. data/lib/fluent/config/v1_parser.rb +3 -3
  24. data/lib/fluent/config/yaml_parser/fluent_value.rb +47 -0
  25. data/lib/fluent/config/yaml_parser/loader.rb +108 -0
  26. data/lib/fluent/config/yaml_parser/parser.rb +166 -0
  27. data/lib/fluent/config/yaml_parser/section_builder.rb +107 -0
  28. data/lib/fluent/config/yaml_parser.rb +56 -0
  29. data/lib/fluent/config.rb +14 -1
  30. data/lib/fluent/counter/server.rb +1 -1
  31. data/lib/fluent/counter/validator.rb +3 -3
  32. data/lib/fluent/daemon.rb +2 -4
  33. data/lib/fluent/engine.rb +1 -1
  34. data/lib/fluent/env.rb +4 -0
  35. data/lib/fluent/error.rb +3 -0
  36. data/lib/fluent/event.rb +8 -4
  37. data/lib/fluent/event_router.rb +47 -2
  38. data/lib/fluent/file_wrapper.rb +137 -0
  39. data/lib/fluent/log/console_adapter.rb +66 -0
  40. data/lib/fluent/log.rb +44 -5
  41. data/lib/fluent/match.rb +1 -1
  42. data/lib/fluent/msgpack_factory.rb +6 -1
  43. data/lib/fluent/oj_options.rb +1 -2
  44. data/lib/fluent/plugin/bare_output.rb +49 -8
  45. data/lib/fluent/plugin/base.rb +26 -9
  46. data/lib/fluent/plugin/buf_file.rb +34 -5
  47. data/lib/fluent/plugin/buf_file_single.rb +32 -3
  48. data/lib/fluent/plugin/buffer/file_chunk.rb +1 -1
  49. data/lib/fluent/plugin/buffer.rb +216 -70
  50. data/lib/fluent/plugin/filter.rb +35 -1
  51. data/lib/fluent/plugin/filter_record_transformer.rb +1 -1
  52. data/lib/fluent/plugin/in_forward.rb +2 -2
  53. data/lib/fluent/plugin/in_http.rb +39 -10
  54. data/lib/fluent/plugin/in_monitor_agent.rb +4 -2
  55. data/lib/fluent/plugin/in_sample.rb +1 -1
  56. data/lib/fluent/plugin/in_syslog.rb +13 -1
  57. data/lib/fluent/plugin/in_tail/group_watch.rb +204 -0
  58. data/lib/fluent/plugin/in_tail/position_file.rb +33 -33
  59. data/lib/fluent/plugin/in_tail.rb +216 -84
  60. data/lib/fluent/plugin/in_tcp.rb +47 -2
  61. data/lib/fluent/plugin/input.rb +39 -1
  62. data/lib/fluent/plugin/metrics.rb +119 -0
  63. data/lib/fluent/plugin/metrics_local.rb +96 -0
  64. data/lib/fluent/plugin/multi_output.rb +43 -6
  65. data/lib/fluent/plugin/out_copy.rb +1 -1
  66. data/lib/fluent/plugin/out_exec_filter.rb +2 -2
  67. data/lib/fluent/plugin/out_file.rb +20 -2
  68. data/lib/fluent/plugin/out_forward/ack_handler.rb +19 -4
  69. data/lib/fluent/plugin/out_forward/socket_cache.rb +2 -0
  70. data/lib/fluent/plugin/out_forward.rb +17 -9
  71. data/lib/fluent/plugin/out_secondary_file.rb +39 -22
  72. data/lib/fluent/plugin/output.rb +167 -78
  73. data/lib/fluent/plugin/parser.rb +3 -4
  74. data/lib/fluent/plugin/parser_apache2.rb +1 -1
  75. data/lib/fluent/plugin/parser_json.rb +1 -1
  76. data/lib/fluent/plugin/parser_syslog.rb +1 -1
  77. data/lib/fluent/plugin/storage_local.rb +3 -5
  78. data/lib/fluent/plugin.rb +10 -1
  79. data/lib/fluent/plugin_helper/child_process.rb +3 -0
  80. data/lib/fluent/plugin_helper/event_emitter.rb +8 -1
  81. data/lib/fluent/plugin_helper/event_loop.rb +2 -2
  82. data/lib/fluent/plugin_helper/http_server/server.rb +2 -1
  83. data/lib/fluent/plugin_helper/metrics.rb +129 -0
  84. data/lib/fluent/plugin_helper/record_accessor.rb +1 -1
  85. data/lib/fluent/plugin_helper/retry_state.rb +14 -4
  86. data/lib/fluent/plugin_helper/server.rb +35 -6
  87. data/lib/fluent/plugin_helper/service_discovery.rb +2 -2
  88. data/lib/fluent/plugin_helper/socket.rb +13 -2
  89. data/lib/fluent/plugin_helper/thread.rb +3 -3
  90. data/lib/fluent/plugin_helper.rb +1 -0
  91. data/lib/fluent/plugin_id.rb +3 -2
  92. data/lib/fluent/registry.rb +2 -1
  93. data/lib/fluent/root_agent.rb +6 -0
  94. data/lib/fluent/rpc.rb +4 -3
  95. data/lib/fluent/supervisor.rb +283 -259
  96. data/lib/fluent/system_config.rb +13 -3
  97. data/lib/fluent/test/driver/base.rb +11 -5
  98. data/lib/fluent/test/driver/filter.rb +4 -0
  99. data/lib/fluent/test/startup_shutdown.rb +6 -8
  100. data/lib/fluent/time.rb +21 -20
  101. data/lib/fluent/version.rb +1 -1
  102. data/lib/fluent/win32api.rb +38 -0
  103. data/lib/fluent/winsvc.rb +5 -8
  104. data/templates/new_gem/test/helper.rb.erb +0 -1
  105. data/test/command/test_cat.rb +31 -2
  106. data/test/command/test_ctl.rb +1 -2
  107. data/test/command/test_fluentd.rb +209 -24
  108. data/test/command/test_plugin_config_formatter.rb +0 -1
  109. data/test/compat/test_parser.rb +6 -6
  110. data/test/config/test_system_config.rb +13 -11
  111. data/test/config/test_types.rb +1 -1
  112. data/test/log/test_console_adapter.rb +110 -0
  113. data/test/plugin/in_tail/test_io_handler.rb +26 -8
  114. data/test/plugin/in_tail/test_position_file.rb +48 -59
  115. data/test/plugin/out_forward/test_ack_handler.rb +39 -0
  116. data/test/plugin/out_forward/test_socket_cache.rb +26 -1
  117. data/test/plugin/test_bare_output.rb +14 -1
  118. data/test/plugin/test_base.rb +133 -1
  119. data/test/plugin/test_buf_file.rb +62 -23
  120. data/test/plugin/test_buf_file_single.rb +65 -0
  121. data/test/plugin/test_buffer.rb +267 -3
  122. data/test/plugin/test_buffer_chunk.rb +11 -0
  123. data/test/plugin/test_filter.rb +12 -1
  124. data/test/plugin/test_filter_parser.rb +1 -1
  125. data/test/plugin/test_filter_stdout.rb +2 -2
  126. data/test/plugin/test_in_forward.rb +9 -11
  127. data/test/plugin/test_in_http.rb +65 -3
  128. data/test/plugin/test_in_monitor_agent.rb +216 -11
  129. data/test/plugin/test_in_object_space.rb +9 -3
  130. data/test/plugin/test_in_syslog.rb +35 -0
  131. data/test/plugin/test_in_tail.rb +1393 -385
  132. data/test/plugin/test_in_tcp.rb +87 -2
  133. data/test/plugin/test_in_udp.rb +28 -0
  134. data/test/plugin/test_in_unix.rb +2 -2
  135. data/test/plugin/test_input.rb +12 -1
  136. data/test/plugin/test_metrics.rb +294 -0
  137. data/test/plugin/test_metrics_local.rb +96 -0
  138. data/test/plugin/test_multi_output.rb +25 -1
  139. data/test/plugin/test_out_exec.rb +6 -4
  140. data/test/plugin/test_out_exec_filter.rb +6 -2
  141. data/test/plugin/test_out_file.rb +34 -17
  142. data/test/plugin/test_out_forward.rb +78 -77
  143. data/test/plugin/test_out_http.rb +1 -0
  144. data/test/plugin/test_out_stdout.rb +2 -2
  145. data/test/plugin/test_output.rb +297 -12
  146. data/test/plugin/test_output_as_buffered.rb +44 -44
  147. data/test/plugin/test_output_as_buffered_compress.rb +32 -18
  148. data/test/plugin/test_output_as_buffered_retries.rb +54 -7
  149. data/test/plugin/test_output_as_buffered_secondary.rb +4 -4
  150. data/test/plugin/test_parser_regexp.rb +1 -6
  151. data/test/plugin/test_parser_syslog.rb +1 -1
  152. data/test/plugin_helper/test_cert_option.rb +1 -1
  153. data/test/plugin_helper/test_child_process.rb +38 -16
  154. data/test/plugin_helper/test_event_emitter.rb +29 -0
  155. data/test/plugin_helper/test_http_server_helper.rb +1 -1
  156. data/test/plugin_helper/test_metrics.rb +137 -0
  157. data/test/plugin_helper/test_retry_state.rb +602 -38
  158. data/test/plugin_helper/test_server.rb +78 -6
  159. data/test/plugin_helper/test_timer.rb +2 -2
  160. data/test/test_config.rb +191 -24
  161. data/test/test_event_router.rb +17 -0
  162. data/test/test_file_wrapper.rb +53 -0
  163. data/test/test_formatter.rb +24 -21
  164. data/test/test_log.rb +122 -40
  165. data/test/test_msgpack_factory.rb +32 -0
  166. data/test/test_plugin_classes.rb +102 -0
  167. data/test/test_root_agent.rb +30 -1
  168. data/test/test_supervisor.rb +477 -257
  169. data/test/test_time_parser.rb +22 -0
  170. metadata +55 -34
  171. data/.drone.yml +0 -35
  172. data/.github/workflows/issue-auto-closer.yml +0 -12
  173. data/.github/workflows/linux-test.yaml +0 -36
  174. data/.github/workflows/macos-test.yaml +0 -30
  175. data/.github/workflows/windows-test.yaml +0 -46
  176. data/.gitlab-ci.yml +0 -103
  177. data/lib/fluent/plugin/file_wrapper.rb +0 -187
  178. data/test/plugin/test_file_wrapper.rb +0 -126
  179. data/test/test_logger_initializer.rb +0 -46
@@ -160,13 +160,20 @@ module Fluent
160
160
  def resume
161
161
  stage = {}
162
162
  queue = []
163
+ exist_broken_file = false
163
164
 
164
165
  patterns = [@path]
165
166
  patterns.unshift @additional_resume_path if @additional_resume_path
166
167
  Dir.glob(escaped_patterns(patterns)) do |path|
167
168
  next unless File.file?(path)
168
169
 
169
- log.debug { "restoring buffer file: path = #{path}" }
170
+ if owner.respond_to?(:buffer_config) && owner.buffer_config&.flush_at_shutdown
171
+ # When `flush_at_shutdown` is `true`, the remaining chunk files during resuming are possibly broken
172
+ # since there may be a power failure or similar failure.
173
+ log.warn { "restoring buffer file: path = #{path}" }
174
+ else
175
+ log.debug { "restoring buffer file: path = #{path}" }
176
+ end
170
177
 
171
178
  m = new_metadata() # this metadata will be updated in FileSingleChunk.new
172
179
  mode = Fluent::Plugin::Buffer::FileSingleChunk.assume_chunk_state(path)
@@ -179,6 +186,7 @@ module Fluent
179
186
  chunk = Fluent::Plugin::Buffer::FileSingleChunk.new(m, path, mode, @key_in_path, compress: @compress)
180
187
  chunk.restore_size(@chunk_format) if @calc_num_records
181
188
  rescue Fluent::Plugin::Buffer::FileSingleChunk::FileChunkError => e
189
+ exist_broken_file = true
182
190
  handle_broken_files(path, mode, e)
183
191
  next
184
192
  end
@@ -193,6 +201,15 @@ module Fluent
193
201
 
194
202
  queue.sort_by!(&:modified_at)
195
203
 
204
+ # If one of the files is corrupted, other files may also be corrupted and be undetected.
205
+ # The time periods of each chunk are helpful to check the data.
206
+ if exist_broken_file
207
+ log.info "Since a broken chunk file was found, it is possible that other files remaining at the time of resuming were also broken. Here is the list of the files."
208
+ (stage.values + queue).each { |chunk|
209
+ log.info " #{chunk.path}:", :created_at => chunk.created_at, :modified_at => chunk.modified_at
210
+ }
211
+ end
212
+
196
213
  return stage, queue
197
214
  end
198
215
 
@@ -207,8 +224,20 @@ module Fluent
207
224
  end
208
225
 
209
226
  def handle_broken_files(path, mode, e)
210
- log.error "found broken chunk file during resume. Delete corresponding files:", path: path, mode: mode, err_msg: e.message
211
- # After support 'backup_dir' feature, these files are moved to backup_dir instead of unlink.
227
+ log.error "found broken chunk file during resume.", :path => path, :mode => mode, :err_msg => e.message
228
+ unique_id, _ = Fluent::Plugin::Buffer::FileSingleChunk.unique_id_and_key_from_path(path)
229
+ backup(unique_id) { |f|
230
+ File.open(path, 'rb') { |chunk|
231
+ chunk.set_encoding(Encoding::ASCII_8BIT)
232
+ chunk.sync = true
233
+ chunk.binmode
234
+ IO.copy_stream(chunk, f)
235
+ }
236
+ }
237
+ rescue => error
238
+ log.error "backup failed. Delete corresponding files.", :err_msg => error.message
239
+ ensure
240
+ log.warn "disable_chunk_backup is true. #{dump_unique_id_hex(unique_id)} chunk is thrown away." if @disable_chunk_backup
212
241
  File.unlink(path) rescue nil
213
242
  end
214
243
 
@@ -204,7 +204,7 @@ module Fluent
204
204
  end
205
205
  end
206
206
 
207
- # used only for queued v0.12 buffer path
207
+ # used only for queued v0.12 buffer path or broken files
208
208
  def self.unique_id_from_path(path)
209
209
  if /\.(b|q)([0-9a-f]+)\.[^\/]*\Z/n =~ path # //n switch means explicit 'ASCII-8BIT' pattern
210
210
  return $2.scan(/../).map{|x| x.to_i(16) }.pack('C*')
@@ -16,6 +16,8 @@
16
16
 
17
17
  require 'fluent/plugin/base'
18
18
  require 'fluent/plugin/owned_by_mixin'
19
+ require 'fluent/plugin_id'
20
+ require 'fluent/plugin_helper'
19
21
  require 'fluent/unique_id'
20
22
  require 'fluent/ext_monitor_require'
21
23
 
@@ -24,7 +26,9 @@ module Fluent
24
26
  class Buffer < Base
25
27
  include OwnedByMixin
26
28
  include UniqueId::Mixin
29
+ include PluginId
27
30
  include MonitorMixin
31
+ include PluginHelper::Mixin # for metrics
28
32
 
29
33
  class BufferError < StandardError; end
30
34
  class BufferOverflowError < BufferError; end
@@ -39,6 +43,8 @@ module Fluent
39
43
 
40
44
  configured_in :buffer
41
45
 
46
+ helpers_internal :metrics
47
+
42
48
  # TODO: system total buffer limit size in bytes by SystemConfig
43
49
 
44
50
  config_param :chunk_limit_size, :size, default: DEFAULT_CHUNK_LIMIT_SIZE
@@ -60,6 +66,9 @@ module Fluent
60
66
  desc 'Compress buffered data.'
61
67
  config_param :compress, :enum, list: [:text, :gzip], default: :text
62
68
 
69
+ desc 'If true, chunks are thrown away when unrecoverable error happens'
70
+ config_param :disable_chunk_backup, :bool, default: false
71
+
63
72
  Metadata = Struct.new(:timekey, :tag, :variables, :seq) do
64
73
  def initialize(timekey, tag, variables)
65
74
  super(timekey, tag, variables, 0)
@@ -153,8 +162,11 @@ module Fluent
153
162
  end
154
163
  end
155
164
 
165
+ # for metrics
166
+ attr_reader :stage_size_metrics, :stage_length_metrics, :queue_size_metrics, :queue_length_metrics
167
+ attr_reader :available_buffer_space_ratios_metrics, :total_queued_size_metrics
168
+ attr_reader :newest_timekey_metrics, :oldest_timekey_metrics
156
169
  # for tests
157
- attr_accessor :stage_size, :queue_size
158
170
  attr_reader :stage, :queue, :dequeued, :queued_num
159
171
 
160
172
  def initialize
@@ -171,12 +183,35 @@ module Fluent
171
183
  @queued_num = {} # metadata => int (number of queued chunks)
172
184
  @dequeued_num = {} # metadata => int (number of dequeued chunks)
173
185
 
174
- @stage_size = @queue_size = 0
186
+ @stage_length_metrics = nil
187
+ @stage_size_metrics = nil
188
+ @queue_length_metrics = nil
189
+ @queue_size_metrics = nil
190
+ @available_buffer_space_ratios_metrics = nil
191
+ @total_queued_size_metrics = nil
192
+ @newest_timekey_metrics = nil
193
+ @oldest_timekey_metrics = nil
175
194
  @timekeys = Hash.new(0)
176
195
  @enable_update_timekeys = false
177
196
  @mutex = Mutex.new
178
197
  end
179
198
 
199
+ def stage_size
200
+ @stage_size_metrics.get
201
+ end
202
+
203
+ def stage_size=(value)
204
+ @stage_size_metrics.set(value)
205
+ end
206
+
207
+ def queue_size
208
+ @queue_size_metrics.get
209
+ end
210
+
211
+ def queue_size=(value)
212
+ @queue_size_metrics.set(value)
213
+ end
214
+
180
215
  def persistent?
181
216
  false
182
217
  end
@@ -187,6 +222,28 @@ module Fluent
187
222
  unless @queue_limit_length.nil?
188
223
  @total_limit_size = @chunk_limit_size * @queue_limit_length
189
224
  end
225
+ @stage_length_metrics = metrics_create(namespace: "fluentd", subsystem: "buffer", name: "stage_length",
226
+ help_text: 'Length of stage buffers', prefer_gauge: true)
227
+ @stage_length_metrics.set(0)
228
+ @stage_size_metrics = metrics_create(namespace: "fluentd", subsystem: "buffer", name: "stage_byte_size",
229
+ help_text: 'Total size of stage buffers', prefer_gauge: true)
230
+ @stage_size_metrics.set(0) # Ensure zero.
231
+ @queue_length_metrics = metrics_create(namespace: "fluentd", subsystem: "buffer", name: "queue_length",
232
+ help_text: 'Length of queue buffers', prefer_gauge: true)
233
+ @queue_length_metrics.set(0)
234
+ @queue_size_metrics = metrics_create(namespace: "fluentd", subsystem: "buffer", name: "queue_byte_size",
235
+ help_text: 'Total size of queue buffers', prefer_gauge: true)
236
+ @queue_size_metrics.set(0) # Ensure zero.
237
+ @available_buffer_space_ratios_metrics = metrics_create(namespace: "fluentd", subsystem: "buffer", name: "available_buffer_space_ratios",
238
+ help_text: 'Ratio of available space in buffer', prefer_gauge: true)
239
+ @available_buffer_space_ratios_metrics.set(100) # Default is 100%.
240
+ @total_queued_size_metrics = metrics_create(namespace: "fluentd", subsystem: "buffer", name: "total_queued_size",
241
+ help_text: 'Total size of stage and queue buffers', prefer_gauge: true)
242
+ @total_queued_size_metrics.set(0)
243
+ @newest_timekey_metrics = metrics_create(namespace: "fluentd", subsystem: "buffer", name: "newest_timekey",
244
+ help_text: 'Newest timekey in buffer', prefer_gauge: true)
245
+ @oldest_timekey_metrics = metrics_create(namespace: "fluentd", subsystem: "buffer", name: "oldest_timekey",
246
+ help_text: 'Oldest timekey in buffer', prefer_gauge: true)
190
247
  end
191
248
 
192
249
  def enable_update_timekeys
@@ -198,15 +255,15 @@ module Fluent
198
255
 
199
256
  @stage, @queue = resume
200
257
  @stage.each_pair do |metadata, chunk|
201
- @stage_size += chunk.bytesize
258
+ @stage_size_metrics.add(chunk.bytesize)
202
259
  end
203
260
  @queue.each do |chunk|
204
261
  @queued_num[chunk.metadata] ||= 0
205
262
  @queued_num[chunk.metadata] += 1
206
- @queue_size += chunk.bytesize
263
+ @queue_size_metrics.add(chunk.bytesize)
207
264
  end
208
265
  update_timekeys
209
- log.debug "buffer started", instance: self.object_id, stage_size: @stage_size, queue_size: @queue_size
266
+ log.debug "buffer started", instance: self.object_id, stage_size: @stage_size_metrics.get, queue_size: @queue_size_metrics.get
210
267
  end
211
268
 
212
269
  def close
@@ -228,17 +285,19 @@ module Fluent
228
285
  def terminate
229
286
  super
230
287
  @dequeued = @stage = @queue = @queued_num = nil
231
- @stage_size = @queue_size = 0
288
+ @stage_length_metrics = @stage_size_metrics = @queue_length_metrics = @queue_size_metrics = nil
289
+ @available_buffer_space_ratios_metrics = @total_queued_size_metrics = nil
290
+ @newest_timekey_metrics = @oldest_timekey_metrics = nil
232
291
  @timekeys.clear
233
292
  end
234
293
 
235
294
  def storable?
236
- @total_limit_size > @stage_size + @queue_size
295
+ @total_limit_size > @stage_size_metrics.get + @queue_size_metrics.get
237
296
  end
238
297
 
239
298
  ## TODO: for back pressure feature
240
299
  # def used?(ratio)
241
- # @total_limit_size * ratio > @stage_size + @queue_size
300
+ # @total_limit_size * ratio > @stage_size_metrics.get + @queue_size_metrics.get
242
301
  # end
243
302
 
244
303
  def resume
@@ -276,12 +335,14 @@ module Fluent
276
335
  unstaged_chunks = {} # metadata => [chunk, chunk, ...]
277
336
  chunks_to_enqueue = []
278
337
  staged_bytesizes_by_chunk = {}
338
+ # track internal BufferChunkOverflowError in write_step_by_step
339
+ buffer_chunk_overflow_errors = []
279
340
 
280
341
  begin
281
342
  # sort metadata to get lock of chunks in same order with other threads
282
343
  metadata_and_data.keys.sort.each do |metadata|
283
344
  data = metadata_and_data[metadata]
284
- write_once(metadata, data, format: format, size: size) do |chunk, adding_bytesize|
345
+ write_once(metadata, data, format: format, size: size) do |chunk, adding_bytesize, error|
285
346
  chunk.mon_enter # add lock to prevent to be committed/rollbacked from other threads
286
347
  operated_chunks << chunk
287
348
  if chunk.staged?
@@ -296,6 +357,9 @@ module Fluent
296
357
  unstaged_chunks[metadata] ||= []
297
358
  unstaged_chunks[metadata] << chunk
298
359
  end
360
+ if error && !error.empty?
361
+ buffer_chunk_overflow_errors << error
362
+ end
299
363
  end
300
364
  end
301
365
 
@@ -344,7 +408,7 @@ module Fluent
344
408
  #
345
409
  staged_bytesizes_by_chunk.each do |chunk, bytesize|
346
410
  chunk.synchronize do
347
- synchronize { @stage_size += bytesize }
411
+ synchronize { @stage_size_metrics.add(bytesize) }
348
412
  log.on_trace { log.trace { "chunk #{chunk.path} size_added: #{bytesize} new_size: #{chunk.bytesize}" } }
349
413
  end
350
414
  end
@@ -353,7 +417,7 @@ module Fluent
353
417
  if c.staged? && (enqueue || chunk_size_full?(c))
354
418
  m = c.metadata
355
419
  enqueue_chunk(m)
356
- if unstaged_chunks[m]
420
+ if unstaged_chunks[m] && !unstaged_chunks[m].empty?
357
421
  u = unstaged_chunks[m].pop
358
422
  u.synchronize do
359
423
  if u.unstaged? && !chunk_size_full?(u)
@@ -361,7 +425,7 @@ module Fluent
361
425
  u.metadata.seq = 0
362
426
  synchronize {
363
427
  @stage[m] = u.staged!
364
- @stage_size += u.bytesize
428
+ @stage_size_metrics.add(u.bytesize)
365
429
  }
366
430
  end
367
431
  end
@@ -388,6 +452,10 @@ module Fluent
388
452
  end
389
453
  chunk.mon_exit rescue nil # this may raise ThreadError for chunks already committed
390
454
  end
455
+ unless buffer_chunk_overflow_errors.empty?
456
+ # Notify delayed BufferChunkOverflowError here
457
+ raise BufferChunkOverflowError, buffer_chunk_overflow_errors.join(", ")
458
+ end
391
459
  end
392
460
  end
393
461
 
@@ -428,8 +496,8 @@ module Fluent
428
496
  chunk.enqueued!
429
497
  end
430
498
  bytesize = chunk.bytesize
431
- @stage_size -= bytesize
432
- @queue_size += bytesize
499
+ @stage_size_metrics.sub(bytesize)
500
+ @queue_size_metrics.add(bytesize)
433
501
  end
434
502
  end
435
503
  nil
@@ -446,7 +514,7 @@ module Fluent
446
514
  @queued_num[metadata] = @queued_num.fetch(metadata, 0) + 1
447
515
  chunk.enqueued!
448
516
  end
449
- @queue_size += chunk.bytesize
517
+ @queue_size_metrics.add(chunk.bytesize)
450
518
  end
451
519
  end
452
520
 
@@ -512,7 +580,7 @@ module Fluent
512
580
  chunk = @dequeued.delete(chunk_id)
513
581
  return false unless chunk # already purged by other thread
514
582
  @queue.unshift(chunk)
515
- log.trace "chunk taken back", instance: self.object_id, chunk_id: dump_unique_id_hex(chunk_id), metadata: chunk.metadata
583
+ log.on_trace { log.trace "chunk taken back", instance: self.object_id, chunk_id: dump_unique_id_hex(chunk_id), metadata: chunk.metadata }
516
584
  @queued_num[chunk.metadata] += 1 # BUG if nil
517
585
  @dequeued_num[chunk.metadata] -= 1
518
586
  end
@@ -531,7 +599,7 @@ module Fluent
531
599
  begin
532
600
  bytesize = chunk.bytesize
533
601
  chunk.purge
534
- @queue_size -= bytesize
602
+ @queue_size_metrics.sub(bytesize)
535
603
  rescue => e
536
604
  log.error "failed to purge buffer chunk", chunk_id: dump_unique_id_hex(chunk_id), error_class: e.class, error: e
537
605
  log.error_backtrace
@@ -542,7 +610,7 @@ module Fluent
542
610
  @queued_num.delete(metadata)
543
611
  @dequeued_num.delete(metadata)
544
612
  end
545
- log.trace "chunk purged", instance: self.object_id, chunk_id: dump_unique_id_hex(chunk_id), metadata: metadata
613
+ log.on_trace { log.trace "chunk purged", instance: self.object_id, chunk_id: dump_unique_id_hex(chunk_id), metadata: metadata }
546
614
  end
547
615
 
548
616
  nil
@@ -562,7 +630,7 @@ module Fluent
562
630
  log.error_backtrace
563
631
  end
564
632
  end
565
- @queue_size = 0
633
+ @queue_size_metrics.set(0)
566
634
  end
567
635
  end
568
636
 
@@ -680,16 +748,14 @@ module Fluent
680
748
  modified_chunks = []
681
749
  modified_metadata = metadata
682
750
  get_next_chunk = ->(){
683
- c = if staged_chunk_used
684
- # Staging new chunk here is bad idea:
685
- # Recovering whole state including newly staged chunks is much harder than current implementation.
686
- modified_metadata = modified_metadata.dup_next
687
- generate_chunk(modified_metadata)
688
- else
689
- synchronize { @stage[modified_metadata] ||= generate_chunk(modified_metadata).staged! }
690
- end
691
- modified_chunks << c
692
- c
751
+ if staged_chunk_used
752
+ # Staging new chunk here is bad idea:
753
+ # Recovering whole state including newly staged chunks is much harder than current implementation.
754
+ modified_metadata = modified_metadata.dup_next
755
+ generate_chunk(modified_metadata)
756
+ else
757
+ synchronize { @stage[modified_metadata] ||= generate_chunk(modified_metadata).staged! }
758
+ end
693
759
  }
694
760
 
695
761
  writing_splits_index = 0
@@ -697,60 +763,116 @@ module Fluent
697
763
 
698
764
  while writing_splits_index < splits.size
699
765
  chunk = get_next_chunk.call
700
- chunk.synchronize do
701
- raise ShouldRetry unless chunk.writable?
702
- staged_chunk_used = true if chunk.staged?
766
+ errors = []
767
+ # The chunk must be locked until being passed to &block.
768
+ chunk.mon_enter
769
+ modified_chunks << {chunk: chunk, adding_bytesize: 0, errors: errors}
703
770
 
704
- original_bytesize = chunk.bytesize
705
- begin
706
- while writing_splits_index < splits.size
707
- split = splits[writing_splits_index]
708
- if format
709
- chunk.concat(format.call(split), split.size)
710
- else
711
- chunk.append(split, compress: @compress)
771
+ raise ShouldRetry unless chunk.writable?
772
+ staged_chunk_used = true if chunk.staged?
773
+
774
+ original_bytesize = committed_bytesize = chunk.bytesize
775
+ begin
776
+ while writing_splits_index < splits.size
777
+ split = splits[writing_splits_index]
778
+ formatted_split = format ? format.call(split) : nil
779
+
780
+ if split.size == 1 # Check BufferChunkOverflowError
781
+ determined_bytesize = nil
782
+ if @compress != :text
783
+ determined_bytesize = nil
784
+ elsif formatted_split
785
+ determined_bytesize = formatted_split.bytesize
786
+ elsif split.first.respond_to?(:bytesize)
787
+ determined_bytesize = split.first.bytesize
712
788
  end
713
789
 
714
- if chunk_size_over?(chunk) # split size is larger than difference between size_full? and size_over?
715
- chunk.rollback
790
+ if determined_bytesize && determined_bytesize > @chunk_limit_size
791
+ # It is an obvious case that BufferChunkOverflowError should be raised here.
792
+ # But if it raises here, already processed 'split' or
793
+ # the proceeding 'split' will be lost completely.
794
+ # So it is a last resort to delay raising such an exception
795
+ errors << "a #{determined_bytesize} bytes record (nth: #{writing_splits_index}) is larger than buffer chunk limit size (#{@chunk_limit_size})"
796
+ writing_splits_index += 1
797
+ next
798
+ end
716
799
 
717
- if split.size == 1 && original_bytesize == 0
718
- big_record_size = format ? format.call(split).bytesize : split.first.bytesize
719
- raise BufferChunkOverflowError, "a #{big_record_size}bytes record is larger than buffer chunk limit size"
720
- end
800
+ if determined_bytesize.nil? || chunk.bytesize + determined_bytesize > @chunk_limit_size
801
+ # The split will (might) cause size over so keep already processed
802
+ # 'split' content here (allow performance regression a bit).
803
+ chunk.commit
804
+ committed_bytesize = chunk.bytesize
805
+ end
806
+ end
807
+
808
+ if format
809
+ chunk.concat(formatted_split, split.size)
810
+ else
811
+ chunk.append(split, compress: @compress)
812
+ end
813
+ adding_bytes = chunk.bytesize - committed_bytesize
814
+
815
+ if chunk_size_over?(chunk) # split size is larger than difference between size_full? and size_over?
816
+ chunk.rollback
817
+ committed_bytesize = chunk.bytesize
721
818
 
722
- if chunk_size_full?(chunk) || split.size == 1
723
- enqueue_chunk_before_retry = true
819
+ if split.size == 1 # Check BufferChunkOverflowError again
820
+ if adding_bytes > @chunk_limit_size
821
+ errors << "concatenated/appended a #{adding_bytes} bytes record (nth: #{writing_splits_index}) is larger than buffer chunk limit size (#{@chunk_limit_size})"
822
+ writing_splits_index += 1
823
+ next
724
824
  else
725
- splits_count *= 10
825
+ # As already processed content is kept after rollback, then unstaged chunk should be queued.
826
+ # After that, re-process current split again.
827
+ # New chunk should be allocated, to do it, modify @stage and so on.
828
+ synchronize { @stage.delete(modified_metadata) }
829
+ staged_chunk_used = false
830
+ chunk.unstaged!
831
+ break
726
832
  end
833
+ end
727
834
 
728
- raise ShouldRetry
835
+ if chunk_size_full?(chunk) || split.size == 1
836
+ enqueue_chunk_before_retry = true
837
+ else
838
+ splits_count *= 10
729
839
  end
730
840
 
731
- writing_splits_index += 1
841
+ raise ShouldRetry
842
+ end
843
+
844
+ writing_splits_index += 1
732
845
 
733
- if chunk_size_full?(chunk)
734
- break
735
- end
846
+ if chunk_size_full?(chunk)
847
+ break
736
848
  end
737
- rescue
738
- chunk.purge if chunk.unstaged? # unstaged chunk will leak unless purge it
739
- raise
740
849
  end
741
-
742
- block.call(chunk, chunk.bytesize - original_bytesize)
850
+ rescue
851
+ chunk.purge if chunk.unstaged? # unstaged chunk will leak unless purge it
852
+ raise
743
853
  end
854
+
855
+ modified_chunks.last[:adding_bytesize] = chunk.bytesize - original_bytesize
856
+ end
857
+ modified_chunks.each do |data|
858
+ block.call(data[:chunk], data[:adding_bytesize], data[:errors])
744
859
  end
745
860
  rescue ShouldRetry
746
- modified_chunks.each do |mc|
747
- mc.rollback rescue nil
748
- if mc.unstaged?
749
- mc.purge rescue nil
861
+ modified_chunks.each do |data|
862
+ chunk = data[:chunk]
863
+ chunk.rollback rescue nil
864
+ if chunk.unstaged?
865
+ chunk.purge rescue nil
750
866
  end
867
+ chunk.mon_exit rescue nil
751
868
  end
752
869
  enqueue_chunk(metadata) if enqueue_chunk_before_retry
753
870
  retry
871
+ ensure
872
+ modified_chunks.each do |data|
873
+ chunk = data[:chunk]
874
+ chunk.mon_exit
875
+ end
754
876
  end
755
877
 
756
878
  STATS_KEYS = [
@@ -765,28 +887,52 @@ module Fluent
765
887
  ]
766
888
 
767
889
  def statistics
768
- stage_size, queue_size = @stage_size, @queue_size
890
+ stage_size, queue_size = @stage_size_metrics.get, @queue_size_metrics.get
769
891
  buffer_space = 1.0 - ((stage_size + queue_size * 1.0) / @total_limit_size)
892
+ @stage_length_metrics.set(@stage.size)
893
+ @queue_length_metrics.set(@queue.size)
894
+ @available_buffer_space_ratios_metrics.set(buffer_space * 100)
895
+ @total_queued_size_metrics.set(stage_size + queue_size)
770
896
  stats = {
771
- 'stage_length' => @stage.size,
897
+ 'stage_length' => @stage_length_metrics.get,
772
898
  'stage_byte_size' => stage_size,
773
- 'queue_length' => @queue.size,
899
+ 'queue_length' => @queue_length_metrics.get,
774
900
  'queue_byte_size' => queue_size,
775
- 'available_buffer_space_ratios' => (buffer_space * 100).round(1),
776
- 'total_queued_size' => stage_size + queue_size,
901
+ 'available_buffer_space_ratios' => @available_buffer_space_ratios_metrics.get.round(1),
902
+ 'total_queued_size' => @total_queued_size_metrics.get,
777
903
  }
778
904
 
779
905
  tkeys = timekeys
780
906
  if (m = tkeys.min)
781
- stats['oldest_timekey'] = m
907
+ @oldest_timekey_metrics.set(m)
908
+ stats['oldest_timekey'] = @oldest_timekey_metrics.get
782
909
  end
783
910
  if (m = tkeys.max)
784
- stats['newest_timekey'] = m
911
+ @newest_timekey_metrics.set(m)
912
+ stats['newest_timekey'] = @newest_timekey_metrics.get
785
913
  end
786
914
 
787
915
  { 'buffer' => stats }
788
916
  end
789
917
 
918
+ def backup(chunk_unique_id)
919
+ unique_id = dump_unique_id_hex(chunk_unique_id)
920
+
921
+ if @disable_chunk_backup
922
+ log.warn "disable_chunk_backup is true. #{unique_id} chunk is not backed up."
923
+ return
924
+ end
925
+
926
+ safe_owner_id = owner.plugin_id.gsub(/[ "\/\\:;|*<>?]/, '_')
927
+ backup_base_dir = system_config.root_dir || DEFAULT_BACKUP_DIR
928
+ backup_file = File.join(backup_base_dir, 'backup', "worker#{fluentd_worker_id}", safe_owner_id, "#{unique_id}.log")
929
+ backup_dir = File.dirname(backup_file)
930
+
931
+ log.warn "bad chunk is moved to #{backup_file}"
932
+ FileUtils.mkdir_p(backup_dir, mode: system_config.dir_permission || Fluent::DEFAULT_DIR_PERMISSION) unless Dir.exist?(backup_dir)
933
+ File.open(backup_file, 'ab', system_config.file_permission || Fluent::DEFAULT_FILE_PERMISSION) { |f| yield f }
934
+ end
935
+
790
936
  private
791
937
 
792
938
  def optimistic_queued?(metadata = nil)
@@ -28,13 +28,47 @@ module Fluent
28
28
  include PluginLoggerMixin
29
29
  include PluginHelper::Mixin
30
30
 
31
- helpers_internal :event_emitter
31
+ helpers_internal :event_emitter, :metrics
32
32
 
33
33
  attr_reader :has_filter_with_time
34
34
 
35
35
  def initialize
36
36
  super
37
37
  @has_filter_with_time = has_filter_with_time?
38
+ @emit_records_metrics = nil
39
+ @emit_size_metrics = nil
40
+ @counter_mutex = Mutex.new
41
+ @enable_size_metrics = false
42
+ end
43
+
44
+ def emit_records
45
+ @emit_records_metrics.get
46
+ end
47
+
48
+ def emit_size
49
+ @emit_size_metrics.get
50
+ end
51
+
52
+ def configure(conf)
53
+ super
54
+
55
+ @emit_records_metrics = metrics_create(namespace: "fluentd", subsystem: "filter", name: "emit_records", help_text: "Number of count emit records")
56
+ @emit_size_metrics = metrics_create(namespace: "fluentd", subsystem: "filter", name: "emit_size", help_text: "Total size of emit events")
57
+ @enable_size_metrics = !!system_config.enable_size_metrics
58
+ end
59
+
60
+ def statistics
61
+ stats = {
62
+ 'emit_records' => @emit_records_metrics.get,
63
+ 'emit_size' => @emit_size_metrics.get,
64
+ }
65
+
66
+ { 'filter' => stats }
67
+ end
68
+
69
+ def measure_metrics(es)
70
+ @emit_records_metrics.add(es.size)
71
+ @emit_size_metrics.add(es.to_msgpack_stream.bytesize) if @enable_size_metrics
38
72
  end
39
73
 
40
74
  def filter(tag, time, record)
@@ -316,7 +316,7 @@ module Fluent::Plugin
316
316
  end
317
317
 
318
318
  (Object.instance_methods).each do |m|
319
- undef_method m unless m.to_s =~ /^__|respond_to_missing\?|object_id|public_methods|instance_eval|method_missing|define_singleton_method|respond_to\?|new_ostruct_member|^class$/
319
+ undef_method m unless /^__|respond_to_missing\?|object_id|public_methods|instance_eval|method_missing|define_singleton_method|respond_to\?|new_ostruct_member|^class$/.match?(m.to_s)
320
320
  end
321
321
  end
322
322
  end
@@ -40,7 +40,7 @@ module Fluent::Plugin
40
40
  config_param :backlog, :integer, default: nil
41
41
  # SO_LINGER 0 to send RST rather than FIN to avoid lots of connections sitting in TIME_WAIT at src
42
42
  desc 'The timeout time used to set linger option.'
43
- config_param :linger_timeout, :integer, default: 0
43
+ config_param :linger_timeout, :integer, default: nil, deprecated: "use transport directive"
44
44
  # This option is for Cool.io's loop wait timeout to avoid loop stuck at shutdown. Most users don't need to change this value.
45
45
  config_param :blocking_timeout, :time, default: 0.5
46
46
  desc 'Try to resolve hostname from IP addresses or not.'
@@ -430,7 +430,7 @@ module Fluent::Plugin
430
430
  end
431
431
  _ping, hostname, shared_key_salt, shared_key_hexdigest, username, password_digest = message
432
432
 
433
- node = @nodes.select{|n| n[:address].include?(remote_addr) rescue false }.first
433
+ node = @nodes.find{|n| n[:address].include?(remote_addr) rescue false }
434
434
  if !node && !@security.allow_anonymous_source
435
435
  log.warn "Anonymous client disallowed", address: remote_addr, hostname: hostname
436
436
  return false, "anonymous source host '#{remote_addr}' denied", nil