fluentd 1.13.3 → 1.16.5
- checksums.yaml +4 -4
- data/.github/ISSUE_TEMPLATE/{bug_report.yaml → bug_report.yml} +2 -0
- data/.github/ISSUE_TEMPLATE/config.yml +2 -2
- data/.github/ISSUE_TEMPLATE/{feature_request.yaml → feature_request.yml} +1 -0
- data/.github/workflows/stale-actions.yml +11 -9
- data/.github/workflows/test.yml +32 -0
- data/CHANGELOG.md +490 -10
- data/CONTRIBUTING.md +2 -2
- data/MAINTAINERS.md +7 -5
- data/README.md +3 -23
- data/Rakefile +1 -1
- data/SECURITY.md +14 -0
- data/fluentd.gemspec +7 -8
- data/lib/fluent/command/cat.rb +13 -3
- data/lib/fluent/command/ctl.rb +6 -3
- data/lib/fluent/command/fluentd.rb +73 -65
- data/lib/fluent/command/plugin_config_formatter.rb +1 -1
- data/lib/fluent/compat/output.rb +9 -6
- data/lib/fluent/config/dsl.rb +1 -1
- data/lib/fluent/config/error.rb +12 -0
- data/lib/fluent/config/literal_parser.rb +2 -2
- data/lib/fluent/config/parser.rb +1 -1
- data/lib/fluent/config/v1_parser.rb +3 -3
- data/lib/fluent/config/yaml_parser/fluent_value.rb +47 -0
- data/lib/fluent/config/yaml_parser/loader.rb +108 -0
- data/lib/fluent/config/yaml_parser/parser.rb +166 -0
- data/lib/fluent/config/yaml_parser/section_builder.rb +107 -0
- data/lib/fluent/config/yaml_parser.rb +56 -0
- data/lib/fluent/config.rb +14 -1
- data/lib/fluent/counter/server.rb +1 -1
- data/lib/fluent/counter/validator.rb +3 -3
- data/lib/fluent/daemon.rb +2 -4
- data/lib/fluent/engine.rb +1 -1
- data/lib/fluent/env.rb +4 -0
- data/lib/fluent/error.rb +3 -0
- data/lib/fluent/event.rb +8 -4
- data/lib/fluent/event_router.rb +47 -2
- data/lib/fluent/file_wrapper.rb +137 -0
- data/lib/fluent/log/console_adapter.rb +66 -0
- data/lib/fluent/log.rb +44 -5
- data/lib/fluent/match.rb +1 -1
- data/lib/fluent/msgpack_factory.rb +6 -1
- data/lib/fluent/oj_options.rb +1 -2
- data/lib/fluent/plugin/bare_output.rb +49 -8
- data/lib/fluent/plugin/base.rb +26 -9
- data/lib/fluent/plugin/buf_file.rb +34 -5
- data/lib/fluent/plugin/buf_file_single.rb +32 -3
- data/lib/fluent/plugin/buffer/file_chunk.rb +1 -1
- data/lib/fluent/plugin/buffer.rb +216 -70
- data/lib/fluent/plugin/filter.rb +35 -1
- data/lib/fluent/plugin/filter_record_transformer.rb +1 -1
- data/lib/fluent/plugin/in_forward.rb +2 -2
- data/lib/fluent/plugin/in_http.rb +39 -10
- data/lib/fluent/plugin/in_monitor_agent.rb +4 -2
- data/lib/fluent/plugin/in_sample.rb +1 -1
- data/lib/fluent/plugin/in_syslog.rb +13 -1
- data/lib/fluent/plugin/in_tail/group_watch.rb +204 -0
- data/lib/fluent/plugin/in_tail/position_file.rb +33 -33
- data/lib/fluent/plugin/in_tail.rb +216 -84
- data/lib/fluent/plugin/in_tcp.rb +47 -2
- data/lib/fluent/plugin/input.rb +39 -1
- data/lib/fluent/plugin/metrics.rb +119 -0
- data/lib/fluent/plugin/metrics_local.rb +96 -0
- data/lib/fluent/plugin/multi_output.rb +43 -6
- data/lib/fluent/plugin/out_copy.rb +1 -1
- data/lib/fluent/plugin/out_exec_filter.rb +2 -2
- data/lib/fluent/plugin/out_file.rb +20 -2
- data/lib/fluent/plugin/out_forward/ack_handler.rb +19 -4
- data/lib/fluent/plugin/out_forward/socket_cache.rb +2 -0
- data/lib/fluent/plugin/out_forward.rb +17 -9
- data/lib/fluent/plugin/out_secondary_file.rb +39 -22
- data/lib/fluent/plugin/output.rb +167 -78
- data/lib/fluent/plugin/parser.rb +3 -4
- data/lib/fluent/plugin/parser_apache2.rb +1 -1
- data/lib/fluent/plugin/parser_json.rb +1 -1
- data/lib/fluent/plugin/parser_syslog.rb +1 -1
- data/lib/fluent/plugin/storage_local.rb +3 -5
- data/lib/fluent/plugin.rb +10 -1
- data/lib/fluent/plugin_helper/child_process.rb +3 -0
- data/lib/fluent/plugin_helper/event_emitter.rb +8 -1
- data/lib/fluent/plugin_helper/event_loop.rb +2 -2
- data/lib/fluent/plugin_helper/http_server/server.rb +2 -1
- data/lib/fluent/plugin_helper/metrics.rb +129 -0
- data/lib/fluent/plugin_helper/record_accessor.rb +1 -1
- data/lib/fluent/plugin_helper/retry_state.rb +14 -4
- data/lib/fluent/plugin_helper/server.rb +35 -6
- data/lib/fluent/plugin_helper/service_discovery.rb +2 -2
- data/lib/fluent/plugin_helper/socket.rb +13 -2
- data/lib/fluent/plugin_helper/thread.rb +3 -3
- data/lib/fluent/plugin_helper.rb +1 -0
- data/lib/fluent/plugin_id.rb +3 -2
- data/lib/fluent/registry.rb +2 -1
- data/lib/fluent/root_agent.rb +6 -0
- data/lib/fluent/rpc.rb +4 -3
- data/lib/fluent/supervisor.rb +283 -259
- data/lib/fluent/system_config.rb +13 -3
- data/lib/fluent/test/driver/base.rb +11 -5
- data/lib/fluent/test/driver/filter.rb +4 -0
- data/lib/fluent/test/startup_shutdown.rb +6 -8
- data/lib/fluent/time.rb +21 -20
- data/lib/fluent/version.rb +1 -1
- data/lib/fluent/win32api.rb +38 -0
- data/lib/fluent/winsvc.rb +5 -8
- data/templates/new_gem/test/helper.rb.erb +0 -1
- data/test/command/test_cat.rb +31 -2
- data/test/command/test_ctl.rb +1 -2
- data/test/command/test_fluentd.rb +209 -24
- data/test/command/test_plugin_config_formatter.rb +0 -1
- data/test/compat/test_parser.rb +6 -6
- data/test/config/test_system_config.rb +13 -11
- data/test/config/test_types.rb +1 -1
- data/test/log/test_console_adapter.rb +110 -0
- data/test/plugin/in_tail/test_io_handler.rb +26 -8
- data/test/plugin/in_tail/test_position_file.rb +48 -59
- data/test/plugin/out_forward/test_ack_handler.rb +39 -0
- data/test/plugin/out_forward/test_socket_cache.rb +26 -1
- data/test/plugin/test_bare_output.rb +14 -1
- data/test/plugin/test_base.rb +133 -1
- data/test/plugin/test_buf_file.rb +62 -23
- data/test/plugin/test_buf_file_single.rb +65 -0
- data/test/plugin/test_buffer.rb +267 -3
- data/test/plugin/test_buffer_chunk.rb +11 -0
- data/test/plugin/test_filter.rb +12 -1
- data/test/plugin/test_filter_parser.rb +1 -1
- data/test/plugin/test_filter_stdout.rb +2 -2
- data/test/plugin/test_in_forward.rb +9 -11
- data/test/plugin/test_in_http.rb +65 -3
- data/test/plugin/test_in_monitor_agent.rb +216 -11
- data/test/plugin/test_in_object_space.rb +9 -3
- data/test/plugin/test_in_syslog.rb +35 -0
- data/test/plugin/test_in_tail.rb +1393 -385
- data/test/plugin/test_in_tcp.rb +87 -2
- data/test/plugin/test_in_udp.rb +28 -0
- data/test/plugin/test_in_unix.rb +2 -2
- data/test/plugin/test_input.rb +12 -1
- data/test/plugin/test_metrics.rb +294 -0
- data/test/plugin/test_metrics_local.rb +96 -0
- data/test/plugin/test_multi_output.rb +25 -1
- data/test/plugin/test_out_exec.rb +6 -4
- data/test/plugin/test_out_exec_filter.rb +6 -2
- data/test/plugin/test_out_file.rb +34 -17
- data/test/plugin/test_out_forward.rb +78 -77
- data/test/plugin/test_out_http.rb +1 -0
- data/test/plugin/test_out_stdout.rb +2 -2
- data/test/plugin/test_output.rb +297 -12
- data/test/plugin/test_output_as_buffered.rb +44 -44
- data/test/plugin/test_output_as_buffered_compress.rb +32 -18
- data/test/plugin/test_output_as_buffered_retries.rb +54 -7
- data/test/plugin/test_output_as_buffered_secondary.rb +4 -4
- data/test/plugin/test_parser_regexp.rb +1 -6
- data/test/plugin/test_parser_syslog.rb +1 -1
- data/test/plugin_helper/test_cert_option.rb +1 -1
- data/test/plugin_helper/test_child_process.rb +38 -16
- data/test/plugin_helper/test_event_emitter.rb +29 -0
- data/test/plugin_helper/test_http_server_helper.rb +1 -1
- data/test/plugin_helper/test_metrics.rb +137 -0
- data/test/plugin_helper/test_retry_state.rb +602 -38
- data/test/plugin_helper/test_server.rb +78 -6
- data/test/plugin_helper/test_timer.rb +2 -2
- data/test/test_config.rb +191 -24
- data/test/test_event_router.rb +17 -0
- data/test/test_file_wrapper.rb +53 -0
- data/test/test_formatter.rb +24 -21
- data/test/test_log.rb +122 -40
- data/test/test_msgpack_factory.rb +32 -0
- data/test/test_plugin_classes.rb +102 -0
- data/test/test_root_agent.rb +30 -1
- data/test/test_supervisor.rb +477 -257
- data/test/test_time_parser.rb +22 -0
- metadata +55 -34
- data/.drone.yml +0 -35
- data/.github/workflows/issue-auto-closer.yml +0 -12
- data/.github/workflows/linux-test.yaml +0 -36
- data/.github/workflows/macos-test.yaml +0 -30
- data/.github/workflows/windows-test.yaml +0 -46
- data/.gitlab-ci.yml +0 -103
- data/lib/fluent/plugin/file_wrapper.rb +0 -187
- data/test/plugin/test_file_wrapper.rb +0 -126
- data/test/test_logger_initializer.rb +0 -46
data/lib/fluent/plugin/buf_file_single.rb
CHANGED
@@ -160,13 +160,20 @@ module Fluent
       def resume
         stage = {}
         queue = []
+        exist_broken_file = false
 
         patterns = [@path]
         patterns.unshift @additional_resume_path if @additional_resume_path
         Dir.glob(escaped_patterns(patterns)) do |path|
           next unless File.file?(path)
 
-          log.debug { "restoring buffer file: path = #{path}" }
+          if owner.respond_to?(:buffer_config) && owner.buffer_config&.flush_at_shutdown
+            # When `flush_at_shutdown` is `true`, the remaining chunk files during resuming are possibly broken
+            # since there may be a power failure or similar failure.
+            log.warn { "restoring buffer file: path = #{path}" }
+          else
+            log.debug { "restoring buffer file: path = #{path}" }
+          end
 
           m = new_metadata() # this metadata will be updated in FileSingleChunk.new
           mode = Fluent::Plugin::Buffer::FileSingleChunk.assume_chunk_state(path)
@@ -179,6 +186,7 @@ module Fluent
             chunk = Fluent::Plugin::Buffer::FileSingleChunk.new(m, path, mode, @key_in_path, compress: @compress)
             chunk.restore_size(@chunk_format) if @calc_num_records
           rescue Fluent::Plugin::Buffer::FileSingleChunk::FileChunkError => e
+            exist_broken_file = true
             handle_broken_files(path, mode, e)
             next
           end
@@ -193,6 +201,15 @@ module Fluent
 
         queue.sort_by!(&:modified_at)
 
+        # If one of the files is corrupted, other files may also be corrupted and be undetected.
+        # The time priods of each chunk are helpful to check the data.
+        if exist_broken_file
+          log.info "Since a broken chunk file was found, it is possible that other files remaining at the time of resuming were also broken. Here is the list of the files."
+          (stage.values + queue).each { |chunk|
+            log.info "  #{chunk.path}:", :created_at => chunk.created_at, :modified_at => chunk.modified_at
+          }
+        end
+
         return stage, queue
       end
 
@@ -207,8 +224,20 @@ module Fluent
       end
 
       def handle_broken_files(path, mode, e)
-        log.error "found broken chunk file during resume. Delete corresponding files:", :path => path, :mode => mode, :err_msg => e.message
-        # After support 'backup_dir' feature, these files are moved to backup_dir instead of unlink.
+        log.error "found broken chunk file during resume.", :path => path, :mode => mode, :err_msg => e.message
+        unique_id, _ = Fluent::Plugin::Buffer::FileSingleChunk.unique_id_and_key_from_path(path)
+        backup(unique_id) { |f|
+          File.open(path, 'rb') { |chunk|
+            chunk.set_encoding(Encoding::ASCII_8BIT)
+            chunk.sync = true
+            chunk.binmode
+            IO.copy_stream(chunk, f)
+          }
+        }
+      rescue => error
+        log.error "backup failed. Delete corresponding files.", :err_msg => error.message
+      ensure
+        log.warn "disable_chunk_backup is true. #{dump_unique_id_hex(unique_id)} chunk is thrown away." if @disable_chunk_backup
         File.unlink(path) rescue nil
       end
 
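The new handle_broken_files keeps a corrupt chunk instead of silently deleting it: the raw bytes are streamed into a per-worker backup file through the Buffer#backup helper (added in buffer.rb below) before the original is unlinked. A minimal sketch of that copy-then-unlink flow, with hypothetical paths:

    # Minimal sketch of the backup-then-unlink flow (paths are hypothetical).
    require 'fileutils'

    broken_path = '/var/log/fluent/buffer/fsb.foo.b58f03cb86c9f514c.buf'
    backup_path = '/var/log/fluent/backup/worker0/my_output/58f03cb86c9f514c.log'

    FileUtils.mkdir_p(File.dirname(backup_path))
    File.open(backup_path, 'ab') do |f|
      File.open(broken_path, 'rb') do |chunk|
        IO.copy_stream(chunk, f)   # raw byte copy, no decoding attempted
      end
    end
    File.unlink(broken_path) rescue nil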
data/lib/fluent/plugin/buffer/file_chunk.rb
CHANGED
@@ -204,7 +204,7 @@ module Fluent
           end
         end
 
-        # used only for queued v0.12 buffer path
+        # used only for queued v0.12 buffer path or broken files
        def self.unique_id_from_path(path)
          if /\.(b|q)([0-9a-f]+)\.[^\/]*\Z/n =~ path # //n switch means explicit 'ASCII-8BIT' pattern
            return $2.scan(/../).map{|x| x.to_i(16) }.pack('C*')
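For context, the file name that unique_id_from_path parses encodes the chunk state ("b" = staged, "q" = queued) followed by the unique id in hex. A small illustration with a made-up path, using the regex from the hunk above:

    path = '/buffers/buffer.q58f03cb86c9f514c.log'   # made-up example
    if /\.(b|q)([0-9a-f]+)\.[^\/]*\Z/n =~ path
      state     = ($1 == 'b') ? :staged : :queued
      unique_id = $2.scan(/../).map { |x| x.to_i(16) }.pack('C*')
      state                     # => :queued
      unique_id.unpack1('H*')   # => "58f03cb86c9f514c"
    end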
data/lib/fluent/plugin/buffer.rb
CHANGED
@@ -16,6 +16,8 @@
 
 require 'fluent/plugin/base'
 require 'fluent/plugin/owned_by_mixin'
+require 'fluent/plugin_id'
+require 'fluent/plugin_helper'
 require 'fluent/unique_id'
 require 'fluent/ext_monitor_require'
 
@@ -24,7 +26,9 @@ module Fluent
     class Buffer < Base
       include OwnedByMixin
       include UniqueId::Mixin
+      include PluginId
       include MonitorMixin
+      include PluginHelper::Mixin # for metrics
 
       class BufferError < StandardError; end
       class BufferOverflowError < BufferError; end
@@ -39,6 +43,8 @@ module Fluent
 
       configured_in :buffer
 
+      helpers_internal :metrics
+
       # TODO: system total buffer limit size in bytes by SystemConfig
 
       config_param :chunk_limit_size, :size, default: DEFAULT_CHUNK_LIMIT_SIZE
@@ -60,6 +66,9 @@ module Fluent
       desc 'Compress buffered data.'
       config_param :compress, :enum, list: [:text, :gzip], default: :text
 
+      desc 'If true, chunks are thrown away when unrecoverable error happens'
+      config_param :disable_chunk_backup, :bool, default: false
+
       Metadata = Struct.new(:timekey, :tag, :variables, :seq) do
         def initialize(timekey, tag, variables)
           super(timekey, tag, variables, 0)
@@ -153,8 +162,11 @@ module Fluent
         end
       end
 
+      # for metrics
+      attr_reader :stage_size_metrics, :stage_length_metrics, :queue_size_metrics, :queue_length_metrics
+      attr_reader :available_buffer_space_ratios_metrics, :total_queued_size_metrics
+      attr_reader :newest_timekey_metrics, :oldest_timekey_metrics
       # for tests
-      attr_accessor :stage_size, :queue_size
       attr_reader :stage, :queue, :dequeued, :queued_num
 
       def initialize
@@ -171,12 +183,35 @@ module Fluent
        @queued_num = {} # metadata => int (number of queued chunks)
        @dequeued_num = {} # metadata => int (number of dequeued chunks)
 
-        @stage_size = @queue_size = 0
+        @stage_length_metrics = nil
+        @stage_size_metrics = nil
+        @queue_length_metrics = nil
+        @queue_size_metrics = nil
+        @available_buffer_space_ratios_metrics = nil
+        @total_queued_size_metrics = nil
+        @newest_timekey_metrics = nil
+        @oldest_timekey_metrics = nil
        @timekeys = Hash.new(0)
        @enable_update_timekeys = false
        @mutex = Mutex.new
      end
 
+      def stage_size
+        @stage_size_metrics.get
+      end
+
+      def stage_size=(value)
+        @stage_size_metrics.set(value)
+      end
+
+      def queue_size
+        @queue_size_metrics.get
+      end
+
+      def queue_size=(value)
+        @queue_size_metrics.set(value)
+      end
+
      def persistent?
        false
      end
@@ -187,6 +222,28 @@ module Fluent
        unless @queue_limit_length.nil?
          @total_limit_size = @chunk_limit_size * @queue_limit_length
        end
+        @stage_length_metrics = metrics_create(namespace: "fluentd", subsystem: "buffer", name: "stage_length",
+                                               help_text: 'Length of stage buffers', prefer_gauge: true)
+        @stage_length_metrics.set(0)
+        @stage_size_metrics = metrics_create(namespace: "fluentd", subsystem: "buffer", name: "stage_byte_size",
+                                             help_text: 'Total size of stage buffers', prefer_gauge: true)
+        @stage_size_metrics.set(0) # Ensure zero.
+        @queue_length_metrics = metrics_create(namespace: "fluentd", subsystem: "buffer", name: "queue_length",
+                                               help_text: 'Length of queue buffers', prefer_gauge: true)
+        @queue_length_metrics.set(0)
+        @queue_size_metrics = metrics_create(namespace: "fluentd", subsystem: "buffer", name: "queue_byte_size",
+                                             help_text: 'Total size of queue buffers', prefer_gauge: true)
+        @queue_size_metrics.set(0) # Ensure zero.
+        @available_buffer_space_ratios_metrics = metrics_create(namespace: "fluentd", subsystem: "buffer", name: "available_buffer_space_ratios",
+                                                                help_text: 'Ratio of available space in buffer', prefer_gauge: true)
+        @available_buffer_space_ratios_metrics.set(100) # Default is 100%.
+        @total_queued_size_metrics = metrics_create(namespace: "fluentd", subsystem: "buffer", name: "total_queued_size",
+                                                    help_text: 'Total size of stage and queue buffers', prefer_gauge: true)
+        @total_queued_size_metrics.set(0)
+        @newest_timekey_metrics = metrics_create(namespace: "fluentd", subsystem: "buffer", name: "newest_timekey",
+                                                 help_text: 'Newest timekey in buffer', prefer_gauge: true)
+        @oldest_timekey_metrics = metrics_create(namespace: "fluentd", subsystem: "buffer", name: "oldest_timekey",
+                                                 help_text: 'Oldest timekey in buffer', prefer_gauge: true)
      end
 
      def enable_update_timekeys
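metrics_create comes from the new metrics plugin helper (data/lib/fluent/plugin_helper/metrics.rb in the file list above); each call returns a metrics object exposing get/set/add/sub, backed by the bundled local metrics plugin unless another one is configured. A hedged sketch of how a third-party plugin might use the same API (the plugin and its fields are hypothetical, assuming the helper behaves as the calls above suggest):

    require 'fluent/plugin/input'

    module Fluent::Plugin
      class MyInput < Input                      # hypothetical plugin
        Fluent::Plugin.register_input('my_input', self)
        helpers :metrics

        def configure(conf)
          super
          # Same keyword arguments as the buffer metrics above.
          @read_bytes_metrics = metrics_create(namespace: "fluentd", subsystem: "my_input",
                                               name: "read_bytes", help_text: "Bytes read")
        end

        def record_read(data)                    # hypothetical hook
          @read_bytes_metrics.add(data.bytesize)
        end
      end
    end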
@@ -198,15 +255,15 @@ module Fluent
 
        @stage, @queue = resume
        @stage.each_pair do |metadata, chunk|
-          @stage_size += chunk.bytesize
+          @stage_size_metrics.add(chunk.bytesize)
        end
        @queue.each do |chunk|
          @queued_num[chunk.metadata] ||= 0
          @queued_num[chunk.metadata] += 1
-          @queue_size += chunk.bytesize
+          @queue_size_metrics.add(chunk.bytesize)
        end
        update_timekeys
-        log.debug "buffer started", instance: self.object_id, stage_size: @stage_size, queue_size: @queue_size
+        log.debug "buffer started", instance: self.object_id, stage_size: @stage_size_metrics.get, queue_size: @queue_size_metrics.get
      end
 
      def close
@@ -228,17 +285,19 @@ module Fluent
      def terminate
        super
        @dequeued = @stage = @queue = @queued_num = nil
-        @stage_size = @queue_size = 0
+        @stage_length_metrics = @stage_size_metrics = @queue_length_metrics = @queue_size_metrics = nil
+        @available_buffer_space_ratios_metrics = @total_queued_size_metrics = nil
+        @newest_timekey_metrics = @oldest_timekey_metrics = nil
        @timekeys.clear
      end
 
      def storable?
-        @total_limit_size > @stage_size + @queue_size
+        @total_limit_size > @stage_size_metrics.get + @queue_size_metrics.get
      end
 
      ## TODO: for back pressure feature
      # def used?(ratio)
-      #   @total_limit_size * ratio > @stage_size + @queue_size
+      #   @total_limit_size * ratio > @stage_size_metrics.get + @queue_size_metrics.get
      # end
 
      def resume
@@ -276,12 +335,14 @@ module Fluent
        unstaged_chunks = {} # metadata => [chunk, chunk, ...]
        chunks_to_enqueue = []
        staged_bytesizes_by_chunk = {}
+        # track internal BufferChunkOverflowError in write_step_by_step
+        buffer_chunk_overflow_errors = []
 
        begin
          # sort metadata to get lock of chunks in same order with other threads
          metadata_and_data.keys.sort.each do |metadata|
            data = metadata_and_data[metadata]
-            write_once(metadata, data, format: format, size: size) do |chunk, adding_bytesize|
+            write_once(metadata, data, format: format, size: size) do |chunk, adding_bytesize, error|
              chunk.mon_enter # add lock to prevent to be committed/rollbacked from other threads
              operated_chunks << chunk
              if chunk.staged?
@@ -296,6 +357,9 @@ module Fluent
                unstaged_chunks[metadata] ||= []
                unstaged_chunks[metadata] << chunk
              end
+              if error && !error.empty?
+                buffer_chunk_overflow_errors << error
+              end
            end
          end
 
@@ -344,7 +408,7 @@ module Fluent
        #
        staged_bytesizes_by_chunk.each do |chunk, bytesize|
          chunk.synchronize do
-            synchronize { @stage_size += bytesize }
+            synchronize { @stage_size_metrics.add(bytesize) }
            log.on_trace { log.trace { "chunk #{chunk.path} size_added: #{bytesize} new_size: #{chunk.bytesize}" } }
          end
        end
@@ -353,7 +417,7 @@ module Fluent
          if c.staged? && (enqueue || chunk_size_full?(c))
            m = c.metadata
            enqueue_chunk(m)
-            if unstaged_chunks[m]
+            if unstaged_chunks[m] && !unstaged_chunks[m].empty?
              u = unstaged_chunks[m].pop
              u.synchronize do
                if u.unstaged? && !chunk_size_full?(u)
@@ -361,7 +425,7 @@ module Fluent
                  u.metadata.seq = 0
                  synchronize {
                    @stage[m] = u.staged!
-                    @stage_size += u.bytesize
+                    @stage_size_metrics.add(u.bytesize)
                  }
                end
              end
@@ -388,6 +452,10 @@ module Fluent
          end
          chunk.mon_exit rescue nil # this may raise ThreadError for chunks already committed
        end
+        unless buffer_chunk_overflow_errors.empty?
+          # Notify delayed BufferChunkOverflowError here
+          raise BufferChunkOverflowError, buffer_chunk_overflow_errors.join(", ")
+        end
      end
    end
 
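With this change an oversized record no longer aborts the whole write: error strings are collected per chunk inside write_step_by_step and re-raised as one BufferChunkOverflowError only after the other chunks have been handled. Roughly, from a caller's point of view (hedged sketch; buffer, the format lambda, and the event payload are assumptions):

    begin
      buffer.write({metadata => events}, format: ->(es) { es.to_msgpack_stream })
    rescue Fluent::Plugin::Buffer::BufferChunkOverflowError => e
      # e.message is the joined list built above, e.g.
      # "a 1048577 bytes record (nth: 2) is larger than buffer chunk limit size (1048576)"
      log.error "some records exceeded the chunk limit", error: e.message
    end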
@@ -428,8 +496,8 @@ module Fluent
          chunk.enqueued!
        end
        bytesize = chunk.bytesize
-        @stage_size -= bytesize
-        @queue_size += bytesize
+        @stage_size_metrics.sub(bytesize)
+        @queue_size_metrics.add(bytesize)
      end
    end
    nil
@@ -446,7 +514,7 @@ module Fluent
          @queued_num[metadata] = @queued_num.fetch(metadata, 0) + 1
          chunk.enqueued!
        end
-        @queue_size += chunk.bytesize
+        @queue_size_metrics.add(chunk.bytesize)
      end
    end
 
@@ -512,7 +580,7 @@ module Fluent
        chunk = @dequeued.delete(chunk_id)
        return false unless chunk # already purged by other thread
        @queue.unshift(chunk)
-        log.trace "chunk taken back", instance: self.object_id, chunk_id: dump_unique_id_hex(chunk_id), metadata: chunk.metadata
+        log.on_trace { log.trace "chunk taken back", instance: self.object_id, chunk_id: dump_unique_id_hex(chunk_id), metadata: chunk.metadata }
        @queued_num[chunk.metadata] += 1 # BUG if nil
        @dequeued_num[chunk.metadata] -= 1
      end
@@ -531,7 +599,7 @@ module Fluent
        begin
          bytesize = chunk.bytesize
          chunk.purge
-          @queue_size -= bytesize
+          @queue_size_metrics.sub(bytesize)
        rescue => e
          log.error "failed to purge buffer chunk", chunk_id: dump_unique_id_hex(chunk_id), error_class: e.class, error: e
          log.error_backtrace
@@ -542,7 +610,7 @@ module Fluent
          @queued_num.delete(metadata)
          @dequeued_num.delete(metadata)
        end
-        log.trace "chunk purged", instance: self.object_id, chunk_id: dump_unique_id_hex(chunk_id), metadata: metadata
+        log.on_trace { log.trace "chunk purged", instance: self.object_id, chunk_id: dump_unique_id_hex(chunk_id), metadata: metadata }
      end
 
      nil
@@ -562,7 +630,7 @@ module Fluent
            log.error_backtrace
          end
        end
-        @queue_size = 0
+        @queue_size_metrics.set(0)
      end
    end
 
@@ -680,16 +748,14 @@ module Fluent
      modified_chunks = []
      modified_metadata = metadata
      get_next_chunk = ->(){
-        c = if staged_chunk_used
-              # Staging new chunk here is bad idea:
-              # Recovering whole state including newly staged chunks is much harder than current implementation.
-              modified_metadata = modified_metadata.dup_next
-              generate_chunk(modified_metadata)
-            else
-              synchronize { @stage[modified_metadata] ||= generate_chunk(modified_metadata).staged! }
-            end
-        modified_chunks << c
-        c
+        if staged_chunk_used
+          # Staging new chunk here is bad idea:
+          # Recovering whole state including newly staged chunks is much harder than current implementation.
+          modified_metadata = modified_metadata.dup_next
+          generate_chunk(modified_metadata)
+        else
+          synchronize { @stage[modified_metadata] ||= generate_chunk(modified_metadata).staged! }
+        end
      }
 
      writing_splits_index = 0
@@ -697,60 +763,116 @@ module Fluent
 
      while writing_splits_index < splits.size
        chunk = get_next_chunk.call
-        chunk.synchronize do
-          raise ShouldRetry unless chunk.writable?
-          staged_chunk_used = true if chunk.staged?
+        errors = []
+        # The chunk must be locked until being passed to &block.
+        chunk.mon_enter
+        modified_chunks << {chunk: chunk, adding_bytesize: 0, errors: errors}
 
-          original_bytesize = chunk.bytesize
-          begin
-            while writing_splits_index < splits.size
-              split = splits[writing_splits_index]
-              if format
-                chunk.concat(format.call(split), split.size)
-              else
-                chunk.append(split, compress: @compress)
+        raise ShouldRetry unless chunk.writable?
+        staged_chunk_used = true if chunk.staged?
+
+        original_bytesize = committed_bytesize = chunk.bytesize
+        begin
+          while writing_splits_index < splits.size
+            split = splits[writing_splits_index]
+            formatted_split = format ? format.call(split) : nil
+
+            if split.size == 1 # Check BufferChunkOverflowError
+              determined_bytesize = nil
+              if @compress != :text
+                determined_bytesize = nil
+              elsif formatted_split
+                determined_bytesize = formatted_split.bytesize
+              elsif split.first.respond_to?(:bytesize)
+                determined_bytesize = split.first.bytesize
              end
 
-              if chunk_size_over?(chunk) # split size is larger than difference between size_full? and size_over?
-                chunk.rollback
+              if determined_bytesize && determined_bytesize > @chunk_limit_size
+                # It is a obvious case that BufferChunkOverflowError should be raised here.
+                # But if it raises here, already processed 'split' or
+                # the proceeding 'split' will be lost completely.
+                # So it is a last resort to delay raising such a exception
+                errors << "a #{determined_bytesize} bytes record (nth: #{writing_splits_index}) is larger than buffer chunk limit size (#{@chunk_limit_size})"
+                writing_splits_index += 1
+                next
+              end
 
-                if split.size == 1 && original_bytesize == 0
-                  big_record_size = format ? format.call(split).bytesize : split.first.bytesize
-                  raise BufferChunkOverflowError, "a #{big_record_size}bytes record is larger than buffer chunk limit size"
-                end
+              if determined_bytesize.nil? || chunk.bytesize + determined_bytesize > @chunk_limit_size
+                # The split will (might) cause size over so keep already processed
+                # 'split' content here (allow performance regression a bit).
+                chunk.commit
+                committed_bytesize = chunk.bytesize
+              end
+            end
+
+            if format
+              chunk.concat(formatted_split, split.size)
+            else
+              chunk.append(split, compress: @compress)
+            end
+            adding_bytes = chunk.bytesize - committed_bytesize
+
+            if chunk_size_over?(chunk) # split size is larger than difference between size_full? and size_over?
+              chunk.rollback
+              committed_bytesize = chunk.bytesize
 
-                if chunk_size_full?(chunk) || split.size == 1
-                  enqueue_chunk_before_retry = true
+              if split.size == 1 # Check BufferChunkOverflowError again
+                if adding_bytes > @chunk_limit_size
+                  errors << "concatenated/appended a #{adding_bytes} bytes record (nth: #{writing_splits_index}) is larger than buffer chunk limit size (#{@chunk_limit_size})"
+                  writing_splits_index += 1
+                  next
                else
-                  splits_count *= 10
+                  # As already processed content is kept after rollback, then unstaged chunk should be queued.
+                  # After that, re-process current split again.
+                  # New chunk should be allocated, to do it, modify @stage and so on.
+                  synchronize { @stage.delete(modified_metadata) }
+                  staged_chunk_used = false
+                  chunk.unstaged!
+                  break
                end
+              end
 
-                raise ShouldRetry
+              if chunk_size_full?(chunk) || split.size == 1
+                enqueue_chunk_before_retry = true
+              else
+                splits_count *= 10
              end
 
-              writing_splits_index += 1
+              raise ShouldRetry
+            end
+
+            writing_splits_index += 1
 
-              if chunk_size_full?(chunk)
-                break
-              end
+            if chunk_size_full?(chunk)
+              break
            end
-          rescue
-            chunk.purge if chunk.unstaged? # unstaged chunk will leak unless purge it
-            raise
          end
-
-          block.call(chunk, chunk.bytesize - original_bytesize)
+        rescue
+          chunk.purge if chunk.unstaged? # unstaged chunk will leak unless purge it
+          raise
        end
+
+        modified_chunks.last[:adding_bytesize] = chunk.bytesize - original_bytesize
+      end
+      modified_chunks.each do |data|
+        block.call(data[:chunk], data[:adding_bytesize], data[:errors])
      end
    rescue ShouldRetry
-      modified_chunks.each do |mc|
-        mc.rollback rescue nil
-        if mc.unstaged?
-          mc.purge rescue nil
+      modified_chunks.each do |data|
+        chunk = data[:chunk]
+        chunk.rollback rescue nil
+        if chunk.unstaged?
+          chunk.purge rescue nil
        end
+        chunk.mon_exit rescue nil
      end
      enqueue_chunk(metadata) if enqueue_chunk_before_retry
      retry
+    ensure
+      modified_chunks.each do |data|
+        chunk = data[:chunk]
+        chunk.mon_exit
+      end
    end
 
    STATS_KEYS = [
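The rewritten loop leans on the chunk commit/rollback contract: chunk.commit is issued before any append that might overflow, so a later rollback rewinds only the uncommitted tail. A toy illustration of that contract (hedged; ToyChunk is a stand-in, not the real chunk class):

    class ToyChunk
      def initialize
        @data = +''
        @committed = 0
      end
      def append(str);  @data << str;                           end
      def commit;       @committed = @data.bytesize;            end
      def rollback;     @data = @data.byteslice(0, @committed); end
      def bytesize;     @data.bytesize;                         end
    end

    chunk = ToyChunk.new
    chunk.append('good record;')
    chunk.commit                        # keep what already fits
    chunk.append('oversized record ' * 1000)
    chunk.rollback                      # drops only the uncommitted tail
    chunk.bytesize                      # => 12, "good record;" survives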
@@ -765,28 +887,52 @@ module Fluent
    ]
 
    def statistics
-      stage_size, queue_size = @stage_size, @queue_size
+      stage_size, queue_size = @stage_size_metrics.get, @queue_size_metrics.get
      buffer_space = 1.0 - ((stage_size + queue_size * 1.0) / @total_limit_size)
+      @stage_length_metrics.set(@stage.size)
+      @queue_length_metrics.set(@queue.size)
+      @available_buffer_space_ratios_metrics.set(buffer_space * 100)
+      @total_queued_size_metrics.set(stage_size + queue_size)
      stats = {
-        'stage_length' => @stage.size,
+        'stage_length' => @stage_length_metrics.get,
        'stage_byte_size' => stage_size,
-        'queue_length' => @queue.size,
+        'queue_length' => @queue_length_metrics.get,
        'queue_byte_size' => queue_size,
-        'available_buffer_space_ratios' => buffer_space.round(1) * 100,
-        'total_queued_size' => stage_size + queue_size,
+        'available_buffer_space_ratios' => @available_buffer_space_ratios_metrics.get.round(1),
+        'total_queued_size' => @total_queued_size_metrics.get,
      }
 
      tkeys = timekeys
      if (m = tkeys.min)
-        stats['oldest_timekey'] = m
+        @oldest_timekey_metrics.set(m)
+        stats['oldest_timekey'] = @oldest_timekey_metrics.get
      end
      if (m = tkeys.max)
-        stats['newest_timekey'] = m
+        @newest_timekey_metrics.set(m)
+        stats['newest_timekey'] = @newest_timekey_metrics.get
      end
 
      { 'buffer' => stats }
    end
 
+    def backup(chunk_unique_id)
+      unique_id = dump_unique_id_hex(chunk_unique_id)
+
+      if @disable_chunk_backup
+        log.warn "disable_chunk_backup is true. #{unique_id} chunk is not backed up."
+        return
+      end
+
+      safe_owner_id = owner.plugin_id.gsub(/[ "\/\\:;|*<>?]/, '_')
+      backup_base_dir = system_config.root_dir || DEFAULT_BACKUP_DIR
+      backup_file = File.join(backup_base_dir, 'backup', "worker#{fluentd_worker_id}", safe_owner_id, "#{unique_id}.log")
+      backup_dir = File.dirname(backup_file)
+
+      log.warn "bad chunk is moved to #{backup_file}"
+      FileUtils.mkdir_p(backup_dir, mode: system_config.dir_permission || Fluent::DEFAULT_DIR_PERMISSION) unless Dir.exist?(backup_dir)
+      File.open(backup_file, 'ab', system_config.file_permission || Fluent::DEFAULT_FILE_PERMISSION) { |f| yield f }
+    end
+
    private
 
    def optimistic_queued?(metadata = nil)
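backup is shared by buf_file and buf_file_single: unless disable_chunk_backup is set, a bad chunk ends up under <root_dir>/backup/worker<N>/<plugin_id>/<unique_id>.log. statistics now routes the same numbers through the metrics objects, so the monitor output keeps its historical shape, roughly (values are illustrative):

    # Shape of Buffer#statistics output; timekey entries appear only when
    # the buffer is keyed by time.
    {
      'buffer' => {
        'stage_length'                  => 1,
        'stage_byte_size'               => 2048,
        'queue_length'                  => 3,
        'queue_byte_size'               => 614400,
        'available_buffer_space_ratios' => 99.9,
        'total_queued_size'             => 616448,
        'oldest_timekey'                => 1_700_000_000,
        'newest_timekey'                => 1_700_003_600,
      }
    }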
data/lib/fluent/plugin/filter.rb
CHANGED
@@ -28,13 +28,47 @@ module Fluent
      include PluginLoggerMixin
      include PluginHelper::Mixin
 
-      helpers_internal :event_emitter
+      helpers_internal :event_emitter, :metrics
 
      attr_reader :has_filter_with_time
 
      def initialize
        super
        @has_filter_with_time = has_filter_with_time?
+        @emit_records_metrics = nil
+        @emit_size_metrics = nil
+        @counter_mutex = Mutex.new
+        @enable_size_metrics = false
+      end
+
+      def emit_records
+        @emit_records_metrics.get
+      end
+
+      def emit_size
+        @emit_size_metrics.get
+      end
+
+      def configure(conf)
+        super
+
+        @emit_records_metrics = metrics_create(namespace: "fluentd", subsystem: "filter", name: "emit_records", help_text: "Number of count emit records")
+        @emit_size_metrics = metrics_create(namespace: "fluentd", subsystem: "filter", name: "emit_size", help_text: "Total size of emit events")
+        @enable_size_metrics = !!system_config.enable_size_metrics
+      end
+
+      def statistics
+        stats = {
+          'emit_records' => @emit_records_metrics.get,
+          'emit_size' => @emit_size_metrics.get,
+        }
+
+        { 'filter' => stats }
+      end
+
+      def measure_metrics(es)
+        @emit_records_metrics.add(es.size)
+        @emit_size_metrics.add(es.to_msgpack_stream.bytesize) if @enable_size_metrics
      end
 
      def filter(tag, time, record)
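Every filter now gets emit_records/emit_size counters for free; measure_metrics is invoked per filtered event stream and the totals surface via statistics (and thus via in_monitor_agent). Illustrative output, assuming the keys defined above:

    # emit_size stays 0 unless enable_size_metrics is on in <system>.
    filter.statistics
    # => { 'filter' => { 'emit_records' => 1234, 'emit_size' => 0 } }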
data/lib/fluent/plugin/filter_record_transformer.rb
CHANGED
@@ -316,7 +316,7 @@ module Fluent::Plugin
      end
 
      (Object.instance_methods).each do |m|
-        undef_method m unless m.to_s =~ /^__|respond_to_missing\?|object_id|public_methods|instance_eval|method_missing|define_singleton_method|respond_to\?|new_ostruct_member|^class$/
+        undef_method m unless /^__|respond_to_missing\?|object_id|public_methods|instance_eval|method_missing|define_singleton_method|respond_to\?|new_ostruct_member|^class$/.match?(m.to_s)
      end
    end
  end
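Switching =~ to Regexp#match? avoids allocating a MatchData and setting $~ on every method-name check, which adds up in this hot loop. For example:

    re = /^__|^class$/
    re.match?('class')   # => true; no MatchData allocated, $~ left untouched
    'class' =~ re        # => 0; allocates MatchData and sets $~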
data/lib/fluent/plugin/in_forward.rb
CHANGED
@@ -40,7 +40,7 @@ module Fluent::Plugin
    config_param :backlog, :integer, default: nil
    # SO_LINGER 0 to send RST rather than FIN to avoid lots of connections sitting in TIME_WAIT at src
    desc 'The timeout time used to set linger option.'
-    config_param :linger_timeout, :integer, default: 0
+    config_param :linger_timeout, :integer, default: nil, deprecated: "use transport directive"
    # This option is for Cool.io's loop wait timeout to avoid loop stuck at shutdown. Almost users don't need to change this value.
    config_param :blocking_timeout, :time, default: 0.5
    desc 'Try to resolve hostname from IP addresses or not.'
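linger_timeout is deprecated in favor of the equivalent setting in the <transport> section handled by the server plugin helper. The SO_LINGER-0 trick the comment describes maps to the standard socket option; in plain Ruby it looks like this (the endpoint is hypothetical):

    require 'socket'

    # SO_LINGER with timeout 0: close() sends RST instead of FIN,
    # so the peer side does not pile up sockets in TIME_WAIT.
    sock = TCPSocket.new('127.0.0.1', 24224)
    sock.setsockopt(Socket::Option.linger(true, 0))
    sock.close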
@@ -430,7 +430,7 @@ module Fluent::Plugin
      end
      _ping, hostname, shared_key_salt, shared_key_hexdigest, username, password_digest = message
 
-      node = @nodes.select { |n| n[:address].include?(remote_addr) rescue false }.first
+      node = @nodes.find{|n| n[:address].include?(remote_addr) rescue false }
      if !node && !@security.allow_anonymous_source
        log.warn "Anonymous client disallowed", address: remote_addr, hostname: hostname
        return false, "anonymous source host '#{remote_addr}' denied", nil
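Enumerable#find returns as soon as a node matches, while select { }.first always scans the whole list and builds an intermediate array; the result is identical. For example:

    require 'ipaddr'

    nodes = [{ address: IPAddr.new('192.168.0.0/24') },
             { address: IPAddr.new('10.0.0.0/8') }]

    nodes.find   { |n| n[:address].include?('10.1.2.3') }        # stops at the first hit
    nodes.select { |n| n[:address].include?('10.1.2.3') }.first  # scans every node first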