fluentd 0.14.9 → 0.14.10
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of fluentd might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/.travis.yml +2 -0
- data/ChangeLog +44 -0
- data/appveyor.yml +1 -0
- data/code-of-conduct.md +3 -0
- data/fluentd.gemspec +1 -1
- data/lib/fluent/command/cat.rb +11 -3
- data/lib/fluent/compat/output.rb +6 -3
- data/lib/fluent/compat/parser.rb +2 -0
- data/lib/fluent/config/section.rb +1 -1
- data/lib/fluent/env.rb +1 -1
- data/lib/fluent/plugin/filter_record_transformer.rb +12 -30
- data/lib/fluent/plugin/in_forward.rb +50 -169
- data/lib/fluent/plugin/in_monitor_agent.rb +8 -4
- data/lib/fluent/plugin/in_syslog.rb +13 -7
- data/lib/fluent/plugin/in_tail.rb +29 -14
- data/lib/fluent/plugin/in_tcp.rb +54 -14
- data/lib/fluent/plugin/in_udp.rb +49 -13
- data/lib/fluent/plugin/out_file.rb +30 -14
- data/lib/fluent/plugin/out_forward.rb +199 -173
- data/lib/fluent/plugin/output.rb +71 -46
- data/lib/fluent/plugin/parser_json.rb +1 -1
- data/lib/fluent/plugin_helper.rb +2 -0
- data/lib/fluent/plugin_helper/event_loop.rb +24 -6
- data/lib/fluent/plugin_helper/inject.rb +12 -1
- data/lib/fluent/plugin_helper/server.rb +494 -0
- data/lib/fluent/plugin_helper/socket.rb +101 -0
- data/lib/fluent/plugin_helper/socket_option.rb +84 -0
- data/lib/fluent/plugin_helper/timer.rb +1 -0
- data/lib/fluent/test/driver/base.rb +45 -13
- data/lib/fluent/version.rb +1 -1
- data/lib/fluent/winsvc.rb +1 -1
- data/test/compat/test_parser.rb +10 -0
- data/test/config/test_configurable.rb +20 -0
- data/test/helper.rb +36 -1
- data/test/plugin/test_filter_record_transformer.rb +31 -103
- data/test/plugin/test_in_forward.rb +13 -75
- data/test/plugin/test_in_monitor_agent.rb +65 -35
- data/test/plugin/test_in_syslog.rb +39 -3
- data/test/plugin/test_in_tcp.rb +78 -62
- data/test/plugin/test_in_udp.rb +101 -80
- data/test/plugin/test_out_file.rb +17 -0
- data/test/plugin/test_out_forward.rb +155 -125
- data/test/plugin/test_output_as_buffered.rb +4 -2
- data/test/plugin_helper/test_inject.rb +21 -0
- data/test/plugin_helper/test_server.rb +905 -0
- data/test/test_event_time.rb +3 -1
- data/test/test_output.rb +30 -1
- data/test/test_test_drivers.rb +5 -2
- metadata +19 -6
@@ -18,8 +18,7 @@ require 'json'
|
|
18
18
|
require 'webrick'
|
19
19
|
require 'cgi'
|
20
20
|
|
21
|
-
require '
|
22
|
-
|
21
|
+
require 'fluent/config/types'
|
23
22
|
require 'fluent/plugin/input'
|
24
23
|
require 'fluent/plugin/output'
|
25
24
|
require 'fluent/plugin/multi_output'
|
@@ -35,6 +34,7 @@ module Fluent::Plugin
|
|
35
34
|
config_param :port, :integer, default: 24220
|
36
35
|
config_param :tag, :string, default: nil
|
37
36
|
config_param :emit_interval, :time, default: 60
|
37
|
+
config_param :include_config, :bool, default: true
|
38
38
|
|
39
39
|
class MonitorServlet < WEBrick::HTTPServlet::AbstractServlet
|
40
40
|
def initialize(server, agent)
|
@@ -78,12 +78,16 @@ module Fluent::Plugin
|
|
78
78
|
|
79
79
|
# if ?debug=1 is set, set :with_debug_info for get_monitor_info
|
80
80
|
# and :pretty_json for render_json_error
|
81
|
-
opts = {}
|
81
|
+
opts = {with_config: @agent.include_config}
|
82
82
|
if s = qs['debug'] and s[0]
|
83
83
|
opts[:with_debug_info] = true
|
84
84
|
opts[:pretty_json] = true
|
85
85
|
end
|
86
86
|
|
87
|
+
if with_config = get_search_parameter(qs, 'with_config'.freeze)
|
88
|
+
opts[:with_config] = Fluent::Config.bool_value(with_config)
|
89
|
+
end
|
90
|
+
|
87
91
|
if tag = get_search_parameter(qs, 'tag'.freeze)
|
88
92
|
# ?tag= to search an output plugin by match pattern
|
89
93
|
if obj = @agent.plugin_info_by_tag(tag, opts)
|
@@ -329,7 +333,7 @@ module Fluent::Plugin
|
|
329
333
|
obj['plugin_id'] = pe.plugin_id
|
330
334
|
obj['plugin_category'] = plugin_category(pe)
|
331
335
|
obj['type'] = pe.config['@type']
|
332
|
-
obj['config'] = pe.config if
|
336
|
+
obj['config'] = pe.config if opts[:with_config]
|
333
337
|
|
334
338
|
# run MONITOR_INFO in plugins' instance context and store the info to obj
|
335
339
|
MONITOR_INFO.each_pair {|key,code|
|
@@ -85,6 +85,10 @@ module Fluent::Plugin
|
|
85
85
|
config_param :include_source_host, :bool, default: false
|
86
86
|
desc 'Specify key of source host when include_source_host is true.'
|
87
87
|
config_param :source_host_key, :string, default: 'source_host'.freeze
|
88
|
+
desc 'The field name of the priority.'
|
89
|
+
config_param :priority_key, :string, default: nil
|
90
|
+
desc 'The field name of the facility.'
|
91
|
+
config_param :facility_key, :string, default: nil
|
88
92
|
config_param :blocking_timeout, :time, default: 0.5
|
89
93
|
config_param :message_length_limit, :size, default: 2048
|
90
94
|
|
@@ -141,8 +145,15 @@ module Fluent::Plugin
|
|
141
145
|
end
|
142
146
|
|
143
147
|
pri ||= record.delete('pri')
|
148
|
+
facility = FACILITY_MAP[pri >> 3]
|
149
|
+
priority = PRIORITY_MAP[pri & 0b111]
|
150
|
+
|
151
|
+
record[@priority_key] = priority if @priority_key
|
152
|
+
record[@facility_key] = facility if @facility_key
|
144
153
|
record[@source_host_key] = addr[2] if @include_source_host
|
145
|
-
|
154
|
+
|
155
|
+
tag = "#{@tag}.#{facility}.#{priority}"
|
156
|
+
emit(tag, time, record)
|
146
157
|
end
|
147
158
|
rescue => e
|
148
159
|
log.error "invalid input", data: data, error: e
|
@@ -168,12 +179,7 @@ module Fluent::Plugin
|
|
168
179
|
end
|
169
180
|
end
|
170
181
|
|
171
|
-
def emit(
|
172
|
-
facility = FACILITY_MAP[pri >> 3]
|
173
|
-
priority = PRIORITY_MAP[pri & 0b111]
|
174
|
-
|
175
|
-
tag = "#{@tag}.#{facility}.#{priority}"
|
176
|
-
|
182
|
+
def emit(tag, time, record)
|
177
183
|
router.emit(tag, time, record)
|
178
184
|
rescue => e
|
179
185
|
log.error "syslog failed to emit", error: e, tag: tag, record: Yajl.dump(record)
|
@@ -164,6 +164,12 @@ module Fluent::Plugin
|
|
164
164
|
super
|
165
165
|
end
|
166
166
|
|
167
|
+
def close
|
168
|
+
super
|
169
|
+
# close file handles after all threads have stopped (in #close of thread plugin helper)
|
170
|
+
close_watcher_handles
|
171
|
+
end
|
172
|
+
|
167
173
|
def expand_paths
|
168
174
|
date = Time.now
|
169
175
|
paths = []
|
@@ -234,18 +240,27 @@ module Fluent::Plugin
|
|
234
240
|
|
235
241
|
def stop_watchers(paths, immediate = false, unwatched = false)
|
236
242
|
paths.each { |path|
|
237
|
-
tw = @tails
|
243
|
+
tw = @tails[path]
|
238
244
|
if tw
|
239
245
|
tw.unwatched = unwatched
|
240
246
|
if immediate
|
241
|
-
|
247
|
+
detach_watcher(tw, false)
|
242
248
|
else
|
243
|
-
|
249
|
+
detach_watcher_after_rotate_wait(tw)
|
244
250
|
end
|
245
251
|
end
|
246
252
|
}
|
247
253
|
end
|
248
254
|
|
255
|
+
def close_watcher_handles
|
256
|
+
@tails.keys.each do |path|
|
257
|
+
tw = @tails.delete(path)
|
258
|
+
if tw
|
259
|
+
tw.close
|
260
|
+
end
|
261
|
+
end
|
262
|
+
end
|
263
|
+
|
249
264
|
# refresh_watchers calls @tails.keys so we don't use stop_watcher -> start_watcher sequence for safety.
|
250
265
|
def update_watcher(path, pe)
|
251
266
|
if @pf
|
@@ -256,24 +271,25 @@ module Fluent::Plugin
|
|
256
271
|
end
|
257
272
|
rotated_tw = @tails[path]
|
258
273
|
@tails[path] = setup_watcher(path, pe)
|
259
|
-
|
274
|
+
detach_watcher_after_rotate_wait(rotated_tw) if rotated_tw
|
260
275
|
end
|
261
276
|
|
262
277
|
# TailWatcher#close is called by another thread at shutdown phase.
|
263
278
|
# It causes 'can't modify string; temporarily locked' error in IOHandler
|
264
279
|
# so adding close_io argument to avoid this problem.
|
265
280
|
# At shutdown, IOHandler's io will be released automatically after detaching from the event loop
|
266
|
-
def
|
267
|
-
tw.
|
281
|
+
def detach_watcher(tw, close_io = true)
|
282
|
+
tw.detach
|
283
|
+
tw.close if close_io
|
268
284
|
flush_buffer(tw)
|
269
285
|
if tw.unwatched && @pf
|
270
286
|
@pf[tw.path].update_pos(PositionFile::UNWATCHED_POSITION)
|
271
287
|
end
|
272
288
|
end
|
273
289
|
|
274
|
-
def
|
290
|
+
def detach_watcher_after_rotate_wait(tw)
|
275
291
|
timer_execute(:in_tail_close_watcher, @rotate_wait, repeat: false) do
|
276
|
-
|
292
|
+
detach_watcher(tw)
|
277
293
|
end
|
278
294
|
end
|
279
295
|
|
@@ -435,14 +451,13 @@ module Fluent::Plugin
|
|
435
451
|
def detach
|
436
452
|
@timer_trigger.detach if @enable_watch_timer && @timer_trigger.attached?
|
437
453
|
@stat_trigger.detach if @stat_trigger.attached?
|
454
|
+
@io_handler.on_notify if @io_handler
|
438
455
|
end
|
439
456
|
|
440
|
-
def close
|
441
|
-
if
|
442
|
-
@io_handler.on_notify
|
457
|
+
def close
|
458
|
+
if @io_handler
|
443
459
|
@io_handler.close
|
444
460
|
end
|
445
|
-
detach
|
446
461
|
end
|
447
462
|
|
448
463
|
def on_notify
|
@@ -570,8 +585,8 @@ module Fluent::Plugin
|
|
570
585
|
else
|
571
586
|
@buffer << @io.readpartial(2048, @iobuf)
|
572
587
|
end
|
573
|
-
while
|
574
|
-
@lines <<
|
588
|
+
while idx = @buffer.index("\n".freeze)
|
589
|
+
@lines << @buffer.slice!(0, idx + 1)
|
575
590
|
end
|
576
591
|
if @lines.size >= @read_lines_limit
|
577
592
|
# not to use too much memory in case the file is very large
|
data/lib/fluent/plugin/in_tcp.rb
CHANGED
@@ -14,28 +14,68 @@
|
|
14
14
|
# limitations under the License.
|
15
15
|
#
|
16
16
|
|
17
|
-
require '
|
17
|
+
require 'fluent/plugin/input'
|
18
18
|
|
19
|
-
|
19
|
+
module Fluent::Plugin
|
20
|
+
class TcpInput < Input
|
21
|
+
Fluent::Plugin.register_input('tcp', self)
|
20
22
|
|
21
|
-
|
22
|
-
|
23
|
-
|
23
|
+
helpers :server, :parser, :extract, :compat_parameters
|
24
|
+
|
25
|
+
desc 'Tag of output events.'
|
26
|
+
config_param :tag, :string
|
27
|
+
desc 'The port to listen to.'
|
28
|
+
config_param :port, :integer, default: 5170
|
29
|
+
desc 'The bind address to listen to.'
|
30
|
+
config_param :bind, :string, default: '0.0.0.0'
|
31
|
+
|
32
|
+
desc "The field name of the client's hostname."
|
33
|
+
config_param :source_host_key, :string, default: nil, deprecated: "use source_hostname_key instead."
|
34
|
+
desc "The field name of the client's hostname."
|
35
|
+
config_param :source_hostname_key, :string, default: nil
|
36
|
+
|
37
|
+
config_param :blocking_timeout, :time, default: 0.5
|
24
38
|
|
25
|
-
config_set_default :port, 5170
|
26
39
|
desc 'The payload is read up to this character.'
|
27
40
|
config_param :delimiter, :string, default: "\n" # syslog family add "\n" to each message and this seems only way to split messages in tcp stream
|
28
41
|
|
29
|
-
def
|
30
|
-
|
42
|
+
def configure(conf)
|
43
|
+
compat_parameters_convert(conf, :parser)
|
44
|
+
super
|
45
|
+
@_event_loop_blocking_timeout = @blocking_timeout
|
46
|
+
@source_hostname_key ||= @source_host_key if @source_host_key
|
47
|
+
|
48
|
+
@parser = parser_create
|
49
|
+
end
|
50
|
+
|
51
|
+
def start
|
52
|
+
super
|
53
|
+
|
54
|
+
@buffer = ''
|
55
|
+
server_create(:in_tcp_server, @port, proto: :tcp, bind: @bind) do |data, conn|
|
56
|
+
@buffer << data
|
57
|
+
begin
|
58
|
+
pos = 0
|
59
|
+
while i = @buffer.index(@delimiter, pos)
|
60
|
+
msg = @buffer[pos...i]
|
61
|
+
pos = i + @delimiter.length
|
62
|
+
|
63
|
+
@parser.parse(msg) do |time, record|
|
64
|
+
unless time && record
|
65
|
+
log.warn "pattern not match", message: msg
|
66
|
+
next
|
67
|
+
end
|
31
68
|
|
32
|
-
|
33
|
-
|
34
|
-
|
69
|
+
tag = extract_tag_from_record(record)
|
70
|
+
tag ||= @tag
|
71
|
+
time ||= extract_time_from_record(record) || Fluent::EventTime.now
|
72
|
+
record[@source_hostname_key] = conn.remote_host if @source_hostname_key
|
73
|
+
router.emit(tag, time, record)
|
74
|
+
end
|
75
|
+
end
|
76
|
+
@buffer.slice!(0, pos) if pos > 0
|
77
|
+
end
|
35
78
|
end
|
36
|
-
client = ServerEngine::SocketManager::Client.new(socket_manager_path)
|
37
|
-
lsock = client.listen_tcp(@bind, @port)
|
38
|
-
Coolio::TCPServer.new(lsock, nil, SocketUtil::TcpHandler, log, @delimiter, callback)
|
39
79
|
end
|
40
80
|
end
|
41
81
|
end
|
data/lib/fluent/plugin/in_udp.rb
CHANGED
@@ -14,24 +14,60 @@
|
|
14
14
|
# limitations under the License.
|
15
15
|
#
|
16
16
|
|
17
|
-
require 'fluent/plugin/
|
17
|
+
require 'fluent/plugin/input'
|
18
18
|
|
19
|
-
module Fluent
|
20
|
-
class UdpInput <
|
21
|
-
Plugin.register_input('udp', self)
|
19
|
+
module Fluent::Plugin
|
20
|
+
class UdpInput < Input
|
21
|
+
Fluent::Plugin.register_input('udp', self)
|
22
|
+
|
23
|
+
helpers :server, :parser, :extract, :compat_parameters
|
24
|
+
|
25
|
+
desc 'Tag of output events.'
|
26
|
+
config_param :tag, :string
|
27
|
+
desc 'The port to listen to.'
|
28
|
+
config_param :port, :integer, default: 5160
|
29
|
+
desc 'The bind address to listen to.'
|
30
|
+
config_param :bind, :string, default: '0.0.0.0'
|
31
|
+
|
32
|
+
desc "The field name of the client's hostname."
|
33
|
+
config_param :source_host_key, :string, default: nil, deprecated: "use source_hostname_key instead."
|
34
|
+
desc "The field name of the client's hostname."
|
35
|
+
config_param :source_hostname_key, :string, default: nil
|
22
36
|
|
23
|
-
config_set_default :port, 5160
|
24
37
|
config_param :body_size_limit, :size, default: 4096
|
25
38
|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
39
|
+
config_param :blocking_timeout, :time, default: 0.5
|
40
|
+
|
41
|
+
def configure(conf)
|
42
|
+
compat_parameters_convert(conf, :parser)
|
43
|
+
super
|
44
|
+
@_event_loop_blocking_timeout = @blocking_timeout
|
45
|
+
@source_hostname_key ||= @source_host_key if @source_host_key
|
46
|
+
|
47
|
+
@parser = parser_create
|
48
|
+
end
|
49
|
+
|
50
|
+
def start
|
51
|
+
super
|
52
|
+
|
53
|
+
log.info "listening udp socket", bind: @bind, port: @port
|
54
|
+
server_create(:in_udp_server, @port, proto: :udp, bind: @bind, max_bytes: @body_size_limit) do |data, sock|
|
55
|
+
data.chomp!
|
56
|
+
begin
|
57
|
+
@parser.parse(data) do |time, record|
|
58
|
+
unless time && record
|
59
|
+
log.warn "pattern not match", data: data
|
60
|
+
next
|
61
|
+
end
|
62
|
+
|
63
|
+
tag = extract_tag_from_record(record)
|
64
|
+
tag ||= @tag
|
65
|
+
time ||= extract_time_from_record(record) || Fluent::EventTime.now
|
66
|
+
record[@source_hostname_key] = sock.remote_host if @source_hostname_key
|
67
|
+
router.emit(tag, time, record)
|
68
|
+
end
|
69
|
+
end
|
31
70
|
end
|
32
|
-
client = ServerEngine::SocketManager::Client.new(socket_manager_path)
|
33
|
-
@usock = client.listen_udp(@bind, @port)
|
34
|
-
SocketUtil::UdpHandler.new(@usock, log, @body_size_limit, callback)
|
35
71
|
end
|
36
72
|
end
|
37
73
|
end
|
@@ -113,25 +113,41 @@ module Fluent::Plugin
|
|
113
113
|
end
|
114
114
|
|
115
115
|
path_suffix = @add_path_suffix ? @path_suffix : ''
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
116
|
+
path_timekey = if @chunk_key_time
|
117
|
+
@as_secondary ? @primary_instance.buffer_config.timekey : @buffer_config.timekey
|
118
|
+
else
|
119
|
+
nil
|
120
|
+
end
|
121
|
+
@path_template = generate_path_template(@path, path_timekey, @append, @compress_method, path_suffix: path_suffix, time_slice_format: configured_time_slice_format)
|
122
|
+
|
123
|
+
if @as_secondary
|
124
|
+
# When this plugin is configured as secondary, the primary plugin may have a tag key that this plugin does not have.
|
125
|
+
# Increment placeholder can make another output file per chunk tag/keys even if original path doesn't include it.
|
126
|
+
placeholder_validators(:path, @path_template).select{|v| v.type == :time }.each do |v|
|
127
|
+
v.validate!
|
128
|
+
end
|
129
|
+
else
|
130
|
+
placeholder_validate!(:path, @path_template)
|
131
|
+
|
132
|
+
max_tag_index = get_placeholders_tag(@path_template).max || 1
|
133
|
+
max_tag_index = 1 if max_tag_index < 1
|
134
|
+
dummy_tag = (['a'] * max_tag_index).join('.')
|
135
|
+
dummy_record_keys = get_placeholders_keys(@path_template) || ['message']
|
136
|
+
dummy_record = Hash[dummy_record_keys.zip(['data'] * dummy_record_keys.size)]
|
137
|
+
|
138
|
+
test_meta1 = metadata_for_test(dummy_tag, Fluent::Engine.now, dummy_record)
|
139
|
+
test_path = extract_placeholders(@path_template, test_meta1)
|
140
|
+
unless ::Fluent::FileUtil.writable_p?(test_path)
|
141
|
+
raise Fluent::ConfigError, "out_file: `#{test_path}` is not writable"
|
142
|
+
end
|
130
143
|
end
|
131
144
|
|
132
145
|
@formatter = formatter_create
|
133
146
|
|
134
147
|
if @symlink_path && @buffer.respond_to?(:path)
|
148
|
+
if @as_secondary
|
149
|
+
raise Fluent::ConfigError, "symlink_path option is unavailable in <secondary>: consider to use secondary_file plugin"
|
150
|
+
end
|
135
151
|
if Fluent.windows?
|
136
152
|
log.warn "symlink_path is unavailable on Windows platform. disabled."
|
137
153
|
@symlink_path = nil
|
@@ -23,16 +23,21 @@ require 'fluent/compat/socket_util'
|
|
23
23
|
module Fluent::Plugin
|
24
24
|
class ForwardOutput < Output
|
25
25
|
class Error < StandardError; end
|
26
|
-
class
|
26
|
+
class NoNodesAvailable < Error; end
|
27
27
|
class ConnectionClosedError < Error; end
|
28
|
-
class ACKTimeoutError < Error; end
|
29
28
|
|
30
29
|
Fluent::Plugin.register_output('forward', self)
|
31
30
|
|
31
|
+
helpers :socket, :server, :timer, :thread, :compat_parameters
|
32
|
+
|
32
33
|
LISTEN_PORT = 24224
|
33
34
|
|
35
|
+
PROCESS_CLOCK_ID = Process::CLOCK_MONOTONIC_RAW rescue Process::CLOCK_MONOTONIC
|
36
|
+
|
34
37
|
desc 'The timeout time when sending event logs.'
|
35
38
|
config_param :send_timeout, :time, default: 60
|
39
|
+
# TODO: add linger_timeout, recv_timeout
|
40
|
+
|
36
41
|
desc 'The transport protocol to use for heartbeats.(udp,tcp,none)'
|
37
42
|
config_param :heartbeat_type, :enum, list: [:tcp, :udp, :none], default: :tcp
|
38
43
|
desc 'The interval of the heartbeat packer.'
|
@@ -41,8 +46,6 @@ module Fluent::Plugin
|
|
41
46
|
config_param :recover_wait, :time, default: 10
|
42
47
|
desc 'The hard timeout used to detect server failure.'
|
43
48
|
config_param :hard_timeout, :time, default: 60
|
44
|
-
desc 'Set TTL to expire DNS cache in seconds.'
|
45
|
-
config_param :expire_dns_cache, :time, default: nil # 0 means disable cache
|
46
49
|
desc 'The threshold parameter used to detect server faults.'
|
47
50
|
config_param :phi_threshold, :integer, default: 16
|
48
51
|
desc 'Use the "Phi accrual failure detector" to detect server failure.'
|
@@ -50,14 +53,20 @@ module Fluent::Plugin
|
|
50
53
|
|
51
54
|
desc 'Change the protocol to at-least-once.'
|
52
55
|
config_param :require_ack_response, :bool, default: false # require in_forward to respond with ack
|
56
|
+
|
57
|
+
## The reason for the default value of :ack_response_timeout:
|
58
|
+
# Linux default tcp_syn_retries is 5 (in many environments)
|
59
|
+
# 3 + 6 + 12 + 24 + 48 + 96 -> 189 (sec)
|
53
60
|
desc 'This option is used when require_ack_response is true.'
|
54
61
|
config_param :ack_response_timeout, :time, default: 190
|
55
|
-
|
56
|
-
config_param :read_length, :size, default: 512 # 512bytes
|
62
|
+
|
57
63
|
desc 'The interval while reading data from server'
|
58
64
|
config_param :read_interval_msec, :integer, default: 50 # 50ms
|
59
|
-
|
60
|
-
|
65
|
+
desc 'Reading data size from server'
|
66
|
+
config_param :read_length, :size, default: 512 # 512bytes
|
67
|
+
|
68
|
+
desc 'Set TTL to expire DNS cache in seconds.'
|
69
|
+
config_param :expire_dns_cache, :time, default: nil # 0 means disable cache
|
61
70
|
desc 'Enable client-side DNS round robin.'
|
62
71
|
config_param :dns_round_robin, :bool, default: false # heartbeat_type 'udp' is not available for this
|
63
72
|
|
@@ -107,10 +116,15 @@ module Fluent::Plugin
|
|
107
116
|
@nodes = [] #=> [Node]
|
108
117
|
@loop = nil
|
109
118
|
@thread = nil
|
110
|
-
|
119
|
+
|
120
|
+
@usock = nil
|
121
|
+
@sock_ack_waiting = nil
|
122
|
+
@sock_ack_waiting_mutex = nil
|
111
123
|
end
|
112
124
|
|
113
125
|
def configure(conf)
|
126
|
+
compat_parameters_convert(conf, :buffer, default_chunk_key: 'tag')
|
127
|
+
|
114
128
|
super
|
115
129
|
|
116
130
|
unless @chunk_key_tag
|
@@ -153,79 +167,104 @@ module Fluent::Plugin
|
|
153
167
|
raise Fluent::ConfigError, "ack_response_timeout must be a positive integer" if @ack_response_timeout < 1
|
154
168
|
end
|
155
169
|
|
170
|
+
def prefer_delayed_commit
|
171
|
+
@require_ack_response
|
172
|
+
end
|
173
|
+
|
156
174
|
def start
|
157
175
|
super
|
158
176
|
|
177
|
+
# Output#start sets @delayed_commit_timeout by @buffer_config.delayed_commit_timeout
|
178
|
+
# But it should be overwritten by ack_response_timeout to rollback chunks after timeout
|
179
|
+
if @ack_response_timeout && @delayed_commit_timeout != @ack_response_timeout
|
180
|
+
log.info "delayed_commit_timeout is overwritten by ack_response_timeout"
|
181
|
+
@delayed_commit_timeout = @ack_response_timeout
|
182
|
+
end
|
183
|
+
|
159
184
|
@rand_seed = Random.new.seed
|
160
185
|
rebuild_weight_array
|
161
186
|
@rr = 0
|
162
|
-
@usock = nil
|
163
187
|
|
164
188
|
unless @heartbeat_type == :none
|
165
|
-
@loop = Coolio::Loop.new
|
166
|
-
|
167
189
|
if @heartbeat_type == :udp
|
168
|
-
|
169
|
-
@usock
|
170
|
-
|
171
|
-
|
172
|
-
|
190
|
+
@usock = socket_create_udp(@nodes.first.host, @nodes.first.port, nonblock: true)
|
191
|
+
server_create_udp(:out_forward_heartbeat_receiver, 0, socket: @usock, max_bytes: @read_length) do |data, sock|
|
192
|
+
sockaddr = Socket.pack_sockaddr_in(sock.remote_port, sock.remote_host)
|
193
|
+
on_heartbeat(sockaddr, data)
|
194
|
+
end
|
173
195
|
end
|
196
|
+
timer_execute(:out_forward_heartbeat_request, @heartbeat_interval, &method(:on_timer))
|
197
|
+
end
|
174
198
|
|
175
|
-
|
176
|
-
@
|
177
|
-
|
178
|
-
|
199
|
+
if @require_ack_response
|
200
|
+
@sock_ack_waiting_mutex = Mutex.new
|
201
|
+
@sock_ack_waiting = []
|
202
|
+
thread_create(:out_forward_receiving_ack, &method(:ack_reader))
|
179
203
|
end
|
180
204
|
end
|
181
205
|
|
182
|
-
def
|
183
|
-
@finished = true
|
184
|
-
if @loop
|
185
|
-
@loop.watchers.each {|w| w.detach }
|
186
|
-
# @loop.stop
|
187
|
-
@loop.stop rescue nil
|
188
|
-
end
|
189
|
-
@thread.join if @thread
|
206
|
+
def close
|
190
207
|
@usock.close if @usock
|
191
|
-
|
192
208
|
super
|
193
209
|
end
|
194
210
|
|
195
|
-
def run
|
196
|
-
@loop.run if @loop
|
197
|
-
rescue
|
198
|
-
log.error "unexpected error", error: $!.to_s
|
199
|
-
log.error_backtrace
|
200
|
-
end
|
201
|
-
|
202
211
|
def write(chunk)
|
203
212
|
return if chunk.empty?
|
213
|
+
tag = chunk.metadata.tag
|
214
|
+
select_a_healthy_node{|node| node.send_data(tag, chunk) }
|
215
|
+
end
|
204
216
|
|
217
|
+
ACKWaitingSockInfo = Struct.new(:sock, :chunk_id, :node, :time, :timeout) do
|
218
|
+
def expired?(now)
|
219
|
+
time + timeout < now
|
220
|
+
end
|
221
|
+
end
|
222
|
+
|
223
|
+
def try_write(chunk)
|
224
|
+
if chunk.empty?
|
225
|
+
commit_write(chunk.unique_id)
|
226
|
+
return
|
227
|
+
end
|
205
228
|
tag = chunk.metadata.tag
|
229
|
+
sock, node = select_a_healthy_node{|n| n.send_data(tag, chunk) }
|
230
|
+
chunk_id = Base64.encode64(chunk.unique_id)
|
231
|
+
current_time = Process.clock_gettime(PROCESS_CLOCK_ID)
|
232
|
+
info = ACKWaitingSockInfo.new(sock, chunk_id, node, current_time, @ack_response_timeout)
|
233
|
+
@sock_ack_waiting_mutex.synchronize do
|
234
|
+
@sock_ack_waiting << info
|
235
|
+
end
|
236
|
+
end
|
237
|
+
|
238
|
+
def select_a_healthy_node
|
206
239
|
error = nil
|
207
240
|
|
208
241
|
wlen = @weight_array.length
|
209
242
|
wlen.times do
|
210
243
|
@rr = (@rr + 1) % wlen
|
211
244
|
node = @weight_array[@rr]
|
245
|
+
next unless node.available?
|
212
246
|
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
error = $! # use the latest error
|
220
|
-
end
|
247
|
+
begin
|
248
|
+
ret = yield node
|
249
|
+
return ret, node
|
250
|
+
rescue
|
251
|
+
# for load balancing during detecting crashed servers
|
252
|
+
error = $! # use the latest error
|
221
253
|
end
|
222
254
|
end
|
223
255
|
|
224
|
-
if error
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
256
|
+
raise error if error
|
257
|
+
raise NoNodesAvailable, "no nodes are available"
|
258
|
+
end
|
259
|
+
|
260
|
+
def create_transfer_socket(host, port, &block)
|
261
|
+
socket_create_tcp(
|
262
|
+
host, port,
|
263
|
+
linger_timeout: @send_timeout,
|
264
|
+
send_timeout: @send_timeout,
|
265
|
+
recv_timeout: @ack_response_timeout,
|
266
|
+
&block
|
267
|
+
)
|
229
268
|
end
|
230
269
|
|
231
270
|
# MessagePack FixArray length is 3
|
@@ -278,21 +317,7 @@ module Fluent::Plugin
|
|
278
317
|
@weight_array = weight_array
|
279
318
|
end
|
280
319
|
|
281
|
-
class HeartbeatRequestTimer < Coolio::TimerWatcher
|
282
|
-
def initialize(interval, callback)
|
283
|
-
super(interval, true)
|
284
|
-
@callback = callback
|
285
|
-
end
|
286
|
-
|
287
|
-
def on_timer
|
288
|
-
@callback.call
|
289
|
-
rescue
|
290
|
-
# TODO log?
|
291
|
-
end
|
292
|
-
end
|
293
|
-
|
294
320
|
def on_timer
|
295
|
-
return if @finished
|
296
321
|
@nodes.each {|n|
|
297
322
|
if n.tick
|
298
323
|
rebuild_weight_array
|
@@ -307,33 +332,86 @@ module Fluent::Plugin
|
|
307
332
|
}
|
308
333
|
end
|
309
334
|
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
335
|
+
def on_heartbeat(sockaddr, msg)
|
336
|
+
if node = @nodes.find {|n| n.sockaddr == sockaddr }
|
337
|
+
# log.trace "heartbeat arrived", name: node.name, host: node.host, port: node.port
|
338
|
+
if node.heartbeat
|
339
|
+
rebuild_weight_array
|
340
|
+
end
|
315
341
|
end
|
342
|
+
end
|
316
343
|
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
344
|
+
# return chunk id when succeeded for tests
|
345
|
+
def read_ack_from_sock(sock, unpacker)
|
346
|
+
begin
|
347
|
+
raw_data = sock.recv(@read_length)
|
348
|
+
rescue Errno::ECONNRESET
|
349
|
+
raw_data = ""
|
350
|
+
end
|
351
|
+
info = @sock_ack_waiting_mutex.synchronize{ @sock_ack_waiting.find{|i| i.sock == sock } }
|
352
|
+
|
353
|
+
# When connection is closed by remote host, socket is ready to read and #recv returns an empty string that means EOF.
|
354
|
+
# If this happens we assume the data wasn't delivered and retry it.
|
355
|
+
if raw_data.empty?
|
356
|
+
log.warn "destination node closed the connection. regard it as unavailable.", host: info.node.host, port: info.node.port
|
357
|
+
info.node.disable!
|
358
|
+
return nil
|
359
|
+
else
|
360
|
+
unpacker.feed(raw_data)
|
361
|
+
res = unpacker.read
|
362
|
+
if res['ack'] != info.chunk_id
|
363
|
+
# Some errors may have occurred when ack and chunk id are different, so send the chunk again.
|
364
|
+
log.warn "ack in response and chunk id in sent data are different", chunk_id: info.chunk_id, ack: res['ack']
|
365
|
+
rollback_write(info.chunk_id)
|
366
|
+
return nil
|
322
367
|
end
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
368
|
+
return info.chunk_id
|
369
|
+
end
|
370
|
+
rescue => e
|
371
|
+
log.error "unexpected error while receiving ack message", error: e
|
372
|
+
log.error_backtrace
|
373
|
+
ensure
|
374
|
+
@sock_ack_waiting_mutex.synchronize do
|
375
|
+
@sock_ack_waiting.delete(info)
|
329
376
|
end
|
330
377
|
end
|
331
378
|
|
332
|
-
def
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
379
|
+
def ack_reader
|
380
|
+
select_interval = if @delayed_commit_timeout > 3
|
381
|
+
2
|
382
|
+
else
|
383
|
+
@delayed_commit_timeout / 2.0
|
384
|
+
end
|
385
|
+
|
386
|
+
unpacker = Fluent::Engine.msgpack_unpacker
|
387
|
+
|
388
|
+
while thread_current_running?
|
389
|
+
now = Process.clock_gettime(PROCESS_CLOCK_ID)
|
390
|
+
sockets = []
|
391
|
+
@sock_ack_waiting_mutex.synchronize do
|
392
|
+
new_list = []
|
393
|
+
@sock_ack_waiting.each do |info|
|
394
|
+
if info.expired?(now)
|
395
|
+
# There are 2 types of cases when no response has been received from socket:
|
396
|
+
# (1) the node does not support sending responses
|
397
|
+
# (2) the node does support sending responses but responses have not arrived for some reason.
|
398
|
+
log.warn "no response from node. regard it as unavailable.", host: info.node.host, port: info.node.port
|
399
|
+
info.node.disable!
|
400
|
+
info.sock.close rescue nil
|
401
|
+
rollback_write(info.chunk_id)
|
402
|
+
else
|
403
|
+
sockets << info.sock
|
404
|
+
new_list << info
|
405
|
+
end
|
406
|
+
end
|
407
|
+
@sock_ack_waiting = new_list
|
408
|
+
end
|
409
|
+
|
410
|
+
readable_sockets, _, _ = IO.select(sockets, nil, nil, select_interval)
|
411
|
+
next unless readable_sockets
|
412
|
+
|
413
|
+
readable_sockets.each do |sock|
|
414
|
+
read_ack_from_sock(sock, unpacker)
|
337
415
|
end
|
338
416
|
end
|
339
417
|
end
|
@@ -386,20 +464,6 @@ module Fluent::Plugin
|
|
386
464
|
@standby
|
387
465
|
end
|
388
466
|
|
389
|
-
def connect
|
390
|
-
TCPSocket.new(resolved_host, port)
|
391
|
-
end
|
392
|
-
|
393
|
-
def set_socket_options(sock)
|
394
|
-
opt = [1, @sender.send_timeout.to_i].pack('I!I!') # { int l_onoff; int l_linger; }
|
395
|
-
sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_LINGER, opt)
|
396
|
-
|
397
|
-
opt = [@sender.send_timeout.to_i, 0].pack('L!L!') # struct timeval
|
398
|
-
sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_SNDTIMEO, opt)
|
399
|
-
|
400
|
-
sock
|
401
|
-
end
|
402
|
-
|
403
467
|
def establish_connection(sock)
|
404
468
|
while available? && @state != :established
|
405
469
|
begin
|
@@ -430,98 +494,60 @@ module Fluent::Plugin
|
|
430
494
|
end
|
431
495
|
end
|
432
496
|
|
433
|
-
def
|
434
|
-
sock = connect
|
497
|
+
def send_data_actual(sock, tag, chunk)
|
435
498
|
@state = @sender.security ? :helo : :established
|
436
|
-
|
437
|
-
|
438
|
-
|
439
|
-
if @state != :established
|
440
|
-
establish_connection(sock)
|
441
|
-
end
|
499
|
+
if @state != :established
|
500
|
+
establish_connection(sock)
|
501
|
+
end
|
442
502
|
|
443
|
-
|
444
|
-
|
445
|
-
|
503
|
+
unless available?
|
504
|
+
raise ConnectionClosedError, "failed to establish connection with node #{@name}"
|
505
|
+
end
|
446
506
|
|
447
|
-
|
448
|
-
|
507
|
+
option = { 'size' => chunk.size, 'compressed' => @compress }
|
508
|
+
option['chunk'] = Base64.encode64(chunk.unique_id) if @sender.require_ack_response
|
449
509
|
|
450
|
-
|
451
|
-
|
510
|
+
# out_forward always uses Raw32 type for content.
|
511
|
+
# Raw16 can store only 64kbytes, and it should be much smaller than buffer chunk size.
|
452
512
|
|
453
|
-
|
454
|
-
|
455
|
-
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
if @sender.require_ack_response
|
462
|
-
# Waiting for a response here results in a decrease of throughput because a chunk queue is locked.
|
463
|
-
# To avoid a decrease of throughput, it is necessary to prepare a list of chunks that wait for responses
|
464
|
-
# and process them asynchronously.
|
465
|
-
if IO.select([sock], nil, nil, @sender.ack_response_timeout)
|
466
|
-
raw_data = begin
|
467
|
-
sock.recv(1024)
|
468
|
-
rescue Errno::ECONNRESET
|
469
|
-
""
|
470
|
-
end
|
471
|
-
|
472
|
-
# When connection is closed by remote host, socket is ready to read and #recv returns an empty string that means EOF.
|
473
|
-
# If this happens we assume the data wasn't delivered and retry it.
|
474
|
-
if raw_data.empty?
|
475
|
-
@log.warn "node closed the connection. regard it as unavailable.", host: @host, port: @port
|
476
|
-
disable!
|
477
|
-
raise ConnectionClosedError, "node #{@host}:#{@port} closed connection"
|
478
|
-
else
|
479
|
-
@unpacker.feed(raw_data)
|
480
|
-
res = @unpacker.read
|
481
|
-
if res['ack'] != option['chunk']
|
482
|
-
# Some errors may have occured when ack and chunk id is different, so send the chunk again.
|
483
|
-
raise ResponseError, "ack in response and chunk id in sent data are different"
|
484
|
-
end
|
485
|
-
end
|
513
|
+
sock.write @sender.forward_header # beginArray(3)
|
514
|
+
sock.write tag.to_msgpack # 1. writeRaw(tag)
|
515
|
+
chunk.open(compressed: @compress) do |chunk_io|
|
516
|
+
sock.write [0xdb, chunk_io.size].pack('CN') # 2. beginRaw(size) raw32
|
517
|
+
IO.copy_stream(chunk_io, sock) # writeRawBody(packed_es)
|
518
|
+
end
|
519
|
+
sock.write option.to_msgpack # 3. writeOption(option)
|
520
|
+
end
|
486
521
|
|
487
|
-
|
488
|
-
|
489
|
-
|
490
|
-
|
491
|
-
|
492
|
-
|
493
|
-
|
494
|
-
|
495
|
-
end
|
496
|
-
end
|
522
|
+
def send_data(tag, chunk)
|
523
|
+
sock = @sender.create_transfer_socket(resolved_host, port)
|
524
|
+
begin
|
525
|
+
send_data_actual(sock, tag, chunk)
|
526
|
+
rescue
|
527
|
+
sock.close rescue nil
|
528
|
+
raise
|
529
|
+
end
|
497
530
|
|
498
|
-
|
499
|
-
|
500
|
-
ensure
|
501
|
-
sock.close_write
|
502
|
-
sock.close
|
531
|
+
if @sender.require_ack_response
|
532
|
+
return sock # to read ACK from socket
|
503
533
|
end
|
534
|
+
|
535
|
+
sock.close_write rescue nil
|
536
|
+
sock.close rescue nil
|
537
|
+
heartbeat(false)
|
538
|
+
nil
|
504
539
|
end
|
505
540
|
|
506
541
|
# FORWARD_TCP_HEARTBEAT_DATA = FORWARD_HEADER + ''.to_msgpack + [].to_msgpack
|
507
542
|
def send_heartbeat
|
508
543
|
case @sender.heartbeat_type
|
509
544
|
when :tcp
|
510
|
-
|
511
|
-
begin
|
512
|
-
opt = [1, @sender.send_timeout.to_i].pack('I!I!') # { int l_onoff; int l_linger; }
|
513
|
-
sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_LINGER, opt)
|
514
|
-
# opt = [@sender.send_timeout.to_i, 0].pack('L!L!') # struct timeval
|
515
|
-
# sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_SNDTIMEO, opt)
|
516
|
-
|
545
|
+
@sender.create_transfer_socket(resolved_host, port) do |sock|
|
517
546
|
## don't send any data to not cause a compatibility problem
|
518
547
|
# sock.write FORWARD_TCP_HEARTBEAT_DATA
|
519
548
|
|
520
549
|
# successful tcp connection establishment is considered as valid heartbeat
|
521
550
|
heartbeat(true)
|
522
|
-
ensure
|
523
|
-
sock.close_write
|
524
|
-
sock.close
|
525
551
|
end
|
526
552
|
when :udp
|
527
553
|
@usock.send "\0", 0, Socket.pack_sockaddr_in(@port, resolved_host)
|
@@ -543,7 +569,7 @@ module Fluent::Plugin
|
|
543
569
|
@resolved_host ||= resolve_dns!
|
544
570
|
|
545
571
|
else
|
546
|
-
now = Engine.now
|
572
|
+
now = Fluent::Engine.now
|
547
573
|
rh = @resolved_host
|
548
574
|
if !rh || now - @resolved_time >= @sender.expire_dns_cache
|
549
575
|
rh = @resolved_host = resolve_dns!
|