fluentd 1.6.3 → 1.7.1
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of fluentd might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/.drone.yml +35 -0
- data/.github/ISSUE_TEMPLATE/bug_report.md +2 -0
- data/CHANGELOG.md +83 -0
- data/README.md +5 -1
- data/fluentd.gemspec +3 -2
- data/lib/fluent/clock.rb +4 -0
- data/lib/fluent/compat/output.rb +3 -3
- data/lib/fluent/compat/socket_util.rb +1 -1
- data/lib/fluent/config/element.rb +3 -3
- data/lib/fluent/config/literal_parser.rb +1 -1
- data/lib/fluent/config/section.rb +4 -1
- data/lib/fluent/error.rb +4 -0
- data/lib/fluent/event.rb +28 -24
- data/lib/fluent/event_router.rb +2 -1
- data/lib/fluent/log.rb +1 -1
- data/lib/fluent/msgpack_factory.rb +8 -0
- data/lib/fluent/plugin/bare_output.rb +4 -4
- data/lib/fluent/plugin/buf_file.rb +10 -1
- data/lib/fluent/plugin/buf_file_single.rb +219 -0
- data/lib/fluent/plugin/buffer.rb +62 -63
- data/lib/fluent/plugin/buffer/chunk.rb +21 -3
- data/lib/fluent/plugin/buffer/file_chunk.rb +44 -12
- data/lib/fluent/plugin/buffer/file_single_chunk.rb +314 -0
- data/lib/fluent/plugin/buffer/memory_chunk.rb +2 -1
- data/lib/fluent/plugin/compressable.rb +10 -6
- data/lib/fluent/plugin/filter_grep.rb +2 -2
- data/lib/fluent/plugin/formatter_csv.rb +10 -6
- data/lib/fluent/plugin/in_syslog.rb +10 -3
- data/lib/fluent/plugin/in_tail.rb +7 -2
- data/lib/fluent/plugin/in_tcp.rb +34 -7
- data/lib/fluent/plugin/multi_output.rb +4 -4
- data/lib/fluent/plugin/out_exec_filter.rb +1 -0
- data/lib/fluent/plugin/out_file.rb +13 -3
- data/lib/fluent/plugin/out_forward.rb +144 -588
- data/lib/fluent/plugin/out_forward/ack_handler.rb +161 -0
- data/lib/fluent/plugin/out_forward/connection_manager.rb +113 -0
- data/lib/fluent/plugin/out_forward/error.rb +28 -0
- data/lib/fluent/plugin/out_forward/failure_detector.rb +84 -0
- data/lib/fluent/plugin/out_forward/handshake_protocol.rb +121 -0
- data/lib/fluent/plugin/out_forward/load_balancer.rb +111 -0
- data/lib/fluent/plugin/out_forward/socket_cache.rb +138 -0
- data/lib/fluent/plugin/out_http.rb +231 -0
- data/lib/fluent/plugin/output.rb +29 -35
- data/lib/fluent/plugin/parser.rb +77 -0
- data/lib/fluent/plugin/parser_csv.rb +75 -0
- data/lib/fluent/plugin/parser_syslog.rb +106 -3
- data/lib/fluent/plugin_helper/server.rb +2 -2
- data/lib/fluent/plugin_helper/socket.rb +14 -1
- data/lib/fluent/plugin_helper/thread.rb +1 -0
- data/lib/fluent/root_agent.rb +1 -1
- data/lib/fluent/time.rb +4 -2
- data/lib/fluent/timezone.rb +21 -7
- data/lib/fluent/version.rb +1 -1
- data/test/command/test_fluentd.rb +1 -1
- data/test/command/test_plugin_generator.rb +18 -2
- data/test/config/test_configurable.rb +78 -40
- data/test/counter/test_store.rb +1 -1
- data/test/helper.rb +1 -0
- data/test/helpers/process_extenstion.rb +33 -0
- data/test/plugin/out_forward/test_ack_handler.rb +101 -0
- data/test/plugin/out_forward/test_connection_manager.rb +145 -0
- data/test/plugin/out_forward/test_handshake_protocol.rb +103 -0
- data/test/plugin/out_forward/test_load_balancer.rb +60 -0
- data/test/plugin/out_forward/test_socket_cache.rb +139 -0
- data/test/plugin/test_buf_file.rb +172 -2
- data/test/plugin/test_buf_file_single.rb +801 -0
- data/test/plugin/test_buffer.rb +4 -48
- data/test/plugin/test_buffer_file_chunk.rb +38 -1
- data/test/plugin/test_buffer_file_single_chunk.rb +621 -0
- data/test/plugin/test_buffer_memory_chunk.rb +1 -0
- data/test/plugin/test_formatter_csv.rb +16 -0
- data/test/plugin/test_in_syslog.rb +56 -6
- data/test/plugin/test_in_tail.rb +1 -1
- data/test/plugin/test_in_tcp.rb +25 -0
- data/test/plugin/test_out_forward.rb +150 -201
- data/test/plugin/test_out_http.rb +352 -0
- data/test/plugin/test_output_as_buffered.rb +27 -24
- data/test/plugin/test_parser.rb +40 -0
- data/test/plugin/test_parser_csv.rb +83 -0
- data/test/plugin/test_parser_syslog.rb +118 -19
- data/test/plugin_helper/test_record_accessor.rb +1 -1
- data/test/test_time_formatter.rb +140 -121
- metadata +35 -6
@@ -64,9 +64,11 @@ module Fluent
|
|
64
64
|
unused = gz.unused
|
65
65
|
gz.finish
|
66
66
|
|
67
|
-
|
68
|
-
|
69
|
-
|
67
|
+
unless unused.nil?
|
68
|
+
adjust = unused.length
|
69
|
+
io.pos -= adjust
|
70
|
+
end
|
71
|
+
break if io.eof?
|
70
72
|
end
|
71
73
|
|
72
74
|
out
|
@@ -80,9 +82,11 @@ module Fluent
|
|
80
82
|
unused = gz.unused
|
81
83
|
gz.finish
|
82
84
|
|
83
|
-
|
84
|
-
|
85
|
-
|
85
|
+
unless unused.nil?
|
86
|
+
adjust = unused.length
|
87
|
+
input.pos -= adjust
|
88
|
+
end
|
89
|
+
break if input.eof?
|
86
90
|
end
|
87
91
|
|
88
92
|
output
|
@@ -110,7 +110,7 @@ module Fluent::Plugin
|
|
110
110
|
end
|
111
111
|
|
112
112
|
if @regexps.size > 1
|
113
|
-
log.info "Top level multiple <regexp> is
|
113
|
+
log.info "Top level multiple <regexp> is interpreted as 'and' condition"
|
114
114
|
end
|
115
115
|
@regexps.each do |e|
|
116
116
|
raise Fluent::ConfigError, "Duplicate key: #{e.key}" if regexp_and_conditions.key?(e.key)
|
@@ -118,7 +118,7 @@ module Fluent::Plugin
|
|
118
118
|
end
|
119
119
|
|
120
120
|
if @excludes.size > 1
|
121
|
-
log.info "Top level multiple <exclude> is
|
121
|
+
log.info "Top level multiple <exclude> is interpreted as 'or' condition"
|
122
122
|
end
|
123
123
|
@excludes.each do |e|
|
124
124
|
raise Fluent::ConfigError, "Duplicate key: #{e.key}" if exclude_or_conditions.key?(e.key)
|
@@ -33,18 +33,22 @@ module Fluent
|
|
33
33
|
|
34
34
|
def configure(conf)
|
35
35
|
super
|
36
|
+
|
36
37
|
@fields = fields.select{|f| !f.empty? }
|
37
38
|
raise ConfigError, "empty value is specified in fields parameter" if @fields.empty?
|
38
39
|
|
39
|
-
@generate_opts = {col_sep: @delimiter, force_quotes: @force_quotes
|
40
|
+
@generate_opts = {col_sep: @delimiter, force_quotes: @force_quotes, headers: @fields,
|
41
|
+
row_sep: @add_newline ? :auto : "".force_encoding(Encoding::ASCII_8BIT)}
|
42
|
+
# Cache CSV object per thread to avoid internal state sharing
|
43
|
+
@cache = {}
|
40
44
|
end
|
41
45
|
|
42
46
|
def format(tag, time, record)
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
47
|
+
csv = (@cache[Thread.current] ||= CSV.new("".force_encoding(Encoding::ASCII_8BIT), @generate_opts))
|
48
|
+
line = (csv << record).string.dup
|
49
|
+
# Need manual cleanup because CSV writer doesn't provide such method.
|
50
|
+
csv.rewind
|
51
|
+
csv.truncate(0)
|
48
52
|
line
|
49
53
|
end
|
50
54
|
end
|
@@ -199,6 +199,13 @@ module Fluent::Plugin
|
|
199
199
|
|
200
200
|
private
|
201
201
|
|
202
|
+
def emit_unmatched(data, sock)
|
203
|
+
record = {"unmatched_line" => data}
|
204
|
+
record[@source_address_key] = sock.remote_addr if @source_address_key
|
205
|
+
record[@source_hostname_key] = sock.remote_host if @source_hostname_key
|
206
|
+
emit("#{@tag}.unmatched", Fluent::EventTime.now, record)
|
207
|
+
end
|
208
|
+
|
202
209
|
def message_handler(data, sock)
|
203
210
|
pri = nil
|
204
211
|
text = data
|
@@ -206,7 +213,7 @@ module Fluent::Plugin
|
|
206
213
|
m = SYSLOG_REGEXP.match(data)
|
207
214
|
unless m
|
208
215
|
if @emit_unmatched_lines
|
209
|
-
|
216
|
+
emit_unmatched(data, sock)
|
210
217
|
end
|
211
218
|
log.warn "invalid syslog message: #{data.dump}"
|
212
219
|
return
|
@@ -218,7 +225,7 @@ module Fluent::Plugin
|
|
218
225
|
@parser.parse(text) do |time, record|
|
219
226
|
unless time && record
|
220
227
|
if @emit_unmatched_lines
|
221
|
-
|
228
|
+
emit_unmatched(data, sock)
|
222
229
|
end
|
223
230
|
log.warn "failed to parse message", data: data
|
224
231
|
return
|
@@ -238,7 +245,7 @@ module Fluent::Plugin
|
|
238
245
|
end
|
239
246
|
rescue => e
|
240
247
|
if @emit_unmatched_lines
|
241
|
-
|
248
|
+
emit_unmatched(data, sock)
|
242
249
|
end
|
243
250
|
log.error "invalid input", data: data, error: e
|
244
251
|
log.error_backtrace
|
@@ -97,6 +97,10 @@ module Fluent::Plugin
|
|
97
97
|
desc 'Ignore repeated permission error logs'
|
98
98
|
config_param :ignore_repeated_permission_error, :bool, default: false
|
99
99
|
|
100
|
+
config_section :parse, required: false, multi: true, init: true, param_name: :parser_configs do
|
101
|
+
config_argument :usage, :string, default: 'in_tail_parser'
|
102
|
+
end
|
103
|
+
|
100
104
|
attr_reader :paths
|
101
105
|
|
102
106
|
@@pos_file_paths = {}
|
@@ -148,7 +152,8 @@ module Fluent::Plugin
|
|
148
152
|
method(:parse_singleline)
|
149
153
|
end
|
150
154
|
@file_perm = system_config.file_permission || FILE_PERMISSION
|
151
|
-
|
155
|
+
# parser is already created by parser helper
|
156
|
+
@parser = parser_create(usage: parser_config['usage'] || @parser_configs.first.usage)
|
152
157
|
end
|
153
158
|
|
154
159
|
def configure_tag
|
@@ -431,7 +436,7 @@ module Fluent::Plugin
|
|
431
436
|
end
|
432
437
|
}
|
433
438
|
rescue => e
|
434
|
-
log.warn line.
|
439
|
+
log.warn 'invalid line found', file: tail_watcher.path, line: line, error: e.to_s
|
435
440
|
log.debug_backtrace(e.backtrace)
|
436
441
|
end
|
437
442
|
end
|
data/lib/fluent/plugin/in_tcp.rb
CHANGED
@@ -61,13 +61,15 @@ module Fluent::Plugin
|
|
61
61
|
def start
|
62
62
|
super
|
63
63
|
|
64
|
-
|
65
|
-
|
66
|
-
|
64
|
+
del_size = @delimiter.length
|
65
|
+
if @_extract_enabled && @_extract_tag_key
|
66
|
+
server_create(:in_tcp_server_single_emit, @port, bind: @bind, resolve_name: !!@source_hostname_key) do |data, conn|
|
67
|
+
conn.buffer << data
|
68
|
+
buf = conn.buffer
|
67
69
|
pos = 0
|
68
|
-
while i =
|
69
|
-
msg =
|
70
|
-
pos = i +
|
70
|
+
while i = buf.index(@delimiter, pos)
|
71
|
+
msg = buf[pos...i]
|
72
|
+
pos = i + del_size
|
71
73
|
|
72
74
|
@parser.parse(msg) do |time, record|
|
73
75
|
unless time && record
|
@@ -83,7 +85,32 @@ module Fluent::Plugin
|
|
83
85
|
router.emit(tag, time, record)
|
84
86
|
end
|
85
87
|
end
|
86
|
-
|
88
|
+
buf.slice!(0, pos) if pos > 0
|
89
|
+
end
|
90
|
+
else
|
91
|
+
server_create(:in_tcp_server_batch_emit, @port, bind: @bind, resolve_name: !!@source_hostname_key) do |data, conn|
|
92
|
+
conn.buffer << data
|
93
|
+
buf = conn.buffer
|
94
|
+
pos = 0
|
95
|
+
es = Fluent::MultiEventStream.new
|
96
|
+
while i = buf.index(@delimiter, pos)
|
97
|
+
msg = buf[pos...i]
|
98
|
+
pos = i + del_size
|
99
|
+
|
100
|
+
@parser.parse(msg) do |time, record|
|
101
|
+
unless time && record
|
102
|
+
log.warn "pattern not matched", message: msg
|
103
|
+
next
|
104
|
+
end
|
105
|
+
|
106
|
+
time ||= extract_time_from_record(record) || Fluent::EventTime.now
|
107
|
+
record[@source_address_key] = conn.remote_addr if @source_address_key
|
108
|
+
record[@source_hostname_key] = conn.remote_host if @source_hostname_key
|
109
|
+
es.add(time, record)
|
110
|
+
end
|
111
|
+
end
|
112
|
+
router.emit_stream(@tag, es)
|
113
|
+
buf.slice!(0, pos) if pos > 0
|
87
114
|
end
|
88
115
|
end
|
89
116
|
end
|
@@ -44,7 +44,7 @@ module Fluent
|
|
44
44
|
@outputs = []
|
45
45
|
@outputs_statically_created = false
|
46
46
|
|
47
|
-
@
|
47
|
+
@counter_mutex = Mutex.new
|
48
48
|
# TODO: well organized counters
|
49
49
|
@num_errors = 0
|
50
50
|
@emit_count = 0
|
@@ -143,12 +143,12 @@ module Fluent
|
|
143
143
|
end
|
144
144
|
|
145
145
|
def emit_sync(tag, es)
|
146
|
-
@
|
146
|
+
@counter_mutex.synchronize{ @emit_count += 1 }
|
147
147
|
begin
|
148
148
|
process(tag, es)
|
149
|
-
@
|
149
|
+
@counter_mutex.synchronize{ @emit_records += es.size }
|
150
150
|
rescue
|
151
|
-
@
|
151
|
+
@counter_mutex.synchronize{ @num_errors += 1 }
|
152
152
|
raise
|
153
153
|
end
|
154
154
|
end
|
@@ -72,6 +72,17 @@ module Fluent::Plugin
|
|
72
72
|
attr_accessor :last_written_path # for tests
|
73
73
|
|
74
74
|
module SymlinkBufferMixin
|
75
|
+
def metadata(timekey: nil, tag: nil, variables: nil)
|
76
|
+
metadata = super
|
77
|
+
|
78
|
+
@latest_metadata ||= new_metadata(timekey: 0)
|
79
|
+
if metadata.timekey && (metadata.timekey >= @latest_metadata.timekey)
|
80
|
+
@latest_metadata = metadata
|
81
|
+
end
|
82
|
+
|
83
|
+
metadata
|
84
|
+
end
|
85
|
+
|
75
86
|
def output_plugin_for_symlink=(output_plugin)
|
76
87
|
@_output_plugin_for_symlink = output_plugin
|
77
88
|
end
|
@@ -86,8 +97,7 @@ module Fluent::Plugin
|
|
86
97
|
# timekey will be appended into that file chunk. On the other side, resumed file chunks might NOT
|
87
98
|
# have timekey, especially in the cases that resumed file chunks are generated by Fluentd v0.12.
|
88
99
|
# These chunks will be enqueued immediately, and will be flushed soon.
|
89
|
-
|
90
|
-
if chunk.metadata == latest_metadata
|
100
|
+
if chunk.metadata == @latest_metadata
|
91
101
|
sym_path = @_output_plugin_for_symlink.extract_placeholders(@_symlink_path, chunk)
|
92
102
|
FileUtils.mkdir_p(File.dirname(sym_path), mode: @_output_plugin_for_symlink.dir_perm)
|
93
103
|
FileUtils.ln_sf(chunk.path, sym_path)
|
@@ -115,7 +125,7 @@ module Fluent::Plugin
|
|
115
125
|
|
116
126
|
if conf.has_key?('utc') || conf.has_key?('localtime')
|
117
127
|
param_name = conf.has_key?('utc') ? 'utc' : 'localtime'
|
118
|
-
log.warn "'#{param_name}' is
|
128
|
+
log.warn "'#{param_name}' is deprecated for output plugin. This parameter is used for formatter plugin in compatibility layer. If you want to use same feature, use timekey_use_utc parameter in <buffer> directive instead"
|
119
129
|
end
|
120
130
|
|
121
131
|
super
|
@@ -20,13 +20,16 @@ require 'fluent/clock'
|
|
20
20
|
require 'base64'
|
21
21
|
|
22
22
|
require 'fluent/compat/socket_util'
|
23
|
+
require 'fluent/plugin/out_forward/handshake_protocol'
|
24
|
+
require 'fluent/plugin/out_forward/load_balancer'
|
25
|
+
require 'fluent/plugin/out_forward/socket_cache'
|
26
|
+
require 'fluent/plugin/out_forward/failure_detector'
|
27
|
+
require 'fluent/plugin/out_forward/error'
|
28
|
+
require 'fluent/plugin/out_forward/connection_manager'
|
29
|
+
require 'fluent/plugin/out_forward/ack_handler'
|
23
30
|
|
24
31
|
module Fluent::Plugin
|
25
32
|
class ForwardOutput < Output
|
26
|
-
class Error < StandardError; end
|
27
|
-
class NoNodesAvailable < Error; end
|
28
|
-
class ConnectionClosedError < Error; end
|
29
|
-
|
30
33
|
Fluent::Plugin.register_output('forward', self)
|
31
34
|
|
32
35
|
helpers :socket, :server, :timer, :thread, :compat_parameters
|
@@ -104,6 +107,12 @@ module Fluent::Plugin
|
|
104
107
|
config_param :tls_client_private_key_path, :string, default: nil
|
105
108
|
desc 'The client private key passphrase for TLS.'
|
106
109
|
config_param :tls_client_private_key_passphrase, :string, default: nil, secret: true
|
110
|
+
desc 'The certificate thumbprint for searching from Windows system certstore.'
|
111
|
+
config_param :tls_cert_thumbprint, :string, default: nil, secret: true
|
112
|
+
desc 'The certificate logical store name on Windows system certstore.'
|
113
|
+
config_param :tls_cert_logical_store_name, :string, default: nil
|
114
|
+
desc 'Enable to use certificate enterprise store on Windows system certstore.'
|
115
|
+
config_param :tls_cert_use_enterprise_store, :bool, default: true
|
107
116
|
desc "Enable keepalive connection."
|
108
117
|
config_param :keepalive, :bool, default: false
|
109
118
|
desc "Expired time of keepalive. Default value is nil, which means to keep connection as long as possible"
|
@@ -154,8 +163,6 @@ module Fluent::Plugin
|
|
154
163
|
@thread = nil
|
155
164
|
|
156
165
|
@usock = nil
|
157
|
-
@sock_ack_waiting = nil
|
158
|
-
@sock_ack_waiting_mutex = nil
|
159
166
|
@keep_alive_watcher_interval = 5 # TODO
|
160
167
|
end
|
161
168
|
|
@@ -176,10 +183,8 @@ module Fluent::Plugin
|
|
176
183
|
@heartbeat_type = :transport
|
177
184
|
end
|
178
185
|
|
179
|
-
if @dns_round_robin
|
180
|
-
|
181
|
-
raise Fluent::ConfigError, "forward output heartbeat type must be 'transport' or 'none' to use dns_round_robin option"
|
182
|
-
end
|
186
|
+
if @dns_round_robin && @heartbeat_type == :udp
|
187
|
+
raise Fluent::ConfigError, "forward output heartbeat type must be 'transport' or 'none' to use dns_round_robin option"
|
183
188
|
end
|
184
189
|
|
185
190
|
if @transport == :tls
|
@@ -199,17 +204,35 @@ module Fluent::Plugin
|
|
199
204
|
@tls_verify_hostname = false
|
200
205
|
@tls_allow_self_signed_cert = true
|
201
206
|
end
|
207
|
+
|
208
|
+
if Fluent.windows?
|
209
|
+
if (@tls_cert_path || @tls_ca_cert_path) && @tls_cert_logical_store_name
|
210
|
+
raise Fluent::ConfigError, "specified both cert path and tls_cert_logical_store_name is not permitted"
|
211
|
+
end
|
212
|
+
else
|
213
|
+
raise Fluent::ConfigError, "This parameter is for only Windows" if @tls_cert_logical_store_name
|
214
|
+
raise Fluent::ConfigError, "This parameter is for only Windows" if @tls_cert_thumbprint
|
215
|
+
end
|
202
216
|
end
|
203
217
|
|
218
|
+
@ack_handler = @require_ack_response ? AckHandler.new(timeout: @ack_response_timeout, log: @log, read_length: @read_length) : nil
|
219
|
+
socket_cache = @keepalive ? SocketCache.new(@keepalive_timeout, @log) : nil
|
220
|
+
@connection_manager = ConnectionManager.new(
|
221
|
+
log: @log,
|
222
|
+
secure: !!@security,
|
223
|
+
connection_factory: method(:create_transfer_socket),
|
224
|
+
socket_cache: socket_cache,
|
225
|
+
)
|
226
|
+
|
204
227
|
@servers.each do |server|
|
205
228
|
failure = FailureDetector.new(@heartbeat_interval, @hard_timeout, Time.now.to_i.to_f)
|
206
229
|
name = server.name || "#{server.host}:#{server.port}"
|
207
230
|
|
208
231
|
log.info "adding forwarding server '#{name}'", host: server.host, port: server.port, weight: server.weight, plugin_id: plugin_id
|
209
232
|
if @heartbeat_type == :none
|
210
|
-
@nodes << NoneHeartbeatNode.new(self, server, failure: failure,
|
233
|
+
@nodes << NoneHeartbeatNode.new(self, server, failure: failure, connection_manager: @connection_manager, ack_handler: @ack_handler)
|
211
234
|
else
|
212
|
-
node = Node.new(self, server, failure: failure,
|
235
|
+
node = Node.new(self, server, failure: failure, connection_manager: @connection_manager, ack_handler: @ack_handler)
|
213
236
|
begin
|
214
237
|
node.validate_host_resolution!
|
215
238
|
rescue => e
|
@@ -251,31 +274,25 @@ module Fluent::Plugin
|
|
251
274
|
def start
|
252
275
|
super
|
253
276
|
|
254
|
-
|
255
|
-
|
256
|
-
if @ack_response_timeout && @delayed_commit_timeout != @ack_response_timeout
|
257
|
-
log.info "delayed_commit_timeout is overwritten by ack_response_timeout"
|
258
|
-
@delayed_commit_timeout = @ack_response_timeout + 2 # minimum ack_reader IO.select interval is 1s
|
259
|
-
end
|
260
|
-
|
261
|
-
@rand_seed = Random.new.seed
|
262
|
-
rebuild_weight_array
|
263
|
-
@rr = 0
|
277
|
+
@load_balancer = LoadBalancer.new(log)
|
278
|
+
@load_balancer.rebuild_weight_array(@nodes)
|
264
279
|
|
265
280
|
unless @heartbeat_type == :none
|
266
281
|
if @heartbeat_type == :udp
|
267
282
|
@usock = socket_create_udp(@nodes.first.host, @nodes.first.port, nonblock: true)
|
268
|
-
server_create_udp(:out_forward_heartbeat_receiver, 0, socket: @usock, max_bytes: @read_length
|
269
|
-
sockaddr = Socket.pack_sockaddr_in(sock.remote_port, sock.remote_host)
|
270
|
-
on_heartbeat(sockaddr, data)
|
271
|
-
end
|
283
|
+
server_create_udp(:out_forward_heartbeat_receiver, 0, socket: @usock, max_bytes: @read_length, &method(:on_udp_heatbeat_response_recv))
|
272
284
|
end
|
273
|
-
timer_execute(:out_forward_heartbeat_request, @heartbeat_interval, &method(:
|
285
|
+
timer_execute(:out_forward_heartbeat_request, @heartbeat_interval, &method(:on_heartbeat_timer))
|
274
286
|
end
|
275
287
|
|
276
288
|
if @require_ack_response
|
277
|
-
@
|
278
|
-
|
289
|
+
# Output#start sets @delayed_commit_timeout by @buffer_config.delayed_commit_timeout
|
290
|
+
# But it should be overwritten by ack_response_timeout to rollback chunks after timeout
|
291
|
+
if @delayed_commit_timeout != @ack_response_timeout
|
292
|
+
log.info "delayed_commit_timeout is overwritten by ack_response_timeout"
|
293
|
+
@delayed_commit_timeout = @ack_response_timeout + 2 # minimum ack_reader IO.select interval is 1s
|
294
|
+
end
|
295
|
+
|
279
296
|
thread_create(:out_forward_receiving_ack, &method(:ack_reader))
|
280
297
|
end
|
281
298
|
|
@@ -301,22 +318,22 @@ module Fluent::Plugin
|
|
301
318
|
@usock.close rescue nil
|
302
319
|
end
|
303
320
|
|
304
|
-
if @keepalive && @keepalive_timeout
|
305
|
-
@nodes.each(&:clear)
|
306
|
-
end
|
307
321
|
super
|
308
322
|
end
|
309
323
|
|
324
|
+
def stop
|
325
|
+
super
|
326
|
+
|
327
|
+
if @keepalive
|
328
|
+
@connection_manager.stop
|
329
|
+
end
|
330
|
+
end
|
331
|
+
|
310
332
|
def write(chunk)
|
311
333
|
return if chunk.empty?
|
312
334
|
tag = chunk.metadata.tag
|
313
|
-
select_a_healthy_node{|node| node.send_data(tag, chunk) }
|
314
|
-
end
|
315
335
|
|
316
|
-
|
317
|
-
def expired?(now)
|
318
|
-
time + timeout < now
|
319
|
-
end
|
336
|
+
@load_balancer.select_healthy_node { |node| node.send_data(tag, chunk) }
|
320
337
|
end
|
321
338
|
|
322
339
|
def try_write(chunk)
|
@@ -326,35 +343,7 @@ module Fluent::Plugin
|
|
326
343
|
return
|
327
344
|
end
|
328
345
|
tag = chunk.metadata.tag
|
329
|
-
|
330
|
-
chunk_id_base64 = Base64.encode64(chunk.unique_id)
|
331
|
-
current_time = Fluent::Clock.now
|
332
|
-
info = ACKWaitingSockInfo.new(sock, chunk.unique_id, chunk_id_base64, node, current_time, @ack_response_timeout)
|
333
|
-
@sock_ack_waiting_mutex.synchronize do
|
334
|
-
@sock_ack_waiting << info
|
335
|
-
end
|
336
|
-
end
|
337
|
-
|
338
|
-
def select_a_healthy_node
|
339
|
-
error = nil
|
340
|
-
|
341
|
-
wlen = @weight_array.length
|
342
|
-
wlen.times do
|
343
|
-
@rr = (@rr + 1) % wlen
|
344
|
-
node = @weight_array[@rr]
|
345
|
-
next unless node.available?
|
346
|
-
|
347
|
-
begin
|
348
|
-
ret = yield node
|
349
|
-
return ret, node
|
350
|
-
rescue
|
351
|
-
# for load balancing during detecting crashed servers
|
352
|
-
error = $! # use the latest error
|
353
|
-
end
|
354
|
-
end
|
355
|
-
|
356
|
-
raise error if error
|
357
|
-
raise NoNodesAvailable, "no nodes are available"
|
346
|
+
@load_balancer.select_healthy_node { |n| n.send_data(tag, chunk) }
|
358
347
|
end
|
359
348
|
|
360
349
|
def create_transfer_socket(host, port, hostname, &block)
|
@@ -372,6 +361,9 @@ module Fluent::Plugin
|
|
372
361
|
cert_path: @tls_client_cert_path,
|
373
362
|
private_key_path: @tls_client_private_key_path,
|
374
363
|
private_key_passphrase: @tls_client_private_key_passphrase,
|
364
|
+
cert_thumbprint: @tls_cert_thumbprint,
|
365
|
+
cert_logical_store_name: @tls_cert_logical_store_name,
|
366
|
+
cert_use_enterprise_store: @tls_cert_use_enterprise_store,
|
375
367
|
|
376
368
|
# Enabling SO_LINGER causes data loss on Windows
|
377
369
|
# https://github.com/fluent/fluentd/issues/1968
|
@@ -403,130 +395,41 @@ module Fluent::Plugin
|
|
403
395
|
|
404
396
|
private
|
405
397
|
|
406
|
-
def
|
407
|
-
|
408
|
-
|
409
|
-
}
|
410
|
-
|
411
|
-
lost_weight = 0
|
412
|
-
regular_nodes.each {|n|
|
413
|
-
unless n.available?
|
414
|
-
lost_weight += n.weight
|
415
|
-
end
|
416
|
-
}
|
417
|
-
log.debug "rebuilding weight array", lost_weight: lost_weight
|
418
|
-
|
419
|
-
if lost_weight > 0
|
420
|
-
standby_nodes.each {|n|
|
421
|
-
if n.available?
|
422
|
-
regular_nodes << n
|
423
|
-
log.warn "using standby node #{n.host}:#{n.port}", weight: n.weight
|
424
|
-
lost_weight -= n.weight
|
425
|
-
break if lost_weight <= 0
|
426
|
-
end
|
427
|
-
}
|
428
|
-
end
|
429
|
-
|
430
|
-
weight_array = []
|
431
|
-
if regular_nodes.empty?
|
432
|
-
log.warn('No nodes are available')
|
433
|
-
@weight_array = weight_array
|
434
|
-
return @weight_array
|
435
|
-
end
|
436
|
-
|
437
|
-
gcd = regular_nodes.map {|n| n.weight }.inject(0) {|r,w| r.gcd(w) }
|
438
|
-
regular_nodes.each {|n|
|
439
|
-
(n.weight / gcd).times {
|
440
|
-
weight_array << n
|
441
|
-
}
|
442
|
-
}
|
443
|
-
|
444
|
-
# for load balancing during detecting crashed servers
|
445
|
-
coe = (regular_nodes.size * 6) / weight_array.size
|
446
|
-
weight_array *= coe if coe > 1
|
447
|
-
|
448
|
-
r = Random.new(@rand_seed)
|
449
|
-
weight_array.sort_by! { r.rand }
|
450
|
-
|
451
|
-
@weight_array = weight_array
|
452
|
-
end
|
453
|
-
|
454
|
-
def on_timer
|
455
|
-
@nodes.each {|n|
|
398
|
+
def on_heartbeat_timer
|
399
|
+
need_rebuild = false
|
400
|
+
@nodes.each do |n|
|
456
401
|
begin
|
457
402
|
log.trace "sending heartbeat", host: n.host, port: n.port, heartbeat_type: @heartbeat_type
|
458
403
|
n.usock = @usock if @usock
|
459
|
-
|
460
|
-
rebuild_weight_array
|
461
|
-
end
|
404
|
+
need_rebuild = n.send_heartbeat || need_rebuild
|
462
405
|
rescue Errno::EAGAIN, Errno::EWOULDBLOCK, Errno::EINTR, Errno::ECONNREFUSED, Errno::ETIMEDOUT => e
|
463
406
|
log.debug "failed to send heartbeat packet", host: n.host, port: n.port, heartbeat_type: @heartbeat_type, error: e
|
464
407
|
rescue => e
|
465
408
|
log.debug "unexpected error happen during heartbeat", host: n.host, port: n.port, heartbeat_type: @heartbeat_type, error: e
|
466
409
|
end
|
467
|
-
if n.tick
|
468
|
-
rebuild_weight_array
|
469
|
-
end
|
470
|
-
}
|
471
|
-
end
|
472
410
|
|
473
|
-
|
474
|
-
if node = @nodes.find {|n| n.sockaddr == sockaddr }
|
475
|
-
# log.trace "heartbeat arrived", name: node.name, host: node.host, port: node.port
|
476
|
-
if node.heartbeat
|
477
|
-
rebuild_weight_array
|
478
|
-
end
|
411
|
+
need_rebuild = n.tick || need_rebuild
|
479
412
|
end
|
480
|
-
end
|
481
413
|
|
482
|
-
|
483
|
-
|
414
|
+
if need_rebuild
|
415
|
+
@load_balancer.rebuild_weight_array(@nodes)
|
416
|
+
end
|
484
417
|
end
|
485
418
|
|
486
|
-
|
487
|
-
|
488
|
-
|
489
|
-
|
490
|
-
|
491
|
-
|
492
|
-
end
|
493
|
-
info = @sock_ack_waiting_mutex.synchronize{ @sock_ack_waiting.find{|i| i.sock == sock } }
|
494
|
-
|
495
|
-
# When connection is closed by remote host, socket is ready to read and #recv returns an empty string that means EOF.
|
496
|
-
# If this happens we assume the data wasn't delivered and retry it.
|
497
|
-
if raw_data.empty?
|
498
|
-
log.warn "destination node closed the connection. regard it as unavailable.", host: info.node.host, port: info.node.port
|
499
|
-
info.node.disable!
|
500
|
-
rollback_write(info.chunk_id, update_retry: false)
|
501
|
-
return nil
|
502
|
-
else
|
503
|
-
unpacker.feed(raw_data)
|
504
|
-
res = unpacker.read
|
505
|
-
log.trace "getting response from destination", host: info.node.host, port: info.node.port, chunk_id: dump_unique_id_hex(info.chunk_id), response: res
|
506
|
-
if res['ack'] != info.chunk_id_base64
|
507
|
-
# Some errors may have occurred when ack and chunk id is different, so send the chunk again.
|
508
|
-
log.warn "ack in response and chunk id in sent data are different", chunk_id: dump_unique_id_hex(info.chunk_id), ack: res['ack']
|
509
|
-
rollback_write(info.chunk_id, update_retry: false)
|
510
|
-
return nil
|
511
|
-
else
|
512
|
-
log.trace "got a correct ack response", chunk_id: dump_unique_id_hex(info.chunk_id)
|
419
|
+
def on_udp_heatbeat_response_recv(data, sock)
|
420
|
+
sockaddr = Socket.pack_sockaddr_in(sock.remote_port, sock.remote_host)
|
421
|
+
if node = @nodes.find { |n| n.sockaddr == sockaddr }
|
422
|
+
# log.trace "heartbeat arrived", name: node.name, host: node.host, port: node.port
|
423
|
+
if node.heartbeat
|
424
|
+
@load_balancer.rebuild_weight_array(@nodes)
|
513
425
|
end
|
514
|
-
return info.chunk_id
|
515
|
-
end
|
516
|
-
rescue => e
|
517
|
-
log.error "unexpected error while receiving ack message", error: e
|
518
|
-
log.error_backtrace
|
519
|
-
ensure
|
520
|
-
if @keepalive
|
521
|
-
info.node.socket_cache.dec_ref_by_value(info.sock)
|
522
426
|
else
|
523
|
-
|
524
|
-
info.sock.close rescue nil
|
427
|
+
log.warn("Unknown heartbeat response received from #{sock.remote_host}:#{sock.remote_port}")
|
525
428
|
end
|
429
|
+
end
|
526
430
|
|
527
|
-
|
528
|
-
|
529
|
-
end
|
431
|
+
def on_purge_obsolete_socks
|
432
|
+
@connection_manager.purge_obsolete_socks
|
530
433
|
end
|
531
434
|
|
532
435
|
def ack_reader
|
@@ -536,185 +439,33 @@ module Fluent::Plugin
|
|
536
439
|
@delayed_commit_timeout / 3.0
|
537
440
|
end
|
538
441
|
|
539
|
-
unpacker = Fluent::Engine.msgpack_unpacker
|
540
|
-
|
541
442
|
while thread_current_running?
|
542
|
-
|
543
|
-
|
544
|
-
begin
|
545
|
-
@sock_ack_waiting_mutex.synchronize do
|
546
|
-
new_list = []
|
547
|
-
@sock_ack_waiting.each do |info|
|
548
|
-
if info.expired?(now)
|
549
|
-
# There are 2 types of cases when no response has been received from socket:
|
550
|
-
# (1) the node does not support sending responses
|
551
|
-
# (2) the node does support sending response but responses have not arrived for some reasons.
|
552
|
-
log.warn "no response from node. regard it as unavailable.", host: info.node.host, port: info.node.port
|
553
|
-
info.node.disable!
|
554
|
-
if @keepalive
|
555
|
-
info.node.socket_cache.revoke_by_value(info.sock)
|
556
|
-
end
|
557
|
-
info.sock.close rescue nil
|
558
|
-
rollback_write(info.chunk_id, update_retry: false)
|
559
|
-
else
|
560
|
-
sockets << info.sock
|
561
|
-
new_list << info
|
562
|
-
end
|
563
|
-
end
|
564
|
-
@sock_ack_waiting = new_list
|
565
|
-
end
|
443
|
+
@ack_handler.collect_response(select_interval) do |chunk_id, node, sock, result|
|
444
|
+
@connection_manager.close(sock)
|
566
445
|
|
567
|
-
|
568
|
-
|
446
|
+
case result
|
447
|
+
when AckHandler::Result::SUCCESS
|
448
|
+
commit_write(chunk_id)
|
449
|
+
when AckHandler::Result::FAILED
|
450
|
+
node.disable!
|
451
|
+
rollback_write(chunk_id, update_retry: false)
|
452
|
+
when AckHandler::Result::CHUNKID_UNMATCHED
|
453
|
+
rollback_write(chunk_id, update_retry: false)
|
454
|
+
else
|
455
|
+
log.warn("BUG: invalid status #{result} #{chunk_id}")
|
569
456
|
|
570
|
-
|
571
|
-
|
572
|
-
|
457
|
+
if chunk_id
|
458
|
+
rollback_write(chunk_id, update_retry: false)
|
459
|
+
end
|
573
460
|
end
|
574
|
-
rescue => e
|
575
|
-
log.error "unexpected error while receiving ack", error: e
|
576
|
-
log.error_backtrace
|
577
461
|
end
|
578
462
|
end
|
579
463
|
end
|
580
464
|
|
581
465
|
class Node
|
582
|
-
|
583
|
-
|
584
|
-
|
585
|
-
def initialize(timeout, log)
|
586
|
-
@log = log
|
587
|
-
@timeout = timeout
|
588
|
-
@active_socks = {}
|
589
|
-
@inactive_socks = {}
|
590
|
-
@mutex = Mutex.new
|
591
|
-
end
|
592
|
-
|
593
|
-
def revoke(key = Thread.current.object_id)
|
594
|
-
@mutex.synchronize do
|
595
|
-
if @active_socks[key]
|
596
|
-
@inactive_socks[key] = @active_socks.delete(key)
|
597
|
-
@inactive_socks[key].ref = 0
|
598
|
-
end
|
599
|
-
end
|
600
|
-
end
|
601
|
-
|
602
|
-
def clear
|
603
|
-
@mutex.synchronize do
|
604
|
-
@inactive_socks.values.each do |s|
|
605
|
-
s.sock.close rescue nil
|
606
|
-
end
|
607
|
-
@inactive_socks.clear
|
608
|
-
|
609
|
-
@active_socks.values.each do |s|
|
610
|
-
s.sock.close rescue nil
|
611
|
-
end
|
612
|
-
@active_socks.clear
|
613
|
-
end
|
614
|
-
end
|
615
|
-
|
616
|
-
def purge_obsolete_socks
|
617
|
-
@mutex.synchronize do
|
618
|
-
@inactive_socks.keys.each do |k|
|
619
|
-
# 0 means sockets stored in this class received all acks
|
620
|
-
if @inactive_socks[k].ref <= 0
|
621
|
-
s = @inactive_socks.delete(k)
|
622
|
-
s.sock.close rescue nil
|
623
|
-
@log.debug("purged obsolete socket #{s.sock}")
|
624
|
-
end
|
625
|
-
end
|
626
|
-
|
627
|
-
@active_socks.keys.each do |k|
|
628
|
-
if expired?(k) && @active_socks[k].ref <= 0
|
629
|
-
@inactive_socks[k] = @active_socks.delete(k)
|
630
|
-
end
|
631
|
-
end
|
632
|
-
end
|
633
|
-
end
|
634
|
-
|
635
|
-
# We expect that `yield` returns a unique object in this class
|
636
|
-
def fetch_or(key = Thread.current.object_id)
|
637
|
-
@mutex.synchronize do
|
638
|
-
unless @active_socks[key]
|
639
|
-
@active_socks[key] = TimedSocket.new(timeout, yield, 1)
|
640
|
-
@log.debug("connect new socket #{@active_socks[key]}")
|
641
|
-
return @active_socks[key].sock
|
642
|
-
end
|
643
|
-
|
644
|
-
if expired?(key)
|
645
|
-
# Do not close this socket here in case of it will be used by other place (e.g. wait for receiving ack)
|
646
|
-
@inactive_socks[key] = @active_socks.delete(key)
|
647
|
-
@log.debug("connection #{@inactive_socks[key]} is expired. reconnecting...")
|
648
|
-
@active_socks[key] = TimedSocket.new(timeout, yield, 0)
|
649
|
-
end
|
650
|
-
|
651
|
-
@active_socks[key].ref += 1
|
652
|
-
@active_socks[key].sock
|
653
|
-
end
|
654
|
-
end
|
655
|
-
|
656
|
-
def dec_ref(key = Thread.current.object_id)
|
657
|
-
@mutex.synchronize do
|
658
|
-
if @active_socks[key]
|
659
|
-
@active_socks[key].ref -= 1
|
660
|
-
elsif @inactive_socks[key]
|
661
|
-
@inactive_socks[key].ref -= 1
|
662
|
-
else
|
663
|
-
@log.warn("Not found key for dec_ref: #{key}")
|
664
|
-
end
|
665
|
-
end
|
666
|
-
end
|
667
|
-
|
668
|
-
# This method is expected to be called in class which doesn't call #inc_ref
|
669
|
-
def dec_ref_by_value(val)
|
670
|
-
@mutex.synchronize do
|
671
|
-
sock = @active_socks.detect { |_, v| v.sock == val }
|
672
|
-
if sock
|
673
|
-
key = sock.first
|
674
|
-
@active_socks[key].ref -= 1
|
675
|
-
return
|
676
|
-
end
|
677
|
-
|
678
|
-
sock = @inactive_socks.detect { |_, v| v.sock == val }
|
679
|
-
if sock
|
680
|
-
key = sock.first
|
681
|
-
@inactive_socks[key].ref -= 1
|
682
|
-
return
|
683
|
-
else
|
684
|
-
@log.warn("Not found key for dec_ref_by_value: #{key}")
|
685
|
-
end
|
686
|
-
end
|
687
|
-
end
|
688
|
-
|
689
|
-
# This method is expected to be called in class which doesn't call #fetch_or
|
690
|
-
def revoke_by_value(val)
|
691
|
-
@mutex.synchronize do
|
692
|
-
sock = @active_socks.detect { |_, v| v.sock == val }
|
693
|
-
if sock
|
694
|
-
key = sock.first
|
695
|
-
@inactive_socks[key] = @active_socks.delete(key)
|
696
|
-
@inactive_socks[key].ref = 0
|
697
|
-
else
|
698
|
-
@log.debug("Not found for revoke_by_value :#{val}")
|
699
|
-
end
|
700
|
-
end
|
701
|
-
end
|
702
|
-
|
703
|
-
private
|
704
|
-
|
705
|
-
def timeout
|
706
|
-
@timeout && Time.now + @timeout
|
707
|
-
end
|
708
|
-
|
709
|
-
# This method is thread unsafe
|
710
|
-
def expired?(key = Thread.current.object_id)
|
711
|
-
@active_socks[key].timeout ? @active_socks[key].timeout < Time.now : false
|
712
|
-
end
|
713
|
-
end
|
714
|
-
|
715
|
-
# @param keepalive [Bool]
|
716
|
-
# @param keepalive_timeout [Integer | nil]
|
717
|
-
def initialize(sender, server, failure:, keepalive: false, keepalive_timeout: nil)
|
466
|
+
# @param connection_manager [Fluent::Plugin::ForwardOutput::ConnectionManager]
|
467
|
+
# @param ack_handler [Fluent::Plugin::ForwardOutput::AckHandler]
|
468
|
+
def initialize(sender, server, failure:, connection_manager:, ack_handler:)
|
718
469
|
@sender = sender
|
719
470
|
@log = sender.log
|
720
471
|
@compress = sender.compress
|
@@ -737,10 +488,13 @@ module Fluent::Plugin
|
|
737
488
|
|
738
489
|
@usock = nil
|
739
490
|
|
740
|
-
@
|
741
|
-
|
742
|
-
|
743
|
-
|
491
|
+
@handshake = HandshakeProtocol.new(
|
492
|
+
log: @log,
|
493
|
+
hostname: sender.security && sender.security.self_hostname,
|
494
|
+
shared_key: server.shared_key || (sender.security && sender.security.shared_key) || '',
|
495
|
+
password: server.password,
|
496
|
+
username: server.username,
|
497
|
+
)
|
744
498
|
|
745
499
|
@unpacker = Fluent::Engine.msgpack_unpacker
|
746
500
|
|
@@ -748,20 +502,15 @@ module Fluent::Plugin
|
|
748
502
|
@resolved_time = 0
|
749
503
|
@resolved_once = false
|
750
504
|
|
751
|
-
@
|
752
|
-
|
753
|
-
@socket_cache = SocketCache.new(keepalive_timeout, @log)
|
754
|
-
end
|
505
|
+
@connection_manager = connection_manager
|
506
|
+
@ack_handler = ack_handler
|
755
507
|
end
|
756
508
|
|
757
509
|
attr_accessor :usock
|
758
510
|
|
759
511
|
attr_reader :name, :host, :port, :weight, :standby, :state
|
760
|
-
attr_reader :sockaddr # used by
|
761
|
-
attr_reader :failure
|
762
|
-
attr_reader :socket_cache # for ack
|
763
|
-
|
764
|
-
RequestInfo = Struct.new(:state, :shared_key_nonce, :auth)
|
512
|
+
attr_reader :sockaddr # used by on_udp_heatbeat_response_recv
|
513
|
+
attr_reader :failure # for test
|
765
514
|
|
766
515
|
def validate_host_resolution!
|
767
516
|
resolved_host
|
@@ -783,13 +532,15 @@ module Fluent::Plugin
|
|
783
532
|
connect do |sock, ri|
|
784
533
|
if ri.state != :established
|
785
534
|
establish_connection(sock, ri)
|
786
|
-
|
535
|
+
if ri.state != :established
|
536
|
+
raise "Failed to establish connection to #{@host}:#{@port}"
|
537
|
+
end
|
787
538
|
end
|
788
539
|
end
|
789
540
|
end
|
790
541
|
|
791
542
|
def establish_connection(sock, ri)
|
792
|
-
while
|
543
|
+
while ri.state != :established
|
793
544
|
begin
|
794
545
|
# TODO: On Ruby 2.2 or earlier, read_nonblock doesn't work expectedly.
|
795
546
|
# We need rewrite around here using new socket/server plugin helper.
|
@@ -799,7 +550,9 @@ module Fluent::Plugin
|
|
799
550
|
next
|
800
551
|
end
|
801
552
|
@unpacker.feed_each(buf) do |data|
|
802
|
-
|
553
|
+
if @handshake.invoke(sock, ri, data) == :established
|
554
|
+
@log.debug "connection established", host: @host, port: @port
|
555
|
+
end
|
803
556
|
end
|
804
557
|
rescue IO::WaitReadable
|
805
558
|
# If the exception is Errno::EWOULDBLOCK or Errno::EAGAIN, it is extended by IO::WaitReadable.
|
@@ -814,17 +567,21 @@ module Fluent::Plugin
|
|
814
567
|
@log.warn "disconnected", host: @host, port: @port
|
815
568
|
disable!
|
816
569
|
break
|
570
|
+
rescue HeloError => e
|
571
|
+
@log.warn "received invalid helo message from #{@name}"
|
572
|
+
disable!
|
573
|
+
break
|
574
|
+
rescue PingpongError => e
|
575
|
+
@log.warn "connection refused to #{@name || @host}: #{e.message}"
|
576
|
+
disable!
|
577
|
+
break
|
817
578
|
end
|
818
579
|
end
|
819
580
|
end
|
820
581
|
|
821
582
|
def send_data_actual(sock, tag, chunk)
|
822
|
-
unless available?
|
823
|
-
raise ConnectionClosedError, "failed to establish connection with node #{@name}"
|
824
|
-
end
|
825
|
-
|
826
583
|
option = { 'size' => chunk.size, 'compressed' => @compress }
|
827
|
-
option['chunk'] = Base64.encode64(chunk.unique_id) if @
|
584
|
+
option['chunk'] = Base64.encode64(chunk.unique_id) if @ack_handler
|
828
585
|
|
829
586
|
# https://github.com/fluent/fluentd/wiki/Forward-Protocol-Specification-v1#packedforward-mode
|
830
587
|
# out_forward always uses str32 type for entries.
|
@@ -845,48 +602,26 @@ module Fluent::Plugin
|
|
845
602
|
end
|
846
603
|
|
847
604
|
def send_data(tag, chunk)
|
848
|
-
|
849
|
-
|
850
|
-
|
851
|
-
|
605
|
+
ack = @ack_handler && @ack_handler.create_ack(chunk.unique_id, self)
|
606
|
+
connect(nil, ack: ack) do |sock, ri|
|
607
|
+
if ri.state != :established
|
608
|
+
establish_connection(sock, ri)
|
852
609
|
|
853
|
-
|
854
|
-
|
855
|
-
|
856
|
-
if @keepalive
|
857
|
-
@socket_cache.revoke
|
858
|
-
else
|
859
|
-
sock.close rescue nil
|
610
|
+
if ri.state != :established
|
611
|
+
raise ConnectionClosedError, "failed to establish connection with node #{@name}"
|
612
|
+
end
|
860
613
|
end
|
861
|
-
raise
|
862
|
-
end
|
863
614
|
|
864
|
-
|
865
|
-
return sock # to read ACK from socket
|
615
|
+
send_data_actual(sock, tag, chunk)
|
866
616
|
end
|
867
617
|
|
868
|
-
if @keepalive
|
869
|
-
@socket_cache.dec_ref
|
870
|
-
else
|
871
|
-
sock.close_write rescue nil
|
872
|
-
sock.close rescue nil
|
873
|
-
end
|
874
618
|
heartbeat(false)
|
875
619
|
nil
|
876
620
|
end
|
877
621
|
|
878
|
-
def clear
|
879
|
-
@keepalive && @socket_cache.clear
|
880
|
-
end
|
881
|
-
|
882
|
-
def purge_obsolete_socks
|
883
|
-
unless @keepalive
|
884
|
-
raise "Don not call this method without keepalive option"
|
885
|
-
end
|
886
|
-
@socket_cache.purge_obsolete_socks
|
887
|
-
end
|
888
|
-
|
889
622
|
# FORWARD_TCP_HEARTBEAT_DATA = FORWARD_HEADER + ''.to_msgpack + [].to_msgpack
|
623
|
+
#
|
624
|
+
# @return [Boolean] return true if it needs to rebuild nodes
|
890
625
|
def send_heartbeat
|
891
626
|
begin
|
892
627
|
dest_addr = resolved_host
|
@@ -894,14 +629,14 @@ module Fluent::Plugin
|
|
894
629
|
rescue ::SocketError => e
|
895
630
|
if !@resolved_once && @sender.ignore_network_errors_at_startup
|
896
631
|
@log.warn "failed to resolve node name in heartbeating", server: @name || @host, error: e
|
897
|
-
return
|
632
|
+
return false
|
898
633
|
end
|
899
634
|
raise
|
900
635
|
end
|
901
636
|
|
902
637
|
case @sender.heartbeat_type
|
903
638
|
when :transport
|
904
|
-
connect(dest_addr) do |
|
639
|
+
connect(dest_addr) do |_ri, _sock|
|
905
640
|
## don't send any data to not cause a compatibility problem
|
906
641
|
# sock.write FORWARD_TCP_HEARTBEAT_DATA
|
907
642
|
|
@@ -910,8 +645,9 @@ module Fluent::Plugin
|
|
910
645
|
heartbeat(true)
|
911
646
|
end
|
912
647
|
when :udp
|
913
|
-
@usock.send "\0", 0, Socket.pack_sockaddr_in(@port,
|
914
|
-
|
648
|
+
@usock.send "\0", 0, Socket.pack_sockaddr_in(@port, dest_addr)
|
649
|
+
# response is going to receive at on_udp_heatbeat_response_recv
|
650
|
+
false
|
915
651
|
when :none # :none doesn't use this class
|
916
652
|
raise "BUG: heartbeat_type none must not use Node"
|
917
653
|
else
|
@@ -943,14 +679,14 @@ module Fluent::Plugin
|
|
943
679
|
def resolve_dns!
|
944
680
|
addrinfo_list = Socket.getaddrinfo(@host, @port, nil, Socket::SOCK_STREAM)
|
945
681
|
addrinfo = @sender.dns_round_robin ? addrinfo_list.sample : addrinfo_list.first
|
946
|
-
@sockaddr = Socket.pack_sockaddr_in(addrinfo[1], addrinfo[3]) # used by
|
682
|
+
@sockaddr = Socket.pack_sockaddr_in(addrinfo[1], addrinfo[3]) # used by on_udp_heatbeat_response_recv
|
947
683
|
addrinfo[3]
|
948
684
|
end
|
949
685
|
private :resolve_dns!
|
950
686
|
|
951
687
|
def tick
|
952
688
|
now = Time.now.to_f
|
953
|
-
|
689
|
+
unless available?
|
954
690
|
if @failure.hard_timeout?(now)
|
955
691
|
@failure.clear
|
956
692
|
end
|
@@ -959,7 +695,7 @@ module Fluent::Plugin
|
|
959
695
|
|
960
696
|
if @failure.hard_timeout?(now)
|
961
697
|
@log.warn "detached forwarding server '#{@name}'", host: @host, port: @port, hard_timeout: true
|
962
|
-
|
698
|
+
disable!
|
963
699
|
@resolved_host = nil # expire cached host
|
964
700
|
@failure.clear
|
965
701
|
return true
|
@@ -969,7 +705,7 @@ module Fluent::Plugin
|
|
969
705
|
phi = @failure.phi(now)
|
970
706
|
if phi > @sender.phi_threshold
|
971
707
|
@log.warn "detached forwarding server '#{@name}'", host: @host, port: @port, phi: phi, phi_threshold: @sender.phi_threshold
|
972
|
-
|
708
|
+
disable!
|
973
709
|
@resolved_host = nil # expire cached host
|
974
710
|
@failure.clear
|
975
711
|
return true
|
@@ -981,7 +717,7 @@ module Fluent::Plugin
|
|
981
717
|
def heartbeat(detect=true)
|
982
718
|
now = Time.now.to_f
|
983
719
|
@failure.add(now)
|
984
|
-
if detect &&
|
720
|
+
if detect && !available? && @failure.sample_size > @sender.recover_sample_size
|
985
721
|
@available = true
|
986
722
|
@log.warn "recovered forwarding server '#{@name}'", host: @host, port: @port
|
987
723
|
true
|
@@ -990,127 +726,10 @@ module Fluent::Plugin
|
|
990
726
|
end
|
991
727
|
end
|
992
728
|
|
993
|
-
def generate_salt
|
994
|
-
SecureRandom.hex(16)
|
995
|
-
end
|
996
|
-
|
997
|
-
def check_helo(ri, message)
|
998
|
-
@log.debug "checking helo"
|
999
|
-
# ['HELO', options(hash)]
|
1000
|
-
unless message.size == 2 && message[0] == 'HELO'
|
1001
|
-
return false
|
1002
|
-
end
|
1003
|
-
opts = message[1] || {}
|
1004
|
-
# make shared_key_check failed (instead of error) if protocol version mismatch exist
|
1005
|
-
ri.shared_key_nonce = opts['nonce'] || ''
|
1006
|
-
ri.auth = opts['auth'] || ''
|
1007
|
-
true
|
1008
|
-
end
|
1009
|
-
|
1010
|
-
def generate_ping(ri)
|
1011
|
-
@log.debug "generating ping"
|
1012
|
-
# ['PING', self_hostname, sharedkey\_salt, sha512\_hex(sharedkey\_salt + self_hostname + nonce + shared_key),
|
1013
|
-
# username || '', sha512\_hex(auth\_salt + username + password) || '']
|
1014
|
-
shared_key_hexdigest = Digest::SHA512.new.update(@shared_key_salt)
|
1015
|
-
.update(@sender.security.self_hostname)
|
1016
|
-
.update(ri.shared_key_nonce)
|
1017
|
-
.update(@shared_key)
|
1018
|
-
.hexdigest
|
1019
|
-
ping = ['PING', @sender.security.self_hostname, @shared_key_salt, shared_key_hexdigest]
|
1020
|
-
if !ri.auth.empty?
|
1021
|
-
password_hexdigest = Digest::SHA512.new.update(ri.auth).update(@username).update(@password).hexdigest
|
1022
|
-
ping.push(@username, password_hexdigest)
|
1023
|
-
else
|
1024
|
-
ping.push('','')
|
1025
|
-
end
|
1026
|
-
ping
|
1027
|
-
end
|
1028
|
-
|
1029
|
-
def check_pong(ri, message)
|
1030
|
-
@log.debug "checking pong"
|
1031
|
-
# ['PONG', bool(authentication result), 'reason if authentication failed',
|
1032
|
-
# self_hostname, sha512\_hex(salt + self_hostname + nonce + sharedkey)]
|
1033
|
-
unless message.size == 5 && message[0] == 'PONG'
|
1034
|
-
return false, 'invalid format for PONG message'
|
1035
|
-
end
|
1036
|
-
_pong, auth_result, reason, hostname, shared_key_hexdigest = message
|
1037
|
-
|
1038
|
-
unless auth_result
|
1039
|
-
return false, 'authentication failed: ' + reason
|
1040
|
-
end
|
1041
|
-
|
1042
|
-
if hostname == @sender.security.self_hostname
|
1043
|
-
return false, 'same hostname between input and output: invalid configuration'
|
1044
|
-
end
|
1045
|
-
|
1046
|
-
clientside = Digest::SHA512.new.update(@shared_key_salt).update(hostname).update(ri.shared_key_nonce).update(@shared_key).hexdigest
|
1047
|
-
unless shared_key_hexdigest == clientside
|
1048
|
-
return false, 'shared key mismatch'
|
1049
|
-
end
|
1050
|
-
|
1051
|
-
return true, nil
|
1052
|
-
end
|
1053
|
-
|
1054
|
-
def on_read(sock, ri, data)
|
1055
|
-
@log.trace __callee__
|
1056
|
-
|
1057
|
-
case ri.state
|
1058
|
-
when :helo
|
1059
|
-
unless check_helo(ri, data)
|
1060
|
-
@log.warn "received invalid helo message from #{@name}"
|
1061
|
-
disable! # shutdown
|
1062
|
-
return
|
1063
|
-
end
|
1064
|
-
sock.write(generate_ping(ri).to_msgpack)
|
1065
|
-
ri.state = :pingpong
|
1066
|
-
when :pingpong
|
1067
|
-
succeeded, reason = check_pong(ri, data)
|
1068
|
-
unless succeeded
|
1069
|
-
@log.warn "connection refused to #{@name || @host}: #{reason}"
|
1070
|
-
disable! # shutdown
|
1071
|
-
return
|
1072
|
-
end
|
1073
|
-
ri.state = :established
|
1074
|
-
@log.debug "connection established", host: @host, port: @port
|
1075
|
-
else
|
1076
|
-
raise "BUG: unknown session state: #{ri.state}"
|
1077
|
-
end
|
1078
|
-
end
|
1079
|
-
|
1080
729
|
private
|
1081
730
|
|
1082
|
-
def connect(host = nil)
|
1083
|
-
|
1084
|
-
if @keepalive
|
1085
|
-
ri = RequestInfo.new(:established)
|
1086
|
-
sock = @socket_cache.fetch_or do
|
1087
|
-
s = @sender.create_transfer_socket(host || resolved_host, port, @hostname)
|
1088
|
-
ri = RequestInfo.new(@sender.security ? :helo : :established) # overwrite if new connection
|
1089
|
-
s
|
1090
|
-
end
|
1091
|
-
[sock, ri]
|
1092
|
-
else
|
1093
|
-
@log.debug('connect new socket')
|
1094
|
-
[@sender.create_transfer_socket(host || resolved_host, port, @hostname), RequestInfo.new(@sender.security ? :helo : :established)]
|
1095
|
-
end
|
1096
|
-
|
1097
|
-
if block_given?
|
1098
|
-
ret = nil
|
1099
|
-
begin
|
1100
|
-
ret = yield(socket, request_info)
|
1101
|
-
rescue
|
1102
|
-
@socket_cache.revoke if @keepalive
|
1103
|
-
raise
|
1104
|
-
else
|
1105
|
-
@socket_cache.dec_ref if @keepalive
|
1106
|
-
ensure
|
1107
|
-
socket.close unless @keepalive
|
1108
|
-
end
|
1109
|
-
|
1110
|
-
ret
|
1111
|
-
else
|
1112
|
-
[socket, request_info]
|
1113
|
-
end
|
731
|
+
def connect(host = nil, ack: false, &block)
|
732
|
+
@connection_manager.connect(host: host || resolved_host, port: port, hostname: @hostname, ack: ack, &block)
|
1114
733
|
end
|
1115
734
|
end
|
1116
735
|
|
@@ -1128,68 +747,5 @@ module Fluent::Plugin
|
|
1128
747
|
true
|
1129
748
|
end
|
1130
749
|
end
|
1131
|
-
|
1132
|
-
class FailureDetector
|
1133
|
-
PHI_FACTOR = 1.0 / Math.log(10.0)
|
1134
|
-
SAMPLE_SIZE = 1000
|
1135
|
-
|
1136
|
-
def initialize(heartbeat_interval, hard_timeout, init_last)
|
1137
|
-
@heartbeat_interval = heartbeat_interval
|
1138
|
-
@last = init_last
|
1139
|
-
@hard_timeout = hard_timeout
|
1140
|
-
|
1141
|
-
# microsec
|
1142
|
-
@init_gap = (heartbeat_interval * 1e6).to_i
|
1143
|
-
@window = [@init_gap]
|
1144
|
-
end
|
1145
|
-
|
1146
|
-
def hard_timeout?(now)
|
1147
|
-
now - @last > @hard_timeout
|
1148
|
-
end
|
1149
|
-
|
1150
|
-
def add(now)
|
1151
|
-
if @window.empty?
|
1152
|
-
@window << @init_gap
|
1153
|
-
@last = now
|
1154
|
-
else
|
1155
|
-
gap = now - @last
|
1156
|
-
@window << (gap * 1e6).to_i
|
1157
|
-
@window.shift if @window.length > SAMPLE_SIZE
|
1158
|
-
@last = now
|
1159
|
-
end
|
1160
|
-
end
|
1161
|
-
|
1162
|
-
def phi(now)
|
1163
|
-
size = @window.size
|
1164
|
-
return 0.0 if size == 0
|
1165
|
-
|
1166
|
-
# Calculate weighted moving average
|
1167
|
-
mean_usec = 0
|
1168
|
-
fact = 0
|
1169
|
-
@window.each_with_index {|gap,i|
|
1170
|
-
mean_usec += gap * (1+i)
|
1171
|
-
fact += (1+i)
|
1172
|
-
}
|
1173
|
-
mean_usec = mean_usec / fact
|
1174
|
-
|
1175
|
-
# Normalize arrive intervals into 1sec
|
1176
|
-
mean = (mean_usec.to_f / 1e6) - @heartbeat_interval + 1
|
1177
|
-
|
1178
|
-
# Calculate phi of the phi accrual failure detector
|
1179
|
-
t = now - @last - @heartbeat_interval + 1
|
1180
|
-
phi = PHI_FACTOR * t / mean
|
1181
|
-
|
1182
|
-
return phi
|
1183
|
-
end
|
1184
|
-
|
1185
|
-
def sample_size
|
1186
|
-
@window.size
|
1187
|
-
end
|
1188
|
-
|
1189
|
-
def clear
|
1190
|
-
@window.clear
|
1191
|
-
@last = 0
|
1192
|
-
end
|
1193
|
-
end
|
1194
750
|
end
|
1195
751
|
end
|