fluentd 0.14.4 → 0.14.5
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of fluentd might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/ChangeLog +18 -0
- data/example/in_forward.conf +3 -0
- data/example/in_forward_client.conf +37 -0
- data/example/in_forward_shared_key.conf +15 -0
- data/example/in_forward_users.conf +24 -0
- data/example/out_forward.conf +13 -13
- data/example/out_forward_client.conf +109 -0
- data/example/out_forward_shared_key.conf +36 -0
- data/example/out_forward_users.conf +65 -0
- data/example/{out_buffered_null.conf → out_null.conf} +10 -6
- data/example/secondary_file.conf +41 -0
- data/lib/fluent/agent.rb +3 -1
- data/lib/fluent/plugin/buffer.rb +5 -1
- data/lib/fluent/plugin/in_forward.rb +300 -50
- data/lib/fluent/plugin/in_tail.rb +41 -85
- data/lib/fluent/plugin/multi_output.rb +4 -0
- data/lib/fluent/plugin/out_forward.rb +326 -209
- data/lib/fluent/plugin/out_null.rb +37 -0
- data/lib/fluent/plugin/out_secondary_file.rb +128 -0
- data/lib/fluent/plugin/out_stdout.rb +38 -2
- data/lib/fluent/plugin/output.rb +13 -5
- data/lib/fluent/root_agent.rb +1 -1
- data/lib/fluent/test/startup_shutdown.rb +33 -0
- data/lib/fluent/version.rb +1 -1
- data/test/plugin/test_in_forward.rb +906 -441
- data/test/plugin/test_in_monitor_agent.rb +4 -0
- data/test/plugin/test_in_tail.rb +681 -663
- data/test/plugin/test_out_forward.rb +150 -208
- data/test/plugin/test_out_null.rb +85 -9
- data/test/plugin/test_out_secondary_file.rb +432 -0
- data/test/plugin/test_out_stdout.rb +143 -45
- data/test/test_root_agent.rb +42 -0
- metadata +14 -9
- data/lib/fluent/plugin/out_buffered_null.rb +0 -59
- data/lib/fluent/plugin/out_buffered_stdout.rb +0 -70
- data/test/plugin/test_out_buffered_null.rb +0 -79
- data/test/plugin/test_out_buffered_stdout.rb +0 -122
@@ -53,18 +53,7 @@ module Fluent
|
|
53
53
|
desc 'The timeout time when sending event logs.'
|
54
54
|
config_param :send_timeout, :time, default: 60
|
55
55
|
desc 'The transport protocol to use for heartbeats.(udp,tcp,none)'
|
56
|
-
config_param :heartbeat_type,
|
57
|
-
case val.downcase
|
58
|
-
when 'tcp'
|
59
|
-
:tcp
|
60
|
-
when 'udp'
|
61
|
-
:udp
|
62
|
-
when 'none'
|
63
|
-
:none
|
64
|
-
else
|
65
|
-
raise ConfigError, "forward output heartbeat type should be 'tcp', 'udp', or 'none'"
|
66
|
-
end
|
67
|
-
end
|
56
|
+
config_param :heartbeat_type, :enum, list: [:tcp, :udp, :none], default: :tcp
|
68
57
|
desc 'The interval of the heartbeat packer.'
|
69
58
|
config_param :heartbeat_interval, :time, default: 1
|
70
59
|
desc 'The wait time before accepting a server fault recovery.'
|
@@ -81,21 +70,54 @@ module Fluent
|
|
81
70
|
desc 'Change the protocol to at-least-once.'
|
82
71
|
config_param :require_ack_response, :bool, default: false # require in_forward to respond with ack
|
83
72
|
desc 'This option is used when require_ack_response is true.'
|
84
|
-
config_param :ack_response_timeout, :time, default: 190
|
73
|
+
config_param :ack_response_timeout, :time, default: 190
|
74
|
+
desc 'Reading data size from server'
|
75
|
+
config_param :read_length, :size, default: 512 # 512bytes
|
76
|
+
desc 'The interval while reading data from server'
|
77
|
+
config_param :read_interval_msec, :integer, default: 50 # 50ms
|
85
78
|
# Linux default tcp_syn_retries is 5 (in many environments)
|
86
79
|
# 3 + 6 + 12 + 24 + 48 + 96 -> 189 (sec)
|
87
80
|
desc 'Enable client-side DNS round robin.'
|
88
81
|
config_param :dns_round_robin, :bool, default: false # heartbeat_type 'udp' is not available for this
|
89
82
|
|
83
|
+
config_section :security, required: false, multi: false do
|
84
|
+
desc 'The hostname'
|
85
|
+
config_param :self_hostname, :string
|
86
|
+
desc 'Shared key for authentication'
|
87
|
+
config_param :shared_key, :string, secret: true
|
88
|
+
end
|
89
|
+
|
90
|
+
config_section :server, param_name: :servers do
|
91
|
+
desc "The IP address or host name of the server."
|
92
|
+
config_param :host, :string
|
93
|
+
desc "The name of the server. Used in log messages."
|
94
|
+
config_param :name, :string, default: nil
|
95
|
+
desc "The port number of the host."
|
96
|
+
config_param :port, :integer, default: LISTEN_PORT
|
97
|
+
desc "The shared key per server."
|
98
|
+
config_param :shared_key, :string, default: nil, secret: true
|
99
|
+
desc "The username for authentication."
|
100
|
+
config_param :username, :string, default: ''
|
101
|
+
desc "The password for authentication."
|
102
|
+
config_param :password, :string, default: '', secret: true
|
103
|
+
desc "Marks a node as the standby node for an Active-Standby model between Fluentd nodes."
|
104
|
+
config_param :standby, :bool, default: false
|
105
|
+
desc "The load balancing weight."
|
106
|
+
config_param :weight, :integer, default: 60
|
107
|
+
end
|
108
|
+
|
90
109
|
attr_reader :nodes
|
91
110
|
|
92
111
|
config_param :port, :integer, default: LISTEN_PORT, obsoleted: "User <server> section instead."
|
93
112
|
config_param :host, :string, default: nil, obsoleted: "Use <server> section instead."
|
94
113
|
|
114
|
+
attr_reader :read_interval, :recover_sample_size
|
115
|
+
|
95
116
|
def configure(conf)
|
96
117
|
super
|
97
118
|
|
98
|
-
|
119
|
+
@read_interval = @read_interval_msec / 1000.0
|
120
|
+
@recover_sample_size = @recover_wait / @heartbeat_interval
|
99
121
|
|
100
122
|
if @dns_round_robin
|
101
123
|
if @heartbeat_type == :udp
|
@@ -103,39 +125,23 @@ module Fluent
|
|
103
125
|
end
|
104
126
|
end
|
105
127
|
|
106
|
-
|
107
|
-
next if e.name != "server"
|
108
|
-
|
109
|
-
host = e['host']
|
110
|
-
port = e['port']
|
111
|
-
port = port ? port.to_i : LISTEN_PORT
|
112
|
-
|
113
|
-
weight = e['weight']
|
114
|
-
weight = weight ? weight.to_i : 60
|
115
|
-
|
116
|
-
standby = !!e['standby']
|
117
|
-
|
118
|
-
name = e['name']
|
119
|
-
unless name
|
120
|
-
name = "#{host}:#{port}"
|
121
|
-
end
|
122
|
-
|
128
|
+
@servers.each do |server|
|
123
129
|
failure = FailureDetector.new(@heartbeat_interval, @hard_timeout, Time.now.to_i.to_f)
|
130
|
+
name = server.name || "#{server.host}:#{server.port}"
|
124
131
|
|
125
|
-
|
126
|
-
@phi_threshold, recover_sample_size, @expire_dns_cache, @phi_failure_detector, @dns_round_robin)
|
127
|
-
|
132
|
+
log.info "adding forwarding server '#{name}'", host: server.host, port: server.port, weight: server.weight, plugin_id: plugin_id
|
128
133
|
if @heartbeat_type == :none
|
129
|
-
@nodes << NoneHeartbeatNode.new(
|
134
|
+
@nodes << NoneHeartbeatNode.new(self, server, failure: failure)
|
130
135
|
else
|
131
|
-
@nodes << Node.new(
|
136
|
+
@nodes << Node.new(self, server, failure: failure)
|
132
137
|
end
|
133
|
-
|
134
|
-
}
|
138
|
+
end
|
135
139
|
|
136
140
|
if @nodes.empty?
|
137
141
|
raise ConfigError, "forward output plugin requires at least one <server> is required"
|
138
142
|
end
|
143
|
+
|
144
|
+
raise Fluent::ConfigError, "ack_response_timeout must be a positive integer" if @ack_response_timeout < 1
|
139
145
|
end
|
140
146
|
|
141
147
|
def start
|
@@ -144,6 +150,7 @@ module Fluent
|
|
144
150
|
@rand_seed = Random.new.seed
|
145
151
|
rebuild_weight_array
|
146
152
|
@rr = 0
|
153
|
+
@usock = nil
|
147
154
|
|
148
155
|
unless @heartbeat_type == :none
|
149
156
|
@loop = Coolio::Loop.new
|
@@ -194,7 +201,7 @@ module Fluent
|
|
194
201
|
|
195
202
|
if node.available?
|
196
203
|
begin
|
197
|
-
send_data(
|
204
|
+
node.send_data(tag, chunk)
|
198
205
|
return
|
199
206
|
rescue
|
200
207
|
# for load balancing during detecting crashed servers
|
@@ -210,6 +217,12 @@ module Fluent
|
|
210
217
|
end
|
211
218
|
end
|
212
219
|
|
220
|
+
# MessagePack FixArray length is 3
|
221
|
+
FORWARD_HEADER = [0x93].pack('C').freeze
|
222
|
+
def forward_header
|
223
|
+
FORWARD_HEADER
|
224
|
+
end
|
225
|
+
|
213
226
|
private
|
214
227
|
|
215
228
|
def rebuild_weight_array
|
@@ -254,111 +267,6 @@ module Fluent
|
|
254
267
|
@weight_array = weight_array
|
255
268
|
end
|
256
269
|
|
257
|
-
# MessagePack FixArray length is 3
|
258
|
-
FORWARD_HEADER = [0x93].pack('C').freeze
|
259
|
-
def forward_header
|
260
|
-
FORWARD_HEADER
|
261
|
-
end
|
262
|
-
|
263
|
-
#FORWARD_TCP_HEARTBEAT_DATA = FORWARD_HEADER + ''.to_msgpack + [].to_msgpack
|
264
|
-
def send_heartbeat_tcp(node)
|
265
|
-
sock = connect(node)
|
266
|
-
begin
|
267
|
-
opt = [1, @send_timeout.to_i].pack('I!I!') # { int l_onoff; int l_linger; }
|
268
|
-
sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_LINGER, opt)
|
269
|
-
opt = [@send_timeout.to_i, 0].pack('L!L!') # struct timeval
|
270
|
-
# don't send any data to not cause a compatibility problem
|
271
|
-
#sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_SNDTIMEO, opt)
|
272
|
-
#sock.write FORWARD_TCP_HEARTBEAT_DATA
|
273
|
-
node.heartbeat(true)
|
274
|
-
ensure
|
275
|
-
sock.close_write
|
276
|
-
sock.close
|
277
|
-
end
|
278
|
-
end
|
279
|
-
|
280
|
-
def send_data(node, tag, chunk)
|
281
|
-
sock = connect(node)
|
282
|
-
begin
|
283
|
-
opt = [1, @send_timeout.to_i].pack('I!I!') # { int l_onoff; int l_linger; }
|
284
|
-
sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_LINGER, opt)
|
285
|
-
|
286
|
-
opt = [@send_timeout.to_i, 0].pack('L!L!') # struct timeval
|
287
|
-
sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_SNDTIMEO, opt)
|
288
|
-
|
289
|
-
# beginArray(3)
|
290
|
-
sock.write forward_header
|
291
|
-
|
292
|
-
# writeRaw(tag)
|
293
|
-
sock.write tag.to_msgpack # tag
|
294
|
-
|
295
|
-
# beginRaw(size)
|
296
|
-
sz = chunk.size
|
297
|
-
#if sz < 32
|
298
|
-
# # FixRaw
|
299
|
-
# sock.write [0xa0 | sz].pack('C')
|
300
|
-
#elsif sz < 65536
|
301
|
-
# # raw 16
|
302
|
-
# sock.write [0xda, sz].pack('Cn')
|
303
|
-
#else
|
304
|
-
# raw 32
|
305
|
-
sock.write [0xdb, sz].pack('CN')
|
306
|
-
#end
|
307
|
-
|
308
|
-
# writeRawBody(packed_es)
|
309
|
-
chunk.write_to(sock)
|
310
|
-
|
311
|
-
option = { 'size' => chunk.size_of_events }
|
312
|
-
option['chunk'] = Base64.encode64(chunk.unique_id) if @require_ack_response
|
313
|
-
sock.write option.to_msgpack
|
314
|
-
|
315
|
-
if @require_ack_response && @ack_response_timeout > 0
|
316
|
-
# Waiting for a response here results in a decrease of throughput because a chunk queue is locked.
|
317
|
-
# To avoid a decrease of throughput, it is necessary to prepare a list of chunks that wait for responses
|
318
|
-
# and process them asynchronously.
|
319
|
-
if IO.select([sock], nil, nil, @ack_response_timeout)
|
320
|
-
raw_data = sock.recv(1024)
|
321
|
-
|
322
|
-
# When connection is closed by remote host, socket is ready to read and #recv returns an empty string that means EOF.
|
323
|
-
# If this happens we assume the data wasn't delivered and retry it.
|
324
|
-
if raw_data.empty?
|
325
|
-
@log.warn "node #{node.host}:#{node.port} closed the connection. regard it as unavailable."
|
326
|
-
node.disable!
|
327
|
-
raise ForwardOutputConnectionClosedError, "node #{node.host}:#{node.port} closed connection"
|
328
|
-
else
|
329
|
-
# Serialization type of the response is same as sent data.
|
330
|
-
res = MessagePack.unpack(raw_data)
|
331
|
-
|
332
|
-
if res['ack'] != option['chunk']
|
333
|
-
# Some error may have occurred when the ack and the chunk id differ, so send the chunk again.
|
334
|
-
raise ForwardOutputResponseError, "ack in response and chunk id in sent data are different"
|
335
|
-
end
|
336
|
-
end
|
337
|
-
|
338
|
-
else
|
339
|
-
# IO.select returns nil on timeout.
|
340
|
-
# There are 2 types of cases when no response has been received:
|
341
|
-
# (1) the node does not support sending responses
|
342
|
-
# (2) the node does support sending response but responses have not arrived for some reasons.
|
343
|
-
@log.warn "no response from #{node.host}:#{node.port}. regard it as unavailable."
|
344
|
-
node.disable!
|
345
|
-
raise ForwardOutputACKTimeoutError, "node #{node.host}:#{node.port} does not return ACK"
|
346
|
-
end
|
347
|
-
end
|
348
|
-
|
349
|
-
node.heartbeat(false)
|
350
|
-
res # for test
|
351
|
-
ensure
|
352
|
-
sock.close_write
|
353
|
-
sock.close
|
354
|
-
end
|
355
|
-
end
|
356
|
-
|
357
|
-
def connect(node)
|
358
|
-
# TODO unix socket?
|
359
|
-
TCPSocket.new(node.resolved_host, node.port)
|
360
|
-
end
|
361
|
-
|
362
270
|
class HeartbeatRequestTimer < Coolio::TimerWatcher
|
363
271
|
def initialize(interval, callback)
|
364
272
|
super(interval, true)
|
@@ -379,15 +287,11 @@ module Fluent
|
|
379
287
|
rebuild_weight_array
|
380
288
|
end
|
381
289
|
begin
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
else
|
386
|
-
@usock.send "\0", 0, Socket.pack_sockaddr_in(n.port, n.resolved_host)
|
387
|
-
end
|
290
|
+
log.trace "sending heartbeat", host: n.host, port: n.port, heartbeat_type: @heartbeat_type
|
291
|
+
n.usock = @usock if @usock
|
292
|
+
n.send_heartbeat
|
388
293
|
rescue Errno::EAGAIN, Errno::EWOULDBLOCK, Errno::EINTR, Errno::ECONNREFUSED
|
389
|
-
|
390
|
-
log.debug "failed to send heartbeat packet to #{n.host}:#{n.port}", error: $!.to_s
|
294
|
+
log.debug "failed to send heartbeat packet", host: n.host, port: n.port, heartbeat_type: @heartbeat_type, error: $!
|
391
295
|
end
|
392
296
|
}
|
393
297
|
end
|
@@ -423,27 +327,38 @@ module Fluent
|
|
423
327
|
end
|
424
328
|
end
|
425
329
|
|
426
|
-
NodeConfig = Struct.new("NodeConfig", :name, :host, :port, :weight, :standby, :failure,
|
427
|
-
:phi_threshold, :recover_sample_size, :expire_dns_cache, :phi_failure_detector, :dns_round_robin)
|
428
|
-
|
429
330
|
class Node
|
430
|
-
def initialize(
|
431
|
-
@
|
432
|
-
@
|
433
|
-
|
434
|
-
@
|
435
|
-
@
|
436
|
-
@
|
437
|
-
@
|
331
|
+
def initialize(sender, server, failure:)
|
332
|
+
@sender = sender
|
333
|
+
@log = sender.log
|
334
|
+
|
335
|
+
@name = server.name
|
336
|
+
@host = server.host
|
337
|
+
@port = server.port
|
338
|
+
@weight = server.weight
|
339
|
+
@standby = server.standby
|
340
|
+
@failure = failure
|
438
341
|
@available = true
|
342
|
+
@state = nil
|
343
|
+
|
344
|
+
@usock = nil
|
345
|
+
|
346
|
+
@username = server.username
|
347
|
+
@password = server.password
|
348
|
+
@shared_key = server.shared_key || (sender.security && sender.security.shared_key) || ""
|
349
|
+
@shared_key_salt = generate_salt
|
350
|
+
@shared_key_nonce = ""
|
351
|
+
|
352
|
+
@unpacker = Fluent::Engine.msgpack_unpacker
|
439
353
|
|
440
354
|
@resolved_host = nil
|
441
355
|
@resolved_time = 0
|
442
356
|
resolved_host # check dns
|
443
357
|
end
|
444
358
|
|
445
|
-
|
446
|
-
|
359
|
+
attr_accessor :usock
|
360
|
+
|
361
|
+
attr_reader :name, :host, :port, :weight, :standby, :state
|
447
362
|
attr_reader :sockaddr # used by on_heartbeat
|
448
363
|
attr_reader :failure, :available # for test
|
449
364
|
|
@@ -456,33 +371,177 @@ module Fluent
|
|
456
371
|
end
|
457
372
|
|
458
373
|
def standby?
|
459
|
-
@
|
374
|
+
@standby
|
375
|
+
end
|
376
|
+
|
377
|
+
def connect
|
378
|
+
TCPSocket.new(resolved_host, port)
|
379
|
+
end
|
380
|
+
|
381
|
+
def set_socket_options(sock)
|
382
|
+
opt = [1, @sender.send_timeout.to_i].pack('I!I!') # { int l_onoff; int l_linger; }
|
383
|
+
sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_LINGER, opt)
|
384
|
+
|
385
|
+
opt = [@sender.send_timeout.to_i, 0].pack('L!L!') # struct timeval
|
386
|
+
sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_SNDTIMEO, opt)
|
387
|
+
|
388
|
+
sock
|
389
|
+
end
|
390
|
+
|
391
|
+
def establish_connection(sock)
|
392
|
+
while available? && @state != :established
|
393
|
+
begin
|
394
|
+
# TODO: On Ruby 2.2 or earlier, read_nonblock doesn't work as expected.
|
395
|
+
# We need to rewrite this part using the new socket/server plugin helper.
|
396
|
+
buf = sock.read_nonblock(@sender.read_length)
|
397
|
+
if buf.empty?
|
398
|
+
sleep @sender.read_interval
|
399
|
+
next
|
400
|
+
end
|
401
|
+
@unpacker.feed_each(buf) do |data|
|
402
|
+
on_read(sock, data)
|
403
|
+
end
|
404
|
+
rescue IO::WaitReadable
|
405
|
+
# If the exception is Errno::EWOULDBLOCK or Errno::EAGAIN, it is extended by IO::WaitReadable.
|
406
|
+
# So IO::WaitReadable can be used to rescue the exceptions for retrying read_nonblock.
|
407
|
+
# http://docs.ruby-lang.org/en/2.3.0/IO.html#method-i-read_nonblock
|
408
|
+
sleep @sender.read_interval unless @state == :established
|
409
|
+
rescue SystemCallError => e
|
410
|
+
@log.warn "disconnected by error", host: @host, port: @port, error: e
|
411
|
+
disable!
|
412
|
+
break
|
413
|
+
rescue EOFError
|
414
|
+
@log.warn "disconnected", host: @host, port: @port
|
415
|
+
disable!
|
416
|
+
break
|
417
|
+
end
|
418
|
+
end
|
419
|
+
end
|
420
|
+
|
421
|
+
def send_data(tag, chunk)
|
422
|
+
sock = connect
|
423
|
+
@state = @sender.security ? :helo : :established
|
424
|
+
begin
|
425
|
+
set_socket_options(sock)
|
426
|
+
|
427
|
+
if @state != :established
|
428
|
+
establish_connection(sock)
|
429
|
+
end
|
430
|
+
|
431
|
+
unless available?
|
432
|
+
raise ForwardOutputConnectionClosedError, "failed to establish connection with node #{@name}"
|
433
|
+
end
|
434
|
+
|
435
|
+
option = { 'size' => chunk.size_of_events }
|
436
|
+
option['chunk'] = Base64.encode64(chunk.unique_id) if @sender.require_ack_response
|
437
|
+
|
438
|
+
# out_forward always uses Raw32 type for content.
|
439
|
+
# Raw16 can store only 64kbytes, and it should be much smaller than buffer chunk size.
|
440
|
+
|
441
|
+
sock.write @sender.forward_header # beginArray(3)
|
442
|
+
sock.write tag.to_msgpack # 1. writeRaw(tag)
|
443
|
+
sock.write [0xdb, chunk.size].pack('CN') # 2. beginRaw(size) raw32
|
444
|
+
chunk.write_to(sock) # writeRawBody(packed_es)
|
445
|
+
sock.write option.to_msgpack # 3. writeOption(option)
|
446
|
+
|
447
|
+
if @sender.require_ack_response
|
448
|
+
# Waiting for a response here results in a decrease of throughput because a chunk queue is locked.
|
449
|
+
# To avoid a decrease of throughput, it is necessary to prepare a list of chunks that wait for responses
|
450
|
+
# and process them asynchronously.
|
451
|
+
if IO.select([sock], nil, nil, @sender.ack_response_timeout)
|
452
|
+
raw_data = begin
|
453
|
+
sock.recv(1024)
|
454
|
+
rescue Errno::ECONNRESET
|
455
|
+
""
|
456
|
+
end
|
457
|
+
|
458
|
+
# When connection is closed by remote host, socket is ready to read and #recv returns an empty string that means EOF.
|
459
|
+
# If this happens we assume the data wasn't delivered and retry it.
|
460
|
+
if raw_data.empty?
|
461
|
+
@log.warn "node closed the connection. regard it as unavailable.", host: @host, port: @port
|
462
|
+
disable!
|
463
|
+
raise ForwardOutputConnectionClosedError, "node #{@host}:#{@port} closed connection"
|
464
|
+
else
|
465
|
+
@unpacker.feed(raw_data)
|
466
|
+
res = @unpacker.read
|
467
|
+
if res['ack'] != option['chunk']
|
468
|
+
# Some error may have occurred when the ack and the chunk id differ, so send the chunk again.
|
469
|
+
raise ForwardOutputResponseError, "ack in response and chunk id in sent data are different"
|
470
|
+
end
|
471
|
+
end
|
472
|
+
|
473
|
+
else
|
474
|
+
# IO.select returns nil on timeout.
|
475
|
+
# There are 2 types of cases when no response has been received:
|
476
|
+
# (1) the node does not support sending responses
|
477
|
+
# (2) the node does support sending response but responses have not arrived for some reasons.
|
478
|
+
@log.warn "no response from node. regard it as unavailable.", host: @host, port: @port
|
479
|
+
disable!
|
480
|
+
raise ForwardOutputACKTimeoutError, "node #{host}:#{port} does not return ACK"
|
481
|
+
end
|
482
|
+
end
|
483
|
+
|
484
|
+
heartbeat(false)
|
485
|
+
res # for test
|
486
|
+
ensure
|
487
|
+
sock.close_write
|
488
|
+
sock.close
|
489
|
+
end
|
490
|
+
end
|
491
|
+
|
492
|
+
# FORWARD_TCP_HEARTBEAT_DATA = FORWARD_HEADER + ''.to_msgpack + [].to_msgpack
|
493
|
+
def send_heartbeat
|
494
|
+
case @sender.heartbeat_type
|
495
|
+
when :tcp
|
496
|
+
sock = connect
|
497
|
+
begin
|
498
|
+
opt = [1, @sender.send_timeout.to_i].pack('I!I!') # { int l_onoff; int l_linger; }
|
499
|
+
sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_LINGER, opt)
|
500
|
+
# opt = [@sender.send_timeout.to_i, 0].pack('L!L!') # struct timeval
|
501
|
+
# sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_SNDTIMEO, opt)
|
502
|
+
|
503
|
+
## don't send any data to not cause a compatibility problem
|
504
|
+
# sock.write FORWARD_TCP_HEARTBEAT_DATA
|
505
|
+
|
506
|
+
# successful tcp connection establishment is considered as valid heartbeat
|
507
|
+
heartbeat(true)
|
508
|
+
ensure
|
509
|
+
sock.close_write
|
510
|
+
sock.close
|
511
|
+
end
|
512
|
+
when :udp
|
513
|
+
@usock.send "\0", 0, Socket.pack_sockaddr_in(n.port, n.resolved_host)
|
514
|
+
when :none # :none doesn't use this class
|
515
|
+
raise "BUG: heartbeat_type none must not use Node"
|
516
|
+
else
|
517
|
+
raise "BUG: unknown heartbeat_type '#{@sender.heartbeat_type}'"
|
518
|
+
end
|
460
519
|
end
|
461
520
|
|
462
521
|
def resolved_host
|
463
|
-
case @
|
522
|
+
case @sender.expire_dns_cache
|
464
523
|
when 0
|
465
524
|
# cache is disabled
|
466
|
-
|
525
|
+
resolve_dns!
|
467
526
|
|
468
527
|
when nil
|
469
528
|
# persistent cache
|
470
|
-
|
529
|
+
@resolved_host ||= resolve_dns!
|
471
530
|
|
472
531
|
else
|
473
532
|
now = Engine.now
|
474
533
|
rh = @resolved_host
|
475
|
-
if !rh || now - @resolved_time >= @
|
534
|
+
if !rh || now - @resolved_time >= @sender.expire_dns_cache
|
476
535
|
rh = @resolved_host = resolve_dns!
|
477
536
|
@resolved_time = now
|
478
537
|
end
|
479
|
-
|
538
|
+
rh
|
480
539
|
end
|
481
540
|
end
|
482
541
|
|
483
542
|
def resolve_dns!
|
484
543
|
addrinfo_list = Socket.getaddrinfo(@host, @port, nil, Socket::SOCK_STREAM)
|
485
|
-
addrinfo = @
|
544
|
+
addrinfo = @sender.dns_round_robin ? addrinfo_list.sample : addrinfo_list.first
|
486
545
|
@sockaddr = Socket.pack_sockaddr_in(addrinfo[1], addrinfo[3]) # used by on_heartbeat
|
487
546
|
addrinfo[3]
|
488
547
|
end
|
@@ -505,36 +564,122 @@ module Fluent
|
|
505
564
|
return true
|
506
565
|
end
|
507
566
|
|
508
|
-
if @
|
567
|
+
if @sender.phi_failure_detector
|
509
568
|
phi = @failure.phi(now)
|
510
|
-
|
511
|
-
|
512
|
-
@log.warn "detached forwarding server '#{@name}'", host: @host, port: @port, phi: phi
|
569
|
+
if phi > @sender.phi_threshold
|
570
|
+
@log.warn "detached forwarding server '#{@name}'", host: @host, port: @port, phi: phi, phi_threshold: @sender.phi_threshold
|
513
571
|
@available = false
|
514
572
|
@resolved_host = nil # expire cached host
|
515
573
|
@failure.clear
|
516
574
|
return true
|
517
575
|
end
|
518
576
|
end
|
519
|
-
|
577
|
+
false
|
520
578
|
end
|
521
579
|
|
522
580
|
def heartbeat(detect=true)
|
523
581
|
now = Time.now.to_f
|
524
582
|
@failure.add(now)
|
525
|
-
|
526
|
-
if detect && !@available && @failure.sample_size > @conf.recover_sample_size
|
583
|
+
if detect && !@available && @failure.sample_size > @sender.recover_sample_size
|
527
584
|
@available = true
|
528
585
|
@log.warn "recovered forwarding server '#{@name}'", host: @host, port: @port
|
529
|
-
|
586
|
+
true
|
530
587
|
else
|
531
|
-
|
588
|
+
nil
|
532
589
|
end
|
533
590
|
end
|
534
591
|
|
592
|
+
# TODO: #to_msgpack(string) is deprecated
|
535
593
|
def to_msgpack(out = '')
|
536
594
|
[@host, @port, @weight, @available].to_msgpack(out)
|
537
595
|
end
|
596
|
+
|
597
|
+
def generate_salt
|
598
|
+
SecureRandom.hex(16)
|
599
|
+
end
|
600
|
+
|
601
|
+
def check_helo(message)
|
602
|
+
@log.debug "checking helo"
|
603
|
+
# ['HELO', options(hash)]
|
604
|
+
unless message.size == 2 && message[0] == 'HELO'
|
605
|
+
return false
|
606
|
+
end
|
607
|
+
opts = message[1] || {}
|
608
|
+
# make shared_key_check fail (instead of raising an error) if a protocol version mismatch exists
|
609
|
+
@shared_key_nonce = opts['nonce'] || ''
|
610
|
+
@authentication = opts['auth'] || ''
|
611
|
+
true
|
612
|
+
end
|
613
|
+
|
614
|
+
def generate_ping
|
615
|
+
@log.debug "generating ping"
|
616
|
+
# ['PING', self_hostname, sharedkey_salt, sha512_hex(sharedkey_salt + self_hostname + nonce + shared_key),
|
617
|
+
# username || '', sha512_hex(auth_salt + username + password) || '']
|
618
|
+
shared_key_hexdigest = Digest::SHA512.new.update(@shared_key_salt)
|
619
|
+
.update(@sender.security.self_hostname)
|
620
|
+
.update(@shared_key_nonce)
|
621
|
+
.update(@shared_key)
|
622
|
+
.hexdigest
|
623
|
+
ping = ['PING', @sender.security.self_hostname, @shared_key_salt, shared_key_hexdigest]
|
624
|
+
if !@authentication.empty?
|
625
|
+
password_hexdigest = Digest::SHA512.new.update(@authentication).update(@username).update(@password).hexdigest
|
626
|
+
ping.push(@username, password_hexdigest)
|
627
|
+
else
|
628
|
+
ping.push('','')
|
629
|
+
end
|
630
|
+
ping
|
631
|
+
end
|
632
|
+
|
633
|
+
def check_pong(message)
|
634
|
+
@log.debug "checking pong"
|
635
|
+
# ['PONG', bool(authentication result), 'reason if authentication failed',
|
636
|
+
# self_hostname, sha512_hex(salt + self_hostname + nonce + sharedkey)]
|
637
|
+
unless message.size == 5 && message[0] == 'PONG'
|
638
|
+
return false, 'invalid format for PONG message'
|
639
|
+
end
|
640
|
+
_pong, auth_result, reason, hostname, shared_key_hexdigest = message
|
641
|
+
|
642
|
+
unless auth_result
|
643
|
+
return false, 'authentication failed: ' + reason
|
644
|
+
end
|
645
|
+
|
646
|
+
if hostname == @sender.security.self_hostname
|
647
|
+
return false, 'same hostname between input and output: invalid configuration'
|
648
|
+
end
|
649
|
+
|
650
|
+
clientside = Digest::SHA512.new.update(@shared_key_salt).update(hostname).update(@shared_key_nonce).update(@shared_key).hexdigest
|
651
|
+
unless shared_key_hexdigest == clientside
|
652
|
+
return false, 'shared key mismatch'
|
653
|
+
end
|
654
|
+
|
655
|
+
return true, nil
|
656
|
+
end
|
657
|
+
|
658
|
+
def on_read(sock, data)
|
659
|
+
@log.trace __callee__
|
660
|
+
|
661
|
+
case @state
|
662
|
+
when :helo
|
663
|
+
unless check_helo(data)
|
664
|
+
@log.warn "received invalid helo message from #{@name}"
|
665
|
+
disable! # shutdown
|
666
|
+
return
|
667
|
+
end
|
668
|
+
sock.write(generate_ping.to_msgpack)
|
669
|
+
@state = :pingpong
|
670
|
+
when :pingpong
|
671
|
+
succeeded, reason = check_pong(data)
|
672
|
+
unless succeeded
|
673
|
+
@log.warn "connection refused to #{@name}: #{reason}"
|
674
|
+
disable! # shutdown
|
675
|
+
return
|
676
|
+
end
|
677
|
+
@state = :established
|
678
|
+
@log.debug "connection established", host: @host, port: @port
|
679
|
+
else
|
680
|
+
raise "BUG: unknown session state: #{@state}"
|
681
|
+
end
|
682
|
+
end
|
538
683
|
end
|
539
684
|
|
540
685
|
# Override Node to disable heartbeat
|
@@ -614,33 +759,5 @@ module Fluent
|
|
614
759
|
@last = 0
|
615
760
|
end
|
616
761
|
end
|
617
|
-
|
618
|
-
## TODO
|
619
|
-
#class RPC
|
620
|
-
# def initialize(this)
|
621
|
-
# @this = this
|
622
|
-
# end
|
623
|
-
#
|
624
|
-
# def list_nodes
|
625
|
-
# @this.nodes
|
626
|
-
# end
|
627
|
-
#
|
628
|
-
# def list_fault_nodes
|
629
|
-
# list_nodes.select {|n| !n.available? }
|
630
|
-
# end
|
631
|
-
#
|
632
|
-
# def list_available_nodes
|
633
|
-
# list_nodes.select {|n| n.available? }
|
634
|
-
# end
|
635
|
-
#
|
636
|
-
# def add_node(name, host, port, weight)
|
637
|
-
# end
|
638
|
-
#
|
639
|
-
# def recover_node(host, port)
|
640
|
-
# end
|
641
|
-
#
|
642
|
-
# def remove_node(host, port)
|
643
|
-
# end
|
644
|
-
#end
|
645
762
|
end
|
646
763
|
end
|