fluentd 0.14.4 → 0.14.5

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of fluentd might be problematic. Click here for more details.

Files changed (38)
  1. checksums.yaml +4 -4
  2. data/ChangeLog +18 -0
  3. data/example/in_forward.conf +3 -0
  4. data/example/in_forward_client.conf +37 -0
  5. data/example/in_forward_shared_key.conf +15 -0
  6. data/example/in_forward_users.conf +24 -0
  7. data/example/out_forward.conf +13 -13
  8. data/example/out_forward_client.conf +109 -0
  9. data/example/out_forward_shared_key.conf +36 -0
  10. data/example/out_forward_users.conf +65 -0
  11. data/example/{out_buffered_null.conf → out_null.conf} +10 -6
  12. data/example/secondary_file.conf +41 -0
  13. data/lib/fluent/agent.rb +3 -1
  14. data/lib/fluent/plugin/buffer.rb +5 -1
  15. data/lib/fluent/plugin/in_forward.rb +300 -50
  16. data/lib/fluent/plugin/in_tail.rb +41 -85
  17. data/lib/fluent/plugin/multi_output.rb +4 -0
  18. data/lib/fluent/plugin/out_forward.rb +326 -209
  19. data/lib/fluent/plugin/out_null.rb +37 -0
  20. data/lib/fluent/plugin/out_secondary_file.rb +128 -0
  21. data/lib/fluent/plugin/out_stdout.rb +38 -2
  22. data/lib/fluent/plugin/output.rb +13 -5
  23. data/lib/fluent/root_agent.rb +1 -1
  24. data/lib/fluent/test/startup_shutdown.rb +33 -0
  25. data/lib/fluent/version.rb +1 -1
  26. data/test/plugin/test_in_forward.rb +906 -441
  27. data/test/plugin/test_in_monitor_agent.rb +4 -0
  28. data/test/plugin/test_in_tail.rb +681 -663
  29. data/test/plugin/test_out_forward.rb +150 -208
  30. data/test/plugin/test_out_null.rb +85 -9
  31. data/test/plugin/test_out_secondary_file.rb +432 -0
  32. data/test/plugin/test_out_stdout.rb +143 -45
  33. data/test/test_root_agent.rb +42 -0
  34. metadata +14 -9
  35. data/lib/fluent/plugin/out_buffered_null.rb +0 -59
  36. data/lib/fluent/plugin/out_buffered_stdout.rb +0 -70
  37. data/test/plugin/test_out_buffered_null.rb +0 -79
  38. data/test/plugin/test_out_buffered_stdout.rb +0 -122
@@ -53,6 +53,10 @@ module Fluent
53
53
  # @rollback_count = 0
54
54
  end
55
55
 
56
+ def multi_output?
57
+ true
58
+ end
59
+
56
60
  def configure(conf)
57
61
  super
58
62
 
@@ -53,18 +53,7 @@ module Fluent
53
53
  desc 'The timeout time when sending event logs.'
54
54
  config_param :send_timeout, :time, default: 60
55
55
  desc 'The transport protocol to use for heartbeats.(udp,tcp,none)'
56
- config_param :heartbeat_type, default: :tcp do |val|
57
- case val.downcase
58
- when 'tcp'
59
- :tcp
60
- when 'udp'
61
- :udp
62
- when 'none'
63
- :none
64
- else
65
- raise ConfigError, "forward output heartbeat type should be 'tcp', 'udp', or 'none'"
66
- end
67
- end
56
+ config_param :heartbeat_type, :enum, list: [:tcp, :udp, :none], default: :tcp
68
57
  desc 'The interval of the heartbeat packer.'
69
58
  config_param :heartbeat_interval, :time, default: 1
70
59
  desc 'The wait time before accepting a server fault recovery.'
@@ -81,21 +70,54 @@ module Fluent
81
70
  desc 'Change the protocol to at-least-once.'
82
71
  config_param :require_ack_response, :bool, default: false # require in_forward to respond with ack
83
72
  desc 'This option is used when require_ack_response is true.'
84
- config_param :ack_response_timeout, :time, default: 190 # 0 means do not wait for ack responses
73
+ config_param :ack_response_timeout, :time, default: 190
74
+ desc 'Reading data size from server'
75
+ config_param :read_length, :size, default: 512 # 512bytes
76
+ desc 'The interval while reading data from server'
77
+ config_param :read_interval_msec, :integer, default: 50 # 50ms
85
78
  # Linux default tcp_syn_retries is 5 (in many environment)
86
79
  # 3 + 6 + 12 + 24 + 48 + 96 -> 189 (sec)
87
80
  desc 'Enable client-side DNS round robin.'
88
81
  config_param :dns_round_robin, :bool, default: false # heartbeat_type 'udp' is not available for this
89
82
 
83
+ config_section :security, required: false, multi: false do
84
+ desc 'The hostname'
85
+ config_param :self_hostname, :string
86
+ desc 'Shared key for authentication'
87
+ config_param :shared_key, :string, secret: true
88
+ end
89
+
90
+ config_section :server, param_name: :servers do
91
+ desc "The IP address or host name of the server."
92
+ config_param :host, :string
93
+ desc "The name of the server. Used in log messages."
94
+ config_param :name, :string, default: nil
95
+ desc "The port number of the host."
96
+ config_param :port, :integer, default: LISTEN_PORT
97
+ desc "The shared key per server."
98
+ config_param :shared_key, :string, default: nil, secret: true
99
+ desc "The username for authentication."
100
+ config_param :username, :string, default: ''
101
+ desc "The password for authentication."
102
+ config_param :password, :string, default: '', secret: true
103
+ desc "Marks a node as the standby node for an Active-Standby model between Fluentd nodes."
104
+ config_param :standby, :bool, default: false
105
+ desc "The load balancing weight."
106
+ config_param :weight, :integer, default: 60
107
+ end
108
+
90
109
  attr_reader :nodes
91
110
 
92
111
  config_param :port, :integer, default: LISTEN_PORT, obsoleted: "User <server> section instead."
93
112
  config_param :host, :string, default: nil, obsoleted: "Use <server> section instead."
94
113
 
114
+ attr_reader :read_interval, :recover_sample_size
115
+
95
116
  def configure(conf)
96
117
  super
97
118
 
98
- recover_sample_size = @recover_wait / @heartbeat_interval
119
+ @read_interval = @read_interval_msec / 1000.0
120
+ @recover_sample_size = @recover_wait / @heartbeat_interval
99
121
 
100
122
  if @dns_round_robin
101
123
  if @heartbeat_type == :udp
@@ -103,39 +125,23 @@ module Fluent
103
125
  end
104
126
  end
105
127
 
106
- conf.elements.each {|e|
107
- next if e.name != "server"
108
-
109
- host = e['host']
110
- port = e['port']
111
- port = port ? port.to_i : LISTEN_PORT
112
-
113
- weight = e['weight']
114
- weight = weight ? weight.to_i : 60
115
-
116
- standby = !!e['standby']
117
-
118
- name = e['name']
119
- unless name
120
- name = "#{host}:#{port}"
121
- end
122
-
128
+ @servers.each do |server|
123
129
  failure = FailureDetector.new(@heartbeat_interval, @hard_timeout, Time.now.to_i.to_f)
130
+ name = server.name || "#{server.host}:#{server.port}"
124
131
 
125
- node_conf = NodeConfig.new(name, host, port, weight, standby, failure,
126
- @phi_threshold, recover_sample_size, @expire_dns_cache, @phi_failure_detector, @dns_round_robin)
127
-
132
+ log.info "adding forwarding server '#{name}'", host: server.host, port: server.port, weight: server.weight, plugin_id: plugin_id
128
133
  if @heartbeat_type == :none
129
- @nodes << NoneHeartbeatNode.new(log, node_conf)
134
+ @nodes << NoneHeartbeatNode.new(self, server, failure: failure)
130
135
  else
131
- @nodes << Node.new(log, node_conf)
136
+ @nodes << Node.new(self, server, failure: failure)
132
137
  end
133
- log.info "adding forwarding server '#{name}'", host: host, port: port, weight: weight, plugin_id: plugin_id
134
- }
138
+ end
135
139
 
136
140
  if @nodes.empty?
137
141
  raise ConfigError, "forward output plugin requires at least one <server> is required"
138
142
  end
143
+
144
+ raise Fluent::ConfigError, "ack_response_timeout must be a positive integer" if @ack_response_timeout < 1
139
145
  end
140
146
 
141
147
  def start
@@ -144,6 +150,7 @@ module Fluent
144
150
  @rand_seed = Random.new.seed
145
151
  rebuild_weight_array
146
152
  @rr = 0
153
+ @usock = nil
147
154
 
148
155
  unless @heartbeat_type == :none
149
156
  @loop = Coolio::Loop.new
@@ -194,7 +201,7 @@ module Fluent
194
201
 
195
202
  if node.available?
196
203
  begin
197
- send_data(node, tag, chunk)
204
+ node.send_data(tag, chunk)
198
205
  return
199
206
  rescue
200
207
  # for load balancing during detecting crashed servers
@@ -210,6 +217,12 @@ module Fluent
210
217
  end
211
218
  end
212
219
 
220
+ # MessagePack FixArray length is 3
221
+ FORWARD_HEADER = [0x93].pack('C').freeze
222
+ def forward_header
223
+ FORWARD_HEADER
224
+ end
225
+
213
226
  private
214
227
 
215
228
  def rebuild_weight_array
@@ -254,111 +267,6 @@ module Fluent
254
267
  @weight_array = weight_array
255
268
  end
256
269
 
257
- # MessagePack FixArray length is 3
258
- FORWARD_HEADER = [0x93].pack('C').freeze
259
- def forward_header
260
- FORWARD_HEADER
261
- end
262
-
263
- #FORWARD_TCP_HEARTBEAT_DATA = FORWARD_HEADER + ''.to_msgpack + [].to_msgpack
264
- def send_heartbeat_tcp(node)
265
- sock = connect(node)
266
- begin
267
- opt = [1, @send_timeout.to_i].pack('I!I!') # { int l_onoff; int l_linger; }
268
- sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_LINGER, opt)
269
- opt = [@send_timeout.to_i, 0].pack('L!L!') # struct timeval
270
- # don't send any data to not cause a compatibility problem
271
- #sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_SNDTIMEO, opt)
272
- #sock.write FORWARD_TCP_HEARTBEAT_DATA
273
- node.heartbeat(true)
274
- ensure
275
- sock.close_write
276
- sock.close
277
- end
278
- end
279
-
280
- def send_data(node, tag, chunk)
281
- sock = connect(node)
282
- begin
283
- opt = [1, @send_timeout.to_i].pack('I!I!') # { int l_onoff; int l_linger; }
284
- sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_LINGER, opt)
285
-
286
- opt = [@send_timeout.to_i, 0].pack('L!L!') # struct timeval
287
- sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_SNDTIMEO, opt)
288
-
289
- # beginArray(3)
290
- sock.write forward_header
291
-
292
- # writeRaw(tag)
293
- sock.write tag.to_msgpack # tag
294
-
295
- # beginRaw(size)
296
- sz = chunk.size
297
- #if sz < 32
298
- # # FixRaw
299
- # sock.write [0xa0 | sz].pack('C')
300
- #elsif sz < 65536
301
- # # raw 16
302
- # sock.write [0xda, sz].pack('Cn')
303
- #else
304
- # raw 32
305
- sock.write [0xdb, sz].pack('CN')
306
- #end
307
-
308
- # writeRawBody(packed_es)
309
- chunk.write_to(sock)
310
-
311
- option = { 'size' => chunk.size_of_events }
312
- option['chunk'] = Base64.encode64(chunk.unique_id) if @require_ack_response
313
- sock.write option.to_msgpack
314
-
315
- if @require_ack_response && @ack_response_timeout > 0
316
- # Waiting for a response here results in a decrease of throughput because a chunk queue is locked.
317
- # To avoid a decrease of troughput, it is necessary to prepare a list of chunks that wait for responses
318
- # and process them asynchronously.
319
- if IO.select([sock], nil, nil, @ack_response_timeout)
320
- raw_data = sock.recv(1024)
321
-
322
- # When connection is closed by remote host, socket is ready to read and #recv returns an empty string that means EOF.
323
- # If this happens we assume the data wasn't delivered and retry it.
324
- if raw_data.empty?
325
- @log.warn "node #{node.host}:#{node.port} closed the connection. regard it as unavailable."
326
- node.disable!
327
- raise ForwardOutputConnectionClosedError, "node #{node.host}:#{node.port} closed connection"
328
- else
329
- # Serialization type of the response is same as sent data.
330
- res = MessagePack.unpack(raw_data)
331
-
332
- if res['ack'] != option['chunk']
333
- # Some errors may have occured when ack and chunk id is different, so send the chunk again.
334
- raise ForwardOutputResponseError, "ack in response and chunk id in sent data are different"
335
- end
336
- end
337
-
338
- else
339
- # IO.select returns nil on timeout.
340
- # There are 2 types of cases when no response has been received:
341
- # (1) the node does not support sending responses
342
- # (2) the node does support sending response but responses have not arrived for some reasons.
343
- @log.warn "no response from #{node.host}:#{node.port}. regard it as unavailable."
344
- node.disable!
345
- raise ForwardOutputACKTimeoutError, "node #{node.host}:#{node.port} does not return ACK"
346
- end
347
- end
348
-
349
- node.heartbeat(false)
350
- res # for test
351
- ensure
352
- sock.close_write
353
- sock.close
354
- end
355
- end
356
-
357
- def connect(node)
358
- # TODO unix socket?
359
- TCPSocket.new(node.resolved_host, node.port)
360
- end
361
-
362
270
  class HeartbeatRequestTimer < Coolio::TimerWatcher
363
271
  def initialize(interval, callback)
364
272
  super(interval, true)
@@ -379,15 +287,11 @@ module Fluent
379
287
  rebuild_weight_array
380
288
  end
381
289
  begin
382
- #log.trace "sending heartbeat #{n.host}:#{n.port} on #{@heartbeat_type}"
383
- if @heartbeat_type == :tcp
384
- send_heartbeat_tcp(n)
385
- else
386
- @usock.send "\0", 0, Socket.pack_sockaddr_in(n.port, n.resolved_host)
387
- end
290
+ log.trace "sending heartbeat", host: n.host, port: n.port, heartbeat_type: @heartbeat_type
291
+ n.usock = @usock if @usock
292
+ n.send_heartbeat
388
293
  rescue Errno::EAGAIN, Errno::EWOULDBLOCK, Errno::EINTR, Errno::ECONNREFUSED
389
- # TODO log
390
- log.debug "failed to send heartbeat packet to #{n.host}:#{n.port}", error: $!.to_s
294
+ log.debug "failed to send heartbeat packet", host: n.host, port: n.port, heartbeat_type: @heartbeat_type, error: $!
391
295
  end
392
296
  }
393
297
  end
@@ -423,27 +327,38 @@ module Fluent
423
327
  end
424
328
  end
425
329
 
426
- NodeConfig = Struct.new("NodeConfig", :name, :host, :port, :weight, :standby, :failure,
427
- :phi_threshold, :recover_sample_size, :expire_dns_cache, :phi_failure_detector, :dns_round_robin)
428
-
429
330
  class Node
430
- def initialize(log, conf)
431
- @log = log
432
- @conf = conf
433
- @name = @conf.name
434
- @host = @conf.host
435
- @port = @conf.port
436
- @weight = @conf.weight
437
- @failure = @conf.failure
331
+ def initialize(sender, server, failure:)
332
+ @sender = sender
333
+ @log = sender.log
334
+
335
+ @name = server.name
336
+ @host = server.host
337
+ @port = server.port
338
+ @weight = server.weight
339
+ @standby = server.standby
340
+ @failure = failure
438
341
  @available = true
342
+ @state = nil
343
+
344
+ @usock = nil
345
+
346
+ @username = server.username
347
+ @password = server.password
348
+ @shared_key = server.shared_key || (sender.security && sender.security.shared_key) || ""
349
+ @shared_key_salt = generate_salt
350
+ @shared_key_nonce = ""
351
+
352
+ @unpacker = Fluent::Engine.msgpack_unpacker
439
353
 
440
354
  @resolved_host = nil
441
355
  @resolved_time = 0
442
356
  resolved_host # check dns
443
357
  end
444
358
 
445
- attr_reader :conf
446
- attr_reader :name, :host, :port, :weight
359
+ attr_accessor :usock
360
+
361
+ attr_reader :name, :host, :port, :weight, :standby, :state
447
362
  attr_reader :sockaddr # used by on_heartbeat
448
363
  attr_reader :failure, :available # for test
449
364
 
@@ -456,33 +371,177 @@ module Fluent
456
371
  end
457
372
 
458
373
  def standby?
459
- @conf.standby
374
+ @standby
375
+ end
376
+
377
+ def connect
378
+ TCPSocket.new(resolved_host, port)
379
+ end
380
+
381
+ def set_socket_options(sock)
382
+ opt = [1, @sender.send_timeout.to_i].pack('I!I!') # { int l_onoff; int l_linger; }
383
+ sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_LINGER, opt)
384
+
385
+ opt = [@sender.send_timeout.to_i, 0].pack('L!L!') # struct timeval
386
+ sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_SNDTIMEO, opt)
387
+
388
+ sock
389
+ end
390
+
391
+ def establish_connection(sock)
392
+ while available? && @state != :established
393
+ begin
394
+ # TODO: On Ruby 2.2 or earlier, read_nonblock doesn't work expectedly.
395
+ # We need rewrite around here using new socket/server plugin helper.
396
+ buf = sock.read_nonblock(@sender.read_length)
397
+ if buf.empty?
398
+ sleep @sender.read_interval
399
+ next
400
+ end
401
+ @unpacker.feed_each(buf) do |data|
402
+ on_read(sock, data)
403
+ end
404
+ rescue IO::WaitReadable
405
+ # If the exception is Errno::EWOULDBLOCK or Errno::EAGAIN, it is extended by IO::WaitReadable.
406
+ # So IO::WaitReadable can be used to rescue the exceptions for retrying read_nonblock.
407
+ # http://docs.ruby-lang.org/en/2.3.0/IO.html#method-i-read_nonblock
408
+ sleep @sender.read_interval unless @state == :established
409
+ rescue SystemCallError => e
410
+ @log.warn "disconnected by error", host: @host, port: @port, error: e
411
+ disable!
412
+ break
413
+ rescue EOFError
414
+ @log.warn "disconnected", host: @host, port: @port
415
+ disable!
416
+ break
417
+ end
418
+ end
419
+ end
420
+
421
+ def send_data(tag, chunk)
422
+ sock = connect
423
+ @state = @sender.security ? :helo : :established
424
+ begin
425
+ set_socket_options(sock)
426
+
427
+ if @state != :established
428
+ establish_connection(sock)
429
+ end
430
+
431
+ unless available?
432
+ raise ForwardOutputConnectionClosedError, "failed to establish connection with node #{@name}"
433
+ end
434
+
435
+ option = { 'size' => chunk.size_of_events }
436
+ option['chunk'] = Base64.encode64(chunk.unique_id) if @sender.require_ack_response
437
+
438
+ # out_forward always uses Raw32 type for content.
439
+ # Raw16 can store only 64kbytes, and it should be much smaller than buffer chunk size.
440
+
441
+ sock.write @sender.forward_header # beginArray(3)
442
+ sock.write tag.to_msgpack # 1. writeRaw(tag)
443
+ sock.write [0xdb, chunk.size].pack('CN') # 2. beginRaw(size) raw32
444
+ chunk.write_to(sock) # writeRawBody(packed_es)
445
+ sock.write option.to_msgpack # 3. writeOption(option)
446
+
447
+ if @sender.require_ack_response
448
+ # Waiting for a response here results in a decrease of throughput because a chunk queue is locked.
449
+ # To avoid a decrease of troughput, it is necessary to prepare a list of chunks that wait for responses
450
+ # and process them asynchronously.
451
+ if IO.select([sock], nil, nil, @sender.ack_response_timeout)
452
+ raw_data = begin
453
+ sock.recv(1024)
454
+ rescue Errno::ECONNRESET
455
+ ""
456
+ end
457
+
458
+ # When connection is closed by remote host, socket is ready to read and #recv returns an empty string that means EOF.
459
+ # If this happens we assume the data wasn't delivered and retry it.
460
+ if raw_data.empty?
461
+ @log.warn "node closed the connection. regard it as unavailable.", host: @host, port: @port
462
+ disable!
463
+ raise ForwardOutputConnectionClosedError, "node #{@host}:#{@port} closed connection"
464
+ else
465
+ @unpacker.feed(raw_data)
466
+ res = @unpacker.read
467
+ if res['ack'] != option['chunk']
468
+ # Some errors may have occured when ack and chunk id is different, so send the chunk again.
469
+ raise ForwardOutputResponseError, "ack in response and chunk id in sent data are different"
470
+ end
471
+ end
472
+
473
+ else
474
+ # IO.select returns nil on timeout.
475
+ # There are 2 types of cases when no response has been received:
476
+ # (1) the node does not support sending responses
477
+ # (2) the node does support sending response but responses have not arrived for some reasons.
478
+ @log.warn "no response from node. regard it as unavailable.", host: @host, port: @port
479
+ disable!
480
+ raise ForwardOutputACKTimeoutError, "node #{host}:#{port} does not return ACK"
481
+ end
482
+ end
483
+
484
+ heartbeat(false)
485
+ res # for test
486
+ ensure
487
+ sock.close_write
488
+ sock.close
489
+ end
490
+ end
491
+
492
+ # FORWARD_TCP_HEARTBEAT_DATA = FORWARD_HEADER + ''.to_msgpack + [].to_msgpack
493
+ def send_heartbeat
494
+ case @sender.heartbeat_type
495
+ when :tcp
496
+ sock = connect
497
+ begin
498
+ opt = [1, @sender.send_timeout.to_i].pack('I!I!') # { int l_onoff; int l_linger; }
499
+ sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_LINGER, opt)
500
+ # opt = [@sender.send_timeout.to_i, 0].pack('L!L!') # struct timeval
501
+ # sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_SNDTIMEO, opt)
502
+
503
+ ## don't send any data to not cause a compatibility problem
504
+ # sock.write FORWARD_TCP_HEARTBEAT_DATA
505
+
506
+ # successful tcp connection establishment is considered as valid heartbeat
507
+ heartbeat(true)
508
+ ensure
509
+ sock.close_write
510
+ sock.close
511
+ end
512
+ when :udp
513
+ @usock.send "\0", 0, Socket.pack_sockaddr_in(n.port, n.resolved_host)
514
+ when :none # :none doesn't use this class
515
+ raise "BUG: heartbeat_type none must not use Node"
516
+ else
517
+ raise "BUG: unknown heartbeat_type '#{@sender.heartbeat_type}'"
518
+ end
460
519
  end
461
520
 
462
521
  def resolved_host
463
- case @conf.expire_dns_cache
522
+ case @sender.expire_dns_cache
464
523
  when 0
465
524
  # cache is disabled
466
- return resolve_dns!
525
+ resolve_dns!
467
526
 
468
527
  when nil
469
528
  # persistent cache
470
- return @resolved_host ||= resolve_dns!
529
+ @resolved_host ||= resolve_dns!
471
530
 
472
531
  else
473
532
  now = Engine.now
474
533
  rh = @resolved_host
475
- if !rh || now - @resolved_time >= @conf.expire_dns_cache
534
+ if !rh || now - @resolved_time >= @sender.expire_dns_cache
476
535
  rh = @resolved_host = resolve_dns!
477
536
  @resolved_time = now
478
537
  end
479
- return rh
538
+ rh
480
539
  end
481
540
  end
482
541
 
483
542
  def resolve_dns!
484
543
  addrinfo_list = Socket.getaddrinfo(@host, @port, nil, Socket::SOCK_STREAM)
485
- addrinfo = @conf.dns_round_robin ? addrinfo_list.sample : addrinfo_list.first
544
+ addrinfo = @sender.dns_round_robin ? addrinfo_list.sample : addrinfo_list.first
486
545
  @sockaddr = Socket.pack_sockaddr_in(addrinfo[1], addrinfo[3]) # used by on_heartbeat
487
546
  addrinfo[3]
488
547
  end
@@ -505,36 +564,122 @@ module Fluent
505
564
  return true
506
565
  end
507
566
 
508
- if @conf.phi_failure_detector
567
+ if @sender.phi_failure_detector
509
568
  phi = @failure.phi(now)
510
- #$log.trace "phi '#{@name}'", :host=>@host, :port=>@port, :phi=>phi
511
- if phi > @conf.phi_threshold
512
- @log.warn "detached forwarding server '#{@name}'", host: @host, port: @port, phi: phi
569
+ if phi > @sender.phi_threshold
570
+ @log.warn "detached forwarding server '#{@name}'", host: @host, port: @port, phi: phi, phi_threshold: @sender.phi_threshold
513
571
  @available = false
514
572
  @resolved_host = nil # expire cached host
515
573
  @failure.clear
516
574
  return true
517
575
  end
518
576
  end
519
- return false
577
+ false
520
578
  end
521
579
 
522
580
  def heartbeat(detect=true)
523
581
  now = Time.now.to_f
524
582
  @failure.add(now)
525
- #@log.trace "heartbeat from '#{@name}'", :host=>@host, :port=>@port, :available=>@available, :sample_size=>@failure.sample_size
526
- if detect && !@available && @failure.sample_size > @conf.recover_sample_size
583
+ if detect && !@available && @failure.sample_size > @sender.recover_sample_size
527
584
  @available = true
528
585
  @log.warn "recovered forwarding server '#{@name}'", host: @host, port: @port
529
- return true
586
+ true
530
587
  else
531
- return nil
588
+ nil
532
589
  end
533
590
  end
534
591
 
592
+ # TODO: #to_msgpack(string) is deprecated
535
593
  def to_msgpack(out = '')
536
594
  [@host, @port, @weight, @available].to_msgpack(out)
537
595
  end
596
+
597
+ def generate_salt
598
+ SecureRandom.hex(16)
599
+ end
600
+
601
+ def check_helo(message)
602
+ @log.debug "checking helo"
603
+ # ['HELO', options(hash)]
604
+ unless message.size == 2 && message[0] == 'HELO'
605
+ return false
606
+ end
607
+ opts = message[1] || {}
608
+ # make shared_key_check failed (instead of error) if protocol version mismatch exist
609
+ @shared_key_nonce = opts['nonce'] || ''
610
+ @authentication = opts['auth'] || ''
611
+ true
612
+ end
613
+
614
+ def generate_ping
615
+ @log.debug "generating ping"
616
+ # ['PING', self_hostname, sharedkey\_salt, sha512\_hex(sharedkey\_salt + self_hostname + nonce + shared_key),
617
+ # username || '', sha512\_hex(auth\_salt + username + password) || '']
618
+ shared_key_hexdigest = Digest::SHA512.new.update(@shared_key_salt)
619
+ .update(@sender.security.self_hostname)
620
+ .update(@shared_key_nonce)
621
+ .update(@shared_key)
622
+ .hexdigest
623
+ ping = ['PING', @sender.security.self_hostname, @shared_key_salt, shared_key_hexdigest]
624
+ if !@authentication.empty?
625
+ password_hexdigest = Digest::SHA512.new.update(@authentication).update(@username).update(@password).hexdigest
626
+ ping.push(@username, password_hexdigest)
627
+ else
628
+ ping.push('','')
629
+ end
630
+ ping
631
+ end
632
+
633
+ def check_pong(message)
634
+ @log.debug "checking pong"
635
+ # ['PONG', bool(authentication result), 'reason if authentication failed',
636
+ # self_hostname, sha512\_hex(salt + self_hostname + nonce + sharedkey)]
637
+ unless message.size == 5 && message[0] == 'PONG'
638
+ return false, 'invalid format for PONG message'
639
+ end
640
+ _pong, auth_result, reason, hostname, shared_key_hexdigest = message
641
+
642
+ unless auth_result
643
+ return false, 'authentication failed: ' + reason
644
+ end
645
+
646
+ if hostname == @sender.security.self_hostname
647
+ return false, 'same hostname between input and output: invalid configuration'
648
+ end
649
+
650
+ clientside = Digest::SHA512.new.update(@shared_key_salt).update(hostname).update(@shared_key_nonce).update(@shared_key).hexdigest
651
+ unless shared_key_hexdigest == clientside
652
+ return false, 'shared key mismatch'
653
+ end
654
+
655
+ return true, nil
656
+ end
657
+
658
+ def on_read(sock, data)
659
+ @log.trace __callee__
660
+
661
+ case @state
662
+ when :helo
663
+ unless check_helo(data)
664
+ @log.warn "received invalid helo message from #{@name}"
665
+ disable! # shutdown
666
+ return
667
+ end
668
+ sock.write(generate_ping.to_msgpack)
669
+ @state = :pingpong
670
+ when :pingpong
671
+ succeeded, reason = check_pong(data)
672
+ unless succeeded
673
+ @log.warn "connection refused to #{@name}: #{reason}"
674
+ disable! # shutdown
675
+ return
676
+ end
677
+ @state = :established
678
+ @log.debug "connection established", host: @host, port: @port
679
+ else
680
+ raise "BUG: unknown session state: #{@state}"
681
+ end
682
+ end
538
683
  end
539
684
 
540
685
  # Override Node to disable heartbeat
@@ -614,33 +759,5 @@ module Fluent
614
759
  @last = 0
615
760
  end
616
761
  end
617
-
618
- ## TODO
619
- #class RPC
620
- # def initialize(this)
621
- # @this = this
622
- # end
623
- #
624
- # def list_nodes
625
- # @this.nodes
626
- # end
627
- #
628
- # def list_fault_nodes
629
- # list_nodes.select {|n| !n.available? }
630
- # end
631
- #
632
- # def list_available_nodes
633
- # list_nodes.select {|n| n.available? }
634
- # end
635
- #
636
- # def add_node(name, host, port, weight)
637
- # end
638
- #
639
- # def recover_node(host, port)
640
- # end
641
- #
642
- # def remove_node(host, port)
643
- # end
644
- #end
645
762
  end
646
763
  end