fluentd 0.14.4-x64-mingw32 → 0.14.5-x64-mingw32

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of fluentd might be problematic. Click here for more details.

Files changed (38)
  1. checksums.yaml +4 -4
  2. data/ChangeLog +18 -0
  3. data/example/in_forward.conf +3 -0
  4. data/example/in_forward_client.conf +37 -0
  5. data/example/in_forward_shared_key.conf +15 -0
  6. data/example/in_forward_users.conf +24 -0
  7. data/example/out_forward.conf +13 -13
  8. data/example/out_forward_client.conf +109 -0
  9. data/example/out_forward_shared_key.conf +36 -0
  10. data/example/out_forward_users.conf +65 -0
  11. data/example/{out_buffered_null.conf → out_null.conf} +10 -6
  12. data/example/secondary_file.conf +41 -0
  13. data/lib/fluent/agent.rb +3 -1
  14. data/lib/fluent/plugin/buffer.rb +5 -1
  15. data/lib/fluent/plugin/in_forward.rb +300 -50
  16. data/lib/fluent/plugin/in_tail.rb +41 -85
  17. data/lib/fluent/plugin/multi_output.rb +4 -0
  18. data/lib/fluent/plugin/out_forward.rb +326 -209
  19. data/lib/fluent/plugin/out_null.rb +37 -0
  20. data/lib/fluent/plugin/out_secondary_file.rb +128 -0
  21. data/lib/fluent/plugin/out_stdout.rb +38 -2
  22. data/lib/fluent/plugin/output.rb +13 -5
  23. data/lib/fluent/root_agent.rb +1 -1
  24. data/lib/fluent/test/startup_shutdown.rb +33 -0
  25. data/lib/fluent/version.rb +1 -1
  26. data/test/plugin/test_in_forward.rb +906 -441
  27. data/test/plugin/test_in_monitor_agent.rb +4 -0
  28. data/test/plugin/test_in_tail.rb +681 -663
  29. data/test/plugin/test_out_forward.rb +150 -208
  30. data/test/plugin/test_out_null.rb +85 -9
  31. data/test/plugin/test_out_secondary_file.rb +432 -0
  32. data/test/plugin/test_out_stdout.rb +143 -45
  33. data/test/test_root_agent.rb +42 -0
  34. metadata +14 -9
  35. data/lib/fluent/plugin/out_buffered_null.rb +0 -59
  36. data/lib/fluent/plugin/out_buffered_stdout.rb +0 -70
  37. data/test/plugin/test_out_buffered_null.rb +0 -79
  38. data/test/plugin/test_out_buffered_stdout.rb +0 -122
@@ -16,10 +16,9 @@
16
16
 
17
17
  require 'cool.io'
18
18
 
19
- require 'fluent/input'
19
+ require 'fluent/plugin/input'
20
20
  require 'fluent/config/error'
21
21
  require 'fluent/event'
22
- require 'fluent/system_config'
23
22
  require 'fluent/plugin/buffer'
24
23
 
25
24
  if Fluent.windows?
@@ -28,11 +27,11 @@ else
28
27
  Fluent::FileWrapper = File
29
28
  end
30
29
 
31
- module Fluent
32
- class TailInput < Input
33
- include SystemConfig::Mixin
30
+ module Fluent::Plugin
31
+ class TailInput < Fluent::Plugin::Input
32
+ Fluent::Plugin.register_input('tail', self)
34
33
 
35
- Plugin.register_input('tail', self)
34
+ helpers :timer, :event_loop, :parser, :compat_parameters
36
35
 
37
36
  FILE_PERMISSION = 0644
38
37
 
@@ -77,11 +76,24 @@ module Fluent
77
76
  attr_reader :paths
78
77
 
79
78
  def configure(conf)
79
+ compat_parameters_convert(conf, :parser)
80
+ parser_config = conf.elements('parse').first
81
+ unless parser_config
82
+ raise Fluent::ConfigError, "<parse> section is required."
83
+ end
84
+ unless parser_config["@type"]
85
+ raise Fluent::ConfigError, "parse/@type is required."
86
+ end
87
+
88
+ (1..Fluent::Plugin::MultilineParser::FORMAT_MAX_NUM).each do |n|
89
+ parser_config["format#{n}"] = conf["format#{n}"] if conf["format#{n}"]
90
+ end
91
+
80
92
  super
81
93
 
82
94
  @paths = @path.split(',').map {|path| path.strip }
83
95
  if @paths.empty?
84
- raise ConfigError, "tail: 'path' parameter is required on tail input"
96
+ raise Fluent::ConfigError, "tail: 'path' parameter is required on tail input"
85
97
  end
86
98
 
87
99
  unless @pos_file
@@ -89,22 +101,17 @@ module Fluent
89
101
  $log.warn "this parameter is highly recommended to save the position to resume tailing."
90
102
  end
91
103
 
92
- configure_parser(conf)
93
104
  configure_tag
94
105
  configure_encoding
95
106
 
96
- @multiline_mode = conf['format'] =~ /multiline/
107
+ @multiline_mode = parser_config["@type"] =~ /multiline/
97
108
  @receive_handler = if @multiline_mode
98
109
  method(:parse_multilines)
99
110
  else
100
111
  method(:parse_singleline)
101
112
  end
102
113
  @file_perm = system_config.file_permission || FILE_PERMISSION
103
- end
104
-
105
- def configure_parser(conf)
106
- @parser = Plugin.new_parser(conf['format'])
107
- @parser.configure(conf)
114
+ @parser = parser_create(conf: parser_config)
108
115
  end
109
116
 
110
117
  def configure_tag
@@ -120,7 +127,7 @@ module Fluent
120
127
  def configure_encoding
121
128
  unless @encoding
122
129
  if @from_encoding
123
- raise ConfigError, "tail: 'from_encoding' parameter must be specified with 'encoding' parameter."
130
+ raise Fluent::ConfigError, "tail: 'from_encoding' parameter must be specified with 'encoding' parameter."
124
131
  end
125
132
  end
126
133
 
@@ -132,7 +139,7 @@ module Fluent
132
139
  begin
133
140
  Encoding.find(encoding_name) if encoding_name
134
141
  rescue ArgumentError => e
135
- raise ConfigError, e.message
142
+ raise Fluent::ConfigError, e.message
136
143
  end
137
144
  end
138
145
 
@@ -145,20 +152,12 @@ module Fluent
145
152
  @pf = PositionFile.parse(@pf_file)
146
153
  end
147
154
 
148
- @loop = Coolio::Loop.new
149
155
  refresh_watchers
150
-
151
- @refresh_trigger = TailWatcher::TimerWatcher.new(@refresh_interval, true, log, &method(:refresh_watchers))
152
- @refresh_trigger.attach(@loop)
153
- @thread = Thread.new(&method(:run))
156
+ timer_execute(:in_tail_refresh_watchers, @refresh_interval, &method(:refresh_watchers))
154
157
  end
155
158
 
156
159
  def shutdown
157
- @refresh_trigger.detach if @refresh_trigger && @refresh_trigger.attached?
158
-
159
160
  stop_watchers(@tails.keys, true)
160
- @loop.stop rescue nil # when all watchers are detached, `stop` raises RuntimeError. We can ignore this exception.
161
- @thread.join
162
161
  @pf_file.close if @pf_file
163
162
 
164
163
  super
@@ -206,8 +205,11 @@ module Fluent
206
205
 
207
206
  def setup_watcher(path, pe)
208
207
  line_buffer_timer_flusher = (@multiline_mode && @multiline_flush_interval) ? TailWatcher::LineBufferTimerFlusher.new(log, @multiline_flush_interval, &method(:flush_buffer)) : nil
209
- tw = TailWatcher.new(path, @rotate_wait, pe, log, @read_from_head, @enable_watch_timer, @read_lines_limit, method(:update_watcher), line_buffer_timer_flusher, &method(:receive_lines))
210
- tw.attach(@loop)
208
+ tw = TailWatcher.new(path, @rotate_wait, pe, log, @read_from_head, @enable_watch_timer, @read_lines_limit, method(:update_watcher), line_buffer_timer_flusher, &method(:receive_lines))
209
+ tw.attach do |watcher|
210
+ timer_execute(:in_tail_timer_trigger, 1, &watcher.method(:on_notify)) if watcher.enable_watch_timer
211
+ event_loop_attach(watcher.stat_trigger)
212
+ end
211
213
  tw
212
214
  end
213
215
 
@@ -218,7 +220,7 @@ module Fluent
218
220
  pe = @pf[path]
219
221
  if @read_from_head && pe.read_inode.zero?
220
222
  begin
221
- pe.update(FileWrapper.stat(path).ino, 0)
223
+ pe.update(Fluent::FileWrapper.stat(path).ino, 0)
222
224
  rescue Errno::ENOENT
223
225
  $log.warn "#{path} not found. Continuing without tailing it."
224
226
  end
@@ -263,8 +265,9 @@ module Fluent
263
265
  end
264
266
 
265
267
  def close_watcher_after_rotate_wait(tw)
266
- closer = TailWatcher::Closer.new(@rotate_wait, tw, log, &method(:close_watcher))
267
- closer.attach(@loop)
268
+ timer_execute(:in_tail_close_watcher, @rotate_wait, repeat: false) do
269
+ close_watcher(tw)
270
+ end
268
271
  end
269
272
 
270
273
  def flush_buffer(tw)
@@ -293,13 +296,6 @@ module Fluent
293
296
  end
294
297
  end
295
298
 
296
- def run
297
- @loop.run
298
- rescue
299
- log.error "unexpected error", error: $!.to_s
300
- log.error_backtrace
301
- end
302
-
303
299
  # @return true if no error or unrecoverable error happens in emit action. false if got BufferOverflowError
304
300
  def receive_lines(lines, tail_watcher)
305
301
  es = @receive_handler.call(lines, tail_watcher)
@@ -347,7 +343,7 @@ module Fluent
347
343
  end
348
344
 
349
345
  def parse_singleline(lines, tail_watcher)
350
- es = MultiEventStream.new
346
+ es = Fluent::MultiEventStream.new
351
347
  lines.each { |line|
352
348
  convert_line_to_event(line, es, tail_watcher)
353
349
  }
@@ -356,7 +352,7 @@ module Fluent
356
352
 
357
353
  def parse_multilines(lines, tail_watcher)
358
354
  lb = tail_watcher.line_buffer
359
- es = MultiEventStream.new
355
+ es = Fluent::MultiEventStream.new
360
356
  if @parser.has_firstline?
361
357
  tail_watcher.line_buffer_timer_flusher.reset_timer if tail_watcher.line_buffer_timer_flusher
362
358
  lines.each { |line|
@@ -400,8 +396,6 @@ module Fluent
400
396
  @receive_lines = receive_lines
401
397
  @update_watcher = update_watcher
402
398
 
403
- @timer_trigger = TimerWatcher.new(1, true, log, &method(:on_notify)) if @enable_watch_timer
404
-
405
399
  @stat_trigger = StatWatcher.new(path, log, &method(:on_notify))
406
400
 
407
401
  @rotate_handler = RotateHandler.new(path, log, &method(:on_rotate))
@@ -412,6 +406,8 @@ module Fluent
412
406
  end
413
407
 
414
408
  attr_reader :path
409
+ attr_reader :stat_trigger, :enable_watch_timer
410
+ attr_accessor :timer_trigger
415
411
  attr_accessor :line_buffer, :line_buffer_timer_flusher
416
412
  attr_accessor :unwatched # This is used for removing position entry from PositionFile
417
413
 
@@ -423,14 +419,12 @@ module Fluent
423
419
  @receive_lines.call(lines, self)
424
420
  end
425
421
 
426
- def attach(loop)
427
- @timer_trigger.attach(loop) if @enable_watch_timer
428
- @stat_trigger.attach(loop)
422
+ def attach
423
+ yield self
429
424
  on_notify
430
425
  end
431
426
 
432
427
  def detach
433
- @timer_trigger.detach if @enable_watch_timer && @timer_trigger.attached?
434
428
  @stat_trigger.detach if @stat_trigger.attached?
435
429
  end
436
430
 
@@ -523,22 +517,6 @@ module Fluent
523
517
  pe # This pe will be updated in on_rotate after TailWatcher is initialized
524
518
  end
525
519
 
526
- class TimerWatcher < Coolio::TimerWatcher
527
- def initialize(interval, repeat, log, &callback)
528
- @callback = callback
529
- @log = log
530
- super(interval, repeat)
531
- end
532
-
533
- def on_timer
534
- @callback.call
535
- rescue
536
- # TODO log?
537
- @log.error $!.to_s
538
- @log.error_backtrace
539
- end
540
- end
541
-
542
520
  class StatWatcher < Coolio::StatWatcher
543
521
  def initialize(path, log, &callback)
544
522
  @callback = callback
@@ -555,24 +533,6 @@ module Fluent
555
533
  end
556
534
  end
557
535
 
558
- class Closer < Coolio::TimerWatcher
559
- def initialize(interval, tw, log, &callback)
560
- @callback = callback
561
- @tw = tw
562
- @log = log
563
- super(interval, false)
564
- end
565
-
566
- def on_timer
567
- @callback.call(@tw)
568
- rescue => e
569
- @log.error e.to_s
570
- @log.error_backtrace(e.backtrace)
571
- ensure
572
- detach
573
- end
574
- end
575
-
576
536
  class IOHandler
577
537
  def initialize(io, pe, log, read_lines_limit, first = true, &receive_lines)
578
538
  @log = log
@@ -660,7 +620,7 @@ module Fluent
660
620
 
661
621
  def on_notify
662
622
  begin
663
- stat = FileWrapper.stat(@path)
623
+ stat = Fluent::FileWrapper.stat(@path)
664
624
  inode = stat.ino
665
625
  fsize = stat.size
666
626
  rescue Errno::ENOENT
@@ -673,7 +633,7 @@ module Fluent
673
633
  if @inode != inode || fsize < @fsize
674
634
  # rotated or truncated
675
635
  begin
676
- io = FileWrapper.open(@path)
636
+ io = Fluent::FileWrapper.open(@path)
677
637
  rescue Errno::ENOENT
678
638
  end
679
639
  @on_rotate.call(io)
@@ -688,7 +648,6 @@ module Fluent
688
648
  end
689
649
  end
690
650
 
691
-
692
651
  class LineBufferTimerFlusher
693
652
  def initialize(log, flush_interval, &flush_method)
694
653
  @log = log
@@ -713,7 +672,6 @@ module Fluent
713
672
  end
714
673
  end
715
674
 
716
-
717
675
  class PositionFile
718
676
  UNWATCHED_POSITION = 0xffffffffffffffff
719
677
 
@@ -833,6 +791,4 @@ module Fluent
833
791
  end
834
792
  end
835
793
  end
836
-
837
- NewTailInput = TailInput # for backward compatibility
838
794
  end
@@ -53,6 +53,10 @@ module Fluent
53
53
  # @rollback_count = 0
54
54
  end
55
55
 
56
+ def multi_output?
57
+ true
58
+ end
59
+
56
60
  def configure(conf)
57
61
  super
58
62
 
@@ -53,18 +53,7 @@ module Fluent
53
53
  desc 'The timeout time when sending event logs.'
54
54
  config_param :send_timeout, :time, default: 60
55
55
  desc 'The transport protocol to use for heartbeats.(udp,tcp,none)'
56
- config_param :heartbeat_type, default: :tcp do |val|
57
- case val.downcase
58
- when 'tcp'
59
- :tcp
60
- when 'udp'
61
- :udp
62
- when 'none'
63
- :none
64
- else
65
- raise ConfigError, "forward output heartbeat type should be 'tcp', 'udp', or 'none'"
66
- end
67
- end
56
+ config_param :heartbeat_type, :enum, list: [:tcp, :udp, :none], default: :tcp
68
57
  desc 'The interval of the heartbeat packer.'
69
58
  config_param :heartbeat_interval, :time, default: 1
70
59
  desc 'The wait time before accepting a server fault recovery.'
@@ -81,21 +70,54 @@ module Fluent
81
70
  desc 'Change the protocol to at-least-once.'
82
71
  config_param :require_ack_response, :bool, default: false # require in_forward to respond with ack
83
72
  desc 'This option is used when require_ack_response is true.'
84
- config_param :ack_response_timeout, :time, default: 190 # 0 means do not wait for ack responses
73
+ config_param :ack_response_timeout, :time, default: 190
74
+ desc 'Reading data size from server'
75
+ config_param :read_length, :size, default: 512 # 512bytes
76
+ desc 'The interval while reading data from server'
77
+ config_param :read_interval_msec, :integer, default: 50 # 50ms
85
78
  # Linux default tcp_syn_retries is 5 (in many environment)
86
79
  # 3 + 6 + 12 + 24 + 48 + 96 -> 189 (sec)
87
80
  desc 'Enable client-side DNS round robin.'
88
81
  config_param :dns_round_robin, :bool, default: false # heartbeat_type 'udp' is not available for this
89
82
 
83
+ config_section :security, required: false, multi: false do
84
+ desc 'The hostname'
85
+ config_param :self_hostname, :string
86
+ desc 'Shared key for authentication'
87
+ config_param :shared_key, :string, secret: true
88
+ end
89
+
90
+ config_section :server, param_name: :servers do
91
+ desc "The IP address or host name of the server."
92
+ config_param :host, :string
93
+ desc "The name of the server. Used in log messages."
94
+ config_param :name, :string, default: nil
95
+ desc "The port number of the host."
96
+ config_param :port, :integer, default: LISTEN_PORT
97
+ desc "The shared key per server."
98
+ config_param :shared_key, :string, default: nil, secret: true
99
+ desc "The username for authentication."
100
+ config_param :username, :string, default: ''
101
+ desc "The password for authentication."
102
+ config_param :password, :string, default: '', secret: true
103
+ desc "Marks a node as the standby node for an Active-Standby model between Fluentd nodes."
104
+ config_param :standby, :bool, default: false
105
+ desc "The load balancing weight."
106
+ config_param :weight, :integer, default: 60
107
+ end
108
+
90
109
  attr_reader :nodes
91
110
 
92
111
  config_param :port, :integer, default: LISTEN_PORT, obsoleted: "User <server> section instead."
93
112
  config_param :host, :string, default: nil, obsoleted: "Use <server> section instead."
94
113
 
114
+ attr_reader :read_interval, :recover_sample_size
115
+
95
116
  def configure(conf)
96
117
  super
97
118
 
98
- recover_sample_size = @recover_wait / @heartbeat_interval
119
+ @read_interval = @read_interval_msec / 1000.0
120
+ @recover_sample_size = @recover_wait / @heartbeat_interval
99
121
 
100
122
  if @dns_round_robin
101
123
  if @heartbeat_type == :udp
@@ -103,39 +125,23 @@ module Fluent
103
125
  end
104
126
  end
105
127
 
106
- conf.elements.each {|e|
107
- next if e.name != "server"
108
-
109
- host = e['host']
110
- port = e['port']
111
- port = port ? port.to_i : LISTEN_PORT
112
-
113
- weight = e['weight']
114
- weight = weight ? weight.to_i : 60
115
-
116
- standby = !!e['standby']
117
-
118
- name = e['name']
119
- unless name
120
- name = "#{host}:#{port}"
121
- end
122
-
128
+ @servers.each do |server|
123
129
  failure = FailureDetector.new(@heartbeat_interval, @hard_timeout, Time.now.to_i.to_f)
130
+ name = server.name || "#{server.host}:#{server.port}"
124
131
 
125
- node_conf = NodeConfig.new(name, host, port, weight, standby, failure,
126
- @phi_threshold, recover_sample_size, @expire_dns_cache, @phi_failure_detector, @dns_round_robin)
127
-
132
+ log.info "adding forwarding server '#{name}'", host: server.host, port: server.port, weight: server.weight, plugin_id: plugin_id
128
133
  if @heartbeat_type == :none
129
- @nodes << NoneHeartbeatNode.new(log, node_conf)
134
+ @nodes << NoneHeartbeatNode.new(self, server, failure: failure)
130
135
  else
131
- @nodes << Node.new(log, node_conf)
136
+ @nodes << Node.new(self, server, failure: failure)
132
137
  end
133
- log.info "adding forwarding server '#{name}'", host: host, port: port, weight: weight, plugin_id: plugin_id
134
- }
138
+ end
135
139
 
136
140
  if @nodes.empty?
137
141
  raise ConfigError, "forward output plugin requires at least one <server> is required"
138
142
  end
143
+
144
+ raise Fluent::ConfigError, "ack_response_timeout must be a positive integer" if @ack_response_timeout < 1
139
145
  end
140
146
 
141
147
  def start
@@ -144,6 +150,7 @@ module Fluent
144
150
  @rand_seed = Random.new.seed
145
151
  rebuild_weight_array
146
152
  @rr = 0
153
+ @usock = nil
147
154
 
148
155
  unless @heartbeat_type == :none
149
156
  @loop = Coolio::Loop.new
@@ -194,7 +201,7 @@ module Fluent
194
201
 
195
202
  if node.available?
196
203
  begin
197
- send_data(node, tag, chunk)
204
+ node.send_data(tag, chunk)
198
205
  return
199
206
  rescue
200
207
  # for load balancing during detecting crashed servers
@@ -210,6 +217,12 @@ module Fluent
210
217
  end
211
218
  end
212
219
 
220
+ # MessagePack FixArray length is 3
221
+ FORWARD_HEADER = [0x93].pack('C').freeze
222
+ def forward_header
223
+ FORWARD_HEADER
224
+ end
225
+
213
226
  private
214
227
 
215
228
  def rebuild_weight_array
@@ -254,111 +267,6 @@ module Fluent
254
267
  @weight_array = weight_array
255
268
  end
256
269
 
257
- # MessagePack FixArray length is 3
258
- FORWARD_HEADER = [0x93].pack('C').freeze
259
- def forward_header
260
- FORWARD_HEADER
261
- end
262
-
263
- #FORWARD_TCP_HEARTBEAT_DATA = FORWARD_HEADER + ''.to_msgpack + [].to_msgpack
264
- def send_heartbeat_tcp(node)
265
- sock = connect(node)
266
- begin
267
- opt = [1, @send_timeout.to_i].pack('I!I!') # { int l_onoff; int l_linger; }
268
- sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_LINGER, opt)
269
- opt = [@send_timeout.to_i, 0].pack('L!L!') # struct timeval
270
- # don't send any data to not cause a compatibility problem
271
- #sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_SNDTIMEO, opt)
272
- #sock.write FORWARD_TCP_HEARTBEAT_DATA
273
- node.heartbeat(true)
274
- ensure
275
- sock.close_write
276
- sock.close
277
- end
278
- end
279
-
280
- def send_data(node, tag, chunk)
281
- sock = connect(node)
282
- begin
283
- opt = [1, @send_timeout.to_i].pack('I!I!') # { int l_onoff; int l_linger; }
284
- sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_LINGER, opt)
285
-
286
- opt = [@send_timeout.to_i, 0].pack('L!L!') # struct timeval
287
- sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_SNDTIMEO, opt)
288
-
289
- # beginArray(3)
290
- sock.write forward_header
291
-
292
- # writeRaw(tag)
293
- sock.write tag.to_msgpack # tag
294
-
295
- # beginRaw(size)
296
- sz = chunk.size
297
- #if sz < 32
298
- # # FixRaw
299
- # sock.write [0xa0 | sz].pack('C')
300
- #elsif sz < 65536
301
- # # raw 16
302
- # sock.write [0xda, sz].pack('Cn')
303
- #else
304
- # raw 32
305
- sock.write [0xdb, sz].pack('CN')
306
- #end
307
-
308
- # writeRawBody(packed_es)
309
- chunk.write_to(sock)
310
-
311
- option = { 'size' => chunk.size_of_events }
312
- option['chunk'] = Base64.encode64(chunk.unique_id) if @require_ack_response
313
- sock.write option.to_msgpack
314
-
315
- if @require_ack_response && @ack_response_timeout > 0
316
- # Waiting for a response here results in a decrease of throughput because a chunk queue is locked.
317
- # To avoid a decrease of troughput, it is necessary to prepare a list of chunks that wait for responses
318
- # and process them asynchronously.
319
- if IO.select([sock], nil, nil, @ack_response_timeout)
320
- raw_data = sock.recv(1024)
321
-
322
- # When connection is closed by remote host, socket is ready to read and #recv returns an empty string that means EOF.
323
- # If this happens we assume the data wasn't delivered and retry it.
324
- if raw_data.empty?
325
- @log.warn "node #{node.host}:#{node.port} closed the connection. regard it as unavailable."
326
- node.disable!
327
- raise ForwardOutputConnectionClosedError, "node #{node.host}:#{node.port} closed connection"
328
- else
329
- # Serialization type of the response is same as sent data.
330
- res = MessagePack.unpack(raw_data)
331
-
332
- if res['ack'] != option['chunk']
333
- # Some errors may have occured when ack and chunk id is different, so send the chunk again.
334
- raise ForwardOutputResponseError, "ack in response and chunk id in sent data are different"
335
- end
336
- end
337
-
338
- else
339
- # IO.select returns nil on timeout.
340
- # There are 2 types of cases when no response has been received:
341
- # (1) the node does not support sending responses
342
- # (2) the node does support sending response but responses have not arrived for some reasons.
343
- @log.warn "no response from #{node.host}:#{node.port}. regard it as unavailable."
344
- node.disable!
345
- raise ForwardOutputACKTimeoutError, "node #{node.host}:#{node.port} does not return ACK"
346
- end
347
- end
348
-
349
- node.heartbeat(false)
350
- res # for test
351
- ensure
352
- sock.close_write
353
- sock.close
354
- end
355
- end
356
-
357
- def connect(node)
358
- # TODO unix socket?
359
- TCPSocket.new(node.resolved_host, node.port)
360
- end
361
-
362
270
  class HeartbeatRequestTimer < Coolio::TimerWatcher
363
271
  def initialize(interval, callback)
364
272
  super(interval, true)
@@ -379,15 +287,11 @@ module Fluent
379
287
  rebuild_weight_array
380
288
  end
381
289
  begin
382
- #log.trace "sending heartbeat #{n.host}:#{n.port} on #{@heartbeat_type}"
383
- if @heartbeat_type == :tcp
384
- send_heartbeat_tcp(n)
385
- else
386
- @usock.send "\0", 0, Socket.pack_sockaddr_in(n.port, n.resolved_host)
387
- end
290
+ log.trace "sending heartbeat", host: n.host, port: n.port, heartbeat_type: @heartbeat_type
291
+ n.usock = @usock if @usock
292
+ n.send_heartbeat
388
293
  rescue Errno::EAGAIN, Errno::EWOULDBLOCK, Errno::EINTR, Errno::ECONNREFUSED
389
- # TODO log
390
- log.debug "failed to send heartbeat packet to #{n.host}:#{n.port}", error: $!.to_s
294
+ log.debug "failed to send heartbeat packet", host: n.host, port: n.port, heartbeat_type: @heartbeat_type, error: $!
391
295
  end
392
296
  }
393
297
  end
@@ -423,27 +327,38 @@ module Fluent
423
327
  end
424
328
  end
425
329
 
426
- NodeConfig = Struct.new("NodeConfig", :name, :host, :port, :weight, :standby, :failure,
427
- :phi_threshold, :recover_sample_size, :expire_dns_cache, :phi_failure_detector, :dns_round_robin)
428
-
429
330
  class Node
430
- def initialize(log, conf)
431
- @log = log
432
- @conf = conf
433
- @name = @conf.name
434
- @host = @conf.host
435
- @port = @conf.port
436
- @weight = @conf.weight
437
- @failure = @conf.failure
331
+ def initialize(sender, server, failure:)
332
+ @sender = sender
333
+ @log = sender.log
334
+
335
+ @name = server.name
336
+ @host = server.host
337
+ @port = server.port
338
+ @weight = server.weight
339
+ @standby = server.standby
340
+ @failure = failure
438
341
  @available = true
342
+ @state = nil
343
+
344
+ @usock = nil
345
+
346
+ @username = server.username
347
+ @password = server.password
348
+ @shared_key = server.shared_key || (sender.security && sender.security.shared_key) || ""
349
+ @shared_key_salt = generate_salt
350
+ @shared_key_nonce = ""
351
+
352
+ @unpacker = Fluent::Engine.msgpack_unpacker
439
353
 
440
354
  @resolved_host = nil
441
355
  @resolved_time = 0
442
356
  resolved_host # check dns
443
357
  end
444
358
 
445
- attr_reader :conf
446
- attr_reader :name, :host, :port, :weight
359
+ attr_accessor :usock
360
+
361
+ attr_reader :name, :host, :port, :weight, :standby, :state
447
362
  attr_reader :sockaddr # used by on_heartbeat
448
363
  attr_reader :failure, :available # for test
449
364
 
@@ -456,33 +371,177 @@ module Fluent
456
371
  end
457
372
 
458
373
  def standby?
459
- @conf.standby
374
+ @standby
375
+ end
376
+
377
+ def connect
378
+ TCPSocket.new(resolved_host, port)
379
+ end
380
+
381
+ def set_socket_options(sock)
382
+ opt = [1, @sender.send_timeout.to_i].pack('I!I!') # { int l_onoff; int l_linger; }
383
+ sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_LINGER, opt)
384
+
385
+ opt = [@sender.send_timeout.to_i, 0].pack('L!L!') # struct timeval
386
+ sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_SNDTIMEO, opt)
387
+
388
+ sock
389
+ end
390
+
391
+ def establish_connection(sock)
392
+ while available? && @state != :established
393
+ begin
394
+ # TODO: On Ruby 2.2 or earlier, read_nonblock doesn't work expectedly.
395
+ # We need rewrite around here using new socket/server plugin helper.
396
+ buf = sock.read_nonblock(@sender.read_length)
397
+ if buf.empty?
398
+ sleep @sender.read_interval
399
+ next
400
+ end
401
+ @unpacker.feed_each(buf) do |data|
402
+ on_read(sock, data)
403
+ end
404
+ rescue IO::WaitReadable
405
+ # If the exception is Errno::EWOULDBLOCK or Errno::EAGAIN, it is extended by IO::WaitReadable.
406
+ # So IO::WaitReadable can be used to rescue the exceptions for retrying read_nonblock.
407
+ # http://docs.ruby-lang.org/en/2.3.0/IO.html#method-i-read_nonblock
408
+ sleep @sender.read_interval unless @state == :established
409
+ rescue SystemCallError => e
410
+ @log.warn "disconnected by error", host: @host, port: @port, error: e
411
+ disable!
412
+ break
413
+ rescue EOFError
414
+ @log.warn "disconnected", host: @host, port: @port
415
+ disable!
416
+ break
417
+ end
418
+ end
419
+ end
420
+
421
+ def send_data(tag, chunk)
422
+ sock = connect
423
+ @state = @sender.security ? :helo : :established
424
+ begin
425
+ set_socket_options(sock)
426
+
427
+ if @state != :established
428
+ establish_connection(sock)
429
+ end
430
+
431
+ unless available?
432
+ raise ForwardOutputConnectionClosedError, "failed to establish connection with node #{@name}"
433
+ end
434
+
435
+ option = { 'size' => chunk.size_of_events }
436
+ option['chunk'] = Base64.encode64(chunk.unique_id) if @sender.require_ack_response
437
+
438
+ # out_forward always uses Raw32 type for content.
439
+ # Raw16 can store only 64kbytes, and it should be much smaller than buffer chunk size.
440
+
441
+ sock.write @sender.forward_header # beginArray(3)
442
+ sock.write tag.to_msgpack # 1. writeRaw(tag)
443
+ sock.write [0xdb, chunk.size].pack('CN') # 2. beginRaw(size) raw32
444
+ chunk.write_to(sock) # writeRawBody(packed_es)
445
+ sock.write option.to_msgpack # 3. writeOption(option)
446
+
447
+ if @sender.require_ack_response
448
+ # Waiting for a response here results in a decrease of throughput because a chunk queue is locked.
449
+ # To avoid a decrease of troughput, it is necessary to prepare a list of chunks that wait for responses
450
+ # and process them asynchronously.
451
+ if IO.select([sock], nil, nil, @sender.ack_response_timeout)
452
+ raw_data = begin
453
+ sock.recv(1024)
454
+ rescue Errno::ECONNRESET
455
+ ""
456
+ end
457
+
458
+ # When connection is closed by remote host, socket is ready to read and #recv returns an empty string that means EOF.
459
+ # If this happens we assume the data wasn't delivered and retry it.
460
+ if raw_data.empty?
461
+ @log.warn "node closed the connection. regard it as unavailable.", host: @host, port: @port
462
+ disable!
463
+ raise ForwardOutputConnectionClosedError, "node #{@host}:#{@port} closed connection"
464
+ else
465
+ @unpacker.feed(raw_data)
466
+ res = @unpacker.read
467
+ if res['ack'] != option['chunk']
468
+ # Some errors may have occured when ack and chunk id is different, so send the chunk again.
469
+ raise ForwardOutputResponseError, "ack in response and chunk id in sent data are different"
470
+ end
471
+ end
472
+
473
+ else
474
+ # IO.select returns nil on timeout.
475
+ # There are 2 types of cases when no response has been received:
476
+ # (1) the node does not support sending responses
477
+ # (2) the node does support sending response but responses have not arrived for some reasons.
478
+ @log.warn "no response from node. regard it as unavailable.", host: @host, port: @port
479
+ disable!
480
+ raise ForwardOutputACKTimeoutError, "node #{host}:#{port} does not return ACK"
481
+ end
482
+ end
483
+
484
+ heartbeat(false)
485
+ res # for test
486
+ ensure
487
+ sock.close_write
488
+ sock.close
489
+ end
490
+ end
491
+
492
+ # FORWARD_TCP_HEARTBEAT_DATA = FORWARD_HEADER + ''.to_msgpack + [].to_msgpack
493
+ def send_heartbeat
494
+ case @sender.heartbeat_type
495
+ when :tcp
496
+ sock = connect
497
+ begin
498
+ opt = [1, @sender.send_timeout.to_i].pack('I!I!') # { int l_onoff; int l_linger; }
499
+ sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_LINGER, opt)
500
+ # opt = [@sender.send_timeout.to_i, 0].pack('L!L!') # struct timeval
501
+ # sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_SNDTIMEO, opt)
502
+
503
+ ## don't send any data to not cause a compatibility problem
504
+ # sock.write FORWARD_TCP_HEARTBEAT_DATA
505
+
506
+ # successful tcp connection establishment is considered as valid heartbeat
507
+ heartbeat(true)
508
+ ensure
509
+ sock.close_write
510
+ sock.close
511
+ end
512
+ when :udp
513
+ @usock.send "\0", 0, Socket.pack_sockaddr_in(n.port, n.resolved_host)
514
+ when :none # :none doesn't use this class
515
+ raise "BUG: heartbeat_type none must not use Node"
516
+ else
517
+ raise "BUG: unknown heartbeat_type '#{@sender.heartbeat_type}'"
518
+ end
460
519
  end
461
520
 
462
521
  def resolved_host
463
- case @conf.expire_dns_cache
522
+ case @sender.expire_dns_cache
464
523
  when 0
465
524
  # cache is disabled
466
- return resolve_dns!
525
+ resolve_dns!
467
526
 
468
527
  when nil
469
528
  # persistent cache
470
- return @resolved_host ||= resolve_dns!
529
+ @resolved_host ||= resolve_dns!
471
530
 
472
531
  else
473
532
  now = Engine.now
474
533
  rh = @resolved_host
475
- if !rh || now - @resolved_time >= @conf.expire_dns_cache
534
+ if !rh || now - @resolved_time >= @sender.expire_dns_cache
476
535
  rh = @resolved_host = resolve_dns!
477
536
  @resolved_time = now
478
537
  end
479
- return rh
538
+ rh
480
539
  end
481
540
  end
482
541
 
483
542
  def resolve_dns!
484
543
  addrinfo_list = Socket.getaddrinfo(@host, @port, nil, Socket::SOCK_STREAM)
485
- addrinfo = @conf.dns_round_robin ? addrinfo_list.sample : addrinfo_list.first
544
+ addrinfo = @sender.dns_round_robin ? addrinfo_list.sample : addrinfo_list.first
486
545
  @sockaddr = Socket.pack_sockaddr_in(addrinfo[1], addrinfo[3]) # used by on_heartbeat
487
546
  addrinfo[3]
488
547
  end
@@ -505,36 +564,122 @@ module Fluent
505
564
  return true
506
565
  end
507
566
 
508
- if @conf.phi_failure_detector
567
+ if @sender.phi_failure_detector
509
568
  phi = @failure.phi(now)
510
- #$log.trace "phi '#{@name}'", :host=>@host, :port=>@port, :phi=>phi
511
- if phi > @conf.phi_threshold
512
- @log.warn "detached forwarding server '#{@name}'", host: @host, port: @port, phi: phi
569
+ if phi > @sender.phi_threshold
570
+ @log.warn "detached forwarding server '#{@name}'", host: @host, port: @port, phi: phi, phi_threshold: @sender.phi_threshold
513
571
  @available = false
514
572
  @resolved_host = nil # expire cached host
515
573
  @failure.clear
516
574
  return true
517
575
  end
518
576
  end
519
- return false
577
+ false
520
578
  end
521
579
 
522
580
  def heartbeat(detect=true)
523
581
  now = Time.now.to_f
524
582
  @failure.add(now)
525
- #@log.trace "heartbeat from '#{@name}'", :host=>@host, :port=>@port, :available=>@available, :sample_size=>@failure.sample_size
526
- if detect && !@available && @failure.sample_size > @conf.recover_sample_size
583
+ if detect && !@available && @failure.sample_size > @sender.recover_sample_size
527
584
  @available = true
528
585
  @log.warn "recovered forwarding server '#{@name}'", host: @host, port: @port
529
- return true
586
+ true
530
587
  else
531
- return nil
588
+ nil
532
589
  end
533
590
  end
534
591
 
592
+ # TODO: #to_msgpack(string) is deprecated
535
593
  def to_msgpack(out = '')
536
594
  [@host, @port, @weight, @available].to_msgpack(out)
537
595
  end
596
+
597
+ def generate_salt
598
+ SecureRandom.hex(16)
599
+ end
600
+
601
+ def check_helo(message)
602
+ @log.debug "checking helo"
603
+ # ['HELO', options(hash)]
604
+ unless message.size == 2 && message[0] == 'HELO'
605
+ return false
606
+ end
607
+ opts = message[1] || {}
608
+ # make shared_key_check failed (instead of error) if protocol version mismatch exist
609
+ @shared_key_nonce = opts['nonce'] || ''
610
+ @authentication = opts['auth'] || ''
611
+ true
612
+ end
613
+
614
+ def generate_ping
615
+ @log.debug "generating ping"
616
+ # ['PING', self_hostname, sharedkey\_salt, sha512\_hex(sharedkey\_salt + self_hostname + nonce + shared_key),
617
+ # username || '', sha512\_hex(auth\_salt + username + password) || '']
618
+ shared_key_hexdigest = Digest::SHA512.new.update(@shared_key_salt)
619
+ .update(@sender.security.self_hostname)
620
+ .update(@shared_key_nonce)
621
+ .update(@shared_key)
622
+ .hexdigest
623
+ ping = ['PING', @sender.security.self_hostname, @shared_key_salt, shared_key_hexdigest]
624
+ if !@authentication.empty?
625
+ password_hexdigest = Digest::SHA512.new.update(@authentication).update(@username).update(@password).hexdigest
626
+ ping.push(@username, password_hexdigest)
627
+ else
628
+ ping.push('','')
629
+ end
630
+ ping
631
+ end
632
+
633
+ def check_pong(message)
634
+ @log.debug "checking pong"
635
+ # ['PONG', bool(authentication result), 'reason if authentication failed',
636
+ # self_hostname, sha512\_hex(salt + self_hostname + nonce + sharedkey)]
637
+ unless message.size == 5 && message[0] == 'PONG'
638
+ return false, 'invalid format for PONG message'
639
+ end
640
+ _pong, auth_result, reason, hostname, shared_key_hexdigest = message
641
+
642
+ unless auth_result
643
+ return false, 'authentication failed: ' + reason
644
+ end
645
+
646
+ if hostname == @sender.security.self_hostname
647
+ return false, 'same hostname between input and output: invalid configuration'
648
+ end
649
+
650
+ clientside = Digest::SHA512.new.update(@shared_key_salt).update(hostname).update(@shared_key_nonce).update(@shared_key).hexdigest
651
+ unless shared_key_hexdigest == clientside
652
+ return false, 'shared key mismatch'
653
+ end
654
+
655
+ return true, nil
656
+ end
657
+
658
+ def on_read(sock, data)
659
+ @log.trace __callee__
660
+
661
+ case @state
662
+ when :helo
663
+ unless check_helo(data)
664
+ @log.warn "received invalid helo message from #{@name}"
665
+ disable! # shutdown
666
+ return
667
+ end
668
+ sock.write(generate_ping.to_msgpack)
669
+ @state = :pingpong
670
+ when :pingpong
671
+ succeeded, reason = check_pong(data)
672
+ unless succeeded
673
+ @log.warn "connection refused to #{@name}: #{reason}"
674
+ disable! # shutdown
675
+ return
676
+ end
677
+ @state = :established
678
+ @log.debug "connection established", host: @host, port: @port
679
+ else
680
+ raise "BUG: unknown session state: #{@state}"
681
+ end
682
+ end
538
683
  end
539
684
 
540
685
  # Override Node to disable heartbeat
@@ -614,33 +759,5 @@ module Fluent
614
759
  @last = 0
615
760
  end
616
761
  end
617
-
618
- ## TODO
619
- #class RPC
620
- # def initialize(this)
621
- # @this = this
622
- # end
623
- #
624
- # def list_nodes
625
- # @this.nodes
626
- # end
627
- #
628
- # def list_fault_nodes
629
- # list_nodes.select {|n| !n.available? }
630
- # end
631
- #
632
- # def list_available_nodes
633
- # list_nodes.select {|n| n.available? }
634
- # end
635
- #
636
- # def add_node(name, host, port, weight)
637
- # end
638
- #
639
- # def recover_node(host, port)
640
- # end
641
- #
642
- # def remove_node(host, port)
643
- # end
644
- #end
645
762
  end
646
763
  end