fluentd 1.6.3 → 1.7.0.rc1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of fluentd might be problematic. Click here for more details.

Files changed (78)
  1. checksums.yaml +4 -4
  2. data/.drone.yml +35 -0
  3. data/.github/ISSUE_TEMPLATE/bug_report.md +2 -0
  4. data/README.md +5 -1
  5. data/fluentd.gemspec +1 -1
  6. data/lib/fluent/clock.rb +4 -0
  7. data/lib/fluent/compat/output.rb +3 -3
  8. data/lib/fluent/compat/socket_util.rb +1 -1
  9. data/lib/fluent/config/element.rb +3 -3
  10. data/lib/fluent/config/literal_parser.rb +1 -1
  11. data/lib/fluent/config/section.rb +4 -1
  12. data/lib/fluent/error.rb +4 -0
  13. data/lib/fluent/event.rb +28 -24
  14. data/lib/fluent/event_router.rb +2 -1
  15. data/lib/fluent/log.rb +1 -1
  16. data/lib/fluent/msgpack_factory.rb +8 -0
  17. data/lib/fluent/plugin/bare_output.rb +4 -4
  18. data/lib/fluent/plugin/buf_file_single.rb +211 -0
  19. data/lib/fluent/plugin/buffer.rb +62 -63
  20. data/lib/fluent/plugin/buffer/chunk.rb +21 -3
  21. data/lib/fluent/plugin/buffer/file_chunk.rb +37 -12
  22. data/lib/fluent/plugin/buffer/file_single_chunk.rb +314 -0
  23. data/lib/fluent/plugin/buffer/memory_chunk.rb +2 -1
  24. data/lib/fluent/plugin/compressable.rb +10 -6
  25. data/lib/fluent/plugin/filter_grep.rb +2 -2
  26. data/lib/fluent/plugin/formatter_csv.rb +10 -6
  27. data/lib/fluent/plugin/in_syslog.rb +10 -3
  28. data/lib/fluent/plugin/in_tail.rb +7 -2
  29. data/lib/fluent/plugin/in_tcp.rb +34 -7
  30. data/lib/fluent/plugin/multi_output.rb +4 -4
  31. data/lib/fluent/plugin/out_exec_filter.rb +1 -0
  32. data/lib/fluent/plugin/out_file.rb +13 -3
  33. data/lib/fluent/plugin/out_forward.rb +126 -588
  34. data/lib/fluent/plugin/out_forward/ack_handler.rb +161 -0
  35. data/lib/fluent/plugin/out_forward/connection_manager.rb +113 -0
  36. data/lib/fluent/plugin/out_forward/error.rb +28 -0
  37. data/lib/fluent/plugin/out_forward/failure_detector.rb +84 -0
  38. data/lib/fluent/plugin/out_forward/handshake_protocol.rb +121 -0
  39. data/lib/fluent/plugin/out_forward/load_balancer.rb +111 -0
  40. data/lib/fluent/plugin/out_forward/socket_cache.rb +138 -0
  41. data/lib/fluent/plugin/out_http.rb +231 -0
  42. data/lib/fluent/plugin/output.rb +29 -35
  43. data/lib/fluent/plugin/parser.rb +77 -0
  44. data/lib/fluent/plugin/parser_csv.rb +75 -0
  45. data/lib/fluent/plugin_helper/server.rb +1 -1
  46. data/lib/fluent/plugin_helper/thread.rb +1 -0
  47. data/lib/fluent/root_agent.rb +1 -1
  48. data/lib/fluent/time.rb +4 -2
  49. data/lib/fluent/timezone.rb +21 -7
  50. data/lib/fluent/version.rb +1 -1
  51. data/test/command/test_fluentd.rb +1 -1
  52. data/test/command/test_plugin_generator.rb +18 -2
  53. data/test/config/test_configurable.rb +78 -40
  54. data/test/counter/test_store.rb +1 -1
  55. data/test/helper.rb +1 -0
  56. data/test/helpers/process_extenstion.rb +33 -0
  57. data/test/plugin/out_forward/test_ack_handler.rb +101 -0
  58. data/test/plugin/out_forward/test_connection_manager.rb +145 -0
  59. data/test/plugin/out_forward/test_handshake_protocol.rb +103 -0
  60. data/test/plugin/out_forward/test_load_balancer.rb +60 -0
  61. data/test/plugin/out_forward/test_socket_cache.rb +139 -0
  62. data/test/plugin/test_buf_file.rb +118 -2
  63. data/test/plugin/test_buf_file_single.rb +734 -0
  64. data/test/plugin/test_buffer.rb +4 -48
  65. data/test/plugin/test_buffer_file_chunk.rb +19 -1
  66. data/test/plugin/test_buffer_file_single_chunk.rb +620 -0
  67. data/test/plugin/test_formatter_csv.rb +16 -0
  68. data/test/plugin/test_in_syslog.rb +56 -6
  69. data/test/plugin/test_in_tail.rb +1 -1
  70. data/test/plugin/test_in_tcp.rb +25 -0
  71. data/test/plugin/test_out_forward.rb +75 -201
  72. data/test/plugin/test_out_http.rb +352 -0
  73. data/test/plugin/test_output_as_buffered.rb +27 -24
  74. data/test/plugin/test_parser.rb +40 -0
  75. data/test/plugin/test_parser_csv.rb +83 -0
  76. data/test/plugin_helper/test_record_accessor.rb +1 -1
  77. data/test/test_time_formatter.rb +140 -121
  78. metadata +35 -6
@@ -43,7 +43,8 @@ module Fluent
43
43
  @chunk_bytes += @adding_bytes
44
44
 
45
45
  @adding_bytes = @adding_size = 0
46
- @modified_at = Time.now
46
+ @modified_at = Fluent::Clock.real_now
47
+ @modified_at_object = nil
47
48
  true
48
49
  end
49
50
 
@@ -64,9 +64,11 @@ module Fluent
64
64
  unused = gz.unused
65
65
  gz.finish
66
66
 
67
- break if unused.nil?
68
- adjust = unused.length
69
- io.pos -= adjust
67
+ unless unused.nil?
68
+ adjust = unused.length
69
+ io.pos -= adjust
70
+ end
71
+ break if io.eof?
70
72
  end
71
73
 
72
74
  out
@@ -80,9 +82,11 @@ module Fluent
80
82
  unused = gz.unused
81
83
  gz.finish
82
84
 
83
- break if unused.nil?
84
- adjust = unused.length
85
- input.pos -= adjust
85
+ unless unused.nil?
86
+ adjust = unused.length
87
+ input.pos -= adjust
88
+ end
89
+ break if input.eof?
86
90
  end
87
91
 
88
92
  output
@@ -110,7 +110,7 @@ module Fluent::Plugin
110
110
  end
111
111
 
112
112
  if @regexps.size > 1
113
- log.info "Top level multiple <regexp> is intepreted as 'and' condition"
113
+ log.info "Top level multiple <regexp> is interpreted as 'and' condition"
114
114
  end
115
115
  @regexps.each do |e|
116
116
  raise Fluent::ConfigError, "Duplicate key: #{e.key}" if regexp_and_conditions.key?(e.key)
@@ -118,7 +118,7 @@ module Fluent::Plugin
118
118
  end
119
119
 
120
120
  if @excludes.size > 1
121
- log.info "Top level multiple <exclude> is intepreted as 'or' condition"
121
+ log.info "Top level multiple <exclude> is interpreted as 'or' condition"
122
122
  end
123
123
  @excludes.each do |e|
124
124
  raise Fluent::ConfigError, "Duplicate key: #{e.key}" if exclude_or_conditions.key?(e.key)
@@ -33,18 +33,22 @@ module Fluent
33
33
 
34
34
  def configure(conf)
35
35
  super
36
+
36
37
  @fields = fields.select{|f| !f.empty? }
37
38
  raise ConfigError, "empty value is specified in fields parameter" if @fields.empty?
38
39
 
39
- @generate_opts = {col_sep: @delimiter, force_quotes: @force_quotes}
40
+ @generate_opts = {col_sep: @delimiter, force_quotes: @force_quotes, headers: @fields,
41
+ row_sep: @add_newline ? :auto : "".force_encoding(Encoding::ASCII_8BIT)}
42
+ # Cache CSV object per thread to avoid internal state sharing
43
+ @cache = {}
40
44
  end
41
45
 
42
46
  def format(tag, time, record)
43
- row = @fields.map do |key|
44
- record[key]
45
- end
46
- line = CSV.generate_line(row, @generate_opts)
47
- line.chomp! unless @add_newline
47
+ csv = (@cache[Thread.current] ||= CSV.new("".force_encoding(Encoding::ASCII_8BIT), @generate_opts))
48
+ line = (csv << record).string.dup
49
+ # Need manual cleanup because CSV writer doesn't provide such method.
50
+ csv.rewind
51
+ csv.truncate(0)
48
52
  line
49
53
  end
50
54
  end
@@ -199,6 +199,13 @@ module Fluent::Plugin
199
199
 
200
200
  private
201
201
 
202
+ def emit_unmatched(data, sock)
203
+ record = {"unmatched_line" => data}
204
+ record[@source_address_key] = sock.remote_addr if @source_address_key
205
+ record[@source_hostname_key] = sock.remote_host if @source_hostname_key
206
+ emit("#{@tag}.unmatched", Fluent::EventTime.now, record)
207
+ end
208
+
202
209
  def message_handler(data, sock)
203
210
  pri = nil
204
211
  text = data
@@ -206,7 +213,7 @@ module Fluent::Plugin
206
213
  m = SYSLOG_REGEXP.match(data)
207
214
  unless m
208
215
  if @emit_unmatched_lines
209
- emit("#{@tag}.unmatched", Fluent::EventTime.now, {"unmatched_line" => data})
216
+ emit_unmatched(data, sock)
210
217
  end
211
218
  log.warn "invalid syslog message: #{data.dump}"
212
219
  return
@@ -218,7 +225,7 @@ module Fluent::Plugin
218
225
  @parser.parse(text) do |time, record|
219
226
  unless time && record
220
227
  if @emit_unmatched_lines
221
- emit("#{@tag}.unmatched", Fluent::EventTime.now, {"unmatched_line" => text})
228
+ emit_unmatched(data, sock)
222
229
  end
223
230
  log.warn "failed to parse message", data: data
224
231
  return
@@ -238,7 +245,7 @@ module Fluent::Plugin
238
245
  end
239
246
  rescue => e
240
247
  if @emit_unmatched_lines
241
- emit("#{@tag}.unmatched", Fluent::EventTime.now, {"unmatched_line" => text})
248
+ emit_unmatched(data, sock)
242
249
  end
243
250
  log.error "invalid input", data: data, error: e
244
251
  log.error_backtrace
@@ -97,6 +97,10 @@ module Fluent::Plugin
97
97
  desc 'Ignore repeated permission error logs'
98
98
  config_param :ignore_repeated_permission_error, :bool, default: false
99
99
 
100
+ config_section :parse, required: false, multi: true, init: true, param_name: :parser_configs do
101
+ config_argument :usage, :string, default: 'in_tail_parser'
102
+ end
103
+
100
104
  attr_reader :paths
101
105
 
102
106
  @@pos_file_paths = {}
@@ -148,7 +152,8 @@ module Fluent::Plugin
148
152
  method(:parse_singleline)
149
153
  end
150
154
  @file_perm = system_config.file_permission || FILE_PERMISSION
151
- @parser = parser_create(conf: parser_config)
155
+ # parser is already created by parser helper
156
+ @parser = parser_create(usage: parser_config['usage'] || @parser_configs.first.usage)
152
157
  end
153
158
 
154
159
  def configure_tag
@@ -431,7 +436,7 @@ module Fluent::Plugin
431
436
  end
432
437
  }
433
438
  rescue => e
434
- log.warn line.dump, error: e.to_s
439
+ log.warn 'invalid line found', file: tail_watcher.path, line: line, error: e.to_s
435
440
  log.debug_backtrace(e.backtrace)
436
441
  end
437
442
  end
@@ -61,13 +61,15 @@ module Fluent::Plugin
61
61
  def start
62
62
  super
63
63
 
64
- server_create(:in_tcp_server, @port, bind: @bind, resolve_name: !!@source_hostname_key) do |data, conn|
65
- conn.buffer << data
66
- begin
64
+ del_size = @delimiter.length
65
+ if @_extract_enabled && @_extract_tag_key
66
+ server_create(:in_tcp_server_single_emit, @port, bind: @bind, resolve_name: !!@source_hostname_key) do |data, conn|
67
+ conn.buffer << data
68
+ buf = conn.buffer
67
69
  pos = 0
68
- while i = conn.buffer.index(@delimiter, pos)
69
- msg = conn.buffer[pos...i]
70
- pos = i + @delimiter.length
70
+ while i = buf.index(@delimiter, pos)
71
+ msg = buf[pos...i]
72
+ pos = i + del_size
71
73
 
72
74
  @parser.parse(msg) do |time, record|
73
75
  unless time && record
@@ -83,7 +85,32 @@ module Fluent::Plugin
83
85
  router.emit(tag, time, record)
84
86
  end
85
87
  end
86
- conn.buffer.slice!(0, pos) if pos > 0
88
+ buf.slice!(0, pos) if pos > 0
89
+ end
90
+ else
91
+ server_create(:in_tcp_server_batch_emit, @port, bind: @bind, resolve_name: !!@source_hostname_key) do |data, conn|
92
+ conn.buffer << data
93
+ buf = conn.buffer
94
+ pos = 0
95
+ es = Fluent::MultiEventStream.new
96
+ while i = buf.index(@delimiter, pos)
97
+ msg = buf[pos...i]
98
+ pos = i + del_size
99
+
100
+ @parser.parse(msg) do |time, record|
101
+ unless time && record
102
+ log.warn "pattern not matched", message: msg
103
+ next
104
+ end
105
+
106
+ time ||= extract_time_from_record(record) || Fluent::EventTime.now
107
+ record[@source_address_key] = conn.remote_addr if @source_address_key
108
+ record[@source_hostname_key] = conn.remote_host if @source_hostname_key
109
+ es.add(time, record)
110
+ end
111
+ end
112
+ router.emit_stream(@tag, es)
113
+ buf.slice!(0, pos) if pos > 0
87
114
  end
88
115
  end
89
116
  end
@@ -44,7 +44,7 @@ module Fluent
44
44
  @outputs = []
45
45
  @outputs_statically_created = false
46
46
 
47
- @counters_monitor = Monitor.new
47
+ @counter_mutex = Mutex.new
48
48
  # TODO: well organized counters
49
49
  @num_errors = 0
50
50
  @emit_count = 0
@@ -143,12 +143,12 @@ module Fluent
143
143
  end
144
144
 
145
145
  def emit_sync(tag, es)
146
- @counters_monitor.synchronize{ @emit_count += 1 }
146
+ @counter_mutex.synchronize{ @emit_count += 1 }
147
147
  begin
148
148
  process(tag, es)
149
- @counters_monitor.synchronize{ @emit_records += es.size }
149
+ @counter_mutex.synchronize{ @emit_records += es.size }
150
150
  rescue
151
- @counters_monitor.synchronize{ @num_errors += 1 }
151
+ @counter_mutex.synchronize{ @num_errors += 1 }
152
152
  raise
153
153
  end
154
154
  end
@@ -284,6 +284,7 @@ module Fluent::Plugin
284
284
  end
285
285
 
286
286
  def run(io)
287
+ io.set_encoding(Encoding::ASCII_8BIT)
287
288
  case
288
289
  when @parser.implement?(:parse_io)
289
290
  @parser.parse_io(io, &method(:on_record))
@@ -72,6 +72,17 @@ module Fluent::Plugin
72
72
  attr_accessor :last_written_path # for tests
73
73
 
74
74
  module SymlinkBufferMixin
75
+ def metadata(timekey: nil, tag: nil, variables: nil)
76
+ metadata = super
77
+
78
+ @latest_metadata ||= new_metadata(timekey: 0)
79
+ if metadata.timekey && (metadata.timekey >= @latest_metadata.timekey)
80
+ @latest_metadata = metadata
81
+ end
82
+
83
+ metadata
84
+ end
85
+
75
86
  def output_plugin_for_symlink=(output_plugin)
76
87
  @_output_plugin_for_symlink = output_plugin
77
88
  end
@@ -86,8 +97,7 @@ module Fluent::Plugin
86
97
  # timekey will be appended into that file chunk. On the other side, resumed file chunks might NOT
87
98
  # have timekey, especially in the cases that resumed file chunks are generated by Fluentd v0.12.
88
99
  # These chunks will be enqueued immediately, and will be flushed soon.
89
- latest_metadata = metadata_list.select{|m| m.timekey }.sort_by(&:timekey).last
90
- if chunk.metadata == latest_metadata
100
+ if chunk.metadata == @latest_metadata
91
101
  sym_path = @_output_plugin_for_symlink.extract_placeholders(@_symlink_path, chunk)
92
102
  FileUtils.mkdir_p(File.dirname(sym_path), mode: @_output_plugin_for_symlink.dir_perm)
93
103
  FileUtils.ln_sf(chunk.path, sym_path)
@@ -115,7 +125,7 @@ module Fluent::Plugin
115
125
 
116
126
  if conf.has_key?('utc') || conf.has_key?('localtime')
117
127
  param_name = conf.has_key?('utc') ? 'utc' : 'localtime'
118
- log.warn "'#{param_name}' is deperecated for output plugin. This parameter is used for formatter plugin in compatibility layer. If you want to use same feature, use timekey_use_utc parameter in <buffer> directive instead"
128
+ log.warn "'#{param_name}' is deprecated for output plugin. This parameter is used for formatter plugin in compatibility layer. If you want to use same feature, use timekey_use_utc parameter in <buffer> directive instead"
119
129
  end
120
130
 
121
131
  super
@@ -20,13 +20,16 @@ require 'fluent/clock'
20
20
  require 'base64'
21
21
 
22
22
  require 'fluent/compat/socket_util'
23
+ require 'fluent/plugin/out_forward/handshake_protocol'
24
+ require 'fluent/plugin/out_forward/load_balancer'
25
+ require 'fluent/plugin/out_forward/socket_cache'
26
+ require 'fluent/plugin/out_forward/failure_detector'
27
+ require 'fluent/plugin/out_forward/error'
28
+ require 'fluent/plugin/out_forward/connection_manager'
29
+ require 'fluent/plugin/out_forward/ack_handler'
23
30
 
24
31
  module Fluent::Plugin
25
32
  class ForwardOutput < Output
26
- class Error < StandardError; end
27
- class NoNodesAvailable < Error; end
28
- class ConnectionClosedError < Error; end
29
-
30
33
  Fluent::Plugin.register_output('forward', self)
31
34
 
32
35
  helpers :socket, :server, :timer, :thread, :compat_parameters
@@ -154,8 +157,6 @@ module Fluent::Plugin
154
157
  @thread = nil
155
158
 
156
159
  @usock = nil
157
- @sock_ack_waiting = nil
158
- @sock_ack_waiting_mutex = nil
159
160
  @keep_alive_watcher_interval = 5 # TODO
160
161
  end
161
162
 
@@ -176,10 +177,8 @@ module Fluent::Plugin
176
177
  @heartbeat_type = :transport
177
178
  end
178
179
 
179
- if @dns_round_robin
180
- if @heartbeat_type == :udp
181
- raise Fluent::ConfigError, "forward output heartbeat type must be 'transport' or 'none' to use dns_round_robin option"
182
- end
180
+ if @dns_round_robin && @heartbeat_type == :udp
181
+ raise Fluent::ConfigError, "forward output heartbeat type must be 'transport' or 'none' to use dns_round_robin option"
183
182
  end
184
183
 
185
184
  if @transport == :tls
@@ -201,15 +200,24 @@ module Fluent::Plugin
201
200
  end
202
201
  end
203
202
 
203
+ @ack_handler = @require_ack_response ? AckHandler.new(timeout: @ack_response_timeout, log: @log, read_length: @read_length) : nil
204
+ socket_cache = @keepalive ? SocketCache.new(@keepalive_timeout, @log) : nil
205
+ @connection_manager = ConnectionManager.new(
206
+ log: @log,
207
+ secure: !!@security,
208
+ connection_factory: method(:create_transfer_socket),
209
+ socket_cache: socket_cache,
210
+ )
211
+
204
212
  @servers.each do |server|
205
213
  failure = FailureDetector.new(@heartbeat_interval, @hard_timeout, Time.now.to_i.to_f)
206
214
  name = server.name || "#{server.host}:#{server.port}"
207
215
 
208
216
  log.info "adding forwarding server '#{name}'", host: server.host, port: server.port, weight: server.weight, plugin_id: plugin_id
209
217
  if @heartbeat_type == :none
210
- @nodes << NoneHeartbeatNode.new(self, server, failure: failure, keepalive: @keepalive, keepalive_timeout: @keepalive_timeout)
218
+ @nodes << NoneHeartbeatNode.new(self, server, failure: failure, connection_manager: @connection_manager, ack_handler: @ack_handler)
211
219
  else
212
- node = Node.new(self, server, failure: failure, keepalive: @keepalive, keepalive_timeout: @keepalive_timeout)
220
+ node = Node.new(self, server, failure: failure, connection_manager: @connection_manager, ack_handler: @ack_handler)
213
221
  begin
214
222
  node.validate_host_resolution!
215
223
  rescue => e
@@ -251,31 +259,25 @@ module Fluent::Plugin
251
259
  def start
252
260
  super
253
261
 
254
- # Output#start sets @delayed_commit_timeout by @buffer_config.delayed_commit_timeout
255
- # But it should be overwritten by ack_response_timeout to rollback chunks after timeout
256
- if @ack_response_timeout && @delayed_commit_timeout != @ack_response_timeout
257
- log.info "delayed_commit_timeout is overwritten by ack_response_timeout"
258
- @delayed_commit_timeout = @ack_response_timeout + 2 # minimum ack_reader IO.select interval is 1s
259
- end
260
-
261
- @rand_seed = Random.new.seed
262
- rebuild_weight_array
263
- @rr = 0
262
+ @load_balancer = LoadBalancer.new(log)
263
+ @load_balancer.rebuild_weight_array(@nodes)
264
264
 
265
265
  unless @heartbeat_type == :none
266
266
  if @heartbeat_type == :udp
267
267
  @usock = socket_create_udp(@nodes.first.host, @nodes.first.port, nonblock: true)
268
- server_create_udp(:out_forward_heartbeat_receiver, 0, socket: @usock, max_bytes: @read_length) do |data, sock|
269
- sockaddr = Socket.pack_sockaddr_in(sock.remote_port, sock.remote_host)
270
- on_heartbeat(sockaddr, data)
271
- end
268
+ server_create_udp(:out_forward_heartbeat_receiver, 0, socket: @usock, max_bytes: @read_length, &method(:on_udp_heatbeat_response_recv))
272
269
  end
273
- timer_execute(:out_forward_heartbeat_request, @heartbeat_interval, &method(:on_timer))
270
+ timer_execute(:out_forward_heartbeat_request, @heartbeat_interval, &method(:on_heartbeat_timer))
274
271
  end
275
272
 
276
273
  if @require_ack_response
277
- @sock_ack_waiting_mutex = Mutex.new
278
- @sock_ack_waiting = []
274
+ # Output#start sets @delayed_commit_timeout by @buffer_config.delayed_commit_timeout
275
+ # But it should be overwritten by ack_response_timeout to rollback chunks after timeout
276
+ if @delayed_commit_timeout != @ack_response_timeout
277
+ log.info "delayed_commit_timeout is overwritten by ack_response_timeout"
278
+ @delayed_commit_timeout = @ack_response_timeout + 2 # minimum ack_reader IO.select interval is 1s
279
+ end
280
+
279
281
  thread_create(:out_forward_receiving_ack, &method(:ack_reader))
280
282
  end
281
283
 
@@ -301,22 +303,22 @@ module Fluent::Plugin
301
303
  @usock.close rescue nil
302
304
  end
303
305
 
304
- if @keepalive && @keepalive_timeout
305
- @nodes.each(&:clear)
306
- end
307
306
  super
308
307
  end
309
308
 
309
+ def stop
310
+ super
311
+
312
+ if @keepalive
313
+ @connection_manager.stop
314
+ end
315
+ end
316
+
310
317
  def write(chunk)
311
318
  return if chunk.empty?
312
319
  tag = chunk.metadata.tag
313
- select_a_healthy_node{|node| node.send_data(tag, chunk) }
314
- end
315
320
 
316
- ACKWaitingSockInfo = Struct.new(:sock, :chunk_id, :chunk_id_base64, :node, :time, :timeout) do
317
- def expired?(now)
318
- time + timeout < now
319
- end
321
+ @load_balancer.select_healthy_node { |node| node.send_data(tag, chunk) }
320
322
  end
321
323
 
322
324
  def try_write(chunk)
@@ -326,35 +328,7 @@ module Fluent::Plugin
326
328
  return
327
329
  end
328
330
  tag = chunk.metadata.tag
329
- sock, node = select_a_healthy_node{|n| n.send_data(tag, chunk) }
330
- chunk_id_base64 = Base64.encode64(chunk.unique_id)
331
- current_time = Fluent::Clock.now
332
- info = ACKWaitingSockInfo.new(sock, chunk.unique_id, chunk_id_base64, node, current_time, @ack_response_timeout)
333
- @sock_ack_waiting_mutex.synchronize do
334
- @sock_ack_waiting << info
335
- end
336
- end
337
-
338
- def select_a_healthy_node
339
- error = nil
340
-
341
- wlen = @weight_array.length
342
- wlen.times do
343
- @rr = (@rr + 1) % wlen
344
- node = @weight_array[@rr]
345
- next unless node.available?
346
-
347
- begin
348
- ret = yield node
349
- return ret, node
350
- rescue
351
- # for load balancing during detecting crashed servers
352
- error = $! # use the latest error
353
- end
354
- end
355
-
356
- raise error if error
357
- raise NoNodesAvailable, "no nodes are available"
331
+ @load_balancer.select_healthy_node { |n| n.send_data(tag, chunk) }
358
332
  end
359
333
 
360
334
  def create_transfer_socket(host, port, hostname, &block)
@@ -403,130 +377,41 @@ module Fluent::Plugin
403
377
 
404
378
  private
405
379
 
406
- def rebuild_weight_array
407
- standby_nodes, regular_nodes = @nodes.partition {|n|
408
- n.standby?
409
- }
410
-
411
- lost_weight = 0
412
- regular_nodes.each {|n|
413
- unless n.available?
414
- lost_weight += n.weight
415
- end
416
- }
417
- log.debug "rebuilding weight array", lost_weight: lost_weight
418
-
419
- if lost_weight > 0
420
- standby_nodes.each {|n|
421
- if n.available?
422
- regular_nodes << n
423
- log.warn "using standby node #{n.host}:#{n.port}", weight: n.weight
424
- lost_weight -= n.weight
425
- break if lost_weight <= 0
426
- end
427
- }
428
- end
429
-
430
- weight_array = []
431
- if regular_nodes.empty?
432
- log.warn('No nodes are available')
433
- @weight_array = weight_array
434
- return @weight_array
435
- end
436
-
437
- gcd = regular_nodes.map {|n| n.weight }.inject(0) {|r,w| r.gcd(w) }
438
- regular_nodes.each {|n|
439
- (n.weight / gcd).times {
440
- weight_array << n
441
- }
442
- }
443
-
444
- # for load balancing during detecting crashed servers
445
- coe = (regular_nodes.size * 6) / weight_array.size
446
- weight_array *= coe if coe > 1
447
-
448
- r = Random.new(@rand_seed)
449
- weight_array.sort_by! { r.rand }
450
-
451
- @weight_array = weight_array
452
- end
453
-
454
- def on_timer
455
- @nodes.each {|n|
380
+ def on_heartbeat_timer
381
+ need_rebuild = false
382
+ @nodes.each do |n|
456
383
  begin
457
384
  log.trace "sending heartbeat", host: n.host, port: n.port, heartbeat_type: @heartbeat_type
458
385
  n.usock = @usock if @usock
459
- if n.send_heartbeat
460
- rebuild_weight_array
461
- end
386
+ need_rebuild = n.send_heartbeat || need_rebuild
462
387
  rescue Errno::EAGAIN, Errno::EWOULDBLOCK, Errno::EINTR, Errno::ECONNREFUSED, Errno::ETIMEDOUT => e
463
388
  log.debug "failed to send heartbeat packet", host: n.host, port: n.port, heartbeat_type: @heartbeat_type, error: e
464
389
  rescue => e
465
390
  log.debug "unexpected error happen during heartbeat", host: n.host, port: n.port, heartbeat_type: @heartbeat_type, error: e
466
391
  end
467
- if n.tick
468
- rebuild_weight_array
469
- end
470
- }
471
- end
472
392
 
473
- def on_heartbeat(sockaddr, msg)
474
- if node = @nodes.find {|n| n.sockaddr == sockaddr }
475
- # log.trace "heartbeat arrived", name: node.name, host: node.host, port: node.port
476
- if node.heartbeat
477
- rebuild_weight_array
478
- end
393
+ need_rebuild = n.tick || need_rebuild
479
394
  end
480
- end
481
395
 
482
- def on_purge_obsolete_socks
483
- @nodes.each(&:purge_obsolete_socks)
396
+ if need_rebuild
397
+ @load_balancer.rebuild_weight_array(@nodes)
398
+ end
484
399
  end
485
400
 
486
- # return chunk id to be committed
487
- def read_ack_from_sock(sock, unpacker)
488
- begin
489
- raw_data = sock.instance_of?(Fluent::PluginHelper::Socket::WrappedSocket::TLS) ? sock.readpartial(@read_length) : sock.recv(@read_length)
490
- rescue Errno::ECONNRESET, EOFError # ECONNRESET for #recv, #EOFError for #readpartial
491
- raw_data = ""
492
- end
493
- info = @sock_ack_waiting_mutex.synchronize{ @sock_ack_waiting.find{|i| i.sock == sock } }
494
-
495
- # When connection is closed by remote host, socket is ready to read and #recv returns an empty string that means EOF.
496
- # If this happens we assume the data wasn't delivered and retry it.
497
- if raw_data.empty?
498
- log.warn "destination node closed the connection. regard it as unavailable.", host: info.node.host, port: info.node.port
499
- info.node.disable!
500
- rollback_write(info.chunk_id, update_retry: false)
501
- return nil
502
- else
503
- unpacker.feed(raw_data)
504
- res = unpacker.read
505
- log.trace "getting response from destination", host: info.node.host, port: info.node.port, chunk_id: dump_unique_id_hex(info.chunk_id), response: res
506
- if res['ack'] != info.chunk_id_base64
507
- # Some errors may have occurred when ack and chunk id is different, so send the chunk again.
508
- log.warn "ack in response and chunk id in sent data are different", chunk_id: dump_unique_id_hex(info.chunk_id), ack: res['ack']
509
- rollback_write(info.chunk_id, update_retry: false)
510
- return nil
511
- else
512
- log.trace "got a correct ack response", chunk_id: dump_unique_id_hex(info.chunk_id)
401
+ def on_udp_heatbeat_response_recv(data, sock)
402
+ sockaddr = Socket.pack_sockaddr_in(sock.remote_port, sock.remote_host)
403
+ if node = @nodes.find { |n| n.sockaddr == sockaddr }
404
+ # log.trace "heartbeat arrived", name: node.name, host: node.host, port: node.port
405
+ if node.heartbeat
406
+ @load_balancer.rebuild_weight_array(@nodes)
513
407
  end
514
- return info.chunk_id
515
- end
516
- rescue => e
517
- log.error "unexpected error while receiving ack message", error: e
518
- log.error_backtrace
519
- ensure
520
- if @keepalive
521
- info.node.socket_cache.dec_ref_by_value(info.sock)
522
408
  else
523
- info.sock.close_write rescue nil
524
- info.sock.close rescue nil
409
+ log.warn("Unknown heartbeat response received from #{sock.remote_host}:#{sock.remote_port}")
525
410
  end
411
+ end
526
412
 
527
- @sock_ack_waiting_mutex.synchronize do
528
- @sock_ack_waiting.delete(info)
529
- end
413
+ def on_purge_obsolete_socks
414
+ @connection_manager.purge_obsolete_socks
530
415
  end
531
416
 
532
417
  def ack_reader
@@ -536,185 +421,33 @@ module Fluent::Plugin
536
421
  @delayed_commit_timeout / 3.0
537
422
  end
538
423
 
539
- unpacker = Fluent::Engine.msgpack_unpacker
540
-
541
424
  while thread_current_running?
542
- now = Fluent::Clock.now
543
- sockets = []
544
- begin
545
- @sock_ack_waiting_mutex.synchronize do
546
- new_list = []
547
- @sock_ack_waiting.each do |info|
548
- if info.expired?(now)
549
- # There are 2 types of cases when no response has been received from socket:
550
- # (1) the node does not support sending responses
551
- # (2) the node does support sending response but responses have not arrived for some reasons.
552
- log.warn "no response from node. regard it as unavailable.", host: info.node.host, port: info.node.port
553
- info.node.disable!
554
- if @keepalive
555
- info.node.socket_cache.revoke_by_value(info.sock)
556
- end
557
- info.sock.close rescue nil
558
- rollback_write(info.chunk_id, update_retry: false)
559
- else
560
- sockets << info.sock
561
- new_list << info
562
- end
563
- end
564
- @sock_ack_waiting = new_list
565
- end
425
+ @ack_handler.collect_response(select_interval) do |chunk_id, node, sock, result|
426
+ @connection_manager.close(sock)
566
427
 
567
- readable_sockets, _, _ = IO.select(sockets, nil, nil, select_interval)
568
- next unless readable_sockets
428
+ case result
429
+ when AckHandler::Result::SUCCESS
430
+ commit_write(chunk_id)
431
+ when AckHandler::Result::FAILED
432
+ node.disable!
433
+ rollback_write(chunk_id, update_retry: false)
434
+ when AckHandler::Result::CHUNKID_UNMATCHED
435
+ rollback_write(chunk_id, update_retry: false)
436
+ else
437
+ log.warn("BUG: invalid status #{result} #{chunk_id}")
569
438
 
570
- readable_sockets.each do |sock|
571
- chunk_id = read_ack_from_sock(sock, unpacker)
572
- commit_write(chunk_id) if chunk_id
439
+ if chunk_id
440
+ rollback_write(chunk_id, update_retry: false)
441
+ end
573
442
  end
574
- rescue => e
575
- log.error "unexpected error while receiving ack", error: e
576
- log.error_backtrace
577
443
  end
578
444
  end
579
445
  end
580
446
 
581
447
  class Node
582
- class SocketCache
583
- TimedSocket = Struct.new(:timeout, :sock, :ref)
584
-
585
- def initialize(timeout, log)
586
- @log = log
587
- @timeout = timeout
588
- @active_socks = {}
589
- @inactive_socks = {}
590
- @mutex = Mutex.new
591
- end
592
-
593
- def revoke(key = Thread.current.object_id)
594
- @mutex.synchronize do
595
- if @active_socks[key]
596
- @inactive_socks[key] = @active_socks.delete(key)
597
- @inactive_socks[key].ref = 0
598
- end
599
- end
600
- end
601
-
602
- def clear
603
- @mutex.synchronize do
604
- @inactive_socks.values.each do |s|
605
- s.sock.close rescue nil
606
- end
607
- @inactive_socks.clear
608
-
609
- @active_socks.values.each do |s|
610
- s.sock.close rescue nil
611
- end
612
- @active_socks.clear
613
- end
614
- end
615
-
616
- def purge_obsolete_socks
617
- @mutex.synchronize do
618
- @inactive_socks.keys.each do |k|
619
- # 0 means sockets stored in this class received all acks
620
- if @inactive_socks[k].ref <= 0
621
- s = @inactive_socks.delete(k)
622
- s.sock.close rescue nil
623
- @log.debug("purged obsolete socket #{s.sock}")
624
- end
625
- end
626
-
627
- @active_socks.keys.each do |k|
628
- if expired?(k) && @active_socks[k].ref <= 0
629
- @inactive_socks[k] = @active_socks.delete(k)
630
- end
631
- end
632
- end
633
- end
634
-
635
- # We expect that `yield` returns a unique object in this class
636
- def fetch_or(key = Thread.current.object_id)
637
- @mutex.synchronize do
638
- unless @active_socks[key]
639
- @active_socks[key] = TimedSocket.new(timeout, yield, 1)
640
- @log.debug("connect new socket #{@active_socks[key]}")
641
- return @active_socks[key].sock
642
- end
643
-
644
- if expired?(key)
645
- # Do not close this socket here in case of it will be used by other place (e.g. wait for receiving ack)
646
- @inactive_socks[key] = @active_socks.delete(key)
647
- @log.debug("connection #{@inactive_socks[key]} is expired. reconnecting...")
648
- @active_socks[key] = TimedSocket.new(timeout, yield, 0)
649
- end
650
-
651
- @active_socks[key].ref += 1
652
- @active_socks[key].sock
653
- end
654
- end
655
-
656
- def dec_ref(key = Thread.current.object_id)
657
- @mutex.synchronize do
658
- if @active_socks[key]
659
- @active_socks[key].ref -= 1
660
- elsif @inactive_socks[key]
661
- @inactive_socks[key].ref -= 1
662
- else
663
- @log.warn("Not found key for dec_ref: #{key}")
664
- end
665
- end
666
- end
667
-
668
- # This method is expected to be called in class which doesn't call #inc_ref
669
- def dec_ref_by_value(val)
670
- @mutex.synchronize do
671
- sock = @active_socks.detect { |_, v| v.sock == val }
672
- if sock
673
- key = sock.first
674
- @active_socks[key].ref -= 1
675
- return
676
- end
677
-
678
- sock = @inactive_socks.detect { |_, v| v.sock == val }
679
- if sock
680
- key = sock.first
681
- @inactive_socks[key].ref -= 1
682
- return
683
- else
684
- @log.warn("Not found key for dec_ref_by_value: #{key}")
685
- end
686
- end
687
- end
688
-
689
- # This method is expected to be called in class which doesn't call #fetch_or
690
- def revoke_by_value(val)
691
- @mutex.synchronize do
692
- sock = @active_socks.detect { |_, v| v.sock == val }
693
- if sock
694
- key = sock.first
695
- @inactive_socks[key] = @active_socks.delete(key)
696
- @inactive_socks[key].ref = 0
697
- else
698
- @log.debug("Not found for revoke_by_value :#{val}")
699
- end
700
- end
701
- end
702
-
703
- private
704
-
705
- def timeout
706
- @timeout && Time.now + @timeout
707
- end
708
-
709
- # This method is thread unsafe
710
- def expired?(key = Thread.current.object_id)
711
- @active_socks[key].timeout ? @active_socks[key].timeout < Time.now : false
712
- end
713
- end
714
-
715
- # @param keepalive [Bool]
716
- # @param keepalive_timeout [Integer | nil]
717
- def initialize(sender, server, failure:, keepalive: false, keepalive_timeout: nil)
448
+ # @param connection_manager [Fluent::Plugin::ForwardOutput::ConnectionManager]
449
+ # @param ack_handler [Fluent::Plugin::ForwardOutput::AckHandler]
450
+ def initialize(sender, server, failure:, connection_manager:, ack_handler:)
718
451
  @sender = sender
719
452
  @log = sender.log
720
453
  @compress = sender.compress
@@ -737,10 +470,13 @@ module Fluent::Plugin
737
470
 
738
471
  @usock = nil
739
472
 
740
- @username = server.username
741
- @password = server.password
742
- @shared_key = server.shared_key || (sender.security && sender.security.shared_key) || ""
743
- @shared_key_salt = generate_salt
473
+ @handshake = HandshakeProtocol.new(
474
+ log: @log,
475
+ hostname: sender.security && sender.security.self_hostname,
476
+ shared_key: server.shared_key || (sender.security && sender.security.shared_key) || '',
477
+ password: server.password,
478
+ username: server.username,
479
+ )
744
480
 
745
481
  @unpacker = Fluent::Engine.msgpack_unpacker
746
482
 
@@ -748,20 +484,15 @@ module Fluent::Plugin
748
484
  @resolved_time = 0
749
485
  @resolved_once = false
750
486
 
751
- @keepalive = keepalive
752
- if @keepalive
753
- @socket_cache = SocketCache.new(keepalive_timeout, @log)
754
- end
487
+ @connection_manager = connection_manager
488
+ @ack_handler = ack_handler
755
489
  end
756
490
 
757
491
  attr_accessor :usock
758
492
 
759
493
  attr_reader :name, :host, :port, :weight, :standby, :state
760
- attr_reader :sockaddr # used by on_heartbeat
761
- attr_reader :failure, :available # for test
762
- attr_reader :socket_cache # for ack
763
-
764
- RequestInfo = Struct.new(:state, :shared_key_nonce, :auth)
494
+ attr_reader :sockaddr # used by on_udp_heatbeat_response_recv
495
+ attr_reader :failure # for test
765
496
 
766
497
  def validate_host_resolution!
767
498
  resolved_host
@@ -783,13 +514,15 @@ module Fluent::Plugin
783
514
  connect do |sock, ri|
784
515
  if ri.state != :established
785
516
  establish_connection(sock, ri)
786
- raise if ri.state != :established
517
+ if ri.state != :established
518
+ raise "Failed to establish connection to #{@host}:#{@port}"
519
+ end
787
520
  end
788
521
  end
789
522
  end
790
523
 
791
524
  def establish_connection(sock, ri)
792
- while available? && ri.state != :established
525
+ while ri.state != :established
793
526
  begin
794
527
  # TODO: On Ruby 2.2 or earlier, read_nonblock doesn't work expectedly.
795
528
  # We need rewrite around here using new socket/server plugin helper.
@@ -799,7 +532,9 @@ module Fluent::Plugin
799
532
  next
800
533
  end
801
534
  @unpacker.feed_each(buf) do |data|
802
- on_read(sock, ri, data)
535
+ if @handshake.invoke(sock, ri, data) == :established
536
+ @log.debug "connection established", host: @host, port: @port
537
+ end
803
538
  end
804
539
  rescue IO::WaitReadable
805
540
  # If the exception is Errno::EWOULDBLOCK or Errno::EAGAIN, it is extended by IO::WaitReadable.
@@ -814,17 +549,21 @@ module Fluent::Plugin
814
549
  @log.warn "disconnected", host: @host, port: @port
815
550
  disable!
816
551
  break
552
+ rescue HeloError => e
553
+ @log.warn "received invalid helo message from #{@name}"
554
+ disable!
555
+ break
556
+ rescue PingpongError => e
557
+ @log.warn "connection refused to #{@name || @host}: #{e.message}"
558
+ disable!
559
+ break
817
560
  end
818
561
  end
819
562
  end
820
563
 
821
564
  def send_data_actual(sock, tag, chunk)
822
- unless available?
823
- raise ConnectionClosedError, "failed to establish connection with node #{@name}"
824
- end
825
-
826
565
  option = { 'size' => chunk.size, 'compressed' => @compress }
827
- option['chunk'] = Base64.encode64(chunk.unique_id) if @sender.require_ack_response
566
+ option['chunk'] = Base64.encode64(chunk.unique_id) if @ack_handler
828
567
 
829
568
  # https://github.com/fluent/fluentd/wiki/Forward-Protocol-Specification-v1#packedforward-mode
830
569
  # out_forward always uses str32 type for entries.
@@ -845,48 +584,26 @@ module Fluent::Plugin
845
584
  end
846
585
 
847
586
  def send_data(tag, chunk)
848
- sock, ri = connect
849
- if ri.state != :established
850
- establish_connection(sock, ri)
851
- end
587
+ ack = @ack_handler && @ack_handler.create_ack(chunk.unique_id, self)
588
+ connect(nil, ack: ack) do |sock, ri|
589
+ if ri.state != :established
590
+ establish_connection(sock, ri)
852
591
 
853
- begin
854
- send_data_actual(sock, tag, chunk)
855
- rescue
856
- if @keepalive
857
- @socket_cache.revoke
858
- else
859
- sock.close rescue nil
592
+ if ri.state != :established
593
+ raise ConnectionClosedError, "failed to establish connection with node #{@name}"
594
+ end
860
595
  end
861
- raise
862
- end
863
596
 
864
- if @sender.require_ack_response
865
- return sock # to read ACK from socket
597
+ send_data_actual(sock, tag, chunk)
866
598
  end
867
599
 
868
- if @keepalive
869
- @socket_cache.dec_ref
870
- else
871
- sock.close_write rescue nil
872
- sock.close rescue nil
873
- end
874
600
  heartbeat(false)
875
601
  nil
876
602
  end
877
603
 
878
- def clear
879
- @keepalive && @socket_cache.clear
880
- end
881
-
882
- def purge_obsolete_socks
883
- unless @keepalive
884
- raise "Don not call this method without keepalive option"
885
- end
886
- @socket_cache.purge_obsolete_socks
887
- end
888
-
889
604
  # FORWARD_TCP_HEARTBEAT_DATA = FORWARD_HEADER + ''.to_msgpack + [].to_msgpack
605
+ #
606
+ # @return [Boolean] return true if it needs to rebuild nodes
890
607
  def send_heartbeat
891
608
  begin
892
609
  dest_addr = resolved_host
@@ -894,14 +611,14 @@ module Fluent::Plugin
894
611
  rescue ::SocketError => e
895
612
  if !@resolved_once && @sender.ignore_network_errors_at_startup
896
613
  @log.warn "failed to resolve node name in heartbeating", server: @name || @host, error: e
897
- return
614
+ return false
898
615
  end
899
616
  raise
900
617
  end
901
618
 
902
619
  case @sender.heartbeat_type
903
620
  when :transport
904
- connect(dest_addr) do |sock|
621
+ connect(dest_addr) do |_ri, _sock|
905
622
  ## don't send any data to not cause a compatibility problem
906
623
  # sock.write FORWARD_TCP_HEARTBEAT_DATA
907
624
 
@@ -910,8 +627,9 @@ module Fluent::Plugin
910
627
  heartbeat(true)
911
628
  end
912
629
  when :udp
913
- @usock.send "\0", 0, Socket.pack_sockaddr_in(@port, resolved_host)
914
- nil
630
+ @usock.send "\0", 0, Socket.pack_sockaddr_in(@port, dest_addr)
631
+ # response is going to receive at on_udp_heatbeat_response_recv
632
+ false
915
633
  when :none # :none doesn't use this class
916
634
  raise "BUG: heartbeat_type none must not use Node"
917
635
  else
@@ -943,14 +661,14 @@ module Fluent::Plugin
943
661
  def resolve_dns!
944
662
  addrinfo_list = Socket.getaddrinfo(@host, @port, nil, Socket::SOCK_STREAM)
945
663
  addrinfo = @sender.dns_round_robin ? addrinfo_list.sample : addrinfo_list.first
946
- @sockaddr = Socket.pack_sockaddr_in(addrinfo[1], addrinfo[3]) # used by on_heartbeat
664
+ @sockaddr = Socket.pack_sockaddr_in(addrinfo[1], addrinfo[3]) # used by on_udp_heatbeat_response_recv
947
665
  addrinfo[3]
948
666
  end
949
667
  private :resolve_dns!
950
668
 
951
669
  def tick
952
670
  now = Time.now.to_f
953
- if !@available
671
+ unless available?
954
672
  if @failure.hard_timeout?(now)
955
673
  @failure.clear
956
674
  end
@@ -959,7 +677,7 @@ module Fluent::Plugin
959
677
 
960
678
  if @failure.hard_timeout?(now)
961
679
  @log.warn "detached forwarding server '#{@name}'", host: @host, port: @port, hard_timeout: true
962
- @available = false
680
+ disable!
963
681
  @resolved_host = nil # expire cached host
964
682
  @failure.clear
965
683
  return true
@@ -969,7 +687,7 @@ module Fluent::Plugin
969
687
  phi = @failure.phi(now)
970
688
  if phi > @sender.phi_threshold
971
689
  @log.warn "detached forwarding server '#{@name}'", host: @host, port: @port, phi: phi, phi_threshold: @sender.phi_threshold
972
- @available = false
690
+ disable!
973
691
  @resolved_host = nil # expire cached host
974
692
  @failure.clear
975
693
  return true
@@ -981,7 +699,7 @@ module Fluent::Plugin
981
699
  def heartbeat(detect=true)
982
700
  now = Time.now.to_f
983
701
  @failure.add(now)
984
- if detect && !@available && @failure.sample_size > @sender.recover_sample_size
702
+ if detect && !available? && @failure.sample_size > @sender.recover_sample_size
985
703
  @available = true
986
704
  @log.warn "recovered forwarding server '#{@name}'", host: @host, port: @port
987
705
  true
@@ -990,127 +708,10 @@ module Fluent::Plugin
990
708
  end
991
709
  end
992
710
 
993
- def generate_salt
994
- SecureRandom.hex(16)
995
- end
996
-
997
- def check_helo(ri, message)
998
- @log.debug "checking helo"
999
- # ['HELO', options(hash)]
1000
- unless message.size == 2 && message[0] == 'HELO'
1001
- return false
1002
- end
1003
- opts = message[1] || {}
1004
- # make shared_key_check failed (instead of error) if protocol version mismatch exist
1005
- ri.shared_key_nonce = opts['nonce'] || ''
1006
- ri.auth = opts['auth'] || ''
1007
- true
1008
- end
1009
-
1010
- def generate_ping(ri)
1011
- @log.debug "generating ping"
1012
- # ['PING', self_hostname, sharedkey\_salt, sha512\_hex(sharedkey\_salt + self_hostname + nonce + shared_key),
1013
- # username || '', sha512\_hex(auth\_salt + username + password) || '']
1014
- shared_key_hexdigest = Digest::SHA512.new.update(@shared_key_salt)
1015
- .update(@sender.security.self_hostname)
1016
- .update(ri.shared_key_nonce)
1017
- .update(@shared_key)
1018
- .hexdigest
1019
- ping = ['PING', @sender.security.self_hostname, @shared_key_salt, shared_key_hexdigest]
1020
- if !ri.auth.empty?
1021
- password_hexdigest = Digest::SHA512.new.update(ri.auth).update(@username).update(@password).hexdigest
1022
- ping.push(@username, password_hexdigest)
1023
- else
1024
- ping.push('','')
1025
- end
1026
- ping
1027
- end
1028
-
1029
- def check_pong(ri, message)
1030
- @log.debug "checking pong"
1031
- # ['PONG', bool(authentication result), 'reason if authentication failed',
1032
- # self_hostname, sha512\_hex(salt + self_hostname + nonce + sharedkey)]
1033
- unless message.size == 5 && message[0] == 'PONG'
1034
- return false, 'invalid format for PONG message'
1035
- end
1036
- _pong, auth_result, reason, hostname, shared_key_hexdigest = message
1037
-
1038
- unless auth_result
1039
- return false, 'authentication failed: ' + reason
1040
- end
1041
-
1042
- if hostname == @sender.security.self_hostname
1043
- return false, 'same hostname between input and output: invalid configuration'
1044
- end
1045
-
1046
- clientside = Digest::SHA512.new.update(@shared_key_salt).update(hostname).update(ri.shared_key_nonce).update(@shared_key).hexdigest
1047
- unless shared_key_hexdigest == clientside
1048
- return false, 'shared key mismatch'
1049
- end
1050
-
1051
- return true, nil
1052
- end
1053
-
1054
- def on_read(sock, ri, data)
1055
- @log.trace __callee__
1056
-
1057
- case ri.state
1058
- when :helo
1059
- unless check_helo(ri, data)
1060
- @log.warn "received invalid helo message from #{@name}"
1061
- disable! # shutdown
1062
- return
1063
- end
1064
- sock.write(generate_ping(ri).to_msgpack)
1065
- ri.state = :pingpong
1066
- when :pingpong
1067
- succeeded, reason = check_pong(ri, data)
1068
- unless succeeded
1069
- @log.warn "connection refused to #{@name || @host}: #{reason}"
1070
- disable! # shutdown
1071
- return
1072
- end
1073
- ri.state = :established
1074
- @log.debug "connection established", host: @host, port: @port
1075
- else
1076
- raise "BUG: unknown session state: #{ri.state}"
1077
- end
1078
- end
1079
-
1080
711
  private
1081
712
 
1082
- def connect(host = nil)
1083
- socket, request_info =
1084
- if @keepalive
1085
- ri = RequestInfo.new(:established)
1086
- sock = @socket_cache.fetch_or do
1087
- s = @sender.create_transfer_socket(host || resolved_host, port, @hostname)
1088
- ri = RequestInfo.new(@sender.security ? :helo : :established) # overwrite if new connection
1089
- s
1090
- end
1091
- [sock, ri]
1092
- else
1093
- @log.debug('connect new socket')
1094
- [@sender.create_transfer_socket(host || resolved_host, port, @hostname), RequestInfo.new(@sender.security ? :helo : :established)]
1095
- end
1096
-
1097
- if block_given?
1098
- ret = nil
1099
- begin
1100
- ret = yield(socket, request_info)
1101
- rescue
1102
- @socket_cache.revoke if @keepalive
1103
- raise
1104
- else
1105
- @socket_cache.dec_ref if @keepalive
1106
- ensure
1107
- socket.close unless @keepalive
1108
- end
1109
-
1110
- ret
1111
- else
1112
- [socket, request_info]
1113
- end
713
+ def connect(host = nil, ack: false, &block)
714
+ @connection_manager.connect(host: host || resolved_host, port: port, hostname: @hostname, ack: ack, &block)
1114
715
  end
1115
716
  end
1116
717
 
@@ -1128,68 +729,5 @@ module Fluent::Plugin
1128
729
  true
1129
730
  end
1130
731
  end
1131
-
1132
- class FailureDetector
1133
- PHI_FACTOR = 1.0 / Math.log(10.0)
1134
- SAMPLE_SIZE = 1000
1135
-
1136
- def initialize(heartbeat_interval, hard_timeout, init_last)
1137
- @heartbeat_interval = heartbeat_interval
1138
- @last = init_last
1139
- @hard_timeout = hard_timeout
1140
-
1141
- # microsec
1142
- @init_gap = (heartbeat_interval * 1e6).to_i
1143
- @window = [@init_gap]
1144
- end
1145
-
1146
- def hard_timeout?(now)
1147
- now - @last > @hard_timeout
1148
- end
1149
-
1150
- def add(now)
1151
- if @window.empty?
1152
- @window << @init_gap
1153
- @last = now
1154
- else
1155
- gap = now - @last
1156
- @window << (gap * 1e6).to_i
1157
- @window.shift if @window.length > SAMPLE_SIZE
1158
- @last = now
1159
- end
1160
- end
1161
-
1162
- def phi(now)
1163
- size = @window.size
1164
- return 0.0 if size == 0
1165
-
1166
- # Calculate weighted moving average
1167
- mean_usec = 0
1168
- fact = 0
1169
- @window.each_with_index {|gap,i|
1170
- mean_usec += gap * (1+i)
1171
- fact += (1+i)
1172
- }
1173
- mean_usec = mean_usec / fact
1174
-
1175
- # Normalize arrive intervals into 1sec
1176
- mean = (mean_usec.to_f / 1e6) - @heartbeat_interval + 1
1177
-
1178
- # Calculate phi of the phi accrual failure detector
1179
- t = now - @last - @heartbeat_interval + 1
1180
- phi = PHI_FACTOR * t / mean
1181
-
1182
- return phi
1183
- end
1184
-
1185
- def sample_size
1186
- @window.size
1187
- end
1188
-
1189
- def clear
1190
- @window.clear
1191
- @last = 0
1192
- end
1193
- end
1194
732
  end
1195
733
  end