fluentd 1.6.3 → 1.7.1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of fluentd might be problematic. Click here for more details.

Files changed (84)
  1. checksums.yaml +4 -4
  2. data/.drone.yml +35 -0
  3. data/.github/ISSUE_TEMPLATE/bug_report.md +2 -0
  4. data/CHANGELOG.md +83 -0
  5. data/README.md +5 -1
  6. data/fluentd.gemspec +3 -2
  7. data/lib/fluent/clock.rb +4 -0
  8. data/lib/fluent/compat/output.rb +3 -3
  9. data/lib/fluent/compat/socket_util.rb +1 -1
  10. data/lib/fluent/config/element.rb +3 -3
  11. data/lib/fluent/config/literal_parser.rb +1 -1
  12. data/lib/fluent/config/section.rb +4 -1
  13. data/lib/fluent/error.rb +4 -0
  14. data/lib/fluent/event.rb +28 -24
  15. data/lib/fluent/event_router.rb +2 -1
  16. data/lib/fluent/log.rb +1 -1
  17. data/lib/fluent/msgpack_factory.rb +8 -0
  18. data/lib/fluent/plugin/bare_output.rb +4 -4
  19. data/lib/fluent/plugin/buf_file.rb +10 -1
  20. data/lib/fluent/plugin/buf_file_single.rb +219 -0
  21. data/lib/fluent/plugin/buffer.rb +62 -63
  22. data/lib/fluent/plugin/buffer/chunk.rb +21 -3
  23. data/lib/fluent/plugin/buffer/file_chunk.rb +44 -12
  24. data/lib/fluent/plugin/buffer/file_single_chunk.rb +314 -0
  25. data/lib/fluent/plugin/buffer/memory_chunk.rb +2 -1
  26. data/lib/fluent/plugin/compressable.rb +10 -6
  27. data/lib/fluent/plugin/filter_grep.rb +2 -2
  28. data/lib/fluent/plugin/formatter_csv.rb +10 -6
  29. data/lib/fluent/plugin/in_syslog.rb +10 -3
  30. data/lib/fluent/plugin/in_tail.rb +7 -2
  31. data/lib/fluent/plugin/in_tcp.rb +34 -7
  32. data/lib/fluent/plugin/multi_output.rb +4 -4
  33. data/lib/fluent/plugin/out_exec_filter.rb +1 -0
  34. data/lib/fluent/plugin/out_file.rb +13 -3
  35. data/lib/fluent/plugin/out_forward.rb +144 -588
  36. data/lib/fluent/plugin/out_forward/ack_handler.rb +161 -0
  37. data/lib/fluent/plugin/out_forward/connection_manager.rb +113 -0
  38. data/lib/fluent/plugin/out_forward/error.rb +28 -0
  39. data/lib/fluent/plugin/out_forward/failure_detector.rb +84 -0
  40. data/lib/fluent/plugin/out_forward/handshake_protocol.rb +121 -0
  41. data/lib/fluent/plugin/out_forward/load_balancer.rb +111 -0
  42. data/lib/fluent/plugin/out_forward/socket_cache.rb +138 -0
  43. data/lib/fluent/plugin/out_http.rb +231 -0
  44. data/lib/fluent/plugin/output.rb +29 -35
  45. data/lib/fluent/plugin/parser.rb +77 -0
  46. data/lib/fluent/plugin/parser_csv.rb +75 -0
  47. data/lib/fluent/plugin/parser_syslog.rb +106 -3
  48. data/lib/fluent/plugin_helper/server.rb +2 -2
  49. data/lib/fluent/plugin_helper/socket.rb +14 -1
  50. data/lib/fluent/plugin_helper/thread.rb +1 -0
  51. data/lib/fluent/root_agent.rb +1 -1
  52. data/lib/fluent/time.rb +4 -2
  53. data/lib/fluent/timezone.rb +21 -7
  54. data/lib/fluent/version.rb +1 -1
  55. data/test/command/test_fluentd.rb +1 -1
  56. data/test/command/test_plugin_generator.rb +18 -2
  57. data/test/config/test_configurable.rb +78 -40
  58. data/test/counter/test_store.rb +1 -1
  59. data/test/helper.rb +1 -0
  60. data/test/helpers/process_extenstion.rb +33 -0
  61. data/test/plugin/out_forward/test_ack_handler.rb +101 -0
  62. data/test/plugin/out_forward/test_connection_manager.rb +145 -0
  63. data/test/plugin/out_forward/test_handshake_protocol.rb +103 -0
  64. data/test/plugin/out_forward/test_load_balancer.rb +60 -0
  65. data/test/plugin/out_forward/test_socket_cache.rb +139 -0
  66. data/test/plugin/test_buf_file.rb +172 -2
  67. data/test/plugin/test_buf_file_single.rb +801 -0
  68. data/test/plugin/test_buffer.rb +4 -48
  69. data/test/plugin/test_buffer_file_chunk.rb +38 -1
  70. data/test/plugin/test_buffer_file_single_chunk.rb +621 -0
  71. data/test/plugin/test_buffer_memory_chunk.rb +1 -0
  72. data/test/plugin/test_formatter_csv.rb +16 -0
  73. data/test/plugin/test_in_syslog.rb +56 -6
  74. data/test/plugin/test_in_tail.rb +1 -1
  75. data/test/plugin/test_in_tcp.rb +25 -0
  76. data/test/plugin/test_out_forward.rb +150 -201
  77. data/test/plugin/test_out_http.rb +352 -0
  78. data/test/plugin/test_output_as_buffered.rb +27 -24
  79. data/test/plugin/test_parser.rb +40 -0
  80. data/test/plugin/test_parser_csv.rb +83 -0
  81. data/test/plugin/test_parser_syslog.rb +118 -19
  82. data/test/plugin_helper/test_record_accessor.rb +1 -1
  83. data/test/test_time_formatter.rb +140 -121
  84. metadata +35 -6
@@ -43,7 +43,8 @@ module Fluent
43
43
  @chunk_bytes += @adding_bytes
44
44
 
45
45
  @adding_bytes = @adding_size = 0
46
- @modified_at = Time.now
46
+ @modified_at = Fluent::Clock.real_now
47
+ @modified_at_object = nil
47
48
  true
48
49
  end
49
50
 
@@ -64,9 +64,11 @@ module Fluent
64
64
  unused = gz.unused
65
65
  gz.finish
66
66
 
67
- break if unused.nil?
68
- adjust = unused.length
69
- io.pos -= adjust
67
+ unless unused.nil?
68
+ adjust = unused.length
69
+ io.pos -= adjust
70
+ end
71
+ break if io.eof?
70
72
  end
71
73
 
72
74
  out
@@ -80,9 +82,11 @@ module Fluent
80
82
  unused = gz.unused
81
83
  gz.finish
82
84
 
83
- break if unused.nil?
84
- adjust = unused.length
85
- input.pos -= adjust
85
+ unless unused.nil?
86
+ adjust = unused.length
87
+ input.pos -= adjust
88
+ end
89
+ break if input.eof?
86
90
  end
87
91
 
88
92
  output
@@ -110,7 +110,7 @@ module Fluent::Plugin
110
110
  end
111
111
 
112
112
  if @regexps.size > 1
113
- log.info "Top level multiple <regexp> is intepreted as 'and' condition"
113
+ log.info "Top level multiple <regexp> is interpreted as 'and' condition"
114
114
  end
115
115
  @regexps.each do |e|
116
116
  raise Fluent::ConfigError, "Duplicate key: #{e.key}" if regexp_and_conditions.key?(e.key)
@@ -118,7 +118,7 @@ module Fluent::Plugin
118
118
  end
119
119
 
120
120
  if @excludes.size > 1
121
- log.info "Top level multiple <exclude> is intepreted as 'or' condition"
121
+ log.info "Top level multiple <exclude> is interpreted as 'or' condition"
122
122
  end
123
123
  @excludes.each do |e|
124
124
  raise Fluent::ConfigError, "Duplicate key: #{e.key}" if exclude_or_conditions.key?(e.key)
@@ -33,18 +33,22 @@ module Fluent
33
33
 
34
34
  def configure(conf)
35
35
  super
36
+
36
37
  @fields = fields.select{|f| !f.empty? }
37
38
  raise ConfigError, "empty value is specified in fields parameter" if @fields.empty?
38
39
 
39
- @generate_opts = {col_sep: @delimiter, force_quotes: @force_quotes}
40
+ @generate_opts = {col_sep: @delimiter, force_quotes: @force_quotes, headers: @fields,
41
+ row_sep: @add_newline ? :auto : "".force_encoding(Encoding::ASCII_8BIT)}
42
+ # Cache CSV object per thread to avoid internal state sharing
43
+ @cache = {}
40
44
  end
41
45
 
42
46
  def format(tag, time, record)
43
- row = @fields.map do |key|
44
- record[key]
45
- end
46
- line = CSV.generate_line(row, @generate_opts)
47
- line.chomp! unless @add_newline
47
+ csv = (@cache[Thread.current] ||= CSV.new("".force_encoding(Encoding::ASCII_8BIT), @generate_opts))
48
+ line = (csv << record).string.dup
49
+ # Need manual cleanup because CSV writer doesn't provide such method.
50
+ csv.rewind
51
+ csv.truncate(0)
48
52
  line
49
53
  end
50
54
  end
@@ -199,6 +199,13 @@ module Fluent::Plugin
199
199
 
200
200
  private
201
201
 
202
+ def emit_unmatched(data, sock)
203
+ record = {"unmatched_line" => data}
204
+ record[@source_address_key] = sock.remote_addr if @source_address_key
205
+ record[@source_hostname_key] = sock.remote_host if @source_hostname_key
206
+ emit("#{@tag}.unmatched", Fluent::EventTime.now, record)
207
+ end
208
+
202
209
  def message_handler(data, sock)
203
210
  pri = nil
204
211
  text = data
@@ -206,7 +213,7 @@ module Fluent::Plugin
206
213
  m = SYSLOG_REGEXP.match(data)
207
214
  unless m
208
215
  if @emit_unmatched_lines
209
- emit("#{@tag}.unmatched", Fluent::EventTime.now, {"unmatched_line" => data})
216
+ emit_unmatched(data, sock)
210
217
  end
211
218
  log.warn "invalid syslog message: #{data.dump}"
212
219
  return
@@ -218,7 +225,7 @@ module Fluent::Plugin
218
225
  @parser.parse(text) do |time, record|
219
226
  unless time && record
220
227
  if @emit_unmatched_lines
221
- emit("#{@tag}.unmatched", Fluent::EventTime.now, {"unmatched_line" => text})
228
+ emit_unmatched(data, sock)
222
229
  end
223
230
  log.warn "failed to parse message", data: data
224
231
  return
@@ -238,7 +245,7 @@ module Fluent::Plugin
238
245
  end
239
246
  rescue => e
240
247
  if @emit_unmatched_lines
241
- emit("#{@tag}.unmatched", Fluent::EventTime.now, {"unmatched_line" => text})
248
+ emit_unmatched(data, sock)
242
249
  end
243
250
  log.error "invalid input", data: data, error: e
244
251
  log.error_backtrace
@@ -97,6 +97,10 @@ module Fluent::Plugin
97
97
  desc 'Ignore repeated permission error logs'
98
98
  config_param :ignore_repeated_permission_error, :bool, default: false
99
99
 
100
+ config_section :parse, required: false, multi: true, init: true, param_name: :parser_configs do
101
+ config_argument :usage, :string, default: 'in_tail_parser'
102
+ end
103
+
100
104
  attr_reader :paths
101
105
 
102
106
  @@pos_file_paths = {}
@@ -148,7 +152,8 @@ module Fluent::Plugin
148
152
  method(:parse_singleline)
149
153
  end
150
154
  @file_perm = system_config.file_permission || FILE_PERMISSION
151
- @parser = parser_create(conf: parser_config)
155
+ # parser is already created by parser helper
156
+ @parser = parser_create(usage: parser_config['usage'] || @parser_configs.first.usage)
152
157
  end
153
158
 
154
159
  def configure_tag
@@ -431,7 +436,7 @@ module Fluent::Plugin
431
436
  end
432
437
  }
433
438
  rescue => e
434
- log.warn line.dump, error: e.to_s
439
+ log.warn 'invalid line found', file: tail_watcher.path, line: line, error: e.to_s
435
440
  log.debug_backtrace(e.backtrace)
436
441
  end
437
442
  end
@@ -61,13 +61,15 @@ module Fluent::Plugin
61
61
  def start
62
62
  super
63
63
 
64
- server_create(:in_tcp_server, @port, bind: @bind, resolve_name: !!@source_hostname_key) do |data, conn|
65
- conn.buffer << data
66
- begin
64
+ del_size = @delimiter.length
65
+ if @_extract_enabled && @_extract_tag_key
66
+ server_create(:in_tcp_server_single_emit, @port, bind: @bind, resolve_name: !!@source_hostname_key) do |data, conn|
67
+ conn.buffer << data
68
+ buf = conn.buffer
67
69
  pos = 0
68
- while i = conn.buffer.index(@delimiter, pos)
69
- msg = conn.buffer[pos...i]
70
- pos = i + @delimiter.length
70
+ while i = buf.index(@delimiter, pos)
71
+ msg = buf[pos...i]
72
+ pos = i + del_size
71
73
 
72
74
  @parser.parse(msg) do |time, record|
73
75
  unless time && record
@@ -83,7 +85,32 @@ module Fluent::Plugin
83
85
  router.emit(tag, time, record)
84
86
  end
85
87
  end
86
- conn.buffer.slice!(0, pos) if pos > 0
88
+ buf.slice!(0, pos) if pos > 0
89
+ end
90
+ else
91
+ server_create(:in_tcp_server_batch_emit, @port, bind: @bind, resolve_name: !!@source_hostname_key) do |data, conn|
92
+ conn.buffer << data
93
+ buf = conn.buffer
94
+ pos = 0
95
+ es = Fluent::MultiEventStream.new
96
+ while i = buf.index(@delimiter, pos)
97
+ msg = buf[pos...i]
98
+ pos = i + del_size
99
+
100
+ @parser.parse(msg) do |time, record|
101
+ unless time && record
102
+ log.warn "pattern not matched", message: msg
103
+ next
104
+ end
105
+
106
+ time ||= extract_time_from_record(record) || Fluent::EventTime.now
107
+ record[@source_address_key] = conn.remote_addr if @source_address_key
108
+ record[@source_hostname_key] = conn.remote_host if @source_hostname_key
109
+ es.add(time, record)
110
+ end
111
+ end
112
+ router.emit_stream(@tag, es)
113
+ buf.slice!(0, pos) if pos > 0
87
114
  end
88
115
  end
89
116
  end
@@ -44,7 +44,7 @@ module Fluent
44
44
  @outputs = []
45
45
  @outputs_statically_created = false
46
46
 
47
- @counters_monitor = Monitor.new
47
+ @counter_mutex = Mutex.new
48
48
  # TODO: well organized counters
49
49
  @num_errors = 0
50
50
  @emit_count = 0
@@ -143,12 +143,12 @@ module Fluent
143
143
  end
144
144
 
145
145
  def emit_sync(tag, es)
146
- @counters_monitor.synchronize{ @emit_count += 1 }
146
+ @counter_mutex.synchronize{ @emit_count += 1 }
147
147
  begin
148
148
  process(tag, es)
149
- @counters_monitor.synchronize{ @emit_records += es.size }
149
+ @counter_mutex.synchronize{ @emit_records += es.size }
150
150
  rescue
151
- @counters_monitor.synchronize{ @num_errors += 1 }
151
+ @counter_mutex.synchronize{ @num_errors += 1 }
152
152
  raise
153
153
  end
154
154
  end
@@ -284,6 +284,7 @@ module Fluent::Plugin
284
284
  end
285
285
 
286
286
  def run(io)
287
+ io.set_encoding(Encoding::ASCII_8BIT)
287
288
  case
288
289
  when @parser.implement?(:parse_io)
289
290
  @parser.parse_io(io, &method(:on_record))
@@ -72,6 +72,17 @@ module Fluent::Plugin
72
72
  attr_accessor :last_written_path # for tests
73
73
 
74
74
  module SymlinkBufferMixin
75
+ def metadata(timekey: nil, tag: nil, variables: nil)
76
+ metadata = super
77
+
78
+ @latest_metadata ||= new_metadata(timekey: 0)
79
+ if metadata.timekey && (metadata.timekey >= @latest_metadata.timekey)
80
+ @latest_metadata = metadata
81
+ end
82
+
83
+ metadata
84
+ end
85
+
75
86
  def output_plugin_for_symlink=(output_plugin)
76
87
  @_output_plugin_for_symlink = output_plugin
77
88
  end
@@ -86,8 +97,7 @@ module Fluent::Plugin
86
97
  # timekey will be appended into that file chunk. On the other side, resumed file chunks might NOT
87
98
  # have timekey, especially in the cases that resumed file chunks are generated by Fluentd v0.12.
88
99
  # These chunks will be enqueued immediately, and will be flushed soon.
89
- latest_metadata = metadata_list.select{|m| m.timekey }.sort_by(&:timekey).last
90
- if chunk.metadata == latest_metadata
100
+ if chunk.metadata == @latest_metadata
91
101
  sym_path = @_output_plugin_for_symlink.extract_placeholders(@_symlink_path, chunk)
92
102
  FileUtils.mkdir_p(File.dirname(sym_path), mode: @_output_plugin_for_symlink.dir_perm)
93
103
  FileUtils.ln_sf(chunk.path, sym_path)
@@ -115,7 +125,7 @@ module Fluent::Plugin
115
125
 
116
126
  if conf.has_key?('utc') || conf.has_key?('localtime')
117
127
  param_name = conf.has_key?('utc') ? 'utc' : 'localtime'
118
- log.warn "'#{param_name}' is deperecated for output plugin. This parameter is used for formatter plugin in compatibility layer. If you want to use same feature, use timekey_use_utc parameter in <buffer> directive instead"
128
+ log.warn "'#{param_name}' is deprecated for output plugin. This parameter is used for formatter plugin in compatibility layer. If you want to use same feature, use timekey_use_utc parameter in <buffer> directive instead"
119
129
  end
120
130
 
121
131
  super
@@ -20,13 +20,16 @@ require 'fluent/clock'
20
20
  require 'base64'
21
21
 
22
22
  require 'fluent/compat/socket_util'
23
+ require 'fluent/plugin/out_forward/handshake_protocol'
24
+ require 'fluent/plugin/out_forward/load_balancer'
25
+ require 'fluent/plugin/out_forward/socket_cache'
26
+ require 'fluent/plugin/out_forward/failure_detector'
27
+ require 'fluent/plugin/out_forward/error'
28
+ require 'fluent/plugin/out_forward/connection_manager'
29
+ require 'fluent/plugin/out_forward/ack_handler'
23
30
 
24
31
  module Fluent::Plugin
25
32
  class ForwardOutput < Output
26
- class Error < StandardError; end
27
- class NoNodesAvailable < Error; end
28
- class ConnectionClosedError < Error; end
29
-
30
33
  Fluent::Plugin.register_output('forward', self)
31
34
 
32
35
  helpers :socket, :server, :timer, :thread, :compat_parameters
@@ -104,6 +107,12 @@ module Fluent::Plugin
104
107
  config_param :tls_client_private_key_path, :string, default: nil
105
108
  desc 'The client private key passphrase for TLS.'
106
109
  config_param :tls_client_private_key_passphrase, :string, default: nil, secret: true
110
+ desc 'The certificate thumbprint for searching from Windows system certstore.'
111
+ config_param :tls_cert_thumbprint, :string, default: nil, secret: true
112
+ desc 'The certificate logical store name on Windows system certstore.'
113
+ config_param :tls_cert_logical_store_name, :string, default: nil
114
+ desc 'Enable to use certificate enterprise store on Windows system certstore.'
115
+ config_param :tls_cert_use_enterprise_store, :bool, default: true
107
116
  desc "Enable keepalive connection."
108
117
  config_param :keepalive, :bool, default: false
109
118
  desc "Expired time of keepalive. Default value is nil, which means to keep connection as long as possible"
@@ -154,8 +163,6 @@ module Fluent::Plugin
154
163
  @thread = nil
155
164
 
156
165
  @usock = nil
157
- @sock_ack_waiting = nil
158
- @sock_ack_waiting_mutex = nil
159
166
  @keep_alive_watcher_interval = 5 # TODO
160
167
  end
161
168
 
@@ -176,10 +183,8 @@ module Fluent::Plugin
176
183
  @heartbeat_type = :transport
177
184
  end
178
185
 
179
- if @dns_round_robin
180
- if @heartbeat_type == :udp
181
- raise Fluent::ConfigError, "forward output heartbeat type must be 'transport' or 'none' to use dns_round_robin option"
182
- end
186
+ if @dns_round_robin && @heartbeat_type == :udp
187
+ raise Fluent::ConfigError, "forward output heartbeat type must be 'transport' or 'none' to use dns_round_robin option"
183
188
  end
184
189
 
185
190
  if @transport == :tls
@@ -199,17 +204,35 @@ module Fluent::Plugin
199
204
  @tls_verify_hostname = false
200
205
  @tls_allow_self_signed_cert = true
201
206
  end
207
+
208
+ if Fluent.windows?
209
+ if (@tls_cert_path || @tls_ca_cert_path) && @tls_cert_logical_store_name
210
+ raise Fluent::ConfigError, "specified both cert path and tls_cert_logical_store_name is not permitted"
211
+ end
212
+ else
213
+ raise Fluent::ConfigError, "This parameter is for only Windows" if @tls_cert_logical_store_name
214
+ raise Fluent::ConfigError, "This parameter is for only Windows" if @tls_cert_thumbprint
215
+ end
202
216
  end
203
217
 
218
+ @ack_handler = @require_ack_response ? AckHandler.new(timeout: @ack_response_timeout, log: @log, read_length: @read_length) : nil
219
+ socket_cache = @keepalive ? SocketCache.new(@keepalive_timeout, @log) : nil
220
+ @connection_manager = ConnectionManager.new(
221
+ log: @log,
222
+ secure: !!@security,
223
+ connection_factory: method(:create_transfer_socket),
224
+ socket_cache: socket_cache,
225
+ )
226
+
204
227
  @servers.each do |server|
205
228
  failure = FailureDetector.new(@heartbeat_interval, @hard_timeout, Time.now.to_i.to_f)
206
229
  name = server.name || "#{server.host}:#{server.port}"
207
230
 
208
231
  log.info "adding forwarding server '#{name}'", host: server.host, port: server.port, weight: server.weight, plugin_id: plugin_id
209
232
  if @heartbeat_type == :none
210
- @nodes << NoneHeartbeatNode.new(self, server, failure: failure, keepalive: @keepalive, keepalive_timeout: @keepalive_timeout)
233
+ @nodes << NoneHeartbeatNode.new(self, server, failure: failure, connection_manager: @connection_manager, ack_handler: @ack_handler)
211
234
  else
212
- node = Node.new(self, server, failure: failure, keepalive: @keepalive, keepalive_timeout: @keepalive_timeout)
235
+ node = Node.new(self, server, failure: failure, connection_manager: @connection_manager, ack_handler: @ack_handler)
213
236
  begin
214
237
  node.validate_host_resolution!
215
238
  rescue => e
@@ -251,31 +274,25 @@ module Fluent::Plugin
251
274
  def start
252
275
  super
253
276
 
254
- # Output#start sets @delayed_commit_timeout by @buffer_config.delayed_commit_timeout
255
- # But it should be overwritten by ack_response_timeout to rollback chunks after timeout
256
- if @ack_response_timeout && @delayed_commit_timeout != @ack_response_timeout
257
- log.info "delayed_commit_timeout is overwritten by ack_response_timeout"
258
- @delayed_commit_timeout = @ack_response_timeout + 2 # minimum ack_reader IO.select interval is 1s
259
- end
260
-
261
- @rand_seed = Random.new.seed
262
- rebuild_weight_array
263
- @rr = 0
277
+ @load_balancer = LoadBalancer.new(log)
278
+ @load_balancer.rebuild_weight_array(@nodes)
264
279
 
265
280
  unless @heartbeat_type == :none
266
281
  if @heartbeat_type == :udp
267
282
  @usock = socket_create_udp(@nodes.first.host, @nodes.first.port, nonblock: true)
268
- server_create_udp(:out_forward_heartbeat_receiver, 0, socket: @usock, max_bytes: @read_length) do |data, sock|
269
- sockaddr = Socket.pack_sockaddr_in(sock.remote_port, sock.remote_host)
270
- on_heartbeat(sockaddr, data)
271
- end
283
+ server_create_udp(:out_forward_heartbeat_receiver, 0, socket: @usock, max_bytes: @read_length, &method(:on_udp_heatbeat_response_recv))
272
284
  end
273
- timer_execute(:out_forward_heartbeat_request, @heartbeat_interval, &method(:on_timer))
285
+ timer_execute(:out_forward_heartbeat_request, @heartbeat_interval, &method(:on_heartbeat_timer))
274
286
  end
275
287
 
276
288
  if @require_ack_response
277
- @sock_ack_waiting_mutex = Mutex.new
278
- @sock_ack_waiting = []
289
+ # Output#start sets @delayed_commit_timeout by @buffer_config.delayed_commit_timeout
290
+ # But it should be overwritten by ack_response_timeout to rollback chunks after timeout
291
+ if @delayed_commit_timeout != @ack_response_timeout
292
+ log.info "delayed_commit_timeout is overwritten by ack_response_timeout"
293
+ @delayed_commit_timeout = @ack_response_timeout + 2 # minimum ack_reader IO.select interval is 1s
294
+ end
295
+
279
296
  thread_create(:out_forward_receiving_ack, &method(:ack_reader))
280
297
  end
281
298
 
@@ -301,22 +318,22 @@ module Fluent::Plugin
301
318
  @usock.close rescue nil
302
319
  end
303
320
 
304
- if @keepalive && @keepalive_timeout
305
- @nodes.each(&:clear)
306
- end
307
321
  super
308
322
  end
309
323
 
324
+ def stop
325
+ super
326
+
327
+ if @keepalive
328
+ @connection_manager.stop
329
+ end
330
+ end
331
+
310
332
  def write(chunk)
311
333
  return if chunk.empty?
312
334
  tag = chunk.metadata.tag
313
- select_a_healthy_node{|node| node.send_data(tag, chunk) }
314
- end
315
335
 
316
- ACKWaitingSockInfo = Struct.new(:sock, :chunk_id, :chunk_id_base64, :node, :time, :timeout) do
317
- def expired?(now)
318
- time + timeout < now
319
- end
336
+ @load_balancer.select_healthy_node { |node| node.send_data(tag, chunk) }
320
337
  end
321
338
 
322
339
  def try_write(chunk)
@@ -326,35 +343,7 @@ module Fluent::Plugin
326
343
  return
327
344
  end
328
345
  tag = chunk.metadata.tag
329
- sock, node = select_a_healthy_node{|n| n.send_data(tag, chunk) }
330
- chunk_id_base64 = Base64.encode64(chunk.unique_id)
331
- current_time = Fluent::Clock.now
332
- info = ACKWaitingSockInfo.new(sock, chunk.unique_id, chunk_id_base64, node, current_time, @ack_response_timeout)
333
- @sock_ack_waiting_mutex.synchronize do
334
- @sock_ack_waiting << info
335
- end
336
- end
337
-
338
- def select_a_healthy_node
339
- error = nil
340
-
341
- wlen = @weight_array.length
342
- wlen.times do
343
- @rr = (@rr + 1) % wlen
344
- node = @weight_array[@rr]
345
- next unless node.available?
346
-
347
- begin
348
- ret = yield node
349
- return ret, node
350
- rescue
351
- # for load balancing during detecting crashed servers
352
- error = $! # use the latest error
353
- end
354
- end
355
-
356
- raise error if error
357
- raise NoNodesAvailable, "no nodes are available"
346
+ @load_balancer.select_healthy_node { |n| n.send_data(tag, chunk) }
358
347
  end
359
348
 
360
349
  def create_transfer_socket(host, port, hostname, &block)
@@ -372,6 +361,9 @@ module Fluent::Plugin
372
361
  cert_path: @tls_client_cert_path,
373
362
  private_key_path: @tls_client_private_key_path,
374
363
  private_key_passphrase: @tls_client_private_key_passphrase,
364
+ cert_thumbprint: @tls_cert_thumbprint,
365
+ cert_logical_store_name: @tls_cert_logical_store_name,
366
+ cert_use_enterprise_store: @tls_cert_use_enterprise_store,
375
367
 
376
368
  # Enabling SO_LINGER causes data loss on Windows
377
369
  # https://github.com/fluent/fluentd/issues/1968
@@ -403,130 +395,41 @@ module Fluent::Plugin
403
395
 
404
396
  private
405
397
 
406
- def rebuild_weight_array
407
- standby_nodes, regular_nodes = @nodes.partition {|n|
408
- n.standby?
409
- }
410
-
411
- lost_weight = 0
412
- regular_nodes.each {|n|
413
- unless n.available?
414
- lost_weight += n.weight
415
- end
416
- }
417
- log.debug "rebuilding weight array", lost_weight: lost_weight
418
-
419
- if lost_weight > 0
420
- standby_nodes.each {|n|
421
- if n.available?
422
- regular_nodes << n
423
- log.warn "using standby node #{n.host}:#{n.port}", weight: n.weight
424
- lost_weight -= n.weight
425
- break if lost_weight <= 0
426
- end
427
- }
428
- end
429
-
430
- weight_array = []
431
- if regular_nodes.empty?
432
- log.warn('No nodes are available')
433
- @weight_array = weight_array
434
- return @weight_array
435
- end
436
-
437
- gcd = regular_nodes.map {|n| n.weight }.inject(0) {|r,w| r.gcd(w) }
438
- regular_nodes.each {|n|
439
- (n.weight / gcd).times {
440
- weight_array << n
441
- }
442
- }
443
-
444
- # for load balancing during detecting crashed servers
445
- coe = (regular_nodes.size * 6) / weight_array.size
446
- weight_array *= coe if coe > 1
447
-
448
- r = Random.new(@rand_seed)
449
- weight_array.sort_by! { r.rand }
450
-
451
- @weight_array = weight_array
452
- end
453
-
454
- def on_timer
455
- @nodes.each {|n|
398
+ def on_heartbeat_timer
399
+ need_rebuild = false
400
+ @nodes.each do |n|
456
401
  begin
457
402
  log.trace "sending heartbeat", host: n.host, port: n.port, heartbeat_type: @heartbeat_type
458
403
  n.usock = @usock if @usock
459
- if n.send_heartbeat
460
- rebuild_weight_array
461
- end
404
+ need_rebuild = n.send_heartbeat || need_rebuild
462
405
  rescue Errno::EAGAIN, Errno::EWOULDBLOCK, Errno::EINTR, Errno::ECONNREFUSED, Errno::ETIMEDOUT => e
463
406
  log.debug "failed to send heartbeat packet", host: n.host, port: n.port, heartbeat_type: @heartbeat_type, error: e
464
407
  rescue => e
465
408
  log.debug "unexpected error happen during heartbeat", host: n.host, port: n.port, heartbeat_type: @heartbeat_type, error: e
466
409
  end
467
- if n.tick
468
- rebuild_weight_array
469
- end
470
- }
471
- end
472
410
 
473
- def on_heartbeat(sockaddr, msg)
474
- if node = @nodes.find {|n| n.sockaddr == sockaddr }
475
- # log.trace "heartbeat arrived", name: node.name, host: node.host, port: node.port
476
- if node.heartbeat
477
- rebuild_weight_array
478
- end
411
+ need_rebuild = n.tick || need_rebuild
479
412
  end
480
- end
481
413
 
482
- def on_purge_obsolete_socks
483
- @nodes.each(&:purge_obsolete_socks)
414
+ if need_rebuild
415
+ @load_balancer.rebuild_weight_array(@nodes)
416
+ end
484
417
  end
485
418
 
486
- # return chunk id to be committed
487
- def read_ack_from_sock(sock, unpacker)
488
- begin
489
- raw_data = sock.instance_of?(Fluent::PluginHelper::Socket::WrappedSocket::TLS) ? sock.readpartial(@read_length) : sock.recv(@read_length)
490
- rescue Errno::ECONNRESET, EOFError # ECONNRESET for #recv, #EOFError for #readpartial
491
- raw_data = ""
492
- end
493
- info = @sock_ack_waiting_mutex.synchronize{ @sock_ack_waiting.find{|i| i.sock == sock } }
494
-
495
- # When connection is closed by remote host, socket is ready to read and #recv returns an empty string that means EOF.
496
- # If this happens we assume the data wasn't delivered and retry it.
497
- if raw_data.empty?
498
- log.warn "destination node closed the connection. regard it as unavailable.", host: info.node.host, port: info.node.port
499
- info.node.disable!
500
- rollback_write(info.chunk_id, update_retry: false)
501
- return nil
502
- else
503
- unpacker.feed(raw_data)
504
- res = unpacker.read
505
- log.trace "getting response from destination", host: info.node.host, port: info.node.port, chunk_id: dump_unique_id_hex(info.chunk_id), response: res
506
- if res['ack'] != info.chunk_id_base64
507
- # Some errors may have occurred when ack and chunk id is different, so send the chunk again.
508
- log.warn "ack in response and chunk id in sent data are different", chunk_id: dump_unique_id_hex(info.chunk_id), ack: res['ack']
509
- rollback_write(info.chunk_id, update_retry: false)
510
- return nil
511
- else
512
- log.trace "got a correct ack response", chunk_id: dump_unique_id_hex(info.chunk_id)
419
+ def on_udp_heatbeat_response_recv(data, sock)
420
+ sockaddr = Socket.pack_sockaddr_in(sock.remote_port, sock.remote_host)
421
+ if node = @nodes.find { |n| n.sockaddr == sockaddr }
422
+ # log.trace "heartbeat arrived", name: node.name, host: node.host, port: node.port
423
+ if node.heartbeat
424
+ @load_balancer.rebuild_weight_array(@nodes)
513
425
  end
514
- return info.chunk_id
515
- end
516
- rescue => e
517
- log.error "unexpected error while receiving ack message", error: e
518
- log.error_backtrace
519
- ensure
520
- if @keepalive
521
- info.node.socket_cache.dec_ref_by_value(info.sock)
522
426
  else
523
- info.sock.close_write rescue nil
524
- info.sock.close rescue nil
427
+ log.warn("Unknown heartbeat response received from #{sock.remote_host}:#{sock.remote_port}")
525
428
  end
429
+ end
526
430
 
527
- @sock_ack_waiting_mutex.synchronize do
528
- @sock_ack_waiting.delete(info)
529
- end
431
+ def on_purge_obsolete_socks
432
+ @connection_manager.purge_obsolete_socks
530
433
  end
531
434
 
532
435
  def ack_reader
@@ -536,185 +439,33 @@ module Fluent::Plugin
536
439
  @delayed_commit_timeout / 3.0
537
440
  end
538
441
 
539
- unpacker = Fluent::Engine.msgpack_unpacker
540
-
541
442
  while thread_current_running?
542
- now = Fluent::Clock.now
543
- sockets = []
544
- begin
545
- @sock_ack_waiting_mutex.synchronize do
546
- new_list = []
547
- @sock_ack_waiting.each do |info|
548
- if info.expired?(now)
549
- # There are 2 types of cases when no response has been received from socket:
550
- # (1) the node does not support sending responses
551
- # (2) the node does support sending response but responses have not arrived for some reasons.
552
- log.warn "no response from node. regard it as unavailable.", host: info.node.host, port: info.node.port
553
- info.node.disable!
554
- if @keepalive
555
- info.node.socket_cache.revoke_by_value(info.sock)
556
- end
557
- info.sock.close rescue nil
558
- rollback_write(info.chunk_id, update_retry: false)
559
- else
560
- sockets << info.sock
561
- new_list << info
562
- end
563
- end
564
- @sock_ack_waiting = new_list
565
- end
443
+ @ack_handler.collect_response(select_interval) do |chunk_id, node, sock, result|
444
+ @connection_manager.close(sock)
566
445
 
567
- readable_sockets, _, _ = IO.select(sockets, nil, nil, select_interval)
568
- next unless readable_sockets
446
+ case result
447
+ when AckHandler::Result::SUCCESS
448
+ commit_write(chunk_id)
449
+ when AckHandler::Result::FAILED
450
+ node.disable!
451
+ rollback_write(chunk_id, update_retry: false)
452
+ when AckHandler::Result::CHUNKID_UNMATCHED
453
+ rollback_write(chunk_id, update_retry: false)
454
+ else
455
+ log.warn("BUG: invalid status #{result} #{chunk_id}")
569
456
 
570
- readable_sockets.each do |sock|
571
- chunk_id = read_ack_from_sock(sock, unpacker)
572
- commit_write(chunk_id) if chunk_id
457
+ if chunk_id
458
+ rollback_write(chunk_id, update_retry: false)
459
+ end
573
460
  end
574
- rescue => e
575
- log.error "unexpected error while receiving ack", error: e
576
- log.error_backtrace
577
461
  end
578
462
  end
579
463
  end
580
464
 
581
465
  class Node
582
- class SocketCache
583
- TimedSocket = Struct.new(:timeout, :sock, :ref)
584
-
585
- def initialize(timeout, log)
586
- @log = log
587
- @timeout = timeout
588
- @active_socks = {}
589
- @inactive_socks = {}
590
- @mutex = Mutex.new
591
- end
592
-
593
- def revoke(key = Thread.current.object_id)
594
- @mutex.synchronize do
595
- if @active_socks[key]
596
- @inactive_socks[key] = @active_socks.delete(key)
597
- @inactive_socks[key].ref = 0
598
- end
599
- end
600
- end
601
-
602
- def clear
603
- @mutex.synchronize do
604
- @inactive_socks.values.each do |s|
605
- s.sock.close rescue nil
606
- end
607
- @inactive_socks.clear
608
-
609
- @active_socks.values.each do |s|
610
- s.sock.close rescue nil
611
- end
612
- @active_socks.clear
613
- end
614
- end
615
-
616
- def purge_obsolete_socks
617
- @mutex.synchronize do
618
- @inactive_socks.keys.each do |k|
619
- # 0 means sockets stored in this class received all acks
620
- if @inactive_socks[k].ref <= 0
621
- s = @inactive_socks.delete(k)
622
- s.sock.close rescue nil
623
- @log.debug("purged obsolete socket #{s.sock}")
624
- end
625
- end
626
-
627
- @active_socks.keys.each do |k|
628
- if expired?(k) && @active_socks[k].ref <= 0
629
- @inactive_socks[k] = @active_socks.delete(k)
630
- end
631
- end
632
- end
633
- end
634
-
635
- # We expect that `yield` returns a unique object in this class
636
- def fetch_or(key = Thread.current.object_id)
637
- @mutex.synchronize do
638
- unless @active_socks[key]
639
- @active_socks[key] = TimedSocket.new(timeout, yield, 1)
640
- @log.debug("connect new socket #{@active_socks[key]}")
641
- return @active_socks[key].sock
642
- end
643
-
644
- if expired?(key)
645
- # Do not close this socket here in case of it will be used by other place (e.g. wait for receiving ack)
646
- @inactive_socks[key] = @active_socks.delete(key)
647
- @log.debug("connection #{@inactive_socks[key]} is expired. reconnecting...")
648
- @active_socks[key] = TimedSocket.new(timeout, yield, 0)
649
- end
650
-
651
- @active_socks[key].ref += 1
652
- @active_socks[key].sock
653
- end
654
- end
655
-
656
- def dec_ref(key = Thread.current.object_id)
657
- @mutex.synchronize do
658
- if @active_socks[key]
659
- @active_socks[key].ref -= 1
660
- elsif @inactive_socks[key]
661
- @inactive_socks[key].ref -= 1
662
- else
663
- @log.warn("Not found key for dec_ref: #{key}")
664
- end
665
- end
666
- end
667
-
668
- # This method is expected to be called in class which doesn't call #inc_ref
669
- def dec_ref_by_value(val)
670
- @mutex.synchronize do
671
- sock = @active_socks.detect { |_, v| v.sock == val }
672
- if sock
673
- key = sock.first
674
- @active_socks[key].ref -= 1
675
- return
676
- end
677
-
678
- sock = @inactive_socks.detect { |_, v| v.sock == val }
679
- if sock
680
- key = sock.first
681
- @inactive_socks[key].ref -= 1
682
- return
683
- else
684
- @log.warn("Not found key for dec_ref_by_value: #{key}")
685
- end
686
- end
687
- end
688
-
689
- # This method is expected to be called in class which doesn't call #fetch_or
690
- def revoke_by_value(val)
691
- @mutex.synchronize do
692
- sock = @active_socks.detect { |_, v| v.sock == val }
693
- if sock
694
- key = sock.first
695
- @inactive_socks[key] = @active_socks.delete(key)
696
- @inactive_socks[key].ref = 0
697
- else
698
- @log.debug("Not found for revoke_by_value :#{val}")
699
- end
700
- end
701
- end
702
-
703
- private
704
-
705
- def timeout
706
- @timeout && Time.now + @timeout
707
- end
708
-
709
- # This method is thread unsafe
710
- def expired?(key = Thread.current.object_id)
711
- @active_socks[key].timeout ? @active_socks[key].timeout < Time.now : false
712
- end
713
- end
714
-
715
- # @param keepalive [Bool]
716
- # @param keepalive_timeout [Integer | nil]
717
- def initialize(sender, server, failure:, keepalive: false, keepalive_timeout: nil)
466
+ # @param connection_manager [Fluent::Plugin::ForwardOutput::ConnectionManager]
467
+ # @param ack_handler [Fluent::Plugin::ForwardOutput::AckHandler]
468
+ def initialize(sender, server, failure:, connection_manager:, ack_handler:)
718
469
  @sender = sender
719
470
  @log = sender.log
720
471
  @compress = sender.compress
@@ -737,10 +488,13 @@ module Fluent::Plugin
737
488
 
738
489
  @usock = nil
739
490
 
740
- @username = server.username
741
- @password = server.password
742
- @shared_key = server.shared_key || (sender.security && sender.security.shared_key) || ""
743
- @shared_key_salt = generate_salt
491
+ @handshake = HandshakeProtocol.new(
492
+ log: @log,
493
+ hostname: sender.security && sender.security.self_hostname,
494
+ shared_key: server.shared_key || (sender.security && sender.security.shared_key) || '',
495
+ password: server.password,
496
+ username: server.username,
497
+ )
744
498
 
745
499
  @unpacker = Fluent::Engine.msgpack_unpacker
746
500
 
@@ -748,20 +502,15 @@ module Fluent::Plugin
748
502
  @resolved_time = 0
749
503
  @resolved_once = false
750
504
 
751
- @keepalive = keepalive
752
- if @keepalive
753
- @socket_cache = SocketCache.new(keepalive_timeout, @log)
754
- end
505
+ @connection_manager = connection_manager
506
+ @ack_handler = ack_handler
755
507
  end
756
508
 
757
509
  attr_accessor :usock
758
510
 
759
511
  attr_reader :name, :host, :port, :weight, :standby, :state
760
- attr_reader :sockaddr # used by on_heartbeat
761
- attr_reader :failure, :available # for test
762
- attr_reader :socket_cache # for ack
763
-
764
- RequestInfo = Struct.new(:state, :shared_key_nonce, :auth)
512
+ attr_reader :sockaddr # used by on_udp_heatbeat_response_recv
513
+ attr_reader :failure # for test
765
514
 
766
515
  def validate_host_resolution!
767
516
  resolved_host
@@ -783,13 +532,15 @@ module Fluent::Plugin
783
532
  connect do |sock, ri|
784
533
  if ri.state != :established
785
534
  establish_connection(sock, ri)
786
- raise if ri.state != :established
535
+ if ri.state != :established
536
+ raise "Failed to establish connection to #{@host}:#{@port}"
537
+ end
787
538
  end
788
539
  end
789
540
  end
790
541
 
791
542
  def establish_connection(sock, ri)
792
- while available? && ri.state != :established
543
+ while ri.state != :established
793
544
  begin
794
545
  # TODO: On Ruby 2.2 or earlier, read_nonblock doesn't work expectedly.
795
546
  # We need rewrite around here using new socket/server plugin helper.
@@ -799,7 +550,9 @@ module Fluent::Plugin
799
550
  next
800
551
  end
801
552
  @unpacker.feed_each(buf) do |data|
802
- on_read(sock, ri, data)
553
+ if @handshake.invoke(sock, ri, data) == :established
554
+ @log.debug "connection established", host: @host, port: @port
555
+ end
803
556
  end
804
557
  rescue IO::WaitReadable
805
558
  # If the exception is Errno::EWOULDBLOCK or Errno::EAGAIN, it is extended by IO::WaitReadable.
@@ -814,17 +567,21 @@ module Fluent::Plugin
814
567
  @log.warn "disconnected", host: @host, port: @port
815
568
  disable!
816
569
  break
570
+ rescue HeloError => e
571
+ @log.warn "received invalid helo message from #{@name}"
572
+ disable!
573
+ break
574
+ rescue PingpongError => e
575
+ @log.warn "connection refused to #{@name || @host}: #{e.message}"
576
+ disable!
577
+ break
817
578
  end
818
579
  end
819
580
  end
820
581
 
821
582
  def send_data_actual(sock, tag, chunk)
822
- unless available?
823
- raise ConnectionClosedError, "failed to establish connection with node #{@name}"
824
- end
825
-
826
583
  option = { 'size' => chunk.size, 'compressed' => @compress }
827
- option['chunk'] = Base64.encode64(chunk.unique_id) if @sender.require_ack_response
584
+ option['chunk'] = Base64.encode64(chunk.unique_id) if @ack_handler
828
585
 
829
586
  # https://github.com/fluent/fluentd/wiki/Forward-Protocol-Specification-v1#packedforward-mode
830
587
  # out_forward always uses str32 type for entries.
@@ -845,48 +602,26 @@ module Fluent::Plugin
845
602
  end
846
603
 
847
604
  def send_data(tag, chunk)
848
- sock, ri = connect
849
- if ri.state != :established
850
- establish_connection(sock, ri)
851
- end
605
+ ack = @ack_handler && @ack_handler.create_ack(chunk.unique_id, self)
606
+ connect(nil, ack: ack) do |sock, ri|
607
+ if ri.state != :established
608
+ establish_connection(sock, ri)
852
609
 
853
- begin
854
- send_data_actual(sock, tag, chunk)
855
- rescue
856
- if @keepalive
857
- @socket_cache.revoke
858
- else
859
- sock.close rescue nil
610
+ if ri.state != :established
611
+ raise ConnectionClosedError, "failed to establish connection with node #{@name}"
612
+ end
860
613
  end
861
- raise
862
- end
863
614
 
864
- if @sender.require_ack_response
865
- return sock # to read ACK from socket
615
+ send_data_actual(sock, tag, chunk)
866
616
  end
867
617
 
868
- if @keepalive
869
- @socket_cache.dec_ref
870
- else
871
- sock.close_write rescue nil
872
- sock.close rescue nil
873
- end
874
618
  heartbeat(false)
875
619
  nil
876
620
  end
877
621
 
878
- def clear
879
- @keepalive && @socket_cache.clear
880
- end
881
-
882
- def purge_obsolete_socks
883
- unless @keepalive
884
- raise "Don not call this method without keepalive option"
885
- end
886
- @socket_cache.purge_obsolete_socks
887
- end
888
-
889
622
  # FORWARD_TCP_HEARTBEAT_DATA = FORWARD_HEADER + ''.to_msgpack + [].to_msgpack
623
+ #
624
+ # @return [Boolean] return true if it needs to rebuild nodes
890
625
  def send_heartbeat
891
626
  begin
892
627
  dest_addr = resolved_host
@@ -894,14 +629,14 @@ module Fluent::Plugin
894
629
  rescue ::SocketError => e
895
630
  if !@resolved_once && @sender.ignore_network_errors_at_startup
896
631
  @log.warn "failed to resolve node name in heartbeating", server: @name || @host, error: e
897
- return
632
+ return false
898
633
  end
899
634
  raise
900
635
  end
901
636
 
902
637
  case @sender.heartbeat_type
903
638
  when :transport
904
- connect(dest_addr) do |sock|
639
+ connect(dest_addr) do |_ri, _sock|
905
640
  ## don't send any data to not cause a compatibility problem
906
641
  # sock.write FORWARD_TCP_HEARTBEAT_DATA
907
642
 
@@ -910,8 +645,9 @@ module Fluent::Plugin
910
645
  heartbeat(true)
911
646
  end
912
647
  when :udp
913
- @usock.send "\0", 0, Socket.pack_sockaddr_in(@port, resolved_host)
914
- nil
648
+ @usock.send "\0", 0, Socket.pack_sockaddr_in(@port, dest_addr)
649
+ # response is going to receive at on_udp_heatbeat_response_recv
650
+ false
915
651
  when :none # :none doesn't use this class
916
652
  raise "BUG: heartbeat_type none must not use Node"
917
653
  else
@@ -943,14 +679,14 @@ module Fluent::Plugin
943
679
  def resolve_dns!
944
680
  addrinfo_list = Socket.getaddrinfo(@host, @port, nil, Socket::SOCK_STREAM)
945
681
  addrinfo = @sender.dns_round_robin ? addrinfo_list.sample : addrinfo_list.first
946
- @sockaddr = Socket.pack_sockaddr_in(addrinfo[1], addrinfo[3]) # used by on_heartbeat
682
+ @sockaddr = Socket.pack_sockaddr_in(addrinfo[1], addrinfo[3]) # used by on_udp_heatbeat_response_recv
947
683
  addrinfo[3]
948
684
  end
949
685
  private :resolve_dns!
950
686
 
951
687
  def tick
952
688
  now = Time.now.to_f
953
- if !@available
689
+ unless available?
954
690
  if @failure.hard_timeout?(now)
955
691
  @failure.clear
956
692
  end
@@ -959,7 +695,7 @@ module Fluent::Plugin
959
695
 
960
696
  if @failure.hard_timeout?(now)
961
697
  @log.warn "detached forwarding server '#{@name}'", host: @host, port: @port, hard_timeout: true
962
- @available = false
698
+ disable!
963
699
  @resolved_host = nil # expire cached host
964
700
  @failure.clear
965
701
  return true
@@ -969,7 +705,7 @@ module Fluent::Plugin
969
705
  phi = @failure.phi(now)
970
706
  if phi > @sender.phi_threshold
971
707
  @log.warn "detached forwarding server '#{@name}'", host: @host, port: @port, phi: phi, phi_threshold: @sender.phi_threshold
972
- @available = false
708
+ disable!
973
709
  @resolved_host = nil # expire cached host
974
710
  @failure.clear
975
711
  return true
@@ -981,7 +717,7 @@ module Fluent::Plugin
981
717
  def heartbeat(detect=true)
982
718
  now = Time.now.to_f
983
719
  @failure.add(now)
984
- if detect && !@available && @failure.sample_size > @sender.recover_sample_size
720
+ if detect && !available? && @failure.sample_size > @sender.recover_sample_size
985
721
  @available = true
986
722
  @log.warn "recovered forwarding server '#{@name}'", host: @host, port: @port
987
723
  true
@@ -990,127 +726,10 @@ module Fluent::Plugin
990
726
  end
991
727
  end
992
728
 
993
- def generate_salt
994
- SecureRandom.hex(16)
995
- end
996
-
997
- def check_helo(ri, message)
998
- @log.debug "checking helo"
999
- # ['HELO', options(hash)]
1000
- unless message.size == 2 && message[0] == 'HELO'
1001
- return false
1002
- end
1003
- opts = message[1] || {}
1004
- # make shared_key_check failed (instead of error) if protocol version mismatch exist
1005
- ri.shared_key_nonce = opts['nonce'] || ''
1006
- ri.auth = opts['auth'] || ''
1007
- true
1008
- end
1009
-
1010
- def generate_ping(ri)
1011
- @log.debug "generating ping"
1012
- # ['PING', self_hostname, sharedkey\_salt, sha512\_hex(sharedkey\_salt + self_hostname + nonce + shared_key),
1013
- # username || '', sha512\_hex(auth\_salt + username + password) || '']
1014
- shared_key_hexdigest = Digest::SHA512.new.update(@shared_key_salt)
1015
- .update(@sender.security.self_hostname)
1016
- .update(ri.shared_key_nonce)
1017
- .update(@shared_key)
1018
- .hexdigest
1019
- ping = ['PING', @sender.security.self_hostname, @shared_key_salt, shared_key_hexdigest]
1020
- if !ri.auth.empty?
1021
- password_hexdigest = Digest::SHA512.new.update(ri.auth).update(@username).update(@password).hexdigest
1022
- ping.push(@username, password_hexdigest)
1023
- else
1024
- ping.push('','')
1025
- end
1026
- ping
1027
- end
1028
-
1029
- def check_pong(ri, message)
1030
- @log.debug "checking pong"
1031
- # ['PONG', bool(authentication result), 'reason if authentication failed',
1032
- # self_hostname, sha512\_hex(salt + self_hostname + nonce + sharedkey)]
1033
- unless message.size == 5 && message[0] == 'PONG'
1034
- return false, 'invalid format for PONG message'
1035
- end
1036
- _pong, auth_result, reason, hostname, shared_key_hexdigest = message
1037
-
1038
- unless auth_result
1039
- return false, 'authentication failed: ' + reason
1040
- end
1041
-
1042
- if hostname == @sender.security.self_hostname
1043
- return false, 'same hostname between input and output: invalid configuration'
1044
- end
1045
-
1046
- clientside = Digest::SHA512.new.update(@shared_key_salt).update(hostname).update(ri.shared_key_nonce).update(@shared_key).hexdigest
1047
- unless shared_key_hexdigest == clientside
1048
- return false, 'shared key mismatch'
1049
- end
1050
-
1051
- return true, nil
1052
- end
1053
-
1054
- def on_read(sock, ri, data)
1055
- @log.trace __callee__
1056
-
1057
- case ri.state
1058
- when :helo
1059
- unless check_helo(ri, data)
1060
- @log.warn "received invalid helo message from #{@name}"
1061
- disable! # shutdown
1062
- return
1063
- end
1064
- sock.write(generate_ping(ri).to_msgpack)
1065
- ri.state = :pingpong
1066
- when :pingpong
1067
- succeeded, reason = check_pong(ri, data)
1068
- unless succeeded
1069
- @log.warn "connection refused to #{@name || @host}: #{reason}"
1070
- disable! # shutdown
1071
- return
1072
- end
1073
- ri.state = :established
1074
- @log.debug "connection established", host: @host, port: @port
1075
- else
1076
- raise "BUG: unknown session state: #{ri.state}"
1077
- end
1078
- end
1079
-
1080
729
  private
1081
730
 
1082
- def connect(host = nil)
1083
- socket, request_info =
1084
- if @keepalive
1085
- ri = RequestInfo.new(:established)
1086
- sock = @socket_cache.fetch_or do
1087
- s = @sender.create_transfer_socket(host || resolved_host, port, @hostname)
1088
- ri = RequestInfo.new(@sender.security ? :helo : :established) # overwrite if new connection
1089
- s
1090
- end
1091
- [sock, ri]
1092
- else
1093
- @log.debug('connect new socket')
1094
- [@sender.create_transfer_socket(host || resolved_host, port, @hostname), RequestInfo.new(@sender.security ? :helo : :established)]
1095
- end
1096
-
1097
- if block_given?
1098
- ret = nil
1099
- begin
1100
- ret = yield(socket, request_info)
1101
- rescue
1102
- @socket_cache.revoke if @keepalive
1103
- raise
1104
- else
1105
- @socket_cache.dec_ref if @keepalive
1106
- ensure
1107
- socket.close unless @keepalive
1108
- end
1109
-
1110
- ret
1111
- else
1112
- [socket, request_info]
1113
- end
731
+ def connect(host = nil, ack: false, &block)
732
+ @connection_manager.connect(host: host || resolved_host, port: port, hostname: @hostname, ack: ack, &block)
1114
733
  end
1115
734
  end
1116
735
 
@@ -1128,68 +747,5 @@ module Fluent::Plugin
1128
747
  true
1129
748
  end
1130
749
  end
1131
-
1132
- class FailureDetector
1133
- PHI_FACTOR = 1.0 / Math.log(10.0)
1134
- SAMPLE_SIZE = 1000
1135
-
1136
- def initialize(heartbeat_interval, hard_timeout, init_last)
1137
- @heartbeat_interval = heartbeat_interval
1138
- @last = init_last
1139
- @hard_timeout = hard_timeout
1140
-
1141
- # microsec
1142
- @init_gap = (heartbeat_interval * 1e6).to_i
1143
- @window = [@init_gap]
1144
- end
1145
-
1146
- def hard_timeout?(now)
1147
- now - @last > @hard_timeout
1148
- end
1149
-
1150
- def add(now)
1151
- if @window.empty?
1152
- @window << @init_gap
1153
- @last = now
1154
- else
1155
- gap = now - @last
1156
- @window << (gap * 1e6).to_i
1157
- @window.shift if @window.length > SAMPLE_SIZE
1158
- @last = now
1159
- end
1160
- end
1161
-
1162
- def phi(now)
1163
- size = @window.size
1164
- return 0.0 if size == 0
1165
-
1166
- # Calculate weighted moving average
1167
- mean_usec = 0
1168
- fact = 0
1169
- @window.each_with_index {|gap,i|
1170
- mean_usec += gap * (1+i)
1171
- fact += (1+i)
1172
- }
1173
- mean_usec = mean_usec / fact
1174
-
1175
- # Normalize arrive intervals into 1sec
1176
- mean = (mean_usec.to_f / 1e6) - @heartbeat_interval + 1
1177
-
1178
- # Calculate phi of the phi accrual failure detector
1179
- t = now - @last - @heartbeat_interval + 1
1180
- phi = PHI_FACTOR * t / mean
1181
-
1182
- return phi
1183
- end
1184
-
1185
- def sample_size
1186
- @window.size
1187
- end
1188
-
1189
- def clear
1190
- @window.clear
1191
- @last = 0
1192
- end
1193
- end
1194
750
  end
1195
751
  end