fluentd 0.14.9 → 0.14.10

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of fluentd might be problematic. Click here for more details.

Files changed (50) hide show
  1. checksums.yaml +4 -4
  2. data/.travis.yml +2 -0
  3. data/ChangeLog +44 -0
  4. data/appveyor.yml +1 -0
  5. data/code-of-conduct.md +3 -0
  6. data/fluentd.gemspec +1 -1
  7. data/lib/fluent/command/cat.rb +11 -3
  8. data/lib/fluent/compat/output.rb +6 -3
  9. data/lib/fluent/compat/parser.rb +2 -0
  10. data/lib/fluent/config/section.rb +1 -1
  11. data/lib/fluent/env.rb +1 -1
  12. data/lib/fluent/plugin/filter_record_transformer.rb +12 -30
  13. data/lib/fluent/plugin/in_forward.rb +50 -169
  14. data/lib/fluent/plugin/in_monitor_agent.rb +8 -4
  15. data/lib/fluent/plugin/in_syslog.rb +13 -7
  16. data/lib/fluent/plugin/in_tail.rb +29 -14
  17. data/lib/fluent/plugin/in_tcp.rb +54 -14
  18. data/lib/fluent/plugin/in_udp.rb +49 -13
  19. data/lib/fluent/plugin/out_file.rb +30 -14
  20. data/lib/fluent/plugin/out_forward.rb +199 -173
  21. data/lib/fluent/plugin/output.rb +71 -46
  22. data/lib/fluent/plugin/parser_json.rb +1 -1
  23. data/lib/fluent/plugin_helper.rb +2 -0
  24. data/lib/fluent/plugin_helper/event_loop.rb +24 -6
  25. data/lib/fluent/plugin_helper/inject.rb +12 -1
  26. data/lib/fluent/plugin_helper/server.rb +494 -0
  27. data/lib/fluent/plugin_helper/socket.rb +101 -0
  28. data/lib/fluent/plugin_helper/socket_option.rb +84 -0
  29. data/lib/fluent/plugin_helper/timer.rb +1 -0
  30. data/lib/fluent/test/driver/base.rb +45 -13
  31. data/lib/fluent/version.rb +1 -1
  32. data/lib/fluent/winsvc.rb +1 -1
  33. data/test/compat/test_parser.rb +10 -0
  34. data/test/config/test_configurable.rb +20 -0
  35. data/test/helper.rb +36 -1
  36. data/test/plugin/test_filter_record_transformer.rb +31 -103
  37. data/test/plugin/test_in_forward.rb +13 -75
  38. data/test/plugin/test_in_monitor_agent.rb +65 -35
  39. data/test/plugin/test_in_syslog.rb +39 -3
  40. data/test/plugin/test_in_tcp.rb +78 -62
  41. data/test/plugin/test_in_udp.rb +101 -80
  42. data/test/plugin/test_out_file.rb +17 -0
  43. data/test/plugin/test_out_forward.rb +155 -125
  44. data/test/plugin/test_output_as_buffered.rb +4 -2
  45. data/test/plugin_helper/test_inject.rb +21 -0
  46. data/test/plugin_helper/test_server.rb +905 -0
  47. data/test/test_event_time.rb +3 -1
  48. data/test/test_output.rb +30 -1
  49. data/test/test_test_drivers.rb +5 -2
  50. metadata +19 -6
@@ -18,8 +18,7 @@ require 'json'
18
18
  require 'webrick'
19
19
  require 'cgi'
20
20
 
21
- require 'cool.io'
22
-
21
+ require 'fluent/config/types'
23
22
  require 'fluent/plugin/input'
24
23
  require 'fluent/plugin/output'
25
24
  require 'fluent/plugin/multi_output'
@@ -35,6 +34,7 @@ module Fluent::Plugin
35
34
  config_param :port, :integer, default: 24220
36
35
  config_param :tag, :string, default: nil
37
36
  config_param :emit_interval, :time, default: 60
37
+ config_param :include_config, :bool, default: true
38
38
 
39
39
  class MonitorServlet < WEBrick::HTTPServlet::AbstractServlet
40
40
  def initialize(server, agent)
@@ -78,12 +78,16 @@ module Fluent::Plugin
78
78
 
79
79
  # if ?debug=1 is set, set :with_debug_info for get_monitor_info
80
80
  # and :pretty_json for render_json_error
81
- opts = {}
81
+ opts = {with_config: @agent.include_config}
82
82
  if s = qs['debug'] and s[0]
83
83
  opts[:with_debug_info] = true
84
84
  opts[:pretty_json] = true
85
85
  end
86
86
 
87
+ if with_config = get_search_parameter(qs, 'with_config'.freeze)
88
+ opts[:with_config] = Fluent::Config.bool_value(with_config)
89
+ end
90
+
87
91
  if tag = get_search_parameter(qs, 'tag'.freeze)
88
92
  # ?tag= to search an output plugin by match pattern
89
93
  if obj = @agent.plugin_info_by_tag(tag, opts)
@@ -329,7 +333,7 @@ module Fluent::Plugin
329
333
  obj['plugin_id'] = pe.plugin_id
330
334
  obj['plugin_category'] = plugin_category(pe)
331
335
  obj['type'] = pe.config['@type']
332
- obj['config'] = pe.config if !opts.has_key?(:with_config) || opts[:with_config]
336
+ obj['config'] = pe.config if opts[:with_config]
333
337
 
334
338
  # run MONITOR_INFO in plugins' instance context and store the info to obj
335
339
  MONITOR_INFO.each_pair {|key,code|
@@ -85,6 +85,10 @@ module Fluent::Plugin
85
85
  config_param :include_source_host, :bool, default: false
86
86
  desc 'Specify key of source host when include_source_host is true.'
87
87
  config_param :source_host_key, :string, default: 'source_host'.freeze
88
+ desc 'The field name of the priority.'
89
+ config_param :priority_key, :string, default: nil
90
+ desc 'The field name of the facility.'
91
+ config_param :facility_key, :string, default: nil
88
92
  config_param :blocking_timeout, :time, default: 0.5
89
93
  config_param :message_length_limit, :size, default: 2048
90
94
 
@@ -141,8 +145,15 @@ module Fluent::Plugin
141
145
  end
142
146
 
143
147
  pri ||= record.delete('pri')
148
+ facility = FACILITY_MAP[pri >> 3]
149
+ priority = PRIORITY_MAP[pri & 0b111]
150
+
151
+ record[@priority_key] = priority if @priority_key
152
+ record[@facility_key] = facility if @facility_key
144
153
  record[@source_host_key] = addr[2] if @include_source_host
145
- emit(pri, time, record)
154
+
155
+ tag = "#{@tag}.#{facility}.#{priority}"
156
+ emit(tag, time, record)
146
157
  end
147
158
  rescue => e
148
159
  log.error "invalid input", data: data, error: e
@@ -168,12 +179,7 @@ module Fluent::Plugin
168
179
  end
169
180
  end
170
181
 
171
- def emit(pri, time, record)
172
- facility = FACILITY_MAP[pri >> 3]
173
- priority = PRIORITY_MAP[pri & 0b111]
174
-
175
- tag = "#{@tag}.#{facility}.#{priority}"
176
-
182
+ def emit(tag, time, record)
177
183
  router.emit(tag, time, record)
178
184
  rescue => e
179
185
  log.error "syslog failed to emit", error: e, tag: tag, record: Yajl.dump(record)
@@ -164,6 +164,12 @@ module Fluent::Plugin
164
164
  super
165
165
  end
166
166
 
167
+ def close
168
+ super
169
+ # close file handles after all threads stopped (in #close of thread plugin helper)
170
+ close_watcher_handles
171
+ end
172
+
167
173
  def expand_paths
168
174
  date = Time.now
169
175
  paths = []
@@ -234,18 +240,27 @@ module Fluent::Plugin
234
240
 
235
241
  def stop_watchers(paths, immediate = false, unwatched = false)
236
242
  paths.each { |path|
237
- tw = @tails.delete(path)
243
+ tw = @tails[path]
238
244
  if tw
239
245
  tw.unwatched = unwatched
240
246
  if immediate
241
- close_watcher(tw, false)
247
+ detach_watcher(tw, false)
242
248
  else
243
- close_watcher_after_rotate_wait(tw)
249
+ detach_watcher_after_rotate_wait(tw)
244
250
  end
245
251
  end
246
252
  }
247
253
  end
248
254
 
255
+ def close_watcher_handles
256
+ @tails.keys.each do |path|
257
+ tw = @tails.delete(path)
258
+ if tw
259
+ tw.close
260
+ end
261
+ end
262
+ end
263
+
249
264
  # refresh_watchers calls @tails.keys so we don't use stop_watcher -> start_watcher sequence for safety.
250
265
  def update_watcher(path, pe)
251
266
  if @pf
@@ -256,24 +271,25 @@ module Fluent::Plugin
256
271
  end
257
272
  rotated_tw = @tails[path]
258
273
  @tails[path] = setup_watcher(path, pe)
259
- close_watcher_after_rotate_wait(rotated_tw) if rotated_tw
274
+ detach_watcher_after_rotate_wait(rotated_tw) if rotated_tw
260
275
  end
261
276
 
262
277
  # TailWatcher#close is called by another thread at shutdown phase.
263
278
  # It causes 'can't modify string; temporarily locked' error in IOHandler
264
279
  # so adding close_io argument to avoid this problem.
265
280
  # At shutdown, IOHandler's io will be released automatically after it is detached from the event loop
266
- def close_watcher(tw, close_io = true)
267
- tw.close(close_io)
281
+ def detach_watcher(tw, close_io = true)
282
+ tw.detach
283
+ tw.close if close_io
268
284
  flush_buffer(tw)
269
285
  if tw.unwatched && @pf
270
286
  @pf[tw.path].update_pos(PositionFile::UNWATCHED_POSITION)
271
287
  end
272
288
  end
273
289
 
274
- def close_watcher_after_rotate_wait(tw)
290
+ def detach_watcher_after_rotate_wait(tw)
275
291
  timer_execute(:in_tail_close_watcher, @rotate_wait, repeat: false) do
276
- close_watcher(tw)
292
+ detach_watcher(tw)
277
293
  end
278
294
  end
279
295
 
@@ -435,14 +451,13 @@ module Fluent::Plugin
435
451
  def detach
436
452
  @timer_trigger.detach if @enable_watch_timer && @timer_trigger.attached?
437
453
  @stat_trigger.detach if @stat_trigger.attached?
454
+ @io_handler.on_notify if @io_handler
438
455
  end
439
456
 
440
- def close(close_io = true)
441
- if close_io && @io_handler
442
- @io_handler.on_notify
457
+ def close
458
+ if @io_handler
443
459
  @io_handler.close
444
460
  end
445
- detach
446
461
  end
447
462
 
448
463
  def on_notify
@@ -570,8 +585,8 @@ module Fluent::Plugin
570
585
  else
571
586
  @buffer << @io.readpartial(2048, @iobuf)
572
587
  end
573
- while line = @buffer.slice!(/.*?\n/m)
574
- @lines << line
588
+ while idx = @buffer.index("\n".freeze)
589
+ @lines << @buffer.slice!(0, idx + 1)
575
590
  end
576
591
  if @lines.size >= @read_lines_limit
577
592
  # not to use too much memory in case the file is very large
@@ -14,28 +14,68 @@
14
14
  # limitations under the License.
15
15
  #
16
16
 
17
- require 'cool.io'
17
+ require 'fluent/plugin/input'
18
18
 
19
- require 'fluent/plugin/socket_util'
19
+ module Fluent::Plugin
20
+ class TcpInput < Input
21
+ Fluent::Plugin.register_input('tcp', self)
20
22
 
21
- module Fluent
22
- class TcpInput < SocketUtil::BaseInput
23
- Plugin.register_input('tcp', self)
23
+ helpers :server, :parser, :extract, :compat_parameters
24
+
25
+ desc 'Tag of output events.'
26
+ config_param :tag, :string
27
+ desc 'The port to listen to.'
28
+ config_param :port, :integer, default: 5170
29
+ desc 'The bind address to listen to.'
30
+ config_param :bind, :string, default: '0.0.0.0'
31
+
32
+ desc "The field name of the client's hostname."
33
+ config_param :source_host_key, :string, default: nil, deprecated: "use source_hostname_key instead."
34
+ desc "The field name of the client's hostname."
35
+ config_param :source_hostname_key, :string, default: nil
36
+
37
+ config_param :blocking_timeout, :time, default: 0.5
24
38
 
25
- config_set_default :port, 5170
26
39
  desc 'The payload is read up to this character.'
27
40
  config_param :delimiter, :string, default: "\n" # syslog family add "\n" to each message and this seems only way to split messages in tcp stream
28
41
 
29
- def listen(callback)
30
- log.info "listening tcp socket on #{@bind}:#{@port}"
42
+ def configure(conf)
43
+ compat_parameters_convert(conf, :parser)
44
+ super
45
+ @_event_loop_blocking_timeout = @blocking_timeout
46
+ @source_hostname_key ||= @source_host_key if @source_host_key
47
+
48
+ @parser = parser_create
49
+ end
50
+
51
+ def start
52
+ super
53
+
54
+ @buffer = ''
55
+ server_create(:in_tcp_server, @port, proto: :tcp, bind: @bind) do |data, conn|
56
+ @buffer << data
57
+ begin
58
+ pos = 0
59
+ while i = @buffer.index(@delimiter, pos)
60
+ msg = @buffer[pos...i]
61
+ pos = i + @delimiter.length
62
+
63
+ @parser.parse(msg) do |time, record|
64
+ unless time && record
65
+ log.warn "pattern not match", message: msg
66
+ next
67
+ end
31
68
 
32
- socket_manager_path = ENV['SERVERENGINE_SOCKETMANAGER_PATH']
33
- if Fluent.windows?
34
- socket_manager_path = socket_manager_path.to_i
69
+ tag = extract_tag_from_record(record)
70
+ tag ||= @tag
71
+ time ||= extract_time_from_record(record) || Fluent::EventTime.now
72
+ record[@source_hostname_key] = conn.remote_host if @source_hostname_key
73
+ router.emit(tag, time, record)
74
+ end
75
+ end
76
+ @buffer.slice!(0, pos) if pos > 0
77
+ end
35
78
  end
36
- client = ServerEngine::SocketManager::Client.new(socket_manager_path)
37
- lsock = client.listen_tcp(@bind, @port)
38
- Coolio::TCPServer.new(lsock, nil, SocketUtil::TcpHandler, log, @delimiter, callback)
39
79
  end
40
80
  end
41
81
  end
@@ -14,24 +14,60 @@
14
14
  # limitations under the License.
15
15
  #
16
16
 
17
- require 'fluent/plugin/socket_util'
17
+ require 'fluent/plugin/input'
18
18
 
19
- module Fluent
20
- class UdpInput < SocketUtil::BaseInput
21
- Plugin.register_input('udp', self)
19
+ module Fluent::Plugin
20
+ class UdpInput < Input
21
+ Fluent::Plugin.register_input('udp', self)
22
+
23
+ helpers :server, :parser, :extract, :compat_parameters
24
+
25
+ desc 'Tag of output events.'
26
+ config_param :tag, :string
27
+ desc 'The port to listen to.'
28
+ config_param :port, :integer, default: 5160
29
+ desc 'The bind address to listen to.'
30
+ config_param :bind, :string, default: '0.0.0.0'
31
+
32
+ desc "The field name of the client's hostname."
33
+ config_param :source_host_key, :string, default: nil, deprecated: "use source_hostname_key instead."
34
+ desc "The field name of the client's hostname."
35
+ config_param :source_hostname_key, :string, default: nil
22
36
 
23
- config_set_default :port, 5160
24
37
  config_param :body_size_limit, :size, default: 4096
25
38
 
26
- def listen(callback)
27
- log.info "listening udp socket on #{@bind}:#{@port}"
28
- socket_manager_path = ENV['SERVERENGINE_SOCKETMANAGER_PATH']
29
- if Fluent.windows?
30
- socket_manager_path = socket_manager_path.to_i
39
+ config_param :blocking_timeout, :time, default: 0.5
40
+
41
+ def configure(conf)
42
+ compat_parameters_convert(conf, :parser)
43
+ super
44
+ @_event_loop_blocking_timeout = @blocking_timeout
45
+ @source_hostname_key ||= @source_host_key if @source_host_key
46
+
47
+ @parser = parser_create
48
+ end
49
+
50
+ def start
51
+ super
52
+
53
+ log.info "listening udp socket", bind: @bind, port: @port
54
+ server_create(:in_udp_server, @port, proto: :udp, bind: @bind, max_bytes: @body_size_limit) do |data, sock|
55
+ data.chomp!
56
+ begin
57
+ @parser.parse(data) do |time, record|
58
+ unless time && record
59
+ log.warn "pattern not match", data: data
60
+ next
61
+ end
62
+
63
+ tag = extract_tag_from_record(record)
64
+ tag ||= @tag
65
+ time ||= extract_time_from_record(record) || Fluent::EventTime.now
66
+ record[@source_hostname_key] = sock.remote_host if @source_hostname_key
67
+ router.emit(tag, time, record)
68
+ end
69
+ end
31
70
  end
32
- client = ServerEngine::SocketManager::Client.new(socket_manager_path)
33
- @usock = client.listen_udp(@bind, @port)
34
- SocketUtil::UdpHandler.new(@usock, log, @body_size_limit, callback)
35
71
  end
36
72
  end
37
73
  end
@@ -113,25 +113,41 @@ module Fluent::Plugin
113
113
  end
114
114
 
115
115
  path_suffix = @add_path_suffix ? @path_suffix : ''
116
- @path_template = generate_path_template(@path, @buffer_config.timekey, @append, @compress_method, path_suffix: path_suffix, time_slice_format: configured_time_slice_format)
117
-
118
- placeholder_validate!(:path, @path_template)
119
-
120
- max_tag_index = get_placeholders_tag(@path_template).max || 1
121
- max_tag_index = 1 if max_tag_index < 1
122
- dummy_tag = (['a'] * max_tag_index).join('.')
123
- dummy_record_keys = get_placeholders_keys(@path_template) || ['message']
124
- dummy_record = Hash[dummy_record_keys.zip(['data'] * dummy_record_keys.size)]
125
-
126
- test_meta1 = metadata_for_test(dummy_tag, Fluent::Engine.now, dummy_record)
127
- test_path = extract_placeholders(@path_template, test_meta1)
128
- unless ::Fluent::FileUtil.writable_p?(test_path)
129
- raise Fluent::ConfigError, "out_file: `#{test_path}` is not writable"
116
+ path_timekey = if @chunk_key_time
117
+ @as_secondary ? @primary_instance.buffer_config.timekey : @buffer_config.timekey
118
+ else
119
+ nil
120
+ end
121
+ @path_template = generate_path_template(@path, path_timekey, @append, @compress_method, path_suffix: path_suffix, time_slice_format: configured_time_slice_format)
122
+
123
+ if @as_secondary
124
+ # When this plugin is configured as secondary, the primary plugin may have a tag key that this plugin does not have.
125
+ # Increment placeholder can make another output file per chunk tag/keys even if original path doesn't include it.
126
+ placeholder_validators(:path, @path_template).select{|v| v.type == :time }.each do |v|
127
+ v.validate!
128
+ end
129
+ else
130
+ placeholder_validate!(:path, @path_template)
131
+
132
+ max_tag_index = get_placeholders_tag(@path_template).max || 1
133
+ max_tag_index = 1 if max_tag_index < 1
134
+ dummy_tag = (['a'] * max_tag_index).join('.')
135
+ dummy_record_keys = get_placeholders_keys(@path_template) || ['message']
136
+ dummy_record = Hash[dummy_record_keys.zip(['data'] * dummy_record_keys.size)]
137
+
138
+ test_meta1 = metadata_for_test(dummy_tag, Fluent::Engine.now, dummy_record)
139
+ test_path = extract_placeholders(@path_template, test_meta1)
140
+ unless ::Fluent::FileUtil.writable_p?(test_path)
141
+ raise Fluent::ConfigError, "out_file: `#{test_path}` is not writable"
142
+ end
130
143
  end
131
144
 
132
145
  @formatter = formatter_create
133
146
 
134
147
  if @symlink_path && @buffer.respond_to?(:path)
148
+ if @as_secondary
149
+ raise Fluent::ConfigError, "symlink_path option is unavailable in <secondary>: consider to use secondary_file plugin"
150
+ end
135
151
  if Fluent.windows?
136
152
  log.warn "symlink_path is unavailable on Windows platform. disabled."
137
153
  @symlink_path = nil
@@ -23,16 +23,21 @@ require 'fluent/compat/socket_util'
23
23
  module Fluent::Plugin
24
24
  class ForwardOutput < Output
25
25
  class Error < StandardError; end
26
- class ResponseError < Error; end
26
+ class NoNodesAvailable < Error; end
27
27
  class ConnectionClosedError < Error; end
28
- class ACKTimeoutError < Error; end
29
28
 
30
29
  Fluent::Plugin.register_output('forward', self)
31
30
 
31
+ helpers :socket, :server, :timer, :thread, :compat_parameters
32
+
32
33
  LISTEN_PORT = 24224
33
34
 
35
+ PROCESS_CLOCK_ID = Process::CLOCK_MONOTONIC_RAW rescue Process::CLOCK_MONOTONIC
36
+
34
37
  desc 'The timeout time when sending event logs.'
35
38
  config_param :send_timeout, :time, default: 60
39
+ # TODO: add linger_timeout, recv_timeout
40
+
36
41
  desc 'The transport protocol to use for heartbeats.(udp,tcp,none)'
37
42
  config_param :heartbeat_type, :enum, list: [:tcp, :udp, :none], default: :tcp
38
43
  desc 'The interval of the heartbeat packer.'
@@ -41,8 +46,6 @@ module Fluent::Plugin
41
46
  config_param :recover_wait, :time, default: 10
42
47
  desc 'The hard timeout used to detect server failure.'
43
48
  config_param :hard_timeout, :time, default: 60
44
- desc 'Set TTL to expire DNS cache in seconds.'
45
- config_param :expire_dns_cache, :time, default: nil # 0 means disable cache
46
49
  desc 'The threshold parameter used to detect server faults.'
47
50
  config_param :phi_threshold, :integer, default: 16
48
51
  desc 'Use the "Phi accrual failure detector" to detect server failure.'
@@ -50,14 +53,20 @@ module Fluent::Plugin
50
53
 
51
54
  desc 'Change the protocol to at-least-once.'
52
55
  config_param :require_ack_response, :bool, default: false # require in_forward to respond with ack
56
+
57
+ ## The reason for the default value of :ack_response_timeout:
58
+ # Linux default tcp_syn_retries is 5 (in many environment)
59
+ # 3 + 6 + 12 + 24 + 48 + 96 -> 189 (sec)
53
60
  desc 'This option is used when require_ack_response is true.'
54
61
  config_param :ack_response_timeout, :time, default: 190
55
- desc 'Reading data size from server'
56
- config_param :read_length, :size, default: 512 # 512bytes
62
+
57
63
  desc 'The interval while reading data from server'
58
64
  config_param :read_interval_msec, :integer, default: 50 # 50ms
59
- # Linux default tcp_syn_retries is 5 (in many environment)
60
- # 3 + 6 + 12 + 24 + 48 + 96 -> 189 (sec)
65
+ desc 'Reading data size from server'
66
+ config_param :read_length, :size, default: 512 # 512bytes
67
+
68
+ desc 'Set TTL to expire DNS cache in seconds.'
69
+ config_param :expire_dns_cache, :time, default: nil # 0 means disable cache
61
70
  desc 'Enable client-side DNS round robin.'
62
71
  config_param :dns_round_robin, :bool, default: false # heartbeat_type 'udp' is not available for this
63
72
 
@@ -107,10 +116,15 @@ module Fluent::Plugin
107
116
  @nodes = [] #=> [Node]
108
117
  @loop = nil
109
118
  @thread = nil
110
- @finished = false
119
+
120
+ @usock = nil
121
+ @sock_ack_waiting = nil
122
+ @sock_ack_waiting_mutex = nil
111
123
  end
112
124
 
113
125
  def configure(conf)
126
+ compat_parameters_convert(conf, :buffer, default_chunk_key: 'tag')
127
+
114
128
  super
115
129
 
116
130
  unless @chunk_key_tag
@@ -153,79 +167,104 @@ module Fluent::Plugin
153
167
  raise Fluent::ConfigError, "ack_response_timeout must be a positive integer" if @ack_response_timeout < 1
154
168
  end
155
169
 
170
+ def prefer_delayed_commit
171
+ @require_ack_response
172
+ end
173
+
156
174
  def start
157
175
  super
158
176
 
177
+ # Output#start sets @delayed_commit_timeout by @buffer_config.delayed_commit_timeout
178
+ # But it should be overwritten by ack_response_timeout to rollback chunks after timeout
179
+ if @ack_response_timeout && @delayed_commit_timeout != @ack_response_timeout
180
+ log.info "delayed_commit_timeout is overwritten by ack_response_timeout"
181
+ @delayed_commit_timeout = @ack_response_timeout
182
+ end
183
+
159
184
  @rand_seed = Random.new.seed
160
185
  rebuild_weight_array
161
186
  @rr = 0
162
- @usock = nil
163
187
 
164
188
  unless @heartbeat_type == :none
165
- @loop = Coolio::Loop.new
166
-
167
189
  if @heartbeat_type == :udp
168
- # assuming all hosts use udp
169
- @usock = Fluent::Compat::SocketUtil.create_udp_socket(@nodes.first.host)
170
- @usock.fcntl(Fcntl::F_SETFL, Fcntl::O_NONBLOCK)
171
- @hb = HeartbeatHandler.new(@usock, method(:on_heartbeat))
172
- @loop.attach(@hb)
190
+ @usock = socket_create_udp(@nodes.first.host, @nodes.first.port, nonblock: true)
191
+ server_create_udp(:out_forward_heartbeat_receiver, 0, socket: @usock, max_bytes: @read_length) do |data, sock|
192
+ sockaddr = Socket.pack_sockaddr_in(sock.remote_port, sock.remote_host)
193
+ on_heartbeat(sockaddr, data)
194
+ end
173
195
  end
196
+ timer_execute(:out_forward_heartbeat_request, @heartbeat_interval, &method(:on_timer))
197
+ end
174
198
 
175
- @timer = HeartbeatRequestTimer.new(@heartbeat_interval, method(:on_timer))
176
- @loop.attach(@timer)
177
-
178
- @thread = Thread.new(&method(:run))
199
+ if @require_ack_response
200
+ @sock_ack_waiting_mutex = Mutex.new
201
+ @sock_ack_waiting = []
202
+ thread_create(:out_forward_receiving_ack, &method(:ack_reader))
179
203
  end
180
204
  end
181
205
 
182
- def shutdown
183
- @finished = true
184
- if @loop
185
- @loop.watchers.each {|w| w.detach }
186
- # @loop.stop
187
- @loop.stop rescue nil
188
- end
189
- @thread.join if @thread
206
+ def close
190
207
  @usock.close if @usock
191
-
192
208
  super
193
209
  end
194
210
 
195
- def run
196
- @loop.run if @loop
197
- rescue
198
- log.error "unexpected error", error: $!.to_s
199
- log.error_backtrace
200
- end
201
-
202
211
  def write(chunk)
203
212
  return if chunk.empty?
213
+ tag = chunk.metadata.tag
214
+ select_a_healthy_node{|node| node.send_data(tag, chunk) }
215
+ end
204
216
 
217
+ ACKWaitingSockInfo = Struct.new(:sock, :chunk_id, :node, :time, :timeout) do
218
+ def expired?(now)
219
+ time + timeout < now
220
+ end
221
+ end
222
+
223
+ def try_write(chunk)
224
+ if chunk.empty?
225
+ commit_write(chunk.unique_id)
226
+ return
227
+ end
205
228
  tag = chunk.metadata.tag
229
+ sock, node = select_a_healthy_node{|n| n.send_data(tag, chunk) }
230
+ chunk_id = Base64.encode64(chunk.unique_id)
231
+ current_time = Process.clock_gettime(PROCESS_CLOCK_ID)
232
+ info = ACKWaitingSockInfo.new(sock, chunk_id, node, current_time, @ack_response_timeout)
233
+ @sock_ack_waiting_mutex.synchronize do
234
+ @sock_ack_waiting << info
235
+ end
236
+ end
237
+
238
+ def select_a_healthy_node
206
239
  error = nil
207
240
 
208
241
  wlen = @weight_array.length
209
242
  wlen.times do
210
243
  @rr = (@rr + 1) % wlen
211
244
  node = @weight_array[@rr]
245
+ next unless node.available?
212
246
 
213
- if node.available?
214
- begin
215
- node.send_data(tag, chunk)
216
- return
217
- rescue
218
- # for load balancing during detecting crashed servers
219
- error = $! # use the latest error
220
- end
247
+ begin
248
+ ret = yield node
249
+ return ret, node
250
+ rescue
251
+ # for load balancing during detecting crashed servers
252
+ error = $! # use the latest error
221
253
  end
222
254
  end
223
255
 
224
- if error
225
- raise error
226
- else
227
- raise "no nodes are available" # TODO message
228
- end
256
+ raise error if error
257
+ raise NoNodesAvailable, "no nodes are available"
258
+ end
259
+
260
+ def create_transfer_socket(host, port, &block)
261
+ socket_create_tcp(
262
+ host, port,
263
+ linger_timeout: @send_timeout,
264
+ send_timeout: @send_timeout,
265
+ recv_timeout: @ack_response_timeout,
266
+ &block
267
+ )
229
268
  end
230
269
 
231
270
  # MessagePack FixArray length is 3
@@ -278,21 +317,7 @@ module Fluent::Plugin
278
317
  @weight_array = weight_array
279
318
  end
280
319
 
281
- class HeartbeatRequestTimer < Coolio::TimerWatcher
282
- def initialize(interval, callback)
283
- super(interval, true)
284
- @callback = callback
285
- end
286
-
287
- def on_timer
288
- @callback.call
289
- rescue
290
- # TODO log?
291
- end
292
- end
293
-
294
320
  def on_timer
295
- return if @finished
296
321
  @nodes.each {|n|
297
322
  if n.tick
298
323
  rebuild_weight_array
@@ -307,33 +332,86 @@ module Fluent::Plugin
307
332
  }
308
333
  end
309
334
 
310
- class HeartbeatHandler < Coolio::IO
311
- def initialize(io, callback)
312
- super(io)
313
- @io = io
314
- @callback = callback
335
+ def on_heartbeat(sockaddr, msg)
336
+ if node = @nodes.find {|n| n.sockaddr == sockaddr }
337
+ # log.trace "heartbeat arrived", name: node.name, host: node.host, port: node.port
338
+ if node.heartbeat
339
+ rebuild_weight_array
340
+ end
315
341
  end
342
+ end
316
343
 
317
- def on_readable
318
- begin
319
- msg, addr = @io.recvfrom(1024)
320
- rescue Errno::EAGAIN, Errno::EWOULDBLOCK, Errno::EINTR
321
- return
344
+ # return chunk id when succeeded for tests
345
+ def read_ack_from_sock(sock, unpacker)
346
+ begin
347
+ raw_data = sock.recv(@read_length)
348
+ rescue Errno::ECONNRESET
349
+ raw_data = ""
350
+ end
351
+ info = @sock_ack_waiting_mutex.synchronize{ @sock_ack_waiting.find{|i| i.sock == sock } }
352
+
353
+ # When connection is closed by remote host, socket is ready to read and #recv returns an empty string that means EOF.
354
+ # If this happens we assume the data wasn't delivered and retry it.
355
+ if raw_data.empty?
356
+ log.warn "destination node closed the connection. regard it as unavailable.", host: info.node.host, port: info.node.port
357
+ info.node.disable!
358
+ return nil
359
+ else
360
+ unpacker.feed(raw_data)
361
+ res = unpacker.read
362
+ if res['ack'] != info.chunk_id
363
+ # Some errors may have occurred when the ack and the chunk id are different, so send the chunk again.
364
+ log.warn "ack in response and chunk id in sent data are different", chunk_id: info.chunk_id, ack: res['ack']
365
+ rollback_write(info.chunk_id)
366
+ return nil
322
367
  end
323
- host = addr[3]
324
- port = addr[1]
325
- sockaddr = Socket.pack_sockaddr_in(port, host)
326
- @callback.call(sockaddr, msg)
327
- rescue
328
- # TODO log?
368
+ return info.chunk_id
369
+ end
370
+ rescue => e
371
+ log.error "unexpected error while receiving ack message", error: e
372
+ log.error_backtrace
373
+ ensure
374
+ @sock_ack_waiting_mutex.synchronize do
375
+ @sock_ack_waiting.delete(info)
329
376
  end
330
377
  end
331
378
 
332
- def on_heartbeat(sockaddr, msg)
333
- if node = @nodes.find {|n| n.sockaddr == sockaddr }
334
- #log.trace "heartbeat from '#{node.name}'", :host=>node.host, :port=>node.port
335
- if node.heartbeat
336
- rebuild_weight_array
379
+ def ack_reader
380
+ select_interval = if @delayed_commit_timeout > 3
381
+ 2
382
+ else
383
+ @delayed_commit_timeout / 2.0
384
+ end
385
+
386
+ unpacker = Fluent::Engine.msgpack_unpacker
387
+
388
+ while thread_current_running?
389
+ now = Process.clock_gettime(PROCESS_CLOCK_ID)
390
+ sockets = []
391
+ @sock_ack_waiting_mutex.synchronize do
392
+ new_list = []
393
+ @sock_ack_waiting.each do |info|
394
+ if info.expired?(now)
395
+ # There are 2 types of cases when no response has been received from socket:
396
+ # (1) the node does not support sending responses
397
+ # (2) the node does support sending response but responses have not arrived for some reasons.
398
+ log.warn "no response from node. regard it as unavailable.", host: info.node.host, port: info.node.port
399
+ info.node.disable!
400
+ info.sock.close rescue nil
401
+ rollback_write(info.chunk_id)
402
+ else
403
+ sockets << info.sock
404
+ new_list << info
405
+ end
406
+ end
407
+ @sock_ack_waiting = new_list
408
+ end
409
+
410
+ readable_sockets, _, _ = IO.select(sockets, nil, nil, select_interval)
411
+ next unless readable_sockets
412
+
413
+ readable_sockets.each do |sock|
414
+ read_ack_from_sock(sock, unpacker)
337
415
  end
338
416
  end
339
417
  end
@@ -386,20 +464,6 @@ module Fluent::Plugin
386
464
  @standby
387
465
  end
388
466
 
389
- def connect
390
- TCPSocket.new(resolved_host, port)
391
- end
392
-
393
- def set_socket_options(sock)
394
- opt = [1, @sender.send_timeout.to_i].pack('I!I!') # { int l_onoff; int l_linger; }
395
- sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_LINGER, opt)
396
-
397
- opt = [@sender.send_timeout.to_i, 0].pack('L!L!') # struct timeval
398
- sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_SNDTIMEO, opt)
399
-
400
- sock
401
- end
402
-
403
467
  def establish_connection(sock)
404
468
  while available? && @state != :established
405
469
  begin
@@ -430,98 +494,60 @@ module Fluent::Plugin
430
494
  end
431
495
  end
432
496
 
433
- def send_data(tag, chunk)
434
- sock = connect
497
+ def send_data_actual(sock, tag, chunk)
435
498
  @state = @sender.security ? :helo : :established
436
- begin
437
- set_socket_options(sock)
438
-
439
- if @state != :established
440
- establish_connection(sock)
441
- end
499
+ if @state != :established
500
+ establish_connection(sock)
501
+ end
442
502
 
443
- unless available?
444
- raise ConnectionClosedError, "failed to establish connection with node #{@name}"
445
- end
503
+ unless available?
504
+ raise ConnectionClosedError, "failed to establish connection with node #{@name}"
505
+ end
446
506
 
447
- option = { 'size' => chunk.size, 'compressed' => @compress }
448
- option['chunk'] = Base64.encode64(chunk.unique_id) if @sender.require_ack_response
507
+ option = { 'size' => chunk.size, 'compressed' => @compress }
508
+ option['chunk'] = Base64.encode64(chunk.unique_id) if @sender.require_ack_response
449
509
 
450
- # out_forward always uses Raw32 type for content.
451
- # Raw16 can store only 64kbytes, and it should be much smaller than buffer chunk size.
510
+ # out_forward always uses Raw32 type for content.
511
+ # Raw16 can store only 64kbytes, and it should be much smaller than buffer chunk size.
452
512
 
453
- sock.write @sender.forward_header # beginArray(3)
454
- sock.write tag.to_msgpack # 1. writeRaw(tag)
455
- chunk.open(compressed: @compress) do |chunk_io|
456
- sock.write [0xdb, chunk_io.size].pack('CN') # 2. beginRaw(size) raw32
457
- IO.copy_stream(chunk_io, sock) # writeRawBody(packed_es)
458
- end
459
- sock.write option.to_msgpack # 3. writeOption(option)
460
-
461
- if @sender.require_ack_response
462
- # Waiting for a response here results in a decrease of throughput because a chunk queue is locked.
463
- # To avoid a decrease of throughput, it is necessary to prepare a list of chunks that wait for responses
464
- # and process them asynchronously.
465
- if IO.select([sock], nil, nil, @sender.ack_response_timeout)
466
- raw_data = begin
467
- sock.recv(1024)
468
- rescue Errno::ECONNRESET
469
- ""
470
- end
471
-
472
- # When connection is closed by remote host, socket is ready to read and #recv returns an empty string that means EOF.
473
- # If this happens we assume the data wasn't delivered and retry it.
474
- if raw_data.empty?
475
- @log.warn "node closed the connection. regard it as unavailable.", host: @host, port: @port
476
- disable!
477
- raise ConnectionClosedError, "node #{@host}:#{@port} closed connection"
478
- else
479
- @unpacker.feed(raw_data)
480
- res = @unpacker.read
481
- if res['ack'] != option['chunk']
482
- # Some errors may have occurred when the ack and the chunk id are different, so send the chunk again.
483
- raise ResponseError, "ack in response and chunk id in sent data are different"
484
- end
485
- end
513
+ sock.write @sender.forward_header # beginArray(3)
514
+ sock.write tag.to_msgpack # 1. writeRaw(tag)
515
+ chunk.open(compressed: @compress) do |chunk_io|
516
+ sock.write [0xdb, chunk_io.size].pack('CN') # 2. beginRaw(size) raw32
517
+ IO.copy_stream(chunk_io, sock) # writeRawBody(packed_es)
518
+ end
519
+ sock.write option.to_msgpack # 3. writeOption(option)
520
+ end
486
521
 
487
- else
488
- # IO.select returns nil on timeout.
489
- # There are 2 types of cases when no response has been received:
490
- # (1) the node does not support sending responses
491
- # (2) the node does support sending response but responses have not arrived for some reasons.
492
- @log.warn "no response from node. regard it as unavailable.", host: @host, port: @port
493
- disable!
494
- raise ACKTimeoutError, "node #{host}:#{port} does not return ACK"
495
- end
496
- end
522
+ def send_data(tag, chunk)
523
+ sock = @sender.create_transfer_socket(resolved_host, port)
524
+ begin
525
+ send_data_actual(sock, tag, chunk)
526
+ rescue
527
+ sock.close rescue nil
528
+ raise
529
+ end
497
530
 
498
- heartbeat(false)
499
- res # for test
500
- ensure
501
- sock.close_write
502
- sock.close
531
+ if @sender.require_ack_response
532
+ return sock # to read ACK from socket
503
533
  end
534
+
535
+ sock.close_write rescue nil
536
+ sock.close rescue nil
537
+ heartbeat(false)
538
+ nil
504
539
  end
505
540
 
506
541
  # FORWARD_TCP_HEARTBEAT_DATA = FORWARD_HEADER + ''.to_msgpack + [].to_msgpack
507
542
  def send_heartbeat
508
543
  case @sender.heartbeat_type
509
544
  when :tcp
510
- sock = connect
511
- begin
512
- opt = [1, @sender.send_timeout.to_i].pack('I!I!') # { int l_onoff; int l_linger; }
513
- sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_LINGER, opt)
514
- # opt = [@sender.send_timeout.to_i, 0].pack('L!L!') # struct timeval
515
- # sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_SNDTIMEO, opt)
516
-
545
+ @sender.create_transfer_socket(resolved_host, port) do |sock|
517
546
  ## don't send any data to not cause a compatibility problem
518
547
  # sock.write FORWARD_TCP_HEARTBEAT_DATA
519
548
 
520
549
  # successful tcp connection establishment is considered as valid heartbeat
521
550
  heartbeat(true)
522
- ensure
523
- sock.close_write
524
- sock.close
525
551
  end
526
552
  when :udp
527
553
  @usock.send "\0", 0, Socket.pack_sockaddr_in(@port, resolved_host)
@@ -543,7 +569,7 @@ module Fluent::Plugin
543
569
  @resolved_host ||= resolve_dns!
544
570
 
545
571
  else
546
- now = Engine.now
572
+ now = Fluent::Engine.now
547
573
  rh = @resolved_host
548
574
  if !rh || now - @resolved_time >= @sender.expire_dns_cache
549
575
  rh = @resolved_host = resolve_dns!