fluentd 1.13.3 → 1.14.0.rc

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of fluentd might be problematic. Click here for more details.

Files changed (43):
  1. checksums.yaml +4 -4
  2. data/lib/fluent/command/fluentd.rb +8 -0
  3. data/lib/fluent/compat/output.rb +9 -6
  4. data/lib/fluent/event_router.rb +28 -1
  5. data/lib/fluent/plugin.rb +10 -1
  6. data/lib/fluent/plugin/bare_output.rb +49 -8
  7. data/lib/fluent/plugin/buffer.rb +84 -22
  8. data/lib/fluent/plugin/filter.rb +35 -1
  9. data/lib/fluent/plugin/in_monitor_agent.rb +4 -2
  10. data/lib/fluent/plugin/in_syslog.rb +13 -1
  11. data/lib/fluent/plugin/in_tail.rb +4 -1
  12. data/lib/fluent/plugin/in_tail/position_file.rb +1 -1
  13. data/lib/fluent/plugin/input.rb +39 -1
  14. data/lib/fluent/plugin/metrics.rb +119 -0
  15. data/lib/fluent/plugin/metrics_local.rb +96 -0
  16. data/lib/fluent/plugin/multi_output.rb +43 -6
  17. data/lib/fluent/plugin/output.rb +74 -33
  18. data/lib/fluent/plugin_helper.rb +1 -0
  19. data/lib/fluent/plugin_helper/event_emitter.rb +8 -1
  20. data/lib/fluent/plugin_helper/metrics.rb +129 -0
  21. data/lib/fluent/plugin_helper/server.rb +4 -2
  22. data/lib/fluent/root_agent.rb +6 -0
  23. data/lib/fluent/supervisor.rb +2 -0
  24. data/lib/fluent/system_config.rb +9 -1
  25. data/lib/fluent/version.rb +1 -1
  26. data/test/config/test_system_config.rb +6 -0
  27. data/test/plugin/in_tail/test_position_file.rb +26 -4
  28. data/test/plugin/test_bare_output.rb +13 -0
  29. data/test/plugin/test_buffer.rb +8 -2
  30. data/test/plugin/test_filter.rb +11 -0
  31. data/test/plugin/test_in_monitor_agent.rb +214 -8
  32. data/test/plugin/test_in_syslog.rb +35 -0
  33. data/test/plugin/test_in_tail.rb +9 -26
  34. data/test/plugin/test_input.rb +11 -0
  35. data/test/plugin/test_metrics.rb +294 -0
  36. data/test/plugin/test_metrics_local.rb +96 -0
  37. data/test/plugin/test_multi_output.rb +25 -1
  38. data/test/plugin/test_output.rb +16 -0
  39. data/test/plugin_helper/test_event_emitter.rb +29 -0
  40. data/test/plugin_helper/test_metrics.rb +137 -0
  41. data/test/test_plugin_classes.rb +102 -0
  42. data/test/test_root_agent.rb +30 -1
  43. metadata +17 -8
@@ -37,7 +37,7 @@ module Fluent
37
37
  include PluginHelper::Mixin
38
38
  include UniqueId::Mixin
39
39
 
40
- helpers_internal :thread, :retry_state
40
+ helpers_internal :thread, :retry_state, :metrics
41
41
 
42
42
  CHUNK_KEY_PATTERN = /^[-_.@a-zA-Z0-9]+$/
43
43
  CHUNK_KEY_PLACEHOLDER_PATTERN = /\$\{([-_.@$a-zA-Z0-9]+)\}/
@@ -164,7 +164,6 @@ module Fluent
164
164
  end
165
165
 
166
166
  attr_reader :as_secondary, :delayed_commit, :delayed_commit_timeout, :timekey_zone
167
- attr_reader :num_errors, :emit_count, :emit_records, :write_count, :rollback_count
168
167
 
169
168
  # for tests
170
169
  attr_reader :buffer, :retry, :secondary, :chunk_keys, :chunk_key_accessors, :chunk_key_time, :chunk_key_tag
@@ -172,6 +171,30 @@ module Fluent
172
171
  # output_enqueue_thread_waiting: for test of output.rb itself
173
172
  attr_accessor :retry_for_error_chunk # if true, error flush will be retried even if under_plugin_development is true
174
173
 
174
+ def num_errors
175
+ @num_errors_metrics.get
176
+ end
177
+
178
+ def emit_count
179
+ @emit_count_metrics.get
180
+ end
181
+
182
+ def emit_size
183
+ @emit_size_metrics.get
184
+ end
185
+
186
+ def emit_records
187
+ @emit_records_metrics.get
188
+ end
189
+
190
+ def write_count
191
+ @write_count_metrics.get
192
+ end
193
+
194
+ def rollback_count
195
+ @rollback_count_metrics.get
196
+ end
197
+
175
198
  def initialize
176
199
  super
177
200
  @counter_mutex = Mutex.new
@@ -181,13 +204,15 @@ module Fluent
181
204
  @primary_instance = nil
182
205
 
183
206
  # TODO: well organized counters
184
- @num_errors = 0
185
- @emit_count = 0
186
- @emit_records = 0
187
- @write_count = 0
188
- @rollback_count = 0
189
- @flush_time_count = 0
190
- @slow_flush_count = 0
207
+ @num_errors_metrics = nil
208
+ @emit_count_metrics = nil
209
+ @emit_records_metrics = nil
210
+ @emit_size_metrics = nil
211
+ @write_count_metrics = nil
212
+ @rollback_count_metrics = nil
213
+ @flush_time_count_metrics = nil
214
+ @slow_flush_count_metrics = nil
215
+ @enable_size_metrics = false
191
216
 
192
217
  # How to process events is decided here at once, but it will be decided in delayed way on #configure & #start
193
218
  if implement?(:synchronous)
@@ -246,6 +271,15 @@ module Fluent
246
271
 
247
272
  super
248
273
 
274
+ @num_errors_metrics = metrics_create(namespace: "fluentd", subsystem: "output", name: "num_errors", help_text: "Number of count num errors")
275
+ @emit_count_metrics = metrics_create(namespace: "fluentd", subsystem: "output", name: "emit_records", help_text: "Number of count emits")
276
+ @emit_records_metrics = metrics_create(namespace: "fluentd", subsystem: "output", name: "emit_records", help_text: "Number of emit records")
277
+ @emit_size_metrics = metrics_create(namespace: "fluentd", subsystem: "output", name: "emit_size", help_text: "Total size of emit events")
278
+ @write_count_metrics = metrics_create(namespace: "fluentd", subsystem: "output", name: "write_count", help_text: "Number of writing events")
279
+ @rollback_count_metrics = metrics_create(namespace: "fluentd", subsystem: "output", name: "rollback_count", help_text: "Number of rollbacking operations")
280
+ @flush_time_count_metrics = metrics_create(namespace: "fluentd", subsystem: "output", name: "flush_time_count", help_text: "Count of flush time")
281
+ @slow_flush_count_metrics = metrics_create(namespace: "fluentd", subsystem: "output", name: "slow_flush_count", help_text: "Count of slow flush occurred time(s)")
282
+
249
283
  if has_buffer_section
250
284
  unless implement?(:buffered) || implement?(:delayed_commit)
251
285
  raise Fluent::ConfigError, "<buffer> section is configured, but plugin '#{self.class}' doesn't support buffering"
@@ -271,6 +305,8 @@ module Fluent
271
305
  @buffering = true
272
306
  end
273
307
  end
308
+ # Enable to update record size metrics or not
309
+ @enable_size_metrics = !!system_config.enable_size_metrics
274
310
 
275
311
  if @as_secondary
276
312
  if !@buffering && !@buffering.nil?
@@ -797,18 +833,19 @@ module Fluent
797
833
  end
798
834
 
799
835
  def emit_sync(tag, es)
800
- @counter_mutex.synchronize{ @emit_count += 1 }
836
+ @emit_count_metrics.inc
801
837
  begin
802
838
  process(tag, es)
803
- @counter_mutex.synchronize{ @emit_records += es.size }
839
+ @emit_records_metrics.add(es.size)
840
+ @emit_size_metrics.add(es.to_msgpack_stream.bytesize) if @enable_size_metrics
804
841
  rescue
805
- @counter_mutex.synchronize{ @num_errors += 1 }
842
+ @num_errors_metrics.inc
806
843
  raise
807
844
  end
808
845
  end
809
846
 
810
847
  def emit_buffered(tag, es)
811
- @counter_mutex.synchronize{ @emit_count += 1 }
848
+ @emit_count_metrics.inc
812
849
  begin
813
850
  execute_chunking(tag, es, enqueue: (@flush_mode == :immediate))
814
851
  if !@retry && @buffer.queued?(nil, optimistic: true)
@@ -816,7 +853,7 @@ module Fluent
816
853
  end
817
854
  rescue
818
855
  # TODO: separate number of errors into emit errors and write/flush errors
819
- @counter_mutex.synchronize{ @num_errors += 1 }
856
+ @num_errors_metrics.inc
820
857
  raise
821
858
  end
822
859
  end
@@ -966,7 +1003,8 @@ module Fluent
966
1003
  write_guard do
967
1004
  @buffer.write(meta_and_data, enqueue: enqueue)
968
1005
  end
969
- @counter_mutex.synchronize{ @emit_records += records }
1006
+ @emit_records_metrics.add(es.size)
1007
+ @emit_size_metrics.add(es.to_msgpack_stream.bytesize) if @enable_size_metrics
970
1008
  true
971
1009
  end
972
1010
 
@@ -983,7 +1021,8 @@ module Fluent
983
1021
  write_guard do
984
1022
  @buffer.write(meta_and_data, format: format_proc, enqueue: enqueue)
985
1023
  end
986
- @counter_mutex.synchronize{ @emit_records += records }
1024
+ @emit_records_metrics.add(es.size)
1025
+ @emit_size_metrics.add(es.to_msgpack_stream.bytesize) if @enable_size_metrics
987
1026
  true
988
1027
  end
989
1028
 
@@ -1008,7 +1047,8 @@ module Fluent
1008
1047
  write_guard do
1009
1048
  @buffer.write({meta => data}, format: format_proc, enqueue: enqueue)
1010
1049
  end
1011
- @counter_mutex.synchronize{ @emit_records += records }
1050
+ @emit_records_metrics.add(es.size)
1051
+ @emit_size_metrics.add(es.to_msgpack_stream.bytesize) if @enable_size_metrics
1012
1052
  true
1013
1053
  end
1014
1054
 
@@ -1046,7 +1086,7 @@ module Fluent
1046
1086
  # false if chunk was already flushed and couldn't be rollbacked unexpectedly
1047
1087
  # in many cases, false can be just ignored
1048
1088
  if @buffer.takeback_chunk(chunk_id)
1049
- @counter_mutex.synchronize{ @rollback_count += 1 }
1089
+ @rollback_count_metrics.inc
1050
1090
  if update_retry
1051
1091
  primary = @as_secondary ? @primary_instance : self
1052
1092
  primary.update_retry_state(chunk_id, @as_secondary)
@@ -1062,7 +1102,7 @@ module Fluent
1062
1102
  while @dequeued_chunks.first && @dequeued_chunks.first.expired?
1063
1103
  info = @dequeued_chunks.shift
1064
1104
  if @buffer.takeback_chunk(info.chunk_id)
1065
- @counter_mutex.synchronize{ @rollback_count += 1 }
1105
+ @rollback_count_metrics.inc
1066
1106
  log.warn "failed to flush the buffer chunk, timeout to commit.", chunk_id: dump_unique_id_hex(info.chunk_id), flushed_at: info.time
1067
1107
  primary = @as_secondary ? @primary_instance : self
1068
1108
  primary.update_retry_state(info.chunk_id, @as_secondary)
@@ -1077,7 +1117,7 @@ module Fluent
1077
1117
  until @dequeued_chunks.empty?
1078
1118
  info = @dequeued_chunks.shift
1079
1119
  if @buffer.takeback_chunk(info.chunk_id)
1080
- @counter_mutex.synchronize{ @rollback_count += 1 }
1120
+ @rollback_count_metrics.inc
1081
1121
  log.info "delayed commit for buffer chunks was cancelled in shutdown", chunk_id: dump_unique_id_hex(info.chunk_id)
1082
1122
  primary = @as_secondary ? @primary_instance : self
1083
1123
  primary.update_retry_state(info.chunk_id, @as_secondary)
@@ -1120,7 +1160,7 @@ module Fluent
1120
1160
 
1121
1161
  if output.delayed_commit
1122
1162
  log.trace "executing delayed write and commit", chunk: dump_unique_id_hex(chunk.unique_id)
1123
- @counter_mutex.synchronize{ @write_count += 1 }
1163
+ @write_count_metrics.inc
1124
1164
  @dequeued_chunks_mutex.synchronize do
1125
1165
  # delayed_commit_timeout for secondary is configured in <buffer> of primary (<secondary> don't get <buffer>)
1126
1166
  @dequeued_chunks << DequeuedChunkInfo.new(chunk.unique_id, Time.now, self.delayed_commit_timeout)
@@ -1132,7 +1172,7 @@ module Fluent
1132
1172
  chunk_id = chunk.unique_id
1133
1173
  dump_chunk_id = dump_unique_id_hex(chunk_id)
1134
1174
  log.trace "adding write count", instance: self.object_id
1135
- @counter_mutex.synchronize{ @write_count += 1 }
1175
+ @write_count_metrics.inc
1136
1176
  log.trace "executing sync write", chunk: dump_chunk_id
1137
1177
 
1138
1178
  output.write(chunk)
@@ -1188,7 +1228,7 @@ module Fluent
1188
1228
  end
1189
1229
 
1190
1230
  if @buffer.takeback_chunk(chunk.unique_id)
1191
- @counter_mutex.synchronize { @rollback_count += 1 }
1231
+ @rollback_count_metrics.inc
1192
1232
  end
1193
1233
 
1194
1234
  update_retry_state(chunk.unique_id, using_secondary, e)
@@ -1219,9 +1259,9 @@ module Fluent
1219
1259
  def check_slow_flush(start)
1220
1260
  elapsed_time = Fluent::Clock.now - start
1221
1261
  elapsed_millsec = (elapsed_time * 1000).to_i
1222
- @counter_mutex.synchronize { @flush_time_count += elapsed_millsec }
1262
+ @flush_time_count_metrics.add(elapsed_millsec)
1223
1263
  if elapsed_time > @slow_flush_log_threshold
1224
- @counter_mutex.synchronize { @slow_flush_count += 1 }
1264
+ @slow_flush_count_metrics.inc
1225
1265
  log.warn "buffer flush took longer time than slow_flush_log_threshold:",
1226
1266
  elapsed_time: elapsed_time, slow_flush_log_threshold: @slow_flush_log_threshold, plugin_id: self.plugin_id
1227
1267
  end
@@ -1229,7 +1269,7 @@ module Fluent
1229
1269
 
1230
1270
  def update_retry_state(chunk_id, using_secondary, error = nil)
1231
1271
  @retry_mutex.synchronize do
1232
- @counter_mutex.synchronize{ @num_errors += 1 }
1272
+ @num_errors_metrics.inc
1233
1273
  chunk_id_hex = dump_unique_id_hex(chunk_id)
1234
1274
 
1235
1275
  unless @retry
@@ -1490,15 +1530,16 @@ module Fluent
1490
1530
 
1491
1531
  def statistics
1492
1532
  stats = {
1493
- 'emit_records' => @emit_records,
1533
+ 'emit_records' => @emit_records_metrics.get,
1534
+ 'emit_size' => @emit_size_metrics.get,
1494
1535
  # Respect original name
1495
1536
  # https://github.com/fluent/fluentd/blob/45c7b75ba77763eaf87136864d4942c4e0c5bfcd/lib/fluent/plugin/in_monitor_agent.rb#L284
1496
- 'retry_count' => @num_errors,
1497
- 'emit_count' => @emit_count,
1498
- 'write_count' => @write_count,
1499
- 'rollback_count' => @rollback_count,
1500
- 'slow_flush_count' => @slow_flush_count,
1501
- 'flush_time_count' => @flush_time_count,
1537
+ 'retry_count' => @num_errors_metrics.get,
1538
+ 'emit_count' => @emit_count_metrics.get,
1539
+ 'write_count' => @write_count_metrics.get,
1540
+ 'rollback_count' => @rollback_count_metrics.get,
1541
+ 'slow_flush_count' => @slow_flush_count_metrics.get,
1542
+ 'flush_time_count' => @flush_time_count_metrics.get,
1502
1543
  }
1503
1544
 
1504
1545
  if @buffer && @buffer.respond_to?(:statistics)
@@ -32,6 +32,7 @@ require 'fluent/plugin_helper/retry_state'
32
32
  require 'fluent/plugin_helper/record_accessor'
33
33
  require 'fluent/plugin_helper/compat_parameters'
34
34
  require 'fluent/plugin_helper/service_discovery'
35
+ require 'fluent/plugin_helper/metrics'
35
36
 
36
37
  module Fluent
37
38
  module PluginHelper
@@ -29,6 +29,9 @@ module Fluent
29
29
  if @_event_emitter_lazy_init
30
30
  @router = @primary_instance.router
31
31
  end
32
+ if @router.respond_to?(:caller_plugin_id=)
33
+ @router.caller_plugin_id = self.plugin_id
34
+ end
32
35
  @router
33
36
  end
34
37
 
@@ -47,7 +50,11 @@ module Fluent
47
50
 
48
51
  def event_emitter_router(label_name)
49
52
  if label_name
50
- Engine.root_agent.find_label(label_name).event_router
53
+ if label_name == "@ROOT"
54
+ Engine.root_agent.event_router
55
+ else
56
+ Engine.root_agent.find_label(label_name).event_router
57
+ end
51
58
  elsif self.respond_to?(:as_secondary) && self.as_secondary
52
59
  if @primary_instance.has_router?
53
60
  @_event_emitter_lazy_init = true
@@ -0,0 +1,129 @@
1
+ #
2
+ # Fluentd
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ #
16
+
17
+ require 'forwardable'
18
+
19
+ require 'fluent/plugin'
20
+ require 'fluent/plugin/metrics'
21
+ require 'fluent/plugin_helper/timer'
22
+ require 'fluent/config/element'
23
+ require 'fluent/configurable'
24
+ require 'fluent/system_config'
25
+
26
+ module Fluent
27
+ module PluginHelper
28
+ module Metrics
29
+ include Fluent::SystemConfig::Mixin
30
+
31
+ attr_reader :_metrics # For tests.
32
+
33
+ def initialize
34
+ super
35
+ @_metrics_started = false
36
+ @_metrics = {} # usage => metrics_state
37
+ end
38
+
39
+ def configure(conf)
40
+ super
41
+
42
+ @plugin_type_or_id = if self.plugin_id_configured?
43
+ self.plugin_id
44
+ else
45
+ if type = (conf["@type"] || conf["type"])
46
+ "#{type}.#{self.plugin_id}"
47
+ else
48
+ "#{self.class.to_s.split("::").last.downcase}.#{self.plugin_id}"
49
+ end
50
+ end
51
+ end
52
+
53
+ def metrics_create(namespace: "fluentd", subsystem: "metrics", name:, help_text:, labels: {}, prefer_gauge: false)
54
+ metrics = if system_config.metrics
55
+ Fluent::Plugin.new_metrics(system_config.metrics[:@type], parent: self)
56
+ else
57
+ Fluent::Plugin.new_metrics(Fluent::Plugin::Metrics::DEFAULT_TYPE, parent: self)
58
+ end
59
+ config = if system_config.metrics
60
+ system_config.metrics.corresponding_config_element
61
+ else
62
+ Fluent::Config::Element.new('metrics', '', {'@type' => Fluent::Plugin::Metrics::DEFAULT_TYPE}, [])
63
+ end
64
+ metrics.use_gauge_metric = prefer_gauge
65
+ metrics.configure(config)
66
+ # For multi workers environment, cmetrics should be distinguish with static labels.
67
+ if Fluent::Engine.system_config.workers > 1
68
+ labels.merge!(worker_id: fluentd_worker_id.to_s)
69
+ end
70
+ labels.merge!(plugin: @plugin_type_or_id)
71
+ metrics.create(namespace: namespace, subsystem: subsystem, name: name, help_text: help_text, labels: labels)
72
+
73
+ @_metrics["#{@plugin_type_or_id}_#{namespace}_#{subsystem}_#{name}"] = metrics
74
+
75
+ metrics
76
+ end
77
+
78
+ def metrics_operate(method_name, &block)
79
+ @_metrics.each_pair do |key, m|
80
+ begin
81
+ block.call(s) if block_given?
82
+ m.__send__(method_name)
83
+ rescue => e
84
+ log.error "unexpected error while #{method_name}", key: key, metrics: m, error: e
85
+ end
86
+ end
87
+ end
88
+
89
+ def start
90
+ super
91
+
92
+ metrics_operate(:start)
93
+ @_metrics_started = true
94
+ end
95
+
96
+ def stop
97
+ super
98
+ # timer stops automatically in super
99
+ metrics_operate(:stop)
100
+ end
101
+
102
+ def before_shutdown
103
+ metrics_operate(:before_shutdown)
104
+ super
105
+ end
106
+
107
+ def shutdown
108
+ metrics_operate(:shutdown)
109
+ super
110
+ end
111
+
112
+ def after_shutdown
113
+ metrics_operate(:after_shutdown)
114
+ super
115
+ end
116
+
117
+ def close
118
+ metrics_operate(:close)
119
+ super
120
+ end
121
+
122
+ def terminate
123
+ metrics_operate(:terminate)
124
+ @_metrics = {}
125
+ super
126
+ end
127
+ end
128
+ end
129
+ end
@@ -709,13 +709,15 @@ module Fluent
709
709
  return true
710
710
  end
711
711
  rescue Errno::EPIPE, Errno::ECONNRESET, Errno::ETIMEDOUT, Errno::ECONNREFUSED, Errno::EHOSTUNREACH => e
712
+ peeraddr = (@_handler_socket.peeraddr rescue PEERADDR_FAILED)
712
713
  @log.trace "unexpected error before accepting TLS connection",
713
- host: @_handler_socket.peeraddr[3], port: @_handler_socket.peeraddr[1], error: e
714
+ addr: peeraddr[3], host: peeraddr[2], port: peeraddr[1], error: e
714
715
  close rescue nil
715
716
  rescue OpenSSL::SSL::SSLError => e
717
+ peeraddr = (@_handler_socket.peeraddr rescue PEERADDR_FAILED)
716
718
  # Use same log level as on_readable
717
719
  @log.warn "unexpected error before accepting TLS connection by OpenSSL",
718
- host: @_handler_socket.peeraddr[3], port: @_handler_socket.peeraddr[1], error: e
720
+ addr: peeraddr[3], host: peeraddr[2], port: peeraddr[1], error: e
719
721
  close rescue nil
720
722
  end
721
723
 
@@ -55,9 +55,11 @@ module Fluent
55
55
  @suppress_emit_error_log_interval = 0
56
56
  @next_emit_error_log_time = nil
57
57
  @without_source = false
58
+ @enable_input_metrics = false
58
59
 
59
60
  suppress_interval(system_config.emit_error_log_interval) unless system_config.emit_error_log_interval.nil?
60
61
  @without_source = system_config.without_source unless system_config.without_source.nil?
62
+ @enable_input_metrics = !!system_config.enable_input_metrics
61
63
  end
62
64
 
63
65
  attr_reader :inputs
@@ -131,6 +133,7 @@ module Fluent
131
133
  end
132
134
  name = e.arg
133
135
  raise ConfigError, "Missing symbol argument on <label> directive" if name.empty?
136
+ raise ConfigError, "@ROOT for <label> is not permitted, reserved for getting root router" if name == '@ROOT'
134
137
 
135
138
  if name == ERROR_LABEL
136
139
  error_label_config = e
@@ -315,6 +318,9 @@ module Fluent
315
318
  # See also 'fluentd/plugin/input.rb'
316
319
  input.context_router = @event_router
317
320
  input.configure(conf)
321
+ if @enable_input_metrics
322
+ @event_router.add_metric_callbacks(input.plugin_id, Proc.new {|es| input.metric_callback(es) })
323
+ end
318
324
  @inputs << input
319
325
 
320
326
  input