fluentd 1.17.1 → 1.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100)
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +116 -0
  3. data/CHANGELOG.md +293 -16
  4. data/MAINTAINERS.md +8 -2
  5. data/README.md +3 -7
  6. data/Rakefile +2 -0
  7. data/SECURITY.md +5 -3
  8. data/lib/fluent/command/cap_ctl.rb +2 -2
  9. data/lib/fluent/command/fluentd.rb +13 -3
  10. data/lib/fluent/compat/formatter.rb +6 -0
  11. data/lib/fluent/compat/socket_util.rb +2 -2
  12. data/lib/fluent/config/configure_proxy.rb +1 -1
  13. data/lib/fluent/config/element.rb +2 -2
  14. data/lib/fluent/config/literal_parser.rb +12 -5
  15. data/lib/fluent/config/parser.rb +15 -3
  16. data/lib/fluent/config/section.rb +2 -2
  17. data/lib/fluent/config/types.rb +1 -1
  18. data/lib/fluent/config/v1_parser.rb +3 -3
  19. data/lib/fluent/counter/store.rb +1 -1
  20. data/lib/fluent/engine.rb +50 -34
  21. data/lib/fluent/env.rb +6 -2
  22. data/lib/fluent/event.rb +7 -6
  23. data/lib/fluent/event_router.rb +2 -2
  24. data/lib/fluent/log/console_adapter.rb +5 -7
  25. data/lib/fluent/log.rb +23 -0
  26. data/lib/fluent/plugin/bare_output.rb +0 -16
  27. data/lib/fluent/plugin/base.rb +2 -2
  28. data/lib/fluent/plugin/buf_file.rb +15 -1
  29. data/lib/fluent/plugin/buf_file_single.rb +15 -1
  30. data/lib/fluent/plugin/buffer/chunk.rb +74 -10
  31. data/lib/fluent/plugin/buffer/file_chunk.rb +9 -5
  32. data/lib/fluent/plugin/buffer/file_single_chunk.rb +3 -3
  33. data/lib/fluent/plugin/buffer/memory_chunk.rb +2 -2
  34. data/lib/fluent/plugin/buffer.rb +34 -6
  35. data/lib/fluent/plugin/compressable.rb +68 -22
  36. data/lib/fluent/plugin/filter.rb +0 -8
  37. data/lib/fluent/plugin/filter_parser.rb +27 -51
  38. data/lib/fluent/plugin/filter_record_transformer.rb +1 -1
  39. data/lib/fluent/plugin/formatter_csv.rb +18 -4
  40. data/lib/fluent/plugin/formatter_json.rb +7 -4
  41. data/lib/fluent/plugin/formatter_out_file.rb +5 -2
  42. data/lib/fluent/plugin/in_forward.rb +9 -5
  43. data/lib/fluent/plugin/in_http.rb +14 -4
  44. data/lib/fluent/plugin/in_monitor_agent.rb +4 -8
  45. data/lib/fluent/plugin/in_syslog.rb +4 -0
  46. data/lib/fluent/plugin/in_tail/position_file.rb +1 -1
  47. data/lib/fluent/plugin/in_tail.rb +80 -57
  48. data/lib/fluent/plugin/in_tcp.rb +6 -2
  49. data/lib/fluent/plugin/in_udp.rb +11 -2
  50. data/lib/fluent/plugin/input.rb +4 -8
  51. data/lib/fluent/plugin/multi_output.rb +1 -17
  52. data/lib/fluent/plugin/out_buffer.rb +40 -0
  53. data/lib/fluent/plugin/out_exec_filter.rb +2 -2
  54. data/lib/fluent/plugin/out_file.rb +37 -30
  55. data/lib/fluent/plugin/out_forward/connection_manager.rb +2 -2
  56. data/lib/fluent/plugin/out_forward.rb +23 -13
  57. data/lib/fluent/plugin/out_http.rb +1 -1
  58. data/lib/fluent/plugin/out_secondary_file.rb +2 -2
  59. data/lib/fluent/plugin/out_stdout.rb +10 -3
  60. data/lib/fluent/plugin/out_stream.rb +3 -3
  61. data/lib/fluent/plugin/output.rb +26 -35
  62. data/lib/fluent/plugin/owned_by_mixin.rb +2 -2
  63. data/lib/fluent/plugin/parser.rb +3 -3
  64. data/lib/fluent/plugin/parser_json.rb +3 -3
  65. data/lib/fluent/plugin/sd_file.rb +2 -2
  66. data/lib/fluent/plugin/storage_local.rb +8 -4
  67. data/lib/fluent/plugin.rb +1 -1
  68. data/lib/fluent/plugin_helper/cert_option.rb +8 -0
  69. data/lib/fluent/plugin_helper/child_process.rb +2 -2
  70. data/lib/fluent/plugin_helper/event_emitter.rb +12 -0
  71. data/lib/fluent/plugin_helper/http_server/request.rb +13 -2
  72. data/lib/fluent/plugin_helper/http_server/server.rb +14 -8
  73. data/lib/fluent/plugin_helper/http_server.rb +1 -8
  74. data/lib/fluent/plugin_helper/metrics.rb +7 -0
  75. data/lib/fluent/plugin_helper/server.rb +13 -1
  76. data/lib/fluent/plugin_helper/service_discovery.rb +1 -1
  77. data/lib/fluent/plugin_helper/socket_option.rb +2 -2
  78. data/lib/fluent/plugin_helper/storage.rb +1 -1
  79. data/lib/fluent/plugin_id.rb +3 -3
  80. data/lib/fluent/root_agent.rb +117 -21
  81. data/lib/fluent/source_only_buffer_agent.rb +102 -0
  82. data/lib/fluent/static_config_analysis.rb +3 -2
  83. data/lib/fluent/supervisor.rb +258 -39
  84. data/lib/fluent/system_config.rb +27 -6
  85. data/lib/fluent/test/base.rb +1 -1
  86. data/lib/fluent/test/driver/base.rb +2 -2
  87. data/lib/fluent/test/filter_test.rb +2 -2
  88. data/lib/fluent/test/formatter_test.rb +1 -1
  89. data/lib/fluent/test/helpers.rb +4 -0
  90. data/lib/fluent/test/input_test.rb +2 -2
  91. data/lib/fluent/test/output_test.rb +4 -4
  92. data/lib/fluent/test/parser_test.rb +1 -1
  93. data/lib/fluent/tls.rb +24 -0
  94. data/lib/fluent/variable_store.rb +1 -1
  95. data/lib/fluent/version.rb +1 -1
  96. data/lib/fluent/winsvc.rb +38 -8
  97. metadata +85 -28
  98. data/lib/fluent/plugin_helper/http_server/compat/server.rb +0 -92
  99. data/lib/fluent/plugin_helper/http_server/compat/ssl_context_extractor.rb +0 -52
  100. data/lib/fluent/plugin_helper/http_server/compat/webrick_handler.rb +0 -58
@@ -0,0 +1,102 @@
1
+ #
2
+ # Fluentd
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ #
16
+
17
+ require 'fluent/agent'
18
+ require 'fluent/system_config'
19
+
20
module Fluent
  # Agent used by the with-source-only mode.
  #
  # It configures a single `<match **>` section of type `buffer` (labelled
  # `@ROOT`) so that emitted events are kept in buffer files until the mode
  # is cancelled.
  class SourceOnlyBufferAgent < Agent
    # Use INSTANCE_ID to use the same base dir as the other workers.
    # This will make recovery easier.
    BUFFER_DIR_NAME = Fluent::INSTANCE_ID

    # @param log the logger handed to Agent
    # @param system_config provides `root_dir` and the optional
    #   `source_only_buffer` settings
    def initialize(log:, system_config:)
      super(log: log)

      buffer_root = system_config.root_dir || DEFAULT_BACKUP_DIR
      @default_buffer_path = File.join(buffer_root, 'source-only-buffer', BUFFER_DIR_NAME)
      @optional_buffer_config = system_config.source_only_buffer.to_h.transform_keys(&:to_s)
      @base_buffer_dir = nil
      @actual_buffer_dir = nil
    end

    # Builds the buffer output configuration and configures the agent with it.
    #
    # @param flush [Boolean] when false, flushing is disabled
    #   (`flush_at_shutdown` is 'false' and `flush_thread_count` is 0)
    def configure(flush: false)
      buffer_config = @optional_buffer_config.compact
      if flush
        buffer_config['flush_at_shutdown'] = 'true'
      else
        buffer_config['flush_at_shutdown'] = 'false'
        buffer_config['flush_thread_count'] = 0
      end
      buffer_config['path'] = @default_buffer_path unless buffer_config['path']

      buffer_section = Config::Element.new('buffer', '', buffer_config, [])
      match_section = Config::Element.new(
        'match', '**', {'@type' => 'buffer', '@label' => '@ROOT'}, [buffer_section]
      )
      super(Config::Element.new('SOURCE_ONLY_BUFFER', '', {}, [match_section]))

      @base_buffer_dir = buffer_config['path']
      # It can be "#{@base_buffer_dir}/worker#{fluentd_worker_id}/" when using multiple workers
      @actual_buffer_dir = File.dirname(outputs[0].buffer.path)

      return if flush

      log.info "with-source-only: the emitted data will be stored in the buffer files under" \
               " #{@base_buffer_dir}. You can send SIGWINCH to the supervisor process to cancel" \
               " with-source-only mode and process data."
    end

    # Removes the base buffer directory when this agent's buffer directory is
    # empty; otherwise warns that buffer files remain and leaves them in place.
    def cleanup
      # Any error while checking (e.g. the directory is already gone) counts as "empty".
      buffer_dir_empty =
        begin
          Dir.empty?(@actual_buffer_dir)
        rescue
          true
        end

      unless buffer_dir_empty
        log.warn "some buffer files remain in #{@base_buffer_dir}." \
                 " Please consider recovering or saving the buffer files in the directory." \
                 " To recover them, you can set the buffer path manually to system config and" \
                 " retry, i.e., restart Fluentd with with-source-only mode and send SIGWINCH again." \
                 " Config Example:\n#{config_example_to_recover(@base_buffer_dir)}"
        return
      end

      begin
        FileUtils.remove_dir(@base_buffer_dir)
      rescue Errno::ENOENT
        # This worker doesn't need to do anything. Another worker may remove the dir first.
      rescue => e
        log.warn "failed to remove the buffer directory: #{@base_buffer_dir}", error: e
      end
    end

    # Logs an error event instead of routing it anywhere.
    def emit_error_event(tag, time, record, error)
      error_info = {error: error, location: first_backtrace_line(error), tag: tag, time: time, record: record}
      log.warn "SourceOnlyBufferAgent: dump an error event:", error_info
    end

    # Logs a failed emit transaction together with its backtrace.
    def handle_emits_error(tag, es, error)
      error_info = {error: error, location: first_backtrace_line(error), tag: tag}
      log.warn "SourceOnlyBufferAgent: emit transaction failed:", error_info
      log.warn_backtrace
    end

    private

    # First backtrace entry of the error, or nil when it has no backtrace.
    def first_backtrace_line(error)
      trace = error.backtrace
      trace ? trace.first : nil
    end

    # System config snippet that points the source-only buffer at the given path.
    def config_example_to_recover(path)
      <<~EOC
        <system>
        <source_only_buffer>
        path #{path}
        </source_only_buffer>
        </system>
      EOC
    end
  end
end
@@ -18,7 +18,7 @@ require 'fluent/config'
18
18
  require 'fluent/plugin'
19
19
 
20
20
  module Fluent
21
- # Static Analysis means analysing all plugins and Fluent::Element without invokeing Plugin#configure
21
+ # Static Analysis means analysing all plugins and Fluent::Element without invoking Plugin#configure
22
22
  class StaticConfigAnalysis
23
23
  module Elem
24
24
  Input = Struct.new(:plugin, :config)
@@ -72,6 +72,7 @@ module Fluent
72
72
  available_worker_ids = [*0...@workers]
73
73
 
74
74
  ret = []
75
+ supported_directives = %w[source match filter label]
75
76
  conf.elements(name: 'worker').each do |config|
76
77
  ids = parse_worker_id(config)
77
78
  ids.each do |id|
@@ -83,7 +84,7 @@ module Fluent
83
84
  end
84
85
 
85
86
  config.elements.each do |elem|
86
- unless %w[source match filter label].include?(elem.name)
87
+ unless supported_directives.include?(elem.name)
87
88
  raise Fluent::ConfigError, "<worker> section cannot have <#{elem.name}> directive"
88
89
  end
89
90
  end
@@ -17,6 +17,7 @@
17
17
  require 'fileutils'
18
18
  require 'open3'
19
19
  require 'pathname'
20
+ require 'find'
20
21
 
21
22
  require 'fluent/config'
22
23
  require 'fluent/counter'
@@ -43,11 +44,16 @@ module Fluent
43
44
  @rpc_endpoint = nil
44
45
  @rpc_server = nil
45
46
  @counter = nil
47
+ @socket_manager_server = nil
48
+ @starting_new_supervisor_with_zero_downtime = false
49
+ @new_supervisor_pid = nil
50
+ start_in_parallel = ENV.key?("FLUENT_RUNNING_IN_PARALLEL_WITH_OLD")
51
+ @zero_downtime_restart_mutex = Mutex.new
46
52
 
47
53
  @fluentd_lock_dir = Dir.mktmpdir("fluentd-lock-")
48
54
  ENV['FLUENTD_LOCK_DIR'] = @fluentd_lock_dir
49
55
 
50
- if config[:rpc_endpoint]
56
+ if config[:rpc_endpoint] and not start_in_parallel
51
57
  @rpc_endpoint = config[:rpc_endpoint]
52
58
  @enable_get_dump = config[:enable_get_dump]
53
59
  run_rpc_server
@@ -59,16 +65,27 @@ module Fluent
59
65
  install_supervisor_signal_handlers
60
66
  end
61
67
 
62
- if counter = config[:counter_server]
68
+ if counter = config[:counter_server] and not start_in_parallel
63
69
  run_counter_server(counter)
64
70
  end
65
71
 
66
72
  if config[:disable_shared_socket]
67
73
  $log.info "shared socket for multiple workers is disabled"
74
+ elsif start_in_parallel
75
+ begin
76
+ raise "[BUG] SERVERENGINE_SOCKETMANAGER_PATH env var must exist when starting in parallel" unless ENV.key?('SERVERENGINE_SOCKETMANAGER_PATH')
77
+ @socket_manager_server = ServerEngine::SocketManager::Server.share_sockets_with_another_server(ENV['SERVERENGINE_SOCKETMANAGER_PATH'])
78
+ $log.info "zero-downtime-restart: took over the shared sockets", path: ENV['SERVERENGINE_SOCKETMANAGER_PATH']
79
+ rescue => e
80
+ $log.error "zero-downtime-restart: cancel sequence because failed to take over the shared sockets", error: e
81
+ raise
82
+ end
68
83
  else
69
- server = ServerEngine::SocketManager::Server.open
70
- ENV['SERVERENGINE_SOCKETMANAGER_PATH'] = server.path.to_s
84
+ @socket_manager_server = ServerEngine::SocketManager::Server.open
85
+ ENV['SERVERENGINE_SOCKETMANAGER_PATH'] = @socket_manager_server.path.to_s
71
86
  end
87
+
88
+ stop_parallel_old_supervisor_after_delay if start_in_parallel
72
89
  end
73
90
 
74
91
  def after_run
@@ -76,7 +93,9 @@ module Fluent
76
93
  stop_rpc_server if @rpc_endpoint
77
94
  stop_counter_server if @counter
78
95
  cleanup_lock_dir
79
- Fluent::Supervisor.cleanup_resources
96
+ Fluent::Supervisor.cleanup_socketmanager_path unless @starting_new_supervisor_with_zero_downtime
97
+ ensure
98
+ notify_new_supervisor_that_old_one_has_stopped if @starting_new_supervisor_with_zero_downtime
80
99
  end
81
100
 
82
101
  def cleanup_lock_dir
@@ -109,6 +128,13 @@ module Fluent
109
128
  end
110
129
  nil
111
130
  }
131
+ unless Fluent.windows?
132
+ @rpc_server.mount_proc('/api/processes.zeroDowntimeRestart') { |req, res|
133
+ $log.debug "fluentd RPC got /api/processes.zeroDowntimeRestart request"
134
+ Process.kill :USR2, Process.pid
135
+ nil
136
+ }
137
+ end
112
138
  @rpc_server.mount_proc('/api/plugins.flushBuffers') { |req, res|
113
139
  $log.debug "fluentd RPC got /api/plugins.flushBuffers request"
114
140
  if Fluent.windows?
@@ -137,27 +163,24 @@ module Fluent
137
163
 
138
164
  @rpc_server.mount_proc('/api/config.gracefulReload') { |req, res|
139
165
  $log.debug "fluentd RPC got /api/config.gracefulReload request"
140
- if Fluent.windows?
141
- supervisor_sigusr2_handler
142
- else
143
- Process.kill :USR2, Process.pid
144
- end
145
-
166
+ graceful_reload
146
167
  nil
147
168
  }
148
169
 
149
- @rpc_server.mount_proc('/api/config.getDump') { |req, res|
150
- $log.debug "fluentd RPC got /api/config.getDump request"
151
- $log.info "get dump in-memory config via HTTP"
152
- res.body = supervisor_get_dump_config_handler
153
- [nil, nil, res]
154
- } if @enable_get_dump
170
+ if @enable_get_dump
171
+ @rpc_server.mount_proc('/api/config.getDump') { |req, res|
172
+ $log.debug "fluentd RPC got /api/config.getDump request"
173
+ $log.info "get dump in-memory config via HTTP"
174
+ res.body = supervisor_get_dump_config_handler
175
+ [nil, nil, res]
176
+ }
177
+ end
155
178
 
156
179
  @rpc_server.start
157
180
  end
158
181
 
159
182
  def stop_rpc_server
160
- @rpc_server.shutdown
183
+ @rpc_server&.shutdown
161
184
  end
162
185
 
163
186
  def run_counter_server(counter_conf)
@@ -172,6 +195,44 @@ module Fluent
172
195
  @counter.stop
173
196
  end
174
197
 
198
# Starts a background thread that, after a fixed delay, sends SIGTERM to the
# old supervisor whose pid is stored in FLUENT_RUNNING_IN_PARALLEL_WITH_OLD.
#
# Failures (including an invalid pid value) are logged as warnings and never
# propagate out of the thread.
#
# @return [Thread] the thread performing the delayed stop
def stop_parallel_old_supervisor_after_delay
  Thread.new do
    # Delay to wait for the new workers to start up.
    # Even if it takes a long time to start the new workers and stop the old Fluentd first,
    # it is no problem because the socket buffer works, as long as the capacity is not exceeded.
    sleep 10

    # The variable can be absent by now (cancel_source_only deletes it);
    # in that case there is nothing to stop.
    raw_pid = ENV["FLUENT_RUNNING_IN_PARALLEL_WITH_OLD"]
    if raw_pid
      old_pid = raw_pid.to_i
      # A pid of 0 or a negative pid addresses process groups, so an
      # unparsable value must never reach Process.kill.
      unless old_pid.positive?
        raise ArgumentError, "invalid pid of the old supervisor: #{raw_pid.inspect}"
      end

      $log.info "zero-downtime-restart: stop the old supervisor"
      Process.kill :TERM, old_pid
    end
  rescue => e
    $log.warn "zero-downtime-restart: failed to stop the old supervisor." +
              " If the old one does not exist, please send SIGWINCH to this new process to start to work fully." +
              " If it exists, something went wrong. Please kill the old one manually.",
              error: e
  end
end
216
+
217
# Sends SIGWINCH to the new supervisor to tell it that this (old) supervisor
# has stopped.
#
# The new pid is read from the pid file when `config[:pid_path]` is set,
# otherwise from @new_supervisor_pid. Any failure is logged as an error and
# swallowed.
def notify_new_supervisor_that_old_one_has_stopped
  if config[:pid_path]
    new_pid = File.read(config[:pid_path]).to_i
  else
    raise "[BUG] new_supervisor_pid is not saved" unless @new_supervisor_pid
    new_pid = @new_supervisor_pid
  end

  # An empty or garbled pid file yields 0, and pid 0 (or a negative pid)
  # addresses process groups rather than the new supervisor.
  raise "invalid pid of the new supervisor: #{new_pid}" unless new_pid.positive?

  $log.info "zero-downtime-restart: notify the new supervisor (pid: #{new_pid}) that old one has stopped"
  Process.kill :WINCH, new_pid
rescue => e
  $log.error(
    "zero-downtime-restart: failed to notify the new supervisor." +
    " Please send SIGWINCH to the new supervisor process manually" +
    " if it does not start to work fully.",
    error: e
  )
end
235
+
175
236
  def install_supervisor_signal_handlers
176
237
  return if Fluent.windows?
177
238
 
@@ -187,7 +248,16 @@ module Fluent
187
248
 
188
249
  trap :USR2 do
189
250
  $log.debug 'fluentd supervisor process got SIGUSR2'
190
- supervisor_sigusr2_handler
251
+ if Fluent.windows?
252
+ graceful_reload
253
+ else
254
+ zero_downtime_restart
255
+ end
256
+ end
257
+
258
+ trap :WINCH do
259
+ $log.debug 'fluentd supervisor process got SIGWINCH'
260
+ cancel_source_only
191
261
  end
192
262
  end
193
263
 
@@ -254,7 +324,7 @@ module Fluent
254
324
  when :usr1
255
325
  supervisor_sigusr1_handler
256
326
  when :usr2
257
- supervisor_sigusr2_handler
327
+ graceful_reload
258
328
  when :cont
259
329
  supervisor_dump_handler_for_windows
260
330
  when :stop_event_thread
@@ -284,7 +354,7 @@ module Fluent
284
354
  send_signal_to_workers(:USR1)
285
355
  end
286
356
 
287
- def supervisor_sigusr2_handler
357
+ def graceful_reload
288
358
  conf = nil
289
359
  t = Thread.new do
290
360
  $log.info 'Reloading new config'
@@ -312,6 +382,79 @@ module Fluent
312
382
  $log.error "Failed to reload config file: #{e}"
313
383
  end
314
384
 
385
# Starts a new supervisor process in parallel with this one.
#
# The work runs in a background thread guarded by
# @zero_downtime_restart_mutex. The sequence is cancelled when a restart is
# already in progress or when this process is itself still running in
# parallel with an older supervisor.
#
# @return [Thread] the thread driving the sequence
def zero_downtime_restart
  Thread.new do
    @zero_downtime_restart_mutex.synchronize do
      $log.info "start zero-downtime-restart sequence"

      if @starting_new_supervisor_with_zero_downtime
        $log.warn "zero-downtime-restart: canceled because it is already starting"
        Thread.exit
      end
      if ENV.key?("FLUENT_RUNNING_IN_PARALLEL_WITH_OLD")
        $log.warn "zero-downtime-restart: canceled because the previous sequence is still running"
        Thread.exit
      end

      @starting_new_supervisor_with_zero_downtime = true
      commands = [ServerEngine.ruby_bin_path, $0] + ARGV
      env_to_add = {
        "SERVERENGINE_SOCKETMANAGER_INTERNAL_TOKEN" => ServerEngine::SocketManager::INTERNAL_TOKEN,
        "FLUENT_RUNNING_IN_PARALLEL_WITH_OLD" => "#{Process.pid}",
      }
      # Pass argv as separate arguments so that paths or options containing
      # spaces or shell metacharacters reach the new process unchanged.
      pid = Process.spawn(env_to_add, *commands)
      @new_supervisor_pid = pid unless config[:daemonize]

      if config[:daemonize]
        Thread.new(pid) do |child_pid|
          _, status = Process.wait2(child_pid)
          # check if `ServerEngine::Daemon#daemonize_with_double_fork` succeeded or not
          unless status.success?
            @starting_new_supervisor_with_zero_downtime = false
            $log.error "zero-downtime-restart: failed because new supervisor exits unexpectedly"
          end
        end
      else
        # Without daemonization the spawned process is the new supervisor
        # itself, so any exit observed here is reported as a failure.
        Thread.new(pid) do |child_pid|
          _, status = Process.wait2(child_pid)
          @starting_new_supervisor_with_zero_downtime = false
          $log.error "zero-downtime-restart: failed because new supervisor exits unexpectedly", status: status
        end
      end
    end
  rescue => e
    $log.error "zero-downtime-restart: failed", error: e
    @starting_new_supervisor_with_zero_downtime = false
  end
end
430
+
431
# Handles the request to cancel with-source-only mode on the supervisor side.
#
# When this supervisor was started in parallel with an old one, the RPC
# server and the counter server are started here (if configured) and the
# parallel-run marker is removed from the environment. In every case SIGWINCH
# is then forwarded to the workers.
def cancel_source_only
  return send_signal_to_workers(:WINCH) unless ENV.key?("FLUENT_RUNNING_IN_PARALLEL_WITH_OLD")

  if config[:rpc_endpoint]
    begin
      @rpc_endpoint = config[:rpc_endpoint]
      @enable_get_dump = config[:enable_get_dump]
      run_rpc_server
    rescue => e
      $log.error "failed to start RPC server", error: e
    end
  end

  counter_conf = config[:counter_server]
  if counter_conf
    begin
      run_counter_server(counter_conf)
    rescue => e
      $log.error "failed to start counter server", error: e
    end
  end

  $log.info "zero-downtime-restart: done all sequences, now new processes start to work fully"
  ENV.delete("FLUENT_RUNNING_IN_PARALLEL_WITH_OLD")

  send_signal_to_workers(:WINCH)
end
457
+
315
458
  def supervisor_dump_handler_for_windows
316
459
  # As for UNIX-like, SIGCONT signal to each process makes the process output its dump-file,
317
460
  # and it is implemented before the implementation of the function for Windows.
@@ -409,6 +552,7 @@ module Fluent
409
552
  main_cmd = config[:main_cmd]
410
553
  env = {
411
554
  'SERVERENGINE_WORKER_ID' => @worker_id.to_i.to_s,
555
+ 'FLUENT_INSTANCE_ID' => Fluent::INSTANCE_ID,
412
556
  }
413
557
  @pm = process_manager.spawn(env, *main_cmd)
414
558
  end
@@ -440,11 +584,11 @@ module Fluent
440
584
  stop_immediately_at_unrecoverable_exit: true,
441
585
  root_dir: params['root_dir'],
442
586
  logger: $log,
443
- log: $log.out,
587
+ log: $log&.out,
444
588
  log_level: params['log_level'],
445
589
  chuser: params['chuser'],
446
590
  chgroup: params['chgroup'],
447
- chumask: params['chumask'],
591
+ chumask: params['chumask'].is_a?(Integer) ? params['chumask'] : params['chumask']&.to_i(8),
448
592
  daemonize: daemonize,
449
593
  rpc_endpoint: params['rpc_endpoint'],
450
594
  counter_server: params['counter_server'],
@@ -486,7 +630,8 @@ module Fluent
486
630
  suppress_repeated_stacktrace: true,
487
631
  ignore_repeated_log_interval: nil,
488
632
  without_source: nil,
489
- enable_input_metrics: nil,
633
+ with_source_only: nil,
634
+ enable_input_metrics: true,
490
635
  enable_size_metrics: nil,
491
636
  use_v1_config: true,
492
637
  strict_config_value: nil,
@@ -499,12 +644,11 @@ module Fluent
499
644
  }
500
645
  end
501
646
 
502
- def self.cleanup_resources
503
- unless Fluent.windows?
504
- if ENV.has_key?('SERVERENGINE_SOCKETMANAGER_PATH')
505
- FileUtils.rm_f(ENV['SERVERENGINE_SOCKETMANAGER_PATH'])
506
- end
507
- end
647
+ def self.cleanup_socketmanager_path
648
+ return if Fluent.windows?
649
+ return unless ENV.key?('SERVERENGINE_SOCKETMANAGER_PATH')
650
+
651
+ FileUtils.rm_f(ENV['SERVERENGINE_SOCKETMANAGER_PATH'])
508
652
  end
509
653
 
510
654
  def initialize(cl_opt)
@@ -518,7 +662,6 @@ module Fluent
518
662
  @inline_config = opt[:inline_config]
519
663
  @use_v1_config = opt[:use_v1_config]
520
664
  @conf_encoding = opt[:conf_encoding]
521
- @log_path = opt[:log_path]
522
665
  @show_plugin_config = opt[:show_plugin_config]
523
666
  @libs = opt[:libs]
524
667
  @plugin_dirs = opt[:plugin_dirs]
@@ -527,13 +670,15 @@ module Fluent
527
670
  @chumask = opt[:chumask]
528
671
  @signame = opt[:signame]
529
672
 
530
- # TODO: `@log_rotate_age` and `@log_rotate_size` should be removed
673
+ # TODO: `@log_path`, `@log_rotate_age` and `@log_rotate_size` should be removed
531
674
  # since it should be merged with SystemConfig in `build_system_config()`.
532
- # We should always use `system_config.log.rotate_age` and `system_config.log.rotate_size`.
675
+ # We should always use `system_config.log.path`, `system_config.log.rotate_age`
676
+ # and `system_config.log.rotate_size`.
533
677
  # However, currently, there is a bug that `system_config.log` parameters
534
678
  # are not in `Fluent::SystemConfig::SYSTEM_CONFIG_PARAMETERS`, and these
535
679
  # parameters are not merged in `build_system_config()`.
536
680
  # Until we fix the bug of `Fluent::SystemConfig`, we need to use these instance variables.
681
+ @log_path = opt[:log_path]
537
682
  @log_rotate_age = opt[:log_rotate_age]
538
683
  @log_rotate_size = opt[:log_rotate_size]
539
684
 
@@ -549,6 +694,10 @@ module Fluent
549
694
  raise Fluent::ConfigError, "invalid number of workers (must be > 0):#{@system_config.workers}"
550
695
  end
551
696
 
697
+ if Fluent.windows? && @system_config.with_source_only
698
+ raise Fluent::ConfigError, "with-source-only is not supported on Windows"
699
+ end
700
+
552
701
  root_dir = @system_config.root_dir
553
702
  if root_dir
554
703
  if File.exist?(root_dir)
@@ -567,12 +716,24 @@ module Fluent
567
716
  begin
568
717
  ServerEngine::Privilege.change(@chuser, @chgroup)
569
718
  MessagePackFactory.init(enable_time_support: @system_config.enable_msgpack_time_support)
570
- Fluent::Engine.init(@system_config, supervisor_mode: true)
719
+ Fluent::Engine.init(@system_config, supervisor_mode: true, start_in_parallel: ENV.key?("FLUENT_RUNNING_IN_PARALLEL_WITH_OLD"))
571
720
  Fluent::Engine.run_configure(@conf, dry_run: dry_run)
572
721
  rescue Fluent::ConfigError => e
573
722
  $log.error 'config error', file: @config_path, error: e
574
723
  $log.debug_backtrace
575
724
  exit!(1)
725
+ rescue ScriptError => e # LoadError, NotImplementedError, SyntaxError
726
+ if e.respond_to?(:path)
727
+ $log.error e.message, path: e.path, error: e
728
+ else
729
+ $log.error e.message, error: e
730
+ end
731
+ $log.debug_backtrace
732
+ exit!(1)
733
+ rescue => e
734
+ $log.error "unexpected error", error: e
735
+ $log.debug_backtrace
736
+ exit!(1)
576
737
  end
577
738
 
578
739
  if dry_run
@@ -600,9 +761,13 @@ module Fluent
600
761
  raise Fluent::ConfigError, "invalid number of workers (must be 1 or unspecified) with --no-supervisor: #{@system_config.workers}"
601
762
  end
602
763
 
764
+ if Fluent.windows? && @system_config.with_source_only
765
+ raise Fluent::ConfigError, "with-source-only is not supported on Windows"
766
+ end
767
+
603
768
  install_main_process_signal_handlers
604
769
 
605
- # This is the only log messsage for @standalone_worker
770
+ # This is the only log message for @standalone_worker
606
771
  $log.info "starting fluentd-#{Fluent::VERSION} without supervision", pid: Process.pid, ruby: RUBY_VERSION if @standalone_worker
607
772
 
608
773
  main_process do
@@ -612,10 +777,10 @@ module Fluent
612
777
  File.umask(@chumask.to_i(8))
613
778
  end
614
779
  MessagePackFactory.init(enable_time_support: @system_config.enable_msgpack_time_support)
615
- Fluent::Engine.init(@system_config)
780
+ Fluent::Engine.init(@system_config, start_in_parallel: ENV.key?("FLUENT_RUNNING_IN_PARALLEL_WITH_OLD"))
616
781
  Fluent::Engine.run_configure(@conf)
617
782
  Fluent::Engine.run
618
- self.class.cleanup_resources if @standalone_worker
783
+ self.class.cleanup_socketmanager_path if @standalone_worker
619
784
  exit 0
620
785
  end
621
786
  end
@@ -642,6 +807,10 @@ module Fluent
642
807
 
643
808
  $log.info :supervisor, 'parsing config file is succeeded', path: @config_path
644
809
 
810
+ build_additional_configurations do |additional_conf|
811
+ @conf += additional_conf
812
+ end
813
+
645
814
  @libs.each do |lib|
646
815
  require lib
647
816
  end
@@ -690,6 +859,7 @@ module Fluent
690
859
 
691
860
  # TODO: we should remove this logic. This merging process should be done
692
861
  # in `build_system_config()`.
862
+ @log_path ||= system_config.log.path
693
863
  @log_rotate_age ||= system_config.log.rotate_age
694
864
  @log_rotate_size ||= system_config.log.rotate_size
695
865
 
@@ -738,11 +908,12 @@ module Fluent
738
908
  ignore_repeated_log_interval: system_config.ignore_repeated_log_interval,
739
909
  ignore_same_log_interval: system_config.ignore_same_log_interval,
740
910
  )
911
+ $log.force_stacktrace_level(system_config.log.forced_stacktrace_level) if system_config.force_stacktrace_level?
741
912
  $log.enable_color(false) if actual_log_path
742
913
  $log.enable_debug if system_config.log_level <= Fluent::Log::LEVEL_DEBUG
743
914
 
744
915
  $log.info "init #{process_type} logger",
745
- path: actual_log_path,
916
+ path: actual_log_path,
746
917
  rotate_age: @log_rotate_age,
747
918
  rotate_size: @log_rotate_size
748
919
  end
@@ -771,6 +942,7 @@ module Fluent
771
942
  'inline_config' => @inline_config,
772
943
  'chuser' => @chuser,
773
944
  'chgroup' => @chgroup,
945
+ 'chumask' => @chumask,
774
946
  'fluentd_conf_path' => @config_path,
775
947
  'fluentd_conf' => @conf.to_s,
776
948
  'use_v1_config' => @use_v1_config,
@@ -832,12 +1004,20 @@ module Fluent
832
1004
  end
833
1005
 
834
1006
  trap :USR2 do
1007
+ # Leave the old GracefulReload feature, just in case.
1008
+ # We can send SIGUSR2 to the worker process to use this old GracefulReload feature.
1009
+ # (Note: Normally, we can send SIGUSR2 to the supervisor process to use
1010
+ # zero-downtime-restart feature as GracefulReload on non-Windows.)
835
1011
  reload_config
836
1012
  end
837
1013
 
838
1014
  trap :CONT do
839
1015
  dump_non_windows
840
1016
  end
1017
+
1018
+ trap :WINCH do
1019
+ cancel_source_only
1020
+ end
841
1021
  end
842
1022
  end
843
1023
 
@@ -891,6 +1071,18 @@ module Fluent
891
1071
  end
892
1072
  end
893
1073
 
1074
# Cancels with-source-only mode in the main (worker) process.
#
# Runs in its own thread; any exception raised while cancelling is logged as
# a warning instead of propagating.
#
# @return [Thread] the thread performing the cancellation
def cancel_source_only
  Thread.new do
    $log.debug "fluentd main process get SIGWINCH"
    $log.info "try to cancel with-source-only mode"
    Fluent::Engine.cancel_source_only!
  rescue Exception => e
    $log.warn "failed to cancel source only", error: e
  end
end
1085
+
894
1086
  def reload_config
895
1087
  Thread.new do
896
1088
  $log.debug('worker got SIGUSR2')
@@ -904,6 +1096,10 @@ module Fluent
904
1096
  type: @config_file_type,
905
1097
  )
906
1098
 
1099
+ build_additional_configurations do |additional_conf|
1100
+ conf += additional_conf
1101
+ end
1102
+
907
1103
  Fluent::VariableStore.try_to_reset do
908
1104
  Fluent::Engine.reload_config(conf)
909
1105
  end
@@ -995,7 +1191,7 @@ module Fluent
995
1191
  def build_system_config(conf)
996
1192
  system_config = SystemConfig.create(conf, @cl_opt[:strict_config_value])
997
1193
  # Prefer the options explicitly specified in the command line
998
- #
1194
+ #
999
1195
  # TODO: There is a bug that `system_config.log.rotate_age/rotate_size` are
1000
1196
  # not merged with the command line options since they are not in
1001
1197
  # `SYSTEM_CONFIG_PARAMETERS`.
@@ -1010,6 +1206,29 @@ module Fluent
1010
1206
  system_config
1011
1207
  end
1012
1208
 
1209
# Loads every configuration file found under the configured include
# directory and yields each parsed configuration to the given block.
#
# Files are discovered recursively; only `.conf`, `.yaml` and `.yml` files
# are loaded. A nil or empty include directory disables the feature, and a
# missing directory is only reported in the log.
#
# @yield [conf] the result of Fluent::Config.build for one file
def build_additional_configurations
  include_dir = @system_config.config_include_dir
  # nil must be treated like an empty string: Find.find(nil) raises
  # TypeError, which the ENOENT rescue below would not catch.
  if include_dir.nil? || include_dir.empty?
    $log.info :supervisor, 'configuration include directory is disabled'
    return
  end

  supported_suffixes = [".conf", ".yaml", ".yml"]
  begin
    Find.find(include_dir) do |path|
      next if File.directory?(path)
      next unless supported_suffixes.include?(File.extname(path))
      # NOTE: both types of normal config (.conf) and YAML will be loaded.
      # Thus, it does not care whether @config_path is .conf or .yml.
      $log.info :supervisor, 'loading additional configuration file', path: path
      yield Fluent::Config.build(config_path: path,
                                 encoding: @conf_encoding,
                                 use_v1_config: @use_v1_config,
                                 type: :guess)
    end
  rescue Errno::ENOENT
    $log.info :supervisor, 'inaccessible include directory was specified', path: include_dir
  end
end
1231
+
1013
1232
  RUBY_ENCODING_OPTIONS_REGEX = %r{\A(-E|--encoding=|--internal-encoding=|--external-encoding=)}.freeze
1014
1233
 
1015
1234
  def build_spawn_command