fluent-plugin-kafka-xst 0.19.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. checksums.yaml +7 -0
  2. data/.github/ISSUE_TEMPLATE/bug_report.yaml +72 -0
  3. data/.github/ISSUE_TEMPLATE/config.yml +5 -0
  4. data/.github/ISSUE_TEMPLATE/feature_request.yaml +39 -0
  5. data/.github/dependabot.yml +6 -0
  6. data/.github/workflows/linux.yml +45 -0
  7. data/.github/workflows/stale-actions.yml +24 -0
  8. data/.gitignore +2 -0
  9. data/ChangeLog +344 -0
  10. data/Gemfile +6 -0
  11. data/LICENSE +14 -0
  12. data/README.md +594 -0
  13. data/Rakefile +12 -0
  14. data/ci/prepare-kafka-server.sh +33 -0
  15. data/examples/README.md +3 -0
  16. data/examples/out_kafka2/dynamic_topic_based_on_tag.conf +32 -0
  17. data/examples/out_kafka2/protobuf-formatter.conf +23 -0
  18. data/examples/out_kafka2/record_key.conf +31 -0
  19. data/fluent-plugin-kafka.gemspec +27 -0
  20. data/lib/fluent/plugin/in_kafka.rb +388 -0
  21. data/lib/fluent/plugin/in_kafka_group.rb +394 -0
  22. data/lib/fluent/plugin/in_rdkafka_group.rb +305 -0
  23. data/lib/fluent/plugin/kafka_plugin_util.rb +84 -0
  24. data/lib/fluent/plugin/kafka_producer_ext.rb +308 -0
  25. data/lib/fluent/plugin/out_kafka.rb +268 -0
  26. data/lib/fluent/plugin/out_kafka2.rb +427 -0
  27. data/lib/fluent/plugin/out_kafka_buffered.rb +374 -0
  28. data/lib/fluent/plugin/out_rdkafka.rb +324 -0
  29. data/lib/fluent/plugin/out_rdkafka2.rb +526 -0
  30. data/test/helper.rb +34 -0
  31. data/test/plugin/test_in_kafka.rb +66 -0
  32. data/test/plugin/test_in_kafka_group.rb +69 -0
  33. data/test/plugin/test_kafka_plugin_util.rb +44 -0
  34. data/test/plugin/test_out_kafka.rb +68 -0
  35. data/test/plugin/test_out_kafka2.rb +138 -0
  36. data/test/plugin/test_out_kafka_buffered.rb +68 -0
  37. data/test/plugin/test_out_rdkafka2.rb +182 -0
  38. metadata +214 -0
data/lib/fluent/plugin/in_kafka_group.rb
@@ -0,0 +1,394 @@
+ require 'fluent/input'
+ require 'fluent/time'
+ require 'fluent/plugin/kafka_plugin_util'
+
+ class Fluent::KafkaGroupInput < Fluent::Input
+   Fluent::Plugin.register_input('kafka_group', self)
+
+   config_param :brokers, :string, :default => 'localhost:9092',
+                :desc => "List of broker-host:port, separate with comma, must set."
+   config_param :consumer_group, :string,
+                :desc => "Consumer group name, must set."
+   config_param :topics, :string,
+                :desc => "Listening topics (separate with comma ',')."
+   config_param :client_id, :string, :default => 'kafka'
+   config_param :sasl_over_ssl, :bool, :default => true,
+                :desc => "Set to false to prevent SSL strict mode when using SASL authentication"
+   config_param :format, :string, :default => 'json',
+                :desc => "Supported format: (json|text|ltsv|msgpack)"
+   config_param :message_key, :string, :default => 'message',
+                :desc => "For 'text' format only."
+   config_param :add_headers, :bool, :default => false,
+                :desc => "Add kafka's message headers to event record"
+   config_param :add_prefix, :string, :default => nil,
+                :desc => "Tag prefix (Optional)"
+   config_param :add_suffix, :string, :default => nil,
+                :desc => "Tag suffix (Optional)"
+   config_param :retry_emit_limit, :integer, :default => nil,
+                :desc => "How long to stop event consuming when BufferQueueLimitError happens. Wait retry_emit_limit x 1s. The default is waiting until BufferQueueLimitError is resolved"
+   config_param :use_record_time, :bool, :default => false,
+                :desc => "Replace message timestamp with contents of 'time' field.",
+                :deprecated => "Use 'time_source record' instead."
+   config_param :time_source, :enum, :list => [:now, :kafka, :record], :default => :now,
+                :desc => "Source for message timestamp."
+   config_param :record_time_key, :string, :default => 'time',
+                :desc => "Time field when time_source is 'record'"
+   config_param :get_kafka_client_log, :bool, :default => false
+   config_param :time_format, :string, :default => nil,
+                :desc => "Time format to be used to parse 'time' field."
+   config_param :tag_source, :enum, :list => [:topic, :record], :default => :topic,
+                :desc => "Source for the fluentd event tag"
+   config_param :record_tag_key, :string, :default => 'tag',
+                :desc => "Tag field when tag_source is 'record'"
+   config_param :kafka_message_key, :string, :default => nil,
+                :desc => "Set kafka's message key to this field"
+   config_param :connect_timeout, :integer, :default => nil,
+                :desc => "[Integer, nil] the timeout setting for connecting to brokers"
+   config_param :socket_timeout, :integer, :default => nil,
+                :desc => "[Integer, nil] the timeout setting for socket connection"
+
+   config_param :retry_wait_seconds, :integer, :default => 30
+   config_param :disable_retry_limit, :bool, :default => false,
+                :desc => "If set true, it disables retry_limit and makes Fluentd retry indefinitely (default: false)"
+   config_param :retry_limit, :integer, :default => 10,
+                :desc => "The maximum number of retries for connecting kafka (default: 10)"
+   # Kafka consumer options
+   config_param :max_bytes, :integer, :default => 1048576,
+                :desc => "Maximum number of bytes to fetch."
+   config_param :max_wait_time, :integer, :default => nil,
+                :desc => "How long to block until the server sends us data."
+   config_param :min_bytes, :integer, :default => nil,
+                :desc => "Smallest amount of data the server should send us."
+   config_param :session_timeout, :integer, :default => nil,
+                :desc => "The number of seconds after which, if a client hasn't contacted the Kafka cluster"
+   config_param :offset_commit_interval, :integer, :default => nil,
+                :desc => "The interval between offset commits, in seconds"
+   config_param :offset_commit_threshold, :integer, :default => nil,
+                :desc => "The number of messages that can be processed before their offsets are committed"
+   config_param :fetcher_max_queue_size, :integer, :default => nil,
+                :desc => "The number of fetched messages per partition that are queued in fetcher queue"
+   config_param :refresh_topic_interval, :integer, :default => nil,
+                :desc => "The interval of refreshing the topic list in seconds. Zero or unset disables this"
+   config_param :start_from_beginning, :bool, :default => true,
+                :desc => "Whether to start from the beginning of the topic or just subscribe to new messages being produced"
+
+   include Fluent::KafkaPluginUtil::SSLSettings
+   include Fluent::KafkaPluginUtil::SaslSettings
+
+   class ForShutdown < StandardError
+   end
+
+   BufferError = if defined?(Fluent::Plugin::Buffer::BufferOverflowError)
+                   Fluent::Plugin::Buffer::BufferOverflowError
+                 else
+                   Fluent::BufferQueueLimitError
+                 end
+
+   unless method_defined?(:router)
+     define_method("router") { Fluent::Engine }
+   end
+
+   def initialize
+     super
+     require 'kafka'
+
+     @time_parser = nil
+     @retry_count = 1
+   end
+
+   def _config_to_array(config)
+     config_array = config.split(',').map {|k| k.strip }
+     if config_array.empty?
+       raise Fluent::ConfigError, "kafka_group: '#{config}' is a required parameter"
+     end
+     config_array
+   end
+
+   def multi_workers_ready?
+     true
+   end
+
+   private :_config_to_array
+
+   def configure(conf)
+     super
+
+     $log.info "Will watch for topics #{@topics} at brokers " \
+       "#{@brokers} and '#{@consumer_group}' group"
+
+     @topics = _config_to_array(@topics)
+
+     if conf['max_wait_ms']
+       log.warn "'max_wait_ms' parameter is deprecated. Use second unit 'max_wait_time' instead"
+       @max_wait_time = conf['max_wait_ms'].to_i / 1000
+     end
+
+     @parser_proc = setup_parser(conf)
+
+     @consumer_opts = {:group_id => @consumer_group}
+     @consumer_opts[:session_timeout] = @session_timeout if @session_timeout
+     @consumer_opts[:offset_commit_interval] = @offset_commit_interval if @offset_commit_interval
+     @consumer_opts[:offset_commit_threshold] = @offset_commit_threshold if @offset_commit_threshold
+     @consumer_opts[:fetcher_max_queue_size] = @fetcher_max_queue_size if @fetcher_max_queue_size
+     @consumer_opts[:refresh_topic_interval] = @refresh_topic_interval if @refresh_topic_interval
+
+     @fetch_opts = {}
+     @fetch_opts[:max_wait_time] = @max_wait_time if @max_wait_time
+     @fetch_opts[:min_bytes] = @min_bytes if @min_bytes
+
+     @time_source = :record if @use_record_time
+
+     if @time_source == :record and @time_format
+       if defined?(Fluent::TimeParser)
+         @time_parser = Fluent::TimeParser.new(@time_format)
+       else
+         @time_parser = Fluent::TextParser::TimeParser.new(@time_format)
+       end
+     end
+
+     if @time_source == :record && defined?(Fluent::NumericTimeParser)
+       @float_numeric_parse = Fluent::NumericTimeParser.new(:float)
+     end
+   end
+
+   def setup_parser(conf)
+     case @format
+     when 'json'
+       begin
+         require 'oj'
+         Oj.default_options = Fluent::DEFAULT_OJ_OPTIONS
+         Proc.new { |msg| Oj.load(msg.value) }
+       rescue LoadError
+         require 'yajl'
+         Proc.new { |msg| Yajl::Parser.parse(msg.value) }
+       end
+     when 'ltsv'
+       require 'ltsv'
+       Proc.new { |msg| LTSV.parse(msg.value, {:symbolize_keys => false}).first }
+     when 'msgpack'
+       require 'msgpack'
+       Proc.new { |msg| MessagePack.unpack(msg.value) }
+     when 'text'
+       Proc.new { |msg| {@message_key => msg.value} }
+     else
+       @custom_parser = Fluent::Plugin.new_parser(conf['format'])
+       @custom_parser.configure(conf)
+       Proc.new { |msg|
+         @custom_parser.parse(msg.value) {|_time, record|
+           record
+         }
+       }
+     end
+   end
+
+   def start
+     super
+
+     logger = @get_kafka_client_log ? log : nil
+     if @scram_mechanism != nil && @username != nil && @password != nil
+       @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: logger, connect_timeout: @connect_timeout, socket_timeout: @socket_timeout, ssl_ca_cert_file_path: @ssl_ca_cert,
+                          ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
+                          ssl_client_cert_key_password: @ssl_client_cert_key_password,
+                          ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_scram_username: @username, sasl_scram_password: @password,
+                          sasl_scram_mechanism: @scram_mechanism, sasl_over_ssl: @sasl_over_ssl, ssl_verify_hostname: @ssl_verify_hostname)
+     elsif @username != nil && @password != nil
+       @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: logger, connect_timeout: @connect_timeout, socket_timeout: @socket_timeout, ssl_ca_cert_file_path: @ssl_ca_cert,
+                          ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
+                          ssl_client_cert_key_password: @ssl_client_cert_key_password,
+                          ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_plain_username: @username, sasl_plain_password: @password,
+                          sasl_over_ssl: @sasl_over_ssl, ssl_verify_hostname: @ssl_verify_hostname)
+     else
+       @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: logger, connect_timeout: @connect_timeout, socket_timeout: @socket_timeout, ssl_ca_cert_file_path: @ssl_ca_cert,
+                          ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
+                          ssl_client_cert_key_password: @ssl_client_cert_key_password,
+                          ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_gssapi_principal: @principal, sasl_gssapi_keytab: @keytab,
+                          ssl_verify_hostname: @ssl_verify_hostname)
+     end
+
+     @consumer = setup_consumer
+     @thread = Thread.new(&method(:run))
+   end
+
+   def shutdown
+     # This nil assignment should be guarded by mutex in multithread programming manner.
+     # But the situation is very low contention, so we don't use mutex for now.
+     # If the problem happens, we will add a guard for consumer.
+     consumer = @consumer
+     @consumer = nil
+     consumer.stop
+
+     @thread.join
+     @kafka.close
+     super
+   end
+
+   def setup_consumer
+     consumer = @kafka.consumer(**@consumer_opts)
+     @topics.each { |topic|
+       if m = /^\/(.+)\/$/.match(topic)
+         topic_or_regex = Regexp.new(m[1])
+         $log.info "Subscribe to topics matching the regex #{topic}"
+       else
+         topic_or_regex = topic
+         $log.info "Subscribe to topic #{topic}"
+       end
+       consumer.subscribe(topic_or_regex, start_from_beginning: @start_from_beginning, max_bytes_per_partition: @max_bytes)
+     }
+     consumer
+   end
+
+   def reconnect_consumer
+     log.warn "Stopping Consumer"
+     consumer = @consumer
+     @consumer = nil
+     if consumer
+       consumer.stop
+     end
+     log.warn "Could not connect to broker. retry_time:#{@retry_count}. Next retry will be in #{@retry_wait_seconds} seconds"
+     @retry_count = @retry_count + 1
+     sleep @retry_wait_seconds
+     @consumer = setup_consumer
+     log.warn "Re-starting consumer #{Time.now.to_s}"
+     @retry_count = 0
+   rescue => e
+     log.error "unexpected error during re-starting consumer object access", :error => e.to_s
+     log.error_backtrace
+     if @retry_count <= @retry_limit or disable_retry_limit
+       reconnect_consumer
+     end
+   end
+
+   def process_batch_with_record_tag(batch)
+     es = {}
+     batch.messages.each { |msg|
+       begin
+         record = @parser_proc.call(msg)
+         tag = record[@record_tag_key]
+         tag = @add_prefix + "." + tag if @add_prefix
+         tag = tag + "." + @add_suffix if @add_suffix
+         es[tag] ||= Fluent::MultiEventStream.new
+         case @time_source
+         when :kafka
+           record_time = Fluent::EventTime.from_time(msg.create_time)
+         when :now
+           record_time = Fluent::Engine.now
+         when :record
+           if @time_format
+             record_time = @time_parser.parse(record[@record_time_key].to_s)
+           else
+             record_time = record[@record_time_key]
+           end
+         else
+           log.fatal "BUG: invalid time_source: #{@time_source}"
+         end
+         if @kafka_message_key
+           record[@kafka_message_key] = msg.key
+         end
+         if @add_headers
+           msg.headers.each_pair { |k, v|
+             record[k] = v
+           }
+         end
+         es[tag].add(record_time, record)
+       rescue => e
+         log.warn "parser error in #{batch.topic}/#{batch.partition}", :error => e.to_s, :value => msg.value, :offset => msg.offset
+         log.debug_backtrace
+       end
+     }
+
+     unless es.empty?
+       es.each { |tag,es|
+         emit_events(tag, es)
+       }
+     end
+   end
+
+   def process_batch(batch)
+     es = Fluent::MultiEventStream.new
+     tag = batch.topic
+     tag = @add_prefix + "." + tag if @add_prefix
+     tag = tag + "." + @add_suffix if @add_suffix
+
+     batch.messages.each { |msg|
+       begin
+         record = @parser_proc.call(msg)
+         case @time_source
+         when :kafka
+           record_time = Fluent::EventTime.from_time(msg.create_time)
+         when :now
+           record_time = Fluent::Engine.now
+         when :record
+           record_time = record[@record_time_key]
+
+           if @time_format
+             record_time = @time_parser.parse(record_time.to_s)
+           elsif record_time.is_a?(Float) && @float_numeric_parse
+             record_time = @float_numeric_parse.parse(record_time)
+           end
+         else
+           log.fatal "BUG: invalid time_source: #{@time_source}"
+         end
+         if @kafka_message_key
+           record[@kafka_message_key] = msg.key
+         end
+         if @add_headers
+           msg.headers.each_pair { |k, v|
+             record[k] = v
+           }
+         end
+         es.add(record_time, record)
+       rescue => e
+         log.warn "parser error in #{batch.topic}/#{batch.partition}", :error => e.to_s, :value => msg.value, :offset => msg.offset
+         log.debug_backtrace
+       end
+     }
+
+     unless es.empty?
+       emit_events(tag, es)
+     end
+   end
+
+   def run
+     while @consumer
+       begin
+         @consumer.each_batch(**@fetch_opts) { |batch|
+           if @tag_source == :record
+             process_batch_with_record_tag(batch)
+           else
+             process_batch(batch)
+           end
+         }
+       rescue ForShutdown
+       rescue => e
+         log.error "unexpected error during consuming events from kafka. Re-fetch events.", :error => e.to_s
+         log.error_backtrace
+         reconnect_consumer
+       end
+     end
+   rescue => e
+     log.error "unexpected error during consumer object access", :error => e.to_s
+     log.error_backtrace
+   end
+
+   def emit_events(tag, es)
+     retries = 0
+     begin
+       router.emit_stream(tag, es)
+     rescue BufferError
+       raise ForShutdown if @consumer.nil?
+
+       if @retry_emit_limit.nil?
+         sleep 1
+         retry
+       end
+
+       if retries < @retry_emit_limit
+         retries += 1
+         sleep 1
+         retry
+       else
+         raise RuntimeError, "Exceeds retry_emit_limit"
+       end
+     end
+   end
+ end
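
The file above declares the configuration surface of the ruby-kafka based consumer. As a rough sketch, a Fluentd source block wiring these parameters together could look like the following (broker list, group name and topic names are illustrative placeholders, not values taken from the package):

    <source>
      @type kafka_group
      brokers kafka1:9092,kafka2:9092
      consumer_group my-fluentd-group
      topics app_events,/^metrics\..+$/
      format json
      add_prefix kafka
      time_source kafka
      start_from_beginning false
    </source>

A topic wrapped in slashes is treated as a regular expression by setup_consumer, so the second topics entry above would subscribe to every topic matching ^metrics\..+$, and add_prefix routes events to tags such as kafka.app_events.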
data/lib/fluent/plugin/in_rdkafka_group.rb
@@ -0,0 +1,305 @@
+ require 'fluent/plugin/input'
+ require 'fluent/time'
+ require 'fluent/plugin/kafka_plugin_util'
+
+ require 'rdkafka'
+
+ class Fluent::Plugin::RdKafkaGroupInput < Fluent::Plugin::Input
+   Fluent::Plugin.register_input('rdkafka_group', self)
+
+   helpers :thread, :parser, :compat_parameters
+
+   config_param :topics, :string,
+                :desc => "Listening topics (separate with comma ',')."
+
+   config_param :format, :string, :default => 'json',
+                :desc => "Supported format: (json|text|ltsv|msgpack)"
+   config_param :message_key, :string, :default => 'message',
+                :desc => "For 'text' format only."
+   config_param :add_headers, :bool, :default => false,
+                :desc => "Add kafka's message headers to event record"
+   config_param :add_prefix, :string, :default => nil,
+                :desc => "Tag prefix (Optional)"
+   config_param :add_suffix, :string, :default => nil,
+                :desc => "Tag suffix (Optional)"
+   config_param :use_record_time, :bool, :default => false,
+                :desc => "Replace message timestamp with contents of 'time' field.",
+                :deprecated => "Use 'time_source record' instead."
+   config_param :time_source, :enum, :list => [:now, :kafka, :record], :default => :now,
+                :desc => "Source for message timestamp."
+   config_param :record_time_key, :string, :default => 'time',
+                :desc => "Time field when time_source is 'record'"
+   config_param :time_format, :string, :default => nil,
+                :desc => "Time format to be used to parse 'time' field."
+   config_param :kafka_message_key, :string, :default => nil,
+                :desc => "Set kafka's message key to this field"
+
+   config_param :retry_emit_limit, :integer, :default => nil,
+                :desc => "How long to stop event consuming when BufferQueueLimitError happens. Wait retry_emit_limit x 1s. The default is waiting until BufferQueueLimitError is resolved"
+   config_param :retry_wait_seconds, :integer, :default => 30
+   config_param :disable_retry_limit, :bool, :default => false,
+                :desc => "If set true, it disables retry_limit and makes Fluentd retry indefinitely (default: false)"
+   config_param :retry_limit, :integer, :default => 10,
+                :desc => "The maximum number of retries for connecting kafka (default: 10)"
+
+   config_param :max_wait_time_ms, :integer, :default => 250,
+                :desc => "How long to block polls in milliseconds until the server sends us data."
+   config_param :max_batch_size, :integer, :default => 10000,
+                :desc => "Maximum number of log lines emitted in a single batch."
+
+   config_param :kafka_configs, :hash, :default => {},
+                :desc => "Kafka configuration properties as described in https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md"
+
+   config_section :parse do
+     config_set_default :@type, 'json'
+   end
+
+   include Fluent::KafkaPluginUtil::SSLSettings
+   include Fluent::KafkaPluginUtil::SaslSettings
+
+   class ForShutdown < StandardError
+   end
+
+   BufferError = Fluent::Plugin::Buffer::BufferOverflowError
+
+   def initialize
+     super
+
+     @time_parser = nil
+     @retry_count = 1
+   end
+
+   def _config_to_array(config)
+     config_array = config.split(',').map {|k| k.strip }
+     if config_array.empty?
+       raise Fluent::ConfigError, "kafka_group: '#{config}' is a required parameter"
+     end
+     config_array
+   end
+
+   def multi_workers_ready?
+     true
+   end
+
+   private :_config_to_array
+
+   def configure(conf)
+     compat_parameters_convert(conf, :parser)
+
+     super
+
+     log.warn "The in_rdkafka_group consumer was not yet tested under heavy production load. Use it at your own risk!"
+
+     log.info "Will watch for topics #{@topics} at brokers " \
+       "#{@kafka_configs["bootstrap.servers"]} and '#{@kafka_configs["group.id"]}' group"
+
+     @topics = _config_to_array(@topics)
+
+     parser_conf = conf.elements('parse').first
+     unless parser_conf
+       raise Fluent::ConfigError, "<parse> section or format parameter is required."
+     end
+     unless parser_conf["@type"]
+       raise Fluent::ConfigError, "parse/@type is required."
+     end
+     @parser_proc = setup_parser(parser_conf)
+
+     @time_source = :record if @use_record_time
+
+     if @time_source == :record and @time_format
+       @time_parser = Fluent::TimeParser.new(@time_format)
+     end
+   end
+
+   def setup_parser(parser_conf)
+     format = parser_conf["@type"]
+     case format
+     when 'json'
+       begin
+         require 'oj'
+         Oj.default_options = Fluent::DEFAULT_OJ_OPTIONS
+         Proc.new { |msg| Oj.load(msg.payload) }
+       rescue LoadError
+         require 'yajl'
+         Proc.new { |msg| Yajl::Parser.parse(msg.payload) }
+       end
+     when 'ltsv'
+       require 'ltsv'
+       Proc.new { |msg| LTSV.parse(msg.payload, {:symbolize_keys => false}).first }
+     when 'msgpack'
+       require 'msgpack'
+       Proc.new { |msg| MessagePack.unpack(msg.payload) }
+     when 'text'
+       Proc.new { |msg| {@message_key => msg.payload} }
+     else
+       @custom_parser = parser_create(usage: 'in-rdkafka-plugin', conf: parser_conf)
+       Proc.new { |msg|
+         @custom_parser.parse(msg.payload) {|_time, record|
+           record
+         }
+       }
+     end
+   end
+
+   def start
+     super
+
+     @consumer = setup_consumer
+
+     thread_create(:in_rdkafka_group, &method(:run))
+   end
+
+   def shutdown
+     # This nil assignment should be guarded by mutex in multithread programming manner.
+     # But the situation is very low contention, so we don't use mutex for now.
+     # If the problem happens, we will add a guard for consumer.
+     consumer = @consumer
+     @consumer = nil
+     consumer.close
+
+     super
+   end
+
+   def setup_consumer
+     consumer = Rdkafka::Config.new(@kafka_configs).consumer
+     consumer.subscribe(*@topics)
+     consumer
+   end
+
+   def reconnect_consumer
+     log.warn "Stopping Consumer"
+     consumer = @consumer
+     @consumer = nil
+     if consumer
+       consumer.close
+     end
+     log.warn "Could not connect to broker. retry_time:#{@retry_count}. Next retry will be in #{@retry_wait_seconds} seconds"
+     @retry_count = @retry_count + 1
+     sleep @retry_wait_seconds
+     @consumer = setup_consumer
+     log.warn "Re-starting consumer #{Time.now.to_s}"
+     @retry_count = 0
+   rescue => e
+     log.error "unexpected error during re-starting consumer object access", :error => e.to_s
+     log.error_backtrace
+     if @retry_count <= @retry_limit or disable_retry_limit
+       reconnect_consumer
+     end
+   end
+
+   class Batch
+     attr_reader :topic
+     attr_reader :messages
+
+     def initialize(topic)
+       @topic = topic
+       @messages = []
+     end
+   end
+
+   # Executes the passed codeblock on a batch of messages.
+   # It is guaranteed that every message in a given batch belongs to the same topic, because the tagging logic in :run expects that property.
+   # The number of maximum messages in a batch is capped by the :max_batch_size configuration value. It ensures that consuming from a single
+   # topic for a long time (e.g. with `auto.offset.reset` set to `earliest`) does not lead to memory exhaustion. Also, calling consumer.poll
+   # advances the consumer offset, so in case the process crashes we might lose at most :max_batch_size messages.
+   def each_batch(&block)
+     batch = nil
+     message = nil
+     while @consumer
+       message = @consumer.poll(@max_wait_time_ms)
+       if message
+         if not batch
+           batch = Batch.new(message.topic)
+         elsif batch.topic != message.topic || batch.messages.size >= @max_batch_size
+           yield batch
+           batch = Batch.new(message.topic)
+         end
+         batch.messages << message
+       else
+         yield batch if batch
+         batch = nil
+       end
+     end
+     yield batch if batch
+   end
+
+   def run
+     while @consumer
+       begin
+         each_batch { |batch|
+           log.debug "A new batch for topic #{batch.topic} with #{batch.messages.size} messages"
+           es = Fluent::MultiEventStream.new
+           tag = batch.topic
+           tag = @add_prefix + "." + tag if @add_prefix
+           tag = tag + "." + @add_suffix if @add_suffix
+
+           batch.messages.each { |msg|
+             begin
+               record = @parser_proc.call(msg)
+               case @time_source
+               when :kafka
+                 record_time = Fluent::EventTime.from_time(msg.timestamp)
+               when :now
+                 record_time = Fluent::Engine.now
+               when :record
+                 if @time_format
+                   record_time = @time_parser.parse(record[@record_time_key].to_s)
+                 else
+                   record_time = record[@record_time_key]
+                 end
+               else
+                 log.fatal "BUG: invalid time_source: #{@time_source}"
+               end
+               if @kafka_message_key
+                 record[@kafka_message_key] = msg.key
+               end
+               if @add_headers
+                 msg.headers.each_pair { |k, v|
+                   record[k] = v
+                 }
+               end
+               es.add(record_time, record)
+             rescue => e
+               log.warn "parser error in #{msg.topic}/#{msg.partition}", :error => e.to_s, :value => msg.payload, :offset => msg.offset
+               log.debug_backtrace
+             end
+           }
+
+           unless es.empty?
+             emit_events(tag, es)
+           end
+         }
+       rescue ForShutdown
+       rescue => e
+         log.error "unexpected error during consuming events from kafka. Re-fetch events.", :error => e.to_s
+         log.error_backtrace
+         reconnect_consumer
+       end
+     end
+   rescue => e
+     log.error "unexpected error during consumer object access", :error => e.to_s
+     log.error_backtrace
+   end
+
+   def emit_events(tag, es)
+     retries = 0
+     begin
+       router.emit_stream(tag, es)
+     rescue BufferError
+       raise ForShutdown if @consumer.nil?
+
+       if @retry_emit_limit.nil?
+         sleep 1
+         retry
+       end
+
+       if retries < @retry_emit_limit
+         retries += 1
+         sleep 1
+         retry
+       else
+         raise RuntimeError, "Exceeds retry_emit_limit"
+       end
+     end
+   end
+ end
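
Unlike kafka_group, the rdkafka-based consumer takes its connection settings through the kafka_configs hash of librdkafka properties; configure only reads "bootstrap.servers" and "group.id" back out of that hash for its startup log line. A minimal sketch of a matching source block, again with placeholder broker, group and topic names, and with the hash parameter written as single-line JSON:

    <source>
      @type rdkafka_group
      topics app_events
      add_prefix kafka
      max_batch_size 5000
      kafka_configs {"bootstrap.servers":"kafka1:9092,kafka2:9092","group.id":"my-fluentd-group","auto.offset.reset":"earliest"}
      <parse>
        @type json
      </parse>
    </source>

Because consumer.poll advances the consumer offset as each_batch runs, a crash can lose at most max_batch_size messages, and the bounded batch size keeps memory use capped even when auto.offset.reset is earliest, as the comment above each_batch notes.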