fluent-plugin-kafka-xst 0.19.1

Files changed (38)
  1. checksums.yaml +7 -0
  2. data/.github/ISSUE_TEMPLATE/bug_report.yaml +72 -0
  3. data/.github/ISSUE_TEMPLATE/config.yml +5 -0
  4. data/.github/ISSUE_TEMPLATE/feature_request.yaml +39 -0
  5. data/.github/dependabot.yml +6 -0
  6. data/.github/workflows/linux.yml +45 -0
  7. data/.github/workflows/stale-actions.yml +24 -0
  8. data/.gitignore +2 -0
  9. data/ChangeLog +344 -0
  10. data/Gemfile +6 -0
  11. data/LICENSE +14 -0
  12. data/README.md +594 -0
  13. data/Rakefile +12 -0
  14. data/ci/prepare-kafka-server.sh +33 -0
  15. data/examples/README.md +3 -0
  16. data/examples/out_kafka2/dynamic_topic_based_on_tag.conf +32 -0
  17. data/examples/out_kafka2/protobuf-formatter.conf +23 -0
  18. data/examples/out_kafka2/record_key.conf +31 -0
  19. data/fluent-plugin-kafka.gemspec +27 -0
  20. data/lib/fluent/plugin/in_kafka.rb +388 -0
  21. data/lib/fluent/plugin/in_kafka_group.rb +394 -0
  22. data/lib/fluent/plugin/in_rdkafka_group.rb +305 -0
  23. data/lib/fluent/plugin/kafka_plugin_util.rb +84 -0
  24. data/lib/fluent/plugin/kafka_producer_ext.rb +308 -0
  25. data/lib/fluent/plugin/out_kafka.rb +268 -0
  26. data/lib/fluent/plugin/out_kafka2.rb +427 -0
  27. data/lib/fluent/plugin/out_kafka_buffered.rb +374 -0
  28. data/lib/fluent/plugin/out_rdkafka.rb +324 -0
  29. data/lib/fluent/plugin/out_rdkafka2.rb +526 -0
  30. data/test/helper.rb +34 -0
  31. data/test/plugin/test_in_kafka.rb +66 -0
  32. data/test/plugin/test_in_kafka_group.rb +69 -0
  33. data/test/plugin/test_kafka_plugin_util.rb +44 -0
  34. data/test/plugin/test_out_kafka.rb +68 -0
  35. data/test/plugin/test_out_kafka2.rb +138 -0
  36. data/test/plugin/test_out_kafka_buffered.rb +68 -0
  37. data/test/plugin/test_out_rdkafka2.rb +182 -0
  38. metadata +214 -0
data/lib/fluent/plugin/in_kafka_group.rb
@@ -0,0 +1,394 @@
+require 'fluent/input'
+require 'fluent/time'
+require 'fluent/plugin/kafka_plugin_util'
+
+class Fluent::KafkaGroupInput < Fluent::Input
+  Fluent::Plugin.register_input('kafka_group', self)
+
+  config_param :brokers, :string, :default => 'localhost:9092',
+               :desc => "List of broker-host:port, separate with comma, must set."
+  config_param :consumer_group, :string,
+               :desc => "Consumer group name, must set."
+  config_param :topics, :string,
+               :desc => "Listening topics(separate with comma',')."
+  config_param :client_id, :string, :default => 'kafka'
+  config_param :sasl_over_ssl, :bool, :default => true,
+               :desc => "Set to false to prevent SSL strict mode when using SASL authentication"
+  config_param :format, :string, :default => 'json',
+               :desc => "Supported format: (json|text|ltsv|msgpack)"
+  config_param :message_key, :string, :default => 'message',
+               :desc => "For 'text' format only."
+  config_param :add_headers, :bool, :default => false,
+               :desc => "Add kafka's message headers to event record"
+  config_param :add_prefix, :string, :default => nil,
+               :desc => "Tag prefix (Optional)"
+  config_param :add_suffix, :string, :default => nil,
+               :desc => "Tag suffix (Optional)"
+  config_param :retry_emit_limit, :integer, :default => nil,
+               :desc => "How long to stop event consuming when BufferQueueLimitError happens. Wait retry_emit_limit x 1s. The default is waiting until BufferQueueLimitError is resolved"
+  config_param :use_record_time, :bool, :default => false,
+               :desc => "Replace message timestamp with contents of 'time' field.",
+               :deprecated => "Use 'time_source record' instead."
+  config_param :time_source, :enum, :list => [:now, :kafka, :record], :default => :now,
+               :desc => "Source for message timestamp."
+  config_param :record_time_key, :string, :default => 'time',
+               :desc => "Time field when time_source is 'record'"
+  config_param :get_kafka_client_log, :bool, :default => false
+  config_param :time_format, :string, :default => nil,
+               :desc => "Time format to be used to parse 'time' field."
+  config_param :tag_source, :enum, :list => [:topic, :record], :default => :topic,
+               :desc => "Source for the fluentd event tag"
+  config_param :record_tag_key, :string, :default => 'tag',
+               :desc => "Tag field when tag_source is 'record'"
+  config_param :kafka_message_key, :string, :default => nil,
+               :desc => "Set kafka's message key to this field"
+  config_param :connect_timeout, :integer, :default => nil,
+               :desc => "[Integer, nil] the timeout setting for connecting to brokers"
+  config_param :socket_timeout, :integer, :default => nil,
+               :desc => "[Integer, nil] the timeout setting for socket connection"
+
+  config_param :retry_wait_seconds, :integer, :default => 30
+  config_param :disable_retry_limit, :bool, :default => false,
+               :desc => "If set true, it disables retry_limit and make Fluentd retry indefinitely (default: false)"
+  config_param :retry_limit, :integer, :default => 10,
+               :desc => "The maximum number of retries for connecting kafka (default: 10)"
+  # Kafka consumer options
+  config_param :max_bytes, :integer, :default => 1048576,
+               :desc => "Maximum number of bytes to fetch."
+  config_param :max_wait_time, :integer, :default => nil,
+               :desc => "How long to block until the server sends us data."
+  config_param :min_bytes, :integer, :default => nil,
+               :desc => "Smallest amount of data the server should send us."
+  config_param :session_timeout, :integer, :default => nil,
+               :desc => "The number of seconds after which, if a client hasn't contacted the Kafka cluster"
+  config_param :offset_commit_interval, :integer, :default => nil,
+               :desc => "The interval between offset commits, in seconds"
+  config_param :offset_commit_threshold, :integer, :default => nil,
+               :desc => "The number of messages that can be processed before their offsets are committed"
+  config_param :fetcher_max_queue_size, :integer, :default => nil,
+               :desc => "The number of fetched messages per partition that are queued in fetcher queue"
+  config_param :refresh_topic_interval, :integer, :default => nil,
+               :desc => "The interval of refreshing the topic list in seconds. Zero or unset disables this"
+  config_param :start_from_beginning, :bool, :default => true,
+               :desc => "Whether to start from the beginning of the topic or just subscribe to new messages being produced"
+
+  include Fluent::KafkaPluginUtil::SSLSettings
+  include Fluent::KafkaPluginUtil::SaslSettings
+
+  class ForShutdown < StandardError
+  end
+
+  BufferError = if defined?(Fluent::Plugin::Buffer::BufferOverflowError)
+                  Fluent::Plugin::Buffer::BufferOverflowError
+                else
+                  Fluent::BufferQueueLimitError
+                end
+
+  unless method_defined?(:router)
+    define_method("router") { Fluent::Engine }
+  end
+
+  def initialize
+    super
+    require 'kafka'
+
+    @time_parser = nil
+    @retry_count = 1
+  end
+
+  def _config_to_array(config)
+    config_array = config.split(',').map {|k| k.strip }
+    if config_array.empty?
+      raise Fluent::ConfigError, "kafka_group: '#{config}' is a required parameter"
+    end
+    config_array
+  end
+
+  def multi_workers_ready?
+    true
+  end
+
+  private :_config_to_array
+
+  def configure(conf)
+    super
+
+    $log.info "Will watch for topics #{@topics} at brokers " \
+              "#{@brokers} and '#{@consumer_group}' group"
+
+    @topics = _config_to_array(@topics)
+
+    if conf['max_wait_ms']
+      log.warn "'max_wait_ms' parameter is deprecated. Use second unit 'max_wait_time' instead"
+      @max_wait_time = conf['max_wait_ms'].to_i / 1000
+    end
+
+    @parser_proc = setup_parser(conf)
+
+    @consumer_opts = {:group_id => @consumer_group}
+    @consumer_opts[:session_timeout] = @session_timeout if @session_timeout
+    @consumer_opts[:offset_commit_interval] = @offset_commit_interval if @offset_commit_interval
+    @consumer_opts[:offset_commit_threshold] = @offset_commit_threshold if @offset_commit_threshold
+    @consumer_opts[:fetcher_max_queue_size] = @fetcher_max_queue_size if @fetcher_max_queue_size
+    @consumer_opts[:refresh_topic_interval] = @refresh_topic_interval if @refresh_topic_interval
+
+    @fetch_opts = {}
+    @fetch_opts[:max_wait_time] = @max_wait_time if @max_wait_time
+    @fetch_opts[:min_bytes] = @min_bytes if @min_bytes
+
+    @time_source = :record if @use_record_time
+
+    if @time_source == :record and @time_format
+      if defined?(Fluent::TimeParser)
+        @time_parser = Fluent::TimeParser.new(@time_format)
+      else
+        @time_parser = Fluent::TextParser::TimeParser.new(@time_format)
+      end
+    end
+
+    if @time_source == :record && defined?(Fluent::NumericTimeParser)
+      @float_numeric_parse = Fluent::NumericTimeParser.new(:float)
+    end
+  end
+
+  def setup_parser(conf)
+    case @format
+    when 'json'
+      begin
+        require 'oj'
+        Oj.default_options = Fluent::DEFAULT_OJ_OPTIONS
+        Proc.new { |msg| Oj.load(msg.value) }
+      rescue LoadError
+        require 'yajl'
+        Proc.new { |msg| Yajl::Parser.parse(msg.value) }
+      end
+    when 'ltsv'
+      require 'ltsv'
+      Proc.new { |msg| LTSV.parse(msg.value, {:symbolize_keys => false}).first }
+    when 'msgpack'
+      require 'msgpack'
+      Proc.new { |msg| MessagePack.unpack(msg.value) }
+    when 'text'
+      Proc.new { |msg| {@message_key => msg.value} }
+    else
+      @custom_parser = Fluent::Plugin.new_parser(conf['format'])
+      @custom_parser.configure(conf)
+      Proc.new { |msg|
+        @custom_parser.parse(msg.value) {|_time, record|
+          record
+        }
+      }
+    end
+  end
+
+  def start
+    super
+
+    logger = @get_kafka_client_log ? log : nil
+    if @scram_mechanism != nil && @username != nil && @password != nil
+      @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: logger, connect_timeout: @connect_timeout, socket_timeout: @socket_timeout, ssl_ca_cert_file_path: @ssl_ca_cert,
+                         ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
+                         ssl_client_cert_key_password: @ssl_client_cert_key_password,
+                         ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_scram_username: @username, sasl_scram_password: @password,
+                         sasl_scram_mechanism: @scram_mechanism, sasl_over_ssl: @sasl_over_ssl, ssl_verify_hostname: @ssl_verify_hostname)
+    elsif @username != nil && @password != nil
+      @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: logger, connect_timeout: @connect_timeout, socket_timeout: @socket_timeout, ssl_ca_cert_file_path: @ssl_ca_cert,
+                         ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
+                         ssl_client_cert_key_password: @ssl_client_cert_key_password,
+                         ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_plain_username: @username, sasl_plain_password: @password,
+                         sasl_over_ssl: @sasl_over_ssl, ssl_verify_hostname: @ssl_verify_hostname)
+    else
+      @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: logger, connect_timeout: @connect_timeout, socket_timeout: @socket_timeout, ssl_ca_cert_file_path: @ssl_ca_cert,
+                         ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
+                         ssl_client_cert_key_password: @ssl_client_cert_key_password,
+                         ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_gssapi_principal: @principal, sasl_gssapi_keytab: @keytab,
+                         ssl_verify_hostname: @ssl_verify_hostname)
+    end
+
+    @consumer = setup_consumer
+    @thread = Thread.new(&method(:run))
+  end
+
+  def shutdown
+    # This nil assignment should be guarded by mutex in multithread programming manner.
+    # But the situation is very low contention, so we don't use mutex for now.
+    # If the problem happens, we will add a guard for consumer.
+    consumer = @consumer
+    @consumer = nil
+    consumer.stop
+
+    @thread.join
+    @kafka.close
+    super
+  end
+
+  def setup_consumer
+    consumer = @kafka.consumer(**@consumer_opts)
+    @topics.each { |topic|
+      if m = /^\/(.+)\/$/.match(topic)
+        topic_or_regex = Regexp.new(m[1])
+        $log.info "Subscribe to topics matching the regex #{topic}"
+      else
+        topic_or_regex = topic
+        $log.info "Subscribe to topic #{topic}"
+      end
+      consumer.subscribe(topic_or_regex, start_from_beginning: @start_from_beginning, max_bytes_per_partition: @max_bytes)
+    }
+    consumer
+  end
+
+  def reconnect_consumer
+    log.warn "Stopping Consumer"
+    consumer = @consumer
+    @consumer = nil
+    if consumer
+      consumer.stop
+    end
+    log.warn "Could not connect to broker. retry_time:#{@retry_count}. Next retry will be in #{@retry_wait_seconds} seconds"
+    @retry_count = @retry_count + 1
+    sleep @retry_wait_seconds
+    @consumer = setup_consumer
+    log.warn "Re-starting consumer #{Time.now.to_s}"
+    @retry_count = 0
+  rescue => e
+    log.error "unexpected error during re-starting consumer object access", :error => e.to_s
+    log.error_backtrace
+    if @retry_count <= @retry_limit or disable_retry_limit
+      reconnect_consumer
+    end
+  end
+
+  def process_batch_with_record_tag(batch)
+    es = {}
+    batch.messages.each { |msg|
+      begin
+        record = @parser_proc.call(msg)
+        tag = record[@record_tag_key]
+        tag = @add_prefix + "." + tag if @add_prefix
+        tag = tag + "." + @add_suffix if @add_suffix
+        es[tag] ||= Fluent::MultiEventStream.new
+        case @time_source
+        when :kafka
+          record_time = Fluent::EventTime.from_time(msg.create_time)
+        when :now
+          record_time = Fluent::Engine.now
+        when :record
+          if @time_format
+            record_time = @time_parser.parse(record[@record_time_key].to_s)
+          else
+            record_time = record[@record_time_key]
+          end
+        else
+          log.fatal "BUG: invalid time_source: #{@time_source}"
+        end
+        if @kafka_message_key
+          record[@kafka_message_key] = msg.key
+        end
+        if @add_headers
+          msg.headers.each_pair { |k, v|
+            record[k] = v
+          }
+        end
+        es[tag].add(record_time, record)
+      rescue => e
+        log.warn "parser error in #{batch.topic}/#{batch.partition}", :error => e.to_s, :value => msg.value, :offset => msg.offset
+        log.debug_backtrace
+      end
+    }
+
+    unless es.empty?
+      es.each { |tag,es|
+        emit_events(tag, es)
+      }
+    end
+  end
+
+  def process_batch(batch)
+    es = Fluent::MultiEventStream.new
+    tag = batch.topic
+    tag = @add_prefix + "." + tag if @add_prefix
+    tag = tag + "." + @add_suffix if @add_suffix
+
+    batch.messages.each { |msg|
+      begin
+        record = @parser_proc.call(msg)
+        case @time_source
+        when :kafka
+          record_time = Fluent::EventTime.from_time(msg.create_time)
+        when :now
+          record_time = Fluent::Engine.now
+        when :record
+          record_time = record[@record_time_key]
+
+          if @time_format
+            record_time = @time_parser.parse(record_time.to_s)
+          elsif record_time.is_a?(Float) && @float_numeric_parse
+            record_time = @float_numeric_parse.parse(record_time)
+          end
+        else
+          log.fatal "BUG: invalid time_source: #{@time_source}"
+        end
+        if @kafka_message_key
+          record[@kafka_message_key] = msg.key
+        end
+        if @add_headers
+          msg.headers.each_pair { |k, v|
+            record[k] = v
+          }
+        end
+        es.add(record_time, record)
+      rescue => e
+        log.warn "parser error in #{batch.topic}/#{batch.partition}", :error => e.to_s, :value => msg.value, :offset => msg.offset
+        log.debug_backtrace
+      end
+    }
+
+    unless es.empty?
+      emit_events(tag, es)
+    end
+  end
+
+  def run
+    while @consumer
+      begin
+        @consumer.each_batch(**@fetch_opts) { |batch|
+          if @tag_source == :record
+            process_batch_with_record_tag(batch)
+          else
+            process_batch(batch)
+          end
+        }
+      rescue ForShutdown
+      rescue => e
+        log.error "unexpected error during consuming events from kafka. Re-fetch events.", :error => e.to_s
+        log.error_backtrace
+        reconnect_consumer
+      end
+    end
+  rescue => e
+    log.error "unexpected error during consumer object access", :error => e.to_s
+    log.error_backtrace
+  end
+
+  def emit_events(tag, es)
+    retries = 0
+    begin
+      router.emit_stream(tag, es)
+    rescue BufferError
+      raise ForShutdown if @consumer.nil?
+
+      if @retry_emit_limit.nil?
+        sleep 1
+        retry
+      end
+
+      if retries < @retry_emit_limit
+        retries += 1
+        sleep 1
+        retry
+      else
+        raise RuntimeError, "Exceeds retry_emit_limit"
+      end
+    end
+  end
+end
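
For reference, a minimal Fluentd <source> configuration for the kafka_group input added above might look like the sketch below. The broker addresses, consumer group, and topic names are illustrative placeholders; every other parameter corresponds to a config_param declared in in_kafka_group.rb.

  <source>
    @type kafka_group
    brokers broker1:9092,broker2:9092
    consumer_group fluentd-consumer
    topics web_logs,app_logs
    format json
    add_prefix kafka
    time_source kafka
    start_from_beginning false
  </source>

With add_prefix set, events are tagged kafka.<topic>, since process_batch prepends the prefix to the topic-derived tag.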
data/lib/fluent/plugin/in_rdkafka_group.rb
@@ -0,0 +1,305 @@
+require 'fluent/plugin/input'
+require 'fluent/time'
+require 'fluent/plugin/kafka_plugin_util'
+
+require 'rdkafka'
+
+class Fluent::Plugin::RdKafkaGroupInput < Fluent::Plugin::Input
+  Fluent::Plugin.register_input('rdkafka_group', self)
+
+  helpers :thread, :parser, :compat_parameters
+
+  config_param :topics, :string,
+               :desc => "Listening topics(separate with comma',')."
+
+  config_param :format, :string, :default => 'json',
+               :desc => "Supported format: (json|text|ltsv|msgpack)"
+  config_param :message_key, :string, :default => 'message',
+               :desc => "For 'text' format only."
+  config_param :add_headers, :bool, :default => false,
+               :desc => "Add kafka's message headers to event record"
+  config_param :add_prefix, :string, :default => nil,
+               :desc => "Tag prefix (Optional)"
+  config_param :add_suffix, :string, :default => nil,
+               :desc => "Tag suffix (Optional)"
+  config_param :use_record_time, :bool, :default => false,
+               :desc => "Replace message timestamp with contents of 'time' field.",
+               :deprecated => "Use 'time_source record' instead."
+  config_param :time_source, :enum, :list => [:now, :kafka, :record], :default => :now,
+               :desc => "Source for message timestamp."
+  config_param :record_time_key, :string, :default => 'time',
+               :desc => "Time field when time_source is 'record'"
+  config_param :time_format, :string, :default => nil,
+               :desc => "Time format to be used to parse 'time' field."
+  config_param :kafka_message_key, :string, :default => nil,
+               :desc => "Set kafka's message key to this field"
+
+  config_param :retry_emit_limit, :integer, :default => nil,
+               :desc => "How long to stop event consuming when BufferQueueLimitError happens. Wait retry_emit_limit x 1s. The default is waiting until BufferQueueLimitError is resolved"
+  config_param :retry_wait_seconds, :integer, :default => 30
+  config_param :disable_retry_limit, :bool, :default => false,
+               :desc => "If set true, it disables retry_limit and make Fluentd retry indefinitely (default: false)"
+  config_param :retry_limit, :integer, :default => 10,
+               :desc => "The maximum number of retries for connecting kafka (default: 10)"
+
+  config_param :max_wait_time_ms, :integer, :default => 250,
+               :desc => "How long to block polls in milliseconds until the server sends us data."
+  config_param :max_batch_size, :integer, :default => 10000,
+               :desc => "Maximum number of log lines emitted in a single batch."
+
+  config_param :kafka_configs, :hash, :default => {},
+               :desc => "Kafka configuration properties as described in https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md"
+
+  config_section :parse do
+    config_set_default :@type, 'json'
+  end
+
+  include Fluent::KafkaPluginUtil::SSLSettings
+  include Fluent::KafkaPluginUtil::SaslSettings
+
+  class ForShutdown < StandardError
+  end
+
+  BufferError = Fluent::Plugin::Buffer::BufferOverflowError
+
+  def initialize
+    super
+
+    @time_parser = nil
+    @retry_count = 1
+  end
+
+  def _config_to_array(config)
+    config_array = config.split(',').map {|k| k.strip }
+    if config_array.empty?
+      raise Fluent::ConfigError, "kafka_group: '#{config}' is a required parameter"
+    end
+    config_array
+  end
+
+  def multi_workers_ready?
+    true
+  end
+
+  private :_config_to_array
+
+  def configure(conf)
+    compat_parameters_convert(conf, :parser)
+
+    super
+
+    log.warn "The in_rdkafka_group consumer was not yet tested under heavy production load. Use it at your own risk!"
+
+    log.info "Will watch for topics #{@topics} at brokers " \
+             "#{@kafka_configs["bootstrap.servers"]} and '#{@kafka_configs["group.id"]}' group"
+
+    @topics = _config_to_array(@topics)
+
+    parser_conf = conf.elements('parse').first
+    unless parser_conf
+      raise Fluent::ConfigError, "<parse> section or format parameter is required."
+    end
+    unless parser_conf["@type"]
+      raise Fluent::ConfigError, "parse/@type is required."
+    end
+    @parser_proc = setup_parser(parser_conf)
+
+    @time_source = :record if @use_record_time
+
+    if @time_source == :record and @time_format
+      @time_parser = Fluent::TimeParser.new(@time_format)
+    end
+  end
+
+  def setup_parser(parser_conf)
+    format = parser_conf["@type"]
+    case format
+    when 'json'
+      begin
+        require 'oj'
+        Oj.default_options = Fluent::DEFAULT_OJ_OPTIONS
+        Proc.new { |msg| Oj.load(msg.payload) }
+      rescue LoadError
+        require 'yajl'
+        Proc.new { |msg| Yajl::Parser.parse(msg.payload) }
+      end
+    when 'ltsv'
+      require 'ltsv'
+      Proc.new { |msg| LTSV.parse(msg.payload, {:symbolize_keys => false}).first }
+    when 'msgpack'
+      require 'msgpack'
+      Proc.new { |msg| MessagePack.unpack(msg.payload) }
+    when 'text'
+      Proc.new { |msg| {@message_key => msg.payload} }
+    else
+      @custom_parser = parser_create(usage: 'in-rdkafka-plugin', conf: parser_conf)
+      Proc.new { |msg|
+        @custom_parser.parse(msg.payload) {|_time, record|
+          record
+        }
+      }
+    end
+  end
+
+  def start
+    super
+
+    @consumer = setup_consumer
+
+    thread_create(:in_rdkafka_group, &method(:run))
+  end
+
+  def shutdown
+    # This nil assignment should be guarded by mutex in multithread programming manner.
+    # But the situation is very low contention, so we don't use mutex for now.
+    # If the problem happens, we will add a guard for consumer.
+    consumer = @consumer
+    @consumer = nil
+    consumer.close
+
+    super
+  end
+
+  def setup_consumer
+    consumer = Rdkafka::Config.new(@kafka_configs).consumer
+    consumer.subscribe(*@topics)
+    consumer
+  end
+
+  def reconnect_consumer
+    log.warn "Stopping Consumer"
+    consumer = @consumer
+    @consumer = nil
+    if consumer
+      consumer.close
+    end
+    log.warn "Could not connect to broker. retry_time:#{@retry_count}. Next retry will be in #{@retry_wait_seconds} seconds"
+    @retry_count = @retry_count + 1
+    sleep @retry_wait_seconds
+    @consumer = setup_consumer
+    log.warn "Re-starting consumer #{Time.now.to_s}"
+    @retry_count = 0
+  rescue => e
+    log.error "unexpected error during re-starting consumer object access", :error => e.to_s
+    log.error_backtrace
+    if @retry_count <= @retry_limit or disable_retry_limit
+      reconnect_consumer
+    end
+  end
+
+  class Batch
+    attr_reader :topic
+    attr_reader :messages
+
+    def initialize(topic)
+      @topic = topic
+      @messages = []
+    end
+  end
+
+  # Executes the passed codeblock on a batch of messages.
+  # It is guaranteed that every message in a given batch belongs to the same topic, because the tagging logic in :run expects that property.
+  # The number of maximum messages in a batch is capped by the :max_batch_size configuration value. It ensures that consuming from a single
+  # topic for a long time (e.g. with `auto.offset.reset` set to `earliest`) does not lead to memory exhaustion. Also, calling consumer.poll
+  # advances the consumer offset, so in case the process crashes we might lose at most :max_batch_size messages.
+  def each_batch(&block)
+    batch = nil
+    message = nil
+    while @consumer
+      message = @consumer.poll(@max_wait_time_ms)
+      if message
+        if not batch
+          batch = Batch.new(message.topic)
+        elsif batch.topic != message.topic || batch.messages.size >= @max_batch_size
+          yield batch
+          batch = Batch.new(message.topic)
+        end
+        batch.messages << message
+      else
+        yield batch if batch
+        batch = nil
+      end
+    end
+    yield batch if batch
+  end
+
+  def run
+    while @consumer
+      begin
+        each_batch { |batch|
+          log.debug "A new batch for topic #{batch.topic} with #{batch.messages.size} messages"
+          es = Fluent::MultiEventStream.new
+          tag = batch.topic
+          tag = @add_prefix + "." + tag if @add_prefix
+          tag = tag + "." + @add_suffix if @add_suffix
+
+          batch.messages.each { |msg|
+            begin
+              record = @parser_proc.call(msg)
+              case @time_source
+              when :kafka
+                record_time = Fluent::EventTime.from_time(msg.timestamp)
+              when :now
+                record_time = Fluent::Engine.now
+              when :record
+                if @time_format
+                  record_time = @time_parser.parse(record[@record_time_key].to_s)
+                else
+                  record_time = record[@record_time_key]
+                end
+              else
+                log.fatal "BUG: invalid time_source: #{@time_source}"
+              end
+              if @kafka_message_key
+                record[@kafka_message_key] = msg.key
+              end
+              if @add_headers
+                msg.headers.each_pair { |k, v|
+                  record[k] = v
+                }
+              end
+              es.add(record_time, record)
+            rescue => e
+              log.warn "parser error in #{msg.topic}/#{msg.partition}", :error => e.to_s, :value => msg.payload, :offset => msg.offset
+              log.debug_backtrace
+            end
+          }
+
+          unless es.empty?
+            emit_events(tag, es)
+          end
+        }
+      rescue ForShutdown
+      rescue => e
+        log.error "unexpected error during consuming events from kafka. Re-fetch events.", :error => e.to_s
+        log.error_backtrace
+        reconnect_consumer
+      end
+    end
+  rescue => e
+    log.error "unexpected error during consumer object access", :error => e.to_s
+    log.error_backtrace
+  end
+
+  def emit_events(tag, es)
+    retries = 0
+    begin
+      router.emit_stream(tag, es)
+    rescue BufferError
+      raise ForShutdown if @consumer.nil?
+
+      if @retry_emit_limit.nil?
+        sleep 1
+        retry
+      end
+
+      if retries < @retry_emit_limit
+        retries += 1
+        sleep 1
+        retry
+      else
+        raise RuntimeError, "Exceeds retry_emit_limit"
+      end
+    end
+  end
+end
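
Similarly, a hypothetical rdkafka_group source would pass librdkafka properties through the kafka_configs hash (the bootstrap.servers and group.id keys shown are the ones the plugin's configure method logs); the topic name and broker address below are placeholders, and the hash is assumed to be given in Fluentd's JSON-object notation:

  <source>
    @type rdkafka_group
    topics web_logs
    format json
    add_prefix kafka
    max_wait_time_ms 500
    max_batch_size 10000
    kafka_configs {"bootstrap.servers":"broker1:9092","group.id":"fluentd-consumer"}
  </source>

Unlike kafka_group, batching here is handled by the plugin's own each_batch loop, which cuts a new batch whenever the polled topic changes or max_batch_size messages have accumulated.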