fluent-plugin-kafka 0.14.2 → 0.16.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 14cde202b38079778e0d694692f05c19c9576be8622f35a8896df35f33ea9733
- data.tar.gz: 80cbc1050e85239dabfe78dbffd97a100c939ec4e9227bd68956a7b3d15aa75e
+ metadata.gz: 7fcaf3e8fbb836ab8db3fa21003a713f9048de076097e2c95a9f30e5d1b05c08
+ data.tar.gz: 8437b2c9401238d811973422a65d2c9ee34bd8afc513f32412a22f93e03204a0
  SHA512:
- metadata.gz: 5ec7ed5f16a7d78a0dcd5f6eb15ecd94dd47adc7ae77208896a0ede1d341ede1f5a668141e7ac4e5413734105e276bd483f3f2b8c2041d329a4d619a9c469a76
- data.tar.gz: 11877dd67f3b0f714b38153368611c8f234b6a96976a00e21117e9523fa5bcde210fd13fa5e22086cf604ee61fc94a3472bc4946c69a6f5623eeace486e7eb75
+ metadata.gz: 6d407e6f12dbafc6f5fad59ac6cbae4b13fa5f0a212cf82dba604f871afc286899290972b3afca5d1031888ee2f141c12d5d2e3d6b7324c7246317029149c491
+ data.tar.gz: b278441842361cc53836fce087e87e6d2d800bccbe9b307c2f0b39d3c4c357aae96275d2bfb808ab2455ebce692948d1241f4f991a5036473da1d31da699832b
data/.github/workflows/linux.yml ADDED
@@ -0,0 +1,26 @@
+ name: linux
+ on:
+   - push
+   - pull_request
+ jobs:
+   build:
+     runs-on: ${{ matrix.os }}
+     strategy:
+       fail-fast: false
+       matrix:
+         ruby: [ '2.4', '2.5', '2.6', '2.7', '3.0' ]
+         os:
+           - ubuntu-latest
+     name: Ruby ${{ matrix.ruby }} unit testing on ${{ matrix.os }}
+     steps:
+       - uses: actions/checkout@v2
+       - uses: ruby/setup-ruby@v1
+         with:
+           ruby-version: ${{ matrix.ruby }}
+       - name: unit testing
+         env:
+           CI: true
+         run: |
+           gem install bundler rake
+           bundle install --jobs 4 --retry 3
+           bundle exec rake test
data/ChangeLog CHANGED
@@ -1,3 +1,25 @@
+ Release 0.16.0 - 2021/01/25
+
+ * input: Add `tag_source` and `record_tag_key` parameters for using record field as tag
+ * in_kafka_group: Use NumericParser for floating point
+
+ Release 0.15.3 - 2020/12/08
+
+ * in_kafka: Fix `record_time_key` parameter not working
+
+ Release 0.15.2 - 2020/09/30
+
+ * input: Support 3rd party parser
+
+ Release 0.15.1 - 2020/09/17
+
+ * out_kafka2: Fix wrong class name for configuration error
+
+ Release 0.15.0 - 2020/09/14
+
+ * Add experimental `in_rdkafka_group`
+ * in_kafka: Expose `ssl_verify_hostname` parameter
+
  Release 0.14.2 - 2020/08/26

  * in_kafka_group: Add `add_headers` parameter
@@ -15,6 +37,7 @@ Release 0.14.0 - 2020/08/07
  Release 0.13.1 - 2020/07/17

  * in_kafka_group: Support ssl_verify_hostname parameter
+ * in_kafka_group: Support regex based topics
  * out_kafka2/out_rdkafka2: Support topic parameter with placeholders

  Release 0.13.0 - 2020/03/09
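
The headline feature in 0.16.0 is per-record tagging for the input plugins. A minimal sketch of the new `tag_source` and `record_tag_key` parameters with in_kafka_group (the broker address, consumer group name, and topic here are hypothetical):

    <source>
      @type kafka_group
      brokers localhost:9092        # hypothetical broker
      consumer_group fluentd-group  # hypothetical consumer group name
      topics app_events
      format json
      tag_source record             # take the event tag from each record instead of the topic
      record_tag_key tag            # record field that holds the tag
      add_prefix kafka              # optional: the emitted tag becomes kafka.<record tag>
    </source>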
data/README.md CHANGED
@@ -139,6 +139,41 @@ Consume events by kafka consumer group features..
 
  See also [ruby-kafka README](https://github.com/zendesk/ruby-kafka#consuming-messages-from-kafka) for more detailed documentation about ruby-kafka options.
 
+ `topics` supports a regex pattern since v0.13.1. To use a regex pattern, write it as `/pattern/`, e.g. `/foo.*/`.
+
+ The consumed topic name is used as the event tag. So when the target topic name is `app_event`, the tag is `app_event`. To modify the tag, use the `add_prefix` or `add_suffix` parameter. With `add_prefix kafka`, the tag becomes `kafka.app_event`.
+
+ ### Input plugin (@type 'rdkafka_group', supports kafka consumer groups, uses rdkafka-ruby)
+
+ :warning: **The in_rdkafka_group consumer has not yet been tested under heavy production load. Use it at your own risk!**
+
+ With the introduction of the rdkafka-ruby based input plugin we hope to support Kafka brokers above version 2.1, where we saw [compatibility issues](https://github.com/fluent/fluent-plugin-kafka/issues/315) when using the ruby-kafka based @kafka_group input type. The rdkafka-ruby lib wraps the highly performant and production-ready librdkafka C lib.
+
+     <source>
+       @type rdkafka_group
+       topics <listening topics(separate with comma',')>
+       format <input text type (text|json|ltsv|msgpack)> :default => json
+       message_key <key (Optional, for text format only, default is message)>
+       kafka_message_key <key (Optional, if specified, set kafka's message key to this key)>
+       add_headers <If true, add kafka's message headers to record>
+       add_prefix <tag prefix (Optional)>
+       add_suffix <tag suffix (Optional)>
+       retry_emit_limit <Wait retry_emit_limit x 1s when BufferQueueLimitError happens. The default is nil and it means waiting until BufferQueueLimitError is resolved>
+       use_record_time (Deprecated. Use 'time_source record' instead.) <If true, replace event time with contents of 'time' field of fetched record>
+       time_source <source for message timestamp (now|kafka|record)> :default => now
+       time_format <string (Optional when use_record_time is used)>
+
+       # kafka consumer options
+       max_wait_time_ms 500
+       max_batch_size 10000
+       kafka_configs {
+         "bootstrap.servers": "<broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>",
+         "group.id": "<consumer group name>"
+       }
+     </source>
+
+ See also [rdkafka-ruby](https://github.com/appsignal/rdkafka-ruby) and [librdkafka](https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md) for more detailed documentation about Kafka consumer options.
+
  The consumed topic name is used as the event tag. So when the target topic name is `app_event`, the tag is `app_event`. To modify the tag, use the `add_prefix` or `add_suffix` parameter. With `add_prefix kafka`, the tag becomes `kafka.app_event`.
 
  ### Output plugin
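
The regex topic matching described above also works with the consumer-group input. A minimal sketch, assuming topics such as `app_foo` and `app_bar` exist and using a hypothetical broker and group name:

    <source>
      @type kafka_group
      brokers localhost:9092      # hypothetical broker
      consumer_group regex-group  # hypothetical consumer group name
      topics /app_.*/             # regex pattern, matches app_foo, app_bar, ...
      format json
    </source>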
fluent-plugin-kafka.gemspec CHANGED
@@ -13,7 +13,7 @@ Gem::Specification.new do |gem|
    gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
    gem.name = "fluent-plugin-kafka"
    gem.require_paths = ["lib"]
-   gem.version = '0.14.2'
+   gem.version = '0.16.0'
    gem.required_ruby_version = ">= 2.1.0"
 
    gem.add_dependency "fluentd", [">= 0.10.58", "< 2"]
@@ -21,4 +21,5 @@ Gem::Specification.new do |gem|
    gem.add_dependency 'ruby-kafka', '>= 1.2.0', '< 2'
    gem.add_development_dependency "rake", ">= 0.9.2"
    gem.add_development_dependency "test-unit", ">= 3.0.8"
+   gem.add_development_dependency "webrick"
  end
data/lib/fluent/plugin/in_kafka.rb CHANGED
@@ -31,6 +31,10 @@ class Fluent::KafkaInput < Fluent::Input
    config_param :add_suffix, :string, :default => nil,
                 :desc => "tag suffix"
    config_param :add_offset_in_record, :bool, :default => false
+   config_param :tag_source, :enum, :list => [:topic, :record], :default => :topic,
+                :desc => "Source for the fluentd event tag"
+   config_param :record_tag_key, :string, :default => 'tag',
+                :desc => "Tag field when tag_source is 'record'"
 
    config_param :offset_zookeeper, :string, :default => nil
    config_param :offset_zk_root_node, :string, :default => '/fluent-plugin-kafka'
@@ -113,7 +117,7 @@ class Fluent::KafkaInput < Fluent::Input
 
    require 'zookeeper' if @offset_zookeeper
 
-   @parser_proc = setup_parser
+   @parser_proc = setup_parser(conf)
 
    @time_source = :record if @use_record_time
 
@@ -126,7 +130,7 @@ class Fluent::KafkaInput < Fluent::Input
      end
    end
 
-   def setup_parser
+   def setup_parser(conf)
      case @format
      when 'json'
        begin
@@ -165,6 +169,14 @@ class Fluent::KafkaInput < Fluent::Input
          add_offset_in_hash(r, te, msg.offset) if @add_offset_in_record
          r
        }
+     else
+       @custom_parser = Fluent::Plugin.new_parser(conf['format'])
+       @custom_parser.configure(conf)
+       Proc.new { |msg|
+         @custom_parser.parse(msg.value) {|_time, record|
+           record
+         }
+       }
      end
    end
 
@@ -188,16 +200,17 @@ class Fluent::KafkaInput < Fluent::Input
      @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: logger, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
                         ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
                         ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_scram_username: @username, sasl_scram_password: @password,
-                        sasl_scram_mechanism: @scram_mechanism, sasl_over_ssl: @sasl_over_ssl)
+                        sasl_scram_mechanism: @scram_mechanism, sasl_over_ssl: @sasl_over_ssl, ssl_verify_hostname: @ssl_verify_hostname)
    elsif @username != nil && @password != nil
      @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: logger, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
                         ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
                         ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_plain_username: @username, sasl_plain_password: @password,
-                        sasl_over_ssl: @sasl_over_ssl)
+                        sasl_over_ssl: @sasl_over_ssl, ssl_verify_hostname: @ssl_verify_hostname)
    else
      @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: logger, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
                         ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
-                        ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_gssapi_principal: @principal, sasl_gssapi_keytab: @keytab)
+                        ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_gssapi_principal: @principal, sasl_gssapi_keytab: @keytab,
+                        ssl_verify_hostname: @ssl_verify_hostname)
    end
 
    @zookeeper = Zookeeper.new(@offset_zookeeper) if @offset_zookeeper
@@ -215,6 +228,9 @@ class Fluent::KafkaInput < Fluent::Input
                             router,
                             @kafka_message_key,
                             @time_source,
+                            @record_time_key,
+                            @tag_source,
+                            @record_tag_key,
                             opt)
    }
    @topic_watchers.each {|tw|
@@ -239,7 +255,7 @@ class Fluent::KafkaInput < Fluent::Input
    end
 
    class TopicWatcher < Coolio::TimerWatcher
-     def initialize(topic_entry, kafka, interval, parser, add_prefix, add_suffix, offset_manager, router, kafka_message_key, time_source, options={})
+     def initialize(topic_entry, kafka, interval, parser, add_prefix, add_suffix, offset_manager, router, kafka_message_key, time_source, record_time_key, tag_source, record_tag_key, options={})
        @topic_entry = topic_entry
        @kafka = kafka
        @callback = method(:consume)
@@ -251,6 +267,9 @@ class Fluent::KafkaInput < Fluent::Input
        @router = router
        @kafka_message_key = kafka_message_key
        @time_source = time_source
+       @record_time_key = record_time_key
+       @tag_source = tag_source
+       @record_tag_key = record_tag_key
 
        @next_offset = @topic_entry.offset
        if @topic_entry.offset == -1 && offset_manager
@@ -287,6 +306,11 @@ class Fluent::KafkaInput < Fluent::Input
        messages.each { |msg|
          begin
            record = @parser.call(msg, @topic_entry)
+           if @tag_source == :record
+             tag = record[@record_tag_key]
+             tag = @add_prefix + "." + tag if @add_prefix
+             tag = tag + "." + @add_suffix if @add_suffix
+           end
            case @time_source
            when :kafka
              record_time = Fluent::EventTime.from_time(msg.create_time)
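
Per the 0.15.0 ChangeLog entry, the hunks above thread `ssl_verify_hostname` through every `Kafka.new` call in in_kafka. A minimal sketch of turning hostname verification off for a broker with a self-signed certificate (the broker address and certificate path are hypothetical):

    <source>
      @type kafka
      brokers broker1.internal:9093   # hypothetical TLS listener
      topics app_events
      format json
      ssl_ca_cert /etc/kafka/ca.crt   # hypothetical path
      ssl_verify_hostname false       # skip hostname verification, e.g. for self-signed certs
    </source>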
data/lib/fluent/plugin/in_kafka_group.rb CHANGED
@@ -36,6 +36,10 @@ class Fluent::KafkaGroupInput < Fluent::Input
    config_param :get_kafka_client_log, :bool, :default => false
    config_param :time_format, :string, :default => nil,
                 :desc => "Time format to be used to parse 'time' field."
+   config_param :tag_source, :enum, :list => [:topic, :record], :default => :topic,
+                :desc => "Source for the fluentd event tag"
+   config_param :record_tag_key, :string, :default => 'tag',
+                :desc => "Tag field when tag_source is 'record'"
    config_param :kafka_message_key, :string, :default => nil,
                 :desc => "Set kafka's message key to this field"
    config_param :connect_timeout, :integer, :default => nil,
@@ -117,7 +121,7 @@ class Fluent::KafkaGroupInput < Fluent::Input
      @max_wait_time = conf['max_wait_ms'].to_i / 1000
    end
 
-   @parser_proc = setup_parser
+   @parser_proc = setup_parser(conf)
 
    @consumer_opts = {:group_id => @consumer_group}
    @consumer_opts[:session_timeout] = @session_timeout if @session_timeout
@@ -138,9 +142,13 @@ class Fluent::KafkaGroupInput < Fluent::Input
        @time_parser = Fluent::TextParser::TimeParser.new(@time_format)
      end
    end
+
+   if @time_source == :record && defined?(Fluent::NumericTimeParser)
+     @float_numeric_parse = Fluent::NumericTimeParser.new(:float)
+   end
  end
 
- def setup_parser
+ def setup_parser(conf)
    case @format
    when 'json'
      begin
@@ -159,6 +167,14 @@ class Fluent::KafkaGroupInput < Fluent::Input
      Proc.new { |msg| MessagePack.unpack(msg.value) }
    when 'text'
      Proc.new { |msg| {@message_key => msg.value} }
+   else
+     @custom_parser = Fluent::Plugin.new_parser(conf['format'])
+     @custom_parser.configure(conf)
+     Proc.new { |msg|
+       @custom_parser.parse(msg.value) {|_time, record|
+         record
+       }
+     }
    end
  end
 
@@ -236,49 +252,104 @@ class Fluent::KafkaGroupInput < Fluent::Input
    end
  end
 
+ def process_batch_with_record_tag(batch)
+   es = {}
+   batch.messages.each { |msg|
+     begin
+       record = @parser_proc.call(msg)
+       tag = record[@record_tag_key]
+       tag = @add_prefix + "." + tag if @add_prefix
+       tag = tag + "." + @add_suffix if @add_suffix
+       es[tag] ||= Fluent::MultiEventStream.new
+       case @time_source
+       when :kafka
+         record_time = Fluent::EventTime.from_time(msg.create_time)
+       when :now
+         record_time = Fluent::Engine.now
+       when :record
+         if @time_format
+           record_time = @time_parser.parse(record[@record_time_key].to_s)
+         else
+           record_time = record[@record_time_key]
+         end
+       else
+         log.fatal "BUG: invalid time_source: #{@time_source}"
+       end
+       if @kafka_message_key
+         record[@kafka_message_key] = msg.key
+       end
+       if @add_headers
+         msg.headers.each_pair { |k, v|
+           record[k] = v
+         }
+       end
+       es[tag].add(record_time, record)
+     rescue => e
+       log.warn "parser error in #{batch.topic}/#{batch.partition}", :error => e.to_s, :value => msg.value, :offset => msg.offset
+       log.debug_backtrace
+     end
+   }
+
+   unless es.empty?
+     es.each { |tag, es|
+       emit_events(tag, es)
+     }
+   end
+ end
+
+ def process_batch(batch)
+   es = Fluent::MultiEventStream.new
+   tag = batch.topic
+   tag = @add_prefix + "." + tag if @add_prefix
+   tag = tag + "." + @add_suffix if @add_suffix
+
+   batch.messages.each { |msg|
+     begin
+       record = @parser_proc.call(msg)
+       case @time_source
+       when :kafka
+         record_time = Fluent::EventTime.from_time(msg.create_time)
+       when :now
+         record_time = Fluent::Engine.now
+       when :record
+         record_time = record[@record_time_key]
+
+         if @time_format
+           record_time = @time_parser.parse(record_time.to_s)
+         elsif record_time.is_a?(Float) && @float_numeric_parse
+           record_time = @float_numeric_parse.parse(record_time)
+         end
+       else
+         log.fatal "BUG: invalid time_source: #{@time_source}"
+       end
+       if @kafka_message_key
+         record[@kafka_message_key] = msg.key
+       end
+       if @add_headers
+         msg.headers.each_pair { |k, v|
+           record[k] = v
+         }
+       end
+       es.add(record_time, record)
+     rescue => e
+       log.warn "parser error in #{batch.topic}/#{batch.partition}", :error => e.to_s, :value => msg.value, :offset => msg.offset
+       log.debug_backtrace
+     end
+   }
+
+   unless es.empty?
+     emit_events(tag, es)
+   end
+ end
+
  def run
    while @consumer
      begin
        @consumer.each_batch(@fetch_opts) { |batch|
-         es = Fluent::MultiEventStream.new
-         tag = batch.topic
-         tag = @add_prefix + "." + tag if @add_prefix
-         tag = tag + "." + @add_suffix if @add_suffix
-
-         batch.messages.each { |msg|
-           begin
-             record = @parser_proc.call(msg)
-             case @time_source
-             when :kafka
-               record_time = Fluent::EventTime.from_time(msg.create_time)
-             when :now
-               record_time = Fluent::Engine.now
-             when :record
-               if @time_format
-                 record_time = @time_parser.parse(record[@record_time_key].to_s)
-               else
-                 record_time = record[@record_time_key]
-               end
-             else
-               log.fatal "BUG: invalid time_source: #{@time_source}"
-             end
-             if @kafka_message_key
-               record[@kafka_message_key] = msg.key
-             end
-             if @add_headers
-               msg.headers.each_pair { |k, v|
-                 record[k] = v
-               }
-             end
-             es.add(record_time, record)
-           rescue => e
-             log.warn "parser error in #{batch.topic}/#{batch.partition}", :error => e.to_s, :value => msg.value, :offset => msg.offset
-             log.debug_backtrace
-           end
-         }
-
-         unless es.empty?
-           emit_events(tag, es)
+         if @tag_source == :record
+           process_batch_with_record_tag(batch)
+         else
+           process_batch(batch)
          end
        }
      rescue ForShutdown
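
The `@float_numeric_parse` branch above is what the 0.16.0 ChangeLog means by "Use NumericParser for floating point": with `time_source record` and no `time_format`, a float value in the time field is now parsed with `Fluent::NumericTimeParser` instead of being passed through as-is. A minimal sketch (broker, group, and topic are hypothetical; records are assumed to carry a float epoch such as `1611540000.123` in their `time` field):

    <source>
      @type kafka_group
      brokers localhost:9092      # hypothetical broker
      consumer_group time-group   # hypothetical consumer group name
      topics app_events
      format json
      time_source record          # use the record's own timestamp
      record_time_key time        # field holding the float epoch
    </source>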
data/lib/fluent/plugin/in_rdkafka_group.rb ADDED
@@ -0,0 +1,305 @@
+ require 'fluent/plugin/input'
+ require 'fluent/time'
+ require 'fluent/plugin/kafka_plugin_util'
+
+ require 'rdkafka'
+
+ class Fluent::Plugin::RdKafkaGroupInput < Fluent::Plugin::Input
+   Fluent::Plugin.register_input('rdkafka_group', self)
+
+   helpers :thread, :parser, :compat_parameters
+
+   config_param :topics, :string,
+                :desc => "Listening topics(separate with comma',')."
+
+   config_param :format, :string, :default => 'json',
+                :desc => "Supported format: (json|text|ltsv|msgpack)"
+   config_param :message_key, :string, :default => 'message',
+                :desc => "For 'text' format only."
+   config_param :add_headers, :bool, :default => false,
+                :desc => "Add kafka's message headers to event record"
+   config_param :add_prefix, :string, :default => nil,
+                :desc => "Tag prefix (Optional)"
+   config_param :add_suffix, :string, :default => nil,
+                :desc => "Tag suffix (Optional)"
+   config_param :use_record_time, :bool, :default => false,
+                :desc => "Replace message timestamp with contents of 'time' field.",
+                :deprecated => "Use 'time_source record' instead."
+   config_param :time_source, :enum, :list => [:now, :kafka, :record], :default => :now,
+                :desc => "Source for message timestamp."
+   config_param :record_time_key, :string, :default => 'time',
+                :desc => "Time field when time_source is 'record'"
+   config_param :time_format, :string, :default => nil,
+                :desc => "Time format to be used to parse 'time' field."
+   config_param :kafka_message_key, :string, :default => nil,
+                :desc => "Set kafka's message key to this field"
+
+   config_param :retry_emit_limit, :integer, :default => nil,
+                :desc => "How long to stop event consuming when BufferQueueLimitError happens. Wait retry_emit_limit x 1s. The default is waiting until BufferQueueLimitError is resolved"
+   config_param :retry_wait_seconds, :integer, :default => 30
+   config_param :disable_retry_limit, :bool, :default => false,
+                :desc => "If set true, it disables retry_limit and makes Fluentd retry indefinitely (default: false)"
+   config_param :retry_limit, :integer, :default => 10,
+                :desc => "The maximum number of retries for connecting kafka (default: 10)"
+
+   config_param :max_wait_time_ms, :integer, :default => 250,
+                :desc => "How long to block polls in milliseconds until the server sends us data."
+   config_param :max_batch_size, :integer, :default => 10000,
+                :desc => "Maximum number of log lines emitted in a single batch."
+
+   config_param :kafka_configs, :hash, :default => {},
+                :desc => "Kafka configuration properties as described in https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md"
+
+   config_section :parse do
+     config_set_default :@type, 'json'
+   end
+
+   include Fluent::KafkaPluginUtil::SSLSettings
+   include Fluent::KafkaPluginUtil::SaslSettings
+
+   class ForShutdown < StandardError
+   end
+
+   BufferError = Fluent::Plugin::Buffer::BufferOverflowError
+
+   def initialize
+     super
+
+     @time_parser = nil
+     @retry_count = 1
+   end
+
+   def _config_to_array(config)
+     config_array = config.split(',').map {|k| k.strip }
+     if config_array.empty?
+       raise Fluent::ConfigError, "kafka_group: '#{config}' is a required parameter"
+     end
+     config_array
+   end
+
+   def multi_workers_ready?
+     true
+   end
+
+   private :_config_to_array
+
+   def configure(conf)
+     compat_parameters_convert(conf, :parser)
+
+     super
+
+     log.warn "The in_rdkafka_group consumer was not yet tested under heavy production load. Use it at your own risk!"
+
+     log.info "Will watch for topics #{@topics} at brokers " \
+              "#{@kafka_configs["bootstrap.servers"]} and '#{@kafka_configs["group.id"]}' group"
+
+     @topics = _config_to_array(@topics)
+
+     parser_conf = conf.elements('parse').first
+     unless parser_conf
+       raise Fluent::ConfigError, "<parse> section or format parameter is required."
+     end
+     unless parser_conf["@type"]
+       raise Fluent::ConfigError, "parse/@type is required."
+     end
+     @parser_proc = setup_parser(parser_conf)
+
+     @time_source = :record if @use_record_time
+
+     if @time_source == :record and @time_format
+       @time_parser = Fluent::TimeParser.new(@time_format)
+     end
+   end
+
+   def setup_parser(parser_conf)
+     format = parser_conf["@type"]
+     case format
+     when 'json'
+       begin
+         require 'oj'
+         Oj.default_options = Fluent::DEFAULT_OJ_OPTIONS
+         Proc.new { |msg| Oj.load(msg.payload) }
+       rescue LoadError
+         require 'yajl'
+         Proc.new { |msg| Yajl::Parser.parse(msg.payload) }
+       end
+     when 'ltsv'
+       require 'ltsv'
+       Proc.new { |msg| LTSV.parse(msg.payload, {:symbolize_keys => false}).first }
+     when 'msgpack'
+       require 'msgpack'
+       Proc.new { |msg| MessagePack.unpack(msg.payload) }
+     when 'text'
+       Proc.new { |msg| {@message_key => msg.payload} }
+     else
+       @custom_parser = parser_create(usage: 'in-rdkafka-plugin', conf: parser_conf)
+       Proc.new { |msg|
+         @custom_parser.parse(msg.payload) {|_time, record|
+           record
+         }
+       }
+     end
+   end
+
+   def start
+     super
+
+     @consumer = setup_consumer
+
+     thread_create(:in_rdkafka_group, &method(:run))
+   end
+
+   def shutdown
+     # This nil assignment should be guarded by a mutex in the usual multithreaded programming manner.
+     # But the situation is very low contention, so we don't use a mutex for now.
+     # If the problem happens, we will add a guard for consumer.
+     consumer = @consumer
+     @consumer = nil
+     consumer.close
+
+     super
+   end
+
+   def setup_consumer
+     consumer = Rdkafka::Config.new(@kafka_configs).consumer
+     consumer.subscribe(*@topics)
+     consumer
+   end
+
+   def reconnect_consumer
+     log.warn "Stopping Consumer"
+     consumer = @consumer
+     @consumer = nil
+     if consumer
+       consumer.close
+     end
+     log.warn "Could not connect to broker. retry_time:#{@retry_count}. Next retry will be in #{@retry_wait_seconds} seconds"
+     @retry_count = @retry_count + 1
+     sleep @retry_wait_seconds
+     @consumer = setup_consumer
+     log.warn "Re-starting consumer #{Time.now.to_s}"
+     @retry_count = 0
+   rescue => e
+     log.error "unexpected error during re-starting consumer object access", :error => e.to_s
+     log.error_backtrace
+     if @retry_count <= @retry_limit or disable_retry_limit
+       reconnect_consumer
+     end
+   end
+
+   class Batch
+     attr_reader :topic
+     attr_reader :messages
+
+     def initialize(topic)
+       @topic = topic
+       @messages = []
+     end
+   end
+
+   # Executes the passed codeblock on a batch of messages.
+   # It is guaranteed that every message in a given batch belongs to the same topic, because the tagging logic in :run expects that property.
+   # The maximum number of messages in a batch is capped by the :max_batch_size configuration value. It ensures that consuming from a single
+   # topic for a long time (e.g. with `auto.offset.reset` set to `earliest`) does not lead to memory exhaustion. Also, calling consumer.poll
+   # advances the consumer offset, so in case the process crashes we might lose at most :max_batch_size messages.
+   def each_batch(&block)
+     batch = nil
+     message = nil
+     while @consumer
+       message = @consumer.poll(@max_wait_time_ms)
+       if message
+         if not batch
+           batch = Batch.new(message.topic)
+         elsif batch.topic != message.topic || batch.messages.size >= @max_batch_size
+           yield batch
+           batch = Batch.new(message.topic)
+         end
+         batch.messages << message
+       else
+         yield batch if batch
+         batch = nil
+       end
+     end
+     yield batch if batch
+   end
+
+   def run
+     while @consumer
+       begin
+         each_batch { |batch|
+           log.debug "A new batch for topic #{batch.topic} with #{batch.messages.size} messages"
+           es = Fluent::MultiEventStream.new
+           tag = batch.topic
+           tag = @add_prefix + "." + tag if @add_prefix
+           tag = tag + "." + @add_suffix if @add_suffix
+
+           batch.messages.each { |msg|
+             begin
+               record = @parser_proc.call(msg)
+               case @time_source
+               when :kafka
+                 record_time = Fluent::EventTime.from_time(msg.timestamp)
+               when :now
+                 record_time = Fluent::Engine.now
+               when :record
+                 if @time_format
+                   record_time = @time_parser.parse(record[@record_time_key].to_s)
+                 else
+                   record_time = record[@record_time_key]
+                 end
+               else
+                 log.fatal "BUG: invalid time_source: #{@time_source}"
+               end
+               if @kafka_message_key
+                 record[@kafka_message_key] = msg.key
+               end
+               if @add_headers
+                 msg.headers.each_pair { |k, v|
+                   record[k] = v
+                 }
+               end
+               es.add(record_time, record)
+             rescue => e
+               log.warn "parser error in #{msg.topic}/#{msg.partition}", :error => e.to_s, :value => msg.payload, :offset => msg.offset
+               log.debug_backtrace
+             end
+           }
+
+           unless es.empty?
+             emit_events(tag, es)
+           end
+         }
+       rescue ForShutdown
+       rescue => e
+         log.error "unexpected error during consuming events from kafka. Re-fetch events.", :error => e.to_s
+         log.error_backtrace
+         reconnect_consumer
+       end
+     end
+   rescue => e
+     log.error "unexpected error during consumer object access", :error => e.to_s
+     log.error_backtrace
+   end
+
+   def emit_events(tag, es)
+     retries = 0
+     begin
+       router.emit_stream(tag, es)
+     rescue BufferError
+       raise ForShutdown if @consumer.nil?
+
+       if @retry_emit_limit.nil?
+         sleep 1
+         retry
+       end
+
+       if retries < @retry_emit_limit
+         retries += 1
+         sleep 1
+         retry
+       else
+         raise RuntimeError, "Exceeds retry_emit_limit"
+       end
+     end
+   end
+ end
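
Given the batching contract documented in `each_batch` above, the two knobs that bound latency and memory are `max_wait_time_ms` and `max_batch_size`. A minimal sketch of an in_rdkafka_group source with those knobs set explicitly (the broker address and group name are hypothetical):

    <source>
      @type rdkafka_group
      topics app_events
      format json
      max_wait_time_ms 500     # a poll blocks at most 500 ms before a partial batch is flushed
      max_batch_size 10000     # cap on messages per emitted batch, bounds memory use
      kafka_configs {
        "bootstrap.servers": "localhost:9092",
        "group.id": "fluentd-group"
      }
    </source>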
data/lib/fluent/plugin/out_kafka2.rb CHANGED
@@ -128,7 +128,7 @@ DESC
      @seed_brokers = @brokers
      log.info "brokers has been set: #{@seed_brokers}"
    else
-     raise Fluent::Config, 'No brokers specified. Need one broker at least.'
+     raise Fluent::ConfigError, 'No brokers specified. Need one broker at least.'
    end
 
    formatter_conf = conf.elements('format').first
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: fluent-plugin-kafka
  version: !ruby/object:Gem::Version
- version: 0.14.2
+ version: 0.16.0
  platform: ruby
  authors:
  - Hidemasa Togashi
@@ -9,7 +9,7 @@ authors:
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2020-08-26 00:00:00.000000000 Z
+ date: 2021-01-25 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: fluentd
@@ -93,6 +93,20 @@ dependencies:
      - - ">="
        - !ruby/object:Gem::Version
          version: 3.0.8
+ - !ruby/object:Gem::Dependency
+   name: webrick
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: '0'
  description: Fluentd plugin for Apache Kafka > 0.8
  email:
  - togachiro@gmail.com
@@ -101,6 +115,7 @@ executables: []
  extensions: []
  extra_rdoc_files: []
  files:
+ - ".github/workflows/linux.yml"
  - ".gitignore"
  - ".travis.yml"
  - ChangeLog
@@ -111,6 +126,7 @@ files:
  - fluent-plugin-kafka.gemspec
  - lib/fluent/plugin/in_kafka.rb
  - lib/fluent/plugin/in_kafka_group.rb
+ - lib/fluent/plugin/in_rdkafka_group.rb
  - lib/fluent/plugin/kafka_plugin_util.rb
  - lib/fluent/plugin/kafka_producer_ext.rb
  - lib/fluent/plugin/out_kafka.rb