fluent-plugin-kafka 0.17.5 → 0.18.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: eb7f693666ff7fd4bdb43b249d3e60e882ebf99e80402de2a5310b61668ef9e7
-  data.tar.gz: d4b141409cf83402ad1e28b9579317c672b20c9760b8f58b8d54800e8c46313b
+  metadata.gz: 0a8590f6d34bbdb2faa991bba6e32a1424623c7ddd9609dc1d2ffcbeabaa20e1
+  data.tar.gz: a653d9dba00fa82f18071304be9093cd6d620bf4bda29d5e00a75113d05e8e3e
 SHA512:
-  metadata.gz: 8f86c7c82fbf5db63ef0c92737ba4935a3a60534d434dfb9552c2ad624ec1e28fea9876c689a29ca6b16c16abc3818fdfb24bd7540495f980daa8a5509b29ca0
-  data.tar.gz: 8b1dfba11d40e12f9b0b1fbf4ff947678fb5ef2ab3fbd3969e363935c8e895c29f497273ddb86c80c4e1ebc9d254045275dc7e08b07788d4573799c97ce7a393
+  metadata.gz: 69a784e7d69e2229a036110997b2691143008d4ca786e18415e9a806a6d577b38465a4a3931166418a5b7e022dddbc62d014c0c784277ccaaecc9b6570822564
+  data.tar.gz: 9f14d8d10a45bf390c801c93d79b893f4248ded7e37dc87f8c4453b04158754e1006aa7603874986e8c93812fe899197219abccb3bdb0ae4daacef6d516a0af3
data/ChangeLog CHANGED
@@ -1,3 +1,8 @@
+Release 0.18.0 - 2022/07/21
+	* out_kafka2: Keep alive Kafka connections between flushes
+	* out_rdkafka2: Support setting SASL credentials via `username` and `password` parameters
+	* out_kafka2/out_rdkafka2: Add `record_key` parameter
+
 Release 0.17.5 - 2022/03/18
 	* out_kafka2: Add `resolve_seed_brokers` parameter
 
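Taken together, the out_kafka2 side of this release can be tried with a configuration along these lines. This is a minimal sketch, not taken from the gem: the tag pattern, broker address, and topic are placeholders, and `record_key '$.data'` assumes records that carry a `data` sub-hash. A matching rdkafka2 sketch for the new SASL parameters appears after the out_rdkafka2.rb diff below.

    <match app.**>
      @type kafka2
      brokers broker1:9092
      default_topic logs
      # New in 0.18.0: send only the "data" sub field as the message payload
      record_key '$.data'
      # New in 0.18.0: keep one producer alive and share it across flush threads
      share_producer true
      <format>
        @type json
      </format>
      <buffer topic>
        flush_interval 10s
      </buffer>
    </match>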
data/README.md CHANGED
@@ -193,6 +193,7 @@ If `ruby-kafka` doesn't fit your kafka environment, check `rdkafka2` plugin inst
     message_key_key (string) :default => 'message_key'
     default_topic (string) :default => nil
     default_partition_key (string) :default => nil
+    record_key (string) :default => nil
     default_message_key (string) :default => nil
     exclude_topic_key (bool) :default => false
     exclude_partition_key (bool) :default => false
@@ -205,6 +206,7 @@ If `ruby-kafka` doesn't fit your kafka environment, check `rdkafka2` plugin inst
     use_default_for_unknown_topic (bool) :default => false
     discard_kafka_delivery_failed (bool) :default => false (No discard)
     partitioner_hash_function (enum) (crc32|murmur2) :default => 'crc32'
+    share_producer (bool) :default => false
 
     <format>
       @type (json|ltsv|msgpack|attr:<record name>|<formatter name>) :default => json
@@ -335,6 +337,40 @@ For example, `$.source.ip` can be extracted with config `headers_from_record` an
 
 > Using this config to remove unused fields is discouraged. A [filter plugin](https://docs.fluentd.org/v/0.12/filter) can be used for this purpose.
 
+#### Send only a sub field as a message payload
+
+If `record_key` is provided, the plugin sends only the sub field specified by that key.
+The configuration format is jsonpath.
+
+For example, given the following configuration and incoming record:
+
+configuration:
+
+    <match **>
+      @type kafka2
+      [...]
+      record_key '$.data'
+    </match>
+
+record:
+
+    {
+      "specversion" : "1.0",
+      "type" : "com.example.someevent",
+      "id" : "C234-1234-1234",
+      "time" : "2018-04-05T17:31:00Z",
+      "datacontenttype" : "application/json",
+      "data" : {
+        "appinfoA" : "abc",
+        "appinfoB" : 123,
+        "appinfoC" : true
+      },
+      ...
+    }
+
+only the `data` field will be serialized by the formatter and sent to Kafka.
+The top-level `data` key will be removed.
+
 ### Buffered output plugin
 
 This plugin uses ruby-kafka producer for writing data. This plugin is for v0.12. If you use v1, see `kafka2`.
@@ -460,6 +496,7 @@ You need to install rdkafka gem.
     # same with kafka2
     headers (hash) :default => {}
     headers_from_record (hash) :default => {}
+    record_key (string) :default => nil
 
     <format>
       @type (json|ltsv|msgpack|attr:<record name>|<formatter name>) :default => json
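With the default `json` formatter, the `record_key '$.data'` example added to the README above would deliver just the serialized sub-record as the Kafka message body; whitespace aside, the expected payload is:

    {"appinfoA":"abc","appinfoB":123,"appinfoC":true}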
data/fluent-plugin-kafka.gemspec CHANGED
@@ -13,7 +13,7 @@ Gem::Specification.new do |gem|
   gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
   gem.name = "fluent-plugin-kafka"
   gem.require_paths = ["lib"]
-  gem.version = '0.17.5'
+  gem.version = '0.18.0'
   gem.required_ruby_version = ">= 2.1.0"
 
   gem.add_dependency "fluentd", [">= 0.10.58", "< 2"]
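The version bump above is what ships to RubyGems; if you pin the plugin in a Gemfile, the matching constraint would be something like:

    # Gemfile
    gem "fluent-plugin-kafka", "~> 0.18.0"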
data/lib/fluent/plugin/out_kafka2.rb CHANGED
@@ -27,6 +27,11 @@ DESC
   config_param :partitioner_hash_function, :enum, list: [:crc32, :murmur2], :default => :crc32,
                :desc => "Specify kafka patrtitioner hash algorithm"
   config_param :default_partition, :integer, :default => nil
+  config_param :record_key, :string, :default => nil,
+               :desc => <<-DESC
+A jsonpath to a record value pointing to the field which will be passed to the formatter and sent as the Kafka message payload.
+If defined, only this field in the record will be sent to Kafka as the message payload.
+DESC
   config_param :use_default_for_unknown_topic, :bool, :default => false, :desc => "If true, default_topic is used when topic not found"
   config_param :client_id, :string, :default => 'fluentd'
   config_param :idempotent, :bool, :default => false, :desc => 'Enable idempotent producer'
@@ -81,6 +86,7 @@ DESC
 Add a regular expression to capture ActiveSupport notifications from the Kafka client
 requires activesupport gem - records will be generated under fluent_kafka_stats.**
 DESC
+  config_param :share_producer, :bool, :default => false, :desc => 'share kafka producer between flush threads'
 
   config_section :buffer do
     config_set_default :chunk_keys, ["topic"]
@@ -96,6 +102,12 @@ DESC
     super
 
     @kafka = nil
+    @producers = nil
+    @producers_mutex = nil
+    @shared_producer = nil
+
+    @writing_threads_mutex = Mutex.new
+    @writing_threads = Set.new
   end
 
   def refresh_client(raise_error = true)
@@ -185,15 +197,29 @@ DESC
     @exclude_field_accessors = @exclude_fields.map do |field|
       record_accessor_create(field)
     end
+
+    @record_field_accessor = nil
+    @record_field_accessor = record_accessor_create(@record_key) unless @record_key.nil?
   end
 
   def multi_workers_ready?
     true
   end
 
+  def create_producer
+    @kafka.producer(**@producer_opts)
+  end
+
   def start
     super
     refresh_client
+
+    if @share_producer
+      @shared_producer = create_producer
+    else
+      @producers = {}
+      @producers_mutex = Mutex.new
+    end
   end
 
   def close
@@ -206,6 +232,56 @@ DESC
     @kafka = nil
   end
 
+  def wait_writing_threads
+    done = false
+    until done do
+      @writing_threads_mutex.synchronize do
+        done = true if @writing_threads.empty?
+      end
+      sleep(1) unless done
+    end
+  end
+
+  def shutdown
+    super
+    wait_writing_threads
+    shutdown_producers
+  end
+
+  def shutdown_producers
+    if @share_producer
+      @shared_producer.shutdown
+      @shared_producer = nil
+    else
+      @producers_mutex.synchronize {
+        shutdown_threads = @producers.map { |key, producer|
+          th = Thread.new {
+            producer.shutdown
+          }
+          th.abort_on_exception = true
+          th
+        }
+        shutdown_threads.each { |th| th.join }
+        @producers = {}
+      }
+    end
+  end
+
+  def get_producer
+    if @share_producer
+      @shared_producer
+    else
+      @producers_mutex.synchronize {
+        producer = @producers[Thread.current.object_id]
+        unless producer
+          producer = create_producer
+          @producers[Thread.current.object_id] = producer
+        end
+        producer
+      }
+    end
+  end
+
   def setup_formatter(conf)
     type = conf['@type']
     case type
@@ -229,6 +305,8 @@ DESC
 
   # TODO: optimize write performance
   def write(chunk)
+    @writing_threads_mutex.synchronize { @writing_threads.add(Thread.current) }
+
     tag = chunk.metadata.tag
     topic = if @topic
               extract_placeholders(@topic, chunk)
@@ -237,13 +315,12 @@ DESC
             end
 
     messages = 0
-    record_buf = nil
 
     base_headers = @headers
     mutate_headers = !@headers_from_record_accessors.empty?
 
     begin
-      producer = @kafka.topic_producer(topic, **@producer_opts)
+      producer = get_producer
       chunk.msgpack_each { |time, record|
         begin
           record = inject_values_to_record(tag, time, record)
@@ -267,6 +344,7 @@ DESC
             end
           end
 
+          record = @record_field_accessor.call(record) unless @record_field_accessor.nil?
           record_buf = @formatter_proc.call(tag, time, record)
           record_buf_bytes = record_buf.bytesize
           if @max_send_limit_bytes && record_buf_bytes > @max_send_limit_bytes
@@ -283,7 +361,7 @@ DESC
           messages += 1
 
           producer.produce(record_buf, key: message_key, partition_key: partition_key, partition: partition, headers: headers,
-                           create_time: @use_event_time ? Time.at(time) : Time.now)
+                           create_time: @use_event_time ? Time.at(time) : Time.now, topic: topic)
       }
 
       if messages > 0
@@ -301,7 +379,6 @@ DESC
       end
     rescue Kafka::UnknownTopicOrPartition
       if @use_default_for_unknown_topic && topic != @default_topic
-        producer.shutdown if producer
         log.warn "'#{topic}' topic not found. Retry with '#{default_topic}' topic"
         topic = @default_topic
         retry
@@ -321,7 +398,7 @@ DESC
       # Raise exception to retry sendind messages
       raise e unless ignore
     ensure
-      producer.shutdown if producer
+      @writing_threads_mutex.synchronize { @writing_threads.delete(Thread.current) }
     end
   end
 end
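The hunks above replace the per-flush `topic_producer` with long-lived producers: each flush thread lazily creates and caches its own producer (keyed by `Thread.current.object_id`), or a single producer is shared when `share_producer` is set, and producers are shut down only after in-flight write threads drain. A condensed, standalone Ruby sketch of that caching pattern (the `ProducerPool` name and the `factory` block are illustrative, not part of the plugin):

    # Illustrative restatement of the keep-alive pattern used in out_kafka2 above.
    class ProducerPool
      def initialize(shared: false, &factory)
        @factory = factory                      # e.g. proc { kafka.producer(**opts) }
        @shared_producer = shared ? factory.call : nil
        @producers = {}                         # one producer per flush thread
        @mutex = Mutex.new
      end

      def get
        return @shared_producer if @shared_producer
        @mutex.synchronize do
          @producers[Thread.current.object_id] ||= @factory.call
        end
      end

      def shutdown_all
        if @shared_producer
          @shared_producer.shutdown
        else
          @mutex.synchronize do
            @producers.each_value(&:shutdown)   # the plugin shuts these down in parallel threads
            @producers.clear
          end
        end
      end
    end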
data/lib/fluent/plugin/out_rdkafka2.rb CHANGED
@@ -73,6 +73,11 @@ DESC
                :desc => <<-DESC
 The codec the producer uses to compress messages. Used for compression.codec
 Supported codecs: (gzip|snappy)
+DESC
+  config_param :record_key, :string, :default => nil,
+               :desc => <<-DESC
+A jsonpath to a record value pointing to the field which will be passed to the formatter and sent as the Kafka message payload.
+If defined, only this field in the record will be sent to Kafka as the message payload.
 DESC
   config_param :use_event_time, :bool, :default => false, :desc => 'Use fluentd event time for rdkafka timestamp'
   config_param :max_send_limit_bytes, :size, :default => nil
@@ -230,6 +235,9 @@ DESC
     end
 
     @enqueue_rate = EnqueueRate.new(@max_enqueue_bytes_per_second) unless @max_enqueue_bytes_per_second.nil?
+
+    @record_field_accessor = nil
+    @record_field_accessor = record_accessor_create(@record_key) unless @record_key.nil?
   end
 
   def build_config
@@ -270,6 +278,8 @@ DESC
     config[:"queue.buffering.max.messages"] = @rdkafka_buffering_max_messages if @rdkafka_buffering_max_messages
     config[:"message.max.bytes"] = @rdkafka_message_max_bytes if @rdkafka_message_max_bytes
     config[:"batch.num.messages"] = @rdkafka_message_max_num if @rdkafka_message_max_num
+    config[:"sasl.username"] = @username if @username
+    config[:"sasl.password"] = @password if @password
 
     @rdkafka_options.each { |k, v|
       config[k.to_sym] = v
@@ -371,8 +381,6 @@ DESC
     end
 
     handlers = []
-    record_buf = nil
-    record_buf_bytes = nil
 
     headers = @headers.clone
 
@@ -395,6 +403,7 @@ DESC
         end
       end
 
+      record = @record_field_accessor.call(record) unless @record_field_accessor.nil?
       record_buf = @formatter_proc.call(tag, time, record)
       record_buf_bytes = record_buf.bytesize
       if @max_send_limit_bytes && record_buf_bytes > @max_send_limit_bytes
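With the `sasl.username` / `sasl.password` mapping added above, SASL credentials can now be given directly as plugin parameters. A minimal sketch follows; broker, topic, and credentials are placeholders, and the `security.protocol` and `sasl.mechanism` entries in `rdkafka_options` are assumptions about a typical SASL/PLAIN listener, not something this release changes:

    <match app.**>
      @type rdkafka2
      brokers broker1:9093
      default_topic logs
      # New in 0.18.0: forwarded to librdkafka as sasl.username / sasl.password
      username kafka-user
      password secret
      rdkafka_options {"security.protocol":"sasl_ssl","sasl.mechanism":"PLAIN"}
      <format>
        @type json
      </format>
    </match>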
data/test/plugin/test_out_kafka2.rb CHANGED
@@ -105,6 +105,21 @@ class Kafka2OutputTest < Test::Unit::TestCase
     assert_equal([expected_message], actual_messages)
   end
 
+  def test_record_key
+    conf = config(default_topic: TOPIC_NAME) +
+           config_element('ROOT', '', {"record_key" => "$.data"}, [])
+    target_driver = create_target_driver
+    target_driver.run(expect_records: 1, timeout: 5) do
+      sleep 2
+      d = create_driver(conf)
+      d.run do
+        d.feed('test', event_time, {'data' => {'a' => 'b', 'foo' => 'bar', 'message' => 'test'}, 'message_key' => '123456'})
+      end
+    end
+    actual_messages = target_driver.events.collect { |event| event[2] }
+    assert_equal([{'a' => 'b', 'foo' => 'bar', 'message' => 'test'}], actual_messages)
+  end
+
   def test_exclude_fields
     conf = config(default_topic: TOPIC_NAME) +
            config_element('ROOT', '', {"exclude_fields" => "$.foo"}, [])
data/test/plugin/test_out_rdkafka2.rb CHANGED
@@ -163,5 +163,20 @@ class Rdkafka2OutputTest < Test::Unit::TestCase
     actual_messages = target_driver.events.collect { |event| event[2] }
     assert_equal(expected_messages, actual_messages)
   end
+
+  def test_record_key
+    conf = config(default_topic: TOPIC_NAME) +
+           config_element('ROOT', '', {"record_key" => "$.data"}, [])
+    target_driver = create_target_driver
+    target_driver.run(expect_records: 1, timeout: 5) do
+      sleep 2
+      d = create_driver(conf)
+      d.run do
+        d.feed('test', event_time, {'data' => {'a' => 'b', 'foo' => 'bar', 'message' => 'test'}, 'message_key' => '123456'})
+      end
+    end
+    actual_messages = target_driver.events.collect { |event| event[2] }
+    assert_equal([{'a' => 'b', 'foo' => 'bar', 'message' => 'test'}], actual_messages)
+  end
   end
 end
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: fluent-plugin-kafka
 version: !ruby/object:Gem::Version
-  version: 0.17.5
+  version: 0.18.0
 platform: ruby
 authors:
 - Hidemasa Togashi
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2022-03-18 00:00:00.000000000 Z
+date: 2022-07-21 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: fluentd
@@ -193,7 +193,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
   - !ruby/object:Gem::Version
     version: '0'
 requirements: []
-rubygems_version: 3.2.5
+rubygems_version: 3.3.5
 signing_key:
 specification_version: 4
 summary: Fluentd plugin for Apache Kafka > 0.8