fluent-plugin-kafka 0.17.5 → 0.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: eb7f693666ff7fd4bdb43b249d3e60e882ebf99e80402de2a5310b61668ef9e7
- data.tar.gz: d4b141409cf83402ad1e28b9579317c672b20c9760b8f58b8d54800e8c46313b
+ metadata.gz: 0a8590f6d34bbdb2faa991bba6e32a1424623c7ddd9609dc1d2ffcbeabaa20e1
+ data.tar.gz: a653d9dba00fa82f18071304be9093cd6d620bf4bda29d5e00a75113d05e8e3e
  SHA512:
- metadata.gz: 8f86c7c82fbf5db63ef0c92737ba4935a3a60534d434dfb9552c2ad624ec1e28fea9876c689a29ca6b16c16abc3818fdfb24bd7540495f980daa8a5509b29ca0
- data.tar.gz: 8b1dfba11d40e12f9b0b1fbf4ff947678fb5ef2ab3fbd3969e363935c8e895c29f497273ddb86c80c4e1ebc9d254045275dc7e08b07788d4573799c97ce7a393
+ metadata.gz: 69a784e7d69e2229a036110997b2691143008d4ca786e18415e9a806a6d577b38465a4a3931166418a5b7e022dddbc62d014c0c784277ccaaecc9b6570822564
+ data.tar.gz: 9f14d8d10a45bf390c801c93d79b893f4248ded7e37dc87f8c4453b04158754e1006aa7603874986e8c93812fe899197219abccb3bdb0ae4daacef6d516a0af3
data/ChangeLog CHANGED
@@ -1,3 +1,8 @@
+ Release 0.18.0 - 2022/07/21
+ * out_kafka2: Keep Kafka connections alive between flushes
+ * out_rdkafka2: Allow setting SASL credentials via `username` and `password` parameters
+ * out_kafka2/out_rdkafka2: Add `record_key` parameter
+
  Release 0.17.5 - 2022/03/18
  * out_kafka2: Add `resolve_seed_brokers` parameter
 
data/README.md CHANGED
@@ -193,6 +193,7 @@ If `ruby-kafka` doesn't fit your kafka environment, check `rdkafka2` plugin inst
  message_key_key (string) :default => 'message_key'
  default_topic (string) :default => nil
  default_partition_key (string) :default => nil
+ record_key (string) :default => nil
  default_message_key (string) :default => nil
  exclude_topic_key (bool) :default => false
  exclude_partition_key (bool) :default => false
@@ -205,6 +206,7 @@ If `ruby-kafka` doesn't fit your kafka environment, check `rdkafka2` plugin inst
  use_default_for_unknown_topic (bool) :default => false
  discard_kafka_delivery_failed (bool) :default => false (No discard)
  partitioner_hash_function (enum) (crc32|murmur2) :default => 'crc32'
+ share_producer (bool) :default => false
 
  <format>
  @type (json|ltsv|msgpack|attr:<record name>|<formatter name>) :default => json
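For orientation, the two parameters added above (`record_key` and `share_producer`) can be combined in a single `kafka2` output block. The snippet below is a minimal sketch rather than an excerpt from the README; the tag pattern, broker address, and topic name are placeholders:

    <match app.**>
      @type kafka2
      # placeholder broker list and topic
      brokers broker1:9092
      default_topic app-events
      # added in 0.18.0: send only this sub field as the message payload
      record_key '$.data'
      # added in 0.18.0: reuse one producer across flush threads
      share_producer true
      <format>
        @type json
      </format>
    </match>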
@@ -335,6 +337,40 @@ For example, `$.source.ip` can be extracted with config `headers_from_record` an
 
  > Using this config to remove unused fields is discouraged. A [filter plugin](https://docs.fluentd.org/v/0.12/filter) can be used for this purpose.
 
+ #### Send only a sub field as a message payload
+
+ If `record_key` is provided, the plugin sends only the sub field addressed by that key.
+ The parameter takes a jsonpath expression.
+
+ For example, given the following configuration and incoming record:
+
+ configuration:
+
+     <match **>
+       @type kafka2
+       [...]
+       record_key '$.data'
+     </match>
+
+ record:
+
+     {
+       "specversion" : "1.0",
+       "type" : "com.example.someevent",
+       "id" : "C234-1234-1234",
+       "time" : "2018-04-05T17:31:00Z",
+       "datacontenttype" : "application/json",
+       "data" : {
+         "appinfoA" : "abc",
+         "appinfoB" : 123,
+         "appinfoC" : true
+       },
+       ...
+     }
+
+ only the `data` field is serialized by the formatter and sent to Kafka; the top-level `data` key itself is removed.
+
  ### Buffered output plugin
 
  This plugin uses ruby-kafka producer for writing data. This plugin is for v0.12. If you use v1, see `kafka2`.
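To make the `record_key` example above concrete: assuming the default `json` formatter (the `<format>` block is elided in the example), the message body written to Kafka for that record would be just the serialized `data` sub field:

    {"appinfoA":"abc","appinfoB":123,"appinfoC":true}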
@@ -460,6 +496,7 @@ You need to install rdkafka gem.
  # same with kafka2
  headers (hash) :default => {}
  headers_from_record (hash) :default => {}
+ record_key (string) :default => nil
 
  <format>
  @type (json|ltsv|msgpack|attr:<record name>|<formatter name>) :default => json
data/fluent-plugin-kafka.gemspec CHANGED
@@ -13,7 +13,7 @@ Gem::Specification.new do |gem|
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
  gem.name = "fluent-plugin-kafka"
  gem.require_paths = ["lib"]
- gem.version = '0.17.5'
+ gem.version = '0.18.0'
  gem.required_ruby_version = ">= 2.1.0"
 
  gem.add_dependency "fluentd", [">= 0.10.58", "< 2"]
data/lib/fluent/plugin/out_kafka2.rb CHANGED
@@ -27,6 +27,11 @@ DESC
  config_param :partitioner_hash_function, :enum, list: [:crc32, :murmur2], :default => :crc32,
  :desc => "Specify kafka patrtitioner hash algorithm"
  config_param :default_partition, :integer, :default => nil
+ config_param :record_key, :string, :default => nil,
+ :desc => <<-DESC
+ A jsonpath to a record value pointing to the field which will be passed to the formatter and sent as the Kafka message payload.
+ If defined, only this field in the record will be sent to Kafka as the message payload.
+ DESC
  config_param :use_default_for_unknown_topic, :bool, :default => false, :desc => "If true, default_topic is used when topic not found"
  config_param :client_id, :string, :default => 'fluentd'
  config_param :idempotent, :bool, :default => false, :desc => 'Enable idempotent producer'
@@ -81,6 +86,7 @@ DESC
  Add a regular expression to capture ActiveSupport notifications from the Kafka client
  requires activesupport gem - records will be generated under fluent_kafka_stats.**
  DESC
+ config_param :share_producer, :bool, :default => false, :desc => 'share kafka producer between flush threads'
 
  config_section :buffer do
  config_set_default :chunk_keys, ["topic"]
@@ -96,6 +102,12 @@ DESC
  super
 
  @kafka = nil
+ @producers = nil
+ @producers_mutex = nil
+ @shared_producer = nil
+
+ @writing_threads_mutex = Mutex.new
+ @writing_threads = Set.new
  end
 
  def refresh_client(raise_error = true)
@@ -185,15 +197,29 @@ DESC
  @exclude_field_accessors = @exclude_fields.map do |field|
  record_accessor_create(field)
  end
+
+ @record_field_accessor = nil
+ @record_field_accessor = record_accessor_create(@record_key) unless @record_key.nil?
  end
 
  def multi_workers_ready?
  true
  end
 
+ def create_producer
+ @kafka.producer(**@producer_opts)
+ end
+
  def start
  super
  refresh_client
+
+ if @share_producer
+ @shared_producer = create_producer
+ else
+ @producers = {}
+ @producers_mutex = Mutex.new
+ end
  end
 
  def close
@@ -206,6 +232,56 @@ DESC
  @kafka = nil
  end
 
+ def wait_writing_threads
+ done = false
+ until done do
+ @writing_threads_mutex.synchronize do
+ done = true if @writing_threads.empty?
+ end
+ sleep(1) unless done
+ end
+ end
+
+ def shutdown
+ super
+ wait_writing_threads
+ shutdown_producers
+ end
+
+ def shutdown_producers
+ if @share_producer
+ @shared_producer.shutdown
+ @shared_producer = nil
+ else
+ @producers_mutex.synchronize {
+ shutdown_threads = @producers.map { |key, producer|
+ th = Thread.new {
+ producer.shutdown
+ }
+ th.abort_on_exception = true
+ th
+ }
+ shutdown_threads.each { |th| th.join }
+ @producers = {}
+ }
+ end
+ end
+
+ def get_producer
+ if @share_producer
+ @shared_producer
+ else
+ @producers_mutex.synchronize {
+ producer = @producers[Thread.current.object_id]
+ unless producer
+ producer = create_producer
+ @producers[Thread.current.object_id] = producer
+ end
+ producer
+ }
+ end
+ end
+
  def setup_formatter(conf)
  type = conf['@type']
  case type
@@ -229,6 +305,8 @@ DESC
 
  # TODO: optimize write performance
  def write(chunk)
+ @writing_threads_mutex.synchronize { @writing_threads.add(Thread.current) }
+
  tag = chunk.metadata.tag
  topic = if @topic
  extract_placeholders(@topic, chunk)
@@ -237,13 +315,12 @@ DESC
  end
 
  messages = 0
- record_buf = nil
 
  base_headers = @headers
  mutate_headers = !@headers_from_record_accessors.empty?
 
  begin
- producer = @kafka.topic_producer(topic, **@producer_opts)
+ producer = get_producer
  chunk.msgpack_each { |time, record|
  begin
  record = inject_values_to_record(tag, time, record)
@@ -267,6 +344,7 @@ DESC
  end
  end
 
+ record = @record_field_accessor.call(record) unless @record_field_accessor.nil?
  record_buf = @formatter_proc.call(tag, time, record)
  record_buf_bytes = record_buf.bytesize
  if @max_send_limit_bytes && record_buf_bytes > @max_send_limit_bytes
@@ -283,7 +361,7 @@ DESC
  messages += 1
 
  producer.produce(record_buf, key: message_key, partition_key: partition_key, partition: partition, headers: headers,
- create_time: @use_event_time ? Time.at(time) : Time.now)
+ create_time: @use_event_time ? Time.at(time) : Time.now, topic: topic)
  }
 
  if messages > 0
@@ -301,7 +379,6 @@ DESC
  end
  rescue Kafka::UnknownTopicOrPartition
  if @use_default_for_unknown_topic && topic != @default_topic
- producer.shutdown if producer
  log.warn "'#{topic}' topic not found. Retry with '#{default_topic}' topic"
  topic = @default_topic
  retry
@@ -321,7 +398,7 @@ DESC
  # Raise exception to retry sendind messages
  raise e unless ignore
  ensure
- producer.shutdown if producer
+ @writing_threads_mutex.synchronize { @writing_threads.delete(Thread.current) }
  end
  end
  end
data/lib/fluent/plugin/out_rdkafka2.rb CHANGED
@@ -73,6 +73,11 @@ DESC
  :desc => <<-DESC
  The codec the producer uses to compress messages. Used for compression.codec
  Supported codecs: (gzip|snappy)
+ DESC
+ config_param :record_key, :string, :default => nil,
+ :desc => <<-DESC
+ A jsonpath to a record value pointing to the field which will be passed to the formatter and sent as the Kafka message payload.
+ If defined, only this field in the record will be sent to Kafka as the message payload.
  DESC
  config_param :use_event_time, :bool, :default => false, :desc => 'Use fluentd event time for rdkafka timestamp'
  config_param :max_send_limit_bytes, :size, :default => nil
@@ -230,6 +235,9 @@ DESC
  end
 
  @enqueue_rate = EnqueueRate.new(@max_enqueue_bytes_per_second) unless @max_enqueue_bytes_per_second.nil?
+
+ @record_field_accessor = nil
+ @record_field_accessor = record_accessor_create(@record_key) unless @record_key.nil?
  end
 
  def build_config
@@ -270,6 +278,8 @@ DESC
  config[:"queue.buffering.max.messages"] = @rdkafka_buffering_max_messages if @rdkafka_buffering_max_messages
  config[:"message.max.bytes"] = @rdkafka_message_max_bytes if @rdkafka_message_max_bytes
  config[:"batch.num.messages"] = @rdkafka_message_max_num if @rdkafka_message_max_num
+ config[:"sasl.username"] = @username if @username
+ config[:"sasl.password"] = @password if @password
 
  @rdkafka_options.each { |k, v|
  config[k.to_sym] = v
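The two `sasl.*` assignments above implement the ChangeLog entry about setting SASL credentials for `out_rdkafka2`. Below is a minimal sketch of how they might be used; the broker, topic, and the librdkafka settings passed through `rdkafka_options` are illustrative assumptions, not values taken from this diff:

    <match app.**>
      @type rdkafka2
      # placeholder broker list and topic
      brokers broker1:9093
      default_topic app-events
      # added in 0.18.0: forwarded to librdkafka as sasl.username / sasl.password
      username my-user
      password my-secret
      # security.protocol and sasl.mechanism are assumed to be needed as well;
      # they can be passed through rdkafka_options
      rdkafka_options {"security.protocol":"sasl_ssl","sasl.mechanism":"PLAIN"}
      <format>
        @type json
      </format>
    </match>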
@@ -371,8 +381,6 @@ DESC
  end
 
  handlers = []
- record_buf = nil
- record_buf_bytes = nil
 
  headers = @headers.clone
 
@@ -395,6 +403,7 @@ DESC
  end
  end
 
+ record = @record_field_accessor.call(record) unless @record_field_accessor.nil?
  record_buf = @formatter_proc.call(tag, time, record)
  record_buf_bytes = record_buf.bytesize
  if @max_send_limit_bytes && record_buf_bytes > @max_send_limit_bytes
data/test/plugin/test_out_kafka2.rb CHANGED
@@ -105,6 +105,21 @@ class Kafka2OutputTest < Test::Unit::TestCase
  assert_equal([expected_message], actual_messages)
  end
 
+ def test_record_key
+ conf = config(default_topic: TOPIC_NAME) +
+ config_element('ROOT', '', {"record_key" => "$.data"}, [])
+ target_driver = create_target_driver
+ target_driver.run(expect_records: 1, timeout: 5) do
+ sleep 2
+ d = create_driver(conf)
+ d.run do
+ d.feed('test', event_time, {'data' => {'a' => 'b', 'foo' => 'bar', 'message' => 'test'}, 'message_key' => '123456'})
+ end
+ end
+ actual_messages = target_driver.events.collect { |event| event[2] }
+ assert_equal([{'a' => 'b', 'foo' => 'bar', 'message' => 'test'}], actual_messages)
+ end
+
  def test_exclude_fields
  conf = config(default_topic: TOPIC_NAME) +
  config_element('ROOT', '', {"exclude_fields" => "$.foo"}, [])
data/test/plugin/test_out_rdkafka2.rb CHANGED
@@ -163,5 +163,20 @@ class Rdkafka2OutputTest < Test::Unit::TestCase
  actual_messages = target_driver.events.collect { |event| event[2] }
  assert_equal(expected_messages, actual_messages)
  end
+
+ def test_record_key
+ conf = config(default_topic: TOPIC_NAME) +
+ config_element('ROOT', '', {"record_key" => "$.data"}, [])
+ target_driver = create_target_driver
+ target_driver.run(expect_records: 1, timeout: 5) do
+ sleep 2
+ d = create_driver(conf)
+ d.run do
+ d.feed('test', event_time, {'data' => {'a' => 'b', 'foo' => 'bar', 'message' => 'test'}, 'message_key' => '123456'})
+ end
+ end
+ actual_messages = target_driver.events.collect { |event| event[2] }
+ assert_equal([{'a' => 'b', 'foo' => 'bar', 'message' => 'test'}], actual_messages)
+ end
  end
  end
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: fluent-plugin-kafka
  version: !ruby/object:Gem::Version
- version: 0.17.5
+ version: 0.18.0
  platform: ruby
  authors:
  - Hidemasa Togashi
@@ -9,7 +9,7 @@ authors:
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2022-03-18 00:00:00.000000000 Z
+ date: 2022-07-21 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: fluentd
@@ -193,7 +193,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
  - !ruby/object:Gem::Version
  version: '0'
  requirements: []
- rubygems_version: 3.2.5
+ rubygems_version: 3.3.5
  signing_key:
  specification_version: 4
  summary: Fluentd plugin for Apache Kafka > 0.8