fluent-plugin-kafka 0.17.5 → 0.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ChangeLog +5 -0
- data/README.md +37 -0
- data/fluent-plugin-kafka.gemspec +1 -1
- data/lib/fluent/plugin/out_kafka2.rb +82 -5
- data/lib/fluent/plugin/out_rdkafka2.rb +11 -2
- data/test/plugin/test_out_kafka2.rb +15 -0
- data/test/plugin/test_out_rdkafka2.rb +15 -0
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 0a8590f6d34bbdb2faa991bba6e32a1424623c7ddd9609dc1d2ffcbeabaa20e1
+  data.tar.gz: a653d9dba00fa82f18071304be9093cd6d620bf4bda29d5e00a75113d05e8e3e
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 69a784e7d69e2229a036110997b2691143008d4ca786e18415e9a806a6d577b38465a4a3931166418a5b7e022dddbc62d014c0c784277ccaaecc9b6570822564
+  data.tar.gz: 9f14d8d10a45bf390c801c93d79b893f4248ded7e37dc87f8c4453b04158754e1006aa7603874986e8c93812fe899197219abccb3bdb0ae4daacef6d516a0af3
data/ChangeLog
CHANGED
@@ -1,3 +1,8 @@
+Release 0.18.0 - 2022/07/21
+* out_kafka2: Keep alive Kafka connections between flushes
+* out_rdkafka2: Enable to set SASL credentials via `username` and `password` parameters
+* out_kafka2/out_rdkafka2: Add `record_key` parameter
+
 Release 0.17.5 - 2022/03/18
 * out_kafka2: Add `resolve_seed_brokers` parameter
 
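The new out_rdkafka2 SASL parameters do not appear in the README excerpts below, so here is a minimal configuration sketch. Broker addresses, topic, and credentials are placeholders; `username` and `password` are the parameters added in this release, and other librdkafka settings (option names assumed here) would still be passed through `rdkafka_options`:

    <match app.**>
      @type rdkafka2
      brokers broker1:9092,broker2:9092
      default_topic events
      # New in 0.18.0: mapped to librdkafka's sasl.username / sasl.password
      username my-sasl-user
      password my-sasl-password
      # Protocol/mechanism settings (names assumed) still go via rdkafka_options
      rdkafka_options {"security.protocol":"sasl_ssl","sasl.mechanism":"PLAIN"}
      <format>
        @type json
      </format>
    </match>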
data/README.md
CHANGED
@@ -193,6 +193,7 @@ If `ruby-kafka` doesn't fit your kafka environment, check `rdkafka2` plugin inst
 message_key_key (string) :default => 'message_key'
 default_topic (string) :default => nil
 default_partition_key (string) :default => nil
+record_key (string) :default => nil
 default_message_key (string) :default => nil
 exclude_topic_key (bool) :default => false
 exclude_partition_key (bool) :default => false
@@ -205,6 +206,7 @@ If `ruby-kafka` doesn't fit your kafka environment, check `rdkafka2` plugin inst
 use_default_for_unknown_topic (bool) :default => false
 discard_kafka_delivery_failed (bool) :default => false (No discard)
 partitioner_hash_function (enum) (crc32|murmur2) :default => 'crc32'
+share_producer (bool) :default => false
 
 <format>
   @type (json|ltsv|msgpack|attr:<record name>|<formatter name>) :default => json
@@ -335,6 +337,40 @@ For example, `$.source.ip` can be extracted with config `headers_from_record` an
 
 > Using this config to remove unused fields is discouraged. A [filter plugin](https://docs.fluentd.org/v/0.12/filter) can be used for this purpose.
 
+#### Send only a sub field as a message payload
+
+If `record_key` is provided, the plugin sends only a sub field given by that key.
+The configuration format is jsonpath.
+
+e.g. When the following configuration and the incoming record are given:
+
+configuration:
+
+    <match **>
+      @type kafka2
+      [...]
+      record_key '$.data'
+    </match>
+
+record:
+
+    {
+      "specversion" : "1.0",
+      "type" : "com.example.someevent",
+      "id" : "C234-1234-1234",
+      "time" : "2018-04-05T17:31:00Z",
+      "datacontenttype" : "application/json",
+      "data" : {
+        "appinfoA" : "abc",
+        "appinfoB" : 123,
+        "appinfoC" : true
+      },
+      ...
+    }
+
+only the `data` field will be serialized by the formatter and sent to Kafka.
+The toplevel `data` key will be removed.
+
 ### Buffered output plugin
 
 This plugin uses ruby-kafka producer for writing data. This plugin is for v0.12. If you use v1, see `kafka2`.
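As a rough illustration (not part of the README diff itself): with the default json formatter, the record above would reach Kafka as just the serialized `data` object, i.e.

    {"appinfoA":"abc","appinfoB":123,"appinfoC":true}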
@@ -460,6 +496,7 @@ You need to install rdkafka gem.
 # same with kafka2
 headers (hash) :default => {}
 headers_from_record (hash) :default => {}
+record_key (string) :default => nil
 
 <format>
   @type (json|ltsv|msgpack|attr:<record name>|<formatter name>) :default => json
data/fluent-plugin-kafka.gemspec
CHANGED
@@ -13,7 +13,7 @@ Gem::Specification.new do |gem|
   gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
   gem.name = "fluent-plugin-kafka"
   gem.require_paths = ["lib"]
-  gem.version = '0.
+  gem.version = '0.18.0'
   gem.required_ruby_version = ">= 2.1.0"
 
   gem.add_dependency "fluentd", [">= 0.10.58", "< 2"]
data/lib/fluent/plugin/out_kafka2.rb
CHANGED
@@ -27,6 +27,11 @@ DESC
     config_param :partitioner_hash_function, :enum, list: [:crc32, :murmur2], :default => :crc32,
                  :desc => "Specify kafka patrtitioner hash algorithm"
     config_param :default_partition, :integer, :default => nil
+    config_param :record_key, :string, :default => nil,
+                 :desc => <<-DESC
+A jsonpath to a record value pointing to the field which will be passed to the formatter and sent as the Kafka message payload.
+If defined, only this field in the record will be sent to Kafka as the message payload.
+DESC
     config_param :use_default_for_unknown_topic, :bool, :default => false, :desc => "If true, default_topic is used when topic not found"
     config_param :client_id, :string, :default => 'fluentd'
     config_param :idempotent, :bool, :default => false, :desc => 'Enable idempotent producer'
@@ -81,6 +86,7 @@ DESC
 Add a regular expression to capture ActiveSupport notifications from the Kafka client
 requires activesupport gem - records will be generated under fluent_kafka_stats.**
 DESC
+    config_param :share_producer, :bool, :default => false, :desc => 'share kafka producer between flush threads'
 
     config_section :buffer do
       config_set_default :chunk_keys, ["topic"]
@@ -96,6 +102,12 @@ DESC
       super
 
       @kafka = nil
+      @producers = nil
+      @producers_mutex = nil
+      @shared_producer = nil
+
+      @writing_threads_mutex = Mutex.new
+      @writing_threads = Set.new
     end
 
     def refresh_client(raise_error = true)
@@ -185,15 +197,29 @@ DESC
         @exclude_field_accessors = @exclude_fields.map do |field|
           record_accessor_create(field)
         end
+
+      @record_field_accessor = nil
+      @record_field_accessor = record_accessor_create(@record_key) unless @record_key.nil?
     end
 
     def multi_workers_ready?
       true
     end
 
+    def create_producer
+      @kafka.producer(**@producer_opts)
+    end
+
     def start
       super
       refresh_client
+
+      if @share_producer
+        @shared_producer = create_producer
+      else
+        @producers = {}
+        @producers_mutex = Mutex.new
+      end
     end
 
     def close
@@ -206,6 +232,56 @@ DESC
       @kafka = nil
     end
 
+    def wait_writing_threads
+      done = false
+      until done do
+        @writing_threads_mutex.synchronize do
+          done = true if @writing_threads.empty?
+        end
+        sleep(1) unless done
+      end
+    end
+
+    def shutdown
+      super
+      wait_writing_threads
+      shutdown_producers
+    end
+
+    def shutdown_producers
+      if @share_producer
+        @shared_producer.shutdown
+        @shared_producer = nil
+      else
+        @producers_mutex.synchronize {
+          shutdown_threads = @producers.map { |key, producer|
+            th = Thread.new {
+              producer.shutdown
+            }
+            th.abort_on_exception = true
+            th
+          }
+          shutdown_threads.each { |th| th.join }
+          @producers = {}
+        }
+      end
+    end
+
+    def get_producer
+      if @share_producer
+        @shared_producer
+      else
+        @producers_mutex.synchronize {
+          producer = @producers[Thread.current.object_id]
+          unless producer
+            producer = create_producer
+            @producers[Thread.current.object_id] = producer
+          end
+          producer
+        }
+      end
+    end
+
     def setup_formatter(conf)
       type = conf['@type']
       case type
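The producer-lifecycle code above is what the ChangeLog's "keep alive Kafka connections between flushes" refers to: by default each flush thread now caches its own long-lived producer, and `share_producer true` makes every thread reuse a single shared one. A minimal out_kafka2 sketch with placeholder broker and topic values:

    <match app.**>
      @type kafka2
      brokers broker1:9092
      default_topic events
      # New in 0.18.0: reuse one producer across all flush threads;
      # the default (false) keeps one cached producer per flush thread instead.
      share_producer true
      <format>
        @type json
      </format>
    </match>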
@@ -229,6 +305,8 @@ DESC
 
     # TODO: optimize write performance
     def write(chunk)
+      @writing_threads_mutex.synchronize { @writing_threads.add(Thread.current) }
+
       tag = chunk.metadata.tag
       topic = if @topic
                 extract_placeholders(@topic, chunk)
@@ -237,13 +315,12 @@ DESC
               end
 
       messages = 0
-      record_buf = nil
 
       base_headers = @headers
       mutate_headers = !@headers_from_record_accessors.empty?
 
       begin
-        producer =
+        producer = get_producer
         chunk.msgpack_each { |time, record|
           begin
             record = inject_values_to_record(tag, time, record)
@@ -267,6 +344,7 @@ DESC
             end
           end
 
+          record = @record_field_accessor.call(record) unless @record_field_accessor.nil?
           record_buf = @formatter_proc.call(tag, time, record)
           record_buf_bytes = record_buf.bytesize
           if @max_send_limit_bytes && record_buf_bytes > @max_send_limit_bytes
@@ -283,7 +361,7 @@ DESC
           messages += 1
 
           producer.produce(record_buf, key: message_key, partition_key: partition_key, partition: partition, headers: headers,
-                           create_time: @use_event_time ? Time.at(time) : Time.now)
+                           create_time: @use_event_time ? Time.at(time) : Time.now, topic: topic)
         }
 
         if messages > 0
@@ -301,7 +379,6 @@ DESC
         end
       rescue Kafka::UnknownTopicOrPartition
         if @use_default_for_unknown_topic && topic != @default_topic
-          producer.shutdown if producer
           log.warn "'#{topic}' topic not found. Retry with '#{default_topic}' topic"
           topic = @default_topic
           retry
@@ -321,7 +398,7 @@ DESC
       # Raise exception to retry sendind messages
       raise e unless ignore
     ensure
-
+      @writing_threads_mutex.synchronize { @writing_threads.delete(Thread.current) }
     end
   end
 end
data/lib/fluent/plugin/out_rdkafka2.rb
CHANGED
@@ -73,6 +73,11 @@ DESC
                  :desc => <<-DESC
 The codec the producer uses to compress messages. Used for compression.codec
 Supported codecs: (gzip|snappy)
+DESC
+    config_param :record_key, :string, :default => nil,
+                 :desc => <<-DESC
+A jsonpath to a record value pointing to the field which will be passed to the formatter and sent as the Kafka message payload.
+If defined, only this field in the record will be sent to Kafka as the message payload.
 DESC
     config_param :use_event_time, :bool, :default => false, :desc => 'Use fluentd event time for rdkafka timestamp'
     config_param :max_send_limit_bytes, :size, :default => nil
@@ -230,6 +235,9 @@ DESC
       end
 
       @enqueue_rate = EnqueueRate.new(@max_enqueue_bytes_per_second) unless @max_enqueue_bytes_per_second.nil?
+
+      @record_field_accessor = nil
+      @record_field_accessor = record_accessor_create(@record_key) unless @record_key.nil?
     end
 
     def build_config
@@ -270,6 +278,8 @@ DESC
       config[:"queue.buffering.max.messages"] = @rdkafka_buffering_max_messages if @rdkafka_buffering_max_messages
       config[:"message.max.bytes"] = @rdkafka_message_max_bytes if @rdkafka_message_max_bytes
       config[:"batch.num.messages"] = @rdkafka_message_max_num if @rdkafka_message_max_num
+      config[:"sasl.username"] = @username if @username
+      config[:"sasl.password"] = @password if @password
 
       @rdkafka_options.each { |k, v|
         config[k.to_sym] = v
@@ -371,8 +381,6 @@ DESC
       end
 
       handlers = []
-      record_buf = nil
-      record_buf_bytes = nil
 
       headers = @headers.clone
 
@@ -395,6 +403,7 @@ DESC
           end
         end
 
+        record = @record_field_accessor.call(record) unless @record_field_accessor.nil?
         record_buf = @formatter_proc.call(tag, time, record)
         record_buf_bytes = record_buf.bytesize
         if @max_send_limit_bytes && record_buf_bytes > @max_send_limit_bytes
data/test/plugin/test_out_kafka2.rb
CHANGED
@@ -105,6 +105,21 @@ class Kafka2OutputTest < Test::Unit::TestCase
     assert_equal([expected_message], actual_messages)
   end
 
+  def test_record_key
+    conf = config(default_topic: TOPIC_NAME) +
+           config_element('ROOT', '', {"record_key" => "$.data"}, [])
+    target_driver = create_target_driver
+    target_driver.run(expect_records: 1, timeout: 5) do
+      sleep 2
+      d = create_driver(conf)
+      d.run do
+        d.feed('test', event_time, {'data' => {'a' => 'b', 'foo' => 'bar', 'message' => 'test'}, 'message_key' => '123456'})
+      end
+    end
+    actual_messages = target_driver.events.collect { |event| event[2] }
+    assert_equal([{'a' => 'b', 'foo' => 'bar', 'message' => 'test'}], actual_messages)
+  end
+
   def test_exclude_fields
     conf = config(default_topic: TOPIC_NAME) +
            config_element('ROOT', '', {"exclude_fields" => "$.foo"}, [])
data/test/plugin/test_out_rdkafka2.rb
CHANGED
@@ -163,5 +163,20 @@ class Rdkafka2OutputTest < Test::Unit::TestCase
     actual_messages = target_driver.events.collect { |event| event[2] }
     assert_equal(expected_messages, actual_messages)
   end
+
+  def test_record_key
+    conf = config(default_topic: TOPIC_NAME) +
+           config_element('ROOT', '', {"record_key" => "$.data"}, [])
+    target_driver = create_target_driver
+    target_driver.run(expect_records: 1, timeout: 5) do
+      sleep 2
+      d = create_driver(conf)
+      d.run do
+        d.feed('test', event_time, {'data' => {'a' => 'b', 'foo' => 'bar', 'message' => 'test'}, 'message_key' => '123456'})
+      end
+    end
+    actual_messages = target_driver.events.collect { |event| event[2] }
+    assert_equal([{'a' => 'b', 'foo' => 'bar', 'message' => 'test'}], actual_messages)
+  end
 end
 end
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: fluent-plugin-kafka
 version: !ruby/object:Gem::Version
-  version: 0.
+  version: 0.18.0
 platform: ruby
 authors:
 - Hidemasa Togashi
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2022-
+date: 2022-07-21 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: fluentd
@@ -193,7 +193,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
 - !ruby/object:Gem::Version
   version: '0'
 requirements: []
-rubygems_version: 3.
+rubygems_version: 3.3.5
 signing_key:
 specification_version: 4
 summary: Fluentd plugin for Apache Kafka > 0.8