fluent-plugin-kafka 0.17.5 → 0.18.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ChangeLog +5 -0
- data/README.md +37 -0
- data/fluent-plugin-kafka.gemspec +1 -1
- data/lib/fluent/plugin/out_kafka2.rb +82 -5
- data/lib/fluent/plugin/out_rdkafka2.rb +11 -2
- data/test/plugin/test_out_kafka2.rb +15 -0
- data/test/plugin/test_out_rdkafka2.rb +15 -0
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0a8590f6d34bbdb2faa991bba6e32a1424623c7ddd9609dc1d2ffcbeabaa20e1
|
4
|
+
data.tar.gz: a653d9dba00fa82f18071304be9093cd6d620bf4bda29d5e00a75113d05e8e3e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 69a784e7d69e2229a036110997b2691143008d4ca786e18415e9a806a6d577b38465a4a3931166418a5b7e022dddbc62d014c0c784277ccaaecc9b6570822564
|
7
|
+
data.tar.gz: 9f14d8d10a45bf390c801c93d79b893f4248ded7e37dc87f8c4453b04158754e1006aa7603874986e8c93812fe899197219abccb3bdb0ae4daacef6d516a0af3
|
data/ChangeLog
CHANGED
@@ -1,3 +1,8 @@
|
|
1
|
+
Release 0.18.0 - 2022/07/21
|
2
|
+
* out_kafka2: Keep alive Kafka connections between flushes
|
3
|
+
* out_rdkafka2: Enable to set SASL credentials via `username` and `password` parameters
|
4
|
+
* out_kafka2/out_rdkafka2: Add `record_key` parameter
|
5
|
+
|
1
6
|
Release 0.17.5 - 2022/03/18
|
2
7
|
* out_kafka2: Add `resolve_seed_brokers` parameter
|
3
8
|
|
data/README.md
CHANGED
@@ -193,6 +193,7 @@ If `ruby-kafka` doesn't fit your kafka environment, check `rdkafka2` plugin inst
|
|
193
193
|
message_key_key (string) :default => 'message_key'
|
194
194
|
default_topic (string) :default => nil
|
195
195
|
default_partition_key (string) :default => nil
|
196
|
+
record_key (string) :default => nil
|
196
197
|
default_message_key (string) :default => nil
|
197
198
|
exclude_topic_key (bool) :default => false
|
198
199
|
exclude_partition_key (bool) :default => false
|
@@ -205,6 +206,7 @@ If `ruby-kafka` doesn't fit your kafka environment, check `rdkafka2` plugin inst
|
|
205
206
|
use_default_for_unknown_topic (bool) :default => false
|
206
207
|
discard_kafka_delivery_failed (bool) :default => false (No discard)
|
207
208
|
partitioner_hash_function (enum) (crc32|murmur2) :default => 'crc32'
|
209
|
+
share_producer (bool) :default => false
|
208
210
|
|
209
211
|
<format>
|
210
212
|
@type (json|ltsv|msgpack|attr:<record name>|<formatter name>) :default => json
|
@@ -335,6 +337,40 @@ For example, `$.source.ip` can be extracted with config `headers_from_record` an
|
|
335
337
|
|
336
338
|
> Using this config to remove unused fields is discouraged. A [filter plugin](https://docs.fluentd.org/v/0.12/filter) can be used for this purpose.
|
337
339
|
|
340
|
+
#### Send only a sub field as a message payload
|
341
|
+
|
342
|
+
If `record_key` is provided, the plugin sends only a sub field given by that key.
|
343
|
+
The configuration format is jsonpath.
|
344
|
+
|
345
|
+
e.g. When the following configuration and the incoming record are given:
|
346
|
+
|
347
|
+
configuration:
|
348
|
+
|
349
|
+
<match **>
|
350
|
+
@type kafka2
|
351
|
+
[...]
|
352
|
+
record_key '$.data'
|
353
|
+
</match>
|
354
|
+
|
355
|
+
record:
|
356
|
+
|
357
|
+
{
|
358
|
+
"specversion" : "1.0",
|
359
|
+
"type" : "com.example.someevent",
|
360
|
+
"id" : "C234-1234-1234",
|
361
|
+
"time" : "2018-04-05T17:31:00Z",
|
362
|
+
"datacontenttype" : "application/json",
|
363
|
+
"data" : {
|
364
|
+
"appinfoA" : "abc",
|
365
|
+
"appinfoB" : 123,
|
366
|
+
"appinfoC" : true
|
367
|
+
},
|
368
|
+
...
|
369
|
+
}
|
370
|
+
|
371
|
+
only the `data` field will be serialized by the formatter and sent to Kafka.
|
372
|
+
The toplevel `data` key will be removed.
|
373
|
+
|
338
374
|
### Buffered output plugin
|
339
375
|
|
340
376
|
This plugin uses ruby-kafka producer for writing data. This plugin is for v0.12. If you use v1, see `kafka2`.
|
@@ -460,6 +496,7 @@ You need to install rdkafka gem.
|
|
460
496
|
# same with kafka2
|
461
497
|
headers (hash) :default => {}
|
462
498
|
headers_from_record (hash) :default => {}
|
499
|
+
record_key (string) :default => nil
|
463
500
|
|
464
501
|
<format>
|
465
502
|
@type (json|ltsv|msgpack|attr:<record name>|<formatter name>) :default => json
|
data/fluent-plugin-kafka.gemspec
CHANGED
@@ -13,7 +13,7 @@ Gem::Specification.new do |gem|
|
|
13
13
|
gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
|
14
14
|
gem.name = "fluent-plugin-kafka"
|
15
15
|
gem.require_paths = ["lib"]
|
16
|
-
gem.version = '0.
|
16
|
+
gem.version = '0.18.0'
|
17
17
|
gem.required_ruby_version = ">= 2.1.0"
|
18
18
|
|
19
19
|
gem.add_dependency "fluentd", [">= 0.10.58", "< 2"]
|
@@ -27,6 +27,11 @@ DESC
|
|
27
27
|
config_param :partitioner_hash_function, :enum, list: [:crc32, :murmur2], :default => :crc32,
|
28
28
|
:desc => "Specify kafka patrtitioner hash algorithm"
|
29
29
|
config_param :default_partition, :integer, :default => nil
|
30
|
+
config_param :record_key, :string, :default => nil,
|
31
|
+
:desc => <<-DESC
|
32
|
+
A jsonpath to a record value pointing to the field which will be passed to the formatter and sent as the Kafka message payload.
|
33
|
+
If defined, only this field in the record will be sent to Kafka as the message payload.
|
34
|
+
DESC
|
30
35
|
config_param :use_default_for_unknown_topic, :bool, :default => false, :desc => "If true, default_topic is used when topic not found"
|
31
36
|
config_param :client_id, :string, :default => 'fluentd'
|
32
37
|
config_param :idempotent, :bool, :default => false, :desc => 'Enable idempotent producer'
|
@@ -81,6 +86,7 @@ DESC
|
|
81
86
|
Add a regular expression to capture ActiveSupport notifications from the Kafka client
|
82
87
|
requires activesupport gem - records will be generated under fluent_kafka_stats.**
|
83
88
|
DESC
|
89
|
+
config_param :share_producer, :bool, :default => false, :desc => 'share kafka producer between flush threads'
|
84
90
|
|
85
91
|
config_section :buffer do
|
86
92
|
config_set_default :chunk_keys, ["topic"]
|
@@ -96,6 +102,12 @@ DESC
|
|
96
102
|
super
|
97
103
|
|
98
104
|
@kafka = nil
|
105
|
+
@producers = nil
|
106
|
+
@producers_mutex = nil
|
107
|
+
@shared_producer = nil
|
108
|
+
|
109
|
+
@writing_threads_mutex = Mutex.new
|
110
|
+
@writing_threads = Set.new
|
99
111
|
end
|
100
112
|
|
101
113
|
def refresh_client(raise_error = true)
|
@@ -185,15 +197,29 @@ DESC
|
|
185
197
|
@exclude_field_accessors = @exclude_fields.map do |field|
|
186
198
|
record_accessor_create(field)
|
187
199
|
end
|
200
|
+
|
201
|
+
@record_field_accessor = nil
|
202
|
+
@record_field_accessor = record_accessor_create(@record_key) unless @record_key.nil?
|
188
203
|
end
|
189
204
|
|
190
205
|
def multi_workers_ready?
|
191
206
|
true
|
192
207
|
end
|
193
208
|
|
209
|
+
def create_producer
|
210
|
+
@kafka.producer(**@producer_opts)
|
211
|
+
end
|
212
|
+
|
194
213
|
def start
|
195
214
|
super
|
196
215
|
refresh_client
|
216
|
+
|
217
|
+
if @share_producer
|
218
|
+
@shared_producer = create_producer
|
219
|
+
else
|
220
|
+
@producers = {}
|
221
|
+
@producers_mutex = Mutex.new
|
222
|
+
end
|
197
223
|
end
|
198
224
|
|
199
225
|
def close
|
@@ -206,6 +232,56 @@ DESC
|
|
206
232
|
@kafka = nil
|
207
233
|
end
|
208
234
|
|
235
|
+
def wait_writing_threads
|
236
|
+
done = false
|
237
|
+
until done do
|
238
|
+
@writing_threads_mutex.synchronize do
|
239
|
+
done = true if @writing_threads.empty?
|
240
|
+
end
|
241
|
+
sleep(1) unless done
|
242
|
+
end
|
243
|
+
end
|
244
|
+
|
245
|
+
def shutdown
|
246
|
+
super
|
247
|
+
wait_writing_threads
|
248
|
+
shutdown_producers
|
249
|
+
end
|
250
|
+
|
251
|
+
def shutdown_producers
|
252
|
+
if @share_producer
|
253
|
+
@shared_producer.shutdown
|
254
|
+
@shared_producer = nil
|
255
|
+
else
|
256
|
+
@producers_mutex.synchronize {
|
257
|
+
shutdown_threads = @producers.map { |key, producer|
|
258
|
+
th = Thread.new {
|
259
|
+
producer.shutdown
|
260
|
+
}
|
261
|
+
th.abort_on_exception = true
|
262
|
+
th
|
263
|
+
}
|
264
|
+
shutdown_threads.each { |th| th.join }
|
265
|
+
@producers = {}
|
266
|
+
}
|
267
|
+
end
|
268
|
+
end
|
269
|
+
|
270
|
+
def get_producer
|
271
|
+
if @share_producer
|
272
|
+
@shared_producer
|
273
|
+
else
|
274
|
+
@producers_mutex.synchronize {
|
275
|
+
producer = @producers[Thread.current.object_id]
|
276
|
+
unless producer
|
277
|
+
producer = create_producer
|
278
|
+
@producers[Thread.current.object_id] = producer
|
279
|
+
end
|
280
|
+
producer
|
281
|
+
}
|
282
|
+
end
|
283
|
+
end
|
284
|
+
|
209
285
|
def setup_formatter(conf)
|
210
286
|
type = conf['@type']
|
211
287
|
case type
|
@@ -229,6 +305,8 @@ DESC
|
|
229
305
|
|
230
306
|
# TODO: optimize write performance
|
231
307
|
def write(chunk)
|
308
|
+
@writing_threads_mutex.synchronize { @writing_threads.add(Thread.current) }
|
309
|
+
|
232
310
|
tag = chunk.metadata.tag
|
233
311
|
topic = if @topic
|
234
312
|
extract_placeholders(@topic, chunk)
|
@@ -237,13 +315,12 @@ DESC
|
|
237
315
|
end
|
238
316
|
|
239
317
|
messages = 0
|
240
|
-
record_buf = nil
|
241
318
|
|
242
319
|
base_headers = @headers
|
243
320
|
mutate_headers = !@headers_from_record_accessors.empty?
|
244
321
|
|
245
322
|
begin
|
246
|
-
producer =
|
323
|
+
producer = get_producer
|
247
324
|
chunk.msgpack_each { |time, record|
|
248
325
|
begin
|
249
326
|
record = inject_values_to_record(tag, time, record)
|
@@ -267,6 +344,7 @@ DESC
|
|
267
344
|
end
|
268
345
|
end
|
269
346
|
|
347
|
+
record = @record_field_accessor.call(record) unless @record_field_accessor.nil?
|
270
348
|
record_buf = @formatter_proc.call(tag, time, record)
|
271
349
|
record_buf_bytes = record_buf.bytesize
|
272
350
|
if @max_send_limit_bytes && record_buf_bytes > @max_send_limit_bytes
|
@@ -283,7 +361,7 @@ DESC
|
|
283
361
|
messages += 1
|
284
362
|
|
285
363
|
producer.produce(record_buf, key: message_key, partition_key: partition_key, partition: partition, headers: headers,
|
286
|
-
create_time: @use_event_time ? Time.at(time) : Time.now)
|
364
|
+
create_time: @use_event_time ? Time.at(time) : Time.now, topic: topic)
|
287
365
|
}
|
288
366
|
|
289
367
|
if messages > 0
|
@@ -301,7 +379,6 @@ DESC
|
|
301
379
|
end
|
302
380
|
rescue Kafka::UnknownTopicOrPartition
|
303
381
|
if @use_default_for_unknown_topic && topic != @default_topic
|
304
|
-
producer.shutdown if producer
|
305
382
|
log.warn "'#{topic}' topic not found. Retry with '#{default_topic}' topic"
|
306
383
|
topic = @default_topic
|
307
384
|
retry
|
@@ -321,7 +398,7 @@ DESC
|
|
321
398
|
# Raise exception to retry sendind messages
|
322
399
|
raise e unless ignore
|
323
400
|
ensure
|
324
|
-
|
401
|
+
@writing_threads_mutex.synchronize { @writing_threads.delete(Thread.current) }
|
325
402
|
end
|
326
403
|
end
|
327
404
|
end
|
@@ -73,6 +73,11 @@ DESC
|
|
73
73
|
:desc => <<-DESC
|
74
74
|
The codec the producer uses to compress messages. Used for compression.codec
|
75
75
|
Supported codecs: (gzip|snappy)
|
76
|
+
DESC
|
77
|
+
config_param :record_key, :string, :default => nil,
|
78
|
+
:desc => <<-DESC
|
79
|
+
A jsonpath to a record value pointing to the field which will be passed to the formatter and sent as the Kafka message payload.
|
80
|
+
If defined, only this field in the record will be sent to Kafka as the message payload.
|
76
81
|
DESC
|
77
82
|
config_param :use_event_time, :bool, :default => false, :desc => 'Use fluentd event time for rdkafka timestamp'
|
78
83
|
config_param :max_send_limit_bytes, :size, :default => nil
|
@@ -230,6 +235,9 @@ DESC
|
|
230
235
|
end
|
231
236
|
|
232
237
|
@enqueue_rate = EnqueueRate.new(@max_enqueue_bytes_per_second) unless @max_enqueue_bytes_per_second.nil?
|
238
|
+
|
239
|
+
@record_field_accessor = nil
|
240
|
+
@record_field_accessor = record_accessor_create(@record_key) unless @record_key.nil?
|
233
241
|
end
|
234
242
|
|
235
243
|
def build_config
|
@@ -270,6 +278,8 @@ DESC
|
|
270
278
|
config[:"queue.buffering.max.messages"] = @rdkafka_buffering_max_messages if @rdkafka_buffering_max_messages
|
271
279
|
config[:"message.max.bytes"] = @rdkafka_message_max_bytes if @rdkafka_message_max_bytes
|
272
280
|
config[:"batch.num.messages"] = @rdkafka_message_max_num if @rdkafka_message_max_num
|
281
|
+
config[:"sasl.username"] = @username if @username
|
282
|
+
config[:"sasl.password"] = @password if @password
|
273
283
|
|
274
284
|
@rdkafka_options.each { |k, v|
|
275
285
|
config[k.to_sym] = v
|
@@ -371,8 +381,6 @@ DESC
|
|
371
381
|
end
|
372
382
|
|
373
383
|
handlers = []
|
374
|
-
record_buf = nil
|
375
|
-
record_buf_bytes = nil
|
376
384
|
|
377
385
|
headers = @headers.clone
|
378
386
|
|
@@ -395,6 +403,7 @@ DESC
|
|
395
403
|
end
|
396
404
|
end
|
397
405
|
|
406
|
+
record = @record_field_accessor.call(record) unless @record_field_accessor.nil?
|
398
407
|
record_buf = @formatter_proc.call(tag, time, record)
|
399
408
|
record_buf_bytes = record_buf.bytesize
|
400
409
|
if @max_send_limit_bytes && record_buf_bytes > @max_send_limit_bytes
|
@@ -105,6 +105,21 @@ class Kafka2OutputTest < Test::Unit::TestCase
|
|
105
105
|
assert_equal([expected_message], actual_messages)
|
106
106
|
end
|
107
107
|
|
108
|
+
def test_record_key
|
109
|
+
conf = config(default_topic: TOPIC_NAME) +
|
110
|
+
config_element('ROOT', '', {"record_key" => "$.data"}, [])
|
111
|
+
target_driver = create_target_driver
|
112
|
+
target_driver.run(expect_records: 1, timeout: 5) do
|
113
|
+
sleep 2
|
114
|
+
d = create_driver(conf)
|
115
|
+
d.run do
|
116
|
+
d.feed('test', event_time, {'data' => {'a' => 'b', 'foo' => 'bar', 'message' => 'test'}, 'message_key' => '123456'})
|
117
|
+
end
|
118
|
+
end
|
119
|
+
actual_messages = target_driver.events.collect { |event| event[2] }
|
120
|
+
assert_equal([{'a' => 'b', 'foo' => 'bar', 'message' => 'test'}], actual_messages)
|
121
|
+
end
|
122
|
+
|
108
123
|
def test_exclude_fields
|
109
124
|
conf = config(default_topic: TOPIC_NAME) +
|
110
125
|
config_element('ROOT', '', {"exclude_fields" => "$.foo"}, [])
|
@@ -163,5 +163,20 @@ class Rdkafka2OutputTest < Test::Unit::TestCase
|
|
163
163
|
actual_messages = target_driver.events.collect { |event| event[2] }
|
164
164
|
assert_equal(expected_messages, actual_messages)
|
165
165
|
end
|
166
|
+
|
167
|
+
def test_record_key
|
168
|
+
conf = config(default_topic: TOPIC_NAME) +
|
169
|
+
config_element('ROOT', '', {"record_key" => "$.data"}, [])
|
170
|
+
target_driver = create_target_driver
|
171
|
+
target_driver.run(expect_records: 1, timeout: 5) do
|
172
|
+
sleep 2
|
173
|
+
d = create_driver(conf)
|
174
|
+
d.run do
|
175
|
+
d.feed('test', event_time, {'data' => {'a' => 'b', 'foo' => 'bar', 'message' => 'test'}, 'message_key' => '123456'})
|
176
|
+
end
|
177
|
+
end
|
178
|
+
actual_messages = target_driver.events.collect { |event| event[2] }
|
179
|
+
assert_equal([{'a' => 'b', 'foo' => 'bar', 'message' => 'test'}], actual_messages)
|
180
|
+
end
|
166
181
|
end
|
167
182
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-kafka
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.18.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Hidemasa Togashi
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2022-
|
12
|
+
date: 2022-07-21 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: fluentd
|
@@ -193,7 +193,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
193
193
|
- !ruby/object:Gem::Version
|
194
194
|
version: '0'
|
195
195
|
requirements: []
|
196
|
-
rubygems_version: 3.
|
196
|
+
rubygems_version: 3.3.5
|
197
197
|
signing_key:
|
198
198
|
specification_version: 4
|
199
199
|
summary: Fluentd plugin for Apache Kafka > 0.8
|