fluent-plugin-kafka 0.17.0 → 0.17.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/linux.yml +2 -0
- data/ChangeLog +16 -0
- data/Gemfile +2 -0
- data/README.md +30 -5
- data/fluent-plugin-kafka.gemspec +1 -1
- data/lib/fluent/plugin/in_kafka.rb +1 -0
- data/lib/fluent/plugin/in_kafka_group.rb +3 -0
- data/lib/fluent/plugin/out_kafka.rb +3 -1
- data/lib/fluent/plugin/out_kafka2.rb +15 -2
- data/lib/fluent/plugin/out_kafka_buffered.rb +3 -1
- data/lib/fluent/plugin/out_rdkafka.rb +20 -12
- data/lib/fluent/plugin/out_rdkafka2.rb +111 -12
- data/test/plugin/test_in_kafka_group.rb +2 -0
- data/test/plugin/test_out_kafka2.rb +59 -3
- data/test/plugin/test_out_rdkafka2.rb +167 -0
- metadata +8 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 5cc122034295e37318cd7510ef3347eeda14cc43b8c0132053cb944d68141feb
+  data.tar.gz: e725b07eaa95f639b2122f1a4c8342101314f2f721e3625c73889dc8caf9aead
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 2e432e7f2670132022b18fa9460b8eda69a18a4dd3a35aa775619c6a45ff8cb6ea5bad869ebc5cefe804b9bb4261ab12150cb77ad10af62dc2e54fd6de435aec
+  data.tar.gz: edbebd57c325292d197d342ff8f5151aa1fcfbd47128fc09d1b71e2bf4d7ccf196d54b48df106f1b9f655fe334ab1f9fe907fce8b4f1b1d20edd9c8254c6c8cd
data/.github/workflows/linux.yml
CHANGED
data/ChangeLog
CHANGED
@@ -1,3 +1,19 @@
+Release 0.17.4 - 2022/01/25
+  * in_kafka_group: Add `refresh_topic_interval` parameter
+
+Release 0.17.3 - 2021/11/26
+  * output: Suppress large warning logs for events skipped by `max_send_limit_bytes`
+
+Release 0.17.2 - 2021/10/14
+  * out_rdkafka2: Add `max_enqueue_bytes_per_second` parameter
+  * out_rdkafka2: Support `use_event_time` parameter
+  * out_rdkafka2: Fix a potential bug that the plugin might exit without receiving responses from Kafka.
+
+Release 0.17.1 - 2021/09/24
+  * out_rdkafka/out_rdkafka2: Support rdkafka 0.9.0 or later
+  * out_rdkafka/out_rdkafka2: Add `exclude_fields` parameter
+  * out_kafka2.rb: Fix one more Ruby 3.0 keyword arguments issue
+
 Release 0.17.0 - 2021/08/30
   * out_kafka/out_kafka_buffered/out_kafka2: Provide murmur2 partitioner hash function choice
   * in_kafka/in_kafka_group/out_kafka/out_kafka_buffered/out_kafka2: Use Ruby Kafka's ssl_ca_cert_file_path parameter to feed the CA certs
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -40,14 +40,14 @@ If you want to use zookeeper related parameters, you also need to install zookee
 
 Set path to SSL related files. See [Encryption and Authentication using SSL](https://github.com/zendesk/ruby-kafka#encryption-and-authentication-using-ssl) for more detail.
 
-#### SASL authentication
+#### SASL authentication
 
 ##### with GSSAPI
 
 - principal
 - keytab
 
-Set principal and path to keytab for SASL/GSSAPI authentication.
+Set principal and path to keytab for SASL/GSSAPI authentication.
 See [Authentication using SASL](https://github.com/zendesk/ruby-kafka#authentication-using-sasl) for more details.
 
 ##### with Plain/SCRAM
@@ -57,7 +57,7 @@ See [Authentication using SASL](https://github.com/zendesk/ruby-kafka#authentica
 - scram_mechanism
 - sasl_over_ssl
 
-Set username, password, scram_mechanism and sasl_over_ssl for SASL/Plain or Scram authentication.
+Set username, password, scram_mechanism and sasl_over_ssl for SASL/Plain or Scram authentication.
 See [Authentication using SASL](https://github.com/zendesk/ruby-kafka#authentication-using-sasl) for more details.
 
 ### Input plugin (@type 'kafka')
@@ -119,7 +119,7 @@ Consume events by kafka consumer group features..
 topics <listening topics(separate with comma',')>
 format <input text type (text|json|ltsv|msgpack)> :default => json
 message_key <key (Optional, for text format only, default is message)>
-
+kafka_message_key <key (Optional, If specified, set kafka's message key to this key)>
 add_headers <If true, add kafka's message headers to record>
 add_prefix <tag prefix (Optional)>
 add_suffix <tag suffix (Optional)>
@@ -135,6 +135,7 @@ Consume events by kafka consumer group features..
 offset_commit_interval (integer) :default => nil (Use default of ruby-kafka)
 offset_commit_threshold (integer) :default => nil (Use default of ruby-kafka)
 fetcher_max_queue_size (integer) :default => nil (Use default of ruby-kafka)
+refresh_topic_interval (integer) :default => nil (Use default of ruby-kafka)
 start_from_beginning (bool) :default => true
 </source>
 
@@ -155,7 +156,7 @@ With the introduction of the rdkafka-ruby based input plugin we hope to support
 topics <listening topics(separate with comma',')>
 format <input text type (text|json|ltsv|msgpack)> :default => json
 message_key <key (Optional, for text format only, default is message)>
-
+kafka_message_key <key (Optional, If specified, set kafka's message key to this key)>
 add_headers <If true, add kafka's message headers to record>
 add_prefix <tag prefix (Optional)>
 add_suffix <tag suffix (Optional)>
@@ -200,6 +201,7 @@ If `ruby-kafka` doesn't fit your kafka environment, check `rdkafka2` plugin inst
 get_kafka_client_log (bool) :default => false
 headers (hash) :default => {}
 headers_from_record (hash) :default => {}
+use_event_time (bool) :default => false
 use_default_for_unknown_topic (bool) :default => false
 discard_kafka_delivery_failed (bool) :default => false (No discard)
 partitioner_hash_function (enum) (crc32|murmur2) :default => 'crc32'
@@ -316,6 +318,23 @@ The Kafka message will have a header of source_ip=12.7.0.0.1.
 
 The configuration format is jsonpath. It is descibed in https://docs.fluentd.org/plugin-helper-overview/api-plugin-helper-record_accessor
 
+#### Excluding fields
+
+Fields can be excluded from output data. Only works for kafka2 and rdkafka2 output plugin.
+
+Fields must be specified using an array of dot notation `$.`, for example:
+
+    <match app.**>
+      @type kafka2
+      [...]
+      exclude_fields $.source.ip,$.HTTP_FOO
+    <match>
+
+This config can be used to remove fields used on another configs.
+
+For example, `$.source.ip` can be extracted with config `headers_from_record` and excluded from message payload.
+
+> Using this config to remove unused fields is discouraged. A [filter plugin](https://docs.fluentd.org/v/0.12/filter) can be used for this purpose.
+
 ### Buffered output plugin
 
 This plugin uses ruby-kafka producer for writing data. This plugin is for v0.12. If you use v1, see `kafka2`.
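A quick way to see what the new `exclude_fields` paths do to a record before it is formatted: the plugin creates a record_accessor per path and calls `delete` on it. The sketch below is only an approximation on a plain Ruby Hash; `delete_field` is an illustrative helper (not part of fluent-plugin-kafka) and it assumes simple `$.a.b` paths without bracket notation.

    # delete_field is a hypothetical helper that mimics, on a plain Hash, what the
    # plugin's exclude_fields accessors do to each record before formatting.
    def delete_field(record, jsonpath)
      keys = jsonpath.sub(/\A\$\./, "").split(".")   # "$.source.ip" -> ["source", "ip"]
      parent = keys[0..-2].inject(record) { |node, key| node.is_a?(Hash) ? node[key] : nil }
      parent.delete(keys.last) if parent.is_a?(Hash)
      record
    end

    record = { "source" => { "ip" => "127.0.0.1", "host" => "web01" }, "HTTP_FOO" => "bar", "message" => "hello" }
    ["$.source.ip", "$.HTTP_FOO"].each { |path| delete_field(record, path) }
    # record is now {"source"=>{"host"=>"web01"}, "message"=>"hello"}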
@@ -346,6 +365,7 @@ Support of fluentd v0.12 has ended. `kafka_buffered` will be an alias of `kafka2
 exclude_topic_key (bool) :default => false
 exclude_partition_key (bool) :default => false
 get_kafka_client_log (bool) :default => false
+use_event_time (bool) :default => false
 partitioner_hash_function (enum) (crc32|murmur2) :default => 'crc32'
 
 # See fluentd document for buffer related parameters: https://docs.fluentd.org/v/0.12/buffer
@@ -435,6 +455,7 @@ You need to install rdkafka gem.
 exclude_topic_key (bool) :default => false
 exclude_partition_key (bool) :default => false
 discard_kafka_delivery_failed (bool) :default => false (No discard)
+use_event_time (bool) :default => false
 
 # same with kafka2
 headers (hash) :default => {}
@@ -469,6 +490,10 @@ You need to install rdkafka gem.
 rdkafka_delivery_handle_poll_timeout (integer) :default => 30
 # If the record size is larger than this value, such records are ignored. Default is no limit
 max_send_limit_bytes (integer) :default => nil
+# The maximum number of enqueueing bytes per second. It can reduce the
+# load of both Fluentd and Kafka when excessive messages are attempted
+# to send. Default is no limit.
+max_enqueue_bytes_per_second (integer) :default => nil
 </match>
 
 If you use v0.12, use `rdkafka` instead.
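The new `max_enqueue_bytes_per_second` option caps how many serialized bytes may be handed to librdkafka per second. A minimal sketch of the idea is below; `ByteBudget` is a hypothetical class for illustration, while the plugin's real implementation is the `EnqueueRate` class shown further down in this diff, which raises and retries instead of sleeping inline.

    # Hypothetical, simplified byte budget: once the per-second budget is spent,
    # pause until the current one-second window rolls over.
    class ByteBudget
      def initialize(limit_bytes_per_second)
        @limit = limit_bytes_per_second
        @window_start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
        @bytes = 0
      end

      def wait_for(bytesize)
        now = Process.clock_gettime(Process::CLOCK_MONOTONIC)
        if now - @window_start >= 1.0
          @window_start = now
          @bytes = 0
        end
        if @bytes + bytesize > @limit
          sleep(@window_start + 1.0 - now)          # wait out the rest of the window
          @window_start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
          @bytes = 0
        end
        @bytes += bytesize
      end
    end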
data/fluent-plugin-kafka.gemspec
CHANGED
@@ -13,7 +13,7 @@ Gem::Specification.new do |gem|
   gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
   gem.name = "fluent-plugin-kafka"
   gem.require_paths = ["lib"]
-  gem.version = '0.17.0'
+  gem.version = '0.17.4'
   gem.required_ruby_version = ">= 2.1.0"
 
   gem.add_dependency "fluentd", [">= 0.10.58", "< 2"]
@@ -67,6 +67,8 @@ class Fluent::KafkaGroupInput < Fluent::Input
                :desc => "The number of messages that can be processed before their offsets are committed"
   config_param :fetcher_max_queue_size, :integer, :default => nil,
                :desc => "The number of fetched messages per partition that are queued in fetcher queue"
+  config_param :refresh_topic_interval, :integer, :default => nil,
+               :desc => "The interval of refreshing the topic list in seconds. Zero or unset disables this"
   config_param :start_from_beginning, :bool, :default => true,
                :desc => "Whether to start from the beginning of the topic or just subscribe to new messages being produced"
 
@@ -128,6 +130,7 @@ class Fluent::KafkaGroupInput < Fluent::Input
     @consumer_opts[:offset_commit_interval] = @offset_commit_interval if @offset_commit_interval
     @consumer_opts[:offset_commit_threshold] = @offset_commit_threshold if @offset_commit_threshold
     @consumer_opts[:fetcher_max_queue_size] = @fetcher_max_queue_size if @fetcher_max_queue_size
+    @consumer_opts[:refresh_topic_interval] = @refresh_topic_interval if @refresh_topic_interval
 
     @fetch_opts = {}
     @fetch_opts[:max_wait_time] = @max_wait_time if @max_wait_time
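For context, the new option is simply added to `@consumer_opts` and forwarded to ruby-kafka's consumer. A rough sketch of the equivalent direct ruby-kafka usage, assuming a broker on localhost:9092 and a ruby-kafka version that supports the `refresh_topic_interval:` keyword:

    require "kafka"

    # Trimmed-down version of what in_kafka_group builds: the option is only
    # passed along when it is set (assumption: ruby-kafka accepts it here).
    refresh_topic_interval = 30
    consumer_opts = { group_id: "fluentd" }
    consumer_opts[:refresh_topic_interval] = refresh_topic_interval if refresh_topic_interval

    kafka = Kafka.new(["localhost:9092"], client_id: "fluentd")
    consumer = kafka.consumer(**consumer_opts)
    consumer.subscribe("my_topic")
    # consumer.each_message { |message| ... }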
@@ -88,6 +88,7 @@ DESC
     require 'kafka'
 
     @kafka = nil
+    @field_separator = nil
   end
 
   def refresh_client
@@ -239,7 +240,8 @@ DESC
         record_buf = @formatter_proc.call(tag, time, record)
         record_buf_bytes = record_buf.bytesize
         if @max_send_limit_bytes && record_buf_bytes > @max_send_limit_bytes
-          log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :
+          log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :record_size => record_buf_bytes
+          log.debug "Skipped event:", :record => record
           next
         end
         log.trace { "message will send to #{topic} with partition_key: #{partition_key}, partition: #{partition}, message_key: #{message_key} and value: #{record_buf}." }
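The 0.17.3 change above only affects logging: the warning now reports the payload size, and the full record is demoted to debug level. A minimal sketch of the size guard itself, assuming a JSON formatter and an example limit (the names below are illustrative, not the plugin's internals):

    require "json"

    max_send_limit_bytes = 1_000_000                  # example limit
    record = { "message" => "x" * 2_000_000 }         # oversized event
    payload = JSON.generate(record)

    if payload.bytesize > max_send_limit_bytes
      # 0.17.3+ behaviour: the warning carries only the size; the record itself
      # would only appear at debug level
      warn "record size exceeds max_send_limit_bytes. Skip event: record_size=#{payload.bytesize}"
    end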
@@ -42,6 +42,8 @@ DESC
                :desc => 'Set true to remove message key from data'
   config_param :exclude_topic_key, :bool, :default => false,
                :desc => 'Set true to remove topic name key from data'
+  config_param :exclude_fields, :array, :default => [], value_type: :string,
+               :desc => 'Fields to remove from data where the value is a jsonpath to a record value'
   config_param :use_event_time, :bool, :default => false, :desc => 'Use fluentd event time for kafka create_time'
   config_param :headers, :hash, default: {}, symbolize_keys: true, value_type: :string,
                :desc => 'Kafka message headers'
@@ -177,6 +179,10 @@ DESC
     @headers_from_record.each do |key, value|
       @headers_from_record_accessors[key] = record_accessor_create(value)
     end
+
+    @exclude_field_accessors = @exclude_fields.map do |field|
+      record_accessor_create(field)
+    end
   end
 
   def multi_workers_ready?
@@ -235,7 +241,7 @@ DESC
     mutate_headers = !@headers_from_record_accessors.empty?
 
     begin
-      producer = @kafka.topic_producer(topic,
+      producer = @kafka.topic_producer(topic, **@producer_opts)
       chunk.msgpack_each { |time, record|
         begin
           record = inject_values_to_record(tag, time, record)
@@ -253,10 +259,17 DESC
             headers = base_headers
           end
 
+          unless @exclude_fields.empty?
+            @exclude_field_accessors.each do |exclude_field_accessor|
+              exclude_field_accessor.delete(record)
+            end
+          end
+
           record_buf = @formatter_proc.call(tag, time, record)
           record_buf_bytes = record_buf.bytesize
           if @max_send_limit_bytes && record_buf_bytes > @max_send_limit_bytes
-            log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :
+            log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :record_size => record_buf_bytes
+            log.debug "Skipped event:", :record => record
             next
           end
         rescue StandardError => e
@@ -107,6 +107,7 @@ DESC
     @kafka = nil
     @producers = {}
     @producers_mutex = Mutex.new
+    @field_separator = nil
   end
 
   def multi_workers_ready?
@@ -331,7 +332,8 @@ DESC
         record_buf = @formatter_proc.call(tag, time, record)
         record_buf_bytes = record_buf.bytesize
         if @max_send_limit_bytes && record_buf_bytes > @max_send_limit_bytes
-          log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :
+          log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :record_size => record_buf_bytes
+          log.debug "Skipped event:", :record => record
           next
         end
       rescue StandardError => e
@@ -65,6 +65,7 @@ DESC
 The codec the producer uses to compress messages.
 Supported codecs: (gzip|snappy)
 DESC
+  config_param :use_event_time, :bool, :default => false, :desc => 'Use fluentd event time for rdkafka timestamp'
   config_param :max_send_limit_bytes, :size, :default => nil
   config_param :rdkafka_buffering_max_ms, :integer, :default => nil
   config_param :rdkafka_buffering_max_messages, :integer, :default => nil
@@ -91,23 +92,29 @@ DESC
   def configure(conf)
     super
     log.instance_eval {
-      def add(level,
-
+      def add(level, message = nil)
+        if message.nil?
+          if block_given?
+            message = yield
+          else
+            return
+          end
+        end
 
         # Follow rdkakfa's log level. See also rdkafka-ruby's bindings.rb: https://github.com/appsignal/rdkafka-ruby/blob/e5c7261e3f2637554a5c12b924be297d7dca1328/lib/rdkafka/bindings.rb#L117
         case level
         when Logger::FATAL
-          self.fatal(
+          self.fatal(message)
         when Logger::ERROR
-          self.error(
+          self.error(message)
         when Logger::WARN
-          self.warn(
+          self.warn(message)
        when Logger::INFO
-          self.info(
+          self.info(message)
        when Logger::DEBUG
-          self.debug(
+          self.debug(message)
        else
-          self.trace(
+          self.trace(message)
        end
      end
    }
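The rewritten `add` above accepts the log line either as a positional argument or from a block before dispatching on severity. A standalone sketch of the same pattern on a plain `::Logger` (an assumption made for illustration; the plugin patches fluentd's own logger instead):

    require "logger"

    logger = Logger.new($stdout)

    # Sketch: accept the message positionally or from a block, then delegate.
    def logger.add(severity, message = nil, progname = nil, &block)
      if message.nil?
        message = block ? block.call : progname
      end
      return true if message.nil?
      super(severity, message)
    end

    logger.add(Logger::INFO, "positional message")   # message passed directly
    logger.add(Logger::INFO) { "block message" }      # message supplied by a block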
@@ -271,7 +278,8 @@ DESC
         record_buf = @formatter_proc.call(tag, time, record)
         record_buf_bytes = record_buf.bytesize
         if @max_send_limit_bytes && record_buf_bytes > @max_send_limit_bytes
-          log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :
+          log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :record_size => record_buf_bytes
+          log.debug "Skipped event:", :record => record
           next
         end
       rescue StandardError => e
@@ -280,7 +288,7 @@ DESC
       end
 
       producer = get_producer
-      handler = enqueue_with_retry(producer, topic, record_buf, message_key, partition)
+      handler = enqueue_with_retry(producer, topic, record_buf, message_key, partition, time)
       handler
     }.each { |handler|
       handler.wait(max_wait_timeout: @rdkafka_delivery_handle_poll_timeout) if @rdkafka_delivery_handle_poll_timeout != 0
@@ -292,11 +300,11 @@ DESC
     raise e
   end
 
-  def enqueue_with_retry(producer, topic, record_buf, message_key, partition)
+  def enqueue_with_retry(producer, topic, record_buf, message_key, partition, time)
     attempt = 0
     loop do
       begin
-        handler = producer.produce(topic: topic, payload: record_buf, key: message_key, partition: partition)
+        handler = producer.produce(topic: topic, payload: record_buf, key: message_key, partition: partition, timestamp: @use_event_time ? Time.at(time) : nil)
         return handler
       rescue Exception => e
         if e.respond_to?(:code) && e.code == :queue_full
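When `use_event_time` is enabled, the fluentd event time is handed to rdkafka's `produce` as a `Time`, exactly as in the `Time.at(time)` call above. A small sketch of that conversion, assuming the fluentd gem is installed:

    require "fluent/time"
    require "time"

    event_time = Fluent::EventTime.now
    use_event_time = true
    # Time.at keeps the sub-second part of the event time; nil keeps the
    # previous behaviour (no explicit timestamp on the produced message).
    timestamp = use_event_time ? Time.at(event_time) : nil
    puts timestamp.iso8601(3) if timestamp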
@@ -56,6 +56,8 @@ DESC
                :desc => <<-DESC
 Set true to remove topic key from data
 DESC
+  config_param :exclude_fields, :array, :default => [], value_type: :string,
+               :desc => 'Fields to remove from data where the value is a jsonpath to a record value'
   config_param :headers, :hash, default: {}, symbolize_keys: true, value_type: :string,
                :desc => 'Kafka message headers'
   config_param :headers_from_record, :hash, default: {}, symbolize_keys: true, value_type: :string,
@@ -72,6 +74,7 @@ DESC
 The codec the producer uses to compress messages. Used for compression.codec
 Supported codecs: (gzip|snappy)
 DESC
+  config_param :use_event_time, :bool, :default => false, :desc => 'Use fluentd event time for rdkafka timestamp'
   config_param :max_send_limit_bytes, :size, :default => nil
   config_param :discard_kafka_delivery_failed, :bool, :default => false
   config_param :rdkafka_buffering_max_ms, :integer, :default => nil, :desc => 'Used for queue.buffering.max.ms'
@@ -84,6 +87,7 @@ DESC
 
   config_param :max_enqueue_retries, :integer, :default => 3
   config_param :enqueue_retry_backoff, :integer, :default => 3
+  config_param :max_enqueue_bytes_per_second, :size, :default => nil, :desc => 'The maximum number of enqueueing bytes per second'
 
   config_param :service_name, :string, :default => nil, :desc => 'Used for sasl.kerberos.service.name'
   config_param :ssl_client_cert_key_password, :string, :default => nil, :desc => 'Used for ssl.key.password'
@@ -99,34 +103,96 @@ DESC
   include Fluent::KafkaPluginUtil::SSLSettings
   include Fluent::KafkaPluginUtil::SaslSettings
 
+  class EnqueueRate
+    class LimitExceeded < StandardError
+      attr_reader :next_retry_clock
+      def initialize(next_retry_clock)
+        @next_retry_clock = next_retry_clock
+      end
+    end
+
+    def initialize(limit_bytes_per_second)
+      @mutex = Mutex.new
+      @start_clock = Fluent::Clock.now
+      @bytes_per_second = 0
+      @limit_bytes_per_second = limit_bytes_per_second
+      @commits = {}
+    end
+
+    def raise_if_limit_exceeded(bytes_to_enqueue)
+      return if @limit_bytes_per_second.nil?
+
+      @mutex.synchronize do
+        @commits[Thread.current] = {
+          clock: Fluent::Clock.now,
+          bytesize: bytes_to_enqueue,
+        }
+
+        @bytes_per_second += @commits[Thread.current][:bytesize]
+        duration = @commits[Thread.current][:clock] - @start_clock
+
+        if duration < 1.0
+          if @bytes_per_second > @limit_bytes_per_second
+            raise LimitExceeded.new(@start_clock + 1.0)
+          end
+        else
+          @start_clock = @commits[Thread.current][:clock]
+          @bytes_per_second = @commits[Thread.current][:bytesize]
+        end
+      end
+    end
+
+    def revert
+      return if @limit_bytes_per_second.nil?
+
+      @mutex.synchronize do
+        return unless @commits[Thread.current]
+        return unless @commits[Thread.current][:clock]
+        if @commits[Thread.current][:clock] >= @start_clock
+          @bytes_per_second -= @commits[Thread.current][:bytesize]
+        end
+        @commits[Thread.current] = nil
+      end
+    end
+  end
+
   def initialize
     super
 
     @producers = nil
     @producers_mutex = nil
     @shared_producer = nil
+    @enqueue_rate = nil
+    @writing_threads_mutex = Mutex.new
+    @writing_threads = Set.new
   end
 
   def configure(conf)
     super
     log.instance_eval {
-      def add(level,
-
+      def add(level, message = nil)
+        if message.nil?
+          if block_given?
+            message = yield
+          else
+            return
+          end
+        end
 
         # Follow rdkakfa's log level. See also rdkafka-ruby's bindings.rb: https://github.com/appsignal/rdkafka-ruby/blob/e5c7261e3f2637554a5c12b924be297d7dca1328/lib/rdkafka/bindings.rb#L117
         case level
         when Logger::FATAL
-          self.fatal(
+          self.fatal(message)
         when Logger::ERROR
-          self.error(
+          self.error(message)
         when Logger::WARN
-          self.warn(
+          self.warn(message)
         when Logger::INFO
-          self.info(
+          self.info(message)
         when Logger::DEBUG
-          self.debug(
+          self.debug(message)
         else
-          self.trace(
+          self.trace(message)
         end
       end
     }
@@ -158,6 +224,12 @@ DESC
     @headers_from_record.each do |key, value|
       @headers_from_record_accessors[key] = record_accessor_create(value)
     end
+
+    @exclude_field_accessors = @exclude_fields.map do |field|
+      record_accessor_create(field)
+    end
+
+    @enqueue_rate = EnqueueRate.new(@max_enqueue_bytes_per_second) unless @max_enqueue_bytes_per_second.nil?
   end
 
   def build_config
@@ -221,8 +293,19 @@ DESC
     true
   end
 
+  def wait_writing_threads
+    done = false
+    until done do
+      @writing_threads_mutex.synchronize do
+        done = true if @writing_threads.empty?
+      end
+      sleep(1) unless done
+    end
+  end
+
   def shutdown
     super
+    wait_writing_threads
     shutdown_producers
   end
 
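This is the fix behind the 0.17.2 note about the plugin exiting before Kafka responded: each `write` call registers its thread, and shutdown now waits until no writer is left. A self-contained sketch of the pattern (simplified, not the plugin's code):

    require "set"

    writing_threads = Set.new
    mutex = Mutex.new

    writer = Thread.new do
      mutex.synchronize { writing_threads.add(Thread.current) }
      begin
        sleep 0.5    # stands in for producing records and waiting on delivery handles
      ensure
        mutex.synchronize { writing_threads.delete(Thread.current) }
      end
    end

    # what wait_writing_threads does, simplified: spin until the set is empty
    sleep 0.1 until mutex.synchronize { writing_threads.empty? }
    writer.join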
@@ -279,6 +362,7 @@ DESC
   end
 
   def write(chunk)
+    @writing_threads_mutex.synchronize { @writing_threads.add(Thread.current) }
     tag = chunk.metadata.tag
     topic = if @topic
               extract_placeholders(@topic, chunk)
@@ -305,10 +389,17 @@ DESC
             headers[key] = header_accessor.call(record)
           end
 
+          unless @exclude_fields.empty?
+            @exclude_field_accessors.each do |exclude_field_acessor|
+              exclude_field_acessor.delete(record)
+            end
+          end
+
           record_buf = @formatter_proc.call(tag, time, record)
           record_buf_bytes = record_buf.bytesize
           if @max_send_limit_bytes && record_buf_bytes > @max_send_limit_bytes
-            log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :
+            log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :record_size => record_buf_bytes
+            log.debug "Skipped event:", :record => record
             next
           end
         rescue StandardError => e
@@ -316,7 +407,7 @@ DESC
             next
           end
 
-          handler = enqueue_with_retry(producer, topic, record_buf, message_key, partition, headers)
+          handler = enqueue_with_retry(producer, topic, record_buf, message_key, partition, headers, time)
           if @rdkafka_delivery_handle_poll_timeout != 0
             handlers << handler
           end
@@ -333,14 +424,22 @@ DESC
     # Raise exception to retry sendind messages
     raise e
   end
+  ensure
+    @writing_threads_mutex.synchronize { @writing_threads.delete(Thread.current) }
   end
 
-  def enqueue_with_retry(producer, topic, record_buf, message_key, partition, headers)
+  def enqueue_with_retry(producer, topic, record_buf, message_key, partition, headers, time)
     attempt = 0
     loop do
       begin
-
+        @enqueue_rate.raise_if_limit_exceeded(record_buf.bytesize) if @enqueue_rate
+        return producer.produce(topic: topic, payload: record_buf, key: message_key, partition: partition, headers: headers, timestamp: @use_event_time ? Time.at(time) : nil)
+      rescue EnqueueRate::LimitExceeded => e
+        @enqueue_rate.revert if @enqueue_rate
+        duration = e.next_retry_clock - Fluent::Clock.now
+        sleep(duration) if duration > 0.0
       rescue Exception => e
+        @enqueue_rate.revert if @enqueue_rate
        if e.respond_to?(:code) && e.code == :queue_full
          if attempt <= @max_enqueue_retries
            log.warn "Failed to enqueue message; attempting retry #{attempt} of #{@max_enqueue_retries} after #{@enqueue_retry_backoff}s"
@@ -14,6 +14,7 @@ class KafkaGroupInputTest < Test::Unit::TestCase
       brokers localhost:9092
       consumer_group fluentd
       format text
+      refresh_topic_interval 0
       @label @kafka
       topics #{TOPIC_NAME}
     ]
@@ -52,6 +53,7 @@ class KafkaGroupInputTest < Test::Unit::TestCase
       brokers localhost:9092
       format text
       @label @kafka
+      refresh_topic_interval 0
       topics #{TOPIC_NAME}
     ]
     d = create_driver
@@ -1,6 +1,8 @@
 require 'helper'
 require 'fluent/test/helpers'
-require 'fluent/
+require 'fluent/test/driver/input'
+require 'fluent/test/driver/output'
+require 'securerandom'
 
 class Kafka2OutputTest < Test::Unit::TestCase
   include Fluent::Test::Helpers
@@ -15,8 +17,8 @@ class Kafka2OutputTest < Test::Unit::TestCase
     ])
   end
 
-  def config
-    base_config + config_element('ROOT', '', {"default_topic" =>
+  def config(default_topic: "kitagawakeiko")
+    base_config + config_element('ROOT', '', {"default_topic" => default_topic,
                                               "brokers" => "localhost:9092"}, [
                                  ])
   end
@@ -57,4 +59,58 @@ class Kafka2OutputTest < Test::Unit::TestCase
     d = create_driver
     assert_equal true, d.instance.multi_workers_ready?
   end
+
+  class WriteTest < self
+    TOPIC_NAME = "kafka-output-#{SecureRandom.uuid}"
+
+    INPUT_CONFIG = %[
+      @type kafka
+      brokers localhost:9092
+      format json
+      @label @kafka
+      topics #{TOPIC_NAME}
+    ]
+
+    def create_target_driver(conf = INPUT_CONFIG)
+      Fluent::Test::Driver::Input.new(Fluent::KafkaInput).configure(conf)
+    end
+
+    def setup
+      @kafka = Kafka.new(["localhost:9092"], client_id: 'kafka')
+    end
+
+    def teardown
+      @kafka.delete_topic(TOPIC_NAME)
+      @kafka.close
+    end
+
+    def test_write
+      target_driver = create_target_driver
+      expected_message = {"a" => 2}
+      target_driver.run(expect_records: 1, timeout: 5) do
+        sleep 2
+        d = create_driver(config(default_topic: TOPIC_NAME))
+        d.run do
+          d.feed("test", event_time, expected_message)
+        end
+      end
+      actual_messages = target_driver.events.collect { |event| event[2] }
+      assert_equal([expected_message], actual_messages)
+    end
+
+    def test_exclude_fields
+      conf = config(default_topic: TOPIC_NAME) +
+             config_element('ROOT', '', {"exclude_fields" => "$.foo"}, [])
+      target_driver = create_target_driver
+      target_driver.run(expect_records: 1, timeout: 5) do
+        sleep 2
+        d = create_driver(conf)
+        d.run do
+          d.feed('test', event_time, {'a' => 'b', 'foo' => 'bar', 'message' => 'test'})
+        end
      end
+      actual_messages = target_driver.events.collect { |event| event[2] }
+      assert_equal([{'a' => 'b', 'message' => 'test'}], actual_messages)
+    end
+  end
 end
@@ -0,0 +1,167 @@
+require 'helper'
+require 'fluent/test/helpers'
+require 'fluent/test/driver/input'
+require 'fluent/test/driver/output'
+require 'securerandom'
+
+class Rdkafka2OutputTest < Test::Unit::TestCase
+  include Fluent::Test::Helpers
+
+  def have_rdkafka
+    begin
+      require 'fluent/plugin/out_rdkafka2'
+      true
+    rescue LoadError
+      false
+    end
+  end
+
+  def setup
+    omit_unless(have_rdkafka, "rdkafka isn't installed")
+    Fluent::Test.setup
+  end
+
+  def base_config
+    config_element('ROOT', '', {"@type" => "rdkafka2"}, [
+                     config_element('format', "", {"@type" => "json"})
+                   ])
+  end
+
+  def config(default_topic: "kitagawakeiko")
+    base_config + config_element('ROOT', '', {"default_topic" => default_topic,
+                                              "brokers" => "localhost:9092"}, [
+                                 ])
+  end
+
+  def create_driver(conf = config, tag='test')
+    Fluent::Test::Driver::Output.new(Fluent::Rdkafka2Output).configure(conf)
+  end
+
+  def test_configure
+    assert_nothing_raised(Fluent::ConfigError) {
+      create_driver(base_config)
+    }
+
+    assert_nothing_raised(Fluent::ConfigError) {
+      create_driver(config)
+    }
+
+    assert_nothing_raised(Fluent::ConfigError) {
+      create_driver(config + config_element('buffer', "", {"@type" => "memory"}))
+    }
+
+    d = create_driver
+    assert_equal 'kitagawakeiko', d.instance.default_topic
+    assert_equal 'localhost:9092', d.instance.brokers
+  end
+
+  def test_mutli_worker_support
+    d = create_driver
+    assert_equal true, d.instance.multi_workers_ready?
+  end
+
+  class WriteTest < self
+    TOPIC_NAME = "kafka-output-#{SecureRandom.uuid}"
+
+    INPUT_CONFIG = %[
+      @type kafka
+      brokers localhost:9092
+      format json
+      @label @kafka
+      topics #{TOPIC_NAME}
+    ]
+
+    def create_target_driver(conf = INPUT_CONFIG)
+      Fluent::Test::Driver::Input.new(Fluent::KafkaInput).configure(conf)
+    end
+
+    def setup
+      @kafka = nil
+      omit_unless(have_rdkafka, "rdkafka isn't installed")
+      @kafka = Kafka.new(["localhost:9092"], client_id: 'kafka')
+    end
+
+    def teardown
+      if @kafka
+        @kafka.delete_topic(TOPIC_NAME)
+        @kafka.close
+      end
+    end
+
+    def test_write
+      target_driver = create_target_driver
+      expected_message = {"a" => 2}
+      target_driver.run(expect_records: 1, timeout: 5) do
+        sleep 2
+        d = create_driver(config(default_topic: TOPIC_NAME))
+        d.run do
+          d.feed("test", event_time, expected_message)
+        end
+      end
+      actual_messages = target_driver.events.collect { |event| event[2] }
+      assert_equal([expected_message], actual_messages)
+    end
+
+    def test_write_with_use_event_time
+      input_config = %[
+        @type kafka
+        brokers localhost:9092
+        format json
+        @label @kafka
+        topics #{TOPIC_NAME}
+        time_source kafka
+      ]
+      target_driver = create_target_driver(input_config)
+      expected_message = {"a" => 2}
+      now = event_time
+      target_driver.run(expect_records: 1, timeout: 5) do
+        sleep 2
+        d = create_driver(config(default_topic: TOPIC_NAME) + config_element('ROOT', '', {"use_event_time" => true}))
+        d.run do
+          d.feed("test", now, expected_message)
+        end
+      end
+      actual_time = target_driver.events.collect { |event| event[1] }.last
+      assert_in_delta(actual_time, now, 0.001) # expects millseconds precision
+      actual_messages = target_driver.events.collect { |event| event[2] }
+      assert_equal([expected_message], actual_messages)
+    end
+
+    def test_exclude_fields
+      conf = config(default_topic: TOPIC_NAME) +
+             config_element('ROOT', '', {"exclude_fields" => "$.foo"}, [])
+      target_driver = create_target_driver
+      target_driver.run(expect_records: 1, timeout: 5) do
+        sleep 2
+        d = create_driver(conf)
+        d.run do
+          d.feed('test', event_time, {'a' => 'b', 'foo' => 'bar', 'message' => 'test'})
+        end
+      end
+      actual_messages = target_driver.events.collect { |event| event[2] }
+      assert_equal([{'a' => 'b', 'message' => 'test'}], actual_messages)
+    end
+
+    def test_max_enqueue_bytes_per_second
+      conf = config(default_topic: TOPIC_NAME) +
+             config_element('ROOT', '', {"max_enqueue_bytes_per_second" => 32 * 3}, [])
+      target_driver = create_target_driver
+      expected_messages = []
+      target_driver.run(expect_records: 9, timeout: 10) do
+        sleep 2
+        d = create_driver(conf)
+        start_time = Fluent::Clock.now
+        d.run do
+          9.times do |i|
+            message = {"message" => "32bytes message: #{i}"}
+            d.feed("test", event_time, message)
+            expected_messages << message
+          end
+        end
+        assert_in_delta(2.0, Fluent::Clock.now - start_time, 0.5)
+      end
+      actual_messages = target_driver.events.collect { |event| event[2] }
+      assert_equal(expected_messages, actual_messages)
+    end
+  end
+end
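The `test_max_enqueue_bytes_per_second` case at the end encodes the expected throttling arithmetic: each fed record serializes to 32 bytes of JSON and the configured limit is 96 bytes per second, so nine records need three one-second windows and roughly two seconds of waiting. A tiny sketch of that calculation:

    messages   = 9
    bytes_each = 32                      # {"message":"32bytes message: 0"} is 32 bytes of JSON
    limit      = 32 * 3                  # 96 bytes per second, as configured in the test
    per_window = limit / bytes_each      # 3 messages fit into each one-second window
    windows    = (messages.to_f / per_window).ceil
    puts windows - 1                     # => 2, the elapsed seconds the test asserts (within 0.5s)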
metadata
CHANGED
@@ -1,15 +1,15 @@
 --- !ruby/object:Gem::Specification
 name: fluent-plugin-kafka
 version: !ruby/object:Gem::Version
-  version: 0.17.0
+  version: 0.17.4
 platform: ruby
 authors:
 - Hidemasa Togashi
 - Masahiro Nakagawa
-autorequire:
+autorequire:
 bindir: bin
 cert_chain: []
-date:
+date: 2022-01-25 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: fluentd
@@ -173,11 +173,12 @@ files:
 - test/plugin/test_out_kafka.rb
 - test/plugin/test_out_kafka2.rb
 - test/plugin/test_out_kafka_buffered.rb
+- test/plugin/test_out_rdkafka2.rb
 homepage: https://github.com/fluent/fluent-plugin-kafka
 licenses:
 - Apache-2.0
 metadata: {}
-post_install_message:
+post_install_message:
 rdoc_options: []
 require_paths:
 - lib
@@ -192,8 +193,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
 - !ruby/object:Gem::Version
   version: '0'
 requirements: []
-rubygems_version: 3.
-signing_key:
+rubygems_version: 3.2.5
+signing_key:
 specification_version: 4
 summary: Fluentd plugin for Apache Kafka > 0.8
 test_files:
@@ -204,3 +205,4 @@ test_files:
 - test/plugin/test_out_kafka.rb
 - test/plugin/test_out_kafka2.rb
 - test/plugin/test_out_kafka_buffered.rb
+- test/plugin/test_out_rdkafka2.rb