fluent-plugin-kafka 0.17.0 → 0.17.4
- checksums.yaml +4 -4
- data/.github/workflows/linux.yml +2 -0
- data/ChangeLog +16 -0
- data/Gemfile +2 -0
- data/README.md +30 -5
- data/fluent-plugin-kafka.gemspec +1 -1
- data/lib/fluent/plugin/in_kafka.rb +1 -0
- data/lib/fluent/plugin/in_kafka_group.rb +3 -0
- data/lib/fluent/plugin/out_kafka.rb +3 -1
- data/lib/fluent/plugin/out_kafka2.rb +15 -2
- data/lib/fluent/plugin/out_kafka_buffered.rb +3 -1
- data/lib/fluent/plugin/out_rdkafka.rb +20 -12
- data/lib/fluent/plugin/out_rdkafka2.rb +111 -12
- data/test/plugin/test_in_kafka_group.rb +2 -0
- data/test/plugin/test_out_kafka2.rb +59 -3
- data/test/plugin/test_out_rdkafka2.rb +167 -0
- metadata +8 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 5cc122034295e37318cd7510ef3347eeda14cc43b8c0132053cb944d68141feb
+  data.tar.gz: e725b07eaa95f639b2122f1a4c8342101314f2f721e3625c73889dc8caf9aead
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 2e432e7f2670132022b18fa9460b8eda69a18a4dd3a35aa775619c6a45ff8cb6ea5bad869ebc5cefe804b9bb4261ab12150cb77ad10af62dc2e54fd6de435aec
+  data.tar.gz: edbebd57c325292d197d342ff8f5151aa1fcfbd47128fc09d1b71e2bf4d7ccf196d54b48df106f1b9f655fe334ab1f9fe907fce8b4f1b1d20edd9c8254c6c8cd
data/.github/workflows/linux.yml
CHANGED
data/ChangeLog
CHANGED
@@ -1,3 +1,19 @@
+Release 0.17.4 - 2022/01/25
+* in_kafka_group: Add `refresh_topic_interval` parameter
+
+Release 0.17.3 - 2021/11/26
+* output: Suppress large warning logs for events skipped by `max_send_limit_bytes`
+
+Release 0.17.2 - 2021/10/14
+* out_rdkafka2: Add `max_enqueue_bytes_per_second` parameter
+* out_rdkafka2: Support `use_event_time` parameter
+* out_rdkafka2: Fix a potential bug that the plugin might exit without receiving responses from Kafka.
+
+Release 0.17.1 - 2021/09/24
+* out_rdkafka/out_rdkafka2: Support rdkafka 0.9.0 or later
+* out_rdkafka/out_rdkafka2: Add `exclude_fields` parameter
+* out_kafka2.rb: Fix one more Ruby 3.0 keyword arguments issue
+
 Release 0.17.0 - 2021/08/30
 * out_kafka/out_kafka_buffered/out_kafka2: Provide murmur2 partitioner hash function choice
 * in_kafka/in_kafka_group/out_kafka/out_kafka_buffered/out_kafka2: Use Ruby Kafka's ssl_ca_cert_file_path parameter to feed the CA certs
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -40,14 +40,14 @@ If you want to use zookeeper related parameters, you also need to install zookee
 
 Set path to SSL related files. See [Encryption and Authentication using SSL](https://github.com/zendesk/ruby-kafka#encryption-and-authentication-using-ssl) for more detail.
 
-#### SASL authentication
+#### SASL authentication
 
 ##### with GSSAPI
 
 - principal
 - keytab
 
-Set principal and path to keytab for SASL/GSSAPI authentication.
+Set principal and path to keytab for SASL/GSSAPI authentication.
 See [Authentication using SASL](https://github.com/zendesk/ruby-kafka#authentication-using-sasl) for more details.
 
 ##### with Plain/SCRAM
@@ -57,7 +57,7 @@ See [Authentication using SASL](https://github.com/zendesk/ruby-kafka#authentica
 - scram_mechanism
 - sasl_over_ssl
 
-Set username, password, scram_mechanism and sasl_over_ssl for SASL/Plain or Scram authentication.
+Set username, password, scram_mechanism and sasl_over_ssl for SASL/Plain or Scram authentication.
 See [Authentication using SASL](https://github.com/zendesk/ruby-kafka#authentication-using-sasl) for more details.
 
 ### Input plugin (@type 'kafka')
@@ -119,7 +119,7 @@ Consume events by kafka consumer group features..
   topics <listening topics(separate with comma',')>
   format <input text type (text|json|ltsv|msgpack)> :default => json
   message_key <key (Optional, for text format only, default is message)>
-
+  kafka_message_key <key (Optional, If specified, set kafka's message key to this key)>
   add_headers <If true, add kafka's message headers to record>
   add_prefix <tag prefix (Optional)>
   add_suffix <tag suffix (Optional)>
@@ -135,6 +135,7 @@ Consume events by kafka consumer group features..
   offset_commit_interval (integer) :default => nil (Use default of ruby-kafka)
   offset_commit_threshold (integer) :default => nil (Use default of ruby-kafka)
   fetcher_max_queue_size (integer) :default => nil (Use default of ruby-kafka)
+  refresh_topic_interval (integer) :default => nil (Use default of ruby-kafka)
   start_from_beginning (bool) :default => true
 </source>
 
@@ -155,7 +156,7 @@ With the introduction of the rdkafka-ruby based input plugin we hope to support
   topics <listening topics(separate with comma',')>
   format <input text type (text|json|ltsv|msgpack)> :default => json
   message_key <key (Optional, for text format only, default is message)>
-
+  kafka_message_key <key (Optional, If specified, set kafka's message key to this key)>
   add_headers <If true, add kafka's message headers to record>
   add_prefix <tag prefix (Optional)>
   add_suffix <tag suffix (Optional)>
@@ -200,6 +201,7 @@ If `ruby-kafka` doesn't fit your kafka environment, check `rdkafka2` plugin inst
   get_kafka_client_log (bool) :default => false
   headers (hash) :default => {}
   headers_from_record (hash) :default => {}
+  use_event_time (bool) :default => false
   use_default_for_unknown_topic (bool) :default => false
   discard_kafka_delivery_failed (bool) :default => false (No discard)
   partitioner_hash_function (enum) (crc32|murmur2) :default => 'crc32'
@@ -316,6 +318,23 @@ The Kafka message will have a header of source_ip=12.7.0.0.1.
 
 The configuration format is jsonpath. It is descibed in https://docs.fluentd.org/plugin-helper-overview/api-plugin-helper-record_accessor
 
+#### Excluding fields
+Fields can be excluded from output data. Only works for kafka2 and rdkafka2 output plugin.
+
+Fields must be specified using an array of dot notation `$.`, for example:
+
+    <match app.**>
+      @type kafka2
+      [...]
+      exclude_fields $.source.ip,$.HTTP_FOO
+    <match>
+
+This config can be used to remove fields used on another configs.
+
+For example, `$.source.ip` can be extracted with config `headers_from_record` and excluded from message payload.
+
+> Using this config to remove unused fields is discouraged. A [filter plugin](https://docs.fluentd.org/v/0.12/filter) can be used for this purpose.
+
 ### Buffered output plugin
 
 This plugin uses ruby-kafka producer for writing data. This plugin is for v0.12. If you use v1, see `kafka2`.
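The `exclude_fields` feature documented above is implemented with Fluentd's record_accessor plugin helper: each configured `$.`-path is turned into an accessor and `delete` is called on the record before it is formatted (see the out_kafka2.rb and out_rdkafka2.rb hunks further down). As a rough standalone illustration of that deletion semantics only, here is a minimal sketch in plain Ruby with a hypothetical `delete_path` helper; it is not the plugin's actual code and only mimics simple dot notation:

```ruby
# Minimal sketch (plain Ruby, no Fluentd dependency): emulate what an accessor
# built from "$.source.ip" does when #delete is called on a record Hash.
def delete_path(record, dot_path)
  keys = dot_path.sub(/\A\$\./, "").split(".")   # "$.source.ip" -> ["source", "ip"]
  parent = keys[0..-2].reduce(record) { |node, k| node.is_a?(Hash) ? node[k] : nil }
  parent.delete(keys.last) if parent.is_a?(Hash)
  record
end

record = { "source" => { "ip" => "127.0.0.1", "host" => "web1" }, "HTTP_FOO" => "bar", "message" => "test" }
%w($.source.ip $.HTTP_FOO).each { |path| delete_path(record, path) }
p record  # => {"source"=>{"host"=>"web1"}, "message"=>"test"}
```

In the real plugin the path handling is delegated to `record_accessor_create`, so this sketch covers only the dot-notation case shown in the README example.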
@@ -346,6 +365,7 @@ Support of fluentd v0.12 has ended. `kafka_buffered` will be an alias of `kafka2
   exclude_topic_key (bool) :default => false
   exclude_partition_key (bool) :default => false
   get_kafka_client_log (bool) :default => false
+  use_event_time (bool) :default => false
   partitioner_hash_function (enum) (crc32|murmur2) :default => 'crc32'
 
 # See fluentd document for buffer related parameters: https://docs.fluentd.org/v/0.12/buffer
@@ -435,6 +455,7 @@ You need to install rdkafka gem.
   exclude_topic_key (bool) :default => false
   exclude_partition_key (bool) :default => false
   discard_kafka_delivery_failed (bool) :default => false (No discard)
+  use_event_time (bool) :default => false
 
   # same with kafka2
   headers (hash) :default => {}
@@ -469,6 +490,10 @@ You need to install rdkafka gem.
   rdkafka_delivery_handle_poll_timeout (integer) :default => 30
   # If the record size is larger than this value, such records are ignored. Default is no limit
   max_send_limit_bytes (integer) :default => nil
+  # The maximum number of enqueueing bytes per second. It can reduce the
+  # load of both Fluentd and Kafka when excessive messages are attempted
+  # to send. Default is no limit.
+  max_enqueue_bytes_per_second (integer) :default => nil
 </match>
 
 If you use v0.12, use `rdkafka` instead.
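The new `max_enqueue_bytes_per_second` option is accounted against one-second windows (see the `EnqueueRate` class added to out_rdkafka2.rb further down), so its practical effect is easy to estimate. A back-of-the-envelope sketch, using the numbers from the new test at the bottom of this diff (nine ~32-byte records against a 96 bytes/s budget); the helper name is illustrative only:

```ruby
# Idealized estimate: the limiter admits up to `limit_bytes_per_second` bytes in
# each one-second window, then makes the caller wait for the next window.
def extra_delay_seconds(record_bytes, record_count, limit_bytes_per_second)
  total_bytes = record_bytes * record_count
  windows = (total_bytes.to_f / limit_bytes_per_second).ceil
  windows - 1   # the first window starts immediately
end

puts extra_delay_seconds(32, 9, 32 * 3)   # => 2
```

That roughly two-second delay is what the new `test_max_enqueue_bytes_per_second` asserts with `assert_in_delta(2.0, Fluent::Clock.now - start_time, 0.5)`.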
data/fluent-plugin-kafka.gemspec
CHANGED
@@ -13,7 +13,7 @@ Gem::Specification.new do |gem|
   gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
   gem.name          = "fluent-plugin-kafka"
   gem.require_paths = ["lib"]
-  gem.version       = '0.17.
+  gem.version       = '0.17.4'
   gem.required_ruby_version = ">= 2.1.0"
 
   gem.add_dependency "fluentd", [">= 0.10.58", "< 2"]
data/lib/fluent/plugin/in_kafka_group.rb
CHANGED
@@ -67,6 +67,8 @@ class Fluent::KafkaGroupInput < Fluent::Input
                :desc => "The number of messages that can be processed before their offsets are committed"
   config_param :fetcher_max_queue_size, :integer, :default => nil,
                :desc => "The number of fetched messages per partition that are queued in fetcher queue"
+  config_param :refresh_topic_interval, :integer, :default => nil,
+               :desc => "The interval of refreshing the topic list in seconds. Zero or unset disables this"
   config_param :start_from_beginning, :bool, :default => true,
                :desc => "Whether to start from the beginning of the topic or just subscribe to new messages being produced"
 
@@ -128,6 +130,7 @@ class Fluent::KafkaGroupInput < Fluent::Input
     @consumer_opts[:offset_commit_interval] = @offset_commit_interval if @offset_commit_interval
     @consumer_opts[:offset_commit_threshold] = @offset_commit_threshold if @offset_commit_threshold
     @consumer_opts[:fetcher_max_queue_size] = @fetcher_max_queue_size if @fetcher_max_queue_size
+    @consumer_opts[:refresh_topic_interval] = @refresh_topic_interval if @refresh_topic_interval
 
     @fetch_opts = {}
     @fetch_opts[:max_wait_time] = @max_wait_time if @max_wait_time
data/lib/fluent/plugin/out_kafka.rb
CHANGED
@@ -88,6 +88,7 @@ DESC
     require 'kafka'
 
     @kafka = nil
+    @field_separator = nil
   end
 
   def refresh_client
@@ -239,7 +240,8 @@ DESC
       record_buf = @formatter_proc.call(tag, time, record)
       record_buf_bytes = record_buf.bytesize
       if @max_send_limit_bytes && record_buf_bytes > @max_send_limit_bytes
-        log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :
+        log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :record_size => record_buf_bytes
+        log.debug "Skipped event:", :record => record
         next
       end
       log.trace { "message will send to #{topic} with partition_key: #{partition_key}, partition: #{partition}, message_key: #{message_key} and value: #{record_buf}." }
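This is the 0.17.3 ChangeLog item in action: the skipped-record warning no longer dumps the whole record, only its size, and the full payload is demoted to debug level. The same hunk is repeated below for the other output plugins. A standalone sketch of the same two-level logging pattern, using Ruby's stdlib `Logger` instead of Fluentd's `log` object:

```ruby
require "logger"
require "json"

log = Logger.new($stdout)
log.level = Logger::INFO          # switch to Logger::DEBUG to also see the skipped payloads

max_send_limit_bytes = 16
records = [{ "msg" => "ok" }, { "msg" => "x" * 64 }]

records.each do |record|
  buf = JSON.generate(record)
  if buf.bytesize > max_send_limit_bytes
    # Keep the warn line small; the potentially huge payload goes to debug only.
    log.warn("record size exceeds max_send_limit_bytes. Skip event: record_size=#{buf.bytesize}")
    log.debug("Skipped event: #{record.inspect}")
    next
  end
  # ... would hand `buf` to the Kafka producer here ...
end
```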
data/lib/fluent/plugin/out_kafka2.rb
CHANGED
@@ -42,6 +42,8 @@ DESC
                :desc => 'Set true to remove message key from data'
   config_param :exclude_topic_key, :bool, :default => false,
                :desc => 'Set true to remove topic name key from data'
+  config_param :exclude_fields, :array, :default => [], value_type: :string,
+               :desc => 'Fields to remove from data where the value is a jsonpath to a record value'
   config_param :use_event_time, :bool, :default => false, :desc => 'Use fluentd event time for kafka create_time'
   config_param :headers, :hash, default: {}, symbolize_keys: true, value_type: :string,
                :desc => 'Kafka message headers'
@@ -177,6 +179,10 @@ DESC
     @headers_from_record.each do |key, value|
       @headers_from_record_accessors[key] = record_accessor_create(value)
     end
+
+    @exclude_field_accessors = @exclude_fields.map do |field|
+      record_accessor_create(field)
+    end
   end
 
   def multi_workers_ready?
@@ -235,7 +241,7 @@ DESC
     mutate_headers = !@headers_from_record_accessors.empty?
 
     begin
-      producer = @kafka.topic_producer(topic,
+      producer = @kafka.topic_producer(topic, **@producer_opts)
       chunk.msgpack_each { |time, record|
         begin
           record = inject_values_to_record(tag, time, record)
@@ -253,10 +259,17 @@ DESC
             headers = base_headers
           end
 
+          unless @exclude_fields.empty?
+            @exclude_field_accessors.each do |exclude_field_accessor|
+              exclude_field_accessor.delete(record)
+            end
+          end
+
           record_buf = @formatter_proc.call(tag, time, record)
           record_buf_bytes = record_buf.bytesize
           if @max_send_limit_bytes && record_buf_bytes > @max_send_limit_bytes
-            log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :
+            log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :record_size => record_buf_bytes
+            log.debug "Skipped event:", :record => record
             next
           end
         rescue StandardError => e
data/lib/fluent/plugin/out_kafka_buffered.rb
CHANGED
@@ -107,6 +107,7 @@ DESC
     @kafka = nil
     @producers = {}
     @producers_mutex = Mutex.new
+    @field_separator = nil
   end
 
   def multi_workers_ready?
@@ -331,7 +332,8 @@ DESC
       record_buf = @formatter_proc.call(tag, time, record)
       record_buf_bytes = record_buf.bytesize
       if @max_send_limit_bytes && record_buf_bytes > @max_send_limit_bytes
-        log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :
+        log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :record_size => record_buf_bytes
+        log.debug "Skipped event:", :record => record
         next
       end
     rescue StandardError => e
data/lib/fluent/plugin/out_rdkafka.rb
CHANGED
@@ -65,6 +65,7 @@ DESC
 The codec the producer uses to compress messages.
 Supported codecs: (gzip|snappy)
 DESC
+  config_param :use_event_time, :bool, :default => false, :desc => 'Use fluentd event time for rdkafka timestamp'
   config_param :max_send_limit_bytes, :size, :default => nil
   config_param :rdkafka_buffering_max_ms, :integer, :default => nil
   config_param :rdkafka_buffering_max_messages, :integer, :default => nil
@@ -91,23 +92,29 @@ DESC
   def configure(conf)
     super
     log.instance_eval {
-      def add(level,
-
+      def add(level, message = nil)
+        if message.nil?
+          if block_given?
+            message = yield
+          else
+            return
+          end
+        end
 
         # Follow rdkakfa's log level. See also rdkafka-ruby's bindings.rb: https://github.com/appsignal/rdkafka-ruby/blob/e5c7261e3f2637554a5c12b924be297d7dca1328/lib/rdkafka/bindings.rb#L117
         case level
         when Logger::FATAL
-          self.fatal(
+          self.fatal(message)
         when Logger::ERROR
-          self.error(
+          self.error(message)
         when Logger::WARN
-          self.warn(
+          self.warn(message)
         when Logger::INFO
-          self.info(
+          self.info(message)
         when Logger::DEBUG
-          self.debug(
+          self.debug(message)
         else
-          self.trace(
+          self.trace(message)
         end
       end
     }
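The monkey-patched `add` now follows `Logger#add`'s calling convention: the message may arrive as an argument or be produced lazily by a block, and it is then routed to the Fluentd log method matching rdkafka's severity. A simplified standalone version of that dispatch (hypothetical `forward` helper with a plain `puts` sink, not the plugin's Fluentd logger):

```ruby
require "logger"

# Resolve the message from either the argument or the block, then route by severity.
def forward(level, message = nil, sink: method(:puts))
  if message.nil?
    return unless block_given?   # nothing to log
    message = yield              # Logger#add style: lazy message via block
  end

  prefix =
    case level
    when Logger::FATAL then "fatal"
    when Logger::ERROR then "error"
    when Logger::WARN  then "warn"
    when Logger::INFO  then "info"
    when Logger::DEBUG then "debug"
    else                    "trace"   # rdkafka has more levels than stdlib Logger
    end
  sink.call("[#{prefix}] #{message}")
end

forward(Logger::WARN, "broker down")          # explicit message
forward(Logger::DEBUG) { "expensive detail" } # message built lazily in a block
forward(Logger::INFO)                         # no message, no block: silently ignored
```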
@@ -271,7 +278,8 @@ DESC
       record_buf = @formatter_proc.call(tag, time, record)
       record_buf_bytes = record_buf.bytesize
       if @max_send_limit_bytes && record_buf_bytes > @max_send_limit_bytes
-        log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :
+        log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :record_size => record_buf_bytes
+        log.debug "Skipped event:", :record => record
         next
       end
     rescue StandardError => e
@@ -280,7 +288,7 @@ DESC
       end
 
       producer = get_producer
-      handler = enqueue_with_retry(producer, topic, record_buf, message_key, partition)
+      handler = enqueue_with_retry(producer, topic, record_buf, message_key, partition, time)
       handler
     }.each { |handler|
       handler.wait(max_wait_timeout: @rdkafka_delivery_handle_poll_timeout) if @rdkafka_delivery_handle_poll_timeout != 0
@@ -292,11 +300,11 @@ DESC
     raise e
   end
 
-  def enqueue_with_retry(producer, topic, record_buf, message_key, partition)
+  def enqueue_with_retry(producer, topic, record_buf, message_key, partition, time)
     attempt = 0
     loop do
       begin
-        handler = producer.produce(topic: topic, payload: record_buf, key: message_key, partition: partition)
+        handler = producer.produce(topic: topic, payload: record_buf, key: message_key, partition: partition, timestamp: @use_event_time ? Time.at(time) : nil)
         return handler
       rescue Exception => e
         if e.respond_to?(:code) && e.code == :queue_full
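With `use_event_time true`, the Fluentd event time is converted with `Time.at` and passed as the message `timestamp:`; otherwise `nil` is passed and no explicit timestamp is set by the plugin. A minimal sketch of just that conversion (the surrounding `producer.produce` call is rdkafka-ruby API and is not repeated here; the numeric value below is a made-up stand-in):

```ruby
require "time"

# Fluentd event time behaves like epoch seconds with sub-second precision;
# Time.at turns it into the Time object expected for the timestamp argument.
use_event_time = true
time = 1643064600.123456   # stand-in for the Fluent::EventTime handed to #write

timestamp = use_event_time ? Time.at(time) : nil
puts(timestamp ? timestamp.iso8601(6) : "no explicit timestamp")
```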
data/lib/fluent/plugin/out_rdkafka2.rb
CHANGED
@@ -56,6 +56,8 @@ DESC
                :desc => <<-DESC
 Set true to remove topic key from data
 DESC
+  config_param :exclude_fields, :array, :default => [], value_type: :string,
+               :desc => 'Fields to remove from data where the value is a jsonpath to a record value'
   config_param :headers, :hash, default: {}, symbolize_keys: true, value_type: :string,
                :desc => 'Kafka message headers'
   config_param :headers_from_record, :hash, default: {}, symbolize_keys: true, value_type: :string,
@@ -72,6 +74,7 @@ DESC
 The codec the producer uses to compress messages. Used for compression.codec
 Supported codecs: (gzip|snappy)
 DESC
+  config_param :use_event_time, :bool, :default => false, :desc => 'Use fluentd event time for rdkafka timestamp'
   config_param :max_send_limit_bytes, :size, :default => nil
   config_param :discard_kafka_delivery_failed, :bool, :default => false
   config_param :rdkafka_buffering_max_ms, :integer, :default => nil, :desc => 'Used for queue.buffering.max.ms'
@@ -84,6 +87,7 @@ DESC
 
   config_param :max_enqueue_retries, :integer, :default => 3
   config_param :enqueue_retry_backoff, :integer, :default => 3
+  config_param :max_enqueue_bytes_per_second, :size, :default => nil, :desc => 'The maximum number of enqueueing bytes per second'
 
   config_param :service_name, :string, :default => nil, :desc => 'Used for sasl.kerberos.service.name'
   config_param :ssl_client_cert_key_password, :string, :default => nil, :desc => 'Used for ssl.key.password'
@@ -99,34 +103,96 @@ DESC
   include Fluent::KafkaPluginUtil::SSLSettings
   include Fluent::KafkaPluginUtil::SaslSettings
 
+  class EnqueueRate
+    class LimitExceeded < StandardError
+      attr_reader :next_retry_clock
+      def initialize(next_retry_clock)
+        @next_retry_clock = next_retry_clock
+      end
+    end
+
+    def initialize(limit_bytes_per_second)
+      @mutex = Mutex.new
+      @start_clock = Fluent::Clock.now
+      @bytes_per_second = 0
+      @limit_bytes_per_second = limit_bytes_per_second
+      @commits = {}
+    end
+
+    def raise_if_limit_exceeded(bytes_to_enqueue)
+      return if @limit_bytes_per_second.nil?
+
+      @mutex.synchronize do
+        @commits[Thread.current] = {
+          clock: Fluent::Clock.now,
+          bytesize: bytes_to_enqueue,
+        }
+
+        @bytes_per_second += @commits[Thread.current][:bytesize]
+        duration = @commits[Thread.current][:clock] - @start_clock
+
+        if duration < 1.0
+          if @bytes_per_second > @limit_bytes_per_second
+            raise LimitExceeded.new(@start_clock + 1.0)
+          end
+        else
+          @start_clock = @commits[Thread.current][:clock]
+          @bytes_per_second = @commits[Thread.current][:bytesize]
+        end
+      end
+    end
+
+    def revert
+      return if @limit_bytes_per_second.nil?
+
+      @mutex.synchronize do
+        return unless @commits[Thread.current]
+        return unless @commits[Thread.current][:clock]
+        if @commits[Thread.current][:clock] >= @start_clock
+          @bytes_per_second -= @commits[Thread.current][:bytesize]
+        end
+        @commits[Thread.current] = nil
+      end
+    end
+  end
+
   def initialize
     super
 
     @producers = nil
     @producers_mutex = nil
     @shared_producer = nil
+    @enqueue_rate = nil
+    @writing_threads_mutex = Mutex.new
+    @writing_threads = Set.new
   end
 
   def configure(conf)
     super
     log.instance_eval {
-      def add(level,
-
+      def add(level, message = nil)
+        if message.nil?
+          if block_given?
+            message = yield
+          else
+            return
+          end
+        end
 
         # Follow rdkakfa's log level. See also rdkafka-ruby's bindings.rb: https://github.com/appsignal/rdkafka-ruby/blob/e5c7261e3f2637554a5c12b924be297d7dca1328/lib/rdkafka/bindings.rb#L117
         case level
         when Logger::FATAL
-          self.fatal(
+          self.fatal(message)
         when Logger::ERROR
-          self.error(
+          self.error(message)
         when Logger::WARN
-          self.warn(
+          self.warn(message)
        when Logger::INFO
-          self.info(
+          self.info(message)
        when Logger::DEBUG
-          self.debug(
+          self.debug(message)
        else
-          self.trace(
+          self.trace(message)
        end
      end
    }
@@ -158,6 +224,12 @@ DESC
     @headers_from_record.each do |key, value|
       @headers_from_record_accessors[key] = record_accessor_create(value)
     end
+
+    @exclude_field_accessors = @exclude_fields.map do |field|
+      record_accessor_create(field)
+    end
+
+    @enqueue_rate = EnqueueRate.new(@max_enqueue_bytes_per_second) unless @max_enqueue_bytes_per_second.nil?
   end
 
   def build_config
@@ -221,8 +293,19 @@ DESC
     true
   end
 
+  def wait_writing_threads
+    done = false
+    until done do
+      @writing_threads_mutex.synchronize do
+        done = true if @writing_threads.empty?
+      end
+      sleep(1) unless done
+    end
+  end
+
   def shutdown
     super
+    wait_writing_threads
     shutdown_producers
   end
 
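The `wait_writing_threads`/`shutdown` change is the 0.17.2 fix for the plugin exiting before Kafka had responded: every thread inside `write` registers itself in a mutex-protected set (see the following hunks), and `shutdown` now blocks until that set is empty. A standalone sketch of the same bookkeeping with plain Ruby threads; the real plugin relies on Fluentd's plugin lifecycle rather than explicit joins, and the names here are illustrative:

```ruby
require "set"

# Writers register themselves for the duration of a write; the shutdown side
# polls until the set drains before tearing anything down.
writing_threads = Set.new
mutex = Mutex.new

writer = lambda do
  mutex.synchronize { writing_threads.add(Thread.current) }
  begin
    sleep(rand * 0.3)   # stand-in for producing and waiting on delivery handles
  ensure
    mutex.synchronize { writing_threads.delete(Thread.current) }
  end
end

workers = Array.new(4) { Thread.new(&writer) }
sleep 0.05              # sketch only: give the workers a moment to register

# shutdown side: block until no write is mid-flight (the plugin sleeps 1s per poll)
sleep 0.05 until mutex.synchronize { writing_threads.empty? }
workers.each(&:join)
puts "all writes finished; safe to shut the shared producer down"
```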
@@ -279,6 +362,7 @@ DESC
   end
 
   def write(chunk)
+    @writing_threads_mutex.synchronize { @writing_threads.add(Thread.current) }
     tag = chunk.metadata.tag
     topic = if @topic
               extract_placeholders(@topic, chunk)
@@ -305,10 +389,17 @@ DESC
             headers[key] = header_accessor.call(record)
           end
 
+          unless @exclude_fields.empty?
+            @exclude_field_accessors.each do |exclude_field_acessor|
+              exclude_field_acessor.delete(record)
+            end
+          end
+
           record_buf = @formatter_proc.call(tag, time, record)
           record_buf_bytes = record_buf.bytesize
           if @max_send_limit_bytes && record_buf_bytes > @max_send_limit_bytes
-            log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :
+            log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :record_size => record_buf_bytes
+            log.debug "Skipped event:", :record => record
             next
           end
         rescue StandardError => e
@@ -316,7 +407,7 @@ DESC
             next
          end
 
-          handler = enqueue_with_retry(producer, topic, record_buf, message_key, partition, headers)
+          handler = enqueue_with_retry(producer, topic, record_buf, message_key, partition, headers, time)
           if @rdkafka_delivery_handle_poll_timeout != 0
             handlers << handler
           end
@@ -333,14 +424,22 @@ DESC
       # Raise exception to retry sendind messages
       raise e
     end
+  ensure
+    @writing_threads_mutex.synchronize { @writing_threads.delete(Thread.current) }
   end
 
-  def enqueue_with_retry(producer, topic, record_buf, message_key, partition, headers)
+  def enqueue_with_retry(producer, topic, record_buf, message_key, partition, headers, time)
     attempt = 0
     loop do
       begin
-
+        @enqueue_rate.raise_if_limit_exceeded(record_buf.bytesize) if @enqueue_rate
+        return producer.produce(topic: topic, payload: record_buf, key: message_key, partition: partition, headers: headers, timestamp: @use_event_time ? Time.at(time) : nil)
+      rescue EnqueueRate::LimitExceeded => e
+        @enqueue_rate.revert if @enqueue_rate
+        duration = e.next_retry_clock - Fluent::Clock.now
+        sleep(duration) if duration > 0.0
       rescue Exception => e
+        @enqueue_rate.revert if @enqueue_rate
         if e.respond_to?(:code) && e.code == :queue_full
           if attempt <= @max_enqueue_retries
             log.warn "Failed to enqueue message; attempting retry #{attempt} of #{@max_enqueue_retries} after #{@enqueue_retry_backoff}s"
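Putting the pieces together: `enqueue_with_retry` charges the record size against the `EnqueueRate` budget before producing, reverts the charge and sleeps until the next one-second window on `LimitExceeded`, and also reverts on any other produce failure. The following is a self-contained re-creation of that flow under simplifying assumptions (single thread, `Process.clock_gettime` instead of `Fluent::Clock`, no Kafka producer); class and method names here are illustrative, not the plugin's:

```ruby
# Simplified charge -> raise -> revert -> sleep-until-next-window -> retry cycle.
class ThrottleExceeded < StandardError
  attr_reader :next_retry_clock
  def initialize(next_retry_clock)
    super("enqueue rate limit exceeded")
    @next_retry_clock = next_retry_clock
  end
end

class SimpleEnqueueRate
  def initialize(limit_bytes_per_second)
    @limit = limit_bytes_per_second
    @window_start = monotonic_now
    @bytes = 0
  end

  def charge(bytesize)
    if monotonic_now - @window_start >= 1.0   # budget resets every second
      @window_start = monotonic_now
      @bytes = 0
    end
    @bytes += bytesize
    raise ThrottleExceeded.new(@window_start + 1.0) if @bytes > @limit
  end

  def revert(bytesize)
    @bytes -= bytesize
  end

  private

  def monotonic_now
    Process.clock_gettime(Process::CLOCK_MONOTONIC)
  end
end

rate = SimpleEnqueueRate.new(32 * 3)   # 96 bytes/s, matching the new test below
9.times do |i|
  payload = "x" * 32                   # stand-in for a formatted 32-byte record
  begin
    rate.charge(payload.bytesize)
    puts "enqueued record #{i}"
  rescue ThrottleExceeded => e
    rate.revert(payload.bytesize)
    pause = e.next_retry_clock - Process.clock_gettime(Process::CLOCK_MONOTONIC)
    sleep(pause) if pause > 0
    retry
  end
end
```

With the 96 bytes/s budget the nine records drain over roughly three one-second windows, which is what the new `test_max_enqueue_bytes_per_second` below asserts.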
data/test/plugin/test_in_kafka_group.rb
CHANGED
@@ -14,6 +14,7 @@ class KafkaGroupInputTest < Test::Unit::TestCase
       brokers localhost:9092
       consumer_group fluentd
       format text
+      refresh_topic_interval 0
       @label @kafka
       topics #{TOPIC_NAME}
     ]
@@ -52,6 +53,7 @@ class KafkaGroupInputTest < Test::Unit::TestCase
       brokers localhost:9092
       format text
       @label @kafka
+      refresh_topic_interval 0
       topics #{TOPIC_NAME}
     ]
     d = create_driver
data/test/plugin/test_out_kafka2.rb
CHANGED
@@ -1,6 +1,8 @@
 require 'helper'
 require 'fluent/test/helpers'
-require 'fluent/
+require 'fluent/test/driver/input'
+require 'fluent/test/driver/output'
+require 'securerandom'
 
 class Kafka2OutputTest < Test::Unit::TestCase
   include Fluent::Test::Helpers
@@ -15,8 +17,8 @@ class Kafka2OutputTest < Test::Unit::TestCase
     ])
   end
 
-  def config
-    base_config + config_element('ROOT', '', {"default_topic" =>
+  def config(default_topic: "kitagawakeiko")
+    base_config + config_element('ROOT', '', {"default_topic" => default_topic,
                                               "brokers" => "localhost:9092"}, [
                                              ])
   end
@@ -57,4 +59,58 @@ class Kafka2OutputTest < Test::Unit::TestCase
     d = create_driver
     assert_equal true, d.instance.multi_workers_ready?
   end
+
+  class WriteTest < self
+    TOPIC_NAME = "kafka-output-#{SecureRandom.uuid}"
+
+    INPUT_CONFIG = %[
+      @type kafka
+      brokers localhost:9092
+      format json
+      @label @kafka
+      topics #{TOPIC_NAME}
+    ]
+
+    def create_target_driver(conf = INPUT_CONFIG)
+      Fluent::Test::Driver::Input.new(Fluent::KafkaInput).configure(conf)
+    end
+
+    def setup
+      @kafka = Kafka.new(["localhost:9092"], client_id: 'kafka')
+    end
+
+    def teardown
+      @kafka.delete_topic(TOPIC_NAME)
+      @kafka.close
+    end
+
+    def test_write
+      target_driver = create_target_driver
+      expected_message = {"a" => 2}
+      target_driver.run(expect_records: 1, timeout: 5) do
+        sleep 2
+        d = create_driver(config(default_topic: TOPIC_NAME))
+        d.run do
+          d.feed("test", event_time, expected_message)
+        end
+      end
+      actual_messages = target_driver.events.collect { |event| event[2] }
+      assert_equal([expected_message], actual_messages)
+    end
+
+    def test_exclude_fields
+      conf = config(default_topic: TOPIC_NAME) +
+             config_element('ROOT', '', {"exclude_fields" => "$.foo"}, [])
+      target_driver = create_target_driver
+      target_driver.run(expect_records: 1, timeout: 5) do
+        sleep 2
+        d = create_driver(conf)
+        d.run do
+          d.feed('test', event_time, {'a' => 'b', 'foo' => 'bar', 'message' => 'test'})
+        end
+      end
+      actual_messages = target_driver.events.collect { |event| event[2] }
+      assert_equal([{'a' => 'b', 'message' => 'test'}], actual_messages)
+    end
+  end
 end
data/test/plugin/test_out_rdkafka2.rb
ADDED
@@ -0,0 +1,167 @@
+require 'helper'
+require 'fluent/test/helpers'
+require 'fluent/test/driver/input'
+require 'fluent/test/driver/output'
+require 'securerandom'
+
+class Rdkafka2OutputTest < Test::Unit::TestCase
+  include Fluent::Test::Helpers
+
+  def have_rdkafka
+    begin
+      require 'fluent/plugin/out_rdkafka2'
+      true
+    rescue LoadError
+      false
+    end
+  end
+
+  def setup
+    omit_unless(have_rdkafka, "rdkafka isn't installed")
+    Fluent::Test.setup
+  end
+
+  def base_config
+    config_element('ROOT', '', {"@type" => "rdkafka2"}, [
+                     config_element('format', "", {"@type" => "json"})
+                   ])
+  end
+
+  def config(default_topic: "kitagawakeiko")
+    base_config + config_element('ROOT', '', {"default_topic" => default_topic,
+                                              "brokers" => "localhost:9092"}, [
+                                             ])
+  end
+
+  def create_driver(conf = config, tag='test')
+    Fluent::Test::Driver::Output.new(Fluent::Rdkafka2Output).configure(conf)
+  end
+
+  def test_configure
+    assert_nothing_raised(Fluent::ConfigError) {
+      create_driver(base_config)
+    }
+
+    assert_nothing_raised(Fluent::ConfigError) {
+      create_driver(config)
+    }
+
+    assert_nothing_raised(Fluent::ConfigError) {
+      create_driver(config + config_element('buffer', "", {"@type" => "memory"}))
+    }
+
+    d = create_driver
+    assert_equal 'kitagawakeiko', d.instance.default_topic
+    assert_equal 'localhost:9092', d.instance.brokers
+  end
+
+  def test_mutli_worker_support
+    d = create_driver
+    assert_equal true, d.instance.multi_workers_ready?
+  end
+
+  class WriteTest < self
+    TOPIC_NAME = "kafka-output-#{SecureRandom.uuid}"
+
+    INPUT_CONFIG = %[
+      @type kafka
+      brokers localhost:9092
+      format json
+      @label @kafka
+      topics #{TOPIC_NAME}
+    ]
+
+    def create_target_driver(conf = INPUT_CONFIG)
+      Fluent::Test::Driver::Input.new(Fluent::KafkaInput).configure(conf)
+    end
+
+    def setup
+      @kafka = nil
+      omit_unless(have_rdkafka, "rdkafka isn't installed")
+      @kafka = Kafka.new(["localhost:9092"], client_id: 'kafka')
+    end
+
+    def teardown
+      if @kafka
+        @kafka.delete_topic(TOPIC_NAME)
+        @kafka.close
+      end
+    end
+
+    def test_write
+      target_driver = create_target_driver
+      expected_message = {"a" => 2}
+      target_driver.run(expect_records: 1, timeout: 5) do
+        sleep 2
+        d = create_driver(config(default_topic: TOPIC_NAME))
+        d.run do
+          d.feed("test", event_time, expected_message)
+        end
+      end
+      actual_messages = target_driver.events.collect { |event| event[2] }
+      assert_equal([expected_message], actual_messages)
+    end
+
+    def test_write_with_use_event_time
+      input_config = %[
+        @type kafka
+        brokers localhost:9092
+        format json
+        @label @kafka
+        topics #{TOPIC_NAME}
+        time_source kafka
+      ]
+      target_driver = create_target_driver(input_config)
+      expected_message = {"a" => 2}
+      now = event_time
+      target_driver.run(expect_records: 1, timeout: 5) do
+        sleep 2
+        d = create_driver(config(default_topic: TOPIC_NAME) + config_element('ROOT', '', {"use_event_time" => true}))
+        d.run do
+          d.feed("test", now, expected_message)
+        end
+      end
+      actual_time = target_driver.events.collect { |event| event[1] }.last
+      assert_in_delta(actual_time, now, 0.001) # expects millseconds precision
+      actual_messages = target_driver.events.collect { |event| event[2] }
+      assert_equal([expected_message], actual_messages)
+    end
+
+    def test_exclude_fields
+      conf = config(default_topic: TOPIC_NAME) +
+             config_element('ROOT', '', {"exclude_fields" => "$.foo"}, [])
+      target_driver = create_target_driver
+      target_driver.run(expect_records: 1, timeout: 5) do
+        sleep 2
+        d = create_driver(conf)
+        d.run do
+          d.feed('test', event_time, {'a' => 'b', 'foo' => 'bar', 'message' => 'test'})
+        end
+      end
+      actual_messages = target_driver.events.collect { |event| event[2] }
+      assert_equal([{'a' => 'b', 'message' => 'test'}], actual_messages)
+    end
+
+    def test_max_enqueue_bytes_per_second
+      conf = config(default_topic: TOPIC_NAME) +
+             config_element('ROOT', '', {"max_enqueue_bytes_per_second" => 32 * 3}, [])
+      target_driver = create_target_driver
+      expected_messages = []
+      target_driver.run(expect_records: 9, timeout: 10) do
+        sleep 2
+        d = create_driver(conf)
+        start_time = Fluent::Clock.now
+        d.run do
+          9.times do |i|
+            message = {"message" => "32bytes message: #{i}"}
+            d.feed("test", event_time, message)
+            expected_messages << message
+          end
+        end
+        assert_in_delta(2.0, Fluent::Clock.now - start_time, 0.5)
+      end
+      actual_messages = target_driver.events.collect { |event| event[2] }
+      assert_equal(expected_messages, actual_messages)
+    end
+  end
+end
metadata
CHANGED
@@ -1,15 +1,15 @@
 --- !ruby/object:Gem::Specification
 name: fluent-plugin-kafka
 version: !ruby/object:Gem::Version
-  version: 0.17.
+  version: 0.17.4
 platform: ruby
 authors:
 - Hidemasa Togashi
 - Masahiro Nakagawa
-autorequire:
+autorequire:
 bindir: bin
 cert_chain: []
-date:
+date: 2022-01-25 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: fluentd
@@ -173,11 +173,12 @@ files:
 - test/plugin/test_out_kafka.rb
 - test/plugin/test_out_kafka2.rb
 - test/plugin/test_out_kafka_buffered.rb
+- test/plugin/test_out_rdkafka2.rb
 homepage: https://github.com/fluent/fluent-plugin-kafka
 licenses:
 - Apache-2.0
 metadata: {}
-post_install_message:
+post_install_message:
 rdoc_options: []
 require_paths:
 - lib
@@ -192,8 +193,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
   - !ruby/object:Gem::Version
     version: '0'
 requirements: []
-rubygems_version: 3.
-signing_key:
+rubygems_version: 3.2.5
+signing_key:
 specification_version: 4
 summary: Fluentd plugin for Apache Kafka > 0.8
 test_files:
@@ -204,3 +205,4 @@ test_files:
 - test/plugin/test_out_kafka.rb
 - test/plugin/test_out_kafka2.rb
 - test/plugin/test_out_kafka_buffered.rb
+- test/plugin/test_out_rdkafka2.rb