fluent-plugin-kafka 0.13.0 → 0.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ChangeLog +24 -0
- data/README.md +42 -3
- data/fluent-plugin-kafka.gemspec +2 -2
- data/lib/fluent/plugin/in_kafka.rb +8 -5
- data/lib/fluent/plugin/in_kafka_group.rb +23 -6
- data/lib/fluent/plugin/in_rdkafka_group.rb +284 -0
- data/lib/fluent/plugin/kafka_producer_ext.rb +4 -2
- data/lib/fluent/plugin/out_kafka2.rb +17 -2
- data/lib/fluent/plugin/out_rdkafka2.rb +14 -4
- metadata +5 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 43c3a759f4636168c932c33f45c38105ebb522b5ea8222f1b1a7eceb53343348
+  data.tar.gz: c64a103244e721fa2de124f466f2480c960daafc713fd16f685ea4dd4a545a3d
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 707d92f2a23041b53daf6410d3fadb0e84053c4eb250b20c6dd3c72a15969273d2279b71950334187d156767bf6646a0af468a0f84e85ca683a34c127e47e363
+  data.tar.gz: 978883c8a72152bb6b9262ccea4e6b65b91bca1a3907ea43a7930cf7b4d414f1a9f47cb593d420738a48bd47d86451f376fe4cc6e7dec6b4f2c4e81ad5213d00
data/ChangeLog
CHANGED
@@ -1,3 +1,27 @@
+Release 0.15.0 - 2020/09/14
+
+* Add experimental `in_rdkafka_group`
+* in_kafka: Expose `ssl_verify_hostname` parameter
+
+Release 0.14.2 - 2020/08/26
+
+* in_kafka_group: Add `add_headers` parameter
+* out_kafka2/out_rdkafka2: Support `discard_kafka_delivery_failed` parameter
+
+Release 0.14.1 - 2020/08/11
+
+* kafka_producer_ext: Fix regression by v0.14.0 changes
+
+Release 0.14.0 - 2020/08/07
+
+* Update ruby-kafka dependency to v1.2.0 or later. Check https://github.com/zendesk/ruby-kafka#compatibility
+* kafka_producer_ext: Follow Paritioner API change
+
+Release 0.13.1 - 2020/07/17
+
+* in_kafka_group: Support ssl_verify_hostname parameter
+* out_kafka2/out_rdkafka2: Support topic parameter with placeholders
+
 Release 0.13.0 - 2020/03/09
 
 * Accept ruby-kafka v1 or later
data/README.md
CHANGED
@@ -118,10 +118,13 @@ Consume events by kafka consumer group features..
   topics <listening topics(separate with comma',')>
   format <input text type (text|json|ltsv|msgpack)> :default => json
   message_key <key (Optional, for text format only, default is message)>
+  kafka_mesasge_key <key (Optional, If specified, set kafka's message key to this key)>
+  add_headers <If true, add kafka's message headers to record>
   add_prefix <tag prefix (Optional)>
   add_suffix <tag suffix (Optional)>
   retry_emit_limit <Wait retry_emit_limit x 1s when BuffereQueueLimitError happens. The default is nil and it means waiting until BufferQueueLimitError is resolved>
-  use_record_time <If true, replace event time with contents of 'time' field of fetched record>
+  use_record_time (Deprecated. Use 'time_source record' instead.) <If true, replace event time with contents of 'time' field of fetched record>
+  time_source <source for message timestamp (now|kafka|record)> :default => now
   time_format <string (Optional when use_record_time is used)>
@@ -138,9 +141,43 @@ See also [ruby-kafka README](https://github.com/zendesk/ruby-kafka#consuming-mes
 
 Consuming topic name is used for event tag. So when the target topic name is `app_event`, the tag is `app_event`. If you want to modify tag, use `add_prefix` or `add_suffix` parameter. With `add_prefix kafka`, the tag is `kafka.app_event`.
 
+### Input plugin (@type 'rdkafka_group', supports kafka consumer groups, uses rdkafka-ruby)
+
+:warning: **The in_rdkafka_group consumer was not yet tested under heavy production load. Use it at your own risk!**
+
+With the introduction of the rdkafka-ruby based input plugin we hope to support Kafka brokers above version 2.1 where we saw [compatibility issues](https://github.com/fluent/fluent-plugin-kafka/issues/315) when using the ruby-kafka based @kafka_group input type. The rdkafka-ruby lib wraps the highly performant and production ready librdkafka C lib.
+
+    <source>
+      @type rdkafka_group
+      topics <listening topics(separate with comma',')>
+      format <input text type (text|json|ltsv|msgpack)> :default => json
+      message_key <key (Optional, for text format only, default is message)>
+      kafka_mesasge_key <key (Optional, If specified, set kafka's message key to this key)>
+      add_headers <If true, add kafka's message headers to record>
+      add_prefix <tag prefix (Optional)>
+      add_suffix <tag suffix (Optional)>
+      retry_emit_limit <Wait retry_emit_limit x 1s when BuffereQueueLimitError happens. The default is nil and it means waiting until BufferQueueLimitError is resolved>
+      use_record_time (Deprecated. Use 'time_source record' instead.) <If true, replace event time with contents of 'time' field of fetched record>
+      time_source <source for message timestamp (now|kafka|record)> :default => now
+      time_format <string (Optional when use_record_time is used)>
+
+      # kafka consumer options
+      max_wait_time_ms 500
+      max_batch_size 10000
+      kafka_configs {
+        "bootstrap.servers": "brokers <broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>",
+        "group.id": "<consumer group name>"
+      }
+    </source>
+
+See also [rdkafka-ruby](https://github.com/appsignal/rdkafka-ruby) and [librdkafka](https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md) for more detailed documentation about Kafka consumer options.
+
+Consuming topic name is used for event tag. So when the target topic name is `app_event`, the tag is `app_event`. If you want to modify tag, use `add_prefix` or `add_suffix` parameter. With `add_prefix kafka`, the tag is `kafka.app_event`.
+
 ### Output plugin
 
-This `kafka2` plugin is for fluentd v1
+This `kafka2` plugin is for fluentd v1 or later. This plugin uses `ruby-kafka` producer for writing data.
+If `ruby-kafka` doesn't fit your kafka environment, check `rdkafka2` plugin instead. This will be `out_kafka` plugin in the future.
 
 <match app.**>
   @type kafka2
@@ -161,6 +198,7 @@ This `kafka2` plugin is for fluentd v1.0 or later. This will be `out_kafka` plug
   headers (hash) :default => {}
   headers_from_record (hash) :default => {}
   use_default_for_unknown_topic (bool) :default => false
+  discard_kafka_delivery_failed (bool) :default => false (No discard)
 
   <format>
     @type (json|ltsv|msgpack|attr:<record name>|<formatter name>) :default => json
@@ -384,6 +422,7 @@ You need to install rdkafka gem.
   default_message_key (string) :default => nil
   exclude_topic_key (bool) :default => false
   exclude_partition_key (bool) :default => false
+  discard_kafka_delivery_failed (bool) :default => false (No discard)
 
   # same with kafka2
   headers (hash) :default => {}
@@ -443,7 +482,7 @@ See ruby-kafka README for more details: https://github.com/zendesk/ruby-kafka#co
 
 To avoid the problem, there are 2 approaches:
 
-- Upgrade your kafka cluster to latest version. This is better
+- Upgrade your kafka cluster to latest version. This is better because recent version is faster and robust.
 - Downgrade ruby-kafka/fluent-plugin-kafka to work with your older kafka.
 
 ## Contributing
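For orientation (not part of the diff): the two kafka2 options introduced above can be combined roughly as in the following sketch. Broker address, tag pattern, and buffer settings are illustrative placeholders, not values taken from this release.

    <match app.**>
      @type kafka2
      brokers <broker1_host>:<broker1_port>

      # `topic` accepts placeholders (0.13.1); ${tag} requires 'tag' among the buffer chunk keys
      topic app.${tag}

      # drop events whose delivery failed instead of retrying the chunk (0.14.2)
      discard_kafka_delivery_failed true

      <format>
        @type json
      </format>
      <buffer tag>
        flush_interval 10s
      </buffer>
    </match>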
data/fluent-plugin-kafka.gemspec
CHANGED
@@ -13,12 +13,12 @@ Gem::Specification.new do |gem|
   gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
   gem.name = "fluent-plugin-kafka"
   gem.require_paths = ["lib"]
-  gem.version = '0.
+  gem.version = '0.15.0'
   gem.required_ruby_version = ">= 2.1.0"
 
   gem.add_dependency "fluentd", [">= 0.10.58", "< 2"]
   gem.add_dependency 'ltsv'
-  gem.add_dependency 'ruby-kafka', '>=
+  gem.add_dependency 'ruby-kafka', '>= 1.2.0', '< 2'
   gem.add_development_dependency "rake", ">= 0.9.2"
   gem.add_development_dependency "test-unit", ">= 3.0.8"
 end
data/lib/fluent/plugin/in_kafka.rb
CHANGED
@@ -39,6 +39,8 @@ class Fluent::KafkaInput < Fluent::Input
     :deprecated => "Use 'time_source record' instead."
   config_param :time_source, :enum, :list => [:now, :kafka, :record], :default => :now,
     :desc => "Source for message timestamp."
+  config_param :record_time_key, :string, :default => 'time',
+    :desc => "Time field when time_source is 'record'"
   config_param :get_kafka_client_log, :bool, :default => false
   config_param :time_format, :string, :default => nil,
     :desc => "Time format to be used to parse 'time' field."
@@ -186,16 +188,17 @@ class Fluent::KafkaInput < Fluent::Input
       @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: logger, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
                          ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
                          ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_scram_username: @username, sasl_scram_password: @password,
-                         sasl_scram_mechanism: @scram_mechanism, sasl_over_ssl: @sasl_over_ssl)
+                         sasl_scram_mechanism: @scram_mechanism, sasl_over_ssl: @sasl_over_ssl, ssl_verify_hostname: @ssl_verify_hostname)
     elsif @username != nil && @password != nil
       @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: logger, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
                          ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
                          ssl_ca_certs_from_system: @ssl_ca_certs_from_system,sasl_plain_username: @username, sasl_plain_password: @password,
-                         sasl_over_ssl: @sasl_over_ssl)
+                         sasl_over_ssl: @sasl_over_ssl, ssl_verify_hostname: @ssl_verify_hostname)
     else
       @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: logger, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
                          ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
-                         ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_gssapi_principal: @principal, sasl_gssapi_keytab: @keytab
+                         ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_gssapi_principal: @principal, sasl_gssapi_keytab: @keytab,
+                         ssl_verify_hostname: @ssl_verify_hostname)
     end
 
     @zookeeper = Zookeeper.new(@offset_zookeeper) if @offset_zookeeper
@@ -292,9 +295,9 @@ class Fluent::KafkaInput < Fluent::Input
       record_time = Fluent::Engine.now
     when :record
       if @time_format
-        record_time = @time_parser.parse(record[
+        record_time = @time_parser.parse(record[@record_time_key])
       else
-        record_time = record[
+        record_time = record[@record_time_key]
       end
     else
       $log.fatal "BUG: invalid time_source: #{@time_source}"
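As a usage sketch for the in_kafka changes above (illustrative configuration only; topic, paths, and the `logged_at` field name are hypothetical): `ssl_verify_hostname` is now forwarded to ruby-kafka's `Kafka.new`, and `record_time_key` selects which record field supplies the event time when `time_source record` is set.

    <source>
      @type kafka
      brokers <broker1_host>:<broker1_port>
      topics app_event

      ssl_ca_cert /path/to/ca.crt
      ssl_verify_hostname true

      # take the event time from the record's 'logged_at' field instead of 'time'
      time_source record
      record_time_key logged_at
      time_format %Y-%m-%dT%H:%M:%S%z
    </source>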
data/lib/fluent/plugin/in_kafka_group.rb
CHANGED
@@ -18,6 +18,8 @@ class Fluent::KafkaGroupInput < Fluent::Input
     :desc => "Supported format: (json|text|ltsv|msgpack)"
   config_param :message_key, :string, :default => 'message',
     :desc => "For 'text' format only."
+  config_param :add_headers, :bool, :default => false,
+    :desc => "Add kafka's message headers to event record"
   config_param :add_prefix, :string, :default => nil,
     :desc => "Tag prefix (Optional)"
   config_param :add_suffix, :string, :default => nil,
@@ -29,6 +31,8 @@ class Fluent::KafkaGroupInput < Fluent::Input
     :deprecated => "Use 'time_source record' instead."
   config_param :time_source, :enum, :list => [:now, :kafka, :record], :default => :now,
     :desc => "Source for message timestamp."
+  config_param :record_time_key, :string, :default => 'time',
+    :desc => "Time field when time_source is 'record'"
   config_param :get_kafka_client_log, :bool, :default => false
   config_param :time_format, :string, :default => nil,
     :desc => "Time format to be used to parse 'time' field."
@@ -166,16 +170,17 @@ class Fluent::KafkaGroupInput < Fluent::Input
       @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: logger, connect_timeout: @connect_timeout, socket_timeout: @socket_timeout, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
                          ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
                          ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_scram_username: @username, sasl_scram_password: @password,
-                         sasl_scram_mechanism: @scram_mechanism, sasl_over_ssl: @sasl_over_ssl)
+                         sasl_scram_mechanism: @scram_mechanism, sasl_over_ssl: @sasl_over_ssl, ssl_verify_hostname: @ssl_verify_hostname)
     elsif @username != nil && @password != nil
       @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: logger, connect_timeout: @connect_timeout, socket_timeout: @socket_timeout, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
                          ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
                          ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_plain_username: @username, sasl_plain_password: @password,
-                         sasl_over_ssl: @sasl_over_ssl)
+                         sasl_over_ssl: @sasl_over_ssl, ssl_verify_hostname: @ssl_verify_hostname)
     else
       @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: logger, connect_timeout: @connect_timeout, socket_timeout: @socket_timeout, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
                          ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
-                         ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_gssapi_principal: @principal, sasl_gssapi_keytab: @keytab
+                         ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_gssapi_principal: @principal, sasl_gssapi_keytab: @keytab,
+                         ssl_verify_hostname: @ssl_verify_hostname)
     end
 
     @consumer = setup_consumer
@@ -198,7 +203,14 @@ class Fluent::KafkaGroupInput < Fluent::Input
   def setup_consumer
     consumer = @kafka.consumer(@consumer_opts)
     @topics.each { |topic|
-
+      if m = /^\/(.+)\/$/.match(topic)
+        topic_or_regex = Regexp.new(m[1])
+        $log.info "Subscribe to topics matching the regex #{topic}"
+      else
+        topic_or_regex = topic
+        $log.info "Subscribe to topic #{topic}"
+      end
+      consumer.subscribe(topic_or_regex, start_from_beginning: @start_from_beginning, max_bytes_per_partition: @max_bytes)
     }
     consumer
   end
@@ -243,9 +255,9 @@ class Fluent::KafkaGroupInput < Fluent::Input
       record_time = Fluent::Engine.now
     when :record
       if @time_format
-        record_time = @time_parser.parse(record[
+        record_time = @time_parser.parse(record[@record_time_key].to_s)
      else
-        record_time = record[
+        record_time = record[@record_time_key]
      end
     else
       log.fatal "BUG: invalid time_source: #{@time_source}"
@@ -253,6 +265,11 @@ class Fluent::KafkaGroupInput < Fluent::Input
        if @kafka_message_key
          record[@kafka_message_key] = msg.key
        end
+        if @add_headers
+          msg.headers.each_pair { |k, v|
+            record[k] = v
+          }
+        end
        es.add(record_time, record)
      rescue => e
        log.warn "parser error in #{batch.topic}/#{batch.partition}", :error => e.to_s, :value => msg.value, :offset => msg.offset
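A hedged configuration sketch for the in_kafka_group additions above: a topic written as `/pattern/` is compiled with `Regexp.new` and subscribed as a pattern, and `add_headers true` copies each Kafka message header into the emitted record. Broker, group, and topic names are placeholders.

    <source>
      @type kafka_group
      brokers <broker1_host>:<broker1_port>
      consumer_group fluentd
      # slash-delimited entries are treated as regular expressions
      topics /^app_.+$/,audit_log
      add_headers true
      format json
    </source>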
data/lib/fluent/plugin/in_rdkafka_group.rb
ADDED
@@ -0,0 +1,284 @@
+require 'fluent/plugin/input'
+require 'fluent/time'
+require 'fluent/plugin/kafka_plugin_util'
+
+require 'rdkafka'
+
+class Fluent::Plugin::RdKafkaGroupInput < Fluent::Plugin::Input
+  Fluent::Plugin.register_input('rdkafka_group', self)
+
+  helpers :thread
+
+  config_param :topics, :string,
+               :desc => "Listening topics(separate with comma',')."
+
+  config_param :format, :string, :default => 'json',
+               :desc => "Supported format: (json|text|ltsv|msgpack)"
+  config_param :message_key, :string, :default => 'message',
+               :desc => "For 'text' format only."
+  config_param :add_headers, :bool, :default => false,
+               :desc => "Add kafka's message headers to event record"
+  config_param :add_prefix, :string, :default => nil,
+               :desc => "Tag prefix (Optional)"
+  config_param :add_suffix, :string, :default => nil,
+               :desc => "Tag suffix (Optional)"
+  config_param :use_record_time, :bool, :default => false,
+               :desc => "Replace message timestamp with contents of 'time' field.",
+               :deprecated => "Use 'time_source record' instead."
+  config_param :time_source, :enum, :list => [:now, :kafka, :record], :default => :now,
+               :desc => "Source for message timestamp."
+  config_param :record_time_key, :string, :default => 'time',
+               :desc => "Time field when time_source is 'record'"
+  config_param :time_format, :string, :default => nil,
+               :desc => "Time format to be used to parse 'time' field."
+  config_param :kafka_message_key, :string, :default => nil,
+               :desc => "Set kafka's message key to this field"
+
+  config_param :retry_emit_limit, :integer, :default => nil,
+               :desc => "How long to stop event consuming when BufferQueueLimitError happens. Wait retry_emit_limit x 1s. The default is waiting until BufferQueueLimitError is resolved"
+  config_param :retry_wait_seconds, :integer, :default => 30
+  config_param :disable_retry_limit, :bool, :default => false,
+               :desc => "If set true, it disables retry_limit and make Fluentd retry indefinitely (default: false)"
+  config_param :retry_limit, :integer, :default => 10,
+               :desc => "The maximum number of retries for connecting kafka (default: 10)"
+
+  config_param :max_wait_time_ms, :integer, :default => 250,
+               :desc => "How long to block polls in milliseconds until the server sends us data."
+  config_param :max_batch_size, :integer, :default => 10000,
+               :desc => "Maximum number of log lines emitted in a single batch."
+
+  config_param :kafka_configs, :hash, :default => {},
+               :desc => "Kafka configuration properties as desribed in https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md"
+
+  include Fluent::KafkaPluginUtil::SSLSettings
+  include Fluent::KafkaPluginUtil::SaslSettings
+
+  class ForShutdown < StandardError
+  end
+
+  BufferError = Fluent::Plugin::Buffer::BufferOverflowError
+
+  def initialize
+    super
+
+    @time_parser = nil
+    @retry_count = 1
+  end
+
+  def _config_to_array(config)
+    config_array = config.split(',').map {|k| k.strip }
+    if config_array.empty?
+      raise Fluent::ConfigError, "kafka_group: '#{config}' is a required parameter"
+    end
+    config_array
+  end
+
+  def multi_workers_ready?
+    true
+  end
+
+  private :_config_to_array
+
+  def configure(conf)
+    super
+
+    log.warn "The in_rdkafka_group consumer was not yet tested under heavy production load. Use it at your own risk!"
+
+    log.info "Will watch for topics #{@topics} at brokers " \
+             "#{@kafka_configs["bootstrap.servers"]} and '#{@kafka_configs["group.id"]}' group"
+
+    @topics = _config_to_array(@topics)
+
+    @parser_proc = setup_parser
+
+    @time_source = :record if @use_record_time
+
+    if @time_source == :record and @time_format
+      @time_parser = Fluent::TimeParser.new(@time_format)
+    end
+  end
+
+  def setup_parser
+    case @format
+    when 'json'
+      begin
+        require 'oj'
+        Oj.default_options = Fluent::DEFAULT_OJ_OPTIONS
+        Proc.new { |msg| Oj.load(msg.payload) }
+      rescue LoadError
+        require 'yajl'
+        Proc.new { |msg| Yajl::Parser.parse(msg.payload) }
+      end
+    when 'ltsv'
+      require 'ltsv'
+      Proc.new { |msg| LTSV.parse(msg.payload, {:symbolize_keys => false}).first }
+    when 'msgpack'
+      require 'msgpack'
+      Proc.new { |msg| MessagePack.unpack(msg.payload) }
+    when 'text'
+      Proc.new { |msg| {@message_key => msg.payload} }
+    end
+  end
+
+  def start
+    super
+
+    @consumer = setup_consumer
+
+    thread_create(:in_rdkafka_group, &method(:run))
+  end
+
+  def shutdown
+    # This nil assignment should be guarded by mutex in multithread programming manner.
+    # But the situation is very low contention, so we don't use mutex for now.
+    # If the problem happens, we will add a guard for consumer.
+    consumer = @consumer
+    @consumer = nil
+    consumer.close
+
+    super
+  end
+
+  def setup_consumer
+    consumer = Rdkafka::Config.new(@kafka_configs).consumer
+    consumer.subscribe(*@topics)
+    consumer
+  end
+
+  def reconnect_consumer
+    log.warn "Stopping Consumer"
+    consumer = @consumer
+    @consumer = nil
+    if consumer
+      consumer.close
+    end
+    log.warn "Could not connect to broker. retry_time:#{@retry_count}. Next retry will be in #{@retry_wait_seconds} seconds"
+    @retry_count = @retry_count + 1
+    sleep @retry_wait_seconds
+    @consumer = setup_consumer
+    log.warn "Re-starting consumer #{Time.now.to_s}"
+    @retry_count = 0
+  rescue =>e
+    log.error "unexpected error during re-starting consumer object access", :error => e.to_s
+    log.error_backtrace
+    if @retry_count <= @retry_limit or disable_retry_limit
+      reconnect_consumer
+    end
+  end
+
+  class Batch
+    attr_reader :topic
+    attr_reader :messages
+
+    def initialize(topic)
+      @topic = topic
+      @messages = []
+    end
+  end
+
+  # Executes the passed codeblock on a batch of messages.
+  # It is guaranteed that every message in a given batch belongs to the same topic, because the tagging logic in :run expects that property.
+  # The number of maximum messages in a batch is capped by the :max_batch_size configuration value. It ensures that consuming from a single
+  # topic for a long time (e.g. with `auto.offset.reset` set to `earliest`) does not lead to memory exhaustion. Also, calling consumer.poll
+  # advances thes consumer offset, so in case the process crashes we might lose at most :max_batch_size messages.
+  def each_batch(&block)
+    batch = nil
+    message = nil
+    while @consumer
+      message = @consumer.poll(@max_wait_time_ms)
+      if message
+        if not batch
+          batch = Batch.new(message.topic)
+        elsif batch.topic != message.topic || batch.messages.size >= @max_batch_size
+          yield batch
+          batch = Batch.new(message.topic)
+        end
+        batch.messages << message
+      else
+        yield batch if batch
+        batch = nil
+      end
+    end
+    yield batch if batch
+  end
+
+  def run
+    while @consumer
+      begin
+        each_batch { |batch|
+          log.debug "A new batch for topic #{batch.topic} with #{batch.messages.size} messages"
+          es = Fluent::MultiEventStream.new
+          tag = batch.topic
+          tag = @add_prefix + "." + tag if @add_prefix
+          tag = tag + "." + @add_suffix if @add_suffix
+
+          batch.messages.each { |msg|
+            begin
+              record = @parser_proc.call(msg)
+              case @time_source
+              when :kafka
+                record_time = Fluent::EventTime.from_time(msg.timestamp)
+              when :now
+                record_time = Fluent::Engine.now
+              when :record
+                if @time_format
+                  record_time = @time_parser.parse(record[@record_time_key].to_s)
+                else
+                  record_time = record[@record_time_key]
+                end
+              else
+                log.fatal "BUG: invalid time_source: #{@time_source}"
+              end
+              if @kafka_message_key
+                record[@kafka_message_key] = msg.key
+              end
+              if @add_headers
+                msg.headers.each_pair { |k, v|
+                  record[k] = v
+                }
+              end
+              es.add(record_time, record)
+            rescue => e
+              log.warn "parser error in #{msg.topic}/#{msg.partition}", :error => e.to_s, :value => msg.payload, :offset => msg.offset
+              log.debug_backtrace
+            end
+          }
+
+          unless es.empty?
+            emit_events(tag, es)
+          end
+        }
+      rescue ForShutdown
+      rescue => e
+        log.error "unexpected error during consuming events from kafka. Re-fetch events.", :error => e.to_s
+        log.error_backtrace
+        reconnect_consumer
+      end
+    end
+  rescue => e
+    log.error "unexpected error during consumer object access", :error => e.to_s
+    log.error_backtrace
+  end
+
+  def emit_events(tag, es)
+    retries = 0
+    begin
+      router.emit_stream(tag, es)
+    rescue BufferError
+      raise ForShutdown if @consumer.nil?
+
+      if @retry_emit_limit.nil?
+        sleep 1
+        retry
+      end
+
+      if retries < @retry_emit_limit
+        retries += 1
+        sleep 1
+        retry
+      else
+        raise RuntimeError, "Exceeds retry_emit_limit"
+      end
+    end
+  end
+end
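A minimal rdkafka_group sketch based on the parameters defined above; the `kafka_configs` hash is handed to `Rdkafka::Config.new` unchanged, so librdkafka properties (for example `auto.offset.reset`, mentioned in the `each_batch` comment) can be set there. All values are placeholders.

    <source>
      @type rdkafka_group
      topics app_event
      format json
      max_wait_time_ms 500
      max_batch_size 10000
      # passed through verbatim to librdkafka; see its CONFIGURATION.md for valid keys
      kafka_configs {
        "bootstrap.servers": "<broker1_host>:<broker1_port>",
        "group.id": "fluentd",
        "auto.offset.reset": "earliest"
      }
    </source>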
data/lib/fluent/plugin/kafka_producer_ext.rb
CHANGED
@@ -69,12 +69,13 @@ module Kafka
         retry_backoff: retry_backoff,
         max_buffer_size: max_buffer_size,
         max_buffer_bytesize: max_buffer_bytesize,
+        partitioner: @partitioner,
       )
     end
   end
 
   class TopicProducer
-    def initialize(topic, cluster:, transaction_manager:, logger:, instrumenter:, compressor:, ack_timeout:, required_acks:, max_retries:, retry_backoff:, max_buffer_size:, max_buffer_bytesize:)
+    def initialize(topic, cluster:, transaction_manager:, logger:, instrumenter:, compressor:, ack_timeout:, required_acks:, max_retries:, retry_backoff:, max_buffer_size:, max_buffer_bytesize:, partitioner:)
       @cluster = cluster
       @transaction_manager = transaction_manager
       @logger = logger
@@ -86,6 +87,7 @@ module Kafka
       @max_buffer_size = max_buffer_size
       @max_buffer_bytesize = max_buffer_bytesize
       @compressor = compressor
+      @partitioner = partitioner
 
       @topic = topic
       @cluster.add_target_topics(Set.new([topic]))
@@ -250,7 +252,7 @@ module Kafka
 
       begin
         if partition.nil?
-          partition =
+          partition = @partitioner.call(partition_count, message)
         end
 
         @buffer.write(
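The extension now threads a `partitioner:` object through to `TopicProducer` and asks it for a partition via `call(partition_count, message)`, which is the Partitioner shape expected by ruby-kafka 1.2 ("Follow Paritioner API change" in the ChangeLog). For illustration only, an object satisfying that contract might look like the sketch below; the class name is hypothetical, and it assumes the pending message exposes `key` and `partition_key` as in ruby-kafka.

    # Hypothetical partitioner: anything responding to call(partition_count, message).
    require "zlib"

    class Crc32Partitioner
      def call(partition_count, message)
        # hash the partition key (or key) onto one of the topic's partitions;
        # key-less messages fall back to partition 0 in this sketch
        key = message.partition_key || message.key
        key ? Zlib.crc32(key) % partition_count : 0
      end
    end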
data/lib/fluent/plugin/out_kafka2.rb
CHANGED
@@ -15,6 +15,7 @@ module Fluent::Plugin
 Set brokers directly:
 <broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,..
 DESC
+  config_param :topic, :string, :default => nil, :desc => "kafka topic. Placeholders are supported"
   config_param :topic_key, :string, :default => 'topic', :desc => "Field for kafka topic"
   config_param :default_topic, :string, :default => nil,
                :desc => "Default output topic when record doesn't have topic field"
@@ -68,6 +69,7 @@ The codec the producer uses to compress messages.
 Supported codecs depends on ruby-kafka: https://github.com/zendesk/ruby-kafka#compression
 DESC
   config_param :max_send_limit_bytes, :size, :default => nil
+  config_param :discard_kafka_delivery_failed, :bool, :default => false
   config_param :active_support_notification_regex, :string, :default => nil,
                :desc => <<-DESC
 Add a regular expression to capture ActiveSupport notifications from the Kafka client
@@ -215,7 +217,11 @@ DESC
   # TODO: optimize write performance
   def write(chunk)
     tag = chunk.metadata.tag
-    topic =
+    topic = if @topic
+              extract_placeholders(@topic, chunk)
+            else
+              (chunk.metadata.variables && chunk.metadata.variables[@topic_key_sym]) || @default_topic || tag
+            end
 
     messages = 0
     record_buf = nil
@@ -262,7 +268,16 @@ DESC
 
       if messages > 0
         log.debug { "#{messages} messages send." }
-
+        if @discard_kafka_delivery_failed
+          begin
+            producer.deliver_messages
+          rescue Kafka::DeliveryFailed => e
+            log.warn "DeliveryFailed occurred. Discard broken event:", :error => e.to_s, :error_class => e.class.to_s, :tag => tag
+            producer.clear_buffer
+          end
+        else
+          producer.deliver_messages
+        end
       end
     rescue Kafka::UnknownTopicOrPartition
       if @use_default_for_unknown_topic && topic != @default_topic
data/lib/fluent/plugin/out_rdkafka2.rb
CHANGED
@@ -33,6 +33,7 @@ Set brokers directly:
 <broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,..
 Brokers: you can choose to use either brokers or zookeeper.
 DESC
+  config_param :topic, :string, :default => nil, :desc => "kafka topic. Placeholders are supported"
   config_param :topic_key, :string, :default => 'topic', :desc => "Field for kafka topic"
   config_param :default_topic, :string, :default => nil,
                :desc => "Default output topic when record doesn't have topic field"
@@ -72,6 +73,7 @@ The codec the producer uses to compress messages. Used for compression.codec
 Supported codecs: (gzip|snappy)
 DESC
   config_param :max_send_limit_bytes, :size, :default => nil
+  config_param :discard_kafka_delivery_failed, :bool, :default => false
   config_param :rdkafka_buffering_max_ms, :integer, :default => nil, :desc => 'Used for queue.buffering.max.ms'
   config_param :rdkafka_buffering_max_messages, :integer, :default => nil, :desc => 'Used for queue.buffering.max.messages'
   config_param :rdkafka_message_max_bytes, :integer, :default => nil, :desc => 'Used for message.max.bytes'
@@ -278,7 +280,11 @@ DESC
 
   def write(chunk)
     tag = chunk.metadata.tag
-    topic =
+    topic = if @topic
+              extract_placeholders(@topic, chunk)
+            else
+              (chunk.metadata.variables && chunk.metadata.variables[@topic_key_sym]) || @default_topic || tag
+            end
 
     handlers = []
     record_buf = nil
@@ -320,9 +326,13 @@ DESC
       }
     end
   rescue Exception => e
-
-
-
+    if @discard_kafka_delivery_failed
+      log.warn "Delivery failed. Discard events:", :error => e.to_s, :error_class => e.class.to_s, :tag => tag
+    else
+      log.warn "Send exception occurred: #{e} at #{e.backtrace.first}"
+      # Raise exception to retry sendind messages
+      raise e
+    end
   end
 
   def enqueue_with_retry(producer, topic, record_buf, message_key, partition, headers)
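out_rdkafka2 gains the same two options; with `discard_kafka_delivery_failed true` the rescue above only logs the error instead of re-raising, so the failed chunk is not retried. A hedged sketch with placeholder values:

    <match app.**>
      @type rdkafka2
      brokers <broker1_host>:<broker1_port>
      # placeholders behave as in kafka2 (requires 'tag' among the buffer chunk keys)
      topic logs.${tag}
      discard_kafka_delivery_failed true
      <buffer tag>
        flush_interval 10s
      </buffer>
    </match>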
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: fluent-plugin-kafka
 version: !ruby/object:Gem::Version
-  version: 0.
+  version: 0.15.0
 platform: ruby
 authors:
 - Hidemasa Togashi
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2020-
+date: 2020-09-14 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: fluentd
@@ -51,7 +51,7 @@ dependencies:
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version:
+        version: 1.2.0
     - - "<"
       - !ruby/object:Gem::Version
         version: '2'
@@ -61,7 +61,7 @@ dependencies:
     requirements:
     - - ">="
      - !ruby/object:Gem::Version
-        version:
+        version: 1.2.0
     - - "<"
      - !ruby/object:Gem::Version
        version: '2'
@@ -111,6 +111,7 @@ files:
 - fluent-plugin-kafka.gemspec
 - lib/fluent/plugin/in_kafka.rb
 - lib/fluent/plugin/in_kafka_group.rb
+- lib/fluent/plugin/in_rdkafka_group.rb
 - lib/fluent/plugin/kafka_plugin_util.rb
 - lib/fluent/plugin/kafka_producer_ext.rb
 - lib/fluent/plugin/out_kafka.rb