fluent-plugin-kafka 0.14.2 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/linux.yml +26 -0
- data/ChangeLog +23 -0
- data/README.md +35 -0
- data/fluent-plugin-kafka.gemspec +2 -1
- data/lib/fluent/plugin/in_kafka.rb +30 -6
- data/lib/fluent/plugin/in_kafka_group.rb +112 -41
- data/lib/fluent/plugin/in_rdkafka_group.rb +305 -0
- data/lib/fluent/plugin/out_kafka2.rb +1 -1
- metadata +18 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 7fcaf3e8fbb836ab8db3fa21003a713f9048de076097e2c95a9f30e5d1b05c08
+  data.tar.gz: 8437b2c9401238d811973422a65d2c9ee34bd8afc513f32412a22f93e03204a0
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 6d407e6f12dbafc6f5fad59ac6cbae4b13fa5f0a212cf82dba604f871afc286899290972b3afca5d1031888ee2f141c12d5d2e3d6b7324c7246317029149c491
+  data.tar.gz: b278441842361cc53836fce087e87e6d2d800bccbe9b307c2f0b39d3c4c357aae96275d2bfb808ab2455ebce692948d1241f4f991a5036473da1d31da699832b
data/.github/workflows/linux.yml
ADDED
@@ -0,0 +1,26 @@
+name: linux
+on:
+  - push
+  - pull_request
+jobs:
+  build:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        ruby: [ '2.4', '2.5', '2.6', '2.7', '3.0' ]
+        os:
+          - ubuntu-latest
+    name: Ruby ${{ matrix.ruby }} unit testing on ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v2
+      - uses: ruby/setup-ruby@v1
+        with:
+          ruby-version: ${{ matrix.ruby }}
+      - name: unit testing
+        env:
+          CI: true
+        run: |
+          gem install bundler rake
+          bundle install --jobs 4 --retry 3
+          bundle exec rake test
data/ChangeLog
CHANGED
@@ -1,3 +1,25 @@
+Release 0.16.0 - 2021/01/25
+
+* input: Add `tag_source` and `record_tag_key` parameters for using record field as tag
+* in_kafka_group: Use NumericParser for floating point
+
+Release 0.15.3 - 2020/12/08
+
+* in_kafka: Fix `record_time_key` parameter not working
+
+Release 0.15.2 - 2020/09/30
+
+* input: Support 3rd party parser
+
+Release 0.15.1 - 2020/09/17
+
+* out_kafka2: Fix wrong class name for configuration error
+
+Release 0.15.0 - 2020/09/14
+
+* Add experimental `in_rdkafka_group`
+* in_kafka: Expose `ssl_verify_hostname` parameter
+
 Release 0.14.2 - 2020/08/26
 
 * in_kafka_group: Add `add_headers` parameter
@@ -15,6 +37,7 @@ Release 0.14.0 - 2020/08/07
 Release 0.13.1 - 2020/07/17
 
 * in_kafka_group: Support ssl_verify_hostname parameter
+* in_kafka_group: Support regex based topics
 * out_kafka2/out_rdkafka2: Support topic parameter with placeholders
 
 Release 0.13.0 - 2020/03/09
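The headline 0.16.0 change adds `tag_source` and `record_tag_key` to the input plugins, so the event tag can be taken from a field of the consumed record instead of from the topic name. A minimal configuration sketch of how that might be used with `in_kafka_group` (broker address, group name, topic, and the `fluentd_tag` field name are placeholders, not taken from this diff):

    <source>
      @type kafka_group
      brokers broker1:9092
      consumer_group fluentd
      topics app_event
      format json
      tag_source record          # take the tag from a record field instead of the topic name
      record_tag_key fluentd_tag # field holding the tag; defaults to 'tag'
      add_prefix kafka           # final tag becomes kafka.<value of fluentd_tag>
    </source>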
data/README.md
CHANGED
@@ -139,6 +139,41 @@ Consume events by kafka consumer group features..
 
 See also [ruby-kafka README](https://github.com/zendesk/ruby-kafka#consuming-messages-from-kafka) for more detailed documentation about ruby-kafka options.
 
+`topics` supports regex pattern since v0.13.1. If you want to use regex pattern, use `/pattern/` like `/foo.*/`.
+
+Consuming topic name is used for event tag. So when the target topic name is `app_event`, the tag is `app_event`. If you want to modify tag, use `add_prefix` or `add_suffix` parameter. With `add_prefix kafka`, the tag is `kafka.app_event`.
+
+### Input plugin (@type 'rdkafka_group', supports kafka consumer groups, uses rdkafka-ruby)
+
+:warning: **The in_rdkafka_group consumer was not yet tested under heavy production load. Use it at your own risk!**
+
+With the introduction of the rdkafka-ruby based input plugin we hope to support Kafka brokers above version 2.1 where we saw [compatibility issues](https://github.com/fluent/fluent-plugin-kafka/issues/315) when using the ruby-kafka based @kafka_group input type. The rdkafka-ruby lib wraps the highly performant and production ready librdkafka C lib.
+
+    <source>
+      @type rdkafka_group
+      topics <listening topics(separate with comma',')>
+      format <input text type (text|json|ltsv|msgpack)> :default => json
+      message_key <key (Optional, for text format only, default is message)>
+      kafka_message_key <key (Optional, If specified, set kafka's message key to this key)>
+      add_headers <If true, add kafka's message headers to record>
+      add_prefix <tag prefix (Optional)>
+      add_suffix <tag suffix (Optional)>
+      retry_emit_limit <Wait retry_emit_limit x 1s when BufferQueueLimitError happens. The default is nil and it means waiting until BufferQueueLimitError is resolved>
+      use_record_time (Deprecated. Use 'time_source record' instead.) <If true, replace event time with contents of 'time' field of fetched record>
+      time_source <source for message timestamp (now|kafka|record)> :default => now
+      time_format <string (Optional when use_record_time is used)>
+
+      # kafka consumer options
+      max_wait_time_ms 500
+      max_batch_size 10000
+      kafka_configs {
+        "bootstrap.servers": "brokers <broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>",
+        "group.id": "<consumer group name>"
+      }
+    </source>
+
+See also [rdkafka-ruby](https://github.com/appsignal/rdkafka-ruby) and [librdkafka](https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md) for more detailed documentation about Kafka consumer options.
+
 Consuming topic name is used for event tag. So when the target topic name is `app_event`, the tag is `app_event`. If you want to modify tag, use `add_prefix` or `add_suffix` parameter. With `add_prefix kafka`, the tag is `kafka.app_event`.
 
 ### Output plugin
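To illustrate the regex topic support and tag rewriting described in the README section above, a hypothetical consumer group source could look like this (broker address and group name are placeholders):

    <source>
      @type kafka_group
      brokers broker1:9092
      consumer_group my-group
      topics /app_.*/    # regex pattern: matches app_event, app_error, ...
      format json
      add_prefix kafka   # a message from topic app_event is emitted with tag kafka.app_event
    </source>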
data/fluent-plugin-kafka.gemspec
CHANGED
@@ -13,7 +13,7 @@ Gem::Specification.new do |gem|
   gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
   gem.name = "fluent-plugin-kafka"
   gem.require_paths = ["lib"]
-  gem.version = '0.
+  gem.version = '0.16.0'
   gem.required_ruby_version = ">= 2.1.0"
 
   gem.add_dependency "fluentd", [">= 0.10.58", "< 2"]
@@ -21,4 +21,5 @@ Gem::Specification.new do |gem|
   gem.add_dependency 'ruby-kafka', '>= 1.2.0', '< 2'
   gem.add_development_dependency "rake", ">= 0.9.2"
   gem.add_development_dependency "test-unit", ">= 3.0.8"
+  gem.add_development_dependency "webrick"
 end
data/lib/fluent/plugin/in_kafka.rb
CHANGED
@@ -31,6 +31,10 @@ class Fluent::KafkaInput < Fluent::Input
   config_param :add_suffix, :string, :default => nil,
                :desc => "tag suffix"
   config_param :add_offset_in_record, :bool, :default => false
+  config_param :tag_source, :enum, :list => [:topic, :record], :default => :topic,
+               :desc => "Source for the fluentd event tag"
+  config_param :record_tag_key, :string, :default => 'tag',
+               :desc => "Tag field when tag_source is 'record'"
 
   config_param :offset_zookeeper, :string, :default => nil
   config_param :offset_zk_root_node, :string, :default => '/fluent-plugin-kafka'
@@ -113,7 +117,7 @@ class Fluent::KafkaInput < Fluent::Input
 
     require 'zookeeper' if @offset_zookeeper
 
-    @parser_proc = setup_parser
+    @parser_proc = setup_parser(conf)
 
     @time_source = :record if @use_record_time
 
@@ -126,7 +130,7 @@ class Fluent::KafkaInput < Fluent::Input
     end
   end
 
-  def setup_parser
+  def setup_parser(conf)
     case @format
     when 'json'
       begin
@@ -165,6 +169,14 @@ class Fluent::KafkaInput < Fluent::Input
         add_offset_in_hash(r, te, msg.offset) if @add_offset_in_record
         r
       }
+    else
+      @custom_parser = Fluent::Plugin.new_parser(conf['format'])
+      @custom_parser.configure(conf)
+      Proc.new { |msg|
+        @custom_parser.parse(msg.value) {|_time, record|
+          record
+        }
+      }
     end
   end
 
@@ -188,16 +200,17 @@ class Fluent::KafkaInput < Fluent::Input
       @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: logger, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
                          ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
                          ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_scram_username: @username, sasl_scram_password: @password,
-                         sasl_scram_mechanism: @scram_mechanism, sasl_over_ssl: @sasl_over_ssl)
+                         sasl_scram_mechanism: @scram_mechanism, sasl_over_ssl: @sasl_over_ssl, ssl_verify_hostname: @ssl_verify_hostname)
     elsif @username != nil && @password != nil
       @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: logger, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
                          ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
                          ssl_ca_certs_from_system: @ssl_ca_certs_from_system,sasl_plain_username: @username, sasl_plain_password: @password,
-                         sasl_over_ssl: @sasl_over_ssl)
+                         sasl_over_ssl: @sasl_over_ssl, ssl_verify_hostname: @ssl_verify_hostname)
     else
       @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: logger, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
                          ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
-                         ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_gssapi_principal: @principal, sasl_gssapi_keytab: @keytab
+                         ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_gssapi_principal: @principal, sasl_gssapi_keytab: @keytab,
+                         ssl_verify_hostname: @ssl_verify_hostname)
     end
 
     @zookeeper = Zookeeper.new(@offset_zookeeper) if @offset_zookeeper
@@ -215,6 +228,9 @@ class Fluent::KafkaInput < Fluent::Input
                              router,
                              @kafka_message_key,
                              @time_source,
+                             @record_time_key,
+                             @tag_source,
+                             @record_tag_key,
                              opt)
     }
     @topic_watchers.each {|tw|
@@ -239,7 +255,7 @@ class Fluent::KafkaInput < Fluent::Input
   end
 
   class TopicWatcher < Coolio::TimerWatcher
-    def initialize(topic_entry, kafka, interval, parser, add_prefix, add_suffix, offset_manager, router, kafka_message_key, time_source, options={})
+    def initialize(topic_entry, kafka, interval, parser, add_prefix, add_suffix, offset_manager, router, kafka_message_key, time_source, record_time_key, tag_source, record_tag_key, options={})
       @topic_entry = topic_entry
       @kafka = kafka
       @callback = method(:consume)
@@ -251,6 +267,9 @@ class Fluent::KafkaInput < Fluent::Input
       @router = router
       @kafka_message_key = kafka_message_key
       @time_source = time_source
+      @record_time_key = record_time_key
+      @tag_source = tag_source
+      @record_tag_key = record_tag_key
 
       @next_offset = @topic_entry.offset
       if @topic_entry.offset == -1 && offset_manager
@@ -287,6 +306,11 @@ class Fluent::KafkaInput < Fluent::Input
       messages.each { |msg|
         begin
           record = @parser.call(msg, @topic_entry)
+          if @tag_source == :record
+            tag = record[@record_tag_key]
+            tag = @add_prefix + "." + tag if @add_prefix
+            tag = tag + "." + @add_suffix if @add_suffix
+          end
           case @time_source
           when :kafka
             record_time = Fluent::EventTime.from_time(msg.create_time)
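The new `else` branch in `setup_parser` hands any unrecognized `format` value to Fluentd's parser plugin mechanism (`Fluent::Plugin.new_parser`) and passes the plugin configuration through to it. As a hedged illustration, assuming the standard `csv` parser plugin is available, a source like the following should now be accepted (broker, topic, and key names are placeholders):

    <source>
      @type kafka
      brokers broker1:9092
      topics app_event
      format csv          # not handled natively; resolved via Fluent::Plugin.new_parser
      keys field1,field2  # csv parser option, picked up from the same configuration
    </source>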
data/lib/fluent/plugin/in_kafka_group.rb
CHANGED
@@ -36,6 +36,10 @@ class Fluent::KafkaGroupInput < Fluent::Input
   config_param :get_kafka_client_log, :bool, :default => false
   config_param :time_format, :string, :default => nil,
                :desc => "Time format to be used to parse 'time' field."
+  config_param :tag_source, :enum, :list => [:topic, :record], :default => :topic,
+               :desc => "Source for the fluentd event tag"
+  config_param :record_tag_key, :string, :default => 'tag',
+               :desc => "Tag field when tag_source is 'record'"
   config_param :kafka_message_key, :string, :default => nil,
                :desc => "Set kafka's message key to this field"
   config_param :connect_timeout, :integer, :default => nil,
@@ -117,7 +121,7 @@ class Fluent::KafkaGroupInput < Fluent::Input
       @max_wait_time = conf['max_wait_ms'].to_i / 1000
     end
 
-    @parser_proc = setup_parser
+    @parser_proc = setup_parser(conf)
 
     @consumer_opts = {:group_id => @consumer_group}
     @consumer_opts[:session_timeout] = @session_timeout if @session_timeout
@@ -138,9 +142,13 @@ class Fluent::KafkaGroupInput < Fluent::Input
         @time_parser = Fluent::TextParser::TimeParser.new(@time_format)
       end
     end
+
+    if @time_source == :record && defined?(Fluent::NumericTimeParser)
+      @float_numeric_parse = Fluent::NumericTimeParser.new(:float)
+    end
   end
 
-  def setup_parser
+  def setup_parser(conf)
     case @format
     when 'json'
       begin
@@ -159,6 +167,14 @@ class Fluent::KafkaGroupInput < Fluent::Input
       Proc.new { |msg| MessagePack.unpack(msg.value) }
     when 'text'
       Proc.new { |msg| {@message_key => msg.value} }
+    else
+      @custom_parser = Fluent::Plugin.new_parser(conf['format'])
+      @custom_parser.configure(conf)
+      Proc.new { |msg|
+        @custom_parser.parse(msg.value) {|_time, record|
+          record
+        }
+      }
     end
   end
 
@@ -236,49 +252,104 @@ class Fluent::KafkaGroupInput < Fluent::Input
     end
   end
 
+  def process_batch_with_record_tag(batch)
+    es = {}
+    batch.messages.each { |msg|
+      begin
+        record = @parser_proc.call(msg)
+        tag = record[@record_tag_key]
+        tag = @add_prefix + "." + tag if @add_prefix
+        tag = tag + "." + @add_suffix if @add_suffix
+        es[tag] ||= Fluent::MultiEventStream.new
+        case @time_source
+        when :kafka
+          record_time = Fluent::EventTime.from_time(msg.create_time)
+        when :now
+          record_time = Fluent::Engine.now
+        when :record
+          if @time_format
+            record_time = @time_parser.parse(record[@record_time_key].to_s)
+          else
+            record_time = record[@record_time_key]
+          end
+        else
+          log.fatal "BUG: invalid time_source: #{@time_source}"
+        end
+        if @kafka_message_key
+          record[@kafka_message_key] = msg.key
+        end
+        if @add_headers
+          msg.headers.each_pair { |k, v|
+            record[k] = v
+          }
+        end
+        es[tag].add(record_time, record)
+      rescue => e
+        log.warn "parser error in #{batch.topic}/#{batch.partition}", :error => e.to_s, :value => msg.value, :offset => msg.offset
+        log.debug_backtrace
+      end
+    }
+
+    unless es.empty?
+      es.each { |tag,es|
+        emit_events(tag, es)
+      }
+    end
+  end
+
+  def process_batch(batch)
+    es = Fluent::MultiEventStream.new
+    tag = batch.topic
+    tag = @add_prefix + "." + tag if @add_prefix
+    tag = tag + "." + @add_suffix if @add_suffix
+
+    batch.messages.each { |msg|
+      begin
+        record = @parser_proc.call(msg)
+        case @time_source
+        when :kafka
+          record_time = Fluent::EventTime.from_time(msg.create_time)
+        when :now
+          record_time = Fluent::Engine.now
+        when :record
+          record_time = record[@record_time_key]
+
+          if @time_format
+            record_time = @time_parser.parse(record_time.to_s)
+          elsif record_time.is_a?(Float) && @float_numeric_parse
+            record_time = @float_numeric_parse.parse(record_time)
+          end
+        else
+          log.fatal "BUG: invalid time_source: #{@time_source}"
+        end
+        if @kafka_message_key
+          record[@kafka_message_key] = msg.key
+        end
+        if @add_headers
+          msg.headers.each_pair { |k, v|
+            record[k] = v
+          }
+        end
+        es.add(record_time, record)
+      rescue => e
+        log.warn "parser error in #{batch.topic}/#{batch.partition}", :error => e.to_s, :value => msg.value, :offset => msg.offset
+        log.debug_backtrace
+      end
+    }
+
+    unless es.empty?
+      emit_events(tag, es)
+    end
+  end
+
   def run
     while @consumer
       begin
         @consumer.each_batch(@fetch_opts) { |batch|
-          es = Fluent::MultiEventStream.new
-          tag = batch.topic
-          tag = @add_prefix + "." + tag if @add_prefix
-          tag = tag + "." + @add_suffix if @add_suffix
-
-          batch.messages.each { |msg|
-            begin
-              record = @parser_proc.call(msg)
-              case @time_source
-              when :kafka
-                record_time = Fluent::EventTime.from_time(msg.create_time)
-              when :now
-                record_time = Fluent::Engine.now
-              when :record
-                if @time_format
-                  record_time = @time_parser.parse(record[@record_time_key].to_s)
-                else
-                  record_time = record[@record_time_key]
-                end
-              else
-                log.fatal "BUG: invalid time_source: #{@time_source}"
-              end
-              if @kafka_message_key
-                record[@kafka_message_key] = msg.key
-              end
-              if @add_headers
-                msg.headers.each_pair { |k, v|
-                  record[k] = v
-                }
-              end
-              es.add(record_time, record)
-            rescue => e
-              log.warn "parser error in #{batch.topic}/#{batch.partition}", :error => e.to_s, :value => msg.value, :offset => msg.offset
-              log.debug_backtrace
-            end
-          }
-
-          unless es.empty?
-            emit_events(tag, es)
+          if @tag_source == :record
+            process_batch_with_record_tag(batch)
+          else
+            process_batch(batch)
           end
         }
      rescue ForShutdown
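The `@float_numeric_parse` branch above means that with `time_source record`, a record whose time field is a float epoch value (for example `{"time": 1611540000.123, ...}`) is now converted through Fluent::NumericTimeParser instead of being passed through unparsed. A sketch of the matching configuration, using the default `record_time_key` of `time` (broker, group, and topic values are placeholders):

    <source>
      @type kafka_group
      brokers broker1:9092
      consumer_group my-group
      topics app_event
      format json
      time_source record   # use the record's own time field
      record_time_key time # field holding the epoch timestamp (float seconds)
    </source>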
data/lib/fluent/plugin/in_rdkafka_group.rb
ADDED
@@ -0,0 +1,305 @@
+require 'fluent/plugin/input'
+require 'fluent/time'
+require 'fluent/plugin/kafka_plugin_util'
+
+require 'rdkafka'
+
+class Fluent::Plugin::RdKafkaGroupInput < Fluent::Plugin::Input
+  Fluent::Plugin.register_input('rdkafka_group', self)
+
+  helpers :thread, :parser, :compat_parameters
+
+  config_param :topics, :string,
+               :desc => "Listening topics(separate with comma',')."
+
+  config_param :format, :string, :default => 'json',
+               :desc => "Supported format: (json|text|ltsv|msgpack)"
+  config_param :message_key, :string, :default => 'message',
+               :desc => "For 'text' format only."
+  config_param :add_headers, :bool, :default => false,
+               :desc => "Add kafka's message headers to event record"
+  config_param :add_prefix, :string, :default => nil,
+               :desc => "Tag prefix (Optional)"
+  config_param :add_suffix, :string, :default => nil,
+               :desc => "Tag suffix (Optional)"
+  config_param :use_record_time, :bool, :default => false,
+               :desc => "Replace message timestamp with contents of 'time' field.",
+               :deprecated => "Use 'time_source record' instead."
+  config_param :time_source, :enum, :list => [:now, :kafka, :record], :default => :now,
+               :desc => "Source for message timestamp."
+  config_param :record_time_key, :string, :default => 'time',
+               :desc => "Time field when time_source is 'record'"
+  config_param :time_format, :string, :default => nil,
+               :desc => "Time format to be used to parse 'time' field."
+  config_param :kafka_message_key, :string, :default => nil,
+               :desc => "Set kafka's message key to this field"
+
+  config_param :retry_emit_limit, :integer, :default => nil,
+               :desc => "How long to stop event consuming when BufferQueueLimitError happens. Wait retry_emit_limit x 1s. The default is waiting until BufferQueueLimitError is resolved"
+  config_param :retry_wait_seconds, :integer, :default => 30
+  config_param :disable_retry_limit, :bool, :default => false,
+               :desc => "If set true, it disables retry_limit and make Fluentd retry indefinitely (default: false)"
+  config_param :retry_limit, :integer, :default => 10,
+               :desc => "The maximum number of retries for connecting kafka (default: 10)"
+
+  config_param :max_wait_time_ms, :integer, :default => 250,
+               :desc => "How long to block polls in milliseconds until the server sends us data."
+  config_param :max_batch_size, :integer, :default => 10000,
+               :desc => "Maximum number of log lines emitted in a single batch."
+
+  config_param :kafka_configs, :hash, :default => {},
+               :desc => "Kafka configuration properties as desribed in https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md"
+
+  config_section :parse do
+    config_set_default :@type, 'json'
+  end
+
+  include Fluent::KafkaPluginUtil::SSLSettings
+  include Fluent::KafkaPluginUtil::SaslSettings
+
+  class ForShutdown < StandardError
+  end
+
+  BufferError = Fluent::Plugin::Buffer::BufferOverflowError
+
+  def initialize
+    super
+
+    @time_parser = nil
+    @retry_count = 1
+  end
+
+  def _config_to_array(config)
+    config_array = config.split(',').map {|k| k.strip }
+    if config_array.empty?
+      raise Fluent::ConfigError, "kafka_group: '#{config}' is a required parameter"
+    end
+    config_array
+  end
+
+  def multi_workers_ready?
+    true
+  end
+
+  private :_config_to_array
+
+  def configure(conf)
+    compat_parameters_convert(conf, :parser)
+
+    super
+
+    log.warn "The in_rdkafka_group consumer was not yet tested under heavy production load. Use it at your own risk!"
+
+    log.info "Will watch for topics #{@topics} at brokers " \
+             "#{@kafka_configs["bootstrap.servers"]} and '#{@kafka_configs["group.id"]}' group"
+
+    @topics = _config_to_array(@topics)
+
+    parser_conf = conf.elements('parse').first
+    unless parser_conf
+      raise Fluent::ConfigError, "<parse> section or format parameter is required."
+    end
+    unless parser_conf["@type"]
+      raise Fluent::ConfigError, "parse/@type is required."
+    end
+    @parser_proc = setup_parser(parser_conf)
+
+    @time_source = :record if @use_record_time
+
+    if @time_source == :record and @time_format
+      @time_parser = Fluent::TimeParser.new(@time_format)
+    end
+  end
+
+  def setup_parser(parser_conf)
+    format = parser_conf["@type"]
+    case format
+    when 'json'
+      begin
+        require 'oj'
+        Oj.default_options = Fluent::DEFAULT_OJ_OPTIONS
+        Proc.new { |msg| Oj.load(msg.payload) }
+      rescue LoadError
+        require 'yajl'
+        Proc.new { |msg| Yajl::Parser.parse(msg.payload) }
+      end
+    when 'ltsv'
+      require 'ltsv'
+      Proc.new { |msg| LTSV.parse(msg.payload, {:symbolize_keys => false}).first }
+    when 'msgpack'
+      require 'msgpack'
+      Proc.new { |msg| MessagePack.unpack(msg.payload) }
+    when 'text'
+      Proc.new { |msg| {@message_key => msg.payload} }
+    else
+      @custom_parser = parser_create(usage: 'in-rdkafka-plugin', conf: parser_conf)
+      Proc.new { |msg|
+        @custom_parser.parse(msg.payload) {|_time, record|
+          record
+        }
+      }
+    end
+  end
+
+  def start
+    super
+
+    @consumer = setup_consumer
+
+    thread_create(:in_rdkafka_group, &method(:run))
+  end
+
+  def shutdown
+    # This nil assignment should be guarded by mutex in multithread programming manner.
+    # But the situation is very low contention, so we don't use mutex for now.
+    # If the problem happens, we will add a guard for consumer.
+    consumer = @consumer
+    @consumer = nil
+    consumer.close
+
+    super
+  end
+
+  def setup_consumer
+    consumer = Rdkafka::Config.new(@kafka_configs).consumer
+    consumer.subscribe(*@topics)
+    consumer
+  end
+
+  def reconnect_consumer
+    log.warn "Stopping Consumer"
+    consumer = @consumer
+    @consumer = nil
+    if consumer
+      consumer.close
+    end
+    log.warn "Could not connect to broker. retry_time:#{@retry_count}. Next retry will be in #{@retry_wait_seconds} seconds"
+    @retry_count = @retry_count + 1
+    sleep @retry_wait_seconds
+    @consumer = setup_consumer
+    log.warn "Re-starting consumer #{Time.now.to_s}"
+    @retry_count = 0
+  rescue =>e
+    log.error "unexpected error during re-starting consumer object access", :error => e.to_s
+    log.error_backtrace
+    if @retry_count <= @retry_limit or disable_retry_limit
+      reconnect_consumer
+    end
+  end
+
+  class Batch
+    attr_reader :topic
+    attr_reader :messages
+
+    def initialize(topic)
+      @topic = topic
+      @messages = []
+    end
+  end
+
+  # Executes the passed codeblock on a batch of messages.
+  # It is guaranteed that every message in a given batch belongs to the same topic, because the tagging logic in :run expects that property.
+  # The number of maximum messages in a batch is capped by the :max_batch_size configuration value. It ensures that consuming from a single
+  # topic for a long time (e.g. with `auto.offset.reset` set to `earliest`) does not lead to memory exhaustion. Also, calling consumer.poll
+  # advances thes consumer offset, so in case the process crashes we might lose at most :max_batch_size messages.
+  def each_batch(&block)
+    batch = nil
+    message = nil
+    while @consumer
+      message = @consumer.poll(@max_wait_time_ms)
+      if message
+        if not batch
+          batch = Batch.new(message.topic)
+        elsif batch.topic != message.topic || batch.messages.size >= @max_batch_size
+          yield batch
+          batch = Batch.new(message.topic)
+        end
+        batch.messages << message
+      else
+        yield batch if batch
+        batch = nil
+      end
+    end
+    yield batch if batch
+  end
+
+  def run
+    while @consumer
+      begin
+        each_batch { |batch|
+          log.debug "A new batch for topic #{batch.topic} with #{batch.messages.size} messages"
+          es = Fluent::MultiEventStream.new
+          tag = batch.topic
+          tag = @add_prefix + "." + tag if @add_prefix
+          tag = tag + "." + @add_suffix if @add_suffix
+
+          batch.messages.each { |msg|
+            begin
+              record = @parser_proc.call(msg)
+              case @time_source
+              when :kafka
+                record_time = Fluent::EventTime.from_time(msg.timestamp)
+              when :now
+                record_time = Fluent::Engine.now
+              when :record
+                if @time_format
+                  record_time = @time_parser.parse(record[@record_time_key].to_s)
+                else
+                  record_time = record[@record_time_key]
+                end
+              else
+                log.fatal "BUG: invalid time_source: #{@time_source}"
+              end
+              if @kafka_message_key
+                record[@kafka_message_key] = msg.key
+              end
+              if @add_headers
+                msg.headers.each_pair { |k, v|
+                  record[k] = v
+                }
+              end
+              es.add(record_time, record)
+            rescue => e
+              log.warn "parser error in #{msg.topic}/#{msg.partition}", :error => e.to_s, :value => msg.payload, :offset => msg.offset
+              log.debug_backtrace
+            end
+          }
+
+          unless es.empty?
+            emit_events(tag, es)
+          end
+        }
+      rescue ForShutdown
+      rescue => e
+        log.error "unexpected error during consuming events from kafka. Re-fetch events.", :error => e.to_s
+        log.error_backtrace
+        reconnect_consumer
+      end
+    end
+  rescue => e
+    log.error "unexpected error during consumer object access", :error => e.to_s
+    log.error_backtrace
+  end
+
+  def emit_events(tag, es)
+    retries = 0
+    begin
+      router.emit_stream(tag, es)
+    rescue BufferError
+      raise ForShutdown if @consumer.nil?
+
+      if @retry_emit_limit.nil?
+        sleep 1
+        retry
+      end
+
+      if retries < @retry_emit_limit
+        retries += 1
+        sleep 1
+        retry
+      else
+        raise RuntimeError, "Exceeds retry_emit_limit"
+      end
+    end
+  end
+end
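The `each_batch` comment in the new plugin explains that `max_batch_size` caps both memory use and the number of messages that could be lost if the process crashes after a poll, while `max_wait_time_ms` bounds how long a poll blocks. A hedged tuning sketch combining both knobs with the librdkafka properties shown in the README (broker and group values are placeholders):

    <source>
      @type rdkafka_group
      topics app_event
      format json
      max_wait_time_ms 500   # poll blocks at most 500 ms waiting for data
      max_batch_size 5000    # emit at most 5000 records per batch
      kafka_configs {
        "bootstrap.servers": "broker1:9092",
        "group.id": "my-group"
      }
    </source>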
data/lib/fluent/plugin/out_kafka2.rb
CHANGED
@@ -128,7 +128,7 @@ DESC
       @seed_brokers = @brokers
       log.info "brokers has been set: #{@seed_brokers}"
     else
-      raise Fluent::
+      raise Fluent::ConfigError, 'No brokers specified. Need one broker at least.'
     end
 
     formatter_conf = conf.elements('format').first
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: fluent-plugin-kafka
 version: !ruby/object:Gem::Version
-  version: 0.
+  version: 0.16.0
 platform: ruby
 authors:
 - Hidemasa Togashi
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date:
+date: 2021-01-25 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: fluentd
@@ -93,6 +93,20 @@ dependencies:
     - - ">="
       - !ruby/object:Gem::Version
         version: 3.0.8
+- !ruby/object:Gem::Dependency
+  name: webrick
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
 description: Fluentd plugin for Apache Kafka > 0.8
 email:
 - togachiro@gmail.com
@@ -101,6 +115,7 @@ executables: []
 extensions: []
 extra_rdoc_files: []
 files:
+- ".github/workflows/linux.yml"
 - ".gitignore"
 - ".travis.yml"
 - ChangeLog
@@ -111,6 +126,7 @@ files:
 - fluent-plugin-kafka.gemspec
 - lib/fluent/plugin/in_kafka.rb
 - lib/fluent/plugin/in_kafka_group.rb
+- lib/fluent/plugin/in_rdkafka_group.rb
 - lib/fluent/plugin/kafka_plugin_util.rb
 - lib/fluent/plugin/kafka_producer_ext.rb
 - lib/fluent/plugin/out_kafka.rb