fluent-plugin-kafka 0.14.2 → 0.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ChangeLog +5 -0
- data/README.md +33 -0
- data/fluent-plugin-kafka.gemspec +1 -1
- data/lib/fluent/plugin/in_kafka.rb +4 -3
- data/lib/fluent/plugin/in_rdkafka_group.rb +284 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 43c3a759f4636168c932c33f45c38105ebb522b5ea8222f1b1a7eceb53343348
+  data.tar.gz: c64a103244e721fa2de124f466f2480c960daafc713fd16f685ea4dd4a545a3d
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 707d92f2a23041b53daf6410d3fadb0e84053c4eb250b20c6dd3c72a15969273d2279b71950334187d156767bf6646a0af468a0f84e85ca683a34c127e47e363
+  data.tar.gz: 978883c8a72152bb6b9262ccea4e6b65b91bca1a3907ea43a7930cf7b4d414f1a9f47cb593d420738a48bd47d86451f376fe4cc6e7dec6b4f2c4e81ad5213d00
data/ChangeLog
CHANGED
data/README.md
CHANGED
@@ -141,6 +141,39 @@ See also [ruby-kafka README](https://github.com/zendesk/ruby-kafka#consuming-mes
 
 Consuming topic name is used for event tag. So when the target topic name is `app_event`, the tag is `app_event`. If you want to modify tag, use `add_prefix` or `add_suffix` parameter. With `add_prefix kafka`, the tag is `kafka.app_event`.
 
+### Input plugin (@type 'rdkafka_group', supports kafka consumer groups, uses rdkafka-ruby)
+
+:warning: **The in_rdkafka_group consumer was not yet tested under heavy production load. Use it at your own risk!**
+
+With the introduction of the rdkafka-ruby based input plugin we hope to support Kafka brokers above version 2.1 where we saw [compatibility issues](https://github.com/fluent/fluent-plugin-kafka/issues/315) when using the ruby-kafka based @kafka_group input type. The rdkafka-ruby lib wraps the highly performant and production ready librdkafka C lib.
+
+    <source>
+      @type rdkafka_group
+      topics <listening topics(separate with comma',')>
+      format <input text type (text|json|ltsv|msgpack)> :default => json
+      message_key <key (Optional, for text format only, default is message)>
+      kafka_message_key <key (Optional, If specified, set kafka's message key to this key)>
+      add_headers <If true, add kafka's message headers to record>
+      add_prefix <tag prefix (Optional)>
+      add_suffix <tag suffix (Optional)>
+      retry_emit_limit <Wait retry_emit_limit x 1s when BufferQueueLimitError happens. The default is nil and it means waiting until BufferQueueLimitError is resolved>
+      use_record_time (Deprecated. Use 'time_source record' instead.) <If true, replace event time with contents of 'time' field of fetched record>
+      time_source <source for message timestamp (now|kafka|record)> :default => now
+      time_format <string (Optional when use_record_time is used)>
+
+      # kafka consumer options
+      max_wait_time_ms 500
+      max_batch_size 10000
+      kafka_configs {
+        "bootstrap.servers": "<broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>",
+        "group.id": "<consumer group name>"
+      }
+    </source>
+
+See also [rdkafka-ruby](https://github.com/appsignal/rdkafka-ruby) and [librdkafka](https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md) for more detailed documentation about Kafka consumer options.
+
+Consuming topic name is used for event tag. So when the target topic name is `app_event`, the tag is `app_event`. If you want to modify tag, use `add_prefix` or `add_suffix` parameter. With `add_prefix kafka`, the tag is `kafka.app_event`.
+
 ### Output plugin
 
 This `kafka2` plugin is for fluentd v1 or later. This plugin uses `ruby-kafka` producer for writing data.
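For orientation, here is one way the template above could be filled in; the broker addresses, topic, and consumer group name below are illustrative placeholders, not values shipped with this release:

    <source>
      @type rdkafka_group
      topics app_event
      format json
      add_prefix kafka
      max_wait_time_ms 500
      max_batch_size 10000
      kafka_configs {
        "bootstrap.servers": "broker1:9092,broker2:9092",
        "group.id": "fluentd-rdkafka-demo"
      }
    </source>

With this configuration, records consumed from `app_event` are emitted under the tag `kafka.app_event`, as described above.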
data/fluent-plugin-kafka.gemspec
CHANGED
@@ -13,7 +13,7 @@ Gem::Specification.new do |gem|
   gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
   gem.name = "fluent-plugin-kafka"
   gem.require_paths = ["lib"]
-  gem.version = '0.14.2'
+  gem.version = '0.15.0'
   gem.required_ruby_version = ">= 2.1.0"
 
   gem.add_dependency "fluentd", [">= 0.10.58", "< 2"]
data/lib/fluent/plugin/in_kafka.rb
CHANGED
@@ -188,16 +188,17 @@ class Fluent::KafkaInput < Fluent::Input
       @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: logger, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
                          ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
                          ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_scram_username: @username, sasl_scram_password: @password,
-                         sasl_scram_mechanism: @scram_mechanism, sasl_over_ssl: @sasl_over_ssl)
+                         sasl_scram_mechanism: @scram_mechanism, sasl_over_ssl: @sasl_over_ssl, ssl_verify_hostname: @ssl_verify_hostname)
     elsif @username != nil && @password != nil
       @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: logger, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
                          ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
                          ssl_ca_certs_from_system: @ssl_ca_certs_from_system,sasl_plain_username: @username, sasl_plain_password: @password,
-                         sasl_over_ssl: @sasl_over_ssl)
+                         sasl_over_ssl: @sasl_over_ssl, ssl_verify_hostname: @ssl_verify_hostname)
     else
       @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: logger, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
                          ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
-                         ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_gssapi_principal: @principal, sasl_gssapi_keytab: @keytab)
+                         ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_gssapi_principal: @principal, sasl_gssapi_keytab: @keytab,
+                         ssl_verify_hostname: @ssl_verify_hostname)
     end
 
     @zookeeper = Zookeeper.new(@offset_zookeeper) if @offset_zookeeper
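The only change above is that every `Kafka.new` call now forwards `ssl_verify_hostname`, ruby-kafka's switch for checking the broker certificate against the host name it was fetched from. A rough standalone sketch follows; the broker address and CA path are placeholders, and a ruby-kafka version that supports the keyword is assumed:

    require "kafka"

    # Hypothetical client outside Fluentd, showing the keyword the plugin now passes through.
    kafka = Kafka.new(
      seed_brokers: ["broker1.internal:9093"],
      client_id: "verify-hostname-demo",
      ssl_ca_cert: File.read("/etc/ssl/certs/kafka-ca.pem"),
      ssl_verify_hostname: false # skip the CN/SAN check against the broker host name
    )

Setting the flag to `false` keeps TLS encryption but disables host name verification, which can be necessary when brokers sit behind load balancers whose certificates do not match.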
data/lib/fluent/plugin/in_rdkafka_group.rb
ADDED
@@ -0,0 +1,284 @@
require 'fluent/plugin/input'
require 'fluent/time'
require 'fluent/plugin/kafka_plugin_util'

require 'rdkafka'

class Fluent::Plugin::RdKafkaGroupInput < Fluent::Plugin::Input
  Fluent::Plugin.register_input('rdkafka_group', self)

  helpers :thread

  config_param :topics, :string,
               :desc => "Listening topics(separate with comma',')."

  config_param :format, :string, :default => 'json',
               :desc => "Supported format: (json|text|ltsv|msgpack)"
  config_param :message_key, :string, :default => 'message',
               :desc => "For 'text' format only."
  config_param :add_headers, :bool, :default => false,
               :desc => "Add kafka's message headers to event record"
  config_param :add_prefix, :string, :default => nil,
               :desc => "Tag prefix (Optional)"
  config_param :add_suffix, :string, :default => nil,
               :desc => "Tag suffix (Optional)"
  config_param :use_record_time, :bool, :default => false,
               :desc => "Replace message timestamp with contents of 'time' field.",
               :deprecated => "Use 'time_source record' instead."
  config_param :time_source, :enum, :list => [:now, :kafka, :record], :default => :now,
               :desc => "Source for message timestamp."
  config_param :record_time_key, :string, :default => 'time',
               :desc => "Time field when time_source is 'record'"
  config_param :time_format, :string, :default => nil,
               :desc => "Time format to be used to parse 'time' field."
  config_param :kafka_message_key, :string, :default => nil,
               :desc => "Set kafka's message key to this field"

  config_param :retry_emit_limit, :integer, :default => nil,
               :desc => "How long to stop event consuming when BufferQueueLimitError happens. Wait retry_emit_limit x 1s. The default is waiting until BufferQueueLimitError is resolved"
  config_param :retry_wait_seconds, :integer, :default => 30
  config_param :disable_retry_limit, :bool, :default => false,
               :desc => "If set true, it disables retry_limit and make Fluentd retry indefinitely (default: false)"
  config_param :retry_limit, :integer, :default => 10,
               :desc => "The maximum number of retries for connecting kafka (default: 10)"

  config_param :max_wait_time_ms, :integer, :default => 250,
               :desc => "How long to block polls in milliseconds until the server sends us data."
  config_param :max_batch_size, :integer, :default => 10000,
               :desc => "Maximum number of log lines emitted in a single batch."

  config_param :kafka_configs, :hash, :default => {},
               :desc => "Kafka configuration properties as described in https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md"

  include Fluent::KafkaPluginUtil::SSLSettings
  include Fluent::KafkaPluginUtil::SaslSettings

  class ForShutdown < StandardError
  end

  BufferError = Fluent::Plugin::Buffer::BufferOverflowError

  def initialize
    super

    @time_parser = nil
    @retry_count = 1
  end

  def _config_to_array(config)
    config_array = config.split(',').map {|k| k.strip }
    if config_array.empty?
      raise Fluent::ConfigError, "kafka_group: '#{config}' is a required parameter"
    end
    config_array
  end

  def multi_workers_ready?
    true
  end

  private :_config_to_array

  def configure(conf)
    super

    log.warn "The in_rdkafka_group consumer was not yet tested under heavy production load. Use it at your own risk!"

    log.info "Will watch for topics #{@topics} at brokers " \
             "#{@kafka_configs["bootstrap.servers"]} and '#{@kafka_configs["group.id"]}' group"

    @topics = _config_to_array(@topics)

    @parser_proc = setup_parser

    @time_source = :record if @use_record_time

    if @time_source == :record and @time_format
      @time_parser = Fluent::TimeParser.new(@time_format)
    end
  end

  def setup_parser
    case @format
    when 'json'
      begin
        require 'oj'
        Oj.default_options = Fluent::DEFAULT_OJ_OPTIONS
        Proc.new { |msg| Oj.load(msg.payload) }
      rescue LoadError
        require 'yajl'
        Proc.new { |msg| Yajl::Parser.parse(msg.payload) }
      end
    when 'ltsv'
      require 'ltsv'
      Proc.new { |msg| LTSV.parse(msg.payload, {:symbolize_keys => false}).first }
    when 'msgpack'
      require 'msgpack'
      Proc.new { |msg| MessagePack.unpack(msg.payload) }
    when 'text'
      Proc.new { |msg| {@message_key => msg.payload} }
    end
  end

  def start
    super

    @consumer = setup_consumer

    thread_create(:in_rdkafka_group, &method(:run))
  end

  def shutdown
    # This nil assignment should be guarded by mutex in multithread programming manner.
    # But the situation is very low contention, so we don't use mutex for now.
    # If the problem happens, we will add a guard for consumer.
    consumer = @consumer
    @consumer = nil
    consumer.close

    super
  end

  def setup_consumer
    consumer = Rdkafka::Config.new(@kafka_configs).consumer
    consumer.subscribe(*@topics)
    consumer
  end

  def reconnect_consumer
    log.warn "Stopping Consumer"
    consumer = @consumer
    @consumer = nil
    if consumer
      consumer.close
    end
    log.warn "Could not connect to broker. retry_time:#{@retry_count}. Next retry will be in #{@retry_wait_seconds} seconds"
    @retry_count = @retry_count + 1
    sleep @retry_wait_seconds
    @consumer = setup_consumer
    log.warn "Re-starting consumer #{Time.now.to_s}"
    @retry_count = 0
  rescue => e
    log.error "unexpected error during re-starting consumer object access", :error => e.to_s
    log.error_backtrace
    if @retry_count <= @retry_limit or disable_retry_limit
      reconnect_consumer
    end
  end

  class Batch
    attr_reader :topic
    attr_reader :messages

    def initialize(topic)
      @topic = topic
      @messages = []
    end
  end

  # Executes the passed codeblock on a batch of messages.
  # It is guaranteed that every message in a given batch belongs to the same topic, because the tagging logic in :run expects that property.
  # The number of maximum messages in a batch is capped by the :max_batch_size configuration value. It ensures that consuming from a single
  # topic for a long time (e.g. with `auto.offset.reset` set to `earliest`) does not lead to memory exhaustion. Also, calling consumer.poll
  # advances the consumer offset, so in case the process crashes we might lose at most :max_batch_size messages.
  def each_batch(&block)
    batch = nil
    message = nil
    while @consumer
      message = @consumer.poll(@max_wait_time_ms)
      if message
        if not batch
          batch = Batch.new(message.topic)
        elsif batch.topic != message.topic || batch.messages.size >= @max_batch_size
          yield batch
          batch = Batch.new(message.topic)
        end
        batch.messages << message
      else
        yield batch if batch
        batch = nil
      end
    end
    yield batch if batch
  end

  def run
    while @consumer
      begin
        each_batch { |batch|
          log.debug "A new batch for topic #{batch.topic} with #{batch.messages.size} messages"
          es = Fluent::MultiEventStream.new
          tag = batch.topic
          tag = @add_prefix + "." + tag if @add_prefix
          tag = tag + "." + @add_suffix if @add_suffix

          batch.messages.each { |msg|
            begin
              record = @parser_proc.call(msg)
              case @time_source
              when :kafka
                record_time = Fluent::EventTime.from_time(msg.timestamp)
              when :now
                record_time = Fluent::Engine.now
              when :record
                if @time_format
                  record_time = @time_parser.parse(record[@record_time_key].to_s)
                else
                  record_time = record[@record_time_key]
                end
              else
                log.fatal "BUG: invalid time_source: #{@time_source}"
              end
              if @kafka_message_key
                record[@kafka_message_key] = msg.key
              end
              if @add_headers
                msg.headers.each_pair { |k, v|
                  record[k] = v
                }
              end
              es.add(record_time, record)
            rescue => e
              log.warn "parser error in #{msg.topic}/#{msg.partition}", :error => e.to_s, :value => msg.payload, :offset => msg.offset
              log.debug_backtrace
            end
          }

          unless es.empty?
            emit_events(tag, es)
          end
        }
      rescue ForShutdown
      rescue => e
        log.error "unexpected error during consuming events from kafka. Re-fetch events.", :error => e.to_s
        log.error_backtrace
        reconnect_consumer
      end
    end
  rescue => e
    log.error "unexpected error during consumer object access", :error => e.to_s
    log.error_backtrace
  end

  def emit_events(tag, es)
    retries = 0
    begin
      router.emit_stream(tag, es)
    rescue BufferError
      raise ForShutdown if @consumer.nil?

      if @retry_emit_limit.nil?
        sleep 1
        retry
      end

      if retries < @retry_emit_limit
        retries += 1
        sleep 1
        retry
      else
        raise RuntimeError, "Exceeds retry_emit_limit"
      end
    end
  end
end
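The comment on `each_batch` above spells out the batching contract: a batch never mixes topics, never exceeds `max_batch_size` messages, and an empty poll flushes whatever is pending. The same loop can be exercised directly against rdkafka-ruby outside Fluentd; a rough sketch, with the broker address, topics, and batch size chosen arbitrarily:

    require "rdkafka"

    # Hypothetical standalone consumer that mimics the plugin's each_batch grouping.
    consumer = Rdkafka::Config.new(
      "bootstrap.servers" => "localhost:9092", # placeholder broker
      "group.id"          => "batch-demo"      # placeholder consumer group
    ).consumer
    consumer.subscribe("app_event", "app_error")

    MAX_BATCH_SIZE = 3
    batch_topic = nil
    batch = []

    loop do
      msg = consumer.poll(250) # block for up to 250 ms, like max_wait_time_ms
      if msg
        # Flush when the topic changes or the batch is full, as the plugin does.
        if batch_topic && (msg.topic != batch_topic || batch.size >= MAX_BATCH_SIZE)
          puts "batch: #{batch_topic} x#{batch.size}"
          batch = []
        end
        batch_topic = msg.topic
        batch << msg.payload
      elsif !batch.empty?
        # An idle poll flushes the pending batch, mirroring the plugin's else branch.
        puts "batch: #{batch_topic} x#{batch.size}"
        batch = []
      end
    end

As the source comment notes, polling advances the consumer offset, so a crash can lose at most one batch worth of messages.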
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: fluent-plugin-kafka
 version: !ruby/object:Gem::Version
-  version: 0.14.2
+  version: 0.15.0
 platform: ruby
 authors:
 - Hidemasa Togashi
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2020-
+date: 2020-09-14 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: fluentd
@@ -111,6 +111,7 @@ files:
 - fluent-plugin-kafka.gemspec
 - lib/fluent/plugin/in_kafka.rb
 - lib/fluent/plugin/in_kafka_group.rb
+- lib/fluent/plugin/in_rdkafka_group.rb
 - lib/fluent/plugin/kafka_plugin_util.rb
 - lib/fluent/plugin/kafka_producer_ext.rb
 - lib/fluent/plugin/out_kafka.rb