fluent-plugin-kafka-xst 0.19.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/ISSUE_TEMPLATE/bug_report.yaml +72 -0
- data/.github/ISSUE_TEMPLATE/config.yml +5 -0
- data/.github/ISSUE_TEMPLATE/feature_request.yaml +39 -0
- data/.github/dependabot.yml +6 -0
- data/.github/workflows/linux.yml +45 -0
- data/.github/workflows/stale-actions.yml +24 -0
- data/.gitignore +2 -0
- data/ChangeLog +344 -0
- data/Gemfile +6 -0
- data/LICENSE +14 -0
- data/README.md +594 -0
- data/Rakefile +12 -0
- data/ci/prepare-kafka-server.sh +33 -0
- data/examples/README.md +3 -0
- data/examples/out_kafka2/dynamic_topic_based_on_tag.conf +32 -0
- data/examples/out_kafka2/protobuf-formatter.conf +23 -0
- data/examples/out_kafka2/record_key.conf +31 -0
- data/fluent-plugin-kafka.gemspec +27 -0
- data/lib/fluent/plugin/in_kafka.rb +388 -0
- data/lib/fluent/plugin/in_kafka_group.rb +394 -0
- data/lib/fluent/plugin/in_rdkafka_group.rb +305 -0
- data/lib/fluent/plugin/kafka_plugin_util.rb +84 -0
- data/lib/fluent/plugin/kafka_producer_ext.rb +308 -0
- data/lib/fluent/plugin/out_kafka.rb +268 -0
- data/lib/fluent/plugin/out_kafka2.rb +427 -0
- data/lib/fluent/plugin/out_kafka_buffered.rb +374 -0
- data/lib/fluent/plugin/out_rdkafka.rb +324 -0
- data/lib/fluent/plugin/out_rdkafka2.rb +526 -0
- data/test/helper.rb +34 -0
- data/test/plugin/test_in_kafka.rb +66 -0
- data/test/plugin/test_in_kafka_group.rb +69 -0
- data/test/plugin/test_kafka_plugin_util.rb +44 -0
- data/test/plugin/test_out_kafka.rb +68 -0
- data/test/plugin/test_out_kafka2.rb +138 -0
- data/test/plugin/test_out_kafka_buffered.rb +68 -0
- data/test/plugin/test_out_rdkafka2.rb +182 -0
- metadata +214 -0
data/lib/fluent/plugin/in_kafka_group.rb
@@ -0,0 +1,394 @@
require 'fluent/input'
require 'fluent/time'
require 'fluent/plugin/kafka_plugin_util'

class Fluent::KafkaGroupInput < Fluent::Input
  Fluent::Plugin.register_input('kafka_group', self)

  config_param :brokers, :string, :default => 'localhost:9092',
               :desc => "List of broker-host:port, separate with comma, must set."
  config_param :consumer_group, :string,
               :desc => "Consumer group name, must set."
  config_param :topics, :string,
               :desc => "Listening topics(separate with comma',')."
  config_param :client_id, :string, :default => 'kafka'
  config_param :sasl_over_ssl, :bool, :default => true,
               :desc => "Set to false to prevent SSL strict mode when using SASL authentication"
  config_param :format, :string, :default => 'json',
               :desc => "Supported format: (json|text|ltsv|msgpack)"
  config_param :message_key, :string, :default => 'message',
               :desc => "For 'text' format only."
  config_param :add_headers, :bool, :default => false,
               :desc => "Add kafka's message headers to event record"
  config_param :add_prefix, :string, :default => nil,
               :desc => "Tag prefix (Optional)"
  config_param :add_suffix, :string, :default => nil,
               :desc => "Tag suffix (Optional)"
  config_param :retry_emit_limit, :integer, :default => nil,
               :desc => "How long to stop event consuming when BufferQueueLimitError happens. Wait retry_emit_limit x 1s. The default is waiting until BufferQueueLimitError is resolved"
  config_param :use_record_time, :bool, :default => false,
               :desc => "Replace message timestamp with contents of 'time' field.",
               :deprecated => "Use 'time_source record' instead."
  config_param :time_source, :enum, :list => [:now, :kafka, :record], :default => :now,
               :desc => "Source for message timestamp."
  config_param :record_time_key, :string, :default => 'time',
               :desc => "Time field when time_source is 'record'"
  config_param :get_kafka_client_log, :bool, :default => false
  config_param :time_format, :string, :default => nil,
               :desc => "Time format to be used to parse 'time' field."
  config_param :tag_source, :enum, :list => [:topic, :record], :default => :topic,
               :desc => "Source for the fluentd event tag"
  config_param :record_tag_key, :string, :default => 'tag',
               :desc => "Tag field when tag_source is 'record'"
  config_param :kafka_message_key, :string, :default => nil,
               :desc => "Set kafka's message key to this field"
  config_param :connect_timeout, :integer, :default => nil,
               :desc => "[Integer, nil] the timeout setting for connecting to brokers"
  config_param :socket_timeout, :integer, :default => nil,
               :desc => "[Integer, nil] the timeout setting for socket connection"

  config_param :retry_wait_seconds, :integer, :default => 30
  config_param :disable_retry_limit, :bool, :default => false,
               :desc => "If set true, it disables retry_limit and make Fluentd retry indefinitely (default: false)"
  config_param :retry_limit, :integer, :default => 10,
               :desc => "The maximum number of retries for connecting kafka (default: 10)"
  # Kafka consumer options
  config_param :max_bytes, :integer, :default => 1048576,
               :desc => "Maximum number of bytes to fetch."
  config_param :max_wait_time, :integer, :default => nil,
               :desc => "How long to block until the server sends us data."
  config_param :min_bytes, :integer, :default => nil,
               :desc => "Smallest amount of data the server should send us."
  config_param :session_timeout, :integer, :default => nil,
               :desc => "The number of seconds after which, if a client hasn't contacted the Kafka cluster"
  config_param :offset_commit_interval, :integer, :default => nil,
               :desc => "The interval between offset commits, in seconds"
  config_param :offset_commit_threshold, :integer, :default => nil,
               :desc => "The number of messages that can be processed before their offsets are committed"
  config_param :fetcher_max_queue_size, :integer, :default => nil,
               :desc => "The number of fetched messages per partition that are queued in fetcher queue"
  config_param :refresh_topic_interval, :integer, :default => nil,
               :desc => "The interval of refreshing the topic list in seconds. Zero or unset disables this"
  config_param :start_from_beginning, :bool, :default => true,
               :desc => "Whether to start from the beginning of the topic or just subscribe to new messages being produced"

  include Fluent::KafkaPluginUtil::SSLSettings
  include Fluent::KafkaPluginUtil::SaslSettings

  class ForShutdown < StandardError
  end

  BufferError = if defined?(Fluent::Plugin::Buffer::BufferOverflowError)
                  Fluent::Plugin::Buffer::BufferOverflowError
                else
                  Fluent::BufferQueueLimitError
                end

  unless method_defined?(:router)
    define_method("router") { Fluent::Engine }
  end

  def initialize
    super
    require 'kafka'

    @time_parser = nil
    @retry_count = 1
  end

  def _config_to_array(config)
    config_array = config.split(',').map {|k| k.strip }
    if config_array.empty?
      raise Fluent::ConfigError, "kafka_group: '#{config}' is a required parameter"
    end
    config_array
  end

  def multi_workers_ready?
    true
  end

  private :_config_to_array

  def configure(conf)
    super

    $log.info "Will watch for topics #{@topics} at brokers " \
              "#{@brokers} and '#{@consumer_group}' group"

    @topics = _config_to_array(@topics)

    if conf['max_wait_ms']
      log.warn "'max_wait_ms' parameter is deprecated. Use second unit 'max_wait_time' instead"
      @max_wait_time = conf['max_wait_ms'].to_i / 1000
    end

    @parser_proc = setup_parser(conf)

    @consumer_opts = {:group_id => @consumer_group}
    @consumer_opts[:session_timeout] = @session_timeout if @session_timeout
    @consumer_opts[:offset_commit_interval] = @offset_commit_interval if @offset_commit_interval
    @consumer_opts[:offset_commit_threshold] = @offset_commit_threshold if @offset_commit_threshold
    @consumer_opts[:fetcher_max_queue_size] = @fetcher_max_queue_size if @fetcher_max_queue_size
    @consumer_opts[:refresh_topic_interval] = @refresh_topic_interval if @refresh_topic_interval

    @fetch_opts = {}
    @fetch_opts[:max_wait_time] = @max_wait_time if @max_wait_time
    @fetch_opts[:min_bytes] = @min_bytes if @min_bytes

    @time_source = :record if @use_record_time

    if @time_source == :record and @time_format
      if defined?(Fluent::TimeParser)
        @time_parser = Fluent::TimeParser.new(@time_format)
      else
        @time_parser = Fluent::TextParser::TimeParser.new(@time_format)
      end
    end

    if @time_source == :record && defined?(Fluent::NumericTimeParser)
      @float_numeric_parse = Fluent::NumericTimeParser.new(:float)
    end
  end

  def setup_parser(conf)
    case @format
    when 'json'
      begin
        require 'oj'
        Oj.default_options = Fluent::DEFAULT_OJ_OPTIONS
        Proc.new { |msg| Oj.load(msg.value) }
      rescue LoadError
        require 'yajl'
        Proc.new { |msg| Yajl::Parser.parse(msg.value) }
      end
    when 'ltsv'
      require 'ltsv'
      Proc.new { |msg| LTSV.parse(msg.value, {:symbolize_keys => false}).first }
    when 'msgpack'
      require 'msgpack'
      Proc.new { |msg| MessagePack.unpack(msg.value) }
    when 'text'
      Proc.new { |msg| {@message_key => msg.value} }
    else
      @custom_parser = Fluent::Plugin.new_parser(conf['format'])
      @custom_parser.configure(conf)
      Proc.new { |msg|
        @custom_parser.parse(msg.value) {|_time, record|
          record
        }
      }
    end
  end

  def start
    super

    logger = @get_kafka_client_log ? log : nil
    if @scram_mechanism != nil && @username != nil && @password != nil
      @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: logger, connect_timeout: @connect_timeout, socket_timeout: @socket_timeout, ssl_ca_cert_file_path: @ssl_ca_cert,
                         ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
                         ssl_client_cert_key_password: @ssl_client_cert_key_password,
                         ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_scram_username: @username, sasl_scram_password: @password,
                         sasl_scram_mechanism: @scram_mechanism, sasl_over_ssl: @sasl_over_ssl, ssl_verify_hostname: @ssl_verify_hostname)
    elsif @username != nil && @password != nil
      @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: logger, connect_timeout: @connect_timeout, socket_timeout: @socket_timeout, ssl_ca_cert_file_path: @ssl_ca_cert,
                         ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
                         ssl_client_cert_key_password: @ssl_client_cert_key_password,
                         ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_plain_username: @username, sasl_plain_password: @password,
                         sasl_over_ssl: @sasl_over_ssl, ssl_verify_hostname: @ssl_verify_hostname)
    else
      @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: logger, connect_timeout: @connect_timeout, socket_timeout: @socket_timeout, ssl_ca_cert_file_path: @ssl_ca_cert,
                         ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
                         ssl_client_cert_key_password: @ssl_client_cert_key_password,
                         ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_gssapi_principal: @principal, sasl_gssapi_keytab: @keytab,
                         ssl_verify_hostname: @ssl_verify_hostname)
    end

    @consumer = setup_consumer
    @thread = Thread.new(&method(:run))
  end

  def shutdown
    # This nil assignment should be guarded by mutex in multithread programming manner.
    # But the situation is very low contention, so we don't use mutex for now.
    # If the problem happens, we will add a guard for consumer.
    consumer = @consumer
    @consumer = nil
    consumer.stop

    @thread.join
    @kafka.close
    super
  end

  def setup_consumer
    consumer = @kafka.consumer(**@consumer_opts)
    @topics.each { |topic|
      if m = /^\/(.+)\/$/.match(topic)
        topic_or_regex = Regexp.new(m[1])
        $log.info "Subscribe to topics matching the regex #{topic}"
      else
        topic_or_regex = topic
        $log.info "Subscribe to topic #{topic}"
      end
      consumer.subscribe(topic_or_regex, start_from_beginning: @start_from_beginning, max_bytes_per_partition: @max_bytes)
    }
    consumer
  end

  def reconnect_consumer
    log.warn "Stopping Consumer"
    consumer = @consumer
    @consumer = nil
    if consumer
      consumer.stop
    end
    log.warn "Could not connect to broker. retry_time:#{@retry_count}. Next retry will be in #{@retry_wait_seconds} seconds"
    @retry_count = @retry_count + 1
    sleep @retry_wait_seconds
    @consumer = setup_consumer
    log.warn "Re-starting consumer #{Time.now.to_s}"
    @retry_count = 0
  rescue =>e
    log.error "unexpected error during re-starting consumer object access", :error => e.to_s
    log.error_backtrace
    if @retry_count <= @retry_limit or disable_retry_limit
      reconnect_consumer
    end
  end

  def process_batch_with_record_tag(batch)
    es = {}
    batch.messages.each { |msg|
      begin
        record = @parser_proc.call(msg)
        tag = record[@record_tag_key]
        tag = @add_prefix + "." + tag if @add_prefix
        tag = tag + "." + @add_suffix if @add_suffix
        es[tag] ||= Fluent::MultiEventStream.new
        case @time_source
        when :kafka
          record_time = Fluent::EventTime.from_time(msg.create_time)
        when :now
          record_time = Fluent::Engine.now
        when :record
          if @time_format
            record_time = @time_parser.parse(record[@record_time_key].to_s)
          else
            record_time = record[@record_time_key]
          end
        else
          log.fatal "BUG: invalid time_source: #{@time_source}"
        end
        if @kafka_message_key
          record[@kafka_message_key] = msg.key
        end
        if @add_headers
          msg.headers.each_pair { |k, v|
            record[k] = v
          }
        end
        es[tag].add(record_time, record)
      rescue => e
        log.warn "parser error in #{batch.topic}/#{batch.partition}", :error => e.to_s, :value => msg.value, :offset => msg.offset
        log.debug_backtrace
      end
    }

    unless es.empty?
      es.each { |tag,es|
        emit_events(tag, es)
      }
    end
  end

  def process_batch(batch)
    es = Fluent::MultiEventStream.new
    tag = batch.topic
    tag = @add_prefix + "." + tag if @add_prefix
    tag = tag + "." + @add_suffix if @add_suffix

    batch.messages.each { |msg|
      begin
        record = @parser_proc.call(msg)
        case @time_source
        when :kafka
          record_time = Fluent::EventTime.from_time(msg.create_time)
        when :now
          record_time = Fluent::Engine.now
        when :record
          record_time = record[@record_time_key]

          if @time_format
            record_time = @time_parser.parse(record_time.to_s)
          elsif record_time.is_a?(Float) && @float_numeric_parse
            record_time = @float_numeric_parse.parse(record_time)
          end
        else
          log.fatal "BUG: invalid time_source: #{@time_source}"
        end
        if @kafka_message_key
          record[@kafka_message_key] = msg.key
        end
        if @add_headers
          msg.headers.each_pair { |k, v|
            record[k] = v
          }
        end
        es.add(record_time, record)
      rescue => e
        log.warn "parser error in #{batch.topic}/#{batch.partition}", :error => e.to_s, :value => msg.value, :offset => msg.offset
        log.debug_backtrace
      end
    }

    unless es.empty?
      emit_events(tag, es)
    end
  end

  def run
    while @consumer
      begin
        @consumer.each_batch(**@fetch_opts) { |batch|
          if @tag_source == :record
            process_batch_with_record_tag(batch)
          else
            process_batch(batch)
          end
        }
      rescue ForShutdown
      rescue => e
        log.error "unexpected error during consuming events from kafka. Re-fetch events.", :error => e.to_s
        log.error_backtrace
        reconnect_consumer
      end
    end
  rescue => e
    log.error "unexpected error during consumer object access", :error => e.to_s
    log.error_backtrace
  end

  def emit_events(tag, es)
    retries = 0
    begin
      router.emit_stream(tag, es)
    rescue BufferError
      raise ForShutdown if @consumer.nil?

      if @retry_emit_limit.nil?
        sleep 1
        retry
      end

      if retries < @retry_emit_limit
        retries += 1
        sleep 1
        retry
      else
        raise RuntimeError, "Exceeds retry_emit_limit"
      end
    end
  end
end
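For orientation, the following is a minimal, untested configuration sketch for the kafka_group input defined above. The parameter names (brokers, consumer_group, topics, format, add_prefix) come from the config_param declarations in the file; the broker addresses, group name, topic list, and tag prefix are placeholder values, not part of the package.

    <source>
      @type kafka_group
      # comma-separated broker list (placeholder hosts)
      brokers kafka1:9092,kafka2:9092
      # consumer group name (placeholder)
      consumer_group fluentd-consumer
      # comma-separated topics to subscribe to (placeholders)
      topics app_events,audit_logs
      # payload format handled by setup_parser above
      format json
      # optional tag prefix, producing tags like kafka.<topic>
      add_prefix kafka
    </source>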
data/lib/fluent/plugin/in_rdkafka_group.rb
@@ -0,0 +1,305 @@
require 'fluent/plugin/input'
require 'fluent/time'
require 'fluent/plugin/kafka_plugin_util'

require 'rdkafka'

class Fluent::Plugin::RdKafkaGroupInput < Fluent::Plugin::Input
  Fluent::Plugin.register_input('rdkafka_group', self)

  helpers :thread, :parser, :compat_parameters

  config_param :topics, :string,
               :desc => "Listening topics(separate with comma',')."

  config_param :format, :string, :default => 'json',
               :desc => "Supported format: (json|text|ltsv|msgpack)"
  config_param :message_key, :string, :default => 'message',
               :desc => "For 'text' format only."
  config_param :add_headers, :bool, :default => false,
               :desc => "Add kafka's message headers to event record"
  config_param :add_prefix, :string, :default => nil,
               :desc => "Tag prefix (Optional)"
  config_param :add_suffix, :string, :default => nil,
               :desc => "Tag suffix (Optional)"
  config_param :use_record_time, :bool, :default => false,
               :desc => "Replace message timestamp with contents of 'time' field.",
               :deprecated => "Use 'time_source record' instead."
  config_param :time_source, :enum, :list => [:now, :kafka, :record], :default => :now,
               :desc => "Source for message timestamp."
  config_param :record_time_key, :string, :default => 'time',
               :desc => "Time field when time_source is 'record'"
  config_param :time_format, :string, :default => nil,
               :desc => "Time format to be used to parse 'time' field."
  config_param :kafka_message_key, :string, :default => nil,
               :desc => "Set kafka's message key to this field"

  config_param :retry_emit_limit, :integer, :default => nil,
               :desc => "How long to stop event consuming when BufferQueueLimitError happens. Wait retry_emit_limit x 1s. The default is waiting until BufferQueueLimitError is resolved"
  config_param :retry_wait_seconds, :integer, :default => 30
  config_param :disable_retry_limit, :bool, :default => false,
               :desc => "If set true, it disables retry_limit and make Fluentd retry indefinitely (default: false)"
  config_param :retry_limit, :integer, :default => 10,
               :desc => "The maximum number of retries for connecting kafka (default: 10)"

  config_param :max_wait_time_ms, :integer, :default => 250,
               :desc => "How long to block polls in milliseconds until the server sends us data."
  config_param :max_batch_size, :integer, :default => 10000,
               :desc => "Maximum number of log lines emitted in a single batch."

  config_param :kafka_configs, :hash, :default => {},
               :desc => "Kafka configuration properties as desribed in https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md"

  config_section :parse do
    config_set_default :@type, 'json'
  end

  include Fluent::KafkaPluginUtil::SSLSettings
  include Fluent::KafkaPluginUtil::SaslSettings

  class ForShutdown < StandardError
  end

  BufferError = Fluent::Plugin::Buffer::BufferOverflowError

  def initialize
    super

    @time_parser = nil
    @retry_count = 1
  end

  def _config_to_array(config)
    config_array = config.split(',').map {|k| k.strip }
    if config_array.empty?
      raise Fluent::ConfigError, "kafka_group: '#{config}' is a required parameter"
    end
    config_array
  end

  def multi_workers_ready?
    true
  end

  private :_config_to_array

  def configure(conf)
    compat_parameters_convert(conf, :parser)

    super

    log.warn "The in_rdkafka_group consumer was not yet tested under heavy production load. Use it at your own risk!"

    log.info "Will watch for topics #{@topics} at brokers " \
             "#{@kafka_configs["bootstrap.servers"]} and '#{@kafka_configs["group.id"]}' group"

    @topics = _config_to_array(@topics)

    parser_conf = conf.elements('parse').first
    unless parser_conf
      raise Fluent::ConfigError, "<parse> section or format parameter is required."
    end
    unless parser_conf["@type"]
      raise Fluent::ConfigError, "parse/@type is required."
    end
    @parser_proc = setup_parser(parser_conf)

    @time_source = :record if @use_record_time

    if @time_source == :record and @time_format
      @time_parser = Fluent::TimeParser.new(@time_format)
    end
  end

  def setup_parser(parser_conf)
    format = parser_conf["@type"]
    case format
    when 'json'
      begin
        require 'oj'
        Oj.default_options = Fluent::DEFAULT_OJ_OPTIONS
        Proc.new { |msg| Oj.load(msg.payload) }
      rescue LoadError
        require 'yajl'
        Proc.new { |msg| Yajl::Parser.parse(msg.payload) }
      end
    when 'ltsv'
      require 'ltsv'
      Proc.new { |msg| LTSV.parse(msg.payload, {:symbolize_keys => false}).first }
    when 'msgpack'
      require 'msgpack'
      Proc.new { |msg| MessagePack.unpack(msg.payload) }
    when 'text'
      Proc.new { |msg| {@message_key => msg.payload} }
    else
      @custom_parser = parser_create(usage: 'in-rdkafka-plugin', conf: parser_conf)
      Proc.new { |msg|
        @custom_parser.parse(msg.payload) {|_time, record|
          record
        }
      }
    end
  end

  def start
    super

    @consumer = setup_consumer

    thread_create(:in_rdkafka_group, &method(:run))
  end

  def shutdown
    # This nil assignment should be guarded by mutex in multithread programming manner.
    # But the situation is very low contention, so we don't use mutex for now.
    # If the problem happens, we will add a guard for consumer.
    consumer = @consumer
    @consumer = nil
    consumer.close

    super
  end

  def setup_consumer
    consumer = Rdkafka::Config.new(@kafka_configs).consumer
    consumer.subscribe(*@topics)
    consumer
  end

  def reconnect_consumer
    log.warn "Stopping Consumer"
    consumer = @consumer
    @consumer = nil
    if consumer
      consumer.close
    end
    log.warn "Could not connect to broker. retry_time:#{@retry_count}. Next retry will be in #{@retry_wait_seconds} seconds"
    @retry_count = @retry_count + 1
    sleep @retry_wait_seconds
    @consumer = setup_consumer
    log.warn "Re-starting consumer #{Time.now.to_s}"
    @retry_count = 0
  rescue =>e
    log.error "unexpected error during re-starting consumer object access", :error => e.to_s
    log.error_backtrace
    if @retry_count <= @retry_limit or disable_retry_limit
      reconnect_consumer
    end
  end

  class Batch
    attr_reader :topic
    attr_reader :messages

    def initialize(topic)
      @topic = topic
      @messages = []
    end
  end

  # Executes the passed codeblock on a batch of messages.
  # It is guaranteed that every message in a given batch belongs to the same topic, because the tagging logic in :run expects that property.
  # The number of maximum messages in a batch is capped by the :max_batch_size configuration value. It ensures that consuming from a single
  # topic for a long time (e.g. with `auto.offset.reset` set to `earliest`) does not lead to memory exhaustion. Also, calling consumer.poll
  # advances thes consumer offset, so in case the process crashes we might lose at most :max_batch_size messages.
  def each_batch(&block)
    batch = nil
    message = nil
    while @consumer
      message = @consumer.poll(@max_wait_time_ms)
      if message
        if not batch
          batch = Batch.new(message.topic)
        elsif batch.topic != message.topic || batch.messages.size >= @max_batch_size
          yield batch
          batch = Batch.new(message.topic)
        end
        batch.messages << message
      else
        yield batch if batch
        batch = nil
      end
    end
    yield batch if batch
  end

  def run
    while @consumer
      begin
        each_batch { |batch|
          log.debug "A new batch for topic #{batch.topic} with #{batch.messages.size} messages"
          es = Fluent::MultiEventStream.new
          tag = batch.topic
          tag = @add_prefix + "." + tag if @add_prefix
          tag = tag + "." + @add_suffix if @add_suffix

          batch.messages.each { |msg|
            begin
              record = @parser_proc.call(msg)
              case @time_source
              when :kafka
                record_time = Fluent::EventTime.from_time(msg.timestamp)
              when :now
                record_time = Fluent::Engine.now
              when :record
                if @time_format
                  record_time = @time_parser.parse(record[@record_time_key].to_s)
                else
                  record_time = record[@record_time_key]
                end
              else
                log.fatal "BUG: invalid time_source: #{@time_source}"
              end
              if @kafka_message_key
                record[@kafka_message_key] = msg.key
              end
              if @add_headers
                msg.headers.each_pair { |k, v|
                  record[k] = v
                }
              end
              es.add(record_time, record)
            rescue => e
              log.warn "parser error in #{msg.topic}/#{msg.partition}", :error => e.to_s, :value => msg.payload, :offset => msg.offset
              log.debug_backtrace
            end
          }

          unless es.empty?
            emit_events(tag, es)
          end
        }
      rescue ForShutdown
      rescue => e
        log.error "unexpected error during consuming events from kafka. Re-fetch events.", :error => e.to_s
        log.error_backtrace
        reconnect_consumer
      end
    end
  rescue => e
    log.error "unexpected error during consumer object access", :error => e.to_s
    log.error_backtrace
  end

  def emit_events(tag, es)
    retries = 0
    begin
      router.emit_stream(tag, es)
    rescue BufferError
      raise ForShutdown if @consumer.nil?

      if @retry_emit_limit.nil?
        sleep 1
        retry
      end

      if retries < @retry_emit_limit
        retries += 1
        sleep 1
        retry
      else
        raise RuntimeError, "Exceeds retry_emit_limit"
      end
    end
  end
end
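Likewise, a hedged configuration sketch for the rdkafka_group input above. The kafka_configs hash is passed straight to librdkafka via Rdkafka::Config, and configure reads bootstrap.servers and group.id from it when logging; the <parse> section satisfies the check in configure. All concrete values below are placeholders, not part of the package.

    <source>
      @type rdkafka_group
      # comma-separated topics to subscribe to (placeholder)
      topics app_events
      # optional tag prefix, producing tags like kafka.<topic>
      add_prefix kafka
      # librdkafka properties, passed through as-is (placeholder values)
      kafka_configs {
        "bootstrap.servers": "kafka1:9092,kafka2:9092",
        "group.id": "fluentd-rdkafka"
      }
      <parse>
        @type json
      </parse>
    </source>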