fluent-plugin-kafka-xst 0.19.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/ISSUE_TEMPLATE/bug_report.yaml +72 -0
- data/.github/ISSUE_TEMPLATE/config.yml +5 -0
- data/.github/ISSUE_TEMPLATE/feature_request.yaml +39 -0
- data/.github/dependabot.yml +6 -0
- data/.github/workflows/linux.yml +45 -0
- data/.github/workflows/stale-actions.yml +24 -0
- data/.gitignore +2 -0
- data/ChangeLog +344 -0
- data/Gemfile +6 -0
- data/LICENSE +14 -0
- data/README.md +594 -0
- data/Rakefile +12 -0
- data/ci/prepare-kafka-server.sh +33 -0
- data/examples/README.md +3 -0
- data/examples/out_kafka2/dynamic_topic_based_on_tag.conf +32 -0
- data/examples/out_kafka2/protobuf-formatter.conf +23 -0
- data/examples/out_kafka2/record_key.conf +31 -0
- data/fluent-plugin-kafka.gemspec +27 -0
- data/lib/fluent/plugin/in_kafka.rb +388 -0
- data/lib/fluent/plugin/in_kafka_group.rb +394 -0
- data/lib/fluent/plugin/in_rdkafka_group.rb +305 -0
- data/lib/fluent/plugin/kafka_plugin_util.rb +84 -0
- data/lib/fluent/plugin/kafka_producer_ext.rb +308 -0
- data/lib/fluent/plugin/out_kafka.rb +268 -0
- data/lib/fluent/plugin/out_kafka2.rb +427 -0
- data/lib/fluent/plugin/out_kafka_buffered.rb +374 -0
- data/lib/fluent/plugin/out_rdkafka.rb +324 -0
- data/lib/fluent/plugin/out_rdkafka2.rb +526 -0
- data/test/helper.rb +34 -0
- data/test/plugin/test_in_kafka.rb +66 -0
- data/test/plugin/test_in_kafka_group.rb +69 -0
- data/test/plugin/test_kafka_plugin_util.rb +44 -0
- data/test/plugin/test_out_kafka.rb +68 -0
- data/test/plugin/test_out_kafka2.rb +138 -0
- data/test/plugin/test_out_kafka_buffered.rb +68 -0
- data/test/plugin/test_out_rdkafka2.rb +182 -0
- metadata +214 -0
data/lib/fluent/plugin/out_kafka_buffered.rb
@@ -0,0 +1,374 @@
+require 'thread'
+require 'fluent/output'
+require 'fluent/plugin/kafka_plugin_util'
+
+class Fluent::KafkaOutputBuffered < Fluent::BufferedOutput
+  Fluent::Plugin.register_output('kafka_buffered', self)
+
+  config_param :brokers, :string, :default => 'localhost:9092',
+               :desc => <<-DESC
+Set brokers directly:
+<broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,..
+Brokers: you can choose to use either brokers or zookeeper.
+DESC
+  config_param :zookeeper, :string, :default => nil,
+               :desc => <<-DESC
+Set brokers via Zookeeper:
+<zookeeper_host>:<zookeeper_port>
+DESC
+  config_param :zookeeper_path, :string, :default => '/brokers/ids', :desc => "Path in path for Broker id. Default to /brokers/ids"
+
+  config_param :topic_key, :string, :default => 'topic', :desc => "Field for kafka topic"
+  config_param :default_topic, :string, :default => nil, :desc => "Default output topic when record doesn't have topic field"
+  config_param :message_key_key, :string, :default => 'message_key', :desc => "Field for kafka message key"
+  config_param :default_message_key, :string, :default => nil
+  config_param :partition_key_key, :string, :default => 'partition_key', :desc => "Field for kafka partition key"
+  config_param :default_partition_key, :string, :default => nil
+  config_param :partition_key, :string, :default => 'partition', :desc => "Field for kafka partition"
+  config_param :default_partition, :integer, :default => nil
+  config_param :partitioner_hash_function, :enum, list: [:crc32, :murmur2], :default => :crc32,
+               :desc => "Specify kafka patrtitioner hash algorithm"
+  config_param :client_id, :string, :default => 'kafka'
+  config_param :idempotent, :bool, :default => false, :desc => 'Enable idempotent producer'
+  config_param :sasl_over_ssl, :bool, :default => true,
+               :desc => <<-DESC
+Set to false to prevent SSL strict mode when using SASL authentication
+DESC
+  config_param :output_data_type, :string, :default => 'json',
+               :desc => <<-DESC
+Supported format: (json|ltsv|msgpack|attr:<record name>|<formatter name>)
+DESC
+  config_param :output_include_tag, :bool, :default => false
+  config_param :output_include_time, :bool, :default => false
+  config_param :exclude_partition_key, :bool, :default => false,
+               :desc => <<-DESC
+Set true to remove partition key from data
+DESC
+  config_param :exclude_partition, :bool, :default => false,
+               :desc => <<-DESC
+Set true to remove partition from data
+DESC
+  config_param :exclude_message_key, :bool, :default => false,
+               :desc => <<-DESC
+Set true to remove message key from data
+DESC
+  config_param :exclude_topic_key, :bool, :default => false,
+               :desc => <<-DESC
+Set true to remove topic name key from data
+DESC
+  config_param :use_event_time, :bool, :default => false, :desc => 'Use fluentd event time for kafka create_time'
+
+  config_param :kafka_agg_max_bytes, :size, :default => 4*1024 #4k
+  config_param :kafka_agg_max_messages, :integer, :default => nil
+  config_param :get_kafka_client_log, :bool, :default => false
+
+  # ruby-kafka producer options
+  config_param :max_send_retries, :integer, :default => 2,
+               :desc => "Number of times to retry sending of messages to a leader."
+  config_param :required_acks, :integer, :default => -1,
+               :desc => "The number of acks required per request."
+  config_param :ack_timeout, :time, :default => nil,
+               :desc => "How long the producer waits for acks."
+  config_param :compression_codec, :string, :default => nil,
+               :desc => <<-DESC
+The codec the producer uses to compress messages.
+Supported codecs depends on ruby-kafka: https://github.com/zendesk/ruby-kafka#compression
+DESC
+  config_param :max_send_limit_bytes, :size, :default => nil
+  config_param :discard_kafka_delivery_failed, :bool, :default => false
+
+  config_param :time_format, :string, :default => nil
+
+  config_param :active_support_notification_regex, :string, :default => nil,
+               :desc => <<-DESC
+Add a regular expression to capture ActiveSupport notifications from the Kafka client
+requires activesupport gem - records will be generated under fluent_kafka_stats.**
+DESC
+
+  config_param :monitoring_list, :array, :default => [],
+               :desc => "library to be used to monitor. statsd and datadog are supported"
+
+  include Fluent::KafkaPluginUtil::SSLSettings
+  include Fluent::KafkaPluginUtil::SaslSettings
+
+  attr_accessor :output_data_type
+  attr_accessor :field_separator
+
+  unless method_defined?(:log)
+    define_method("log") { $log }
+  end
+
+  def initialize
+    super
+
+    require 'kafka'
+    require 'fluent/plugin/kafka_producer_ext'
+
+    @kafka = nil
+    @producers = {}
+    @producers_mutex = Mutex.new
+    @field_separator = nil
+  end
+
+  def multi_workers_ready?
+    true
+  end
+
+  def refresh_client(raise_error = true)
+    if @zookeeper
+      @seed_brokers = []
+      z = Zookeeper.new(@zookeeper)
+      z.get_children(:path => @zookeeper_path)[:children].each do |id|
+        broker = Yajl.load(z.get(:path => @zookeeper_path + "/#{id}")[:data])
+        if @ssl_client_cert
+          @seed_brokers.push(pickup_ssl_endpoint(broker))
+        else
+          @seed_brokers.push("#{broker['host']}:#{broker['port']}")
+        end
+      end
+      z.close
+      log.info "brokers has been refreshed via Zookeeper: #{@seed_brokers}"
+    end
+    begin
+      if @seed_brokers.length > 0
+        logger = @get_kafka_client_log ? log : nil
+        if @scram_mechanism != nil && @username != nil && @password != nil
+          @kafka = Kafka.new(seed_brokers: @seed_brokers, client_id: @client_id, logger: logger, ssl_ca_cert_file_path: @ssl_ca_cert,
+                             ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key), ssl_ca_certs_from_system: @ssl_ca_certs_from_system,
+                             sasl_scram_username: @username, sasl_scram_password: @password, sasl_scram_mechanism: @scram_mechanism, sasl_over_ssl: @sasl_over_ssl,
+                             ssl_verify_hostname: @ssl_verify_hostname,
+                             partitioner: Kafka::Partitioner.new(hash_function: @partitioner_hash_function))
+        elsif @username != nil && @password != nil
+          @kafka = Kafka.new(seed_brokers: @seed_brokers, client_id: @client_id, logger: logger, ssl_ca_cert_file_path: @ssl_ca_cert,
+                             ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key), ssl_ca_certs_from_system: @ssl_ca_certs_from_system,
+                             sasl_plain_username: @username, sasl_plain_password: @password, sasl_over_ssl: @sasl_over_ssl, ssl_verify_hostname: @ssl_verify_hostname,
+                             partitioner: Kafka::Partitioner.new(hash_function: @partitioner_hash_function))
+        else
+          @kafka = Kafka.new(seed_brokers: @seed_brokers, client_id: @client_id, logger: logger, ssl_ca_cert_file_path: @ssl_ca_cert,
+                             ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key), ssl_ca_certs_from_system: @ssl_ca_certs_from_system,
+                             sasl_gssapi_principal: @principal, sasl_gssapi_keytab: @keytab, sasl_over_ssl: @sasl_over_ssl, ssl_verify_hostname: @ssl_verify_hostname,
+                             partitioner: Kafka::Partitioner.new(hash_function: @partitioner_hash_function))
+        end
+        log.info "initialized kafka producer: #{@client_id}"
+      else
+        log.warn "No brokers found on Zookeeper"
+      end
+    rescue Exception => e
+      if raise_error # During startup, error should be reported to engine and stop its phase for safety.
+        raise e
+      else
+        log.error e
+      end
+    end
+  end
+
+  def configure(conf)
+    super
+
+    log.warn "Support of fluentd v0.12 has ended. Use kafka2 instead. kafka_buffered will be an alias of kafka2"
+
+    if @zookeeper
+      require 'zookeeper'
+    else
+      @seed_brokers = @brokers.split(",")
+      log.info "brokers has been set directly: #{@seed_brokers}"
+    end
+
+    if conf['ack_timeout_ms']
+      log.warn "'ack_timeout_ms' parameter is deprecated. Use second unit 'ack_timeout' instead"
+      @ack_timeout = conf['ack_timeout_ms'].to_i / 1000
+    end
+
+    @f_separator = case @field_separator
+                   when /SPACE/i then ' '
+                   when /COMMA/i then ','
+                   when /SOH/i then "\x01"
+                   else "\t"
+                   end
+
+    @formatter_proc = setup_formatter(conf)
+
+    @producer_opts = {max_retries: @max_send_retries, required_acks: @required_acks, idempotent: @idempotent}
+    @producer_opts[:ack_timeout] = @ack_timeout if @ack_timeout
+    @producer_opts[:compression_codec] = @compression_codec.to_sym if @compression_codec
+
+    if @discard_kafka_delivery_failed
+      log.warn "'discard_kafka_delivery_failed' option discards events which cause delivery failure, e.g. invalid topic or something."
+      log.warn "If this is unexpected, you need to check your configuration or data."
+    end
+
+    if @active_support_notification_regex
+      require 'active_support/notifications'
+      require 'active_support/core_ext/hash/keys'
+      ActiveSupport::Notifications.subscribe(Regexp.new(@active_support_notification_regex)) do |*args|
+        event = ActiveSupport::Notifications::Event.new(*args)
+        message = event.payload.respond_to?(:stringify_keys) ? event.payload.stringify_keys : event.payload
+        @router.emit("fluent_kafka_stats.#{event.name}", Time.now.to_i, message)
+      end
+    end
+
+    @monitoring_list.each { |m|
+      require "kafka/#{m}"
+      log.info "#{m} monitoring started"
+    }
+  end
+
+  def start
+    super
+    refresh_client
+  end
+
+  def shutdown
+    super
+    shutdown_producers
+    @kafka = nil
+  end
+
+  def emit(tag, es, chain)
+    super(tag, es, chain, tag)
+  end
+
+  def format_stream(tag, es)
+    es.to_msgpack_stream
+  end
+
+  def shutdown_producers
+    @producers_mutex.synchronize {
+      @producers.each { |key, producer|
+        producer.shutdown
+      }
+      @producers = {}
+    }
+  end
+
+  def get_producer
+    @producers_mutex.synchronize {
+      producer = @producers[Thread.current.object_id]
+      unless producer
+        producer = @kafka.producer(**@producer_opts)
+        @producers[Thread.current.object_id] = producer
+      end
+      producer
+    }
+  end
+
+  def setup_formatter(conf)
+    if @output_data_type == 'json'
+      begin
+        require 'oj'
+        Oj.default_options = Fluent::DEFAULT_OJ_OPTIONS
+        Proc.new { |tag, time, record| Oj.dump(record) }
+      rescue LoadError
+        require 'yajl'
+        Proc.new { |tag, time, record| Yajl::Encoder.encode(record) }
+      end
+    elsif @output_data_type == 'ltsv'
+      require 'ltsv'
+      Proc.new { |tag, time, record| LTSV.dump(record) }
+    elsif @output_data_type == 'msgpack'
+      require 'msgpack'
+      Proc.new { |tag, time, record| record.to_msgpack }
+    elsif @output_data_type =~ /^attr:(.*)$/
+      @custom_attributes = $1.split(',').map(&:strip).reject(&:empty?)
+      @custom_attributes.unshift('time') if @output_include_time
+      @custom_attributes.unshift('tag') if @output_include_tag
+      Proc.new { |tag, time, record|
+        @custom_attributes.map { |attr|
+          record[attr].nil? ? '' : record[attr].to_s
+        }.join(@f_separator)
+      }
+    else
+      @formatter = Fluent::Plugin.new_formatter(@output_data_type)
+      @formatter.configure(conf)
+      @formatter.method(:format)
+    end
+  end
+
+  def deliver_messages(producer, tag)
+    if @discard_kafka_delivery_failed
+      begin
+        producer.deliver_messages
+      rescue Kafka::DeliveryFailed => e
+        log.warn "DeliveryFailed occurred. Discard broken event:", :error => e.to_s, :error_class => e.class.to_s, :tag => tag
+        producer.clear_buffer
+      end
+    else
+      producer.deliver_messages
+    end
+  end
+
+  def write(chunk)
+    tag = chunk.key
+    def_topic = @default_topic || tag
+    producer = get_producer
+
+    records_by_topic = {}
+    bytes_by_topic = {}
+    messages = 0
+    messages_bytes = 0
+    record_buf = nil
+    record_buf_bytes = nil
+
+    begin
+      chunk.msgpack_each { |time, record|
+        begin
+          if @output_include_time
+            if @time_format
+              record['time'.freeze] = Time.at(time).strftime(@time_format)
+            else
+              record['time'.freeze] = time
+            end
+          end
+
+          record['tag'] = tag if @output_include_tag
+          topic = (@exclude_topic_key ? record.delete(@topic_key) : record[@topic_key]) || def_topic
+          partition_key = (@exclude_partition_key ? record.delete(@partition_key_key) : record[@partition_key_key]) || @default_partition_key
+          partition = (@exclude_partition ? record.delete(@partition_key) : record[@partition_key]) || @default_partition
+          message_key = (@exclude_message_key ? record.delete(@message_key_key) : record[@message_key_key]) || @default_message_key
+
+          records_by_topic[topic] ||= 0
+          bytes_by_topic[topic] ||= 0
+
+          record_buf = @formatter_proc.call(tag, time, record)
+          record_buf_bytes = record_buf.bytesize
+          if @max_send_limit_bytes && record_buf_bytes > @max_send_limit_bytes
+            log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :record_size => record_buf_bytes
+            log.debug "Skipped event:", :record => record
+            next
+          end
+        rescue StandardError => e
+          log.warn "unexpected error during format record. Skip broken event:", :error => e.to_s, :error_class => e.class.to_s, :time => time, :record => record
+          next
+        end
+
+        if (messages > 0) and (messages_bytes + record_buf_bytes > @kafka_agg_max_bytes) or (@kafka_agg_max_messages && messages >= @kafka_agg_max_messages)
+          log.debug { "#{messages} messages send because reaches the limit of batch transmission." }
+          deliver_messages(producer, tag)
+          messages = 0
+          messages_bytes = 0
+        end
+        log.trace { "message will send to #{topic} with partition_key: #{partition_key}, partition: #{partition}, message_key: #{message_key} and value: #{record_buf}." }
+        messages += 1
+        producer.produce_for_buffered(record_buf, topic: topic, key: message_key, partition_key: partition_key, partition: partition,
+                                      create_time: @use_event_time ? Time.at(time) : Time.now)
+        messages_bytes += record_buf_bytes
+
+        records_by_topic[topic] += 1
+        bytes_by_topic[topic] += record_buf_bytes
+      }
+      if messages > 0
+        log.debug { "#{messages} messages send." }
+        deliver_messages(producer, tag)
+      end
+      log.debug { "(records|bytes) (#{records_by_topic}|#{bytes_by_topic})" }
+    end
+  rescue Exception => e
+    log.warn "Send exception occurred: #{e}"
+    log.warn "Exception Backtrace : #{e.backtrace.join("\n")}"
+    # For safety, refresh client and its producers
+    shutdown_producers
+    refresh_client(false)
+    # Raise exception to retry sendind messages
+    raise e
+  end
+end
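For context, the following is a minimal sketch of the ruby-kafka producer flow that out_kafka_buffered.rb wraps: refresh_client builds the Kafka client, get_producer caches one producer per thread, write buffers each formatted record, and deliver_messages flushes a batch. It uses the stock Kafka::Producer#produce rather than the produce_for_buffered extension the gem adds in kafka_producer_ext.rb, and the broker address, topic, and payload are illustrative assumptions, not values taken from the package.

# Sketch of the ruby-kafka flow mirrored by out_kafka_buffered.rb (illustrative values).
require 'kafka'

kafka = Kafka.new(seed_brokers: ['localhost:9092'], client_id: 'kafka')

# One producer per thread, as get_producer does with @producer_opts.
producer = kafka.producer(max_retries: 2, required_acks: -1)

# write() formats each record and buffers it; create_time mirrors use_event_time.
producer.produce('{"message":"hello"}', topic: 'test', key: nil, partition_key: nil, create_time: Time.now)

# deliver_messages flushes the buffered batch; the plugin calls this when
# kafka_agg_max_bytes / kafka_agg_max_messages is reached and again at the end of a chunk.
producer.deliver_messages

producer.shutdown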
data/lib/fluent/plugin/out_rdkafka.rb
@@ -0,0 +1,324 @@
+require 'thread'
+require 'logger'
+require 'fluent/output'
+require 'fluent/plugin/kafka_plugin_util'
+
+require 'rdkafka'
+require 'fluent/plugin/kafka_producer_ext'
+
+class Rdkafka::Producer
+  # return false if producer is forcefully closed, otherwise return true
+  def close(timeout = nil)
+    @closing = true
+    # Wait for the polling thread to finish up
+    # If the broker isn't alive, the thread doesn't exit
+    if timeout
+      thr = @polling_thread.join(timeout)
+      return !!thr
+    else
+      @polling_thread.join
+      return true
+    end
+  end
+end
+
+class Fluent::KafkaOutputBuffered2 < Fluent::BufferedOutput
+  Fluent::Plugin.register_output('rdkafka', self)
+
+  config_param :brokers, :string, :default => 'localhost:9092',
+               :desc => <<-DESC
+Set brokers directly:
+<broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,..
+Brokers: you can choose to use either brokers or zookeeper.
+DESC
+  config_param :default_topic, :string, :default => nil,
+               :desc => "Output topic"
+  config_param :default_message_key, :string, :default => nil
+  config_param :default_partition, :integer, :default => nil
+  config_param :client_id, :string, :default => 'kafka'
+  config_param :output_data_type, :string, :default => 'json',
+               :desc => <<-DESC
+Supported format: (json|ltsv|msgpack|attr:<record name>|<formatter name>)
+DESC
+  config_param :output_include_tag, :bool, :default => false
+  config_param :output_include_time, :bool, :default => false
+  config_param :exclude_partition, :bool, :default => false,
+               :desc => <<-DESC
+Set true to remove partition from data
+DESC
+  config_param :exclude_message_key, :bool, :default => false,
+               :desc => <<-DESC
+Set true to remove message key from data
+DESC
+  config_param :exclude_topic_key, :bool, :default => false,
+               :desc => <<-DESC
+Set true to remove topic name key from data
+DESC
+  config_param :max_send_retries, :integer, :default => 2,
+               :desc => "Number of times to retry sending of messages to a leader."
+  config_param :required_acks, :integer, :default => -1,
+               :desc => "The number of acks required per request."
+  config_param :ack_timeout, :time, :default => nil,
+               :desc => "How long the producer waits for acks."
+  config_param :compression_codec, :string, :default => nil,
+               :desc => <<-DESC
+The codec the producer uses to compress messages.
+Supported codecs: (gzip|snappy)
+DESC
+  config_param :use_event_time, :bool, :default => false, :desc => 'Use fluentd event time for rdkafka timestamp'
+  config_param :max_send_limit_bytes, :size, :default => nil
+  config_param :rdkafka_buffering_max_ms, :integer, :default => nil
+  config_param :rdkafka_buffering_max_messages, :integer, :default => nil
+  config_param :rdkafka_message_max_bytes, :integer, :default => nil
+  config_param :rdkafka_message_max_num, :integer, :default => nil
+  config_param :rdkafka_delivery_handle_poll_timeout, :integer, :default => 30
+  config_param :rdkafka_options, :hash, :default => {}
+
+  config_param :max_enqueue_retries, :integer, :default => 3
+  config_param :enqueue_retry_backoff, :integer, :default => 3
+
+  config_param :service_name, :string, :default => nil
+  config_param :ssl_client_cert_key_password, :string, :default => nil
+
+  include Fluent::KafkaPluginUtil::SSLSettings
+  include Fluent::KafkaPluginUtil::SaslSettings
+
+  def initialize
+    super
+    @producers = {}
+    @producers_mutex = Mutex.new
+  end
+
+  def configure(conf)
+    super
+    log.instance_eval {
+      def add(level, message = nil)
+        if message.nil?
+          if block_given?
+            message = yield
+          else
+            return
+          end
+        end
+
+        # Follow rdkakfa's log level. See also rdkafka-ruby's bindings.rb: https://github.com/appsignal/rdkafka-ruby/blob/e5c7261e3f2637554a5c12b924be297d7dca1328/lib/rdkafka/bindings.rb#L117
+        case level
+        when Logger::FATAL
+          self.fatal(message)
+        when Logger::ERROR
+          self.error(message)
+        when Logger::WARN
+          self.warn(message)
+        when Logger::INFO
+          self.info(message)
+        when Logger::DEBUG
+          self.debug(message)
+        else
+          self.trace(message)
+        end
+      end
+    }
+    Rdkafka::Config.logger = log
+    config = build_config
+    @rdkafka = Rdkafka::Config.new(config)
+    @formatter_proc = setup_formatter(conf)
+  end
+
+  def build_config
+    config = {
+      :"bootstrap.servers" => @brokers,
+    }
+
+    if @ssl_ca_cert && @ssl_ca_cert[0]
+      ssl = true
+      config[:"ssl.ca.location"] = @ssl_ca_cert[0]
+      config[:"ssl.certificate.location"] = @ssl_client_cert if @ssl_client_cert
+      config[:"ssl.key.location"] = @ssl_client_cert_key if @ssl_client_cert_key
+      config[:"ssl.key.password"] = @ssl_client_cert_key_password if @ssl_client_cert_key_password
+    end
+
+    if @principal
+      sasl = true
+      config[:"sasl.mechanisms"] = "GSSAPI"
+      config[:"sasl.kerberos.principal"] = @principal
+      config[:"sasl.kerberos.service.name"] = @service_name if @service_name
+      config[:"sasl.kerberos.keytab"] = @keytab if @keytab
+    end
+
+    if ssl && sasl
+      security_protocol = "SASL_SSL"
+    elsif ssl && !sasl
+      security_protocol = "SSL"
+    elsif !ssl && sasl
+      security_protocol = "SASL_PLAINTEXT"
+    else
+      security_protocol = "PLAINTEXT"
+    end
+    config[:"security.protocol"] = security_protocol
+
+    config[:"compression.codec"] = @compression_codec if @compression_codec
+    config[:"message.send.max.retries"] = @max_send_retries if @max_send_retries
+    config[:"request.required.acks"] = @required_acks if @required_acks
+    config[:"request.timeout.ms"] = @ack_timeout * 1000 if @ack_timeout
+    config[:"queue.buffering.max.ms"] = @rdkafka_buffering_max_ms if @rdkafka_buffering_max_ms
+    config[:"queue.buffering.max.messages"] = @rdkafka_buffering_max_messages if @rdkafka_buffering_max_messages
+    config[:"message.max.bytes"] = @rdkafka_message_max_bytes if @rdkafka_message_max_bytes
+    config[:"batch.num.messages"] = @rdkafka_message_max_num if @rdkafka_message_max_num
+
+    @rdkafka_options.each { |k, v|
+      config[k.to_sym] = v
+    }
+
+    config
+  end
+
+  def start
+    super
+  end
+
+  def multi_workers_ready?
+    true
+  end
+
+  def shutdown
+    super
+    shutdown_producers
+  end
+
+  def shutdown_producers
+    @producers_mutex.synchronize {
+      shutdown_threads = @producers.map { |key, producer|
+        th = Thread.new {
+          unless producer.close(10)
+            log.warn("Queue is forcefully closed after 10 seconds wait")
+          end
+        }
+        th.abort_on_exception = true
+        th
+      }
+      shutdown_threads.each { |th| th.join }
+      @producers = {}
+    }
+  end
+
+  def get_producer
+    @producers_mutex.synchronize {
+      producer = @producers[Thread.current.object_id]
+      unless producer
+        producer = @rdkafka.producer
+        @producers[Thread.current.object_id] = producer
+      end
+      producer
+    }
+  end
+
+  def emit(tag, es, chain)
+    super(tag, es, chain, tag)
+  end
+
+  def format_stream(tag, es)
+    es.to_msgpack_stream
+  end
+
+  def setup_formatter(conf)
+    if @output_data_type == 'json'
+      begin
+        require 'oj'
+        Oj.default_options = Fluent::DEFAULT_OJ_OPTIONS
+        Proc.new { |tag, time, record| Oj.dump(record) }
+      rescue LoadError
+        require 'yajl'
+        Proc.new { |tag, time, record| Yajl::Encoder.encode(record) }
+      end
+    elsif @output_data_type == 'ltsv'
+      require 'ltsv'
+      Proc.new { |tag, time, record| LTSV.dump(record) }
+    elsif @output_data_type == 'msgpack'
+      require 'msgpack'
+      Proc.new { |tag, time, record| record.to_msgpack }
+    elsif @output_data_type =~ /^attr:(.*)$/
+      @custom_attributes = $1.split(',').map(&:strip).reject(&:empty?)
+      @custom_attributes.unshift('time') if @output_include_time
+      @custom_attributes.unshift('tag') if @output_include_tag
+      Proc.new { |tag, time, record|
+        @custom_attributes.map { |attr|
+          record[attr].nil? ? '' : record[attr].to_s
+        }.join(@f_separator)
+      }
+    else
+      @formatter = Fluent::Plugin.new_formatter(@output_data_type)
+      @formatter.configure(conf)
+      @formatter.method(:format)
+    end
+  end
+
+  def write(chunk)
+    tag = chunk.key
+    def_topic = @default_topic || tag
+
+    record_buf = nil
+    record_buf_bytes = nil
+
+    begin
+      chunk.msgpack_each.map { |time, record|
+        begin
+          if @output_include_time
+            if @time_format
+              record['time'.freeze] = Time.at(time).strftime(@time_format)
+            else
+              record['time'.freeze] = time
+            end
+          end
+
+          record['tag'] = tag if @output_include_tag
+          topic = (@exclude_topic_key ? record.delete('topic'.freeze) : record['topic'.freeze]) || def_topic
+          partition = (@exclude_partition ? record.delete('partition'.freeze) : record['partition'.freeze]) || @default_partition
+          message_key = (@exclude_message_key ? record.delete('message_key'.freeze) : record['message_key'.freeze]) || @default_message_key
+
+          record_buf = @formatter_proc.call(tag, time, record)
+          record_buf_bytes = record_buf.bytesize
+          if @max_send_limit_bytes && record_buf_bytes > @max_send_limit_bytes
+            log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :record_size => record_buf_bytes
+            log.debug "Skipped event:", :record => record
+            next
+          end
+        rescue StandardError => e
+          log.warn "unexpected error during format record. Skip broken event:", :error => e.to_s, :error_class => e.class.to_s, :time => time, :record => record
+          next
+        end
+
+        producer = get_producer
+        handler = enqueue_with_retry(producer, topic, record_buf, message_key, partition, time)
+        handler
+      }.each { |handler|
+        handler.wait(max_wait_timeout: @rdkafka_delivery_handle_poll_timeout) if @rdkafka_delivery_handle_poll_timeout != 0
+      }
+    end
+  rescue Exception => e
+    log.warn "Send exception occurred: #{e} at #{e.backtrace.first}"
+    # Raise exception to retry sendind messages
+    raise e
+  end
+
+  def enqueue_with_retry(producer, topic, record_buf, message_key, partition, time)
+    attempt = 0
+    loop do
+      begin
+        handler = producer.produce(topic: topic, payload: record_buf, key: message_key, partition: partition, timestamp: @use_event_time ? Time.at(time) : nil)
+        return handler
+      rescue Exception => e
+        if e.respond_to?(:code) && e.code == :queue_full
+          if attempt <= @max_enqueue_retries
+            log.warn "Failed to enqueue message; attempting retry #{attempt} of #{@max_enqueue_retries} after #{@enqueue_retry_backoff}s"
+            sleep @enqueue_retry_backoff
+            attempt += 1
+          else
+            raise "Failed to enqueue message although tried retry #{@max_enqueue_retries} times"
+          end
+        else
+          raise e
+        end
+      end
+    end
+  end
+end
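Similarly, the following is a minimal sketch of the rdkafka-ruby flow that out_rdkafka.rb drives: build_config assembles the librdkafka settings hash handed to Rdkafka::Config, get_producer caches one producer per thread, enqueue_with_retry calls produce (backing off when the local queue is full), and write waits on each returned delivery handle. The broker address, topic, and payload below are illustrative assumptions, not values taken from the package.

# Sketch of the rdkafka-ruby flow mirrored by out_rdkafka.rb (illustrative values).
require 'rdkafka'

# build_config produces a hash like this from the plugin parameters
# (security.protocol, compression.codec, rdkafka_options, ...).
config = Rdkafka::Config.new(
  "bootstrap.servers" => "localhost:9092",
  "security.protocol" => "PLAINTEXT"
)

producer = config.producer # cached per thread by get_producer

# enqueue_with_retry wraps this call and sleeps enqueue_retry_backoff seconds
# when librdkafka reports :queue_full, up to max_enqueue_retries attempts.
handle = producer.produce(topic: "test", payload: '{"message":"hello"}', key: nil)

# write() waits on each delivery handle unless rdkafka_delivery_handle_poll_timeout is 0.
handle.wait(max_wait_timeout: 30)

producer.close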