fluent-plugin-kafka-xst 0.19.1

Files changed (38)
  1. checksums.yaml +7 -0
  2. data/.github/ISSUE_TEMPLATE/bug_report.yaml +72 -0
  3. data/.github/ISSUE_TEMPLATE/config.yml +5 -0
  4. data/.github/ISSUE_TEMPLATE/feature_request.yaml +39 -0
  5. data/.github/dependabot.yml +6 -0
  6. data/.github/workflows/linux.yml +45 -0
  7. data/.github/workflows/stale-actions.yml +24 -0
  8. data/.gitignore +2 -0
  9. data/ChangeLog +344 -0
  10. data/Gemfile +6 -0
  11. data/LICENSE +14 -0
  12. data/README.md +594 -0
  13. data/Rakefile +12 -0
  14. data/ci/prepare-kafka-server.sh +33 -0
  15. data/examples/README.md +3 -0
  16. data/examples/out_kafka2/dynamic_topic_based_on_tag.conf +32 -0
  17. data/examples/out_kafka2/protobuf-formatter.conf +23 -0
  18. data/examples/out_kafka2/record_key.conf +31 -0
  19. data/fluent-plugin-kafka.gemspec +27 -0
  20. data/lib/fluent/plugin/in_kafka.rb +388 -0
  21. data/lib/fluent/plugin/in_kafka_group.rb +394 -0
  22. data/lib/fluent/plugin/in_rdkafka_group.rb +305 -0
  23. data/lib/fluent/plugin/kafka_plugin_util.rb +84 -0
  24. data/lib/fluent/plugin/kafka_producer_ext.rb +308 -0
  25. data/lib/fluent/plugin/out_kafka.rb +268 -0
  26. data/lib/fluent/plugin/out_kafka2.rb +427 -0
  27. data/lib/fluent/plugin/out_kafka_buffered.rb +374 -0
  28. data/lib/fluent/plugin/out_rdkafka.rb +324 -0
  29. data/lib/fluent/plugin/out_rdkafka2.rb +526 -0
  30. data/test/helper.rb +34 -0
  31. data/test/plugin/test_in_kafka.rb +66 -0
  32. data/test/plugin/test_in_kafka_group.rb +69 -0
  33. data/test/plugin/test_kafka_plugin_util.rb +44 -0
  34. data/test/plugin/test_out_kafka.rb +68 -0
  35. data/test/plugin/test_out_kafka2.rb +138 -0
  36. data/test/plugin/test_out_kafka_buffered.rb +68 -0
  37. data/test/plugin/test_out_rdkafka2.rb +182 -0
  38. metadata +214 -0
data/lib/fluent/plugin/out_kafka_buffered.rb
@@ -0,0 +1,374 @@
+ require 'thread'
+ require 'fluent/output'
+ require 'fluent/plugin/kafka_plugin_util'
+
+ class Fluent::KafkaOutputBuffered < Fluent::BufferedOutput
+   Fluent::Plugin.register_output('kafka_buffered', self)
+
+   config_param :brokers, :string, :default => 'localhost:9092',
+                :desc => <<-DESC
+ Set brokers directly:
+ <broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,..
+ Brokers: you can choose to use either brokers or zookeeper.
+ DESC
+   config_param :zookeeper, :string, :default => nil,
+                :desc => <<-DESC
+ Set brokers via Zookeeper:
+ <zookeeper_host>:<zookeeper_port>
+ DESC
+   config_param :zookeeper_path, :string, :default => '/brokers/ids', :desc => "Zookeeper path for broker ids. Defaults to /brokers/ids"
+
+   config_param :topic_key, :string, :default => 'topic', :desc => "Field for kafka topic"
+   config_param :default_topic, :string, :default => nil, :desc => "Default output topic when record doesn't have topic field"
+   config_param :message_key_key, :string, :default => 'message_key', :desc => "Field for kafka message key"
+   config_param :default_message_key, :string, :default => nil
+   config_param :partition_key_key, :string, :default => 'partition_key', :desc => "Field for kafka partition key"
+   config_param :default_partition_key, :string, :default => nil
+   config_param :partition_key, :string, :default => 'partition', :desc => "Field for kafka partition"
+   config_param :default_partition, :integer, :default => nil
+   config_param :partitioner_hash_function, :enum, list: [:crc32, :murmur2], :default => :crc32,
+                :desc => "Specify kafka partitioner hash algorithm"
+   config_param :client_id, :string, :default => 'kafka'
+   config_param :idempotent, :bool, :default => false, :desc => 'Enable idempotent producer'
+   config_param :sasl_over_ssl, :bool, :default => true,
+                :desc => <<-DESC
+ Set to false to prevent SSL strict mode when using SASL authentication
+ DESC
+   config_param :output_data_type, :string, :default => 'json',
+                :desc => <<-DESC
+ Supported format: (json|ltsv|msgpack|attr:<record name>|<formatter name>)
+ DESC
+   config_param :output_include_tag, :bool, :default => false
+   config_param :output_include_time, :bool, :default => false
+   config_param :exclude_partition_key, :bool, :default => false,
+                :desc => <<-DESC
+ Set true to remove partition key from data
+ DESC
+   config_param :exclude_partition, :bool, :default => false,
+                :desc => <<-DESC
+ Set true to remove partition from data
+ DESC
+   config_param :exclude_message_key, :bool, :default => false,
+                :desc => <<-DESC
+ Set true to remove message key from data
+ DESC
+   config_param :exclude_topic_key, :bool, :default => false,
+                :desc => <<-DESC
+ Set true to remove topic name key from data
+ DESC
+   config_param :use_event_time, :bool, :default => false, :desc => 'Use fluentd event time for kafka create_time'
+
+   config_param :kafka_agg_max_bytes, :size, :default => 4*1024  # 4k
+   config_param :kafka_agg_max_messages, :integer, :default => nil
+   config_param :get_kafka_client_log, :bool, :default => false
+
+   # ruby-kafka producer options
+   config_param :max_send_retries, :integer, :default => 2,
+                :desc => "Number of times to retry sending of messages to a leader."
+   config_param :required_acks, :integer, :default => -1,
+                :desc => "The number of acks required per request."
+   config_param :ack_timeout, :time, :default => nil,
+                :desc => "How long the producer waits for acks."
+   config_param :compression_codec, :string, :default => nil,
+                :desc => <<-DESC
+ The codec the producer uses to compress messages.
+ Supported codecs depend on ruby-kafka: https://github.com/zendesk/ruby-kafka#compression
+ DESC
+   config_param :max_send_limit_bytes, :size, :default => nil
+   config_param :discard_kafka_delivery_failed, :bool, :default => false
+
+   config_param :time_format, :string, :default => nil
+
+   config_param :active_support_notification_regex, :string, :default => nil,
+                :desc => <<-DESC
+ Add a regular expression to capture ActiveSupport notifications from the Kafka client.
+ Requires the activesupport gem - records will be generated under fluent_kafka_stats.**
+ DESC
+
+   config_param :monitoring_list, :array, :default => [],
+                :desc => "Libraries to use for monitoring. statsd and datadog are supported"
+
+   include Fluent::KafkaPluginUtil::SSLSettings
+   include Fluent::KafkaPluginUtil::SaslSettings
+
+   attr_accessor :output_data_type
+   attr_accessor :field_separator
+
+   unless method_defined?(:log)
+     define_method("log") { $log }
+   end
+
+   def initialize
+     super
+
+     require 'kafka'
+     require 'fluent/plugin/kafka_producer_ext'
+
+     @kafka = nil
+     @producers = {}
+     @producers_mutex = Mutex.new
+     @field_separator = nil
+   end
+
+   def multi_workers_ready?
+     true
+   end
+
+   def refresh_client(raise_error = true)
+     if @zookeeper
+       @seed_brokers = []
+       z = Zookeeper.new(@zookeeper)
+       z.get_children(:path => @zookeeper_path)[:children].each do |id|
+         broker = Yajl.load(z.get(:path => @zookeeper_path + "/#{id}")[:data])
+         if @ssl_client_cert
+           @seed_brokers.push(pickup_ssl_endpoint(broker))
+         else
+           @seed_brokers.push("#{broker['host']}:#{broker['port']}")
+         end
+       end
+       z.close
+       log.info "brokers have been refreshed via Zookeeper: #{@seed_brokers}"
+     end
+     begin
+       if @seed_brokers.length > 0
+         logger = @get_kafka_client_log ? log : nil
+         if @scram_mechanism != nil && @username != nil && @password != nil
+           @kafka = Kafka.new(seed_brokers: @seed_brokers, client_id: @client_id, logger: logger, ssl_ca_cert_file_path: @ssl_ca_cert,
+                              ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key), ssl_ca_certs_from_system: @ssl_ca_certs_from_system,
+                              sasl_scram_username: @username, sasl_scram_password: @password, sasl_scram_mechanism: @scram_mechanism, sasl_over_ssl: @sasl_over_ssl,
+                              ssl_verify_hostname: @ssl_verify_hostname,
+                              partitioner: Kafka::Partitioner.new(hash_function: @partitioner_hash_function))
+         elsif @username != nil && @password != nil
+           @kafka = Kafka.new(seed_brokers: @seed_brokers, client_id: @client_id, logger: logger, ssl_ca_cert_file_path: @ssl_ca_cert,
+                              ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key), ssl_ca_certs_from_system: @ssl_ca_certs_from_system,
+                              sasl_plain_username: @username, sasl_plain_password: @password, sasl_over_ssl: @sasl_over_ssl, ssl_verify_hostname: @ssl_verify_hostname,
+                              partitioner: Kafka::Partitioner.new(hash_function: @partitioner_hash_function))
+         else
+           @kafka = Kafka.new(seed_brokers: @seed_brokers, client_id: @client_id, logger: logger, ssl_ca_cert_file_path: @ssl_ca_cert,
+                              ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key), ssl_ca_certs_from_system: @ssl_ca_certs_from_system,
+                              sasl_gssapi_principal: @principal, sasl_gssapi_keytab: @keytab, sasl_over_ssl: @sasl_over_ssl, ssl_verify_hostname: @ssl_verify_hostname,
+                              partitioner: Kafka::Partitioner.new(hash_function: @partitioner_hash_function))
+         end
+         log.info "initialized kafka producer: #{@client_id}"
+       else
+         log.warn "No brokers found on Zookeeper"
+       end
+     rescue Exception => e
+       if raise_error # During startup, errors should be reported to the engine and stop its phase for safety.
+         raise e
+       else
+         log.error e
+       end
+     end
+   end
+
+   def configure(conf)
+     super
+
+     log.warn "Support of fluentd v0.12 has ended. Use kafka2 instead. kafka_buffered will be an alias of kafka2"
+
+     if @zookeeper
+       require 'zookeeper'
+     else
+       @seed_brokers = @brokers.split(",")
+       log.info "brokers have been set directly: #{@seed_brokers}"
+     end
+
+     if conf['ack_timeout_ms']
+       log.warn "'ack_timeout_ms' parameter is deprecated. Use 'ack_timeout' in seconds instead"
+       @ack_timeout = conf['ack_timeout_ms'].to_i / 1000
+     end
+
+     @f_separator = case @field_separator
+                    when /SPACE/i then ' '
+                    when /COMMA/i then ','
+                    when /SOH/i then "\x01"
+                    else "\t"
+                    end
+
+     @formatter_proc = setup_formatter(conf)
+
+     @producer_opts = {max_retries: @max_send_retries, required_acks: @required_acks, idempotent: @idempotent}
+     @producer_opts[:ack_timeout] = @ack_timeout if @ack_timeout
+     @producer_opts[:compression_codec] = @compression_codec.to_sym if @compression_codec
+
+     if @discard_kafka_delivery_failed
+       log.warn "'discard_kafka_delivery_failed' option discards events which cause delivery failure, e.g. invalid topic or something."
+       log.warn "If this is unexpected, you need to check your configuration or data."
+     end
+
+     if @active_support_notification_regex
+       require 'active_support/notifications'
+       require 'active_support/core_ext/hash/keys'
+       ActiveSupport::Notifications.subscribe(Regexp.new(@active_support_notification_regex)) do |*args|
+         event = ActiveSupport::Notifications::Event.new(*args)
+         message = event.payload.respond_to?(:stringify_keys) ? event.payload.stringify_keys : event.payload
+         @router.emit("fluent_kafka_stats.#{event.name}", Time.now.to_i, message)
+       end
+     end
+
+     @monitoring_list.each { |m|
+       require "kafka/#{m}"
+       log.info "#{m} monitoring started"
+     }
+   end
+
+   def start
+     super
+     refresh_client
+   end
+
+   def shutdown
+     super
+     shutdown_producers
+     @kafka = nil
+   end
+
+   def emit(tag, es, chain)
+     super(tag, es, chain, tag)
+   end
+
+   def format_stream(tag, es)
+     es.to_msgpack_stream
+   end
+
+   def shutdown_producers
+     @producers_mutex.synchronize {
+       @producers.each { |key, producer|
+         producer.shutdown
+       }
+       @producers = {}
+     }
+   end
+
+   def get_producer
+     @producers_mutex.synchronize {
+       producer = @producers[Thread.current.object_id]
+       unless producer
+         producer = @kafka.producer(**@producer_opts)
+         @producers[Thread.current.object_id] = producer
+       end
+       producer
+     }
+   end
+
+   def setup_formatter(conf)
+     if @output_data_type == 'json'
+       begin
+         require 'oj'
+         Oj.default_options = Fluent::DEFAULT_OJ_OPTIONS
+         Proc.new { |tag, time, record| Oj.dump(record) }
+       rescue LoadError
+         require 'yajl'
+         Proc.new { |tag, time, record| Yajl::Encoder.encode(record) }
+       end
+     elsif @output_data_type == 'ltsv'
+       require 'ltsv'
+       Proc.new { |tag, time, record| LTSV.dump(record) }
+     elsif @output_data_type == 'msgpack'
+       require 'msgpack'
+       Proc.new { |tag, time, record| record.to_msgpack }
+     elsif @output_data_type =~ /^attr:(.*)$/
+       @custom_attributes = $1.split(',').map(&:strip).reject(&:empty?)
+       @custom_attributes.unshift('time') if @output_include_time
+       @custom_attributes.unshift('tag') if @output_include_tag
+       Proc.new { |tag, time, record|
+         @custom_attributes.map { |attr|
+           record[attr].nil? ? '' : record[attr].to_s
+         }.join(@f_separator)
+       }
+     else
+       @formatter = Fluent::Plugin.new_formatter(@output_data_type)
+       @formatter.configure(conf)
+       @formatter.method(:format)
+     end
+   end
+
+   def deliver_messages(producer, tag)
+     if @discard_kafka_delivery_failed
+       begin
+         producer.deliver_messages
+       rescue Kafka::DeliveryFailed => e
+         log.warn "DeliveryFailed occurred. Discard broken event:", :error => e.to_s, :error_class => e.class.to_s, :tag => tag
+         producer.clear_buffer
+       end
+     else
+       producer.deliver_messages
+     end
+   end
+
+   def write(chunk)
+     tag = chunk.key
+     def_topic = @default_topic || tag
+     producer = get_producer
+
+     records_by_topic = {}
+     bytes_by_topic = {}
+     messages = 0
+     messages_bytes = 0
+     record_buf = nil
+     record_buf_bytes = nil
+
+     begin
+       chunk.msgpack_each { |time, record|
+         begin
+           if @output_include_time
+             if @time_format
+               record['time'.freeze] = Time.at(time).strftime(@time_format)
+             else
+               record['time'.freeze] = time
+             end
+           end
+
+           record['tag'] = tag if @output_include_tag
+           topic = (@exclude_topic_key ? record.delete(@topic_key) : record[@topic_key]) || def_topic
+           partition_key = (@exclude_partition_key ? record.delete(@partition_key_key) : record[@partition_key_key]) || @default_partition_key
+           partition = (@exclude_partition ? record.delete(@partition_key) : record[@partition_key]) || @default_partition
+           message_key = (@exclude_message_key ? record.delete(@message_key_key) : record[@message_key_key]) || @default_message_key
+
+           records_by_topic[topic] ||= 0
+           bytes_by_topic[topic] ||= 0
+
+           record_buf = @formatter_proc.call(tag, time, record)
+           record_buf_bytes = record_buf.bytesize
+           if @max_send_limit_bytes && record_buf_bytes > @max_send_limit_bytes
+             log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :record_size => record_buf_bytes
+             log.debug "Skipped event:", :record => record
+             next
+           end
+         rescue StandardError => e
+           log.warn "unexpected error while formatting record. Skip broken event:", :error => e.to_s, :error_class => e.class.to_s, :time => time, :record => record
+           next
+         end
+
+         if (messages > 0) and (messages_bytes + record_buf_bytes > @kafka_agg_max_bytes) or (@kafka_agg_max_messages && messages >= @kafka_agg_max_messages)
+           log.debug { "#{messages} messages sent because the batch transmission limit was reached." }
+           deliver_messages(producer, tag)
+           messages = 0
+           messages_bytes = 0
+         end
+         log.trace { "message will be sent to #{topic} with partition_key: #{partition_key}, partition: #{partition}, message_key: #{message_key} and value: #{record_buf}." }
+         messages += 1
+         producer.produce_for_buffered(record_buf, topic: topic, key: message_key, partition_key: partition_key, partition: partition,
+                                       create_time: @use_event_time ? Time.at(time) : Time.now)
+         messages_bytes += record_buf_bytes
+
+         records_by_topic[topic] += 1
+         bytes_by_topic[topic] += record_buf_bytes
+       }
+       if messages > 0
+         log.debug { "#{messages} messages sent." }
+         deliver_messages(producer, tag)
+       end
+       log.debug { "(records|bytes) (#{records_by_topic}|#{bytes_by_topic})" }
+     end
+   rescue Exception => e
+     log.warn "Send exception occurred: #{e}"
+     log.warn "Exception Backtrace : #{e.backtrace.join("\n")}"
+     # For safety, refresh the client and its producers
+     shutdown_producers
+     refresh_client(false)
+     # Raise exception to retry sending messages
+     raise e
+   end
+ end
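
For orientation, here is a minimal sketch of a Fluentd match section exercising the parameters declared in out_kafka_buffered.rb above. The match pattern, topic name, and codec are illustrative placeholders rather than values taken from this release; only the parameter names come from the config_param declarations in the diff.

  <match app.**>
    @type kafka_buffered
    brokers localhost:9092
    default_topic app-logs          # hypothetical topic name
    output_data_type json
    output_include_tag true
    compression_codec gzip
    required_acks -1
    max_send_retries 2
    kafka_agg_max_bytes 4096
  </match>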
data/lib/fluent/plugin/out_rdkafka.rb
@@ -0,0 +1,324 @@
+ require 'thread'
+ require 'logger'
+ require 'fluent/output'
+ require 'fluent/plugin/kafka_plugin_util'
+
+ require 'rdkafka'
+ require 'fluent/plugin/kafka_producer_ext'
+
+ class Rdkafka::Producer
+   # return false if producer is forcefully closed, otherwise return true
+   def close(timeout = nil)
+     @closing = true
+     # Wait for the polling thread to finish up
+     # If the broker isn't alive, the thread doesn't exit
+     if timeout
+       thr = @polling_thread.join(timeout)
+       return !!thr
+     else
+       @polling_thread.join
+       return true
+     end
+   end
+ end
+
+ class Fluent::KafkaOutputBuffered2 < Fluent::BufferedOutput
+   Fluent::Plugin.register_output('rdkafka', self)
+
+   config_param :brokers, :string, :default => 'localhost:9092',
+                :desc => <<-DESC
+ Set brokers directly:
+ <broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,..
+ Brokers: you can choose to use either brokers or zookeeper.
+ DESC
+   config_param :default_topic, :string, :default => nil,
+                :desc => "Output topic"
+   config_param :default_message_key, :string, :default => nil
+   config_param :default_partition, :integer, :default => nil
+   config_param :client_id, :string, :default => 'kafka'
+   config_param :output_data_type, :string, :default => 'json',
+                :desc => <<-DESC
+ Supported format: (json|ltsv|msgpack|attr:<record name>|<formatter name>)
+ DESC
+   config_param :output_include_tag, :bool, :default => false
+   config_param :output_include_time, :bool, :default => false
+   config_param :exclude_partition, :bool, :default => false,
+                :desc => <<-DESC
+ Set true to remove partition from data
+ DESC
+   config_param :exclude_message_key, :bool, :default => false,
+                :desc => <<-DESC
+ Set true to remove message key from data
+ DESC
+   config_param :exclude_topic_key, :bool, :default => false,
+                :desc => <<-DESC
+ Set true to remove topic name key from data
+ DESC
+   config_param :max_send_retries, :integer, :default => 2,
+                :desc => "Number of times to retry sending of messages to a leader."
+   config_param :required_acks, :integer, :default => -1,
+                :desc => "The number of acks required per request."
+   config_param :ack_timeout, :time, :default => nil,
+                :desc => "How long the producer waits for acks."
+   config_param :compression_codec, :string, :default => nil,
+                :desc => <<-DESC
+ The codec the producer uses to compress messages.
+ Supported codecs: (gzip|snappy)
+ DESC
+   config_param :use_event_time, :bool, :default => false, :desc => 'Use fluentd event time for rdkafka timestamp'
+   config_param :max_send_limit_bytes, :size, :default => nil
+   config_param :rdkafka_buffering_max_ms, :integer, :default => nil
+   config_param :rdkafka_buffering_max_messages, :integer, :default => nil
+   config_param :rdkafka_message_max_bytes, :integer, :default => nil
+   config_param :rdkafka_message_max_num, :integer, :default => nil
+   config_param :rdkafka_delivery_handle_poll_timeout, :integer, :default => 30
+   config_param :rdkafka_options, :hash, :default => {}
+
+   config_param :max_enqueue_retries, :integer, :default => 3
+   config_param :enqueue_retry_backoff, :integer, :default => 3
+
+   config_param :service_name, :string, :default => nil
+   config_param :ssl_client_cert_key_password, :string, :default => nil
+
+   include Fluent::KafkaPluginUtil::SSLSettings
+   include Fluent::KafkaPluginUtil::SaslSettings
+
+   def initialize
+     super
+     @producers = {}
+     @producers_mutex = Mutex.new
+   end
+
+   def configure(conf)
+     super
+     log.instance_eval {
+       def add(level, message = nil)
+         if message.nil?
+           if block_given?
+             message = yield
+           else
+             return
+           end
+         end
+
+         # Follow rdkafka's log level. See also rdkafka-ruby's bindings.rb: https://github.com/appsignal/rdkafka-ruby/blob/e5c7261e3f2637554a5c12b924be297d7dca1328/lib/rdkafka/bindings.rb#L117
+         case level
+         when Logger::FATAL
+           self.fatal(message)
+         when Logger::ERROR
+           self.error(message)
+         when Logger::WARN
+           self.warn(message)
+         when Logger::INFO
+           self.info(message)
+         when Logger::DEBUG
+           self.debug(message)
+         else
+           self.trace(message)
+         end
+       end
+     }
+     Rdkafka::Config.logger = log
+     config = build_config
+     @rdkafka = Rdkafka::Config.new(config)
+     @formatter_proc = setup_formatter(conf)
+   end
+
+   def build_config
+     config = {
+       :"bootstrap.servers" => @brokers,
+     }
+
+     if @ssl_ca_cert && @ssl_ca_cert[0]
+       ssl = true
+       config[:"ssl.ca.location"] = @ssl_ca_cert[0]
+       config[:"ssl.certificate.location"] = @ssl_client_cert if @ssl_client_cert
+       config[:"ssl.key.location"] = @ssl_client_cert_key if @ssl_client_cert_key
+       config[:"ssl.key.password"] = @ssl_client_cert_key_password if @ssl_client_cert_key_password
+     end
+
+     if @principal
+       sasl = true
+       config[:"sasl.mechanisms"] = "GSSAPI"
+       config[:"sasl.kerberos.principal"] = @principal
+       config[:"sasl.kerberos.service.name"] = @service_name if @service_name
+       config[:"sasl.kerberos.keytab"] = @keytab if @keytab
+     end
+
+     if ssl && sasl
+       security_protocol = "SASL_SSL"
+     elsif ssl && !sasl
+       security_protocol = "SSL"
+     elsif !ssl && sasl
+       security_protocol = "SASL_PLAINTEXT"
+     else
+       security_protocol = "PLAINTEXT"
+     end
+     config[:"security.protocol"] = security_protocol
+
+     config[:"compression.codec"] = @compression_codec if @compression_codec
+     config[:"message.send.max.retries"] = @max_send_retries if @max_send_retries
+     config[:"request.required.acks"] = @required_acks if @required_acks
+     config[:"request.timeout.ms"] = @ack_timeout * 1000 if @ack_timeout
+     config[:"queue.buffering.max.ms"] = @rdkafka_buffering_max_ms if @rdkafka_buffering_max_ms
+     config[:"queue.buffering.max.messages"] = @rdkafka_buffering_max_messages if @rdkafka_buffering_max_messages
+     config[:"message.max.bytes"] = @rdkafka_message_max_bytes if @rdkafka_message_max_bytes
+     config[:"batch.num.messages"] = @rdkafka_message_max_num if @rdkafka_message_max_num
+
+     @rdkafka_options.each { |k, v|
+       config[k.to_sym] = v
+     }
+
+     config
+   end
+
+   def start
+     super
+   end
+
+   def multi_workers_ready?
+     true
+   end
+
+   def shutdown
+     super
+     shutdown_producers
+   end
+
+   def shutdown_producers
+     @producers_mutex.synchronize {
+       shutdown_threads = @producers.map { |key, producer|
+         th = Thread.new {
+           unless producer.close(10)
+             log.warn("Queue is forcefully closed after 10 seconds wait")
+           end
+         }
+         th.abort_on_exception = true
+         th
+       }
+       shutdown_threads.each { |th| th.join }
+       @producers = {}
+     }
+   end
+
+   def get_producer
+     @producers_mutex.synchronize {
+       producer = @producers[Thread.current.object_id]
+       unless producer
+         producer = @rdkafka.producer
+         @producers[Thread.current.object_id] = producer
+       end
+       producer
+     }
+   end
+
+   def emit(tag, es, chain)
+     super(tag, es, chain, tag)
+   end
+
+   def format_stream(tag, es)
+     es.to_msgpack_stream
+   end
+
+   def setup_formatter(conf)
+     if @output_data_type == 'json'
+       begin
+         require 'oj'
+         Oj.default_options = Fluent::DEFAULT_OJ_OPTIONS
+         Proc.new { |tag, time, record| Oj.dump(record) }
+       rescue LoadError
+         require 'yajl'
+         Proc.new { |tag, time, record| Yajl::Encoder.encode(record) }
+       end
+     elsif @output_data_type == 'ltsv'
+       require 'ltsv'
+       Proc.new { |tag, time, record| LTSV.dump(record) }
+     elsif @output_data_type == 'msgpack'
+       require 'msgpack'
+       Proc.new { |tag, time, record| record.to_msgpack }
+     elsif @output_data_type =~ /^attr:(.*)$/
+       @custom_attributes = $1.split(',').map(&:strip).reject(&:empty?)
+       @custom_attributes.unshift('time') if @output_include_time
+       @custom_attributes.unshift('tag') if @output_include_tag
+       Proc.new { |tag, time, record|
+         @custom_attributes.map { |attr|
+           record[attr].nil? ? '' : record[attr].to_s
+         }.join(@f_separator)
+       }
+     else
+       @formatter = Fluent::Plugin.new_formatter(@output_data_type)
+       @formatter.configure(conf)
+       @formatter.method(:format)
+     end
+   end
+
+   def write(chunk)
+     tag = chunk.key
+     def_topic = @default_topic || tag
+
+     record_buf = nil
+     record_buf_bytes = nil
+
+     begin
+       chunk.msgpack_each.map { |time, record|
+         begin
+           if @output_include_time
+             if @time_format
+               record['time'.freeze] = Time.at(time).strftime(@time_format)
+             else
+               record['time'.freeze] = time
+             end
+           end
+
+           record['tag'] = tag if @output_include_tag
+           topic = (@exclude_topic_key ? record.delete('topic'.freeze) : record['topic'.freeze]) || def_topic
+           partition = (@exclude_partition ? record.delete('partition'.freeze) : record['partition'.freeze]) || @default_partition
+           message_key = (@exclude_message_key ? record.delete('message_key'.freeze) : record['message_key'.freeze]) || @default_message_key
+
+           record_buf = @formatter_proc.call(tag, time, record)
+           record_buf_bytes = record_buf.bytesize
+           if @max_send_limit_bytes && record_buf_bytes > @max_send_limit_bytes
+             log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :record_size => record_buf_bytes
+             log.debug "Skipped event:", :record => record
+             next
+           end
+         rescue StandardError => e
+           log.warn "unexpected error while formatting record. Skip broken event:", :error => e.to_s, :error_class => e.class.to_s, :time => time, :record => record
+           next
+         end
+
+         producer = get_producer
+         handler = enqueue_with_retry(producer, topic, record_buf, message_key, partition, time)
+         handler
+       }.each { |handler|
+         handler.wait(max_wait_timeout: @rdkafka_delivery_handle_poll_timeout) if @rdkafka_delivery_handle_poll_timeout != 0
+       }
+     end
+   rescue Exception => e
+     log.warn "Send exception occurred: #{e} at #{e.backtrace.first}"
+     # Raise exception to retry sending messages
+     raise e
+   end
+
+   def enqueue_with_retry(producer, topic, record_buf, message_key, partition, time)
+     attempt = 0
+     loop do
+       begin
+         handler = producer.produce(topic: topic, payload: record_buf, key: message_key, partition: partition, timestamp: @use_event_time ? Time.at(time) : nil)
+         return handler
+       rescue Exception => e
+         if e.respond_to?(:code) && e.code == :queue_full
+           if attempt <= @max_enqueue_retries
+             log.warn "Failed to enqueue message; attempting retry #{attempt} of #{@max_enqueue_retries} after #{@enqueue_retry_backoff}s"
+             sleep @enqueue_retry_backoff
+             attempt += 1
+           else
+             raise "Failed to enqueue message although tried retry #{@max_enqueue_retries} times"
+           end
+         else
+           raise e
+         end
+       end
+     end
+   end
+ end
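
Similarly, a minimal sketch of a match section for the rdkafka output registered above, showing how extra librdkafka properties can be passed through the rdkafka_options hash. The broker address, topic, and option values are illustrative placeholders; only the parameter names come from the config_param declarations in the diff, and the hash is given in the JSON form that Fluentd hash parameters accept.

  <match app.**>
    @type rdkafka
    brokers localhost:9092
    default_topic app-logs          # hypothetical topic name
    output_data_type json
    compression_codec gzip
    rdkafka_delivery_handle_poll_timeout 30
    rdkafka_options {"queue.buffering.max.ms": 100, "log.connection.close": false}
  </match>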