fluent-plugin-kafka-xst 0.19.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. checksums.yaml +7 -0
  2. data/.github/ISSUE_TEMPLATE/bug_report.yaml +72 -0
  3. data/.github/ISSUE_TEMPLATE/config.yml +5 -0
  4. data/.github/ISSUE_TEMPLATE/feature_request.yaml +39 -0
  5. data/.github/dependabot.yml +6 -0
  6. data/.github/workflows/linux.yml +45 -0
  7. data/.github/workflows/stale-actions.yml +24 -0
  8. data/.gitignore +2 -0
  9. data/ChangeLog +344 -0
  10. data/Gemfile +6 -0
  11. data/LICENSE +14 -0
  12. data/README.md +594 -0
  13. data/Rakefile +12 -0
  14. data/ci/prepare-kafka-server.sh +33 -0
  15. data/examples/README.md +3 -0
  16. data/examples/out_kafka2/dynamic_topic_based_on_tag.conf +32 -0
  17. data/examples/out_kafka2/protobuf-formatter.conf +23 -0
  18. data/examples/out_kafka2/record_key.conf +31 -0
  19. data/fluent-plugin-kafka.gemspec +27 -0
  20. data/lib/fluent/plugin/in_kafka.rb +388 -0
  21. data/lib/fluent/plugin/in_kafka_group.rb +394 -0
  22. data/lib/fluent/plugin/in_rdkafka_group.rb +305 -0
  23. data/lib/fluent/plugin/kafka_plugin_util.rb +84 -0
  24. data/lib/fluent/plugin/kafka_producer_ext.rb +308 -0
  25. data/lib/fluent/plugin/out_kafka.rb +268 -0
  26. data/lib/fluent/plugin/out_kafka2.rb +427 -0
  27. data/lib/fluent/plugin/out_kafka_buffered.rb +374 -0
  28. data/lib/fluent/plugin/out_rdkafka.rb +324 -0
  29. data/lib/fluent/plugin/out_rdkafka2.rb +526 -0
  30. data/test/helper.rb +34 -0
  31. data/test/plugin/test_in_kafka.rb +66 -0
  32. data/test/plugin/test_in_kafka_group.rb +69 -0
  33. data/test/plugin/test_kafka_plugin_util.rb +44 -0
  34. data/test/plugin/test_out_kafka.rb +68 -0
  35. data/test/plugin/test_out_kafka2.rb +138 -0
  36. data/test/plugin/test_out_kafka_buffered.rb +68 -0
  37. data/test/plugin/test_out_rdkafka2.rb +182 -0
  38. metadata +214 -0
data/lib/fluent/plugin/out_kafka2.rb
@@ -0,0 +1,427 @@
+require 'fluent/plugin/output'
+require 'fluent/plugin/kafka_plugin_util'
+
+require 'kafka'
+require 'fluent/plugin/kafka_producer_ext'
+
+module Fluent::Plugin
+  class Fluent::Kafka2Output < Output
+    Fluent::Plugin.register_output('kafka2', self)
+
+    helpers :inject, :formatter, :event_emitter, :record_accessor
+
+    config_param :brokers, :array, :value_type => :string, :default => ['localhost:9092'],
+                 :desc => <<-DESC
+Set brokers directly:
+<broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,..
+DESC
+    config_param :topic, :string, :default => nil, :desc => "kafka topic. Placeholders are supported"
+    config_param :topic_key, :string, :default => 'topic', :desc => "Field for kafka topic"
+    config_param :default_topic, :string, :default => nil,
+                 :desc => "Default output topic when record doesn't have topic field"
+    config_param :message_key_key, :string, :default => 'message_key', :desc => "Field for kafka message key"
+    config_param :default_message_key, :string, :default => nil
+    config_param :partition_key_key, :string, :default => 'partition_key', :desc => "Field for kafka partition key"
+    config_param :default_partition_key, :string, :default => nil
+    config_param :partition_key, :string, :default => 'partition', :desc => "Field for kafka partition"
+    config_param :partitioner_hash_function, :enum, list: [:crc32, :murmur2], :default => :crc32,
+                 :desc => "Specify kafka partitioner hash algorithm"
+    config_param :default_partition, :integer, :default => nil
+    config_param :record_key, :string, :default => nil,
+                 :desc => <<-DESC
+A jsonpath to a record value pointing to the field which will be passed to the formatter and sent as the Kafka message payload.
+If defined, only this field in the record will be sent to Kafka as the message payload.
+DESC
+    config_param :use_default_for_unknown_topic, :bool, :default => false, :desc => "If true, default_topic is used when topic not found"
+    config_param :client_id, :string, :default => 'fluentd'
+    config_param :idempotent, :bool, :default => false, :desc => 'Enable idempotent producer'
+    config_param :sasl_over_ssl, :bool, :default => true,
+                 :desc => <<-DESC
+Set to false to prevent SSL strict mode when using SASL authentication
+DESC
+    config_param :exclude_partition_key, :bool, :default => false,
+                 :desc => 'Set true to remove partition key from data'
+    config_param :exclude_partition, :bool, :default => false,
+                 :desc => 'Set true to remove partition from data'
+    config_param :exclude_message_key, :bool, :default => false,
+                 :desc => 'Set true to remove message key from data'
+    config_param :exclude_topic_key, :bool, :default => false,
+                 :desc => 'Set true to remove topic name key from data'
+    config_param :exclude_fields, :array, :default => [], value_type: :string,
+                 :desc => 'Fields to remove from data where the value is a jsonpath to a record value'
+    config_param :use_event_time, :bool, :default => false, :desc => 'Use fluentd event time for kafka create_time'
+    config_param :headers, :hash, default: {}, symbolize_keys: true, value_type: :string,
+                 :desc => 'Kafka message headers'
+    config_param :headers_from_record, :hash, default: {}, symbolize_keys: true, value_type: :string,
+                 :desc => 'Kafka message headers where the header value is a jsonpath to a record value'
+    config_param :resolve_seed_brokers, :bool, :default => false,
+                 :desc => "support brokers' hostname with multiple addresses"
+
+    config_param :get_kafka_client_log, :bool, :default => false
+
+    config_param :ignore_exceptions, :array, :default => [], value_type: :string, :desc => "Ignorable exception list"
+    config_param :exception_backup, :bool, :default => true, :desc => "Chunk backup flag when ignore exception occurred"
+
+    config_param :connect_timeout, :integer, :default => nil,
+                 :desc => "The timeout setting for connecting to brokers"
+    config_param :socket_timeout, :integer, :default => nil,
+                 :desc => "The timeout setting for socket connection"
+
+    # ruby-kafka producer options
+    config_param :max_send_retries, :integer, :default => 2,
+                 :desc => "Number of times to retry sending of messages to a leader."
+    config_param :required_acks, :integer, :default => -1,
+                 :desc => "The number of acks required per request."
+    config_param :ack_timeout, :time, :default => nil,
+                 :desc => "How long the producer waits for acks."
+    config_param :compression_codec, :string, :default => nil,
+                 :desc => <<-DESC
+The codec the producer uses to compress messages.
+Supported codecs depends on ruby-kafka: https://github.com/zendesk/ruby-kafka#compression
+DESC
+    config_param :max_send_limit_bytes, :size, :default => nil
+    config_param :discard_kafka_delivery_failed, :bool, :default => false
+    config_param :active_support_notification_regex, :string, :default => nil,
+                 :desc => <<-DESC
+Add a regular expression to capture ActiveSupport notifications from the Kafka client
+requires activesupport gem - records will be generated under fluent_kafka_stats.**
+DESC
+    config_param :share_producer, :bool, :default => false, :desc => 'share kafka producer between flush threads'
+
+    config_section :buffer do
+      config_set_default :chunk_keys, ["topic"]
+    end
+    config_section :format do
+      config_set_default :@type, 'json'
+    end
+
+    include Fluent::KafkaPluginUtil::AwsIamSettings
+    include Fluent::KafkaPluginUtil::SSLSettings
+    include Fluent::KafkaPluginUtil::SaslSettings
+
+    def initialize
+      super
+
+      @kafka = nil
+      @producers = nil
+      @producers_mutex = nil
+      @shared_producer = nil
+
+      @writing_threads_mutex = Mutex.new
+      @writing_threads = Set.new
+    end
+
+    def refresh_client(raise_error = true)
+      begin
+        logger = @get_kafka_client_log ? log : nil
+        use_long_lived_aws_credentials = @sasl_aws_msk_iam_access_key_id != nil && @sasl_aws_msk_iam_secret_key_id != nil
+        if @scram_mechanism != nil && @username != nil && @password != nil
+          sasl_params = {
+            sasl_scram_username: @username,
+            sasl_scram_password: @password,
+            sasl_scram_mechanism: @scram_mechanism,
+          }
+        elsif @username != nil && @password != nil
+          sasl_params = {
+            sasl_plain_username: @username,
+            sasl_plain_password: @password,
+          }
+        elsif use_long_lived_aws_credentials
+          sasl_params = {
+            sasl_aws_msk_iam_access_key_id: @sasl_aws_msk_iam_access_key_id,
+            sasl_aws_msk_iam_secret_key_id: @sasl_aws_msk_iam_secret_key_id,
+            sasl_aws_msk_iam_aws_region: @sasl_aws_msk_iam_aws_region,
+          }
+        else
+          sasl_params = {
+            sasl_gssapi_principal: @principal,
+            sasl_gssapi_keytab: @keytab,
+          }
+        end
+        @kafka = Kafka.new(
+          seed_brokers: @seed_brokers,
+          client_id: @client_id,
+          logger: logger,
+          connect_timeout: @connect_timeout,
+          socket_timeout: @socket_timeout,
+          ssl_ca_cert_file_path: @ssl_ca_cert,
+          ssl_client_cert: read_ssl_file(@ssl_client_cert),
+          ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
+          ssl_client_cert_key_password: @ssl_client_cert_key_password,
+          ssl_client_cert_chain: read_ssl_file(@ssl_client_cert_chain),
+          ssl_ca_certs_from_system: @ssl_ca_certs_from_system,
+          ssl_verify_hostname: @ssl_verify_hostname,
+          resolve_seed_brokers: @resolve_seed_brokers,
+          partitioner: Kafka::Partitioner.new(hash_function: @partitioner_hash_function),
+          sasl_over_ssl: @sasl_over_ssl,
+          **sasl_params)
+        log.info "initialized kafka producer: #{@client_id}"
+      rescue Exception => e
+        if raise_error # During startup, error should be reported to engine and stop its phase for safety.
+          raise e
+        else
+          log.error e
+        end
+      end
+    end
+
+    def configure(conf)
+      super
+
+      if @brokers.size > 0
+        @seed_brokers = @brokers
+        log.info "brokers has been set: #{@seed_brokers}"
+      else
+        raise Fluent::ConfigError, 'No brokers specified. Need one broker at least.'
+      end
+
+      formatter_conf = conf.elements('format').first
+      unless formatter_conf
+        raise Fluent::ConfigError, "<format> section is required."
+      end
+      unless formatter_conf["@type"]
+        raise Fluent::ConfigError, "format/@type is required."
+      end
+      @formatter_proc = setup_formatter(formatter_conf)
+
+      if @default_topic.nil?
+        if @use_default_for_unknown_topic
+          raise Fluent::ConfigError, "default_topic must be set when use_default_for_unknown_topic is true"
+        end
+        if @chunk_keys.include?('topic') && !@chunk_key_tag
+          log.warn "Use 'topic' field of event record for topic but no fallback. Recommend to set default_topic or set 'tag' in buffer chunk keys like <buffer topic,tag>"
+        end
+      else
+        if @chunk_key_tag
+          log.warn "default_topic is set. Fluentd's event tag is not used for topic"
+        end
+      end
+
+      @producer_opts = {max_retries: @max_send_retries, required_acks: @required_acks, idempotent: @idempotent}
+      @producer_opts[:ack_timeout] = @ack_timeout if @ack_timeout
+      @producer_opts[:compression_codec] = @compression_codec.to_sym if @compression_codec
+      if @active_support_notification_regex
+        require 'active_support/notifications'
+        require 'active_support/core_ext/hash/keys'
+        ActiveSupport::Notifications.subscribe(Regexp.new(@active_support_notification_regex)) do |*args|
+          event = ActiveSupport::Notifications::Event.new(*args)
+          message = event.payload.respond_to?(:stringify_keys) ? event.payload.stringify_keys : event.payload
+          @router.emit("fluent_kafka_stats.#{event.name}", Time.now.to_i, message)
+        end
+      end
+
+      @topic_key_sym = @topic_key.to_sym
+
+      @headers_from_record_accessors = {}
+      @headers_from_record.each do |key, value|
+        @headers_from_record_accessors[key] = record_accessor_create(value)
+      end
+
+      @exclude_field_accessors = @exclude_fields.map do |field|
+        record_accessor_create(field)
+      end
+
+      @record_field_accessor = nil
+      @record_field_accessor = record_accessor_create(@record_key) unless @record_key.nil?
+    end
+
+    def multi_workers_ready?
+      true
+    end
+
+    def create_producer
+      @kafka.custom_producer(**@producer_opts)
+    end
+
+    def start
+      super
+      refresh_client
+
+      if @share_producer
+        @shared_producer = create_producer
+      else
+        @producers = {}
+        @producers_mutex = Mutex.new
+      end
+    end
+
+    def close
+      super
+      @kafka.close if @kafka
+    end
+
+    def terminate
+      super
+      @kafka = nil
+    end
+
+    def wait_writing_threads
+      done = false
+      until done do
+        @writing_threads_mutex.synchronize do
+          done = true if @writing_threads.empty?
+        end
+        sleep(1) unless done
+      end
+    end
+
+    def shutdown
+      super
+      wait_writing_threads
+      shutdown_producers
+    end
+
+    def shutdown_producers
+      if @share_producer
+        @shared_producer.shutdown
+        @shared_producer = nil
+      else
+        @producers_mutex.synchronize {
+          shutdown_threads = @producers.map { |key, producer|
+            th = Thread.new {
+              producer.shutdown
+            }
+            th.abort_on_exception = true
+            th
+          }
+          shutdown_threads.each { |th| th.join }
+          @producers = {}
+        }
+      end
+    end
+
+    def get_producer
+      if @share_producer
+        @shared_producer
+      else
+        @producers_mutex.synchronize {
+          producer = @producers[Thread.current.object_id]
+          unless producer
+            producer = create_producer
+            @producers[Thread.current.object_id] = producer
+          end
+          producer
+        }
+      end
+    end
+
+    def setup_formatter(conf)
+      type = conf['@type']
+      case type
+      when 'json'
+        begin
+          require 'oj'
+          Oj.default_options = Fluent::DEFAULT_OJ_OPTIONS
+          Proc.new { |tag, time, record| Oj.dump(record) }
+        rescue LoadError
+          require 'yajl'
+          Proc.new { |tag, time, record| Yajl::Encoder.encode(record) }
+        end
+      when 'ltsv'
+        require 'ltsv'
+        Proc.new { |tag, time, record| LTSV.dump(record) }
+      else
+        @formatter = formatter_create(usage: 'kafka-plugin', conf: conf)
+        @formatter.method(:format)
+      end
+    end
+
+    # TODO: optimize write performance
+    def write(chunk)
+      @writing_threads_mutex.synchronize { @writing_threads.add(Thread.current) }
+
+      tag = chunk.metadata.tag
+      topic = if @topic
+                extract_placeholders(@topic, chunk)
+              else
+                (chunk.metadata.variables && chunk.metadata.variables[@topic_key_sym]) || @default_topic || tag
+              end
+
+      messages = 0
+
+      base_headers = @headers
+      mutate_headers = !@headers_from_record_accessors.empty?
+
+      begin
+        producer = get_producer
+        chunk.msgpack_each { |time, record|
+          begin
+            record = inject_values_to_record(tag, time, record)
+            record.delete(@topic_key) if @exclude_topic_key
+            partition_key = (@exclude_partition_key ? record.delete(@partition_key_key) : record[@partition_key_key]) || @default_partition_key
+            partition = (@exclude_partition ? record.delete(@partition_key) : record[@partition_key]) || @default_partition
+            message_key = (@exclude_message_key ? record.delete(@message_key_key) : record[@message_key_key]) || @default_message_key
+
+            if mutate_headers
+              headers = base_headers.clone
+              @headers_from_record_accessors.each do |key, header_accessor|
+                headers[key] = header_accessor.call(record)
+              end
+            else
+              headers = base_headers
+            end
+
+            unless @exclude_fields.empty?
+              @exclude_field_accessors.each do |exclude_field_accessor|
+                exclude_field_accessor.delete(record)
+              end
+            end
+
+            record = @record_field_accessor.call(record) unless @record_field_accessor.nil?
+            record_buf = @formatter_proc.call(tag, time, record)
+            record_buf_bytes = record_buf.bytesize
+            if @max_send_limit_bytes && record_buf_bytes > @max_send_limit_bytes
+              log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :record_size => record_buf_bytes
+              log.debug "Skipped event:", :record => record
+              next
+            end
+          rescue StandardError => e
+            log.warn "unexpected error during format record. Skip broken event:", :error => e.to_s, :error_class => e.class.to_s, :time => time, :record => record
+            next
+          end
+
+          log.trace { "message will send to #{topic} with partition_key: #{partition_key}, partition: #{partition}, message_key: #{message_key} and value: #{record_buf}." }
+          messages += 1
+
+          producer.produce(record_buf, key: message_key, partition_key: partition_key, partition: partition, headers: headers,
+                           create_time: @use_event_time ? Time.at(time) : Time.now, topic: topic)
+        }
+
+        if messages > 0
+          log.debug { "#{messages} messages sent." }
+          if @discard_kafka_delivery_failed
+            begin
+              producer.deliver_messages
+            rescue Kafka::DeliveryFailed => e
+              log.warn "DeliveryFailed occurred. Discard broken event:", :error => e.to_s, :error_class => e.class.to_s, :tag => tag
+              producer.clear_buffer
+            end
+          else
+            producer.deliver_messages
+          end
+        end
+      rescue Kafka::UnknownTopicOrPartition
+        if @use_default_for_unknown_topic && topic != @default_topic
+          log.warn "'#{topic}' topic not found. Retry with '#{default_topic}' topic"
+          topic = @default_topic
+          retry
+        end
+        raise
+      end
+    rescue Exception => e
+      ignore = @ignore_exceptions.include?(e.class.name)
+
+      log.warn "Send exception occurred: #{e}"
+      log.warn "Exception Backtrace : #{e.backtrace.join("\n")}"
+      log.warn "Exception ignored in tag : #{tag}" if ignore
+      # For safety, refresh client and its producers
+      refresh_client(false)
+      # raise UnrecoverableError for backup ignored exception chunk
+      raise Fluent::UnrecoverableError if ignore && exception_backup
+      # Raise exception to retry sending messages
+      raise e unless ignore
+    ensure
+      @writing_threads_mutex.synchronize { @writing_threads.delete(Thread.current) }
+    end
+  end
+end
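
For context, the release ships sample configurations under data/examples/out_kafka2/ that pair this output with <format> and <buffer> sections. A minimal <match> sketch using only parameters defined in the file above could look like the following; the tag pattern, broker addresses, topic name, and flush interval are illustrative assumptions, not values taken from the released files.

<match app.**>
  @type kafka2
  # Hypothetical brokers and fallback topic; replace with real values.
  brokers kafka-1:9092,kafka-2:9092
  default_topic app-logs
  # Per-record routing: read the topic from the record's 'topic' field,
  # falling back to default_topic when the topic does not exist.
  topic_key topic
  use_default_for_unknown_topic true
  # Use the Fluentd event time as the Kafka message create_time.
  use_event_time true
  <format>
    @type json
  </format>
  # 'topic' is the default chunk key, so each flushed chunk maps to a single topic.
  <buffer topic>
    flush_interval 5s
  </buffer>
</match>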