fluent-plugin-kafka-xst 0.19.1

Files changed (38)
  1. checksums.yaml +7 -0
  2. data/.github/ISSUE_TEMPLATE/bug_report.yaml +72 -0
  3. data/.github/ISSUE_TEMPLATE/config.yml +5 -0
  4. data/.github/ISSUE_TEMPLATE/feature_request.yaml +39 -0
  5. data/.github/dependabot.yml +6 -0
  6. data/.github/workflows/linux.yml +45 -0
  7. data/.github/workflows/stale-actions.yml +24 -0
  8. data/.gitignore +2 -0
  9. data/ChangeLog +344 -0
  10. data/Gemfile +6 -0
  11. data/LICENSE +14 -0
  12. data/README.md +594 -0
  13. data/Rakefile +12 -0
  14. data/ci/prepare-kafka-server.sh +33 -0
  15. data/examples/README.md +3 -0
  16. data/examples/out_kafka2/dynamic_topic_based_on_tag.conf +32 -0
  17. data/examples/out_kafka2/protobuf-formatter.conf +23 -0
  18. data/examples/out_kafka2/record_key.conf +31 -0
  19. data/fluent-plugin-kafka.gemspec +27 -0
  20. data/lib/fluent/plugin/in_kafka.rb +388 -0
  21. data/lib/fluent/plugin/in_kafka_group.rb +394 -0
  22. data/lib/fluent/plugin/in_rdkafka_group.rb +305 -0
  23. data/lib/fluent/plugin/kafka_plugin_util.rb +84 -0
  24. data/lib/fluent/plugin/kafka_producer_ext.rb +308 -0
  25. data/lib/fluent/plugin/out_kafka.rb +268 -0
  26. data/lib/fluent/plugin/out_kafka2.rb +427 -0
  27. data/lib/fluent/plugin/out_kafka_buffered.rb +374 -0
  28. data/lib/fluent/plugin/out_rdkafka.rb +324 -0
  29. data/lib/fluent/plugin/out_rdkafka2.rb +526 -0
  30. data/test/helper.rb +34 -0
  31. data/test/plugin/test_in_kafka.rb +66 -0
  32. data/test/plugin/test_in_kafka_group.rb +69 -0
  33. data/test/plugin/test_kafka_plugin_util.rb +44 -0
  34. data/test/plugin/test_out_kafka.rb +68 -0
  35. data/test/plugin/test_out_kafka2.rb +138 -0
  36. data/test/plugin/test_out_kafka_buffered.rb +68 -0
  37. data/test/plugin/test_out_rdkafka2.rb +182 -0
  38. metadata +214 -0
data/lib/fluent/plugin/out_kafka2.rb
@@ -0,0 +1,427 @@
+require 'fluent/plugin/output'
+require 'fluent/plugin/kafka_plugin_util'
+
+require 'kafka'
+require 'fluent/plugin/kafka_producer_ext'
+
+module Fluent::Plugin
+  class Fluent::Kafka2Output < Output
+    Fluent::Plugin.register_output('kafka2', self)
+
+    helpers :inject, :formatter, :event_emitter, :record_accessor
+
+    config_param :brokers, :array, :value_type => :string, :default => ['localhost:9092'],
+                 :desc => <<-DESC
+Set brokers directly:
+<broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,..
+DESC
+    config_param :topic, :string, :default => nil, :desc => "kafka topic. Placeholders are supported"
+    config_param :topic_key, :string, :default => 'topic', :desc => "Field for kafka topic"
+    config_param :default_topic, :string, :default => nil,
+                 :desc => "Default output topic when record doesn't have topic field"
+    config_param :message_key_key, :string, :default => 'message_key', :desc => "Field for kafka message key"
+    config_param :default_message_key, :string, :default => nil
+    config_param :partition_key_key, :string, :default => 'partition_key', :desc => "Field for kafka partition key"
+    config_param :default_partition_key, :string, :default => nil
+    config_param :partition_key, :string, :default => 'partition', :desc => "Field for kafka partition"
+    config_param :partitioner_hash_function, :enum, list: [:crc32, :murmur2], :default => :crc32,
+                 :desc => "Specify kafka partitioner hash algorithm"
+    config_param :default_partition, :integer, :default => nil
+    config_param :record_key, :string, :default => nil,
+                 :desc => <<-DESC
+A jsonpath to a record value pointing to the field which will be passed to the formatter and sent as the Kafka message payload.
+If defined, only this field in the record will be sent to Kafka as the message payload.
+DESC
+    config_param :use_default_for_unknown_topic, :bool, :default => false, :desc => "If true, default_topic is used when topic not found"
+    config_param :client_id, :string, :default => 'fluentd'
+    config_param :idempotent, :bool, :default => false, :desc => 'Enable idempotent producer'
+    config_param :sasl_over_ssl, :bool, :default => true,
+                 :desc => <<-DESC
+Set to false to prevent SSL strict mode when using SASL authentication
+DESC
+    config_param :exclude_partition_key, :bool, :default => false,
+                 :desc => 'Set true to remove partition key from data'
+    config_param :exclude_partition, :bool, :default => false,
+                 :desc => 'Set true to remove partition from data'
+    config_param :exclude_message_key, :bool, :default => false,
+                 :desc => 'Set true to remove message key from data'
+    config_param :exclude_topic_key, :bool, :default => false,
+                 :desc => 'Set true to remove topic name key from data'
+    config_param :exclude_fields, :array, :default => [], value_type: :string,
+                 :desc => 'Fields to remove from data where the value is a jsonpath to a record value'
+    config_param :use_event_time, :bool, :default => false, :desc => 'Use fluentd event time for kafka create_time'
+    config_param :headers, :hash, default: {}, symbolize_keys: true, value_type: :string,
+                 :desc => 'Kafka message headers'
+    config_param :headers_from_record, :hash, default: {}, symbolize_keys: true, value_type: :string,
+                 :desc => 'Kafka message headers where the header value is a jsonpath to a record value'
+    config_param :resolve_seed_brokers, :bool, :default => false,
+                 :desc => "support brokers' hostname with multiple addresses"
+
+    config_param :get_kafka_client_log, :bool, :default => false
+
+    config_param :ignore_exceptions, :array, :default => [], value_type: :string, :desc => "Ignorable exception list"
+    config_param :exception_backup, :bool, :default => true, :desc => "Chunk backup flag when ignored exception occurred"
+
+    config_param :connect_timeout, :integer, :default => nil,
+                 :desc => "The timeout setting for connecting to brokers"
+    config_param :socket_timeout, :integer, :default => nil,
+                 :desc => "The timeout setting for socket connection"
+
+    # ruby-kafka producer options
+    config_param :max_send_retries, :integer, :default => 2,
+                 :desc => "Number of times to retry sending of messages to a leader."
+    config_param :required_acks, :integer, :default => -1,
+                 :desc => "The number of acks required per request."
+    config_param :ack_timeout, :time, :default => nil,
+                 :desc => "How long the producer waits for acks."
+    config_param :compression_codec, :string, :default => nil,
+                 :desc => <<-DESC
+The codec the producer uses to compress messages.
+Supported codecs depend on ruby-kafka: https://github.com/zendesk/ruby-kafka#compression
+DESC
+    config_param :max_send_limit_bytes, :size, :default => nil
+    config_param :discard_kafka_delivery_failed, :bool, :default => false
+    config_param :active_support_notification_regex, :string, :default => nil,
+                 :desc => <<-DESC
+Add a regular expression to capture ActiveSupport notifications from the Kafka client
+requires activesupport gem - records will be generated under fluent_kafka_stats.**
+DESC
+    config_param :share_producer, :bool, :default => false, :desc => 'share kafka producer between flush threads'
+
+    config_section :buffer do
+      config_set_default :chunk_keys, ["topic"]
+    end
+    config_section :format do
+      config_set_default :@type, 'json'
+    end
+
+    include Fluent::KafkaPluginUtil::AwsIamSettings
+    include Fluent::KafkaPluginUtil::SSLSettings
+    include Fluent::KafkaPluginUtil::SaslSettings
+
+    def initialize
+      super
+
+      @kafka = nil
+      @producers = nil
+      @producers_mutex = nil
+      @shared_producer = nil
+
+      @writing_threads_mutex = Mutex.new
+      @writing_threads = Set.new
+    end
+
+    def refresh_client(raise_error = true)
+      begin
+        logger = @get_kafka_client_log ? log : nil
+        use_long_lived_aws_credentials = @sasl_aws_msk_iam_access_key_id != nil && @sasl_aws_msk_iam_secret_key_id != nil
+        if @scram_mechanism != nil && @username != nil && @password != nil
+          sasl_params = {
+            sasl_scram_username: @username,
+            sasl_scram_password: @password,
+            sasl_scram_mechanism: @scram_mechanism,
+          }
+        elsif @username != nil && @password != nil
+          sasl_params = {
+            sasl_plain_username: @username,
+            sasl_plain_password: @password,
+          }
+        elsif use_long_lived_aws_credentials
+          sasl_params = {
+            sasl_aws_msk_iam_access_key_id: @sasl_aws_msk_iam_access_key_id,
+            sasl_aws_msk_iam_secret_key_id: @sasl_aws_msk_iam_secret_key_id,
+            sasl_aws_msk_iam_aws_region: @sasl_aws_msk_iam_aws_region,
+          }
+        else
+          sasl_params = {
+            sasl_gssapi_principal: @principal,
+            sasl_gssapi_keytab: @keytab,
+          }
+        end
+        @kafka = Kafka.new(
+          seed_brokers: @seed_brokers,
+          client_id: @client_id,
+          logger: logger,
+          connect_timeout: @connect_timeout,
+          socket_timeout: @socket_timeout,
+          ssl_ca_cert_file_path: @ssl_ca_cert,
+          ssl_client_cert: read_ssl_file(@ssl_client_cert),
+          ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
+          ssl_client_cert_key_password: @ssl_client_cert_key_password,
+          ssl_client_cert_chain: read_ssl_file(@ssl_client_cert_chain),
+          ssl_ca_certs_from_system: @ssl_ca_certs_from_system,
+          ssl_verify_hostname: @ssl_verify_hostname,
+          resolve_seed_brokers: @resolve_seed_brokers,
+          partitioner: Kafka::Partitioner.new(hash_function: @partitioner_hash_function),
+          sasl_over_ssl: @sasl_over_ssl,
+          **sasl_params)
+        log.info "initialized kafka producer: #{@client_id}"
+      rescue Exception => e
+        if raise_error # During startup, error should be reported to engine and stop its phase for safety.
+          raise e
+        else
+          log.error e
+        end
+      end
+    end
+
+    def configure(conf)
+      super
+
+      if @brokers.size > 0
+        @seed_brokers = @brokers
+        log.info "brokers has been set: #{@seed_brokers}"
+      else
+        raise Fluent::ConfigError, 'No brokers specified. Need one broker at least.'
+      end
+
+      formatter_conf = conf.elements('format').first
+      unless formatter_conf
+        raise Fluent::ConfigError, "<format> section is required."
+      end
+      unless formatter_conf["@type"]
+        raise Fluent::ConfigError, "format/@type is required."
+      end
+      @formatter_proc = setup_formatter(formatter_conf)
+
+      if @default_topic.nil?
+        if @use_default_for_unknown_topic
+          raise Fluent::ConfigError, "default_topic must be set when use_default_for_unknown_topic is true"
+        end
+        if @chunk_keys.include?('topic') && !@chunk_key_tag
+          log.warn "Use 'topic' field of event record for topic but no fallback. Recommend to set default_topic or set 'tag' in buffer chunk keys like <buffer topic,tag>"
+        end
+      else
+        if @chunk_key_tag
+          log.warn "default_topic is set. Fluentd's event tag is not used for topic"
+        end
+      end
+
+      @producer_opts = {max_retries: @max_send_retries, required_acks: @required_acks, idempotent: @idempotent}
+      @producer_opts[:ack_timeout] = @ack_timeout if @ack_timeout
+      @producer_opts[:compression_codec] = @compression_codec.to_sym if @compression_codec
+      if @active_support_notification_regex
+        require 'active_support/notifications'
+        require 'active_support/core_ext/hash/keys'
+        ActiveSupport::Notifications.subscribe(Regexp.new(@active_support_notification_regex)) do |*args|
+          event = ActiveSupport::Notifications::Event.new(*args)
+          message = event.payload.respond_to?(:stringify_keys) ? event.payload.stringify_keys : event.payload
+          @router.emit("fluent_kafka_stats.#{event.name}", Time.now.to_i, message)
+        end
+      end
+
+      @topic_key_sym = @topic_key.to_sym
+
+      @headers_from_record_accessors = {}
+      @headers_from_record.each do |key, value|
+        @headers_from_record_accessors[key] = record_accessor_create(value)
+      end
+
+      @exclude_field_accessors = @exclude_fields.map do |field|
+        record_accessor_create(field)
+      end
+
+      @record_field_accessor = nil
+      @record_field_accessor = record_accessor_create(@record_key) unless @record_key.nil?
+    end
+
+    def multi_workers_ready?
+      true
+    end
+
+    def create_producer
+      @kafka.custom_producer(**@producer_opts)
+    end
+
+    def start
+      super
+      refresh_client
+
+      if @share_producer
+        @shared_producer = create_producer
+      else
+        @producers = {}
+        @producers_mutex = Mutex.new
+      end
+    end
+
+    def close
+      super
+      @kafka.close if @kafka
+    end
+
+    def terminate
+      super
+      @kafka = nil
+    end
+
+    def wait_writing_threads
+      done = false
+      until done do
+        @writing_threads_mutex.synchronize do
+          done = true if @writing_threads.empty?
+        end
+        sleep(1) unless done
+      end
+    end
+
+    def shutdown
+      super
+      wait_writing_threads
+      shutdown_producers
+    end
+
+    def shutdown_producers
+      if @share_producer
+        @shared_producer.shutdown
+        @shared_producer = nil
+      else
+        @producers_mutex.synchronize {
+          shutdown_threads = @producers.map { |key, producer|
+            th = Thread.new {
+              producer.shutdown
+            }
+            th.abort_on_exception = true
+            th
+          }
+          shutdown_threads.each { |th| th.join }
+          @producers = {}
+        }
+      end
+    end
+
+    def get_producer
+      if @share_producer
+        @shared_producer
+      else
+        @producers_mutex.synchronize {
+          producer = @producers[Thread.current.object_id]
+          unless producer
+            producer = create_producer
+            @producers[Thread.current.object_id] = producer
+          end
+          producer
+        }
+      end
+    end
+
+    def setup_formatter(conf)
+      type = conf['@type']
+      case type
+      when 'json'
+        begin
+          require 'oj'
+          Oj.default_options = Fluent::DEFAULT_OJ_OPTIONS
+          Proc.new { |tag, time, record| Oj.dump(record) }
+        rescue LoadError
+          require 'yajl'
+          Proc.new { |tag, time, record| Yajl::Encoder.encode(record) }
+        end
+      when 'ltsv'
+        require 'ltsv'
+        Proc.new { |tag, time, record| LTSV.dump(record) }
+      else
+        @formatter = formatter_create(usage: 'kafka-plugin', conf: conf)
+        @formatter.method(:format)
+      end
+    end
+
+    # TODO: optimize write performance
+    def write(chunk)
+      @writing_threads_mutex.synchronize { @writing_threads.add(Thread.current) }
+
+      tag = chunk.metadata.tag
+      topic = if @topic
+                extract_placeholders(@topic, chunk)
+              else
+                (chunk.metadata.variables && chunk.metadata.variables[@topic_key_sym]) || @default_topic || tag
+              end
+
+      messages = 0
+
+      base_headers = @headers
+      mutate_headers = !@headers_from_record_accessors.empty?
+
+      begin
+        producer = get_producer
+        chunk.msgpack_each { |time, record|
+          begin
+            record = inject_values_to_record(tag, time, record)
+            record.delete(@topic_key) if @exclude_topic_key
+            partition_key = (@exclude_partition_key ? record.delete(@partition_key_key) : record[@partition_key_key]) || @default_partition_key
+            partition = (@exclude_partition ? record.delete(@partition_key) : record[@partition_key]) || @default_partition
+            message_key = (@exclude_message_key ? record.delete(@message_key_key) : record[@message_key_key]) || @default_message_key
+
+            if mutate_headers
+              headers = base_headers.clone
+              @headers_from_record_accessors.each do |key, header_accessor|
+                headers[key] = header_accessor.call(record)
+              end
+            else
+              headers = base_headers
+            end
+
+            unless @exclude_fields.empty?
+              @exclude_field_accessors.each do |exclude_field_accessor|
+                exclude_field_accessor.delete(record)
+              end
+            end
+
+            record = @record_field_accessor.call(record) unless @record_field_accessor.nil?
+            record_buf = @formatter_proc.call(tag, time, record)
+            record_buf_bytes = record_buf.bytesize
+            if @max_send_limit_bytes && record_buf_bytes > @max_send_limit_bytes
+              log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :record_size => record_buf_bytes
+              log.debug "Skipped event:", :record => record
+              next
+            end
+          rescue StandardError => e
+            log.warn "unexpected error during format record. Skip broken event:", :error => e.to_s, :error_class => e.class.to_s, :time => time, :record => record
+            next
+          end
+
+          log.trace { "message will send to #{topic} with partition_key: #{partition_key}, partition: #{partition}, message_key: #{message_key} and value: #{record_buf}." }
+          messages += 1
+
+          producer.produce(record_buf, key: message_key, partition_key: partition_key, partition: partition, headers: headers,
+                           create_time: @use_event_time ? Time.at(time) : Time.now, topic: topic)
+        }
+
+        if messages > 0
+          log.debug { "#{messages} messages sent." }
+          if @discard_kafka_delivery_failed
+            begin
+              producer.deliver_messages
+            rescue Kafka::DeliveryFailed => e
+              log.warn "DeliveryFailed occurred. Discard broken event:", :error => e.to_s, :error_class => e.class.to_s, :tag => tag
+              producer.clear_buffer
+            end
+          else
+            producer.deliver_messages
+          end
+        end
+      rescue Kafka::UnknownTopicOrPartition
+        if @use_default_for_unknown_topic && topic != @default_topic
+          log.warn "'#{topic}' topic not found. Retry with '#{default_topic}' topic"
+          topic = @default_topic
+          retry
+        end
+        raise
+      end
+    rescue Exception => e
+      ignore = @ignore_exceptions.include?(e.class.name)
+
+      log.warn "Send exception occurred: #{e}"
+      log.warn "Exception Backtrace : #{e.backtrace.join("\n")}"
+      log.warn "Exception ignored in tag : #{tag}" if ignore
+      # For safety, refresh client and its producers
+      refresh_client(false)
+      # raise UnrecoverableError for backup ignored exception chunk
+      raise Fluent::UnrecoverableError if ignore && exception_backup
+      # Raise exception to retry sending messages
+      raise e unless ignore
+    ensure
+      @writing_threads_mutex.synchronize { @writing_threads.delete(Thread.current) }
+    end
+  end
+end
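
For reference, a minimal <match> block exercising the parameters defined above might look like the following sketch. The tag pattern app.**, the broker addresses, and the logs topic are illustrative placeholders rather than values taken from the gem; the <format> and <buffer topic> sections mirror the config_section defaults declared in the class, and flush_interval is a standard Fluentd buffer parameter.

  <match app.**>
    @type kafka2
    # placeholder broker list; format is <host>:<port>,<host>:<port>,..
    brokers broker1:9092,broker2:9092
    # fallback topic used when the record carries no 'topic' field
    default_topic logs
    use_event_time true
    required_acks -1
    compression_codec gzip
    <format>
      @type json
    </format>
    <buffer topic>
      flush_interval 10s
    </buffer>
  </match>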