fluent-plugin-kafka 0.17.1 → 0.17.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 230266adf4ba3d77b8b2fd743863377c4dd2532297f45e4489df62bfcbee1db5
-  data.tar.gz: 4b82d2f33bbbb3294f547a7af154783c432c6316f1155a0e3a822361232743e4
+  metadata.gz: 16968c0e56c22f64225e8e41e905294defc7240df6054813151d3904a79a4107
+  data.tar.gz: 71b13953b11048f201c8a8a275350e983a51377b45e84440196b701063e317c4
 SHA512:
-  metadata.gz: 9321cd8bd10dcd603b653c101e6695b7071d7b9dc61fb49bc79248b22cbe9f1db46416c40132243c9862e9c4a3888c818f7a8906906ccf0dda545032f2ac53fd
-  data.tar.gz: 832517ac39c4f775d95775454b07bc0f3ecc2f27b1c0bd4c3f8025e795dfbe869d3e349084f868bdb54cfa72188d4cbd96173192f728fae4281f7f84e5d973aa
+  metadata.gz: aedae36f4b7a29408bc96838da3e158a98ddac136987408dcd4bc347068e3d27839e844d1d3d4a239e4f175e023f19d9ecd82e14d4ab089fd43625f60dcca17c
+  data.tar.gz: 4a72ea62a754b689944d3f2de81cc495c9ef92df97c127b80b46ace6824aca00df5b4cc499eb3993d0edbd3f156f140c6c8ba718fb82f65b5328170adf26f622
data/ChangeLog CHANGED
@@ -1,3 +1,8 @@
+Release 0.17.2 - 2021/10/14
+  * out_rdkafka2: Add `max_enqueue_bytes_per_second` parameter
+  * out_rdkafka2: Support `use_event_time` parameter
+  * out_rdkafka2: Fix a potential bug that the plugin might exit without receiving responses from Kafka.
+
 Release 0.17.1 - 2021/09/24
   * out_rdkafka/out_rdkafka2: Support rdkafka 0.9.0 or later
   * out_rdkafka/out_rdkafka2: Add `exclude_fields` parameter
data/README.md CHANGED
@@ -200,6 +200,7 @@ If `ruby-kafka` doesn't fit your kafka environment, check `rdkafka2` plugin inst
   get_kafka_client_log (bool) :default => false
   headers (hash) :default => {}
   headers_from_record (hash) :default => {}
+  use_event_time (bool) :default => false
   use_default_for_unknown_topic (bool) :default => false
   discard_kafka_delivery_failed (bool) :default => false (No discard)
   partitioner_hash_function (enum) (crc32|murmur2) :default => 'crc32'
@@ -363,6 +364,7 @@ Support of fluentd v0.12 has ended. `kafka_buffered` will be an alias of `kafka2
   exclude_topic_key (bool) :default => false
   exclude_partition_key (bool) :default => false
   get_kafka_client_log (bool) :default => false
+  use_event_time (bool) :default => false
   partitioner_hash_function (enum) (crc32|murmur2) :default => 'crc32'

   # See fluentd document for buffer related parameters: https://docs.fluentd.org/v/0.12/buffer
@@ -452,6 +454,7 @@ You need to install rdkafka gem.
   exclude_topic_key (bool) :default => false
   exclude_partition_key (bool) :default => false
   discard_kafka_delivery_failed (bool) :default => false (No discard)
+  use_event_time (bool) :default => false

   # same with kafka2
   headers (hash) :default => {}
@@ -486,6 +489,10 @@ You need to install rdkafka gem.
   rdkafka_delivery_handle_poll_timeout (integer) :default => 30
   # If the record size is larger than this value, such records are ignored. Default is no limit
   max_send_limit_bytes (integer) :default => nil
+  # The maximum number of enqueueing bytes per second. It can reduce the
+  # load of both Fluentd and Kafka when excessive messages are attempted
+  # to send. Default is no limit.
+  max_enqueue_bytes_per_second (integer) :default => nil
   </match>

   If you use v0.12, use `rdkafka` instead.
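For reference, a minimal usage sketch combining the two parameters this release adds to the rdkafka2 output; the match pattern, broker address, topic name, and the 1m limit below are illustrative placeholders, not values taken from the gem's documentation:

  <match app.**>
    @type rdkafka2
    brokers localhost:9092
    default_topic logs

    # New in 0.17.2: stamp Kafka messages with the fluentd event time
    # instead of the time the record is produced to Kafka.
    use_event_time true

    # New in 0.17.2: throttle enqueueing to roughly 1 MiB per second;
    # writers that exceed the budget sleep until the next one-second window.
    max_enqueue_bytes_per_second 1m
  </match>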
@@ -13,7 +13,7 @@ Gem::Specification.new do |gem|
   gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
   gem.name = "fluent-plugin-kafka"
   gem.require_paths = ["lib"]
-  gem.version = '0.17.1'
+  gem.version = '0.17.2'
   gem.required_ruby_version = ">= 2.1.0"

   gem.add_dependency "fluentd", [">= 0.10.58", "< 2"]
@@ -65,6 +65,7 @@ DESC
   The codec the producer uses to compress messages.
   Supported codecs: (gzip|snappy)
   DESC
+  config_param :use_event_time, :bool, :default => false, :desc => 'Use fluentd event time for rdkafka timestamp'
   config_param :max_send_limit_bytes, :size, :default => nil
   config_param :rdkafka_buffering_max_ms, :integer, :default => nil
   config_param :rdkafka_buffering_max_messages, :integer, :default => nil
@@ -286,7 +287,7 @@ DESC
     end

     producer = get_producer
-    handler = enqueue_with_retry(producer, topic, record_buf, message_key, partition)
+    handler = enqueue_with_retry(producer, topic, record_buf, message_key, partition, time)
     handler
   }.each { |handler|
     handler.wait(max_wait_timeout: @rdkafka_delivery_handle_poll_timeout) if @rdkafka_delivery_handle_poll_timeout != 0
@@ -298,11 +299,11 @@ DESC
     raise e
   end

-  def enqueue_with_retry(producer, topic, record_buf, message_key, partition)
+  def enqueue_with_retry(producer, topic, record_buf, message_key, partition, time)
     attempt = 0
     loop do
       begin
-        handler = producer.produce(topic: topic, payload: record_buf, key: message_key, partition: partition)
+        handler = producer.produce(topic: topic, payload: record_buf, key: message_key, partition: partition, timestamp: @use_event_time ? Time.at(time) : nil)
         return handler
       rescue Exception => e
         if e.respond_to?(:code) && e.code == :queue_full
@@ -74,6 +74,7 @@ DESC
   The codec the producer uses to compress messages. Used for compression.codec
   Supported codecs: (gzip|snappy)
   DESC
+  config_param :use_event_time, :bool, :default => false, :desc => 'Use fluentd event time for rdkafka timestamp'
   config_param :max_send_limit_bytes, :size, :default => nil
   config_param :discard_kafka_delivery_failed, :bool, :default => false
   config_param :rdkafka_buffering_max_ms, :integer, :default => nil, :desc => 'Used for queue.buffering.max.ms'
@@ -86,6 +87,7 @@ DESC

   config_param :max_enqueue_retries, :integer, :default => 3
   config_param :enqueue_retry_backoff, :integer, :default => 3
+  config_param :max_enqueue_bytes_per_second, :size, :default => nil, :desc => 'The maximum number of enqueueing bytes per second'

   config_param :service_name, :string, :default => nil, :desc => 'Used for sasl.kerberos.service.name'
   config_param :ssl_client_cert_key_password, :string, :default => nil, :desc => 'Used for ssl.key.password'
@@ -101,12 +103,68 @@ DESC
   include Fluent::KafkaPluginUtil::SSLSettings
   include Fluent::KafkaPluginUtil::SaslSettings

+  class EnqueueRate
+    class LimitExceeded < StandardError
+      attr_reader :next_retry_clock
+      def initialize(next_retry_clock)
+        @next_retry_clock = next_retry_clock
+      end
+    end
+
+    def initialize(limit_bytes_per_second)
+      @mutex = Mutex.new
+      @start_clock = Fluent::Clock.now
+      @bytes_per_second = 0
+      @limit_bytes_per_second = limit_bytes_per_second
+      @commits = {}
+    end
+
+    def raise_if_limit_exceeded(bytes_to_enqueue)
+      return if @limit_bytes_per_second.nil?
+
+      @mutex.synchronize do
+        @commits[Thread.current] = {
+          clock: Fluent::Clock.now,
+          bytesize: bytes_to_enqueue,
+        }
+
+        @bytes_per_second += @commits[Thread.current][:bytesize]
+        duration = @commits[Thread.current][:clock] - @start_clock
+
+        if duration < 1.0
+          if @bytes_per_second > @limit_bytes_per_second
+            raise LimitExceeded.new(@start_clock + 1.0)
+          end
+        else
+          @start_clock = @commits[Thread.current][:clock]
+          @bytes_per_second = @commits[Thread.current][:bytesize]
+        end
+      end
+    end
+
+    def revert
+      return if @limit_bytes_per_second.nil?
+
+      @mutex.synchronize do
+        return unless @commits[Thread.current]
+        return unless @commits[Thread.current][:clock]
+        if @commits[Thread.current][:clock] >= @start_clock
+          @bytes_per_second -= @commits[Thread.current][:bytesize]
+        end
+        @commits[Thread.current] = nil
+      end
+    end
+  end
+
   def initialize
     super

     @producers = nil
     @producers_mutex = nil
     @shared_producer = nil
+    @enqueue_rate = nil
+    @writing_threads_mutex = Mutex.new
+    @writing_threads = Set.new
   end

   def configure(conf)
@@ -170,6 +228,8 @@ DESC
     @exclude_field_accessors = @exclude_fields.map do |field|
       record_accessor_create(field)
     end
+
+    @enqueue_rate = EnqueueRate.new(@max_enqueue_bytes_per_second) unless @max_enqueue_bytes_per_second.nil?
   end

   def build_config
@@ -233,8 +293,19 @@ DESC
     true
   end

+  def wait_writing_threads
+    done = false
+    until done do
+      @writing_threads_mutex.synchronize do
+        done = true if @writing_threads.empty?
+      end
+      sleep(1) unless done
+    end
+  end
+
   def shutdown
     super
+    wait_writing_threads
     shutdown_producers
   end

@@ -291,6 +362,7 @@ DESC
   end

   def write(chunk)
+    @writing_threads_mutex.synchronize { @writing_threads.add(Thread.current) }
     tag = chunk.metadata.tag
     topic = if @topic
               extract_placeholders(@topic, chunk)
@@ -334,7 +406,7 @@ DESC
           next
         end

-        handler = enqueue_with_retry(producer, topic, record_buf, message_key, partition, headers)
+        handler = enqueue_with_retry(producer, topic, record_buf, message_key, partition, headers, time)
         if @rdkafka_delivery_handle_poll_timeout != 0
           handlers << handler
         end
@@ -351,14 +423,22 @@ DESC
       # Raise exception to retry sending messages
       raise e
     end
+  ensure
+    @writing_threads_mutex.synchronize { @writing_threads.delete(Thread.current) }
   end

-  def enqueue_with_retry(producer, topic, record_buf, message_key, partition, headers)
+  def enqueue_with_retry(producer, topic, record_buf, message_key, partition, headers, time)
     attempt = 0
     loop do
       begin
-        return producer.produce(topic: topic, payload: record_buf, key: message_key, partition: partition, headers: headers)
+        @enqueue_rate.raise_if_limit_exceeded(record_buf.bytesize) if @enqueue_rate
+        return producer.produce(topic: topic, payload: record_buf, key: message_key, partition: partition, headers: headers, timestamp: @use_event_time ? Time.at(time) : nil)
+      rescue EnqueueRate::LimitExceeded => e
+        @enqueue_rate.revert if @enqueue_rate
+        duration = e.next_retry_clock - Fluent::Clock.now
+        sleep(duration) if duration > 0.0
       rescue Exception => e
+        @enqueue_rate.revert if @enqueue_rate
         if e.respond_to?(:code) && e.code == :queue_full
           if attempt <= @max_enqueue_retries
             log.warn "Failed to enqueue message; attempting retry #{attempt} of #{@max_enqueue_retries} after #{@enqueue_retry_backoff}s"
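To summarize the rate limiting added above: each call charges the record's byte size against a rolling one-second window under a mutex; once the accumulated bytes exceed `max_enqueue_bytes_per_second` within the window, `EnqueueRate::LimitExceeded` carries the clock value at which the window resets, the charge is reverted, and `enqueue_with_retry` sleeps until that point before retrying. A minimal standalone Ruby sketch of the same idea follows; the class and method names are illustrative, not the plugin's API, and it checks the budget before accepting the bytes rather than committing and reverting as `EnqueueRate` does.

  # Minimal sketch of a per-second byte budget (illustrative names).
  class ByteBudget
    LimitExceeded = Class.new(StandardError) { attr_accessor :retry_at }

    def initialize(limit_bytes_per_second)
      @limit = limit_bytes_per_second
      @mutex = Mutex.new
      @window_start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
      @bytes = 0
    end

    # Charge `n` bytes against the current one-second window, or raise
    # LimitExceeded carrying the clock value at which the window resets.
    def charge(n)
      @mutex.synchronize do
        now = Process.clock_gettime(Process::CLOCK_MONOTONIC)
        if now - @window_start >= 1.0
          @window_start = now   # roll over to a fresh window
          @bytes = 0
        end
        if @bytes + n > @limit
          err = LimitExceeded.new("enqueue byte budget exhausted")
          err.retry_at = @window_start + 1.0
          raise err
        end
        @bytes += n
      end
    end
  end

  budget = ByteBudget.new(96)            # 96 bytes per second
  10.times do |i|
    begin
      budget.charge(32)                  # pretend each record is 32 bytes
      puts "enqueued record #{i}"
    rescue ByteBudget::LimitExceeded => e
      pause = e.retry_at - Process.clock_gettime(Process::CLOCK_MONOTONIC)
      sleep(pause) if pause > 0
      retry
    end
  end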
@@ -102,6 +102,31 @@ class Rdkafka2OutputTest < Test::Unit::TestCase
     assert_equal([expected_message], actual_messages)
   end

+  def test_write_with_use_event_time
+    input_config = %[
+      @type kafka
+      brokers localhost:9092
+      format json
+      @label @kafka
+      topics #{TOPIC_NAME}
+      time_source kafka
+    ]
+    target_driver = create_target_driver(input_config)
+    expected_message = {"a" => 2}
+    now = event_time
+    target_driver.run(expect_records: 1, timeout: 5) do
+      sleep 2
+      d = create_driver(config(default_topic: TOPIC_NAME) + config_element('ROOT', '', {"use_event_time" => true}))
+      d.run do
+        d.feed("test", now, expected_message)
+      end
+    end
+    actual_time = target_driver.events.collect { |event| event[1] }.last
+    assert_in_delta(actual_time, now, 0.001) # expects milliseconds precision
+    actual_messages = target_driver.events.collect { |event| event[2] }
+    assert_equal([expected_message], actual_messages)
+  end
+
   def test_exclude_fields
     conf = config(default_topic: TOPIC_NAME) +
            config_element('ROOT', '', {"exclude_fields" => "$.foo"}, [])
@@ -116,5 +141,27 @@ class Rdkafka2OutputTest < Test::Unit::TestCase
     actual_messages = target_driver.events.collect { |event| event[2] }
     assert_equal([{'a' => 'b', 'message' => 'test'}], actual_messages)
   end
+
+  def test_max_enqueue_bytes_per_second
+    conf = config(default_topic: TOPIC_NAME) +
+           config_element('ROOT', '', {"max_enqueue_bytes_per_second" => 32 * 3}, [])
+    target_driver = create_target_driver
+    expected_messages = []
+    target_driver.run(expect_records: 9, timeout: 10) do
+      sleep 2
+      d = create_driver(conf)
+      start_time = Fluent::Clock.now
+      d.run do
+        9.times do |i|
+          message = {"message" => "32bytes message: #{i}"}
+          d.feed("test", event_time, message)
+          expected_messages << message
+        end
+      end
+      assert_in_delta(2.0, Fluent::Clock.now - start_time, 0.5)
+    end
+    actual_messages = target_driver.events.collect { |event| event[2] }
+    assert_equal(expected_messages, actual_messages)
+  end
   end
 end
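A note on the timing assertion in `test_max_enqueue_bytes_per_second`: the limit of 32 * 3 = 96 bytes per second is sized to admit three of the roughly 32-byte JSON records per one-second window, so the nine records span three windows and the feed loop has to sleep through about two window rollovers, which is why the elapsed time is expected to be about 2.0 seconds (give or take 0.5).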
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: fluent-plugin-kafka
 version: !ruby/object:Gem::Version
-  version: 0.17.1
+  version: 0.17.2
 platform: ruby
 authors:
 - Hidemasa Togashi
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2021-09-24 00:00:00.000000000 Z
+date: 2021-10-14 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: fluentd