fluent-plugin-kafka 0.17.1 → 0.17.2

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 230266adf4ba3d77b8b2fd743863377c4dd2532297f45e4489df62bfcbee1db5
- data.tar.gz: 4b82d2f33bbbb3294f547a7af154783c432c6316f1155a0e3a822361232743e4
+ metadata.gz: 16968c0e56c22f64225e8e41e905294defc7240df6054813151d3904a79a4107
+ data.tar.gz: 71b13953b11048f201c8a8a275350e983a51377b45e84440196b701063e317c4
  SHA512:
- metadata.gz: 9321cd8bd10dcd603b653c101e6695b7071d7b9dc61fb49bc79248b22cbe9f1db46416c40132243c9862e9c4a3888c818f7a8906906ccf0dda545032f2ac53fd
- data.tar.gz: 832517ac39c4f775d95775454b07bc0f3ecc2f27b1c0bd4c3f8025e795dfbe869d3e349084f868bdb54cfa72188d4cbd96173192f728fae4281f7f84e5d973aa
+ metadata.gz: aedae36f4b7a29408bc96838da3e158a98ddac136987408dcd4bc347068e3d27839e844d1d3d4a239e4f175e023f19d9ecd82e14d4ab089fd43625f60dcca17c
+ data.tar.gz: 4a72ea62a754b689944d3f2de81cc495c9ef92df97c127b80b46ace6824aca00df5b4cc499eb3993d0edbd3f156f140c6c8ba718fb82f65b5328170adf26f622
data/ChangeLog CHANGED
@@ -1,3 +1,8 @@
+ Release 0.17.2 - 2021/10/14
+ * out_rdkafka2: Add `max_enqueue_bytes_per_second` parameter
+ * out_rdkafka2: Support `use_event_time` parameter
+ * out_rdkafka2: Fix a potential bug that the plugin might exit without receiving responses from Kafka.
+
  Release 0.17.1 - 2021/09/24
  * out_rdkafka/out_rdkafka2: Support rdkafka 0.9.0 or later
  * out_rdkafka/out_rdkafka2: Add `exclude_fields` parameter
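
The 0.17.2 entries above add two out_rdkafka2 settings: `use_event_time` stamps produced Kafka messages with the fluentd event time, and `max_enqueue_bytes_per_second` throttles how many bytes the plugin enqueues per second. A minimal `<match>` sketch using both might look like the following; the tag pattern, broker address, and topic are placeholders, and the `1m` value assumes Fluentd's usual size suffixes are accepted for this `:size`-typed parameter:

  <match app.**>
    @type rdkafka2
    brokers localhost:9092        # placeholder broker
    default_topic my_topic        # placeholder topic
    # use the fluentd event time as the Kafka message timestamp
    use_event_time true
    # throttle enqueueing to roughly 1 MiB per second; excess records are delayed, not dropped
    max_enqueue_bytes_per_second 1m
    <format>
      @type json
    </format>
  </match>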
data/README.md CHANGED
@@ -200,6 +200,7 @@ If `ruby-kafka` doesn't fit your kafka environment, check `rdkafka2` plugin inst
  get_kafka_client_log (bool) :default => false
  headers (hash) :default => {}
  headers_from_record (hash) :default => {}
+ use_event_time (bool) :default => false
  use_default_for_unknown_topic (bool) :default => false
  discard_kafka_delivery_failed (bool) :default => false (No discard)
  partitioner_hash_function (enum) (crc32|murmur2) :default => 'crc32'
@@ -363,6 +364,7 @@ Support of fluentd v0.12 has ended. `kafka_buffered` will be an alias of `kafka2
  exclude_topic_key (bool) :default => false
  exclude_partition_key (bool) :default => false
  get_kafka_client_log (bool) :default => false
+ use_event_time (bool) :default => false
  partitioner_hash_function (enum) (crc32|murmur2) :default => 'crc32'

  # See fluentd document for buffer related parameters: https://docs.fluentd.org/v/0.12/buffer
@@ -452,6 +454,7 @@ You need to install rdkafka gem.
  exclude_topic_key (bool) :default => false
  exclude_partition_key (bool) :default => false
  discard_kafka_delivery_failed (bool) :default => false (No discard)
+ use_event_time (bool) :default => false

  # same with kafka2
  headers (hash) :default => {}
@@ -486,6 +489,10 @@ You need to install rdkafka gem.
  rdkafka_delivery_handle_poll_timeout (integer) :default => 30
  # If the record size is larger than this value, such records are ignored. Default is no limit
  max_send_limit_bytes (integer) :default => nil
+ # The maximum number of enqueueing bytes per second. It can reduce the
+ # load of both Fluentd and Kafka when excessive messages are attempted
+ # to send. Default is no limit.
+ max_enqueue_bytes_per_second (integer) :default => nil
  </match>

  If you use v0.12, use `rdkafka` instead.
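
As a rough sizing illustration (not from the upstream docs): if each serialized record is about 1 KiB and the goal is to cap enqueueing at roughly 5,000 records per second, a value in the neighborhood of

  max_enqueue_bytes_per_second 5m

would match that budget. When the per-second budget is exhausted, the plugin delays the writing thread and retries the enqueue rather than discarding records.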
@@ -13,7 +13,7 @@ Gem::Specification.new do |gem|
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
  gem.name = "fluent-plugin-kafka"
  gem.require_paths = ["lib"]
- gem.version = '0.17.1'
+ gem.version = '0.17.2'
  gem.required_ruby_version = ">= 2.1.0"

  gem.add_dependency "fluentd", [">= 0.10.58", "< 2"]
@@ -65,6 +65,7 @@ DESC
  The codec the producer uses to compress messages.
  Supported codecs: (gzip|snappy)
  DESC
+ config_param :use_event_time, :bool, :default => false, :desc => 'Use fluentd event time for rdkafka timestamp'
  config_param :max_send_limit_bytes, :size, :default => nil
  config_param :rdkafka_buffering_max_ms, :integer, :default => nil
  config_param :rdkafka_buffering_max_messages, :integer, :default => nil
@@ -286,7 +287,7 @@ DESC
  end

  producer = get_producer
- handler = enqueue_with_retry(producer, topic, record_buf, message_key, partition)
+ handler = enqueue_with_retry(producer, topic, record_buf, message_key, partition, time)
  handler
  }.each { |handler|
  handler.wait(max_wait_timeout: @rdkafka_delivery_handle_poll_timeout) if @rdkafka_delivery_handle_poll_timeout != 0
@@ -298,11 +299,11 @@ DESC
  raise e
  end

- def enqueue_with_retry(producer, topic, record_buf, message_key, partition)
+ def enqueue_with_retry(producer, topic, record_buf, message_key, partition, time)
  attempt = 0
  loop do
  begin
- handler = producer.produce(topic: topic, payload: record_buf, key: message_key, partition: partition)
+ handler = producer.produce(topic: topic, payload: record_buf, key: message_key, partition: partition, timestamp: @use_event_time ? Time.at(time) : nil)
  return handler
  rescue Exception => e
  if e.respond_to?(:code) && e.code == :queue_full
@@ -74,6 +74,7 @@ DESC
  The codec the producer uses to compress messages. Used for compression.codec
  Supported codecs: (gzip|snappy)
  DESC
+ config_param :use_event_time, :bool, :default => false, :desc => 'Use fluentd event time for rdkafka timestamp'
  config_param :max_send_limit_bytes, :size, :default => nil
  config_param :discard_kafka_delivery_failed, :bool, :default => false
  config_param :rdkafka_buffering_max_ms, :integer, :default => nil, :desc => 'Used for queue.buffering.max.ms'
@@ -86,6 +87,7 @@ DESC

  config_param :max_enqueue_retries, :integer, :default => 3
  config_param :enqueue_retry_backoff, :integer, :default => 3
+ config_param :max_enqueue_bytes_per_second, :size, :default => nil, :desc => 'The maximum number of enqueueing bytes per second'

  config_param :service_name, :string, :default => nil, :desc => 'Used for sasl.kerberos.service.name'
  config_param :ssl_client_cert_key_password, :string, :default => nil, :desc => 'Used for ssl.key.password'
@@ -101,12 +103,68 @@ DESC
  include Fluent::KafkaPluginUtil::SSLSettings
  include Fluent::KafkaPluginUtil::SaslSettings

+ class EnqueueRate
+ class LimitExceeded < StandardError
+ attr_reader :next_retry_clock
+ def initialize(next_retry_clock)
+ @next_retry_clock = next_retry_clock
+ end
+ end
+
+ def initialize(limit_bytes_per_second)
+ @mutex = Mutex.new
+ @start_clock = Fluent::Clock.now
+ @bytes_per_second = 0
+ @limit_bytes_per_second = limit_bytes_per_second
+ @commits = {}
+ end
+
+ def raise_if_limit_exceeded(bytes_to_enqueue)
+ return if @limit_bytes_per_second.nil?
+
+ @mutex.synchronize do
+ @commits[Thread.current] = {
+ clock: Fluent::Clock.now,
+ bytesize: bytes_to_enqueue,
+ }
+
+ @bytes_per_second += @commits[Thread.current][:bytesize]
+ duration = @commits[Thread.current][:clock] - @start_clock
+
+ if duration < 1.0
+ if @bytes_per_second > @limit_bytes_per_second
+ raise LimitExceeded.new(@start_clock + 1.0)
+ end
+ else
+ @start_clock = @commits[Thread.current][:clock]
+ @bytes_per_second = @commits[Thread.current][:bytesize]
+ end
+ end
+ end
+
+ def revert
+ return if @limit_bytes_per_second.nil?
+
+ @mutex.synchronize do
+ return unless @commits[Thread.current]
+ return unless @commits[Thread.current][:clock]
+ if @commits[Thread.current][:clock] >= @start_clock
+ @bytes_per_second -= @commits[Thread.current][:bytesize]
+ end
+ @commits[Thread.current] = nil
+ end
+ end
+ end
+
  def initialize
  super

  @producers = nil
  @producers_mutex = nil
  @shared_producer = nil
+ @enqueue_rate = nil
+ @writing_threads_mutex = Mutex.new
+ @writing_threads = Set.new
  end

  def configure(conf)
@@ -170,6 +228,8 @@ DESC
  @exclude_field_accessors = @exclude_fields.map do |field|
  record_accessor_create(field)
  end
+
+ @enqueue_rate = EnqueueRate.new(@max_enqueue_bytes_per_second) unless @max_enqueue_bytes_per_second.nil?
  end

  def build_config
@@ -233,8 +293,19 @@ DESC
  true
  end

+ def wait_writing_threads
+ done = false
+ until done do
+ @writing_threads_mutex.synchronize do
+ done = true if @writing_threads.empty?
+ end
+ sleep(1) unless done
+ end
+ end
+
  def shutdown
  super
+ wait_writing_threads
  shutdown_producers
  end

@@ -291,6 +362,7 @@ DESC
  end

  def write(chunk)
+ @writing_threads_mutex.synchronize { @writing_threads.add(Thread.current) }
  tag = chunk.metadata.tag
  topic = if @topic
  extract_placeholders(@topic, chunk)
@@ -334,7 +406,7 @@ DESC
  next
  end

- handler = enqueue_with_retry(producer, topic, record_buf, message_key, partition, headers)
+ handler = enqueue_with_retry(producer, topic, record_buf, message_key, partition, headers, time)
  if @rdkafka_delivery_handle_poll_timeout != 0
  handlers << handler
  end
@@ -351,14 +423,22 @@ DESC
  # Raise exception to retry sendind messages
  raise e
  end
+ ensure
+ @writing_threads_mutex.synchronize { @writing_threads.delete(Thread.current) }
  end

- def enqueue_with_retry(producer, topic, record_buf, message_key, partition, headers)
+ def enqueue_with_retry(producer, topic, record_buf, message_key, partition, headers, time)
  attempt = 0
  loop do
  begin
- return producer.produce(topic: topic, payload: record_buf, key: message_key, partition: partition, headers: headers)
+ @enqueue_rate.raise_if_limit_exceeded(record_buf.bytesize) if @enqueue_rate
+ return producer.produce(topic: topic, payload: record_buf, key: message_key, partition: partition, headers: headers, timestamp: @use_event_time ? Time.at(time) : nil)
+ rescue EnqueueRate::LimitExceeded => e
+ @enqueue_rate.revert if @enqueue_rate
+ duration = e.next_retry_clock - Fluent::Clock.now
+ sleep(duration) if duration > 0.0
  rescue Exception => e
+ @enqueue_rate.revert if @enqueue_rate
  if e.respond_to?(:code) && e.code == :queue_full
  if attempt <= @max_enqueue_retries
  log.warn "Failed to enqueue message; attempting retry #{attempt} of #{@max_enqueue_retries} after #{@enqueue_retry_backoff}s"
@@ -102,6 +102,31 @@ class Rdkafka2OutputTest < Test::Unit::TestCase
  assert_equal([expected_message], actual_messages)
  end

+ def test_write_with_use_event_time
+ input_config = %[
+ @type kafka
+ brokers localhost:9092
+ format json
+ @label @kafka
+ topics #{TOPIC_NAME}
+ time_source kafka
+ ]
+ target_driver = create_target_driver(input_config)
+ expected_message = {"a" => 2}
+ now = event_time
+ target_driver.run(expect_records: 1, timeout: 5) do
+ sleep 2
+ d = create_driver(config(default_topic: TOPIC_NAME) + config_element('ROOT', '', {"use_event_time" => true}))
+ d.run do
+ d.feed("test", now, expected_message)
+ end
+ end
+ actual_time = target_driver.events.collect { |event| event[1] }.last
+ assert_in_delta(actual_time, now, 0.001) # expects millseconds precision
+ actual_messages = target_driver.events.collect { |event| event[2] }
+ assert_equal([expected_message], actual_messages)
+ end
+
  def test_exclude_fields
  conf = config(default_topic: TOPIC_NAME) +
  config_element('ROOT', '', {"exclude_fields" => "$.foo"}, [])
@@ -116,5 +141,27 @@ class Rdkafka2OutputTest < Test::Unit::TestCase
  actual_messages = target_driver.events.collect { |event| event[2] }
  assert_equal([{'a' => 'b', 'message' => 'test'}], actual_messages)
  end
+
+ def test_max_enqueue_bytes_per_second
+ conf = config(default_topic: TOPIC_NAME) +
+ config_element('ROOT', '', {"max_enqueue_bytes_per_second" => 32 * 3}, [])
+ target_driver = create_target_driver
+ expected_messages = []
+ target_driver.run(expect_records: 9, timeout: 10) do
+ sleep 2
+ d = create_driver(conf)
+ start_time = Fluent::Clock.now
+ d.run do
+ 9.times do |i|
+ message = {"message" => "32bytes message: #{i}"}
+ d.feed("test", event_time, message)
+ expected_messages << message
+ end
+ end
+ assert_in_delta(2.0, Fluent::Clock.now - start_time, 0.5)
+ end
+ actual_messages = target_driver.events.collect { |event| event[2] }
+ assert_equal(expected_messages, actual_messages)
+ end
  end
  end
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: fluent-plugin-kafka
  version: !ruby/object:Gem::Version
- version: 0.17.1
+ version: 0.17.2
  platform: ruby
  authors:
  - Hidemasa Togashi
@@ -9,7 +9,7 @@ authors:
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2021-09-24 00:00:00.000000000 Z
+ date: 2021-10-14 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: fluentd