fluent-plugin-kafka 0.16.2 → 0.17.2

This diff shows the publicly released contents of the two package versions as they appear in their respective registries; it is provided for informational purposes only.
@@ -48,7 +48,7 @@ Set true to remove partition from data
  DESC
  config_param :exclude_message_key, :bool, :default => false,
  :desc => <<-DESC
- Set true to remove partition key from data
+ Set true to remove message key from data
  DESC
  config_param :exclude_topic_key, :bool, :default => false,
  :desc => <<-DESC
@@ -65,6 +65,7 @@ DESC
  The codec the producer uses to compress messages.
  Supported codecs: (gzip|snappy)
  DESC
+ config_param :use_event_time, :bool, :default => false, :desc => 'Use fluentd event time for rdkafka timestamp'
  config_param :max_send_limit_bytes, :size, :default => nil
  config_param :rdkafka_buffering_max_ms, :integer, :default => nil
  config_param :rdkafka_buffering_max_messages, :integer, :default => nil
@@ -91,23 +92,29 @@ DESC
  def configure(conf)
  super
  log.instance_eval {
- def add(level, &block)
- return unless block
+ def add(level, message = nil)
+ if message.nil?
+ if block_given?
+ message = yield
+ else
+ return
+ end
+ end

  # Follow rdkakfa's log level. See also rdkafka-ruby's bindings.rb: https://github.com/appsignal/rdkafka-ruby/blob/e5c7261e3f2637554a5c12b924be297d7dca1328/lib/rdkafka/bindings.rb#L117
  case level
  when Logger::FATAL
- self.fatal(block.call)
+ self.fatal(message)
  when Logger::ERROR
- self.error(block.call)
+ self.error(message)
  when Logger::WARN
- self.warn(block.call)
+ self.warn(message)
  when Logger::INFO
- self.info(block.call)
+ self.info(message)
  when Logger::DEBUG
- self.debug(block.call)
+ self.debug(message)
  else
- self.trace(block.call)
+ self.trace(message)
  end
  end
  }
@@ -280,7 +287,7 @@ DESC
  end

  producer = get_producer
- handler = enqueue_with_retry(producer, topic, record_buf, message_key, partition)
+ handler = enqueue_with_retry(producer, topic, record_buf, message_key, partition, time)
  handler
  }.each { |handler|
  handler.wait(max_wait_timeout: @rdkafka_delivery_handle_poll_timeout) if @rdkafka_delivery_handle_poll_timeout != 0
@@ -292,11 +299,11 @@ DESC
  raise e
  end

- def enqueue_with_retry(producer, topic, record_buf, message_key, partition)
+ def enqueue_with_retry(producer, topic, record_buf, message_key, partition, time)
  attempt = 0
  loop do
  begin
- handler = producer.produce(topic: topic, payload: record_buf, key: message_key, partition: partition)
+ handler = producer.produce(topic: topic, payload: record_buf, key: message_key, partition: partition, timestamp: @use_event_time ? Time.at(time) : nil)
  return handler
  rescue Exception => e
  if e.respond_to?(:code) && e.code == :queue_full
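Note on the change above: when use_event_time is enabled, the fluentd event time is converted with Time.at and passed as the rdkafka produce timestamp; when it is disabled the option stays nil and rdkafka falls back to the send time. A minimal sketch of that conversion (illustrative values, not plugin code):

    # `time` stands for the event time fluentd hands to the output's write path.
    use_event_time = true
    time = 1_700_000_000                       # epoch seconds, placeholder value
    timestamp = use_event_time ? Time.at(time) : nil
    # producer.produce(..., timestamp: timestamp) then attaches it to the message.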
@@ -56,6 +56,8 @@ DESC
  :desc => <<-DESC
  Set true to remove topic key from data
  DESC
+ config_param :exclude_fields, :array, :default => [], value_type: :string,
+ :desc => 'Fields to remove from data where the value is a jsonpath to a record value'
  config_param :headers, :hash, default: {}, symbolize_keys: true, value_type: :string,
  :desc => 'Kafka message headers'
  config_param :headers_from_record, :hash, default: {}, symbolize_keys: true, value_type: :string,
@@ -72,6 +74,7 @@ DESC
  The codec the producer uses to compress messages. Used for compression.codec
  Supported codecs: (gzip|snappy)
  DESC
+ config_param :use_event_time, :bool, :default => false, :desc => 'Use fluentd event time for rdkafka timestamp'
  config_param :max_send_limit_bytes, :size, :default => nil
  config_param :discard_kafka_delivery_failed, :bool, :default => false
  config_param :rdkafka_buffering_max_ms, :integer, :default => nil, :desc => 'Used for queue.buffering.max.ms'
@@ -84,6 +87,7 @@ DESC

  config_param :max_enqueue_retries, :integer, :default => 3
  config_param :enqueue_retry_backoff, :integer, :default => 3
+ config_param :max_enqueue_bytes_per_second, :size, :default => nil, :desc => 'The maximum number of enqueueing bytes per second'

  config_param :service_name, :string, :default => nil, :desc => 'Used for sasl.kerberos.service.name'
  config_param :ssl_client_cert_key_password, :string, :default => nil, :desc => 'Used for ssl.key.password'
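The rdkafka2 output gains the same use_event_time switch plus exclude_fields and max_enqueue_bytes_per_second. A hedged configuration sketch combining them, written as the %[ ... ] literal the tests in this diff use (the broker, topic, and $.credit_card field name are placeholders, not values from this release):

    conf = %[
      @type rdkafka2
      brokers localhost:9092                 # placeholder broker
      default_topic app-events               # placeholder topic
      use_event_time true                    # stamp messages with the fluentd event time
      exclude_fields $.credit_card           # jsonpath of a record field to drop before producing
      max_enqueue_bytes_per_second 1m        # throttle enqueueing to roughly 1 MiB/s
      <format>
        @type json
      </format>
    ]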
@@ -99,34 +103,96 @@ DESC
  include Fluent::KafkaPluginUtil::SSLSettings
  include Fluent::KafkaPluginUtil::SaslSettings

+ class EnqueueRate
+ class LimitExceeded < StandardError
+ attr_reader :next_retry_clock
+ def initialize(next_retry_clock)
+ @next_retry_clock = next_retry_clock
+ end
+ end
+
+ def initialize(limit_bytes_per_second)
+ @mutex = Mutex.new
+ @start_clock = Fluent::Clock.now
+ @bytes_per_second = 0
+ @limit_bytes_per_second = limit_bytes_per_second
+ @commits = {}
+ end
+
+ def raise_if_limit_exceeded(bytes_to_enqueue)
+ return if @limit_bytes_per_second.nil?
+
+ @mutex.synchronize do
+ @commits[Thread.current] = {
+ clock: Fluent::Clock.now,
+ bytesize: bytes_to_enqueue,
+ }
+
+ @bytes_per_second += @commits[Thread.current][:bytesize]
+ duration = @commits[Thread.current][:clock] - @start_clock
+
+ if duration < 1.0
+ if @bytes_per_second > @limit_bytes_per_second
+ raise LimitExceeded.new(@start_clock + 1.0)
+ end
+ else
+ @start_clock = @commits[Thread.current][:clock]
+ @bytes_per_second = @commits[Thread.current][:bytesize]
+ end
+ end
+ end
+
+ def revert
+ return if @limit_bytes_per_second.nil?
+
+ @mutex.synchronize do
+ return unless @commits[Thread.current]
+ return unless @commits[Thread.current][:clock]
+ if @commits[Thread.current][:clock] >= @start_clock
+ @bytes_per_second -= @commits[Thread.current][:bytesize]
+ end
+ @commits[Thread.current] = nil
+ end
+ end
+ end
+
  def initialize
  super

  @producers = nil
  @producers_mutex = nil
  @shared_producer = nil
+ @enqueue_rate = nil
+ @writing_threads_mutex = Mutex.new
+ @writing_threads = Set.new
  end

  def configure(conf)
  super
  log.instance_eval {
- def add(level, &block)
- return unless block
+ def add(level, message = nil)
+ if message.nil?
+ if block_given?
+ message = yield
+ else
+ return
+ end
+ end

  # Follow rdkakfa's log level. See also rdkafka-ruby's bindings.rb: https://github.com/appsignal/rdkafka-ruby/blob/e5c7261e3f2637554a5c12b924be297d7dca1328/lib/rdkafka/bindings.rb#L117
  case level
  when Logger::FATAL
- self.fatal(block.call)
+ self.fatal(message)
  when Logger::ERROR
- self.error(block.call)
+ self.error(message)
  when Logger::WARN
- self.warn(block.call)
+ self.warn(message)
  when Logger::INFO
- self.info(block.call)
+ self.info(message)
  when Logger::DEBUG
- self.debug(block.call)
+ self.debug(message)
  else
- self.trace(block.call)
+ self.trace(message)
  end
  end
  }
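EnqueueRate implements a simple one-second window: bytes committed since @start_clock are summed, and once the sum passes the limit inside that second a LimitExceeded is raised carrying @start_clock + 1.0 as the clock at which callers may retry; a commit that lands after the window restarts it. A simplified caller-side sketch, assuming EnqueueRate is in scope (illustrative only; the real call sites appear in enqueue_with_retry further down this diff):

    rate    = EnqueueRate.new(1024 * 1024)            # limit: 1 MiB per second
    payload = 'example payload'
    begin
      rate.raise_if_limit_exceeded(payload.bytesize)
      # produce the message here; call rate.revert if producing fails
    rescue EnqueueRate::LimitExceeded => e
      rate.revert
      wait = e.next_retry_clock - Fluent::Clock.now
      sleep(wait) if wait > 0.0                       # wait out the current window
      retry
    end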
@@ -158,6 +224,12 @@ DESC
  @headers_from_record.each do |key, value|
  @headers_from_record_accessors[key] = record_accessor_create(value)
  end
+
+ @exclude_field_accessors = @exclude_fields.map do |field|
+ record_accessor_create(field)
+ end
+
+ @enqueue_rate = EnqueueRate.new(@max_enqueue_bytes_per_second) unless @max_enqueue_bytes_per_second.nil?
  end

  def build_config
@@ -221,8 +293,19 @@ DESC
  true
  end

+ def wait_writing_threads
+ done = false
+ until done do
+ @writing_threads_mutex.synchronize do
+ done = true if @writing_threads.empty?
+ end
+ sleep(1) unless done
+ end
+ end
+
  def shutdown
  super
+ wait_writing_threads
  shutdown_producers
  end

@@ -279,6 +362,7 @@ DESC
  end

  def write(chunk)
+ @writing_threads_mutex.synchronize { @writing_threads.add(Thread.current) }
  tag = chunk.metadata.tag
  topic = if @topic
  extract_placeholders(@topic, chunk)
@@ -305,6 +389,12 @@ DESC
  headers[key] = header_accessor.call(record)
  end

+ unless @exclude_fields.empty?
+ @exclude_field_accessors.each do |exclude_field_acessor|
+ exclude_field_acessor.delete(record)
+ end
+ end
+
  record_buf = @formatter_proc.call(tag, time, record)
  record_buf_bytes = record_buf.bytesize
  if @max_send_limit_bytes && record_buf_bytes > @max_send_limit_bytes
@@ -316,7 +406,7 @@ DESC
  next
  end

- handler = enqueue_with_retry(producer, topic, record_buf, message_key, partition, headers)
+ handler = enqueue_with_retry(producer, topic, record_buf, message_key, partition, headers, time)
  if @rdkafka_delivery_handle_poll_timeout != 0
  handlers << handler
  end
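The exclude_fields handling above builds one record_accessor per configured jsonpath at configure time and calls delete on each record before it is formatted. A small sketch of the effect, matching the test case later in this diff (usable only inside a plugin that includes fluentd's record_accessor helper):

    accessor = record_accessor_create('$.foo')
    record   = { 'a' => 'b', 'foo' => 'bar', 'message' => 'test' }
    accessor.delete(record)
    # record is now { 'a' => 'b', 'message' => 'test' }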
@@ -333,14 +423,22 @@ DESC
  # Raise exception to retry sendind messages
  raise e
  end
+ ensure
+ @writing_threads_mutex.synchronize { @writing_threads.delete(Thread.current) }
  end

- def enqueue_with_retry(producer, topic, record_buf, message_key, partition, headers)
+ def enqueue_with_retry(producer, topic, record_buf, message_key, partition, headers, time)
  attempt = 0
  loop do
  begin
- return producer.produce(topic: topic, payload: record_buf, key: message_key, partition: partition, headers: headers)
+ @enqueue_rate.raise_if_limit_exceeded(record_buf.bytesize) if @enqueue_rate
+ return producer.produce(topic: topic, payload: record_buf, key: message_key, partition: partition, headers: headers, timestamp: @use_event_time ? Time.at(time) : nil)
+ rescue EnqueueRate::LimitExceeded => e
+ @enqueue_rate.revert if @enqueue_rate
+ duration = e.next_retry_clock - Fluent::Clock.now
+ sleep(duration) if duration > 0.0
  rescue Exception => e
+ @enqueue_rate.revert if @enqueue_rate
  if e.respond_to?(:code) && e.code == :queue_full
  if attempt <= @max_enqueue_retries
  log.warn "Failed to enqueue message; attempting retry #{attempt} of #{@max_enqueue_retries} after #{@enqueue_retry_backoff}s"
data/test/helper.rb CHANGED
@@ -8,6 +8,7 @@ rescue Bundler::BundlerError => e
  exit e.status_code
  end
  require 'test/unit'
+ require 'test/unit/rr'

  $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
  $LOAD_PATH.unshift(File.dirname(__FILE__))
@@ -22,6 +23,12 @@ unless ENV.has_key?('VERBOSE')
  end

  require 'fluent/plugin/out_kafka'
+ require 'fluent/plugin/out_kafka_buffered'
+ require 'fluent/plugin/out_kafka2'
+ require 'fluent/plugin/in_kafka'
+ require 'fluent/plugin/in_kafka_group'
+
+ require "fluent/test/driver/output"

  class Test::Unit::TestCase
  end
@@ -0,0 +1,66 @@
+ require 'helper'
+ require 'fluent/test/driver/input'
+ require 'securerandom'
+
+ class KafkaInputTest < Test::Unit::TestCase
+ def setup
+ Fluent::Test.setup
+ end
+
+ TOPIC_NAME = "kafka-input-#{SecureRandom.uuid}"
+
+ CONFIG = %[
+ @type kafka
+ brokers localhost:9092
+ format text
+ @label @kafka
+ topics #{TOPIC_NAME}
+ ]
+
+ def create_driver(conf = CONFIG)
+ Fluent::Test::Driver::Input.new(Fluent::KafkaInput).configure(conf)
+ end
+
+
+ def test_configure
+ d = create_driver
+ assert_equal TOPIC_NAME, d.instance.topics
+ assert_equal 'text', d.instance.format
+ assert_equal 'localhost:9092', d.instance.brokers
+ end
+
+ def test_multi_worker_support
+ d = create_driver
+ assert_false d.instance.multi_workers_ready?
+ end
+
+ class ConsumeTest < self
+ def setup
+ @kafka = Kafka.new(["localhost:9092"], client_id: 'kafka')
+ @producer = @kafka.producer
+ end
+
+ def teardown
+ @kafka.delete_topic(TOPIC_NAME)
+ @kafka.close
+ end
+
+ def test_consume
+ conf = %[
+ @type kafka
+ brokers localhost:9092
+ format text
+ @label @kafka
+ topics #{TOPIC_NAME}
+ ]
+ d = create_driver
+
+ d.run(expect_records: 1, timeout: 10) do
+ @producer.produce("Hello, fluent-plugin-kafka!", topic: TOPIC_NAME)
+ @producer.deliver_messages
+ end
+ expected = {'message' => 'Hello, fluent-plugin-kafka!'}
+ assert_equal expected, d.events[0][2]
+ end
+ end
+ end
@@ -0,0 +1,67 @@
+ require 'helper'
+ require 'fluent/test/driver/input'
+ require 'securerandom'
+
+ class KafkaGroupInputTest < Test::Unit::TestCase
+ def setup
+ Fluent::Test.setup
+ end
+
+ TOPIC_NAME = "kafka-input-#{SecureRandom.uuid}"
+
+ CONFIG = %[
+ @type kafka
+ brokers localhost:9092
+ consumer_group fluentd
+ format text
+ @label @kafka
+ topics #{TOPIC_NAME}
+ ]
+
+ def create_driver(conf = CONFIG)
+ Fluent::Test::Driver::Input.new(Fluent::KafkaGroupInput).configure(conf)
+ end
+
+
+ def test_configure
+ d = create_driver
+ assert_equal [TOPIC_NAME], d.instance.topics
+ assert_equal 'text', d.instance.format
+ assert_equal 'localhost:9092', d.instance.brokers
+ end
+
+ def test_multi_worker_support
+ d = create_driver
+ assert_true d.instance.multi_workers_ready?
+ end
+
+ class ConsumeTest < self
+ def setup
+ @kafka = Kafka.new(["localhost:9092"], client_id: 'kafka')
+ @producer = @kafka.producer
+ end
+
+ def teardown
+ @kafka.delete_topic(TOPIC_NAME)
+ @kafka.close
+ end
+
+ def test_consume
+ conf = %[
+ @type kafka
+ brokers localhost:9092
+ format text
+ @label @kafka
+ topics #{TOPIC_NAME}
+ ]
+ d = create_driver
+
+ d.run(expect_records: 1, timeout: 10) do
+ @producer.produce("Hello, fluent-plugin-kafka!", topic: TOPIC_NAME)
+ @producer.deliver_messages
+ end
+ expected = {'message' => 'Hello, fluent-plugin-kafka!'}
+ assert_equal expected, d.events[0][2]
+ end
+ end
+ end
@@ -1,14 +1,8 @@
  require 'helper'
  require 'fluent/plugin/kafka_plugin_util'

- class File
- def File::read(path)
- path
- end
- end
-
  class KafkaPluginUtilTest < Test::Unit::TestCase
-
+
  def self.config_param(name, type, options)
  end
  include Fluent::KafkaPluginUtil::SSLSettings
@@ -20,19 +14,31 @@ class KafkaPluginUtilTest < Test::Unit::TestCase
  end

  def test_read_ssl_file_when_nil
- assert_equal(nil, read_ssl_file(nil))
+ stub(File).read(anything) do |path|
+ path
+ end
+ assert_equal(nil, read_ssl_file(nil))
  end

  def test_read_ssl_file_when_empty_string
- assert_equal(nil, read_ssl_file(""))
+ stub(File).read(anything) do |path|
+ path
+ end
+ assert_equal(nil, read_ssl_file(""))
  end

  def test_read_ssl_file_when_non_empty_path
- assert_equal("path", read_ssl_file("path"))
+ stub(File).read(anything) do |path|
+ path
+ end
+ assert_equal("path", read_ssl_file("path"))
  end

  def test_read_ssl_file_when_non_empty_array
- assert_equal(["a","b"], read_ssl_file(["a","b"]))
+ stub(File).read(anything) do |path|
+ path
+ end
+ assert_equal(["a","b"], read_ssl_file(["a","b"]))
  end

- end
+ end
@@ -43,6 +43,16 @@ class KafkaOutputTest < Test::Unit::TestCase
  d = create_driver
  end

+ data("crc32" => "crc32",
+ "murmur2" => "murmur2")
+ def test_partitioner_hash_function(data)
+ hash_type = data
+ d = create_driver(CONFIG + %[partitioner_hash_function #{hash_type}])
+ assert_nothing_raised do
+ d.instance.refresh_client
+ end
+ end
+
  def test_mutli_worker_support
  d = create_driver
  assert_equal true, d.instance.multi_workers_ready?
@@ -0,0 +1,116 @@
+ require 'helper'
+ require 'fluent/test/helpers'
+ require 'fluent/test/driver/input'
+ require 'fluent/test/driver/output'
+ require 'securerandom'
+
+ class Kafka2OutputTest < Test::Unit::TestCase
+ include Fluent::Test::Helpers
+
+ def setup
+ Fluent::Test.setup
+ end
+
+ def base_config
+ config_element('ROOT', '', {"@type" => "kafka2"}, [
+ config_element('format', "", {"@type" => "json"})
+ ])
+ end
+
+ def config(default_topic: "kitagawakeiko")
+ base_config + config_element('ROOT', '', {"default_topic" => default_topic,
+ "brokers" => "localhost:9092"}, [
+ ])
+ end
+
+ def create_driver(conf = config, tag='test')
+ Fluent::Test::Driver::Output.new(Fluent::Kafka2Output).configure(conf)
+ end
+
+ def test_configure
+ assert_nothing_raised(Fluent::ConfigError) {
+ create_driver(base_config)
+ }
+
+ assert_nothing_raised(Fluent::ConfigError) {
+ create_driver(config)
+ }
+
+ assert_nothing_raised(Fluent::ConfigError) {
+ create_driver(config + config_element('buffer', "", {"@type" => "memory"}))
+ }
+
+ d = create_driver
+ assert_equal 'kitagawakeiko', d.instance.default_topic
+ assert_equal ['localhost:9092'], d.instance.brokers
+ end
+
+ data("crc32" => "crc32",
+ "murmur2" => "murmur2")
+ def test_partitioner_hash_function(data)
+ hash_type = data
+ d = create_driver(config + config_element('ROOT', '', {"partitioner_hash_function" => hash_type}))
+ assert_nothing_raised do
+ d.instance.refresh_client
+ end
+ end
+
+ def test_mutli_worker_support
+ d = create_driver
+ assert_equal true, d.instance.multi_workers_ready?
+ end
+
+ class WriteTest < self
+ TOPIC_NAME = "kafka-output-#{SecureRandom.uuid}"
+
+ INPUT_CONFIG = %[
+ @type kafka
+ brokers localhost:9092
+ format json
+ @label @kafka
+ topics #{TOPIC_NAME}
+ ]
+
+ def create_target_driver(conf = INPUT_CONFIG)
+ Fluent::Test::Driver::Input.new(Fluent::KafkaInput).configure(conf)
+ end
+
+ def setup
+ @kafka = Kafka.new(["localhost:9092"], client_id: 'kafka')
+ end
+
+ def teardown
+ @kafka.delete_topic(TOPIC_NAME)
+ @kafka.close
+ end
+
+ def test_write
+ target_driver = create_target_driver
+ expected_message = {"a" => 2}
+ target_driver.run(expect_records: 1, timeout: 5) do
+ sleep 2
+ d = create_driver(config(default_topic: TOPIC_NAME))
+ d.run do
+ d.feed("test", event_time, expected_message)
+ end
+ end
+ actual_messages = target_driver.events.collect { |event| event[2] }
+ assert_equal([expected_message], actual_messages)
+ end
+
+ def test_exclude_fields
+ conf = config(default_topic: TOPIC_NAME) +
+ config_element('ROOT', '', {"exclude_fields" => "$.foo"}, [])
+ target_driver = create_target_driver
+ target_driver.run(expect_records: 1, timeout: 5) do
+ sleep 2
+ d = create_driver(conf)
+ d.run do
+ d.feed('test', event_time, {'a' => 'b', 'foo' => 'bar', 'message' => 'test'})
+ end
+ end
+ actual_messages = target_driver.events.collect { |event| event[2] }
+ assert_equal([{'a' => 'b', 'message' => 'test'}], actual_messages)
+ end
+ end
+ end