fluent-plugin-kafka 0.16.2 → 0.17.2

@@ -48,7 +48,7 @@ Set true to remove partition from data
  DESC
  config_param :exclude_message_key, :bool, :default => false,
  :desc => <<-DESC
- Set true to remove partition key from data
+ Set true to remove message key from data
  DESC
  config_param :exclude_topic_key, :bool, :default => false,
  :desc => <<-DESC
@@ -65,6 +65,7 @@ DESC
  The codec the producer uses to compress messages.
  Supported codecs: (gzip|snappy)
  DESC
+ config_param :use_event_time, :bool, :default => false, :desc => 'Use fluentd event time for rdkafka timestamp'
  config_param :max_send_limit_bytes, :size, :default => nil
  config_param :rdkafka_buffering_max_ms, :integer, :default => nil
  config_param :rdkafka_buffering_max_messages, :integer, :default => nil
@@ -91,23 +92,29 @@ DESC
  def configure(conf)
  super
  log.instance_eval {
- def add(level, &block)
- return unless block
+ def add(level, message = nil)
+ if message.nil?
+ if block_given?
+ message = yield
+ else
+ return
+ end
+ end

  # Follow rdkakfa's log level. See also rdkafka-ruby's bindings.rb: https://github.com/appsignal/rdkafka-ruby/blob/e5c7261e3f2637554a5c12b924be297d7dca1328/lib/rdkafka/bindings.rb#L117
  case level
  when Logger::FATAL
- self.fatal(block.call)
+ self.fatal(message)
  when Logger::ERROR
- self.error(block.call)
+ self.error(message)
  when Logger::WARN
- self.warn(block.call)
+ self.warn(message)
  when Logger::INFO
- self.info(block.call)
+ self.info(message)
  when Logger::DEBUG
- self.debug(block.call)
+ self.debug(message)
  else
- self.trace(block.call)
+ self.trace(message)
  end
  end
  }
@@ -280,7 +287,7 @@ DESC
  end

  producer = get_producer
- handler = enqueue_with_retry(producer, topic, record_buf, message_key, partition)
+ handler = enqueue_with_retry(producer, topic, record_buf, message_key, partition, time)
  handler
  }.each { |handler|
  handler.wait(max_wait_timeout: @rdkafka_delivery_handle_poll_timeout) if @rdkafka_delivery_handle_poll_timeout != 0
@@ -292,11 +299,11 @@ DESC
  raise e
  end

- def enqueue_with_retry(producer, topic, record_buf, message_key, partition)
+ def enqueue_with_retry(producer, topic, record_buf, message_key, partition, time)
  attempt = 0
  loop do
  begin
- handler = producer.produce(topic: topic, payload: record_buf, key: message_key, partition: partition)
+ handler = producer.produce(topic: topic, payload: record_buf, key: message_key, partition: partition, timestamp: @use_event_time ? Time.at(time) : nil)
  return handler
  rescue Exception => e
  if e.respond_to?(:code) && e.code == :queue_full
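
The hunks above add a use_event_time option to the rdkafka-based output and thread the event time through enqueue_with_retry into producer.produce as the Kafka message timestamp. A minimal sketch of how this would be enabled, assuming the rdkafka output type and an illustrative broker and topic:

    <match app.**>
      @type rdkafka
      brokers localhost:9092
      default_topic events
      use_event_time true    # stamp each Kafka message with the fluentd event time instead of the enqueue time
    </match>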
@@ -56,6 +56,8 @@ DESC
  :desc => <<-DESC
  Set true to remove topic key from data
  DESC
+ config_param :exclude_fields, :array, :default => [], value_type: :string,
+ :desc => 'Fields to remove from data where the value is a jsonpath to a record value'
  config_param :headers, :hash, default: {}, symbolize_keys: true, value_type: :string,
  :desc => 'Kafka message headers'
  config_param :headers_from_record, :hash, default: {}, symbolize_keys: true, value_type: :string,
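
The new exclude_fields parameter takes record_accessor-style paths (the kafka2 test added later in this diff passes "$.foo"). A minimal usage sketch, assuming these hunks belong to the rdkafka2 output and using a hypothetical field name:

    <match app.**>
      @type rdkafka2
      brokers localhost:9092
      default_topic events
      exclude_fields $.credentials   # hypothetical field; removed from each record before formatting and sending
    </match>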
@@ -72,6 +74,7 @@ DESC
  The codec the producer uses to compress messages. Used for compression.codec
  Supported codecs: (gzip|snappy)
  DESC
+ config_param :use_event_time, :bool, :default => false, :desc => 'Use fluentd event time for rdkafka timestamp'
  config_param :max_send_limit_bytes, :size, :default => nil
  config_param :discard_kafka_delivery_failed, :bool, :default => false
  config_param :rdkafka_buffering_max_ms, :integer, :default => nil, :desc => 'Used for queue.buffering.max.ms'
@@ -84,6 +87,7 @@ DESC

  config_param :max_enqueue_retries, :integer, :default => 3
  config_param :enqueue_retry_backoff, :integer, :default => 3
+ config_param :max_enqueue_bytes_per_second, :size, :default => nil, :desc => 'The maximum number of enqueueing bytes per second'

  config_param :service_name, :string, :default => nil, :desc => 'Used for sasl.kerberos.service.name'
  config_param :ssl_client_cert_key_password, :string, :default => nil, :desc => 'Used for ssl.key.password'
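
max_enqueue_bytes_per_second caps how many bytes per second write() may hand to librdkafka, enforced by the EnqueueRate class introduced in the next hunk. A sketch, again assuming the rdkafka2 output; the 1m value is illustrative (fluentd size-type values accept k/m/g suffixes):

    <match app.**>
      @type rdkafka2
      brokers localhost:9092
      default_topic events
      max_enqueue_bytes_per_second 1m   # throttle enqueueing to roughly 1 MiB per second
    </match>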
@@ -99,34 +103,96 @@ DESC
  include Fluent::KafkaPluginUtil::SSLSettings
  include Fluent::KafkaPluginUtil::SaslSettings

+ class EnqueueRate
+ class LimitExceeded < StandardError
+ attr_reader :next_retry_clock
+ def initialize(next_retry_clock)
+ @next_retry_clock = next_retry_clock
+ end
+ end
+
+ def initialize(limit_bytes_per_second)
+ @mutex = Mutex.new
+ @start_clock = Fluent::Clock.now
+ @bytes_per_second = 0
+ @limit_bytes_per_second = limit_bytes_per_second
+ @commits = {}
+ end
+
+ def raise_if_limit_exceeded(bytes_to_enqueue)
+ return if @limit_bytes_per_second.nil?
+
+ @mutex.synchronize do
+ @commits[Thread.current] = {
+ clock: Fluent::Clock.now,
+ bytesize: bytes_to_enqueue,
+ }
+
+ @bytes_per_second += @commits[Thread.current][:bytesize]
+ duration = @commits[Thread.current][:clock] - @start_clock
+
+ if duration < 1.0
+ if @bytes_per_second > @limit_bytes_per_second
+ raise LimitExceeded.new(@start_clock + 1.0)
+ end
+ else
+ @start_clock = @commits[Thread.current][:clock]
+ @bytes_per_second = @commits[Thread.current][:bytesize]
+ end
+ end
+ end
+
+ def revert
+ return if @limit_bytes_per_second.nil?
+
+ @mutex.synchronize do
+ return unless @commits[Thread.current]
+ return unless @commits[Thread.current][:clock]
+ if @commits[Thread.current][:clock] >= @start_clock
+ @bytes_per_second -= @commits[Thread.current][:bytesize]
+ end
+ @commits[Thread.current] = nil
+ end
+ end
+ end
+
  def initialize
  super

  @producers = nil
  @producers_mutex = nil
  @shared_producer = nil
+ @enqueue_rate = nil
+ @writing_threads_mutex = Mutex.new
+ @writing_threads = Set.new
  end

  def configure(conf)
  super
  log.instance_eval {
- def add(level, &block)
- return unless block
+ def add(level, message = nil)
+ if message.nil?
+ if block_given?
+ message = yield
+ else
+ return
+ end
+ end

  # Follow rdkakfa's log level. See also rdkafka-ruby's bindings.rb: https://github.com/appsignal/rdkafka-ruby/blob/e5c7261e3f2637554a5c12b924be297d7dca1328/lib/rdkafka/bindings.rb#L117
  case level
  when Logger::FATAL
- self.fatal(block.call)
+ self.fatal(message)
  when Logger::ERROR
- self.error(block.call)
+ self.error(message)
  when Logger::WARN
- self.warn(block.call)
+ self.warn(message)
  when Logger::INFO
- self.info(block.call)
+ self.info(message)
  when Logger::DEBUG
- self.debug(block.call)
+ self.debug(message)
  else
- self.trace(block.call)
+ self.trace(message)
  end
  end
  }
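
To make the rate limiter's control flow easier to follow, here is a rough standalone sketch of how enqueue_with_retry (changed further down in this diff) drives EnqueueRate; producer and payload are placeholders, and Fluent::Clock comes from fluentd itself:

    rate = EnqueueRate.new(1024 * 1024)               # budget: 1 MiB per one-second window
    begin
      rate.raise_if_limit_exceeded(payload.bytesize)  # count the bytes; raises once the window is over budget
      producer.produce(topic: 'events', payload: payload)
    rescue EnqueueRate::LimitExceeded => e
      rate.revert                                     # un-count bytes that were never actually enqueued
      wait = e.next_retry_clock - Fluent::Clock.now
      sleep(wait) if wait > 0.0                       # wait for the current window to end, then try again
      retry
    end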
@@ -158,6 +224,12 @@ DESC
  @headers_from_record.each do |key, value|
  @headers_from_record_accessors[key] = record_accessor_create(value)
  end
+
+ @exclude_field_accessors = @exclude_fields.map do |field|
+ record_accessor_create(field)
+ end
+
+ @enqueue_rate = EnqueueRate.new(@max_enqueue_bytes_per_second) unless @max_enqueue_bytes_per_second.nil?
  end

  def build_config
@@ -221,8 +293,19 @@ DESC
  true
  end

+ def wait_writing_threads
+ done = false
+ until done do
+ @writing_threads_mutex.synchronize do
+ done = true if @writing_threads.empty?
+ end
+ sleep(1) unless done
+ end
+ end
+
  def shutdown
  super
+ wait_writing_threads
  shutdown_producers
  end

@@ -279,6 +362,7 @@ DESC
  end

  def write(chunk)
+ @writing_threads_mutex.synchronize { @writing_threads.add(Thread.current) }
  tag = chunk.metadata.tag
  topic = if @topic
  extract_placeholders(@topic, chunk)
@@ -305,6 +389,12 @@ DESC
  headers[key] = header_accessor.call(record)
  end

+ unless @exclude_fields.empty?
+ @exclude_field_accessors.each do |exclude_field_acessor|
+ exclude_field_acessor.delete(record)
+ end
+ end
+
  record_buf = @formatter_proc.call(tag, time, record)
  record_buf_bytes = record_buf.bytesize
  if @max_send_limit_bytes && record_buf_bytes > @max_send_limit_bytes
@@ -316,7 +406,7 @@ DESC
  next
  end

- handler = enqueue_with_retry(producer, topic, record_buf, message_key, partition, headers)
+ handler = enqueue_with_retry(producer, topic, record_buf, message_key, partition, headers, time)
  if @rdkafka_delivery_handle_poll_timeout != 0
  handlers << handler
  end
@@ -333,14 +423,22 @@ DESC
  # Raise exception to retry sendind messages
  raise e
  end
+ ensure
+ @writing_threads_mutex.synchronize { @writing_threads.delete(Thread.current) }
  end

- def enqueue_with_retry(producer, topic, record_buf, message_key, partition, headers)
+ def enqueue_with_retry(producer, topic, record_buf, message_key, partition, headers, time)
  attempt = 0
  loop do
  begin
- return producer.produce(topic: topic, payload: record_buf, key: message_key, partition: partition, headers: headers)
+ @enqueue_rate.raise_if_limit_exceeded(record_buf.bytesize) if @enqueue_rate
+ return producer.produce(topic: topic, payload: record_buf, key: message_key, partition: partition, headers: headers, timestamp: @use_event_time ? Time.at(time) : nil)
+ rescue EnqueueRate::LimitExceeded => e
+ @enqueue_rate.revert if @enqueue_rate
+ duration = e.next_retry_clock - Fluent::Clock.now
+ sleep(duration) if duration > 0.0
  rescue Exception => e
+ @enqueue_rate.revert if @enqueue_rate
  if e.respond_to?(:code) && e.code == :queue_full
  if attempt <= @max_enqueue_retries
  log.warn "Failed to enqueue message; attempting retry #{attempt} of #{@max_enqueue_retries} after #{@enqueue_retry_backoff}s"
data/test/helper.rb CHANGED
@@ -8,6 +8,7 @@ rescue Bundler::BundlerError => e
  exit e.status_code
  end
  require 'test/unit'
+ require 'test/unit/rr'

  $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
  $LOAD_PATH.unshift(File.dirname(__FILE__))
@@ -22,6 +23,12 @@ unless ENV.has_key?('VERBOSE')
  end

  require 'fluent/plugin/out_kafka'
+ require 'fluent/plugin/out_kafka_buffered'
+ require 'fluent/plugin/out_kafka2'
+ require 'fluent/plugin/in_kafka'
+ require 'fluent/plugin/in_kafka_group'
+
+ require "fluent/test/driver/output"

  class Test::Unit::TestCase
  end
@@ -0,0 +1,66 @@
+ require 'helper'
+ require 'fluent/test/driver/input'
+ require 'securerandom'
+
+ class KafkaInputTest < Test::Unit::TestCase
+ def setup
+ Fluent::Test.setup
+ end
+
+ TOPIC_NAME = "kafka-input-#{SecureRandom.uuid}"
+
+ CONFIG = %[
+ @type kafka
+ brokers localhost:9092
+ format text
+ @label @kafka
+ topics #{TOPIC_NAME}
+ ]
+
+ def create_driver(conf = CONFIG)
+ Fluent::Test::Driver::Input.new(Fluent::KafkaInput).configure(conf)
+ end
+
+
+ def test_configure
+ d = create_driver
+ assert_equal TOPIC_NAME, d.instance.topics
+ assert_equal 'text', d.instance.format
+ assert_equal 'localhost:9092', d.instance.brokers
+ end
+
+ def test_multi_worker_support
+ d = create_driver
+ assert_false d.instance.multi_workers_ready?
+ end
+
+ class ConsumeTest < self
+ def setup
+ @kafka = Kafka.new(["localhost:9092"], client_id: 'kafka')
+ @producer = @kafka.producer
+ end
+
+ def teardown
+ @kafka.delete_topic(TOPIC_NAME)
+ @kafka.close
+ end
+
+ def test_consume
+ conf = %[
+ @type kafka
+ brokers localhost:9092
+ format text
+ @label @kafka
+ topics #{TOPIC_NAME}
+ ]
+ d = create_driver
+
+ d.run(expect_records: 1, timeout: 10) do
+ @producer.produce("Hello, fluent-plugin-kafka!", topic: TOPIC_NAME)
+ @producer.deliver_messages
+ end
+ expected = {'message' => 'Hello, fluent-plugin-kafka!'}
+ assert_equal expected, d.events[0][2]
+ end
+ end
+ end
@@ -0,0 +1,67 @@
+ require 'helper'
+ require 'fluent/test/driver/input'
+ require 'securerandom'
+
+ class KafkaGroupInputTest < Test::Unit::TestCase
+ def setup
+ Fluent::Test.setup
+ end
+
+ TOPIC_NAME = "kafka-input-#{SecureRandom.uuid}"
+
+ CONFIG = %[
+ @type kafka
+ brokers localhost:9092
+ consumer_group fluentd
+ format text
+ @label @kafka
+ topics #{TOPIC_NAME}
+ ]
+
+ def create_driver(conf = CONFIG)
+ Fluent::Test::Driver::Input.new(Fluent::KafkaGroupInput).configure(conf)
+ end
+
+
+ def test_configure
+ d = create_driver
+ assert_equal [TOPIC_NAME], d.instance.topics
+ assert_equal 'text', d.instance.format
+ assert_equal 'localhost:9092', d.instance.brokers
+ end
+
+ def test_multi_worker_support
+ d = create_driver
+ assert_true d.instance.multi_workers_ready?
+ end
+
+ class ConsumeTest < self
+ def setup
+ @kafka = Kafka.new(["localhost:9092"], client_id: 'kafka')
+ @producer = @kafka.producer
+ end
+
+ def teardown
+ @kafka.delete_topic(TOPIC_NAME)
+ @kafka.close
+ end
+
+ def test_consume
+ conf = %[
+ @type kafka
+ brokers localhost:9092
+ format text
+ @label @kafka
+ topics #{TOPIC_NAME}
+ ]
+ d = create_driver
+
+ d.run(expect_records: 1, timeout: 10) do
+ @producer.produce("Hello, fluent-plugin-kafka!", topic: TOPIC_NAME)
+ @producer.deliver_messages
+ end
+ expected = {'message' => 'Hello, fluent-plugin-kafka!'}
+ assert_equal expected, d.events[0][2]
+ end
+ end
+ end
@@ -1,14 +1,8 @@
  require 'helper'
  require 'fluent/plugin/kafka_plugin_util'

- class File
- def File::read(path)
- path
- end
- end
-
  class KafkaPluginUtilTest < Test::Unit::TestCase
-
+
  def self.config_param(name, type, options)
  end
  include Fluent::KafkaPluginUtil::SSLSettings
@@ -20,19 +14,31 @@ class KafkaPluginUtilTest < Test::Unit::TestCase
  end

  def test_read_ssl_file_when_nil
- assert_equal(nil, read_ssl_file(nil))
+ stub(File).read(anything) do |path|
+ path
+ end
+ assert_equal(nil, read_ssl_file(nil))
  end

  def test_read_ssl_file_when_empty_string
- assert_equal(nil, read_ssl_file(""))
+ stub(File).read(anything) do |path|
+ path
+ end
+ assert_equal(nil, read_ssl_file(""))
  end

  def test_read_ssl_file_when_non_empty_path
- assert_equal("path", read_ssl_file("path"))
+ stub(File).read(anything) do |path|
+ path
+ end
+ assert_equal("path", read_ssl_file("path"))
  end

  def test_read_ssl_file_when_non_empty_array
- assert_equal(["a","b"], read_ssl_file(["a","b"]))
+ stub(File).read(anything) do |path|
+ path
+ end
+ assert_equal(["a","b"], read_ssl_file(["a","b"]))
  end

- end
+ end
@@ -43,6 +43,16 @@ class KafkaOutputTest < Test::Unit::TestCase
  d = create_driver
  end

+ data("crc32" => "crc32",
+ "murmur2" => "murmur2")
+ def test_partitioner_hash_function(data)
+ hash_type = data
+ d = create_driver(CONFIG + %[partitioner_hash_function #{hash_type}])
+ assert_nothing_raised do
+ d.instance.refresh_client
+ end
+ end
+
  def test_mutli_worker_support
  d = create_driver
  assert_equal true, d.instance.multi_workers_ready?
@@ -0,0 +1,116 @@
+ require 'helper'
+ require 'fluent/test/helpers'
+ require 'fluent/test/driver/input'
+ require 'fluent/test/driver/output'
+ require 'securerandom'
+
+ class Kafka2OutputTest < Test::Unit::TestCase
+ include Fluent::Test::Helpers
+
+ def setup
+ Fluent::Test.setup
+ end
+
+ def base_config
+ config_element('ROOT', '', {"@type" => "kafka2"}, [
+ config_element('format', "", {"@type" => "json"})
+ ])
+ end
+
+ def config(default_topic: "kitagawakeiko")
+ base_config + config_element('ROOT', '', {"default_topic" => default_topic,
+ "brokers" => "localhost:9092"}, [
+ ])
+ end
+
+ def create_driver(conf = config, tag='test')
+ Fluent::Test::Driver::Output.new(Fluent::Kafka2Output).configure(conf)
+ end
+
+ def test_configure
+ assert_nothing_raised(Fluent::ConfigError) {
+ create_driver(base_config)
+ }
+
+ assert_nothing_raised(Fluent::ConfigError) {
+ create_driver(config)
+ }
+
+ assert_nothing_raised(Fluent::ConfigError) {
+ create_driver(config + config_element('buffer', "", {"@type" => "memory"}))
+ }
+
+ d = create_driver
+ assert_equal 'kitagawakeiko', d.instance.default_topic
+ assert_equal ['localhost:9092'], d.instance.brokers
+ end
+
+ data("crc32" => "crc32",
+ "murmur2" => "murmur2")
+ def test_partitioner_hash_function(data)
+ hash_type = data
+ d = create_driver(config + config_element('ROOT', '', {"partitioner_hash_function" => hash_type}))
+ assert_nothing_raised do
+ d.instance.refresh_client
+ end
+ end
+
+ def test_mutli_worker_support
+ d = create_driver
+ assert_equal true, d.instance.multi_workers_ready?
+ end
+
+ class WriteTest < self
+ TOPIC_NAME = "kafka-output-#{SecureRandom.uuid}"
+
+ INPUT_CONFIG = %[
+ @type kafka
+ brokers localhost:9092
+ format json
+ @label @kafka
+ topics #{TOPIC_NAME}
+ ]
+
+ def create_target_driver(conf = INPUT_CONFIG)
+ Fluent::Test::Driver::Input.new(Fluent::KafkaInput).configure(conf)
+ end
+
+ def setup
+ @kafka = Kafka.new(["localhost:9092"], client_id: 'kafka')
+ end
+
+ def teardown
+ @kafka.delete_topic(TOPIC_NAME)
+ @kafka.close
+ end
+
+ def test_write
+ target_driver = create_target_driver
+ expected_message = {"a" => 2}
+ target_driver.run(expect_records: 1, timeout: 5) do
+ sleep 2
+ d = create_driver(config(default_topic: TOPIC_NAME))
+ d.run do
+ d.feed("test", event_time, expected_message)
+ end
+ end
+ actual_messages = target_driver.events.collect { |event| event[2] }
+ assert_equal([expected_message], actual_messages)
+ end
+
+ def test_exclude_fields
+ conf = config(default_topic: TOPIC_NAME) +
+ config_element('ROOT', '', {"exclude_fields" => "$.foo"}, [])
+ target_driver = create_target_driver
+ target_driver.run(expect_records: 1, timeout: 5) do
+ sleep 2
+ d = create_driver(conf)
+ d.run do
+ d.feed('test', event_time, {'a' => 'b', 'foo' => 'bar', 'message' => 'test'})
+ end
+ end
+ actual_messages = target_driver.events.collect { |event| event[2] }
+ assert_equal([{'a' => 'b', 'message' => 'test'}], actual_messages)
+ end
+ end
+ end