fluent-plugin-kafka 0.16.3 → 0.17.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -48,7 +48,7 @@ Set true to remove partition from data
  DESC
  config_param :exclude_message_key, :bool, :default => false,
  :desc => <<-DESC
- Set true to remove partition key from data
+ Set true to remove message key from data
  DESC
  config_param :exclude_topic_key, :bool, :default => false,
  :desc => <<-DESC
@@ -65,6 +65,7 @@ DESC
  The codec the producer uses to compress messages.
  Supported codecs: (gzip|snappy)
  DESC
+ config_param :use_event_time, :bool, :default => false, :desc => 'Use fluentd event time for rdkafka timestamp'
  config_param :max_send_limit_bytes, :size, :default => nil
  config_param :rdkafka_buffering_max_ms, :integer, :default => nil
  config_param :rdkafka_buffering_max_messages, :integer, :default => nil
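Note: The new use_event_time option stamps each produced message with the fluentd event time (converted with Time.at(time) in the produce call patched further down) instead of leaving the timestamp to librdkafka. A minimal, illustrative configuration sketch, assuming this hunk belongs to the rdkafka output type (broker address and match pattern are made up):

    <match app.**>
      @type rdkafka
      brokers localhost:9092
      default_topic logs
      use_event_time true   # Kafka message timestamp = fluentd event time
    </match>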
@@ -91,23 +92,29 @@ DESC
  def configure(conf)
  super
  log.instance_eval {
- def add(level, &block)
- return unless block
+ def add(level, message = nil)
+ if message.nil?
+ if block_given?
+ message = yield
+ else
+ return
+ end
+ end

  # Follow rdkakfa's log level. See also rdkafka-ruby's bindings.rb: https://github.com/appsignal/rdkafka-ruby/blob/e5c7261e3f2637554a5c12b924be297d7dca1328/lib/rdkafka/bindings.rb#L117
  case level
  when Logger::FATAL
- self.fatal(block.call)
+ self.fatal(message)
  when Logger::ERROR
- self.error(block.call)
+ self.error(message)
  when Logger::WARN
- self.warn(block.call)
+ self.warn(message)
  when Logger::INFO
- self.info(block.call)
+ self.info(message)
  when Logger::DEBUG
- self.debug(block.call)
+ self.debug(message)
  else
- self.trace(block.call)
+ self.trace(message)
  end
  end
  }
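Note: The rewritten add override accepts the log message either positionally or as a block rather than requiring a block; a call that supplies neither still returns silently. Illustrative call shapes it now handles (the messages are made up):

    log.add(Logger::WARN, "rdkafka: broker transport failure")    # positional message
    log.add(Logger::WARN) { "rdkafka: broker transport failure" } # block form still supported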
@@ -271,7 +278,8 @@ DESC
  record_buf = @formatter_proc.call(tag, time, record)
  record_buf_bytes = record_buf.bytesize
  if @max_send_limit_bytes && record_buf_bytes > @max_send_limit_bytes
- log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :record => record
+ log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :record_size => record_buf_bytes
+ log.debug "Skipped event:", :record => record
  next
  end
  rescue StandardError => e
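Note: Oversized records are now reported at warn level with only their byte size; the full record body moves to debug level. A sketch of surfacing skipped records while troubleshooting, assuming the standard per-plugin @log_level directive (values are illustrative):

    <match app.**>
      @type rdkafka
      brokers localhost:9092
      default_topic logs
      max_send_limit_bytes 1000000
      @log_level debug   # required to see the "Skipped event:" record bodies
    </match>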
@@ -280,7 +288,7 @@ DESC
  end

  producer = get_producer
- handler = enqueue_with_retry(producer, topic, record_buf, message_key, partition)
+ handler = enqueue_with_retry(producer, topic, record_buf, message_key, partition, time)
  handler
  }.each { |handler|
  handler.wait(max_wait_timeout: @rdkafka_delivery_handle_poll_timeout) if @rdkafka_delivery_handle_poll_timeout != 0
@@ -292,11 +300,11 @@ DESC
  raise e
  end

- def enqueue_with_retry(producer, topic, record_buf, message_key, partition)
+ def enqueue_with_retry(producer, topic, record_buf, message_key, partition, time)
  attempt = 0
  loop do
  begin
- handler = producer.produce(topic: topic, payload: record_buf, key: message_key, partition: partition)
+ handler = producer.produce(topic: topic, payload: record_buf, key: message_key, partition: partition, timestamp: @use_event_time ? Time.at(time) : nil)
  return handler
  rescue Exception => e
  if e.respond_to?(:code) && e.code == :queue_full
@@ -56,6 +56,8 @@ DESC
  :desc => <<-DESC
  Set true to remove topic key from data
  DESC
+ config_param :exclude_fields, :array, :default => [], value_type: :string,
+ :desc => 'Fields to remove from data where the value is a jsonpath to a record value'
  config_param :headers, :hash, default: {}, symbolize_keys: true, value_type: :string,
  :desc => 'Kafka message headers'
  config_param :headers_from_record, :hash, default: {}, symbolize_keys: true, value_type: :string,
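Note: exclude_fields takes record_accessor-style paths that are deleted from each record just before it is formatted (see the write hunk later in this diff). A minimal sketch, assuming this file is the rdkafka2 output type with json formatting configured (field name and brokers are illustrative):

    <match app.**>
      @type rdkafka2
      brokers localhost:9092
      default_topic logs
      exclude_fields $.password   # removed from the record before it is sent to Kafka
      <format>
        @type json
      </format>
    </match>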
@@ -72,6 +74,7 @@ DESC
  The codec the producer uses to compress messages. Used for compression.codec
  Supported codecs: (gzip|snappy)
  DESC
+ config_param :use_event_time, :bool, :default => false, :desc => 'Use fluentd event time for rdkafka timestamp'
  config_param :max_send_limit_bytes, :size, :default => nil
  config_param :discard_kafka_delivery_failed, :bool, :default => false
  config_param :rdkafka_buffering_max_ms, :integer, :default => nil, :desc => 'Used for queue.buffering.max.ms'
@@ -84,6 +87,7 @@ DESC

  config_param :max_enqueue_retries, :integer, :default => 3
  config_param :enqueue_retry_backoff, :integer, :default => 3
+ config_param :max_enqueue_bytes_per_second, :size, :default => nil, :desc => 'The maximum number of enqueueing bytes per second'

  config_param :service_name, :string, :default => nil, :desc => 'Used for sasl.kerberos.service.name'
  config_param :ssl_client_cert_key_password, :string, :default => nil, :desc => 'Used for ssl.key.password'
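Note: max_enqueue_bytes_per_second caps how many bytes per second the plugin hands to librdkafka; once the budget for the current one-second window is spent, the writing thread sleeps until the window rolls over (see the EnqueueRate class added in the next hunk). An illustrative fragment using Fluentd's size suffixes:

    <match app.**>
      @type rdkafka2
      brokers localhost:9092
      default_topic logs
      max_enqueue_bytes_per_second 5m   # throttle enqueueing to roughly 5 MiB/s
      <format>
        @type json
      </format>
    </match>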
@@ -99,34 +103,96 @@ DESC
  include Fluent::KafkaPluginUtil::SSLSettings
  include Fluent::KafkaPluginUtil::SaslSettings

+ class EnqueueRate
+ class LimitExceeded < StandardError
+ attr_reader :next_retry_clock
+ def initialize(next_retry_clock)
+ @next_retry_clock = next_retry_clock
+ end
+ end
+
+ def initialize(limit_bytes_per_second)
+ @mutex = Mutex.new
+ @start_clock = Fluent::Clock.now
+ @bytes_per_second = 0
+ @limit_bytes_per_second = limit_bytes_per_second
+ @commits = {}
+ end
+
+ def raise_if_limit_exceeded(bytes_to_enqueue)
+ return if @limit_bytes_per_second.nil?
+
+ @mutex.synchronize do
+ @commits[Thread.current] = {
+ clock: Fluent::Clock.now,
+ bytesize: bytes_to_enqueue,
+ }
+
+ @bytes_per_second += @commits[Thread.current][:bytesize]
+ duration = @commits[Thread.current][:clock] - @start_clock
+
+ if duration < 1.0
+ if @bytes_per_second > @limit_bytes_per_second
+ raise LimitExceeded.new(@start_clock + 1.0)
+ end
+ else
+ @start_clock = @commits[Thread.current][:clock]
+ @bytes_per_second = @commits[Thread.current][:bytesize]
+ end
+ end
+ end
+
+ def revert
+ return if @limit_bytes_per_second.nil?
+
+ @mutex.synchronize do
+ return unless @commits[Thread.current]
+ return unless @commits[Thread.current][:clock]
+ if @commits[Thread.current][:clock] >= @start_clock
+ @bytes_per_second -= @commits[Thread.current][:bytesize]
+ end
+ @commits[Thread.current] = nil
+ end
+ end
+ end
+
  def initialize
  super

  @producers = nil
  @producers_mutex = nil
  @shared_producer = nil
+ @enqueue_rate = nil
+ @writing_threads_mutex = Mutex.new
+ @writing_threads = Set.new
  end

  def configure(conf)
  super
  log.instance_eval {
- def add(level, &block)
- return unless block
+ def add(level, message = nil)
+ if message.nil?
+ if block_given?
+ message = yield
+ else
+ return
+ end
+ end

  # Follow rdkakfa's log level. See also rdkafka-ruby's bindings.rb: https://github.com/appsignal/rdkafka-ruby/blob/e5c7261e3f2637554a5c12b924be297d7dca1328/lib/rdkafka/bindings.rb#L117
  case level
  when Logger::FATAL
- self.fatal(block.call)
+ self.fatal(message)
  when Logger::ERROR
- self.error(block.call)
+ self.error(message)
  when Logger::WARN
- self.warn(block.call)
+ self.warn(message)
  when Logger::INFO
- self.info(block.call)
+ self.info(message)
  when Logger::DEBUG
- self.debug(block.call)
+ self.debug(message)
  else
- self.trace(block.call)
+ self.trace(message)
  end
  end
  }
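Note: EnqueueRate accounts in one-second windows anchored at @start_clock: bytes committed inside the current window accumulate, and once they exceed the limit it raises LimitExceeded carrying the clock value at which the window ends; revert gives the bytes back when the enqueue did not happen. A condensed Ruby sketch of the intended calling pattern (it mirrors the enqueue_with_retry hunk further down; the 1 MiB/s limit and the variables outside the diff are illustrative):

    rate = EnqueueRate.new(1024 * 1024)            # illustrative limit: 1 MiB/s
    begin
      rate.raise_if_limit_exceeded(record_buf.bytesize)
      producer.produce(topic: topic, payload: record_buf)
    rescue EnqueueRate::LimitExceeded => e
      rate.revert                                  # undo the reserved bytes
      wait = e.next_retry_clock - Fluent::Clock.now
      sleep(wait) if wait > 0.0                    # wait until the window rolls over
      retry
    end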
@@ -158,6 +224,12 @@ DESC
  @headers_from_record.each do |key, value|
  @headers_from_record_accessors[key] = record_accessor_create(value)
  end
+
+ @exclude_field_accessors = @exclude_fields.map do |field|
+ record_accessor_create(field)
+ end
+
+ @enqueue_rate = EnqueueRate.new(@max_enqueue_bytes_per_second) unless @max_enqueue_bytes_per_second.nil?
  end

  def build_config
@@ -221,8 +293,19 @@ DESC
  true
  end

+ def wait_writing_threads
+ done = false
+ until done do
+ @writing_threads_mutex.synchronize do
+ done = true if @writing_threads.empty?
+ end
+ sleep(1) unless done
+ end
+ end
+
  def shutdown
  super
+ wait_writing_threads
  shutdown_producers
  end

@@ -279,6 +362,7 @@ DESC
  end

  def write(chunk)
+ @writing_threads_mutex.synchronize { @writing_threads.add(Thread.current) }
  tag = chunk.metadata.tag
  topic = if @topic
  extract_placeholders(@topic, chunk)
@@ -305,10 +389,17 @@ DESC
  headers[key] = header_accessor.call(record)
  end

+ unless @exclude_fields.empty?
+ @exclude_field_accessors.each do |exclude_field_acessor|
+ exclude_field_acessor.delete(record)
+ end
+ end
+
  record_buf = @formatter_proc.call(tag, time, record)
  record_buf_bytes = record_buf.bytesize
  if @max_send_limit_bytes && record_buf_bytes > @max_send_limit_bytes
- log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :record => record
+ log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :record_size => record_buf_bytes
+ log.debug "Skipped event:", :record => record
  next
  end
  rescue StandardError => e
@@ -316,7 +407,7 @@ DESC
  next
  end

- handler = enqueue_with_retry(producer, topic, record_buf, message_key, partition, headers)
+ handler = enqueue_with_retry(producer, topic, record_buf, message_key, partition, headers, time)
  if @rdkafka_delivery_handle_poll_timeout != 0
  handlers << handler
  end
@@ -333,14 +424,22 @@ DESC
  # Raise exception to retry sendind messages
  raise e
  end
+ ensure
+ @writing_threads_mutex.synchronize { @writing_threads.delete(Thread.current) }
  end

- def enqueue_with_retry(producer, topic, record_buf, message_key, partition, headers)
+ def enqueue_with_retry(producer, topic, record_buf, message_key, partition, headers, time)
  attempt = 0
  loop do
  begin
- return producer.produce(topic: topic, payload: record_buf, key: message_key, partition: partition, headers: headers)
+ @enqueue_rate.raise_if_limit_exceeded(record_buf.bytesize) if @enqueue_rate
+ return producer.produce(topic: topic, payload: record_buf, key: message_key, partition: partition, headers: headers, timestamp: @use_event_time ? Time.at(time) : nil)
+ rescue EnqueueRate::LimitExceeded => e
+ @enqueue_rate.revert if @enqueue_rate
+ duration = e.next_retry_clock - Fluent::Clock.now
+ sleep(duration) if duration > 0.0
  rescue Exception => e
+ @enqueue_rate.revert if @enqueue_rate
  if e.respond_to?(:code) && e.code == :queue_full
  if attempt <= @max_enqueue_retries
  log.warn "Failed to enqueue message; attempting retry #{attempt} of #{@max_enqueue_retries} after #{@enqueue_retry_backoff}s"
data/test/helper.rb CHANGED
@@ -8,6 +8,7 @@ rescue Bundler::BundlerError => e
  exit e.status_code
  end
  require 'test/unit'
+ require 'test/unit/rr'

  $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
  $LOAD_PATH.unshift(File.dirname(__FILE__))
@@ -22,8 +23,12 @@ unless ENV.has_key?('VERBOSE')
  end

  require 'fluent/plugin/out_kafka'
+ require 'fluent/plugin/out_kafka_buffered'
+ require 'fluent/plugin/out_kafka2'
  require 'fluent/plugin/in_kafka'
  require 'fluent/plugin/in_kafka_group'

+ require "fluent/test/driver/output"
+
  class Test::Unit::TestCase
  end
@@ -1,14 +1,8 @@
  require 'helper'
  require 'fluent/plugin/kafka_plugin_util'

- class File
- def File::read(path)
- path
- end
- end
-
  class KafkaPluginUtilTest < Test::Unit::TestCase
-
+
  def self.config_param(name, type, options)
  end
  include Fluent::KafkaPluginUtil::SSLSettings
@@ -20,19 +14,31 @@ class KafkaPluginUtilTest < Test::Unit::TestCase
  end

  def test_read_ssl_file_when_nil
- assert_equal(nil, read_ssl_file(nil))
+ stub(File).read(anything) do |path|
+ path
+ end
+ assert_equal(nil, read_ssl_file(nil))
  end

  def test_read_ssl_file_when_empty_string
- assert_equal(nil, read_ssl_file(""))
+ stub(File).read(anything) do |path|
+ path
+ end
+ assert_equal(nil, read_ssl_file(""))
  end

  def test_read_ssl_file_when_non_empty_path
- assert_equal("path", read_ssl_file("path"))
+ stub(File).read(anything) do |path|
+ path
+ end
+ assert_equal("path", read_ssl_file("path"))
  end

  def test_read_ssl_file_when_non_empty_array
- assert_equal(["a","b"], read_ssl_file(["a","b"]))
+ stub(File).read(anything) do |path|
+ path
+ end
+ assert_equal(["a","b"], read_ssl_file(["a","b"]))
  end

- end
+ end
@@ -43,6 +43,16 @@ class KafkaOutputTest < Test::Unit::TestCase
  d = create_driver
  end

+ data("crc32" => "crc32",
+ "murmur2" => "murmur2")
+ def test_partitioner_hash_function(data)
+ hash_type = data
+ d = create_driver(CONFIG + %[partitioner_hash_function #{hash_type}])
+ assert_nothing_raised do
+ d.instance.refresh_client
+ end
+ end
+
  def test_mutli_worker_support
  d = create_driver
  assert_equal true, d.instance.multi_workers_ready?
@@ -0,0 +1,116 @@
+ require 'helper'
+ require 'fluent/test/helpers'
+ require 'fluent/test/driver/input'
+ require 'fluent/test/driver/output'
+ require 'securerandom'
+
+ class Kafka2OutputTest < Test::Unit::TestCase
+ include Fluent::Test::Helpers
+
+ def setup
+ Fluent::Test.setup
+ end
+
+ def base_config
+ config_element('ROOT', '', {"@type" => "kafka2"}, [
+ config_element('format', "", {"@type" => "json"})
+ ])
+ end
+
+ def config(default_topic: "kitagawakeiko")
+ base_config + config_element('ROOT', '', {"default_topic" => default_topic,
+ "brokers" => "localhost:9092"}, [
+ ])
+ end
+
+ def create_driver(conf = config, tag='test')
+ Fluent::Test::Driver::Output.new(Fluent::Kafka2Output).configure(conf)
+ end
+
+ def test_configure
+ assert_nothing_raised(Fluent::ConfigError) {
+ create_driver(base_config)
+ }
+
+ assert_nothing_raised(Fluent::ConfigError) {
+ create_driver(config)
+ }
+
+ assert_nothing_raised(Fluent::ConfigError) {
+ create_driver(config + config_element('buffer', "", {"@type" => "memory"}))
+ }
+
+ d = create_driver
+ assert_equal 'kitagawakeiko', d.instance.default_topic
+ assert_equal ['localhost:9092'], d.instance.brokers
+ end
+
+ data("crc32" => "crc32",
+ "murmur2" => "murmur2")
+ def test_partitioner_hash_function(data)
+ hash_type = data
+ d = create_driver(config + config_element('ROOT', '', {"partitioner_hash_function" => hash_type}))
+ assert_nothing_raised do
+ d.instance.refresh_client
+ end
+ end
+
+ def test_mutli_worker_support
+ d = create_driver
+ assert_equal true, d.instance.multi_workers_ready?
+ end
+
+ class WriteTest < self
+ TOPIC_NAME = "kafka-output-#{SecureRandom.uuid}"
+
+ INPUT_CONFIG = %[
+ @type kafka
+ brokers localhost:9092
+ format json
+ @label @kafka
+ topics #{TOPIC_NAME}
+ ]
+
+ def create_target_driver(conf = INPUT_CONFIG)
+ Fluent::Test::Driver::Input.new(Fluent::KafkaInput).configure(conf)
+ end
+
+ def setup
+ @kafka = Kafka.new(["localhost:9092"], client_id: 'kafka')
+ end
+
+ def teardown
+ @kafka.delete_topic(TOPIC_NAME)
+ @kafka.close
+ end
+
+ def test_write
+ target_driver = create_target_driver
+ expected_message = {"a" => 2}
+ target_driver.run(expect_records: 1, timeout: 5) do
+ sleep 2
+ d = create_driver(config(default_topic: TOPIC_NAME))
+ d.run do
+ d.feed("test", event_time, expected_message)
+ end
+ end
+ actual_messages = target_driver.events.collect { |event| event[2] }
+ assert_equal([expected_message], actual_messages)
+ end
+
+ def test_exclude_fields
+ conf = config(default_topic: TOPIC_NAME) +
+ config_element('ROOT', '', {"exclude_fields" => "$.foo"}, [])
+ target_driver = create_target_driver
+ target_driver.run(expect_records: 1, timeout: 5) do
+ sleep 2
+ d = create_driver(conf)
+ d.run do
+ d.feed('test', event_time, {'a' => 'b', 'foo' => 'bar', 'message' => 'test'})
+ end
+ end
+ actual_messages = target_driver.events.collect { |event| event[2] }
+ assert_equal([{'a' => 'b', 'message' => 'test'}], actual_messages)
+ end
+ end
+ end
@@ -0,0 +1,68 @@
+ require 'helper'
+ require 'fluent/output'
+
+ class KafkaBufferedOutputTest < Test::Unit::TestCase
+ def setup
+ Fluent::Test.setup
+ end
+
+ BASE_CONFIG = %[
+ type kafka_buffered
+ ]
+
+ CONFIG = BASE_CONFIG + %[
+ default_topic kitagawakeiko
+ brokers localhost:9092
+ ]
+
+ def create_driver(conf = CONFIG, tag='test')
+ Fluent::Test::BufferedOutputTestDriver.new(Fluent::KafkaOutputBuffered, tag).configure(conf)
+ end
+
+ def test_configure
+ assert_nothing_raised(Fluent::ConfigError) {
+ create_driver(BASE_CONFIG)
+ }
+
+ assert_nothing_raised(Fluent::ConfigError) {
+ create_driver(CONFIG)
+ }
+
+ assert_nothing_raised(Fluent::ConfigError) {
+ create_driver(CONFIG + %[
+ buffer_type memory
+ ])
+ }
+
+ d = create_driver
+ assert_equal 'kitagawakeiko', d.instance.default_topic
+ assert_equal 'localhost:9092', d.instance.brokers
+ end
+
+ def test_format
+ d = create_driver
+ end
+
+ data("crc32" => "crc32",
+ "murmur2" => "murmur2")
+ def test_partitioner_hash_function(data)
+ hash_type = data
+ d = create_driver(CONFIG + %[partitioner_hash_function #{hash_type}])
+ assert_nothing_raised do
+ d.instance.refresh_client
+ end
+ end
+
+ def test_mutli_worker_support
+ d = create_driver
+ assert_equal true, d.instance.multi_workers_ready?
+
+ end
+
+ def test_write
+ d = create_driver
+ time = Time.parse("2011-01-02 13:14:15 UTC").to_i
+ d.emit({"a"=>1}, time)
+ d.emit({"a"=>2}, time)
+ end
+ end