fluent-plugin-kafka 0.16.3 → 0.17.3

data/lib/fluent/plugin/out_rdkafka.rb CHANGED
@@ -48,7 +48,7 @@ Set true to remove partition from data
  DESC
  config_param :exclude_message_key, :bool, :default => false,
  :desc => <<-DESC
- Set true to remove partition key from data
+ Set true to remove message key from data
  DESC
  config_param :exclude_topic_key, :bool, :default => false,
  :desc => <<-DESC
@@ -65,6 +65,7 @@ DESC
  The codec the producer uses to compress messages.
  Supported codecs: (gzip|snappy)
  DESC
+ config_param :use_event_time, :bool, :default => false, :desc => 'Use fluentd event time for rdkafka timestamp'
  config_param :max_send_limit_bytes, :size, :default => nil
  config_param :rdkafka_buffering_max_ms, :integer, :default => nil
  config_param :rdkafka_buffering_max_messages, :integer, :default => nil
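The new `use_event_time` option carries the fluentd event time through to the rdkafka message timestamp (see the `producer.produce(..., timestamp: ...)` change further down). A minimal standalone sketch of the conversion, assuming `time` is a fluentd event time (Unix seconds):

```ruby
# Sketch only: fluentd event time coerces to Unix seconds, so the
# plugin can hand rdkafka a Ruby Time when use_event_time is on.
def kafka_timestamp(time, use_event_time)
  # nil lets rdkafka fall back to the producer/broker clock
  use_event_time ? Time.at(time) : nil
end

puts kafka_timestamp(1609459200, true).utc  # => 2021-01-01 00:00:00 UTC
p kafka_timestamp(1609459200, false)        # => nil
```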
@@ -91,23 +92,29 @@ DESC
  def configure(conf)
  super
  log.instance_eval {
- def add(level, &block)
- return unless block
+ def add(level, message = nil)
+ if message.nil?
+ if block_given?
+ message = yield
+ else
+ return
+ end
+ end

  # Follow rdkafka's log level. See also rdkafka-ruby's bindings.rb: https://github.com/appsignal/rdkafka-ruby/blob/e5c7261e3f2637554a5c12b924be297d7dca1328/lib/rdkafka/bindings.rb#L117
  case level
  when Logger::FATAL
- self.fatal(block.call)
+ self.fatal(message)
  when Logger::ERROR
- self.error(block.call)
+ self.error(message)
  when Logger::WARN
- self.warn(block.call)
+ self.warn(message)
  when Logger::INFO
- self.info(block.call)
+ self.info(message)
  when Logger::DEBUG
- self.debug(block.call)
+ self.debug(message)
  else
- self.trace(block.call)
+ self.trace(message)
  end
  end
  }
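Why the `add` override changes shape: Ruby's `Logger#add` may be called with a positional message or with a block, and rdkafka-ruby logs through this interface, so the old block-only shim silently dropped positional messages. A standalone sketch of the new dual calling convention (hypothetical, simplified dispatch):

```ruby
require "logger"

# Accept both Logger#add calling conventions, then dispatch on severity.
def add(level, message = nil)
  if message.nil?
    if block_given?
      message = yield # lazily build the message only when needed
    else
      return          # nothing to log
    end
  end
  severity = Logger::SEV_LABEL[level] || "ANY"
  puts "#{severity}: #{message}"
end

add(Logger::ERROR, "positional message") # the old shim dropped this
add(Logger::FATAL) { "block message" }   # still supported
```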
@@ -271,7 +278,8 @@ DESC
  record_buf = @formatter_proc.call(tag, time, record)
  record_buf_bytes = record_buf.bytesize
  if @max_send_limit_bytes && record_buf_bytes > @max_send_limit_bytes
- log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :record => record
+ log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :record_size => record_buf_bytes
+ log.debug "Skipped event:", :record => record
  next
  end
  rescue StandardError => e
@@ -280,7 +288,7 @@ DESC
  end

  producer = get_producer
- handler = enqueue_with_retry(producer, topic, record_buf, message_key, partition)
+ handler = enqueue_with_retry(producer, topic, record_buf, message_key, partition, time)
  handler
  }.each { |handler|
  handler.wait(max_wait_timeout: @rdkafka_delivery_handle_poll_timeout) if @rdkafka_delivery_handle_poll_timeout != 0
@@ -292,11 +300,11 @@ DESC
  raise e
  end

- def enqueue_with_retry(producer, topic, record_buf, message_key, partition)
+ def enqueue_with_retry(producer, topic, record_buf, message_key, partition, time)
  attempt = 0
  loop do
  begin
- handler = producer.produce(topic: topic, payload: record_buf, key: message_key, partition: partition)
+ handler = producer.produce(topic: topic, payload: record_buf, key: message_key, partition: partition, timestamp: @use_event_time ? Time.at(time) : nil)
  return handler
  rescue Exception => e
  if e.respond_to?(:code) && e.code == :queue_full
data/lib/fluent/plugin/out_rdkafka2.rb CHANGED
@@ -56,6 +56,8 @@ DESC
  :desc => <<-DESC
  Set true to remove topic key from data
  DESC
+ config_param :exclude_fields, :array, :default => [], value_type: :string,
+ :desc => 'Fields to remove from data where the value is a jsonpath to a record value'
  config_param :headers, :hash, default: {}, symbolize_keys: true, value_type: :string,
  :desc => 'Kafka message headers'
  config_param :headers_from_record, :hash, default: {}, symbolize_keys: true, value_type: :string,
@@ -72,6 +74,7 @@ DESC
  The codec the producer uses to compress messages. Used for compression.codec
  Supported codecs: (gzip|snappy)
  DESC
+ config_param :use_event_time, :bool, :default => false, :desc => 'Use fluentd event time for rdkafka timestamp'
  config_param :max_send_limit_bytes, :size, :default => nil
  config_param :discard_kafka_delivery_failed, :bool, :default => false
  config_param :rdkafka_buffering_max_ms, :integer, :default => nil, :desc => 'Used for queue.buffering.max.ms'
@@ -84,6 +87,7 @@ DESC

  config_param :max_enqueue_retries, :integer, :default => 3
  config_param :enqueue_retry_backoff, :integer, :default => 3
+ config_param :max_enqueue_bytes_per_second, :size, :default => nil, :desc => 'The maximum number of enqueueing bytes per second'

  config_param :service_name, :string, :default => nil, :desc => 'Used for sasl.kerberos.service.name'
  config_param :ssl_client_cert_key_password, :string, :default => nil, :desc => 'Used for ssl.key.password'
@@ -99,34 +103,96 @@ DESC
  include Fluent::KafkaPluginUtil::SSLSettings
  include Fluent::KafkaPluginUtil::SaslSettings

+ class EnqueueRate
+ class LimitExceeded < StandardError
+ attr_reader :next_retry_clock
+ def initialize(next_retry_clock)
+ @next_retry_clock = next_retry_clock
+ end
+ end
+
+ def initialize(limit_bytes_per_second)
+ @mutex = Mutex.new
+ @start_clock = Fluent::Clock.now
+ @bytes_per_second = 0
+ @limit_bytes_per_second = limit_bytes_per_second
+ @commits = {}
+ end
+
+ def raise_if_limit_exceeded(bytes_to_enqueue)
+ return if @limit_bytes_per_second.nil?
+
+ @mutex.synchronize do
+ @commits[Thread.current] = {
+ clock: Fluent::Clock.now,
+ bytesize: bytes_to_enqueue,
+ }
+
+ @bytes_per_second += @commits[Thread.current][:bytesize]
+ duration = @commits[Thread.current][:clock] - @start_clock
+
+ if duration < 1.0
+ if @bytes_per_second > @limit_bytes_per_second
+ raise LimitExceeded.new(@start_clock + 1.0)
+ end
+ else
+ @start_clock = @commits[Thread.current][:clock]
+ @bytes_per_second = @commits[Thread.current][:bytesize]
+ end
+ end
+ end
+
+ def revert
+ return if @limit_bytes_per_second.nil?
+
+ @mutex.synchronize do
+ return unless @commits[Thread.current]
+ return unless @commits[Thread.current][:clock]
+ if @commits[Thread.current][:clock] >= @start_clock
+ @bytes_per_second -= @commits[Thread.current][:bytesize]
+ end
+ @commits[Thread.current] = nil
+ end
+ end
+ end
+
  def initialize
  super

  @producers = nil
  @producers_mutex = nil
  @shared_producer = nil
+ @enqueue_rate = nil
+ @writing_threads_mutex = Mutex.new
+ @writing_threads = Set.new
  end

  def configure(conf)
  super
  log.instance_eval {
- def add(level, &block)
- return unless block
+ def add(level, message = nil)
+ if message.nil?
+ if block_given?
+ message = yield
+ else
+ return
+ end
+ end

  # Follow rdkafka's log level. See also rdkafka-ruby's bindings.rb: https://github.com/appsignal/rdkafka-ruby/blob/e5c7261e3f2637554a5c12b924be297d7dca1328/lib/rdkafka/bindings.rb#L117
  case level
  when Logger::FATAL
- self.fatal(block.call)
+ self.fatal(message)
  when Logger::ERROR
- self.error(block.call)
+ self.error(message)
  when Logger::WARN
- self.warn(block.call)
+ self.warn(message)
  when Logger::INFO
- self.info(block.call)
+ self.info(message)
  when Logger::DEBUG
- self.debug(block.call)
+ self.debug(message)
  else
- self.trace(block.call)
+ self.trace(message)
  end
  end
  }
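A usage sketch for the `EnqueueRate` class above (the caller shown is hypothetical; the real call sites are in `enqueue_with_retry` below): each payload's size is charged against a rolling one-second window before producing, the charge is reverted when nothing was actually enqueued, and `LimitExceeded` tells the caller how long to sleep via `next_retry_clock`.

```ruby
# Hypothetical caller; `producer` stands in for the rdkafka producer.
rate = EnqueueRate.new(1024 * 1024) # cap enqueueing at ~1 MiB/s

def throttled_produce(rate, producer, payload)
  rate.raise_if_limit_exceeded(payload.bytesize) # charge bytes to the window
  producer.produce(payload: payload)
rescue EnqueueRate::LimitExceeded => e
  rate.revert                                    # nothing enqueued: give bytes back
  wait = e.next_retry_clock - Fluent::Clock.now  # sleep until the window rolls
  sleep(wait) if wait > 0.0
  retry
rescue
  rate.revert                                    # produce failed: give bytes back
  raise
end
```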
@@ -158,6 +224,12 @@ DESC
  @headers_from_record.each do |key, value|
  @headers_from_record_accessors[key] = record_accessor_create(value)
  end
+
+ @exclude_field_accessors = @exclude_fields.map do |field|
+ record_accessor_create(field)
+ end
+
+ @enqueue_rate = EnqueueRate.new(@max_enqueue_bytes_per_second) unless @max_enqueue_bytes_per_second.nil?
  end

  def build_config
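The `exclude_fields` wiring above uses fluentd's `record_accessor` helper: each configured path (for example `$.foo`) compiles to an accessor whose `delete` removes that key from the record before it is formatted and sent. A rough stand-in for what `delete` does on a simple top-level path (the real record_accessor also resolves nested paths; this hypothetical class does not):

```ruby
# Hypothetical stand-in for a record_accessor handling "$.key" paths.
class TopLevelAccessor
  def initialize(path)
    @key = path.sub(/\A\$\./, "") # "$.foo" -> "foo"
  end

  def delete(record)
    record.delete(@key)
  end
end

record = { "a" => "b", "foo" => "bar", "message" => "test" }
TopLevelAccessor.new("$.foo").delete(record)
p record # => {"a"=>"b", "message"=>"test"}, as asserted in test_exclude_fields below
```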
@@ -221,8 +293,19 @@ DESC
  true
  end

+ def wait_writing_threads
+ done = false
+ until done do
+ @writing_threads_mutex.synchronize do
+ done = true if @writing_threads.empty?
+ end
+ sleep(1) unless done
+ end
+ end
+
  def shutdown
  super
+ wait_writing_threads
  shutdown_producers
  end

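The shutdown ordering above drains in-flight writers before producers are torn down: `write` registers its thread in `@writing_threads` (next hunk) and `wait_writing_threads` polls once per second until the set empties. A standalone sketch of that pattern with hypothetical worker threads:

```ruby
require "set"

mutex   = Mutex.new
writers = Set.new

threads = 3.times.map do
  Thread.new do
    mutex.synchronize { writers.add(Thread.current) }
    begin
      sleep(rand) # stand-in for flushing a chunk to Kafka
    ensure
      mutex.synchronize { writers.delete(Thread.current) }
    end
  end
end

# Shutdown side: poll until no writer remains, then it is safe
# to close shared producers.
sleep(1) until mutex.synchronize { writers.empty? }
threads.each(&:join)
puts "all writers drained"
```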
@@ -279,6 +362,7 @@ DESC
  end

  def write(chunk)
+ @writing_threads_mutex.synchronize { @writing_threads.add(Thread.current) }
  tag = chunk.metadata.tag
  topic = if @topic
  extract_placeholders(@topic, chunk)
@@ -305,10 +389,17 @@ DESC
  headers[key] = header_accessor.call(record)
  end

+ unless @exclude_fields.empty?
+ @exclude_field_accessors.each do |exclude_field_acessor|
+ exclude_field_acessor.delete(record)
+ end
+ end
+
  record_buf = @formatter_proc.call(tag, time, record)
  record_buf_bytes = record_buf.bytesize
  if @max_send_limit_bytes && record_buf_bytes > @max_send_limit_bytes
- log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :record => record
+ log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :record_size => record_buf_bytes
+ log.debug "Skipped event:", :record => record
  next
  end
  rescue StandardError => e
@@ -316,7 +407,7 @@ DESC
  next
  end

- handler = enqueue_with_retry(producer, topic, record_buf, message_key, partition, headers)
+ handler = enqueue_with_retry(producer, topic, record_buf, message_key, partition, headers, time)
  if @rdkafka_delivery_handle_poll_timeout != 0
  handlers << handler
  end
@@ -333,14 +424,22 @@ DESC
  # Raise exception to retry sending messages
  raise e
  end
+ ensure
+ @writing_threads_mutex.synchronize { @writing_threads.delete(Thread.current) }
  end

- def enqueue_with_retry(producer, topic, record_buf, message_key, partition, headers)
+ def enqueue_with_retry(producer, topic, record_buf, message_key, partition, headers, time)
  attempt = 0
  loop do
  begin
- return producer.produce(topic: topic, payload: record_buf, key: message_key, partition: partition, headers: headers)
+ @enqueue_rate.raise_if_limit_exceeded(record_buf.bytesize) if @enqueue_rate
+ return producer.produce(topic: topic, payload: record_buf, key: message_key, partition: partition, headers: headers, timestamp: @use_event_time ? Time.at(time) : nil)
+ rescue EnqueueRate::LimitExceeded => e
+ @enqueue_rate.revert if @enqueue_rate
+ duration = e.next_retry_clock - Fluent::Clock.now
+ sleep(duration) if duration > 0.0
  rescue Exception => e
+ @enqueue_rate.revert if @enqueue_rate
  if e.respond_to?(:code) && e.code == :queue_full
  if attempt <= @max_enqueue_retries
  log.warn "Failed to enqueue message; attempting retry #{attempt} of #{@max_enqueue_retries} after #{@enqueue_retry_backoff}s"
data/test/helper.rb CHANGED
@@ -8,6 +8,7 @@ rescue Bundler::BundlerError => e
  exit e.status_code
  end
  require 'test/unit'
+ require 'test/unit/rr'

  $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
  $LOAD_PATH.unshift(File.dirname(__FILE__))
@@ -22,8 +23,12 @@ unless ENV.has_key?('VERBOSE')
  end

  require 'fluent/plugin/out_kafka'
+ require 'fluent/plugin/out_kafka_buffered'
+ require 'fluent/plugin/out_kafka2'
  require 'fluent/plugin/in_kafka'
  require 'fluent/plugin/in_kafka_group'

+ require "fluent/test/driver/output"
+
  class Test::Unit::TestCase
  end
data/test/plugin/test_kafka_plugin_util.rb CHANGED
@@ -1,14 +1,8 @@
  require 'helper'
  require 'fluent/plugin/kafka_plugin_util'

- class File
- def File::read(path)
- path
- end
- end
-
  class KafkaPluginUtilTest < Test::Unit::TestCase
-
+
  def self.config_param(name, type, options)
  end
  include Fluent::KafkaPluginUtil::SSLSettings
@@ -20,19 +14,31 @@ class KafkaPluginUtilTest < Test::Unit::TestCase
  end

  def test_read_ssl_file_when_nil
- assert_equal(nil, read_ssl_file(nil))
+ stub(File).read(anything) do |path|
+ path
+ end
+ assert_equal(nil, read_ssl_file(nil))
  end

  def test_read_ssl_file_when_empty_string
- assert_equal(nil, read_ssl_file(""))
+ stub(File).read(anything) do |path|
+ path
+ end
+ assert_equal(nil, read_ssl_file(""))
  end

  def test_read_ssl_file_when_non_empty_path
- assert_equal("path", read_ssl_file("path"))
+ stub(File).read(anything) do |path|
+ path
+ end
+ assert_equal("path", read_ssl_file("path"))
  end

  def test_read_ssl_file_when_non_empty_array
- assert_equal(["a","b"], read_ssl_file(["a","b"]))
+ stub(File).read(anything) do |path|
+ path
+ end
+ assert_equal(["a","b"], read_ssl_file(["a","b"]))
  end

- end
+ end
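Context for the test rewrite above: the old version monkey-patched `File.read` globally, which leaked into unrelated tests; with rr (pulled in via `require 'test/unit/rr'` in data/test/helper.rb above) the stub is scoped to a single test and undone automatically. A minimal standalone example of the same pattern:

```ruby
require 'test/unit'
require 'test/unit/rr'

class StubFileReadTest < Test::Unit::TestCase
  def test_read_is_stubbed_only_here
    # rr restores File.read when this test finishes
    stub(File).read(anything) { |path| path }
    assert_equal("client.pem", File.read("client.pem"))
  end
end
```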
data/test/plugin/test_out_kafka.rb CHANGED
@@ -43,6 +43,16 @@ class KafkaOutputTest < Test::Unit::TestCase
  d = create_driver
  end

+ data("crc32" => "crc32",
+ "murmur2" => "murmur2")
+ def test_partitioner_hash_function(data)
+ hash_type = data
+ d = create_driver(CONFIG + %[partitioner_hash_function #{hash_type}])
+ assert_nothing_raised do
+ d.instance.refresh_client
+ end
+ end
+
  def test_mutli_worker_support
  d = create_driver
  assert_equal true, d.instance.multi_workers_ready?
data/test/plugin/test_out_kafka2.rb ADDED
@@ -0,0 +1,116 @@
+ require 'helper'
+ require 'fluent/test/helpers'
+ require 'fluent/test/driver/input'
+ require 'fluent/test/driver/output'
+ require 'securerandom'
+
+ class Kafka2OutputTest < Test::Unit::TestCase
+ include Fluent::Test::Helpers
+
+ def setup
+ Fluent::Test.setup
+ end
+
+ def base_config
+ config_element('ROOT', '', {"@type" => "kafka2"}, [
+ config_element('format', "", {"@type" => "json"})
+ ])
+ end
+
+ def config(default_topic: "kitagawakeiko")
+ base_config + config_element('ROOT', '', {"default_topic" => default_topic,
+ "brokers" => "localhost:9092"}, [
+ ])
+ end
+
+ def create_driver(conf = config, tag='test')
+ Fluent::Test::Driver::Output.new(Fluent::Kafka2Output).configure(conf)
+ end
+
+ def test_configure
+ assert_nothing_raised(Fluent::ConfigError) {
+ create_driver(base_config)
+ }
+
+ assert_nothing_raised(Fluent::ConfigError) {
+ create_driver(config)
+ }
+
+ assert_nothing_raised(Fluent::ConfigError) {
+ create_driver(config + config_element('buffer', "", {"@type" => "memory"}))
+ }
+
+ d = create_driver
+ assert_equal 'kitagawakeiko', d.instance.default_topic
+ assert_equal ['localhost:9092'], d.instance.brokers
+ end
+
+ data("crc32" => "crc32",
+ "murmur2" => "murmur2")
+ def test_partitioner_hash_function(data)
+ hash_type = data
+ d = create_driver(config + config_element('ROOT', '', {"partitioner_hash_function" => hash_type}))
+ assert_nothing_raised do
+ d.instance.refresh_client
+ end
+ end
+
+ def test_mutli_worker_support
+ d = create_driver
+ assert_equal true, d.instance.multi_workers_ready?
+ end
+
+ class WriteTest < self
+ TOPIC_NAME = "kafka-output-#{SecureRandom.uuid}"
+
+ INPUT_CONFIG = %[
+ @type kafka
+ brokers localhost:9092
+ format json
+ @label @kafka
+ topics #{TOPIC_NAME}
+ ]
+
+ def create_target_driver(conf = INPUT_CONFIG)
+ Fluent::Test::Driver::Input.new(Fluent::KafkaInput).configure(conf)
+ end
+
+ def setup
+ @kafka = Kafka.new(["localhost:9092"], client_id: 'kafka')
+ end
+
+ def teardown
+ @kafka.delete_topic(TOPIC_NAME)
+ @kafka.close
+ end
+
+ def test_write
+ target_driver = create_target_driver
+ expected_message = {"a" => 2}
+ target_driver.run(expect_records: 1, timeout: 5) do
+ sleep 2
+ d = create_driver(config(default_topic: TOPIC_NAME))
+ d.run do
+ d.feed("test", event_time, expected_message)
+ end
+ end
+ actual_messages = target_driver.events.collect { |event| event[2] }
+ assert_equal([expected_message], actual_messages)
+ end
+
+ def test_exclude_fields
+ conf = config(default_topic: TOPIC_NAME) +
+ config_element('ROOT', '', {"exclude_fields" => "$.foo"}, [])
+ target_driver = create_target_driver
+ target_driver.run(expect_records: 1, timeout: 5) do
+ sleep 2
+ d = create_driver(conf)
+ d.run do
+ d.feed('test', event_time, {'a' => 'b', 'foo' => 'bar', 'message' => 'test'})
+ end
+ end
+ actual_messages = target_driver.events.collect { |event| event[2] }
+ assert_equal([{'a' => 'b', 'message' => 'test'}], actual_messages)
+ end
+ end
+ end
data/test/plugin/test_out_kafka_buffered.rb ADDED
@@ -0,0 +1,68 @@
+ require 'helper'
+ require 'fluent/output'
+
+ class KafkaBufferedOutputTest < Test::Unit::TestCase
+ def setup
+ Fluent::Test.setup
+ end
+
+ BASE_CONFIG = %[
+ type kafka_buffered
+ ]
+
+ CONFIG = BASE_CONFIG + %[
+ default_topic kitagawakeiko
+ brokers localhost:9092
+ ]
+
+ def create_driver(conf = CONFIG, tag='test')
+ Fluent::Test::BufferedOutputTestDriver.new(Fluent::KafkaOutputBuffered, tag).configure(conf)
+ end
+
+ def test_configure
+ assert_nothing_raised(Fluent::ConfigError) {
+ create_driver(BASE_CONFIG)
+ }
+
+ assert_nothing_raised(Fluent::ConfigError) {
+ create_driver(CONFIG)
+ }
+
+ assert_nothing_raised(Fluent::ConfigError) {
+ create_driver(CONFIG + %[
+ buffer_type memory
+ ])
+ }
+
+ d = create_driver
+ assert_equal 'kitagawakeiko', d.instance.default_topic
+ assert_equal 'localhost:9092', d.instance.brokers
+ end
+
+ def test_format
+ d = create_driver
+ end
+
+ data("crc32" => "crc32",
+ "murmur2" => "murmur2")
+ def test_partitioner_hash_function(data)
+ hash_type = data
+ d = create_driver(CONFIG + %[partitioner_hash_function #{hash_type}])
+ assert_nothing_raised do
+ d.instance.refresh_client
+ end
+ end
+
+ def test_mutli_worker_support
+ d = create_driver
+ assert_equal true, d.instance.multi_workers_ready?
+
+ end
+
+ def test_write
+ d = create_driver
+ time = Time.parse("2011-01-02 13:14:15 UTC").to_i
+ d.emit({"a"=>1}, time)
+ d.emit({"a"=>2}, time)
+ end
+ end