fluent-plugin-kafka 0.17.0 → 0.17.4

This diff shows the changes between publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 15805411e4029813123b9b636b6faadb937cf38c9841adcd9d998a0f54d8b687
-  data.tar.gz: 75fef11595c86beb4a54d2d2ff659f77075e328426463f7ef830982c9724ff16
+  metadata.gz: 5cc122034295e37318cd7510ef3347eeda14cc43b8c0132053cb944d68141feb
+  data.tar.gz: e725b07eaa95f639b2122f1a4c8342101314f2f721e3625c73889dc8caf9aead
 SHA512:
-  metadata.gz: db08ffbde4fe36ce38abe6eebf83e6e0dc157bc5d8dd95141ef911d0c5b59074a208c3a8f836e258ffddc34f73fda303006eaef1cd8fc12ae4dd4c79d101d0c7
-  data.tar.gz: 9b3b098b1bc58654924d50ac8d685af007763942c1cc68187bbd6d39510a27ca31ae795df0de312d97ad67463ae0c428882118640fce052119288e27e3fb0df5
+  metadata.gz: 2e432e7f2670132022b18fa9460b8eda69a18a4dd3a35aa775619c6a45ff8cb6ea5bad869ebc5cefe804b9bb4261ab12150cb77ad10af62dc2e54fd6de435aec
+  data.tar.gz: edbebd57c325292d197d342ff8f5151aa1fcfbd47128fc09d1b71e2bf4d7ccf196d54b48df106f1b9f655fe334ab1f9fe907fce8b4f1b1d20edd9c8254c6c8cd
@@ -5,6 +5,8 @@ on:
 jobs:
   build:
     runs-on: ${{ matrix.os }}
+    env:
+      USE_RDKAFKA: 1
     strategy:
       fail-fast: false
       matrix:
data/ChangeLog CHANGED
@@ -1,3 +1,19 @@
+Release 0.17.4 - 2022/01/25
+  * in_kafka_group: Add `refresh_topic_interval` parameter
+
+Release 0.17.3 - 2021/11/26
+  * output: Suppress large warning logs for events skipped by `max_send_limit_bytes`
+
+Release 0.17.2 - 2021/10/14
+  * out_rdkafka2: Add `max_enqueue_bytes_per_second` parameter
+  * out_rdkafka2: Support `use_event_time` parameter
+  * out_rdkafka2: Fix a potential bug that the plugin might exit without receiving responses from Kafka.
+
+Release 0.17.1 - 2021/09/24
+  * out_rdkafka/out_rdkafka2: Support rdkafka 0.9.0 or later
+  * out_rdkafka/out_rdkafka2: Add `exclude_fields` parameter
+  * out_kafka2.rb: Fix one more Ruby 3.0 keyword arguments issue
+
 Release 0.17.0 - 2021/08/30
   * out_kafka/out_kafka_buffered/out_kafka2: Provide murmur2 partitioner hash function choice
   * in_kafka/in_kafka_group/out_kafka/out_kafka_buffered/out_kafka2: Use Ruby Kafka's ssl_ca_cert_file_path parameter to feed the CA certs
data/Gemfile CHANGED
@@ -2,3 +2,5 @@ source 'https://rubygems.org'
 
 # Specify your gem's dependencies in fluent-plugin-kafka.gemspec
 gemspec
+
+ gem 'rdkafka', '>= 0.6.0' if ENV["USE_RDKAFKA"]
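
The `USE_RDKAFKA` environment variable added to the CI workflow above is what makes this Gemfile line take effect: when it is set, `bundle install` also pulls in the `rdkafka` gem so the rdkafka-based plugins and their new tests can run. Running something like `USE_RDKAFKA=1 bundle install && bundle exec rake test` locally should mirror that CI setup, though the exact rake invocation is an assumption here.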
data/README.md CHANGED
@@ -40,14 +40,14 @@ If you want to use zookeeper related parameters, you also need to install zookee
 
 Set path to SSL related files. See [Encryption and Authentication using SSL](https://github.com/zendesk/ruby-kafka#encryption-and-authentication-using-ssl) for more detail.
 
-#### SASL authentication
+#### SASL authentication
 
 ##### with GSSAPI
 
 - principal
 - keytab
 
-Set principal and path to keytab for SASL/GSSAPI authentication.
+Set principal and path to keytab for SASL/GSSAPI authentication.
 See [Authentication using SASL](https://github.com/zendesk/ruby-kafka#authentication-using-sasl) for more details.
 
 ##### with Plain/SCRAM
@@ -57,7 +57,7 @@ See [Authentication using SASL](https://github.com/zendesk/ruby-kafka#authentica
 - scram_mechanism
 - sasl_over_ssl
 
-Set username, password, scram_mechanism and sasl_over_ssl for SASL/Plain or Scram authentication.
+Set username, password, scram_mechanism and sasl_over_ssl for SASL/Plain or Scram authentication.
 See [Authentication using SASL](https://github.com/zendesk/ruby-kafka#authentication-using-sasl) for more details.
 
 ### Input plugin (@type 'kafka')
@@ -119,7 +119,7 @@ Consume events by kafka consumer group features..
   topics <listening topics(separate with comma',')>
   format <input text type (text|json|ltsv|msgpack)> :default => json
   message_key <key (Optional, for text format only, default is message)>
-  kafka_mesasge_key <key (Optional, If specified, set kafka's message key to this key)>
+  kafka_message_key <key (Optional, If specified, set kafka's message key to this key)>
   add_headers <If true, add kafka's message headers to record>
   add_prefix <tag prefix (Optional)>
   add_suffix <tag suffix (Optional)>
@@ -135,6 +135,7 @@ Consume events by kafka consumer group features..
   offset_commit_interval (integer) :default => nil (Use default of ruby-kafka)
   offset_commit_threshold (integer) :default => nil (Use default of ruby-kafka)
   fetcher_max_queue_size (integer) :default => nil (Use default of ruby-kafka)
+  refresh_topic_interval (integer) :default => nil (Use default of ruby-kafka)
   start_from_beginning (bool) :default => true
 </source>
 
@@ -155,7 +156,7 @@ With the introduction of the rdkafka-ruby based input plugin we hope to support
   topics <listening topics(separate with comma',')>
   format <input text type (text|json|ltsv|msgpack)> :default => json
   message_key <key (Optional, for text format only, default is message)>
-  kafka_mesasge_key <key (Optional, If specified, set kafka's message key to this key)>
+  kafka_message_key <key (Optional, If specified, set kafka's message key to this key)>
   add_headers <If true, add kafka's message headers to record>
   add_prefix <tag prefix (Optional)>
   add_suffix <tag suffix (Optional)>
@@ -200,6 +201,7 @@ If `ruby-kafka` doesn't fit your kafka environment, check `rdkafka2` plugin inst
   get_kafka_client_log (bool) :default => false
   headers (hash) :default => {}
   headers_from_record (hash) :default => {}
+  use_event_time (bool) :default => false
   use_default_for_unknown_topic (bool) :default => false
   discard_kafka_delivery_failed (bool) :default => false (No discard)
   partitioner_hash_function (enum) (crc32|murmur2) :default => 'crc32'
@@ -316,6 +318,23 @@ The Kafka message will have a header of source_ip=12.7.0.0.1.
 
 The configuration format is jsonpath. It is descibed in https://docs.fluentd.org/plugin-helper-overview/api-plugin-helper-record_accessor
 
+#### Excluding fields
+Fields can be excluded from output data. Only works for kafka2 and rdkafka2 output plugin.
+
+Fields must be specified using an array of dot notation `$.`, for example:
+
+    <match app.**>
+      @type kafka2
+      [...]
+      exclude_fields $.source.ip,$.HTTP_FOO
+    <match>
+
+This config can be used to remove fields used on another configs.
+
+For example, `$.source.ip` can be extracted with config `headers_from_record` and excluded from message payload.
+
+> Using this config to remove unused fields is discouraged. A [filter plugin](https://docs.fluentd.org/v/0.12/filter) can be used for this purpose.
+
 ### Buffered output plugin
 
 This plugin uses ruby-kafka producer for writing data. This plugin is for v0.12. If you use v1, see `kafka2`.
@@ -346,6 +365,7 @@ Support of fluentd v0.12 has ended. `kafka_buffered` will be an alias of `kafka2
   exclude_topic_key (bool) :default => false
   exclude_partition_key (bool) :default => false
   get_kafka_client_log (bool) :default => false
+  use_event_time (bool) :default => false
   partitioner_hash_function (enum) (crc32|murmur2) :default => 'crc32'
 
   # See fluentd document for buffer related parameters: https://docs.fluentd.org/v/0.12/buffer
@@ -435,6 +455,7 @@ You need to install rdkafka gem.
   exclude_topic_key (bool) :default => false
   exclude_partition_key (bool) :default => false
   discard_kafka_delivery_failed (bool) :default => false (No discard)
+  use_event_time (bool) :default => false
 
   # same with kafka2
   headers (hash) :default => {}
@@ -469,6 +490,10 @@ You need to install rdkafka gem.
   rdkafka_delivery_handle_poll_timeout (integer) :default => 30
   # If the record size is larger than this value, such records are ignored. Default is no limit
   max_send_limit_bytes (integer) :default => nil
+  # The maximum number of enqueueing bytes per second. It can reduce the
+  # load of both Fluentd and Kafka when excessive messages are attempted
+  # to send. Default is no limit.
+  max_enqueue_bytes_per_second (integer) :default => nil
 </match>
 
 If you use v0.12, use `rdkafka` instead.
@@ -13,7 +13,7 @@ Gem::Specification.new do |gem|
   gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
   gem.name = "fluent-plugin-kafka"
   gem.require_paths = ["lib"]
-  gem.version = '0.17.0'
+  gem.version = '0.17.4'
   gem.required_ruby_version = ">= 2.1.0"
 
   gem.add_dependency "fluentd", [">= 0.10.58", "< 2"]
@@ -71,6 +71,7 @@ class Fluent::KafkaInput < Fluent::Input
71
71
  require 'kafka'
72
72
 
73
73
  @time_parser = nil
74
+ @zookeeper = nil
74
75
  end
75
76
 
76
77
  def configure(conf)
@@ -67,6 +67,8 @@ class Fluent::KafkaGroupInput < Fluent::Input
67
67
  :desc => "The number of messages that can be processed before their offsets are committed"
68
68
  config_param :fetcher_max_queue_size, :integer, :default => nil,
69
69
  :desc => "The number of fetched messages per partition that are queued in fetcher queue"
70
+ config_param :refresh_topic_interval, :integer, :default => nil,
71
+ :desc => "The interval of refreshing the topic list in seconds. Zero or unset disables this"
70
72
  config_param :start_from_beginning, :bool, :default => true,
71
73
  :desc => "Whether to start from the beginning of the topic or just subscribe to new messages being produced"
72
74
 
@@ -128,6 +130,7 @@ class Fluent::KafkaGroupInput < Fluent::Input
128
130
  @consumer_opts[:offset_commit_interval] = @offset_commit_interval if @offset_commit_interval
129
131
  @consumer_opts[:offset_commit_threshold] = @offset_commit_threshold if @offset_commit_threshold
130
132
  @consumer_opts[:fetcher_max_queue_size] = @fetcher_max_queue_size if @fetcher_max_queue_size
133
+ @consumer_opts[:refresh_topic_interval] = @refresh_topic_interval if @refresh_topic_interval
131
134
 
132
135
  @fetch_opts = {}
133
136
  @fetch_opts[:max_wait_time] = @max_wait_time if @max_wait_time
@@ -88,6 +88,7 @@ DESC
88
88
  require 'kafka'
89
89
 
90
90
  @kafka = nil
91
+ @field_separator = nil
91
92
  end
92
93
 
93
94
  def refresh_client
@@ -239,7 +240,8 @@ DESC
239
240
  record_buf = @formatter_proc.call(tag, time, record)
240
241
  record_buf_bytes = record_buf.bytesize
241
242
  if @max_send_limit_bytes && record_buf_bytes > @max_send_limit_bytes
242
- log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :record => record
243
+ log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :record_size => record_buf_bytes
244
+ log.debug "Skipped event:", :record => record
243
245
  next
244
246
  end
245
247
  log.trace { "message will send to #{topic} with partition_key: #{partition_key}, partition: #{partition}, message_key: #{message_key} and value: #{record_buf}." }
@@ -42,6 +42,8 @@ DESC
42
42
  :desc => 'Set true to remove message key from data'
43
43
  config_param :exclude_topic_key, :bool, :default => false,
44
44
  :desc => 'Set true to remove topic name key from data'
45
+ config_param :exclude_fields, :array, :default => [], value_type: :string,
46
+ :desc => 'Fields to remove from data where the value is a jsonpath to a record value'
45
47
  config_param :use_event_time, :bool, :default => false, :desc => 'Use fluentd event time for kafka create_time'
46
48
  config_param :headers, :hash, default: {}, symbolize_keys: true, value_type: :string,
47
49
  :desc => 'Kafka message headers'
@@ -177,6 +179,10 @@ DESC
177
179
  @headers_from_record.each do |key, value|
178
180
  @headers_from_record_accessors[key] = record_accessor_create(value)
179
181
  end
182
+
183
+ @exclude_field_accessors = @exclude_fields.map do |field|
184
+ record_accessor_create(field)
185
+ end
180
186
  end
181
187
 
182
188
  def multi_workers_ready?
@@ -235,7 +241,7 @@ DESC
235
241
  mutate_headers = !@headers_from_record_accessors.empty?
236
242
 
237
243
  begin
238
- producer = @kafka.topic_producer(topic, @producer_opts)
244
+ producer = @kafka.topic_producer(topic, **@producer_opts)
239
245
  chunk.msgpack_each { |time, record|
240
246
  begin
241
247
  record = inject_values_to_record(tag, time, record)
@@ -253,10 +259,17 @@ DESC
253
259
  headers = base_headers
254
260
  end
255
261
 
262
+ unless @exclude_fields.empty?
263
+ @exclude_field_accessors.each do |exclude_field_accessor|
264
+ exclude_field_accessor.delete(record)
265
+ end
266
+ end
267
+
256
268
  record_buf = @formatter_proc.call(tag, time, record)
257
269
  record_buf_bytes = record_buf.bytesize
258
270
  if @max_send_limit_bytes && record_buf_bytes > @max_send_limit_bytes
259
- log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :record => record
271
+ log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :record_size => record_buf_bytes
272
+ log.debug "Skipped event:", :record => record
260
273
  next
261
274
  end
262
275
  rescue StandardError => e
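
A minimal sketch of what the `exclude_fields` handling added above does to a record, assuming a nested `source.ip` field. The plugin builds accessors with Fluentd's `record_accessor_create` helper and calls `delete` on each of them; the plain-Ruby equivalent below is only an illustration, not the plugin's code:

    # exclude_fields $.source.ip  (illustration only)
    record = { "source" => { "ip" => "127.0.0.1", "host" => "web-1" }, "message" => "hello" }
    # record_accessor_create("$.source.ip").delete(record) removes the nested key in place;
    # roughly equivalent to:
    record["source"].delete("ip") if record["source"].is_a?(Hash)
    record  # => { "source" => { "host" => "web-1" }, "message" => "hello" }

Because the deletion happens before `@formatter_proc` serializes the record, the excluded field never reaches the Kafka payload, while headers extracted earlier via `headers_from_record` keep their value.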
@@ -107,6 +107,7 @@ DESC
107
107
  @kafka = nil
108
108
  @producers = {}
109
109
  @producers_mutex = Mutex.new
110
+ @field_separator = nil
110
111
  end
111
112
 
112
113
  def multi_workers_ready?
@@ -331,7 +332,8 @@ DESC
331
332
  record_buf = @formatter_proc.call(tag, time, record)
332
333
  record_buf_bytes = record_buf.bytesize
333
334
  if @max_send_limit_bytes && record_buf_bytes > @max_send_limit_bytes
334
- log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :record => record
335
+ log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :record_size => record_buf_bytes
336
+ log.debug "Skipped event:", :record => record
335
337
  next
336
338
  end
337
339
  rescue StandardError => e
@@ -65,6 +65,7 @@ DESC
65
65
  The codec the producer uses to compress messages.
66
66
  Supported codecs: (gzip|snappy)
67
67
  DESC
68
+ config_param :use_event_time, :bool, :default => false, :desc => 'Use fluentd event time for rdkafka timestamp'
68
69
  config_param :max_send_limit_bytes, :size, :default => nil
69
70
  config_param :rdkafka_buffering_max_ms, :integer, :default => nil
70
71
  config_param :rdkafka_buffering_max_messages, :integer, :default => nil
@@ -91,23 +92,29 @@ DESC
91
92
  def configure(conf)
92
93
  super
93
94
  log.instance_eval {
94
- def add(level, &block)
95
- return unless block
95
+ def add(level, message = nil)
96
+ if message.nil?
97
+ if block_given?
98
+ message = yield
99
+ else
100
+ return
101
+ end
102
+ end
96
103
 
97
104
  # Follow rdkakfa's log level. See also rdkafka-ruby's bindings.rb: https://github.com/appsignal/rdkafka-ruby/blob/e5c7261e3f2637554a5c12b924be297d7dca1328/lib/rdkafka/bindings.rb#L117
98
105
  case level
99
106
  when Logger::FATAL
100
- self.fatal(block.call)
107
+ self.fatal(message)
101
108
  when Logger::ERROR
102
- self.error(block.call)
109
+ self.error(message)
103
110
  when Logger::WARN
104
- self.warn(block.call)
111
+ self.warn(message)
105
112
  when Logger::INFO
106
- self.info(block.call)
113
+ self.info(message)
107
114
  when Logger::DEBUG
108
- self.debug(block.call)
115
+ self.debug(message)
109
116
  else
110
- self.trace(block.call)
117
+ self.trace(message)
111
118
  end
112
119
  end
113
120
  }
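
The reworked `add` shim above appears to be what the 0.17.1 ChangeLog entry means by "Support rdkafka 0.9.0 or later": newer rdkafka-ruby versions pass the log message as a positional argument rather than only via a block, so the shim now accepts both forms before mapping the level onto Fluentd's logger.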
@@ -271,7 +278,8 @@ DESC
271
278
  record_buf = @formatter_proc.call(tag, time, record)
272
279
  record_buf_bytes = record_buf.bytesize
273
280
  if @max_send_limit_bytes && record_buf_bytes > @max_send_limit_bytes
274
- log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :record => record
281
+ log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :record_size => record_buf_bytes
282
+ log.debug "Skipped event:", :record => record
275
283
  next
276
284
  end
277
285
  rescue StandardError => e
@@ -280,7 +288,7 @@ DESC
280
288
  end
281
289
 
282
290
  producer = get_producer
283
- handler = enqueue_with_retry(producer, topic, record_buf, message_key, partition)
291
+ handler = enqueue_with_retry(producer, topic, record_buf, message_key, partition, time)
284
292
  handler
285
293
  }.each { |handler|
286
294
  handler.wait(max_wait_timeout: @rdkafka_delivery_handle_poll_timeout) if @rdkafka_delivery_handle_poll_timeout != 0
@@ -292,11 +300,11 @@ DESC
292
300
  raise e
293
301
  end
294
302
 
295
- def enqueue_with_retry(producer, topic, record_buf, message_key, partition)
303
+ def enqueue_with_retry(producer, topic, record_buf, message_key, partition, time)
296
304
  attempt = 0
297
305
  loop do
298
306
  begin
299
- handler = producer.produce(topic: topic, payload: record_buf, key: message_key, partition: partition)
307
+ handler = producer.produce(topic: topic, payload: record_buf, key: message_key, partition: partition, timestamp: @use_event_time ? Time.at(time) : nil)
300
308
  return handler
301
309
  rescue Exception => e
302
310
  if e.respond_to?(:code) && e.code == :queue_full
@@ -56,6 +56,8 @@ DESC
56
56
  :desc => <<-DESC
57
57
  Set true to remove topic key from data
58
58
  DESC
59
+ config_param :exclude_fields, :array, :default => [], value_type: :string,
60
+ :desc => 'Fields to remove from data where the value is a jsonpath to a record value'
59
61
  config_param :headers, :hash, default: {}, symbolize_keys: true, value_type: :string,
60
62
  :desc => 'Kafka message headers'
61
63
  config_param :headers_from_record, :hash, default: {}, symbolize_keys: true, value_type: :string,
@@ -72,6 +74,7 @@ DESC
72
74
  The codec the producer uses to compress messages. Used for compression.codec
73
75
  Supported codecs: (gzip|snappy)
74
76
  DESC
77
+ config_param :use_event_time, :bool, :default => false, :desc => 'Use fluentd event time for rdkafka timestamp'
75
78
  config_param :max_send_limit_bytes, :size, :default => nil
76
79
  config_param :discard_kafka_delivery_failed, :bool, :default => false
77
80
  config_param :rdkafka_buffering_max_ms, :integer, :default => nil, :desc => 'Used for queue.buffering.max.ms'
@@ -84,6 +87,7 @@ DESC
84
87
 
85
88
  config_param :max_enqueue_retries, :integer, :default => 3
86
89
  config_param :enqueue_retry_backoff, :integer, :default => 3
90
+ config_param :max_enqueue_bytes_per_second, :size, :default => nil, :desc => 'The maximum number of enqueueing bytes per second'
87
91
 
88
92
  config_param :service_name, :string, :default => nil, :desc => 'Used for sasl.kerberos.service.name'
89
93
  config_param :ssl_client_cert_key_password, :string, :default => nil, :desc => 'Used for ssl.key.password'
@@ -99,34 +103,96 @@ DESC
99
103
  include Fluent::KafkaPluginUtil::SSLSettings
100
104
  include Fluent::KafkaPluginUtil::SaslSettings
101
105
 
106
+ class EnqueueRate
107
+ class LimitExceeded < StandardError
108
+ attr_reader :next_retry_clock
109
+ def initialize(next_retry_clock)
110
+ @next_retry_clock = next_retry_clock
111
+ end
112
+ end
113
+
114
+ def initialize(limit_bytes_per_second)
115
+ @mutex = Mutex.new
116
+ @start_clock = Fluent::Clock.now
117
+ @bytes_per_second = 0
118
+ @limit_bytes_per_second = limit_bytes_per_second
119
+ @commits = {}
120
+ end
121
+
122
+ def raise_if_limit_exceeded(bytes_to_enqueue)
123
+ return if @limit_bytes_per_second.nil?
124
+
125
+ @mutex.synchronize do
126
+ @commits[Thread.current] = {
127
+ clock: Fluent::Clock.now,
128
+ bytesize: bytes_to_enqueue,
129
+ }
130
+
131
+ @bytes_per_second += @commits[Thread.current][:bytesize]
132
+ duration = @commits[Thread.current][:clock] - @start_clock
133
+
134
+ if duration < 1.0
135
+ if @bytes_per_second > @limit_bytes_per_second
136
+ raise LimitExceeded.new(@start_clock + 1.0)
137
+ end
138
+ else
139
+ @start_clock = @commits[Thread.current][:clock]
140
+ @bytes_per_second = @commits[Thread.current][:bytesize]
141
+ end
142
+ end
143
+ end
144
+
145
+ def revert
146
+ return if @limit_bytes_per_second.nil?
147
+
148
+ @mutex.synchronize do
149
+ return unless @commits[Thread.current]
150
+ return unless @commits[Thread.current][:clock]
151
+ if @commits[Thread.current][:clock] >= @start_clock
152
+ @bytes_per_second -= @commits[Thread.current][:bytesize]
153
+ end
154
+ @commits[Thread.current] = nil
155
+ end
156
+ end
157
+ end
158
+
102
159
  def initialize
103
160
  super
104
161
 
105
162
  @producers = nil
106
163
  @producers_mutex = nil
107
164
  @shared_producer = nil
165
+ @enqueue_rate = nil
166
+ @writing_threads_mutex = Mutex.new
167
+ @writing_threads = Set.new
108
168
  end
109
169
 
110
170
  def configure(conf)
111
171
  super
112
172
  log.instance_eval {
113
- def add(level, &block)
114
- return unless block
173
+ def add(level, message = nil)
174
+ if message.nil?
175
+ if block_given?
176
+ message = yield
177
+ else
178
+ return
179
+ end
180
+ end
115
181
 
116
182
  # Follow rdkakfa's log level. See also rdkafka-ruby's bindings.rb: https://github.com/appsignal/rdkafka-ruby/blob/e5c7261e3f2637554a5c12b924be297d7dca1328/lib/rdkafka/bindings.rb#L117
117
183
  case level
118
184
  when Logger::FATAL
119
- self.fatal(block.call)
185
+ self.fatal(message)
120
186
  when Logger::ERROR
121
- self.error(block.call)
187
+ self.error(message)
122
188
  when Logger::WARN
123
- self.warn(block.call)
189
+ self.warn(message)
124
190
  when Logger::INFO
125
- self.info(block.call)
191
+ self.info(message)
126
192
  when Logger::DEBUG
127
- self.debug(block.call)
193
+ self.debug(message)
128
194
  else
129
- self.trace(block.call)
195
+ self.trace(message)
130
196
  end
131
197
  end
132
198
  }
@@ -158,6 +224,12 @@ DESC
158
224
  @headers_from_record.each do |key, value|
159
225
  @headers_from_record_accessors[key] = record_accessor_create(value)
160
226
  end
227
+
228
+ @exclude_field_accessors = @exclude_fields.map do |field|
229
+ record_accessor_create(field)
230
+ end
231
+
232
+ @enqueue_rate = EnqueueRate.new(@max_enqueue_bytes_per_second) unless @max_enqueue_bytes_per_second.nil?
161
233
  end
162
234
 
163
235
  def build_config
@@ -221,8 +293,19 @@ DESC
221
293
  true
222
294
  end
223
295
 
296
+ def wait_writing_threads
297
+ done = false
298
+ until done do
299
+ @writing_threads_mutex.synchronize do
300
+ done = true if @writing_threads.empty?
301
+ end
302
+ sleep(1) unless done
303
+ end
304
+ end
305
+
224
306
  def shutdown
225
307
  super
308
+ wait_writing_threads
226
309
  shutdown_producers
227
310
  end
228
311
 
@@ -279,6 +362,7 @@ DESC
279
362
  end
280
363
 
281
364
  def write(chunk)
365
+ @writing_threads_mutex.synchronize { @writing_threads.add(Thread.current) }
282
366
  tag = chunk.metadata.tag
283
367
  topic = if @topic
284
368
  extract_placeholders(@topic, chunk)
@@ -305,10 +389,17 @@ DESC
305
389
  headers[key] = header_accessor.call(record)
306
390
  end
307
391
 
392
+ unless @exclude_fields.empty?
393
+ @exclude_field_accessors.each do |exclude_field_acessor|
394
+ exclude_field_acessor.delete(record)
395
+ end
396
+ end
397
+
308
398
  record_buf = @formatter_proc.call(tag, time, record)
309
399
  record_buf_bytes = record_buf.bytesize
310
400
  if @max_send_limit_bytes && record_buf_bytes > @max_send_limit_bytes
311
- log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :record => record
401
+ log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :record_size => record_buf_bytes
402
+ log.debug "Skipped event:", :record => record
312
403
  next
313
404
  end
314
405
  rescue StandardError => e
@@ -316,7 +407,7 @@ DESC
316
407
  next
317
408
  end
318
409
 
319
- handler = enqueue_with_retry(producer, topic, record_buf, message_key, partition, headers)
410
+ handler = enqueue_with_retry(producer, topic, record_buf, message_key, partition, headers, time)
320
411
  if @rdkafka_delivery_handle_poll_timeout != 0
321
412
  handlers << handler
322
413
  end
@@ -333,14 +424,22 @@ DESC
333
424
  # Raise exception to retry sendind messages
334
425
  raise e
335
426
  end
427
+ ensure
428
+ @writing_threads_mutex.synchronize { @writing_threads.delete(Thread.current) }
336
429
  end
337
430
 
338
- def enqueue_with_retry(producer, topic, record_buf, message_key, partition, headers)
431
+ def enqueue_with_retry(producer, topic, record_buf, message_key, partition, headers, time)
339
432
  attempt = 0
340
433
  loop do
341
434
  begin
342
- return producer.produce(topic: topic, payload: record_buf, key: message_key, partition: partition, headers: headers)
435
+ @enqueue_rate.raise_if_limit_exceeded(record_buf.bytesize) if @enqueue_rate
436
+ return producer.produce(topic: topic, payload: record_buf, key: message_key, partition: partition, headers: headers, timestamp: @use_event_time ? Time.at(time) : nil)
437
+ rescue EnqueueRate::LimitExceeded => e
438
+ @enqueue_rate.revert if @enqueue_rate
439
+ duration = e.next_retry_clock - Fluent::Clock.now
440
+ sleep(duration) if duration > 0.0
343
441
  rescue Exception => e
442
+ @enqueue_rate.revert if @enqueue_rate
344
443
  if e.respond_to?(:code) && e.code == :queue_full
345
444
  if attempt <= @max_enqueue_retries
346
445
  log.warn "Failed to enqueue message; attempting retry #{attempt} of #{@max_enqueue_retries} after #{@enqueue_retry_backoff}s"
@@ -14,6 +14,7 @@ class KafkaGroupInputTest < Test::Unit::TestCase
14
14
  brokers localhost:9092
15
15
  consumer_group fluentd
16
16
  format text
17
+ refresh_topic_interval 0
17
18
  @label @kafka
18
19
  topics #{TOPIC_NAME}
19
20
  ]
@@ -52,6 +53,7 @@ class KafkaGroupInputTest < Test::Unit::TestCase
52
53
  brokers localhost:9092
53
54
  format text
54
55
  @label @kafka
56
+ refresh_topic_interval 0
55
57
  topics #{TOPIC_NAME}
56
58
  ]
57
59
  d = create_driver
@@ -1,6 +1,8 @@
1
1
  require 'helper'
2
2
  require 'fluent/test/helpers'
3
- require 'fluent/output'
3
+ require 'fluent/test/driver/input'
4
+ require 'fluent/test/driver/output'
5
+ require 'securerandom'
4
6
 
5
7
  class Kafka2OutputTest < Test::Unit::TestCase
6
8
  include Fluent::Test::Helpers
@@ -15,8 +17,8 @@ class Kafka2OutputTest < Test::Unit::TestCase
15
17
  ])
16
18
  end
17
19
 
18
- def config
19
- base_config + config_element('ROOT', '', {"default_topic" => "kitagawakeiko",
20
+ def config(default_topic: "kitagawakeiko")
21
+ base_config + config_element('ROOT', '', {"default_topic" => default_topic,
20
22
  "brokers" => "localhost:9092"}, [
21
23
  ])
22
24
  end
@@ -57,4 +59,58 @@ class Kafka2OutputTest < Test::Unit::TestCase
57
59
  d = create_driver
58
60
  assert_equal true, d.instance.multi_workers_ready?
59
61
  end
62
+
63
+ class WriteTest < self
64
+ TOPIC_NAME = "kafka-output-#{SecureRandom.uuid}"
65
+
66
+ INPUT_CONFIG = %[
67
+ @type kafka
68
+ brokers localhost:9092
69
+ format json
70
+ @label @kafka
71
+ topics #{TOPIC_NAME}
72
+ ]
73
+
74
+ def create_target_driver(conf = INPUT_CONFIG)
75
+ Fluent::Test::Driver::Input.new(Fluent::KafkaInput).configure(conf)
76
+ end
77
+
78
+ def setup
79
+ @kafka = Kafka.new(["localhost:9092"], client_id: 'kafka')
80
+ end
81
+
82
+ def teardown
83
+ @kafka.delete_topic(TOPIC_NAME)
84
+ @kafka.close
85
+ end
86
+
87
+ def test_write
88
+ target_driver = create_target_driver
89
+ expected_message = {"a" => 2}
90
+ target_driver.run(expect_records: 1, timeout: 5) do
91
+ sleep 2
92
+ d = create_driver(config(default_topic: TOPIC_NAME))
93
+ d.run do
94
+ d.feed("test", event_time, expected_message)
95
+ end
96
+ end
97
+ actual_messages = target_driver.events.collect { |event| event[2] }
98
+ assert_equal([expected_message], actual_messages)
99
+ end
100
+
101
+ def test_exclude_fields
102
+ conf = config(default_topic: TOPIC_NAME) +
103
+ config_element('ROOT', '', {"exclude_fields" => "$.foo"}, [])
104
+ target_driver = create_target_driver
105
+ target_driver.run(expect_records: 1, timeout: 5) do
106
+ sleep 2
107
+ d = create_driver(conf)
108
+ d.run do
109
+ d.feed('test', event_time, {'a' => 'b', 'foo' => 'bar', 'message' => 'test'})
110
+ end
111
+ end
112
+ actual_messages = target_driver.events.collect { |event| event[2] }
113
+ assert_equal([{'a' => 'b', 'message' => 'test'}], actual_messages)
114
+ end
115
+ end
60
116
  end
@@ -0,0 +1,167 @@
1
+ require 'helper'
2
+ require 'fluent/test/helpers'
3
+ require 'fluent/test/driver/input'
4
+ require 'fluent/test/driver/output'
5
+ require 'securerandom'
6
+
7
+ class Rdkafka2OutputTest < Test::Unit::TestCase
8
+ include Fluent::Test::Helpers
9
+
10
+ def have_rdkafka
11
+ begin
12
+ require 'fluent/plugin/out_rdkafka2'
13
+ true
14
+ rescue LoadError
15
+ false
16
+ end
17
+ end
18
+
19
+ def setup
20
+ omit_unless(have_rdkafka, "rdkafka isn't installed")
21
+ Fluent::Test.setup
22
+ end
23
+
24
+ def base_config
25
+ config_element('ROOT', '', {"@type" => "rdkafka2"}, [
26
+ config_element('format', "", {"@type" => "json"})
27
+ ])
28
+ end
29
+
30
+ def config(default_topic: "kitagawakeiko")
31
+ base_config + config_element('ROOT', '', {"default_topic" => default_topic,
32
+ "brokers" => "localhost:9092"}, [
33
+ ])
34
+ end
35
+
36
+ def create_driver(conf = config, tag='test')
37
+ Fluent::Test::Driver::Output.new(Fluent::Rdkafka2Output).configure(conf)
38
+ end
39
+
40
+ def test_configure
41
+ assert_nothing_raised(Fluent::ConfigError) {
42
+ create_driver(base_config)
43
+ }
44
+
45
+ assert_nothing_raised(Fluent::ConfigError) {
46
+ create_driver(config)
47
+ }
48
+
49
+ assert_nothing_raised(Fluent::ConfigError) {
50
+ create_driver(config + config_element('buffer', "", {"@type" => "memory"}))
51
+ }
52
+
53
+ d = create_driver
54
+ assert_equal 'kitagawakeiko', d.instance.default_topic
55
+ assert_equal 'localhost:9092', d.instance.brokers
56
+ end
57
+
58
+ def test_mutli_worker_support
59
+ d = create_driver
60
+ assert_equal true, d.instance.multi_workers_ready?
61
+ end
62
+
63
+ class WriteTest < self
64
+ TOPIC_NAME = "kafka-output-#{SecureRandom.uuid}"
65
+
66
+ INPUT_CONFIG = %[
67
+ @type kafka
68
+ brokers localhost:9092
69
+ format json
70
+ @label @kafka
71
+ topics #{TOPIC_NAME}
72
+ ]
73
+
74
+ def create_target_driver(conf = INPUT_CONFIG)
75
+ Fluent::Test::Driver::Input.new(Fluent::KafkaInput).configure(conf)
76
+ end
77
+
78
+ def setup
79
+ @kafka = nil
80
+ omit_unless(have_rdkafka, "rdkafka isn't installed")
81
+ @kafka = Kafka.new(["localhost:9092"], client_id: 'kafka')
82
+ end
83
+
84
+ def teardown
85
+ if @kafka
86
+ @kafka.delete_topic(TOPIC_NAME)
87
+ @kafka.close
88
+ end
89
+ end
90
+
91
+ def test_write
92
+ target_driver = create_target_driver
93
+ expected_message = {"a" => 2}
94
+ target_driver.run(expect_records: 1, timeout: 5) do
95
+ sleep 2
96
+ d = create_driver(config(default_topic: TOPIC_NAME))
97
+ d.run do
98
+ d.feed("test", event_time, expected_message)
99
+ end
100
+ end
101
+ actual_messages = target_driver.events.collect { |event| event[2] }
102
+ assert_equal([expected_message], actual_messages)
103
+ end
104
+
105
+ def test_write_with_use_event_time
106
+ input_config = %[
107
+ @type kafka
108
+ brokers localhost:9092
109
+ format json
110
+ @label @kafka
111
+ topics #{TOPIC_NAME}
112
+ time_source kafka
113
+ ]
114
+ target_driver = create_target_driver(input_config)
115
+ expected_message = {"a" => 2}
116
+ now = event_time
117
+ target_driver.run(expect_records: 1, timeout: 5) do
118
+ sleep 2
119
+ d = create_driver(config(default_topic: TOPIC_NAME) + config_element('ROOT', '', {"use_event_time" => true}))
120
+ d.run do
121
+ d.feed("test", now, expected_message)
122
+ end
123
+ end
124
+ actual_time = target_driver.events.collect { |event| event[1] }.last
125
+ assert_in_delta(actual_time, now, 0.001) # expects millseconds precision
126
+ actual_messages = target_driver.events.collect { |event| event[2] }
127
+ assert_equal([expected_message], actual_messages)
128
+ end
129
+
130
+ def test_exclude_fields
131
+ conf = config(default_topic: TOPIC_NAME) +
132
+ config_element('ROOT', '', {"exclude_fields" => "$.foo"}, [])
133
+ target_driver = create_target_driver
134
+ target_driver.run(expect_records: 1, timeout: 5) do
135
+ sleep 2
136
+ d = create_driver(conf)
137
+ d.run do
138
+ d.feed('test', event_time, {'a' => 'b', 'foo' => 'bar', 'message' => 'test'})
139
+ end
140
+ end
141
+ actual_messages = target_driver.events.collect { |event| event[2] }
142
+ assert_equal([{'a' => 'b', 'message' => 'test'}], actual_messages)
143
+ end
144
+
145
+ def test_max_enqueue_bytes_per_second
146
+ conf = config(default_topic: TOPIC_NAME) +
147
+ config_element('ROOT', '', {"max_enqueue_bytes_per_second" => 32 * 3}, [])
148
+ target_driver = create_target_driver
149
+ expected_messages = []
150
+ target_driver.run(expect_records: 9, timeout: 10) do
151
+ sleep 2
152
+ d = create_driver(conf)
153
+ start_time = Fluent::Clock.now
154
+ d.run do
155
+ 9.times do |i|
156
+ message = {"message" => "32bytes message: #{i}"}
157
+ d.feed("test", event_time, message)
158
+ expected_messages << message
159
+ end
160
+ end
161
+ assert_in_delta(2.0, Fluent::Clock.now - start_time, 0.5)
162
+ end
163
+ actual_messages = target_driver.events.collect { |event| event[2] }
164
+ assert_equal(expected_messages, actual_messages)
165
+ end
166
+ end
167
+ end
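
Note that the new `test_out_rdkafka2.rb` suite and the extended kafka2 write tests are integration tests: they expect a Kafka broker reachable at `localhost:9092`, and the rdkafka2 tests additionally skip themselves when the `rdkafka` gem is not installed, which is presumably why the CI workflow now sets `USE_RDKAFKA=1`.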
metadata CHANGED
@@ -1,15 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-kafka
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.17.0
4
+ version: 0.17.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Hidemasa Togashi
8
8
  - Masahiro Nakagawa
9
- autorequire:
9
+ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2021-08-30 00:00:00.000000000 Z
12
+ date: 2022-01-25 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: fluentd
@@ -173,11 +173,12 @@ files:
173
173
  - test/plugin/test_out_kafka.rb
174
174
  - test/plugin/test_out_kafka2.rb
175
175
  - test/plugin/test_out_kafka_buffered.rb
176
+ - test/plugin/test_out_rdkafka2.rb
176
177
  homepage: https://github.com/fluent/fluent-plugin-kafka
177
178
  licenses:
178
179
  - Apache-2.0
179
180
  metadata: {}
180
- post_install_message:
181
+ post_install_message:
181
182
  rdoc_options: []
182
183
  require_paths:
183
184
  - lib
@@ -192,8 +193,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
192
193
  - !ruby/object:Gem::Version
193
194
  version: '0'
194
195
  requirements: []
195
- rubygems_version: 3.1.4
196
- signing_key:
196
+ rubygems_version: 3.2.5
197
+ signing_key:
197
198
  specification_version: 4
198
199
  summary: Fluentd plugin for Apache Kafka > 0.8
199
200
  test_files:
@@ -204,3 +205,4 @@ test_files:
204
205
  - test/plugin/test_out_kafka.rb
205
206
  - test/plugin/test_out_kafka2.rb
206
207
  - test/plugin/test_out_kafka_buffered.rb
208
+ - test/plugin/test_out_rdkafka2.rb