fluent-plugin-kafka 0.17.0 → 0.17.4

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 15805411e4029813123b9b636b6faadb937cf38c9841adcd9d998a0f54d8b687
- data.tar.gz: 75fef11595c86beb4a54d2d2ff659f77075e328426463f7ef830982c9724ff16
+ metadata.gz: 5cc122034295e37318cd7510ef3347eeda14cc43b8c0132053cb944d68141feb
+ data.tar.gz: e725b07eaa95f639b2122f1a4c8342101314f2f721e3625c73889dc8caf9aead
  SHA512:
- metadata.gz: db08ffbde4fe36ce38abe6eebf83e6e0dc157bc5d8dd95141ef911d0c5b59074a208c3a8f836e258ffddc34f73fda303006eaef1cd8fc12ae4dd4c79d101d0c7
- data.tar.gz: 9b3b098b1bc58654924d50ac8d685af007763942c1cc68187bbd6d39510a27ca31ae795df0de312d97ad67463ae0c428882118640fce052119288e27e3fb0df5
+ metadata.gz: 2e432e7f2670132022b18fa9460b8eda69a18a4dd3a35aa775619c6a45ff8cb6ea5bad869ebc5cefe804b9bb4261ab12150cb77ad10af62dc2e54fd6de435aec
+ data.tar.gz: edbebd57c325292d197d342ff8f5151aa1fcfbd47128fc09d1b71e2bf4d7ccf196d54b48df106f1b9f655fe334ab1f9fe907fce8b4f1b1d20edd9c8254c6c8cd
@@ -5,6 +5,8 @@ on:
  jobs:
  build:
  runs-on: ${{ matrix.os }}
+ env:
+ USE_RDKAFKA: 1
  strategy:
  fail-fast: false
  matrix:
data/ChangeLog CHANGED
@@ -1,3 +1,19 @@
+ Release 0.17.4 - 2022/01/25
+ * in_kafka_group: Add `refresh_topic_interval` parameter
+
+ Release 0.17.3 - 2021/11/26
+ * output: Suppress large warning logs for events skipped by `max_send_limit_bytes`
+
+ Release 0.17.2 - 2021/10/14
+ * out_rdkafka2: Add `max_enqueue_bytes_per_second` parameter
+ * out_rdkafka2: Support `use_event_time` parameter
+ * out_rdkafka2: Fix a potential bug that the plugin might exit without receiving responses from Kafka.
+
+ Release 0.17.1 - 2021/09/24
+ * out_rdkafka/out_rdkafka2: Support rdkafka 0.9.0 or later
+ * out_rdkafka/out_rdkafka2: Add `exclude_fields` parameter
+ * out_kafka2.rb: Fix one more Ruby 3.0 keyword arguments issue
+
  Release 0.17.0 - 2021/08/30
  * out_kafka/out_kafka_buffered/out_kafka2: Provide murmur2 partitioner hash function choice
  * in_kafka/in_kafka_group/out_kafka/out_kafka_buffered/out_kafka2: Use Ruby Kafka's ssl_ca_cert_file_path parameter to feed the CA certs
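
For quick reference, the headline addition in 0.17.4 is the `refresh_topic_interval` setting on the `kafka_group` input (see the README and in_kafka_group.rb hunks further below). A minimal, hedged configuration sketch — broker, group, and topic names are placeholders:

    <source>
      @type kafka_group
      brokers localhost:9092
      consumer_group my-consumer-group
      topics app_events
      format json
      # added in 0.17.4: re-check the subscribed topic list every 60 seconds
      refresh_topic_interval 60
    </source>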
data/Gemfile CHANGED
@@ -2,3 +2,5 @@ source 'https://rubygems.org'

  # Specify your gem's dependencies in fluent-plugin-kafka.gemspec
  gemspec
+
+ gem 'rdkafka', '>= 0.6.0' if ENV["USE_RDKAFKA"]
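
The optional `rdkafka` dependency is only pulled in when the `USE_RDKAFKA` environment variable is set — the same switch the CI workflow above enables with `USE_RDKAFKA: 1`. Locally that would presumably look like `USE_RDKAFKA=1 bundle install`, while a plain `bundle install` keeps resolving without librdkafka.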
data/README.md CHANGED
@@ -40,14 +40,14 @@ If you want to use zookeeper related parameters, you also need to install zookee

  Set path to SSL related files. See [Encryption and Authentication using SSL](https://github.com/zendesk/ruby-kafka#encryption-and-authentication-using-ssl) for more detail.

- #### SASL authentication
+ #### SASL authentication

  ##### with GSSAPI

  - principal
  - keytab

- Set principal and path to keytab for SASL/GSSAPI authentication.
+ Set principal and path to keytab for SASL/GSSAPI authentication.
  See [Authentication using SASL](https://github.com/zendesk/ruby-kafka#authentication-using-sasl) for more details.

  ##### with Plain/SCRAM
@@ -57,7 +57,7 @@ See [Authentication using SASL](https://github.com/zendesk/ruby-kafka#authentica
  - scram_mechanism
  - sasl_over_ssl

- Set username, password, scram_mechanism and sasl_over_ssl for SASL/Plain or Scram authentication.
+ Set username, password, scram_mechanism and sasl_over_ssl for SASL/Plain or Scram authentication.
  See [Authentication using SASL](https://github.com/zendesk/ruby-kafka#authentication-using-sasl) for more details.

  ### Input plugin (@type 'kafka')
@@ -119,7 +119,7 @@ Consume events by kafka consumer group features..
  topics <listening topics(separate with comma',')>
  format <input text type (text|json|ltsv|msgpack)> :default => json
  message_key <key (Optional, for text format only, default is message)>
- kafka_mesasge_key <key (Optional, If specified, set kafka's message key to this key)>
+ kafka_message_key <key (Optional, If specified, set kafka's message key to this key)>
  add_headers <If true, add kafka's message headers to record>
  add_prefix <tag prefix (Optional)>
  add_suffix <tag suffix (Optional)>
@@ -135,6 +135,7 @@ Consume events by kafka consumer group features..
  offset_commit_interval (integer) :default => nil (Use default of ruby-kafka)
  offset_commit_threshold (integer) :default => nil (Use default of ruby-kafka)
  fetcher_max_queue_size (integer) :default => nil (Use default of ruby-kafka)
+ refresh_topic_interval (integer) :default => nil (Use default of ruby-kafka)
  start_from_beginning (bool) :default => true
  </source>

@@ -155,7 +156,7 @@ With the introduction of the rdkafka-ruby based input plugin we hope to support
  topics <listening topics(separate with comma',')>
  format <input text type (text|json|ltsv|msgpack)> :default => json
  message_key <key (Optional, for text format only, default is message)>
- kafka_mesasge_key <key (Optional, If specified, set kafka's message key to this key)>
+ kafka_message_key <key (Optional, If specified, set kafka's message key to this key)>
  add_headers <If true, add kafka's message headers to record>
  add_prefix <tag prefix (Optional)>
  add_suffix <tag suffix (Optional)>
@@ -200,6 +201,7 @@ If `ruby-kafka` doesn't fit your kafka environment, check `rdkafka2` plugin inst
  get_kafka_client_log (bool) :default => false
  headers (hash) :default => {}
  headers_from_record (hash) :default => {}
+ use_event_time (bool) :default => false
  use_default_for_unknown_topic (bool) :default => false
  discard_kafka_delivery_failed (bool) :default => false (No discard)
  partitioner_hash_function (enum) (crc32|murmur2) :default => 'crc32'
@@ -316,6 +318,23 @@ The Kafka message will have a header of source_ip=12.7.0.0.1.

  The configuration format is jsonpath. It is descibed in https://docs.fluentd.org/plugin-helper-overview/api-plugin-helper-record_accessor

+ #### Excluding fields
+ Fields can be excluded from output data. Only works for kafka2 and rdkafka2 output plugin.
+
+ Fields must be specified using an array of dot notation `$.`, for example:
+
+ <match app.**>
+ @type kafka2
+ [...]
+ exclude_fields $.source.ip,$.HTTP_FOO
+ <match>
+
+ This config can be used to remove fields used on another configs.
+
+ For example, `$.source.ip` can be extracted with config `headers_from_record` and excluded from message payload.
+
+ > Using this config to remove unused fields is discouraged. A [filter plugin](https://docs.fluentd.org/v/0.12/filter) can be used for this purpose.
+
  ### Buffered output plugin

  This plugin uses ruby-kafka producer for writing data. This plugin is for v0.12. If you use v1, see `kafka2`.
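
The hunk above adds the README's "Excluding fields" section, which mentions combining `headers_from_record` with `exclude_fields`. A hedged kafka2 sketch of that interplay — the record's `$.source.ip` is copied into a Kafka header and then stripped from the payload; broker and topic names are placeholders:

    <match app.**>
      @type kafka2
      brokers localhost:9092
      default_topic app_events
      <format>
        @type json
      </format>
      headers_from_record {"source_ip":"$.source.ip"}
      exclude_fields $.source.ip
    </match>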
@@ -346,6 +365,7 @@ Support of fluentd v0.12 has ended. `kafka_buffered` will be an alias of `kafka2
  exclude_topic_key (bool) :default => false
  exclude_partition_key (bool) :default => false
  get_kafka_client_log (bool) :default => false
+ use_event_time (bool) :default => false
  partitioner_hash_function (enum) (crc32|murmur2) :default => 'crc32'

  # See fluentd document for buffer related parameters: https://docs.fluentd.org/v/0.12/buffer
@@ -435,6 +455,7 @@ You need to install rdkafka gem.
  exclude_topic_key (bool) :default => false
  exclude_partition_key (bool) :default => false
  discard_kafka_delivery_failed (bool) :default => false (No discard)
+ use_event_time (bool) :default => false

  # same with kafka2
  headers (hash) :default => {}
@@ -469,6 +490,10 @@ You need to install rdkafka gem.
  rdkafka_delivery_handle_poll_timeout (integer) :default => 30
  # If the record size is larger than this value, such records are ignored. Default is no limit
  max_send_limit_bytes (integer) :default => nil
+ # The maximum number of enqueueing bytes per second. It can reduce the
+ # load of both Fluentd and Kafka when excessive messages are attempted
+ # to send. Default is no limit.
+ max_enqueue_bytes_per_second (integer) :default => nil
  </match>

  If you use v0.12, use `rdkafka` instead.
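
Pulling the new rdkafka2 options from the README hunks above into one place, a hedged example configuration might look like this (broker, topic, and the 1 MiB/s limit are placeholders):

    <match app.**>
      @type rdkafka2
      brokers localhost:9092
      default_topic app_events
      <format>
        @type json
      </format>
      use_event_time true                   # 0.17.2: stamp messages with the fluentd event time
      exclude_fields $.source.ip            # 0.17.1: drop this field from the payload
      max_enqueue_bytes_per_second 1048576  # 0.17.2: throttle enqueueing to ~1 MiB/s
    </match>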
@@ -13,7 +13,7 @@ Gem::Specification.new do |gem|
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
  gem.name = "fluent-plugin-kafka"
  gem.require_paths = ["lib"]
- gem.version = '0.17.0'
+ gem.version = '0.17.4'
  gem.required_ruby_version = ">= 2.1.0"

  gem.add_dependency "fluentd", [">= 0.10.58", "< 2"]
@@ -71,6 +71,7 @@ class Fluent::KafkaInput < Fluent::Input
  require 'kafka'

  @time_parser = nil
+ @zookeeper = nil
  end

  def configure(conf)
@@ -67,6 +67,8 @@ class Fluent::KafkaGroupInput < Fluent::Input
  :desc => "The number of messages that can be processed before their offsets are committed"
  config_param :fetcher_max_queue_size, :integer, :default => nil,
  :desc => "The number of fetched messages per partition that are queued in fetcher queue"
+ config_param :refresh_topic_interval, :integer, :default => nil,
+ :desc => "The interval of refreshing the topic list in seconds. Zero or unset disables this"
  config_param :start_from_beginning, :bool, :default => true,
  :desc => "Whether to start from the beginning of the topic or just subscribe to new messages being produced"

@@ -128,6 +130,7 @@ class Fluent::KafkaGroupInput < Fluent::Input
  @consumer_opts[:offset_commit_interval] = @offset_commit_interval if @offset_commit_interval
  @consumer_opts[:offset_commit_threshold] = @offset_commit_threshold if @offset_commit_threshold
  @consumer_opts[:fetcher_max_queue_size] = @fetcher_max_queue_size if @fetcher_max_queue_size
+ @consumer_opts[:refresh_topic_interval] = @refresh_topic_interval if @refresh_topic_interval

  @fetch_opts = {}
  @fetch_opts[:max_wait_time] = @max_wait_time if @max_wait_time
@@ -88,6 +88,7 @@ DESC
  require 'kafka'

  @kafka = nil
+ @field_separator = nil
  end

  def refresh_client
@@ -239,7 +240,8 @@ DESC
  record_buf = @formatter_proc.call(tag, time, record)
  record_buf_bytes = record_buf.bytesize
  if @max_send_limit_bytes && record_buf_bytes > @max_send_limit_bytes
- log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :record => record
+ log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :record_size => record_buf_bytes
+ log.debug "Skipped event:", :record => record
  next
  end
  log.trace { "message will send to #{topic} with partition_key: #{partition_key}, partition: #{partition}, message_key: #{message_key} and value: #{record_buf}." }
@@ -42,6 +42,8 @@ DESC
  :desc => 'Set true to remove message key from data'
  config_param :exclude_topic_key, :bool, :default => false,
  :desc => 'Set true to remove topic name key from data'
+ config_param :exclude_fields, :array, :default => [], value_type: :string,
+ :desc => 'Fields to remove from data where the value is a jsonpath to a record value'
  config_param :use_event_time, :bool, :default => false, :desc => 'Use fluentd event time for kafka create_time'
  config_param :headers, :hash, default: {}, symbolize_keys: true, value_type: :string,
  :desc => 'Kafka message headers'
@@ -177,6 +179,10 @@ DESC
  @headers_from_record.each do |key, value|
  @headers_from_record_accessors[key] = record_accessor_create(value)
  end
+
+ @exclude_field_accessors = @exclude_fields.map do |field|
+ record_accessor_create(field)
+ end
  end

  def multi_workers_ready?
@@ -235,7 +241,7 @@ DESC
  mutate_headers = !@headers_from_record_accessors.empty?

  begin
- producer = @kafka.topic_producer(topic, @producer_opts)
+ producer = @kafka.topic_producer(topic, **@producer_opts)
  chunk.msgpack_each { |time, record|
  begin
  record = inject_values_to_record(tag, time, record)
@@ -253,10 +259,17 @@ DESC
  headers = base_headers
  end

+ unless @exclude_fields.empty?
+ @exclude_field_accessors.each do |exclude_field_accessor|
+ exclude_field_accessor.delete(record)
+ end
+ end
+
  record_buf = @formatter_proc.call(tag, time, record)
  record_buf_bytes = record_buf.bytesize
  if @max_send_limit_bytes && record_buf_bytes > @max_send_limit_bytes
- log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :record => record
+ log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :record_size => record_buf_bytes
+ log.debug "Skipped event:", :record => record
  next
  end
  rescue StandardError => e
@@ -107,6 +107,7 @@ DESC
  @kafka = nil
  @producers = {}
  @producers_mutex = Mutex.new
+ @field_separator = nil
  end

  def multi_workers_ready?
@@ -331,7 +332,8 @@ DESC
  record_buf = @formatter_proc.call(tag, time, record)
  record_buf_bytes = record_buf.bytesize
  if @max_send_limit_bytes && record_buf_bytes > @max_send_limit_bytes
- log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :record => record
+ log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :record_size => record_buf_bytes
+ log.debug "Skipped event:", :record => record
  next
  end
  rescue StandardError => e
@@ -65,6 +65,7 @@ DESC
  The codec the producer uses to compress messages.
  Supported codecs: (gzip|snappy)
  DESC
+ config_param :use_event_time, :bool, :default => false, :desc => 'Use fluentd event time for rdkafka timestamp'
  config_param :max_send_limit_bytes, :size, :default => nil
  config_param :rdkafka_buffering_max_ms, :integer, :default => nil
  config_param :rdkafka_buffering_max_messages, :integer, :default => nil
@@ -91,23 +92,29 @@ DESC
  def configure(conf)
  super
  log.instance_eval {
- def add(level, &block)
- return unless block
+ def add(level, message = nil)
+ if message.nil?
+ if block_given?
+ message = yield
+ else
+ return
+ end
+ end

  # Follow rdkakfa's log level. See also rdkafka-ruby's bindings.rb: https://github.com/appsignal/rdkafka-ruby/blob/e5c7261e3f2637554a5c12b924be297d7dca1328/lib/rdkafka/bindings.rb#L117
  case level
  when Logger::FATAL
- self.fatal(block.call)
+ self.fatal(message)
  when Logger::ERROR
- self.error(block.call)
+ self.error(message)
  when Logger::WARN
- self.warn(block.call)
+ self.warn(message)
  when Logger::INFO
- self.info(block.call)
+ self.info(message)
  when Logger::DEBUG
- self.debug(block.call)
+ self.debug(message)
  else
- self.trace(block.call)
+ self.trace(message)
  end
  end
  }
@@ -271,7 +278,8 @@ DESC
  record_buf = @formatter_proc.call(tag, time, record)
  record_buf_bytes = record_buf.bytesize
  if @max_send_limit_bytes && record_buf_bytes > @max_send_limit_bytes
- log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :record => record
+ log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :record_size => record_buf_bytes
+ log.debug "Skipped event:", :record => record
  next
  end
  rescue StandardError => e
@@ -280,7 +288,7 @@ DESC
  end

  producer = get_producer
- handler = enqueue_with_retry(producer, topic, record_buf, message_key, partition)
+ handler = enqueue_with_retry(producer, topic, record_buf, message_key, partition, time)
  handler
  }.each { |handler|
  handler.wait(max_wait_timeout: @rdkafka_delivery_handle_poll_timeout) if @rdkafka_delivery_handle_poll_timeout != 0
@@ -292,11 +300,11 @@ DESC
  raise e
  end

- def enqueue_with_retry(producer, topic, record_buf, message_key, partition)
+ def enqueue_with_retry(producer, topic, record_buf, message_key, partition, time)
  attempt = 0
  loop do
  begin
- handler = producer.produce(topic: topic, payload: record_buf, key: message_key, partition: partition)
+ handler = producer.produce(topic: topic, payload: record_buf, key: message_key, partition: partition, timestamp: @use_event_time ? Time.at(time) : nil)
  return handler
  rescue Exception => e
  if e.respond_to?(:code) && e.code == :queue_full
@@ -56,6 +56,8 @@ DESC
  :desc => <<-DESC
  Set true to remove topic key from data
  DESC
+ config_param :exclude_fields, :array, :default => [], value_type: :string,
+ :desc => 'Fields to remove from data where the value is a jsonpath to a record value'
  config_param :headers, :hash, default: {}, symbolize_keys: true, value_type: :string,
  :desc => 'Kafka message headers'
  config_param :headers_from_record, :hash, default: {}, symbolize_keys: true, value_type: :string,
@@ -72,6 +74,7 @@ DESC
  The codec the producer uses to compress messages. Used for compression.codec
  Supported codecs: (gzip|snappy)
  DESC
+ config_param :use_event_time, :bool, :default => false, :desc => 'Use fluentd event time for rdkafka timestamp'
  config_param :max_send_limit_bytes, :size, :default => nil
  config_param :discard_kafka_delivery_failed, :bool, :default => false
  config_param :rdkafka_buffering_max_ms, :integer, :default => nil, :desc => 'Used for queue.buffering.max.ms'
@@ -84,6 +87,7 @@ DESC

  config_param :max_enqueue_retries, :integer, :default => 3
  config_param :enqueue_retry_backoff, :integer, :default => 3
+ config_param :max_enqueue_bytes_per_second, :size, :default => nil, :desc => 'The maximum number of enqueueing bytes per second'

  config_param :service_name, :string, :default => nil, :desc => 'Used for sasl.kerberos.service.name'
  config_param :ssl_client_cert_key_password, :string, :default => nil, :desc => 'Used for ssl.key.password'
@@ -99,34 +103,96 @@ DESC
  include Fluent::KafkaPluginUtil::SSLSettings
  include Fluent::KafkaPluginUtil::SaslSettings

+ class EnqueueRate
+ class LimitExceeded < StandardError
+ attr_reader :next_retry_clock
+ def initialize(next_retry_clock)
+ @next_retry_clock = next_retry_clock
+ end
+ end
+
+ def initialize(limit_bytes_per_second)
+ @mutex = Mutex.new
+ @start_clock = Fluent::Clock.now
+ @bytes_per_second = 0
+ @limit_bytes_per_second = limit_bytes_per_second
+ @commits = {}
+ end
+
+ def raise_if_limit_exceeded(bytes_to_enqueue)
+ return if @limit_bytes_per_second.nil?
+
+ @mutex.synchronize do
+ @commits[Thread.current] = {
+ clock: Fluent::Clock.now,
+ bytesize: bytes_to_enqueue,
+ }
+
+ @bytes_per_second += @commits[Thread.current][:bytesize]
+ duration = @commits[Thread.current][:clock] - @start_clock
+
+ if duration < 1.0
+ if @bytes_per_second > @limit_bytes_per_second
+ raise LimitExceeded.new(@start_clock + 1.0)
+ end
+ else
+ @start_clock = @commits[Thread.current][:clock]
+ @bytes_per_second = @commits[Thread.current][:bytesize]
+ end
+ end
+ end
+
+ def revert
+ return if @limit_bytes_per_second.nil?
+
+ @mutex.synchronize do
+ return unless @commits[Thread.current]
+ return unless @commits[Thread.current][:clock]
+ if @commits[Thread.current][:clock] >= @start_clock
+ @bytes_per_second -= @commits[Thread.current][:bytesize]
+ end
+ @commits[Thread.current] = nil
+ end
+ end
+ end
+
  def initialize
  super

  @producers = nil
  @producers_mutex = nil
  @shared_producer = nil
+ @enqueue_rate = nil
+ @writing_threads_mutex = Mutex.new
+ @writing_threads = Set.new
  end

  def configure(conf)
  super
  log.instance_eval {
- def add(level, &block)
- return unless block
+ def add(level, message = nil)
+ if message.nil?
+ if block_given?
+ message = yield
+ else
+ return
+ end
+ end

  # Follow rdkakfa's log level. See also rdkafka-ruby's bindings.rb: https://github.com/appsignal/rdkafka-ruby/blob/e5c7261e3f2637554a5c12b924be297d7dca1328/lib/rdkafka/bindings.rb#L117
  case level
  when Logger::FATAL
- self.fatal(block.call)
+ self.fatal(message)
  when Logger::ERROR
- self.error(block.call)
+ self.error(message)
  when Logger::WARN
- self.warn(block.call)
+ self.warn(message)
  when Logger::INFO
- self.info(block.call)
+ self.info(message)
  when Logger::DEBUG
- self.debug(block.call)
+ self.debug(message)
  else
- self.trace(block.call)
+ self.trace(message)
  end
  end
  }
@@ -158,6 +224,12 @@ DESC
  @headers_from_record.each do |key, value|
  @headers_from_record_accessors[key] = record_accessor_create(value)
  end
+
+ @exclude_field_accessors = @exclude_fields.map do |field|
+ record_accessor_create(field)
+ end
+
+ @enqueue_rate = EnqueueRate.new(@max_enqueue_bytes_per_second) unless @max_enqueue_bytes_per_second.nil?
  end

  def build_config
@@ -221,8 +293,19 @@ DESC
  true
  end

+ def wait_writing_threads
+ done = false
+ until done do
+ @writing_threads_mutex.synchronize do
+ done = true if @writing_threads.empty?
+ end
+ sleep(1) unless done
+ end
+ end
+
  def shutdown
  super
+ wait_writing_threads
  shutdown_producers
  end

@@ -279,6 +362,7 @@ DESC
  end

  def write(chunk)
+ @writing_threads_mutex.synchronize { @writing_threads.add(Thread.current) }
  tag = chunk.metadata.tag
  topic = if @topic
  extract_placeholders(@topic, chunk)
@@ -305,10 +389,17 @@ DESC
  headers[key] = header_accessor.call(record)
  end

+ unless @exclude_fields.empty?
+ @exclude_field_accessors.each do |exclude_field_acessor|
+ exclude_field_acessor.delete(record)
+ end
+ end
+
  record_buf = @formatter_proc.call(tag, time, record)
  record_buf_bytes = record_buf.bytesize
  if @max_send_limit_bytes && record_buf_bytes > @max_send_limit_bytes
- log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :record => record
+ log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :record_size => record_buf_bytes
+ log.debug "Skipped event:", :record => record
  next
  end
  rescue StandardError => e
@@ -316,7 +407,7 @@ DESC
  next
  end

- handler = enqueue_with_retry(producer, topic, record_buf, message_key, partition, headers)
+ handler = enqueue_with_retry(producer, topic, record_buf, message_key, partition, headers, time)
  if @rdkafka_delivery_handle_poll_timeout != 0
  handlers << handler
  end
@@ -333,14 +424,22 @@ DESC
  # Raise exception to retry sendind messages
  raise e
  end
+ ensure
+ @writing_threads_mutex.synchronize { @writing_threads.delete(Thread.current) }
  end

- def enqueue_with_retry(producer, topic, record_buf, message_key, partition, headers)
+ def enqueue_with_retry(producer, topic, record_buf, message_key, partition, headers, time)
  attempt = 0
  loop do
  begin
- return producer.produce(topic: topic, payload: record_buf, key: message_key, partition: partition, headers: headers)
+ @enqueue_rate.raise_if_limit_exceeded(record_buf.bytesize) if @enqueue_rate
+ return producer.produce(topic: topic, payload: record_buf, key: message_key, partition: partition, headers: headers, timestamp: @use_event_time ? Time.at(time) : nil)
+ rescue EnqueueRate::LimitExceeded => e
+ @enqueue_rate.revert if @enqueue_rate
+ duration = e.next_retry_clock - Fluent::Clock.now
+ sleep(duration) if duration > 0.0
  rescue Exception => e
+ @enqueue_rate.revert if @enqueue_rate
  if e.respond_to?(:code) && e.code == :queue_full
  if attempt <= @max_enqueue_retries
  log.warn "Failed to enqueue message; attempting retry #{attempt} of #{@max_enqueue_retries} after #{@enqueue_retry_backoff}s"
@@ -14,6 +14,7 @@ class KafkaGroupInputTest < Test::Unit::TestCase
  brokers localhost:9092
  consumer_group fluentd
  format text
+ refresh_topic_interval 0
  @label @kafka
  topics #{TOPIC_NAME}
  ]
@@ -52,6 +53,7 @@ class KafkaGroupInputTest < Test::Unit::TestCase
  brokers localhost:9092
  format text
  @label @kafka
+ refresh_topic_interval 0
  topics #{TOPIC_NAME}
  ]
  d = create_driver
@@ -1,6 +1,8 @@
  require 'helper'
  require 'fluent/test/helpers'
- require 'fluent/output'
+ require 'fluent/test/driver/input'
+ require 'fluent/test/driver/output'
+ require 'securerandom'

  class Kafka2OutputTest < Test::Unit::TestCase
  include Fluent::Test::Helpers
@@ -15,8 +17,8 @@ class Kafka2OutputTest < Test::Unit::TestCase
  ])
  end

- def config
- base_config + config_element('ROOT', '', {"default_topic" => "kitagawakeiko",
+ def config(default_topic: "kitagawakeiko")
+ base_config + config_element('ROOT', '', {"default_topic" => default_topic,
  "brokers" => "localhost:9092"}, [
  ])
  end
@@ -57,4 +59,58 @@ class Kafka2OutputTest < Test::Unit::TestCase
  d = create_driver
  assert_equal true, d.instance.multi_workers_ready?
  end
+
+ class WriteTest < self
+ TOPIC_NAME = "kafka-output-#{SecureRandom.uuid}"
+
+ INPUT_CONFIG = %[
+ @type kafka
+ brokers localhost:9092
+ format json
+ @label @kafka
+ topics #{TOPIC_NAME}
+ ]
+
+ def create_target_driver(conf = INPUT_CONFIG)
+ Fluent::Test::Driver::Input.new(Fluent::KafkaInput).configure(conf)
+ end
+
+ def setup
+ @kafka = Kafka.new(["localhost:9092"], client_id: 'kafka')
+ end
+
+ def teardown
+ @kafka.delete_topic(TOPIC_NAME)
+ @kafka.close
+ end
+
+ def test_write
+ target_driver = create_target_driver
+ expected_message = {"a" => 2}
+ target_driver.run(expect_records: 1, timeout: 5) do
+ sleep 2
+ d = create_driver(config(default_topic: TOPIC_NAME))
+ d.run do
+ d.feed("test", event_time, expected_message)
+ end
+ end
+ actual_messages = target_driver.events.collect { |event| event[2] }
+ assert_equal([expected_message], actual_messages)
+ end
+
+ def test_exclude_fields
+ conf = config(default_topic: TOPIC_NAME) +
+ config_element('ROOT', '', {"exclude_fields" => "$.foo"}, [])
+ target_driver = create_target_driver
+ target_driver.run(expect_records: 1, timeout: 5) do
+ sleep 2
+ d = create_driver(conf)
+ d.run do
+ d.feed('test', event_time, {'a' => 'b', 'foo' => 'bar', 'message' => 'test'})
+ end
+ end
+ actual_messages = target_driver.events.collect { |event| event[2] }
+ assert_equal([{'a' => 'b', 'message' => 'test'}], actual_messages)
+ end
+ end
  end
@@ -0,0 +1,167 @@
+ require 'helper'
+ require 'fluent/test/helpers'
+ require 'fluent/test/driver/input'
+ require 'fluent/test/driver/output'
+ require 'securerandom'
+
+ class Rdkafka2OutputTest < Test::Unit::TestCase
+ include Fluent::Test::Helpers
+
+ def have_rdkafka
+ begin
+ require 'fluent/plugin/out_rdkafka2'
+ true
+ rescue LoadError
+ false
+ end
+ end
+
+ def setup
+ omit_unless(have_rdkafka, "rdkafka isn't installed")
+ Fluent::Test.setup
+ end
+
+ def base_config
+ config_element('ROOT', '', {"@type" => "rdkafka2"}, [
+ config_element('format', "", {"@type" => "json"})
+ ])
+ end
+
+ def config(default_topic: "kitagawakeiko")
+ base_config + config_element('ROOT', '', {"default_topic" => default_topic,
+ "brokers" => "localhost:9092"}, [
+ ])
+ end
+
+ def create_driver(conf = config, tag='test')
+ Fluent::Test::Driver::Output.new(Fluent::Rdkafka2Output).configure(conf)
+ end
+
+ def test_configure
+ assert_nothing_raised(Fluent::ConfigError) {
+ create_driver(base_config)
+ }
+
+ assert_nothing_raised(Fluent::ConfigError) {
+ create_driver(config)
+ }
+
+ assert_nothing_raised(Fluent::ConfigError) {
+ create_driver(config + config_element('buffer', "", {"@type" => "memory"}))
+ }
+
+ d = create_driver
+ assert_equal 'kitagawakeiko', d.instance.default_topic
+ assert_equal 'localhost:9092', d.instance.brokers
+ end
+
+ def test_mutli_worker_support
+ d = create_driver
+ assert_equal true, d.instance.multi_workers_ready?
+ end
+
+ class WriteTest < self
+ TOPIC_NAME = "kafka-output-#{SecureRandom.uuid}"
+
+ INPUT_CONFIG = %[
+ @type kafka
+ brokers localhost:9092
+ format json
+ @label @kafka
+ topics #{TOPIC_NAME}
+ ]
+
+ def create_target_driver(conf = INPUT_CONFIG)
+ Fluent::Test::Driver::Input.new(Fluent::KafkaInput).configure(conf)
+ end
+
+ def setup
+ @kafka = nil
+ omit_unless(have_rdkafka, "rdkafka isn't installed")
+ @kafka = Kafka.new(["localhost:9092"], client_id: 'kafka')
+ end
+
+ def teardown
+ if @kafka
+ @kafka.delete_topic(TOPIC_NAME)
+ @kafka.close
+ end
+ end
+
+ def test_write
+ target_driver = create_target_driver
+ expected_message = {"a" => 2}
+ target_driver.run(expect_records: 1, timeout: 5) do
+ sleep 2
+ d = create_driver(config(default_topic: TOPIC_NAME))
+ d.run do
+ d.feed("test", event_time, expected_message)
+ end
+ end
+ actual_messages = target_driver.events.collect { |event| event[2] }
+ assert_equal([expected_message], actual_messages)
+ end
+
+ def test_write_with_use_event_time
+ input_config = %[
+ @type kafka
+ brokers localhost:9092
+ format json
+ @label @kafka
+ topics #{TOPIC_NAME}
+ time_source kafka
+ ]
+ target_driver = create_target_driver(input_config)
+ expected_message = {"a" => 2}
+ now = event_time
+ target_driver.run(expect_records: 1, timeout: 5) do
+ sleep 2
+ d = create_driver(config(default_topic: TOPIC_NAME) + config_element('ROOT', '', {"use_event_time" => true}))
+ d.run do
+ d.feed("test", now, expected_message)
+ end
+ end
+ actual_time = target_driver.events.collect { |event| event[1] }.last
+ assert_in_delta(actual_time, now, 0.001) # expects millseconds precision
+ actual_messages = target_driver.events.collect { |event| event[2] }
+ assert_equal([expected_message], actual_messages)
+ end
+
+ def test_exclude_fields
+ conf = config(default_topic: TOPIC_NAME) +
+ config_element('ROOT', '', {"exclude_fields" => "$.foo"}, [])
+ target_driver = create_target_driver
+ target_driver.run(expect_records: 1, timeout: 5) do
+ sleep 2
+ d = create_driver(conf)
+ d.run do
+ d.feed('test', event_time, {'a' => 'b', 'foo' => 'bar', 'message' => 'test'})
+ end
+ end
+ actual_messages = target_driver.events.collect { |event| event[2] }
+ assert_equal([{'a' => 'b', 'message' => 'test'}], actual_messages)
+ end
+
+ def test_max_enqueue_bytes_per_second
+ conf = config(default_topic: TOPIC_NAME) +
+ config_element('ROOT', '', {"max_enqueue_bytes_per_second" => 32 * 3}, [])
+ target_driver = create_target_driver
+ expected_messages = []
+ target_driver.run(expect_records: 9, timeout: 10) do
+ sleep 2
+ d = create_driver(conf)
+ start_time = Fluent::Clock.now
+ d.run do
+ 9.times do |i|
+ message = {"message" => "32bytes message: #{i}"}
+ d.feed("test", event_time, message)
+ expected_messages << message
+ end
+ end
+ assert_in_delta(2.0, Fluent::Clock.now - start_time, 0.5)
+ end
+ actual_messages = target_driver.events.collect { |event| event[2] }
+ assert_equal(expected_messages, actual_messages)
+ end
+ end
+ end
metadata CHANGED
@@ -1,15 +1,15 @@
  --- !ruby/object:Gem::Specification
  name: fluent-plugin-kafka
  version: !ruby/object:Gem::Version
- version: 0.17.0
+ version: 0.17.4
  platform: ruby
  authors:
  - Hidemasa Togashi
  - Masahiro Nakagawa
- autorequire:
+ autorequire:
  bindir: bin
  cert_chain: []
- date: 2021-08-30 00:00:00.000000000 Z
+ date: 2022-01-25 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: fluentd
@@ -173,11 +173,12 @@ files:
  - test/plugin/test_out_kafka.rb
  - test/plugin/test_out_kafka2.rb
  - test/plugin/test_out_kafka_buffered.rb
+ - test/plugin/test_out_rdkafka2.rb
  homepage: https://github.com/fluent/fluent-plugin-kafka
  licenses:
  - Apache-2.0
  metadata: {}
- post_install_message:
+ post_install_message:
  rdoc_options: []
  require_paths:
  - lib
@@ -192,8 +193,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
  - !ruby/object:Gem::Version
  version: '0'
  requirements: []
- rubygems_version: 3.1.4
- signing_key:
+ rubygems_version: 3.2.5
+ signing_key:
  specification_version: 4
  summary: Fluentd plugin for Apache Kafka > 0.8
  test_files:
@@ -204,3 +205,4 @@ test_files:
  - test/plugin/test_out_kafka.rb
  - test/plugin/test_out_kafka2.rb
  - test/plugin/test_out_kafka_buffered.rb
+ - test/plugin/test_out_rdkafka2.rb