fluent-plugin-kafka 0.17.5 → 0.19.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: eb7f693666ff7fd4bdb43b249d3e60e882ebf99e80402de2a5310b61668ef9e7
4
- data.tar.gz: d4b141409cf83402ad1e28b9579317c672b20c9760b8f58b8d54800e8c46313b
3
+ metadata.gz: b4a8c37b041fedc3f95046620413e5f7da437557fc19294439585fa89fd5b244
4
+ data.tar.gz: fe4c8cc8df6b8b5b105fbf709044e10752d8b64f321b5a4e290cb7e316e10fe1
5
5
  SHA512:
6
- metadata.gz: 8f86c7c82fbf5db63ef0c92737ba4935a3a60534d434dfb9552c2ad624ec1e28fea9876c689a29ca6b16c16abc3818fdfb24bd7540495f980daa8a5509b29ca0
7
- data.tar.gz: 8b1dfba11d40e12f9b0b1fbf4ff947678fb5ef2ab3fbd3969e363935c8e895c29f497273ddb86c80c4e1ebc9d254045275dc7e08b07788d4573799c97ce7a393
6
+ metadata.gz: 2ff333ee092e0ffd653ab476acf8b2656b4ca59ea32d6dcc846eb8a174f69d98811272e8a735bfa3bdfac3d5b3753ce499adb5c8a215b3197310b1f4d822364e
7
+ data.tar.gz: ebf6cafbde9635cfc886ee4dce84495353b2d389e447e6188f6a88eb4ca11b19086ce5a12565bac550ef26936f424e4ec5a0fe908fad4b39bea984758ac44720
@@ -1,5 +1,6 @@
1
1
  name: Bug Report
2
2
  description: Create a report with a procedure for reproducing the bug
3
+ labels: "waiting-for-triage"
3
4
  body:
4
5
  - type: markdown
5
6
  attributes:
@@ -1,5 +1,6 @@
1
1
  name: Feature request
2
2
  description: Suggest an idea for this project
3
+ labels: "waiting-for-triage"
3
4
  body:
4
5
  - type: markdown
5
6
  attributes:
@@ -0,0 +1,6 @@
1
+ version: 2
2
+ updates:
3
+ - package-ecosystem: 'github-actions'
4
+ directory: '/'
5
+ schedule:
6
+ interval: 'weekly'
@@ -12,12 +12,15 @@ jobs:
12
12
  strategy:
13
13
  fail-fast: false
14
14
  matrix:
15
- ruby: [ '3.1', '3.0', '2.7', '2.6' ]
15
+ ruby: [ '3.2', '3.1', '3.0', '2.7' ]
16
16
  os:
17
17
  - ubuntu-latest
18
- name: Ruby ${{ matrix.ruby }} unit testing on ${{ matrix.os }}
18
+ rdkafka_versions:
19
+ - { min: '>= 0.6.0', max: '< 0.12.0' }
20
+ - { min: '>= 0.12.0', max: '>= 0.12.0' }
21
+ name: Ruby ${{ matrix.ruby }} unit testing on ${{ matrix.os }} with rdkafka gem version (min ${{ matrix.rdkafka_versions.min }} max ${{ matrix.rdkafka_versions.max }})
19
22
  steps:
20
- - uses: actions/checkout@v2
23
+ - uses: actions/checkout@v3
21
24
  - uses: ruby/setup-ruby@v1
22
25
  with:
23
26
  ruby-version: ${{ matrix.ruby }}
@@ -33,6 +36,8 @@ jobs:
33
36
  - name: unit testing
34
37
  env:
35
38
  CI: true
39
+ RDKAFKA_VERSION_MIN_RANGE: ${{ matrix.rdkafka_versions.min }}
40
+ RDKAFKA_VERSION_MAX_RANGE: ${{ matrix.rdkafka_versions.max }}
36
41
  run: |
37
42
  sudo ./ci/prepare-kafka-server.sh
38
43
  gem install bundler rake
@@ -7,7 +7,7 @@ jobs:
7
7
  stale:
8
8
  runs-on: ubuntu-latest
9
9
  steps:
10
- - uses: actions/stale@v3
10
+ - uses: actions/stale@v8
11
11
  with:
12
12
  repo-token: ${{ secrets.GITHUB_TOKEN }}
13
13
  days-before-stale: 90
@@ -18,5 +18,7 @@ jobs:
18
18
  close-pr-message: "This PR was automatically closed because of stale in 30 days"
19
19
  stale-pr-label: "stale"
20
20
  stale-issue-label: "stale"
21
- exempt-issue-labels: "bug,enhancement,help wanted"
22
- exempt-pr-labels: "bug,enhancement,help wanted"
21
+ exempt-issue-labels: "bug,enhancement,help wanted,waiting-for-triage"
22
+ exempt-pr-labels: "bug,enhancement,help wanted,waiting-for-triage"
23
+ exempt-all-assignees: true
24
+ exempt-all-milestones: true
data/ChangeLog CHANGED
@@ -1,3 +1,17 @@
1
+ Release 0.19.0 - 2023/04/26
2
+ * out_kafka2: Add support for AWS IAM authentication
3
+ * in_kafka, in_kafka_group, out_kafka2: Add support for ssl client cert key password
4
+ * out_rdkafka2: Mask `ssl_client_cert_key_password` on dumping it to log
5
+ * out_rdkafka2: Support rdkafka-ruby 0.12
6
+
7
+ Release 0.18.1 - 2022/08/17
8
+ * out_kafka2: Fix a bug that it doesn't respect `chunk_limit_records` and `chunk_limit_size`
9
+
10
+ Release 0.18.0 - 2022/07/21
11
+ * out_kafka2: Keep alive Kafka connections between flushes
12
+ * out_rdkafka2: Enable to set SASL credentials via `username` and `password` parameters
13
+ * out_kafka2/out_rdkafka2: Add `record_key` parameter
14
+
1
15
  Release 0.17.5 - 2022/03/18
2
16
  * out_kafka2: Add `resolve_seed_brokers` parameter
3
17
 
data/Gemfile CHANGED
@@ -3,4 +3,4 @@ source 'https://rubygems.org'
3
3
  # Specify your gem's dependencies in fluent-plugin-kafka.gemspec
4
4
  gemspec
5
5
 
6
- gem 'rdkafka', '>= 0.6.0' if ENV["USE_RDKAFKA"]
6
+ gem 'rdkafka', ENV['RDKAFKA_VERSION_MIN_RANGE'], ENV['RDKAFKA_VERSION_MAX_RANGE'] if ENV['USE_RDKAFKA']
data/README.md CHANGED
@@ -36,6 +36,7 @@ If you want to use zookeeper related parameters, you also need to install zookee
36
36
  - ssl_ca_cert
37
37
  - ssl_client_cert
38
38
  - ssl_client_cert_key
39
+ - ssl_client_cert_key_password
39
40
  - ssl_ca_certs_from_system
40
41
 
41
42
  Set path to SSL related files. See [Encryption and Authentication using SSL](https://github.com/zendesk/ruby-kafka#encryption-and-authentication-using-ssl) for more detail.
@@ -187,12 +188,17 @@ If `ruby-kafka` doesn't fit your kafka environment, check `rdkafka2` plugin inst
187
188
  @type kafka2
188
189
 
189
190
  brokers <broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,.. # Set brokers directly
191
+
192
+ # Kafka topic, placerholders are supported. Chunk keys are required in the Buffer section inorder for placeholders
193
+ # to work.
194
+ topic (string) :default => nil
190
195
  topic_key (string) :default => 'topic'
191
196
  partition_key (string) :default => 'partition'
192
197
  partition_key_key (string) :default => 'partition_key'
193
198
  message_key_key (string) :default => 'message_key'
194
199
  default_topic (string) :default => nil
195
200
  default_partition_key (string) :default => nil
201
+ record_key (string) :default => nil
196
202
  default_message_key (string) :default => nil
197
203
  exclude_topic_key (bool) :default => false
198
204
  exclude_partition_key (bool) :default => false
@@ -205,6 +211,17 @@ If `ruby-kafka` doesn't fit your kafka environment, check `rdkafka2` plugin inst
205
211
  use_default_for_unknown_topic (bool) :default => false
206
212
  discard_kafka_delivery_failed (bool) :default => false (No discard)
207
213
  partitioner_hash_function (enum) (crc32|murmur2) :default => 'crc32'
214
+ share_producer (bool) :default => false
215
+
216
+ # If you intend to rely on AWS IAM auth to MSK with long lived credentials
217
+ # https://docs.aws.amazon.com/msk/latest/developerguide/iam-access-control.html
218
+ #
219
+ # For AWS STS support, see status in
220
+ # - https://github.com/zendesk/ruby-kafka/issues/944
221
+ # - https://github.com/zendesk/ruby-kafka/pull/951
222
+ sasl_aws_msk_iam_access_key_id (string) :default => nil
223
+ sasl_aws_msk_iam_secret_key_id (string) :default => nil
224
+ sasl_aws_msk_iam_aws_region (string) :default => nil
208
225
 
209
226
  <format>
210
227
  @type (json|ltsv|msgpack|attr:<record name>|<formatter name>) :default => json
@@ -241,13 +258,12 @@ ruby-kafka's log is routed to fluentd log so you can see ruby-kafka's log in flu
241
258
 
242
259
  Supports following ruby-kafka's producer options.
243
260
 
244
- - max_send_retries - default: 1 - Number of times to retry sending of messages to a leader.
261
+ - max_send_retries - default: 2 - Number of times to retry sending of messages to a leader.
245
262
  - required_acks - default: -1 - The number of acks required per request. If you need flush performance, set lower value, e.g. 1, 2.
246
263
  - ack_timeout - default: nil - How long the producer waits for acks. The unit is seconds.
247
264
  - compression_codec - default: nil - The codec the producer uses to compress messages.
248
265
  - max_send_limit_bytes - default: nil - Max byte size to send message to avoid MessageSizeTooLarge. For example, if you set 1000000(message.max.bytes in kafka), Message more than 1000000 byes will be dropped.
249
266
  - discard_kafka_delivery_failed - default: false - discard the record where [Kafka::DeliveryFailed](http://www.rubydoc.info/gems/ruby-kafka/Kafka/DeliveryFailed) occurred
250
- - monitoring_list - default: [] - library to be used to monitor. statsd and datadog are supported
251
267
 
252
268
  If you want to know about detail of monitoring, see also https://github.com/zendesk/ruby-kafka#monitoring
253
269
 
@@ -335,6 +351,40 @@ For example, `$.source.ip` can be extracted with config `headers_from_record` an
335
351
 
336
352
  > Using this config to remove unused fields is discouraged. A [filter plugin](https://docs.fluentd.org/v/0.12/filter) can be used for this purpose.
337
353
 
354
+ #### Send only a sub field as a message payload
355
+
356
+ If `record_key` is provided, the plugin sends only a sub field given by that key.
357
+ The configuration format is jsonpath.
358
+
359
+ e.g. When the following configuration and the incoming record are given:
360
+
361
+ configuration:
362
+
363
+ <match **>
364
+ @type kafka2
365
+ [...]
366
+ record_key '$.data'
367
+ </match>
368
+
369
+ record:
370
+
371
+ {
372
+ "specversion" : "1.0",
373
+ "type" : "com.example.someevent",
374
+ "id" : "C234-1234-1234",
375
+ "time" : "2018-04-05T17:31:00Z",
376
+ "datacontenttype" : "application/json",
377
+ "data" : {
378
+ "appinfoA" : "abc",
379
+ "appinfoB" : 123,
380
+ "appinfoC" : true
381
+ },
382
+ ...
383
+ }
384
+
385
+ only the `data` field will be serialized by the formatter and sent to Kafka.
386
+ The toplevel `data` key will be removed.
387
+
338
388
  ### Buffered output plugin
339
389
 
340
390
  This plugin uses ruby-kafka producer for writing data. This plugin is for v0.12. If you use v1, see `kafka2`.
@@ -384,6 +434,16 @@ Support of fluentd v0.12 has ended. `kafka_buffered` will be an alias of `kafka2
384
434
  monitoring_list (array) :default => []
385
435
  </match>
386
436
 
437
+ `kafka_buffered` supports the following `ruby-kafka` parameters:
438
+
439
+ - max_send_retries - default: 2 - Number of times to retry sending of messages to a leader.
440
+ - required_acks - default: -1 - The number of acks required per request. If you need flush performance, set lower value, e.g. 1, 2.
441
+ - ack_timeout - default: nil - How long the producer waits for acks. The unit is seconds.
442
+ - compression_codec - default: nil - The codec the producer uses to compress messages.
443
+ - max_send_limit_bytes - default: nil - Max byte size to send message to avoid MessageSizeTooLarge. For example, if you set 1000000(message.max.bytes in kafka), Message more than 1000000 byes will be dropped.
444
+ - discard_kafka_delivery_failed - default: false - discard the record where [Kafka::DeliveryFailed](http://www.rubydoc.info/gems/ruby-kafka/Kafka/DeliveryFailed) occurred
445
+ - monitoring_list - default: [] - library to be used to monitor. statsd and datadog are supported
446
+
387
447
  `kafka_buffered` has two additional parameters:
388
448
 
389
449
  - kafka_agg_max_bytes - default: 4096 - Maximum value of total message size to be included in one batch transmission.
@@ -460,6 +520,7 @@ You need to install rdkafka gem.
460
520
  # same with kafka2
461
521
  headers (hash) :default => {}
462
522
  headers_from_record (hash) :default => {}
523
+ record_key (string) :default => nil
463
524
 
464
525
  <format>
465
526
  @type (json|ltsv|msgpack|attr:<record name>|<formatter name>) :default => json
@@ -0,0 +1,3 @@
1
+ # Examples
2
+
3
+ This directory contains example Fluentd config for this plugin
@@ -0,0 +1,32 @@
1
+ <source>
2
+ @type sample
3
+ sample {"hello": "world"}
4
+ rate 7000
5
+ tag sample.hello.world
6
+ </source>
7
+
8
+ <match sample.**>
9
+ @type kafka2
10
+
11
+ brokers "broker:29092"
12
+
13
+ # Writes to topic `events.sample.hello.world`
14
+ topic "events.${tag}"
15
+
16
+ # Writes to topic `hello.world`
17
+ # topic "${tag[1]}.${tag[2]}"
18
+
19
+ <format>
20
+ @type json
21
+ </format>
22
+
23
+ <buffer tag>
24
+ flush_at_shutdown true
25
+ flush_mode interval
26
+ flush_interval 1s
27
+ chunk_limit_size 3MB
28
+ chunk_full_threshold 1
29
+ total_limit_size 1024MB
30
+ overflow_action block
31
+ </buffer>
32
+ </match>
@@ -0,0 +1,23 @@
1
+ <source>
2
+ @type sample
3
+ sample {"hello": "world", "some_record":{"event":"message"}}
4
+ rate 7000
5
+ tag sample.hello.world
6
+ </source>
7
+
8
+ <match sample.**>
9
+ @type kafka2
10
+
11
+ brokers "broker:29092"
12
+
13
+ record_key "some_record"
14
+ default_topic "events"
15
+
16
+ <format>
17
+ # requires the fluent-plugin-formatter-protobuf gem
18
+ # see its docs for full usage
19
+ @type protobuf
20
+ class_name SomeRecord
21
+ include_paths ["/opt/fluent-plugin-formatter-protobuf/some_record_pb.rb"]
22
+ </format>
23
+ </match>
@@ -0,0 +1,31 @@
1
+ <source>
2
+ @type sample
3
+ sample {"hello": "world", "some_record":{"event":"message"}}
4
+ rate 7000
5
+ tag sample.hello.world
6
+ </source>
7
+
8
+ <match sample.**>
9
+ @type kafka2
10
+
11
+ brokers "broker:29092"
12
+
13
+ # {"event": "message"} will be formatted and sent to Kafka
14
+ record_key "some_record"
15
+
16
+ default_topic "events"
17
+
18
+ <format>
19
+ @type json
20
+ </format>
21
+
22
+ <buffer>
23
+ flush_at_shutdown true
24
+ flush_mode interval
25
+ flush_interval 1s
26
+ chunk_limit_size 3MB
27
+ chunk_full_threshold 1
28
+ total_limit_size 1024MB
29
+ overflow_action block
30
+ </buffer>
31
+ </match>
@@ -13,12 +13,12 @@ Gem::Specification.new do |gem|
13
13
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
14
14
  gem.name = "fluent-plugin-kafka"
15
15
  gem.require_paths = ["lib"]
16
- gem.version = '0.17.5'
16
+ gem.version = '0.19.0'
17
17
  gem.required_ruby_version = ">= 2.1.0"
18
18
 
19
19
  gem.add_dependency "fluentd", [">= 0.10.58", "< 2"]
20
20
  gem.add_dependency 'ltsv'
21
- gem.add_dependency 'ruby-kafka', '>= 1.4.0', '< 2'
21
+ gem.add_dependency 'ruby-kafka', '>= 1.5.0', '< 2'
22
22
  gem.add_development_dependency "rake", ">= 0.9.2"
23
23
  gem.add_development_dependency "test-unit", ">= 3.0.8"
24
24
  gem.add_development_dependency "test-unit-rr", "~> 1.0"
@@ -200,16 +200,19 @@ class Fluent::KafkaInput < Fluent::Input
200
200
  if @scram_mechanism != nil && @username != nil && @password != nil
201
201
  @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: logger, ssl_ca_cert_file_path: @ssl_ca_cert,
202
202
  ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
203
+ ssl_client_cert_key_password: @ssl_client_cert_key_password,
203
204
  ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_scram_username: @username, sasl_scram_password: @password,
204
205
  sasl_scram_mechanism: @scram_mechanism, sasl_over_ssl: @sasl_over_ssl, ssl_verify_hostname: @ssl_verify_hostname)
205
206
  elsif @username != nil && @password != nil
206
207
  @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: logger, ssl_ca_cert_file_path: @ssl_ca_cert,
207
208
  ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
209
+ ssl_client_cert_key_password: @ssl_client_cert_key_password,
208
210
  ssl_ca_certs_from_system: @ssl_ca_certs_from_system,sasl_plain_username: @username, sasl_plain_password: @password,
209
211
  sasl_over_ssl: @sasl_over_ssl, ssl_verify_hostname: @ssl_verify_hostname)
210
212
  else
211
213
  @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: logger, ssl_ca_cert_file_path: @ssl_ca_cert,
212
214
  ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
215
+ ssl_client_cert_key_password: @ssl_client_cert_key_password,
213
216
  ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_gssapi_principal: @principal, sasl_gssapi_keytab: @keytab,
214
217
  ssl_verify_hostname: @ssl_verify_hostname)
215
218
  end
@@ -188,16 +188,19 @@ class Fluent::KafkaGroupInput < Fluent::Input
188
188
  if @scram_mechanism != nil && @username != nil && @password != nil
189
189
  @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: logger, connect_timeout: @connect_timeout, socket_timeout: @socket_timeout, ssl_ca_cert_file_path: @ssl_ca_cert,
190
190
  ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
191
+ ssl_client_cert_key_password: @ssl_client_cert_key_password,
191
192
  ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_scram_username: @username, sasl_scram_password: @password,
192
193
  sasl_scram_mechanism: @scram_mechanism, sasl_over_ssl: @sasl_over_ssl, ssl_verify_hostname: @ssl_verify_hostname)
193
194
  elsif @username != nil && @password != nil
194
195
  @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: logger, connect_timeout: @connect_timeout, socket_timeout: @socket_timeout, ssl_ca_cert_file_path: @ssl_ca_cert,
195
196
  ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
197
+ ssl_client_cert_key_password: @ssl_client_cert_key_password,
196
198
  ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_plain_username: @username, sasl_plain_password: @password,
197
199
  sasl_over_ssl: @sasl_over_ssl, ssl_verify_hostname: @ssl_verify_hostname)
198
200
  else
199
201
  @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: logger, connect_timeout: @connect_timeout, socket_timeout: @socket_timeout, ssl_ca_cert_file_path: @ssl_ca_cert,
200
202
  ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
203
+ ssl_client_cert_key_password: @ssl_client_cert_key_password,
201
204
  ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_gssapi_principal: @principal, sasl_gssapi_keytab: @keytab,
202
205
  ssl_verify_hostname: @ssl_verify_hostname)
203
206
  end
@@ -1,5 +1,18 @@
1
1
  module Fluent
2
2
  module KafkaPluginUtil
3
+ module AwsIamSettings
4
+ def self.included(klass)
5
+ klass.instance_eval do
6
+ config_param :sasl_aws_msk_iam_access_key_id, :string, :default => nil, secret: true,
7
+ desc: "AWS access key Id for IAM authentication to MSK."
8
+ config_param :sasl_aws_msk_iam_secret_key_id, :string, :default => nil, secret: true,
9
+ desc: "AWS access key secret for IAM authentication to MSK."
10
+ config_param :sasl_aws_msk_iam_aws_region, :string, :default => nil,
11
+ desc: "AWS region for IAM authentication to MSK."
12
+ end
13
+ end
14
+ end
15
+
3
16
  module SSLSettings
4
17
  def self.included(klass)
5
18
  klass.instance_eval {
@@ -10,6 +23,8 @@ module Fluent
10
23
  :desc => "a PEM encoded client cert to use with and SSL connection. Must be used in combination with ssl_client_cert_key."
11
24
  config_param :ssl_client_cert_key, :string, :default => nil,
12
25
  :desc => "a PEM encoded client cert key to use with and SSL connection. Must be used in combination with ssl_client_cert."
26
+ config_param :ssl_client_cert_key_password, :string, :default => nil, secret: true,
27
+ :desc => "a PEM encoded client cert key password to use with SSL connection."
13
28
  config_param :ssl_client_cert_chain, :string, :default => nil,
14
29
  :desc => "an extra PEM encoded cert to use with and SSL connection."
15
30
  config_param :ssl_ca_certs_from_system, :bool, :default => false,
@@ -38,9 +38,15 @@ module Kafka
38
38
  end
39
39
 
40
40
  # for out_kafka2
41
+ # Majority (if not all) of this code is lifted from https://github.com/zendesk/ruby-kafka/blob/master/lib/kafka/producer.rb
42
+ # with the main difference where we have removed any checks regarding max_buffer_bytesize and max_buffer_size
43
+ # The reason for doing this is to provide a better UX for our users where they only need to set those bounds in
44
+ # the Buffer section using `chunk_limit_size` and `chunk_limit_records`.
45
+ #
46
+ # We should reconsider this in the future in case the `ruby-kafka` library drastically changes its internal.
41
47
  module Kafka
42
48
  class Client
43
- def topic_producer(topic, compression_codec: nil, compression_threshold: 1, ack_timeout: 5, required_acks: :all, max_retries: 2, retry_backoff: 1, max_buffer_size: 1000, max_buffer_bytesize: 10_000_000, idempotent: false, transactional: false, transactional_id: nil, transactional_timeout: 60)
49
+ def custom_producer(compression_codec: nil, compression_threshold: 1, ack_timeout: 5, required_acks: :all, max_retries: 2, retry_backoff: 1, max_buffer_size: 1000, max_buffer_bytesize: 10_000_000, idempotent: false, transactional: false, transactional_id: nil, transactional_timeout: 60)
44
50
  cluster = initialize_cluster
45
51
  compressor = Compressor.new(
46
52
  codec_name: compression_codec,
@@ -57,8 +63,7 @@ module Kafka
57
63
  transactional_timeout: transactional_timeout,
58
64
  )
59
65
 
60
- TopicProducer.new(topic,
61
- cluster: cluster,
66
+ CustomProducer.new(cluster: cluster,
62
67
  transaction_manager: transaction_manager,
63
68
  logger: @logger,
64
69
  instrumenter: @instrumenter,
@@ -74,8 +79,8 @@ module Kafka
74
79
  end
75
80
  end
76
81
 
77
- class TopicProducer
78
- def initialize(topic, cluster:, transaction_manager:, logger:, instrumenter:, compressor:, ack_timeout:, required_acks:, max_retries:, retry_backoff:, max_buffer_size:, max_buffer_bytesize:, partitioner:)
82
+ class CustomProducer
83
+ def initialize(cluster:, transaction_manager:, logger:, instrumenter:, compressor:, ack_timeout:, required_acks:, max_retries:, retry_backoff:, max_buffer_size:, max_buffer_bytesize:, partitioner:)
79
84
  @cluster = cluster
80
85
  @transaction_manager = transaction_manager
81
86
  @logger = logger
@@ -88,10 +93,6 @@ module Kafka
88
93
  @max_buffer_bytesize = max_buffer_bytesize
89
94
  @compressor = compressor
90
95
  @partitioner = partitioner
91
-
92
- @topic = topic
93
- @cluster.add_target_topics(Set.new([topic]))
94
-
95
96
  # A buffer organized by topic/partition.
96
97
  @buffer = MessageBuffer.new
97
98
 
@@ -99,12 +100,12 @@ module Kafka
99
100
  @pending_message_queue = PendingMessageQueue.new
100
101
  end
101
102
 
102
- def produce(value, key: nil, partition: nil, partition_key: nil, headers: EMPTY_HEADER, create_time: Time.now)
103
+ def produce(value, key: nil, partition: nil, partition_key: nil, headers: EMPTY_HEADER, create_time: Time.now, topic: nil)
103
104
  message = PendingMessage.new(
104
105
  value: value,
105
106
  key: key,
106
107
  headers: headers,
107
- topic: @topic,
108
+ topic: topic,
108
109
  partition: partition,
109
110
  partition_key: partition_key,
110
111
  create_time: create_time
@@ -245,12 +246,13 @@ module Kafka
245
246
 
246
247
  def assign_partitions!
247
248
  failed_messages = []
248
- partition_count = @cluster.partitions_for(@topic).count
249
249
 
250
250
  @pending_message_queue.each do |message|
251
251
  partition = message.partition
252
252
 
253
253
  begin
254
+ partition_count = @cluster.partitions_for(message.topic).count
255
+
254
256
  if partition.nil?
255
257
  partition = @partitioner.call(partition_count, message)
256
258
  end
@@ -27,6 +27,11 @@ DESC
27
27
  config_param :partitioner_hash_function, :enum, list: [:crc32, :murmur2], :default => :crc32,
28
28
  :desc => "Specify kafka patrtitioner hash algorithm"
29
29
  config_param :default_partition, :integer, :default => nil
30
+ config_param :record_key, :string, :default => nil,
31
+ :desc => <<-DESC
32
+ A jsonpath to a record value pointing to the field which will be passed to the formatter and sent as the Kafka message payload.
33
+ If defined, only this field in the record will be sent to Kafka as the message payload.
34
+ DESC
30
35
  config_param :use_default_for_unknown_topic, :bool, :default => false, :desc => "If true, default_topic is used when topic not found"
31
36
  config_param :client_id, :string, :default => 'fluentd'
32
37
  config_param :idempotent, :bool, :default => false, :desc => 'Enable idempotent producer'
@@ -81,6 +86,7 @@ DESC
81
86
  Add a regular expression to capture ActiveSupport notifications from the Kafka client
82
87
  requires activesupport gem - records will be generated under fluent_kafka_stats.**
83
88
  DESC
89
+ config_param :share_producer, :bool, :default => false, :desc => 'share kafka producer between flush threads'
84
90
 
85
91
  config_section :buffer do
86
92
  config_set_default :chunk_keys, ["topic"]
@@ -89,6 +95,7 @@ DESC
89
95
  config_set_default :@type, 'json'
90
96
  end
91
97
 
98
+ include Fluent::KafkaPluginUtil::AwsIamSettings
92
99
  include Fluent::KafkaPluginUtil::SSLSettings
93
100
  include Fluent::KafkaPluginUtil::SaslSettings
94
101
 
@@ -96,30 +103,58 @@ DESC
96
103
  super
97
104
 
98
105
  @kafka = nil
106
+ @producers = nil
107
+ @producers_mutex = nil
108
+ @shared_producer = nil
109
+
110
+ @writing_threads_mutex = Mutex.new
111
+ @writing_threads = Set.new
99
112
  end
100
113
 
101
114
  def refresh_client(raise_error = true)
102
115
  begin
103
116
  logger = @get_kafka_client_log ? log : nil
117
+ use_long_lived_aws_credentials = @sasl_aws_msk_iam_access_key_id != nil && @sasl_aws_msk_iam_secret_key_id != nil
104
118
  if @scram_mechanism != nil && @username != nil && @password != nil
105
- @kafka = Kafka.new(seed_brokers: @seed_brokers, client_id: @client_id, logger: logger, connect_timeout: @connect_timeout, socket_timeout: @socket_timeout, ssl_ca_cert_file_path: @ssl_ca_cert,
106
- ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key), ssl_client_cert_chain: read_ssl_file(@ssl_client_cert_chain),
107
- ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_scram_username: @username, sasl_scram_password: @password,
108
- sasl_scram_mechanism: @scram_mechanism, sasl_over_ssl: @sasl_over_ssl, ssl_verify_hostname: @ssl_verify_hostname, resolve_seed_brokers: @resolve_seed_brokers,
109
- partitioner: Kafka::Partitioner.new(hash_function: @partitioner_hash_function))
119
+ sasl_params = {
120
+ sasl_scram_username: @username,
121
+ sasl_scram_password: @password,
122
+ sasl_scram_mechanism: @scram_mechanism,
123
+ }
110
124
  elsif @username != nil && @password != nil
111
- @kafka = Kafka.new(seed_brokers: @seed_brokers, client_id: @client_id, logger: logger, connect_timeout: @connect_timeout, socket_timeout: @socket_timeout, ssl_ca_cert_file_path: @ssl_ca_cert,
112
- ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key), ssl_client_cert_chain: read_ssl_file(@ssl_client_cert_chain),
113
- ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_plain_username: @username, sasl_plain_password: @password, sasl_over_ssl: @sasl_over_ssl,
114
- ssl_verify_hostname: @ssl_verify_hostname, resolve_seed_brokers: @resolve_seed_brokers,
115
- partitioner: Kafka::Partitioner.new(hash_function: @partitioner_hash_function))
125
+ sasl_params = {
126
+ sasl_plain_username: @username,
127
+ sasl_plain_password: @password,
128
+ }
129
+ elsif use_long_lived_aws_credentials
130
+ sasl_params = {
131
+ sasl_aws_msk_iam_access_key_id: @sasl_aws_msk_iam_access_key_id,
132
+ sasl_aws_msk_iam_secret_key_id: @sasl_aws_msk_iam_secret_key_id,
133
+ sasl_aws_msk_iam_aws_region: @sasl_aws_msk_iam_aws_region,
134
+ }
116
135
  else
117
- @kafka = Kafka.new(seed_brokers: @seed_brokers, client_id: @client_id, logger: logger, connect_timeout: @connect_timeout, socket_timeout: @socket_timeout, ssl_ca_cert_file_path: @ssl_ca_cert,
118
- ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key), ssl_client_cert_chain: read_ssl_file(@ssl_client_cert_chain),
119
- ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_gssapi_principal: @principal, sasl_gssapi_keytab: @keytab, sasl_over_ssl: @sasl_over_ssl,
120
- ssl_verify_hostname: @ssl_verify_hostname, resolve_seed_brokers: @resolve_seed_brokers,
121
- partitioner: Kafka::Partitioner.new(hash_function: @partitioner_hash_function))
136
+ sasl_params = {
137
+ sasl_gssapi_principal: @principal,
138
+ sasl_gssapi_keytab: @keytab,
139
+ }
122
140
  end
141
+ @kafka = Kafka.new(
142
+ seed_brokers: @seed_brokers,
143
+ client_id: @client_id,
144
+ logger: logger,
145
+ connect_timeout: @connect_timeout,
146
+ socket_timeout: @socket_timeout,
147
+ ssl_ca_cert_file_path: @ssl_ca_cert,
148
+ ssl_client_cert: read_ssl_file(@ssl_client_cert),
149
+ ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
150
+ ssl_client_cert_key_password: @ssl_client_cert_key_password,
151
+ ssl_client_cert_chain: read_ssl_file(@ssl_client_cert_chain),
152
+ ssl_ca_certs_from_system: @ssl_ca_certs_from_system,
153
+ ssl_verify_hostname: @ssl_verify_hostname,
154
+ resolve_seed_brokers: @resolve_seed_brokers,
155
+ partitioner: Kafka::Partitioner.new(hash_function: @partitioner_hash_function),
156
+ sasl_over_ssl: @sasl_over_ssl,
157
+ **sasl_params)
123
158
  log.info "initialized kafka producer: #{@client_id}"
124
159
  rescue Exception => e
125
160
  if raise_error # During startup, error should be reported to engine and stop its phase for safety.
@@ -185,15 +220,29 @@ DESC
185
220
  @exclude_field_accessors = @exclude_fields.map do |field|
186
221
  record_accessor_create(field)
187
222
  end
223
+
224
+ @record_field_accessor = nil
225
+ @record_field_accessor = record_accessor_create(@record_key) unless @record_key.nil?
188
226
  end
189
227
 
190
228
  def multi_workers_ready?
191
229
  true
192
230
  end
193
231
 
232
+ def create_producer
233
+ @kafka.custom_producer(**@producer_opts)
234
+ end
235
+
194
236
  def start
195
237
  super
196
238
  refresh_client
239
+
240
+ if @share_producer
241
+ @shared_producer = create_producer
242
+ else
243
+ @producers = {}
244
+ @producers_mutex = Mutex.new
245
+ end
197
246
  end
198
247
 
199
248
  def close
@@ -206,6 +255,56 @@ DESC
206
255
  @kafka = nil
207
256
  end
208
257
 
258
+ def wait_writing_threads
259
+ done = false
260
+ until done do
261
+ @writing_threads_mutex.synchronize do
262
+ done = true if @writing_threads.empty?
263
+ end
264
+ sleep(1) unless done
265
+ end
266
+ end
267
+
268
+ def shutdown
269
+ super
270
+ wait_writing_threads
271
+ shutdown_producers
272
+ end
273
+
274
+ def shutdown_producers
275
+ if @share_producer
276
+ @shared_producer.shutdown
277
+ @shared_producer = nil
278
+ else
279
+ @producers_mutex.synchronize {
280
+ shutdown_threads = @producers.map { |key, producer|
281
+ th = Thread.new {
282
+ producer.shutdown
283
+ }
284
+ th.abort_on_exception = true
285
+ th
286
+ }
287
+ shutdown_threads.each { |th| th.join }
288
+ @producers = {}
289
+ }
290
+ end
291
+ end
292
+
293
+ def get_producer
294
+ if @share_producer
295
+ @shared_producer
296
+ else
297
+ @producers_mutex.synchronize {
298
+ producer = @producers[Thread.current.object_id]
299
+ unless producer
300
+ producer = create_producer
301
+ @producers[Thread.current.object_id] = producer
302
+ end
303
+ producer
304
+ }
305
+ end
306
+ end
307
+
209
308
  def setup_formatter(conf)
210
309
  type = conf['@type']
211
310
  case type
@@ -229,6 +328,8 @@ DESC
229
328
 
230
329
  # TODO: optimize write performance
231
330
  def write(chunk)
331
+ @writing_threads_mutex.synchronize { @writing_threads.add(Thread.current) }
332
+
232
333
  tag = chunk.metadata.tag
233
334
  topic = if @topic
234
335
  extract_placeholders(@topic, chunk)
@@ -237,13 +338,12 @@ DESC
237
338
  end
238
339
 
239
340
  messages = 0
240
- record_buf = nil
241
341
 
242
342
  base_headers = @headers
243
343
  mutate_headers = !@headers_from_record_accessors.empty?
244
344
 
245
345
  begin
246
- producer = @kafka.topic_producer(topic, **@producer_opts)
346
+ producer = get_producer
247
347
  chunk.msgpack_each { |time, record|
248
348
  begin
249
349
  record = inject_values_to_record(tag, time, record)
@@ -267,6 +367,7 @@ DESC
267
367
  end
268
368
  end
269
369
 
370
+ record = @record_field_accessor.call(record) unless @record_field_accessor.nil?
270
371
  record_buf = @formatter_proc.call(tag, time, record)
271
372
  record_buf_bytes = record_buf.bytesize
272
373
  if @max_send_limit_bytes && record_buf_bytes > @max_send_limit_bytes
@@ -283,7 +384,7 @@ DESC
283
384
  messages += 1
284
385
 
285
386
  producer.produce(record_buf, key: message_key, partition_key: partition_key, partition: partition, headers: headers,
286
- create_time: @use_event_time ? Time.at(time) : Time.now)
387
+ create_time: @use_event_time ? Time.at(time) : Time.now, topic: topic)
287
388
  }
288
389
 
289
390
  if messages > 0
@@ -301,7 +402,6 @@ DESC
301
402
  end
302
403
  rescue Kafka::UnknownTopicOrPartition
303
404
  if @use_default_for_unknown_topic && topic != @default_topic
304
- producer.shutdown if producer
305
405
  log.warn "'#{topic}' topic not found. Retry with '#{default_topic}' topic"
306
406
  topic = @default_topic
307
407
  retry
@@ -321,7 +421,7 @@ DESC
321
421
  # Raise exception to retry sendind messages
322
422
  raise e unless ignore
323
423
  ensure
324
- producer.shutdown if producer
424
+ @writing_threads_mutex.synchronize { @writing_threads.delete(Thread.current) }
325
425
  end
326
426
  end
327
427
  end
@@ -5,9 +5,37 @@ require 'fluent/plugin/kafka_plugin_util'
5
5
 
6
6
  require 'rdkafka'
7
7
 
8
+ # This is required for `rdkafka` version >= 0.12.0
9
+ # Overriding the close method in order to provide a time limit for when it should be forcibly closed
10
+ class Rdkafka::Producer::Client
11
+ # return false if producer is forcefully closed, otherwise return true
12
+ def close(timeout=nil)
13
+ return unless @native
14
+
15
+ # Indicate to polling thread that we're closing
16
+ @polling_thread[:closing] = true
17
+ # Wait for the polling thread to finish up
18
+ thread = @polling_thread.join(timeout)
19
+
20
+ Rdkafka::Bindings.rd_kafka_destroy(@native)
21
+
22
+ @native = nil
23
+
24
+ return !thread.nil?
25
+ end
26
+ end
27
+
8
28
  class Rdkafka::Producer
9
29
  # return false if producer is forcefully closed, otherwise return true
10
30
  def close(timeout = nil)
31
+ rdkafka_version = Rdkafka::VERSION || '0.0.0'
32
+ # Rdkafka version >= 0.12.0 changed its internals
33
+ if Gem::Version::create(rdkafka_version) >= Gem::Version.create('0.12.0')
34
+ ObjectSpace.undefine_finalizer(self)
35
+
36
+ return @client.close(timeout)
37
+ end
38
+
11
39
  @closing = true
12
40
  # Wait for the polling thread to finish up
13
41
  # If the broker isn't alive, the thread doesn't exit
@@ -73,6 +101,11 @@ DESC
73
101
  :desc => <<-DESC
74
102
  The codec the producer uses to compress messages. Used for compression.codec
75
103
  Supported codecs: (gzip|snappy)
104
+ DESC
105
+ config_param :record_key, :string, :default => nil,
106
+ :desc => <<-DESC
107
+ A jsonpath to a record value pointing to the field which will be passed to the formatter and sent as the Kafka message payload.
108
+ If defined, only this field in the record will be sent to Kafka as the message payload.
76
109
  DESC
77
110
  config_param :use_event_time, :bool, :default => false, :desc => 'Use fluentd event time for rdkafka timestamp'
78
111
  config_param :max_send_limit_bytes, :size, :default => nil
@@ -90,7 +123,6 @@ DESC
90
123
  config_param :max_enqueue_bytes_per_second, :size, :default => nil, :desc => 'The maximum number of enqueueing bytes per second'
91
124
 
92
125
  config_param :service_name, :string, :default => nil, :desc => 'Used for sasl.kerberos.service.name'
93
- config_param :ssl_client_cert_key_password, :string, :default => nil, :desc => 'Used for ssl.key.password'
94
126
 
95
127
  config_section :buffer do
96
128
  config_set_default :chunk_keys, ["topic"]
@@ -230,6 +262,9 @@ DESC
230
262
  end
231
263
 
232
264
  @enqueue_rate = EnqueueRate.new(@max_enqueue_bytes_per_second) unless @max_enqueue_bytes_per_second.nil?
265
+
266
+ @record_field_accessor = nil
267
+ @record_field_accessor = record_accessor_create(@record_key) unless @record_key.nil?
233
268
  end
234
269
 
235
270
  def build_config
@@ -270,6 +305,8 @@ DESC
270
305
  config[:"queue.buffering.max.messages"] = @rdkafka_buffering_max_messages if @rdkafka_buffering_max_messages
271
306
  config[:"message.max.bytes"] = @rdkafka_message_max_bytes if @rdkafka_message_max_bytes
272
307
  config[:"batch.num.messages"] = @rdkafka_message_max_num if @rdkafka_message_max_num
308
+ config[:"sasl.username"] = @username if @username
309
+ config[:"sasl.password"] = @password if @password
273
310
 
274
311
  @rdkafka_options.each { |k, v|
275
312
  config[k.to_sym] = v
@@ -371,8 +408,6 @@ DESC
371
408
  end
372
409
 
373
410
  handlers = []
374
- record_buf = nil
375
- record_buf_bytes = nil
376
411
 
377
412
  headers = @headers.clone
378
413
 
@@ -395,6 +430,7 @@ DESC
395
430
  end
396
431
  end
397
432
 
433
+ record = @record_field_accessor.call(record) unless @record_field_accessor.nil?
398
434
  record_buf = @formatter_proc.call(tag, time, record)
399
435
  record_buf_bytes = record_buf.bytesize
400
436
  if @max_send_limit_bytes && record_buf_bytes > @max_send_limit_bytes
@@ -105,6 +105,21 @@ class Kafka2OutputTest < Test::Unit::TestCase
105
105
  assert_equal([expected_message], actual_messages)
106
106
  end
107
107
 
108
+ def test_record_key
109
+ conf = config(default_topic: TOPIC_NAME) +
110
+ config_element('ROOT', '', {"record_key" => "$.data"}, [])
111
+ target_driver = create_target_driver
112
+ target_driver.run(expect_records: 1, timeout: 5) do
113
+ sleep 2
114
+ d = create_driver(conf)
115
+ d.run do
116
+ d.feed('test', event_time, {'data' => {'a' => 'b', 'foo' => 'bar', 'message' => 'test'}, 'message_key' => '123456'})
117
+ end
118
+ end
119
+ actual_messages = target_driver.events.collect { |event| event[2] }
120
+ assert_equal([{'a' => 'b', 'foo' => 'bar', 'message' => 'test'}], actual_messages)
121
+ end
122
+
108
123
  def test_exclude_fields
109
124
  conf = config(default_topic: TOPIC_NAME) +
110
125
  config_element('ROOT', '', {"exclude_fields" => "$.foo"}, [])
@@ -163,5 +163,20 @@ class Rdkafka2OutputTest < Test::Unit::TestCase
163
163
  actual_messages = target_driver.events.collect { |event| event[2] }
164
164
  assert_equal(expected_messages, actual_messages)
165
165
  end
166
+
167
+ def test_record_key
168
+ conf = config(default_topic: TOPIC_NAME) +
169
+ config_element('ROOT', '', {"record_key" => "$.data"}, [])
170
+ target_driver = create_target_driver
171
+ target_driver.run(expect_records: 1, timeout: 5) do
172
+ sleep 2
173
+ d = create_driver(conf)
174
+ d.run do
175
+ d.feed('test', event_time, {'data' => {'a' => 'b', 'foo' => 'bar', 'message' => 'test'}, 'message_key' => '123456'})
176
+ end
177
+ end
178
+ actual_messages = target_driver.events.collect { |event| event[2] }
179
+ assert_equal([{'a' => 'b', 'foo' => 'bar', 'message' => 'test'}], actual_messages)
180
+ end
166
181
  end
167
182
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-kafka
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.17.5
4
+ version: 0.19.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Hidemasa Togashi
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2022-03-18 00:00:00.000000000 Z
12
+ date: 2023-04-26 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: fluentd
@@ -51,7 +51,7 @@ dependencies:
51
51
  requirements:
52
52
  - - ">="
53
53
  - !ruby/object:Gem::Version
54
- version: 1.4.0
54
+ version: 1.5.0
55
55
  - - "<"
56
56
  - !ruby/object:Gem::Version
57
57
  version: '2'
@@ -61,7 +61,7 @@ dependencies:
61
61
  requirements:
62
62
  - - ">="
63
63
  - !ruby/object:Gem::Version
64
- version: 1.4.0
64
+ version: 1.5.0
65
65
  - - "<"
66
66
  - !ruby/object:Gem::Version
67
67
  version: '2'
@@ -146,6 +146,7 @@ files:
146
146
  - ".github/ISSUE_TEMPLATE/bug_report.yaml"
147
147
  - ".github/ISSUE_TEMPLATE/config.yml"
148
148
  - ".github/ISSUE_TEMPLATE/feature_request.yaml"
149
+ - ".github/dependabot.yml"
149
150
  - ".github/workflows/linux.yml"
150
151
  - ".github/workflows/stale-actions.yml"
151
152
  - ".gitignore"
@@ -155,6 +156,10 @@ files:
155
156
  - README.md
156
157
  - Rakefile
157
158
  - ci/prepare-kafka-server.sh
159
+ - examples/README.md
160
+ - examples/out_kafka2/dynamic_topic_based_on_tag.conf
161
+ - examples/out_kafka2/protobuf-formatter.conf
162
+ - examples/out_kafka2/record_key.conf
158
163
  - fluent-plugin-kafka.gemspec
159
164
  - lib/fluent/plugin/in_kafka.rb
160
165
  - lib/fluent/plugin/in_kafka_group.rb
@@ -193,7 +198,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
193
198
  - !ruby/object:Gem::Version
194
199
  version: '0'
195
200
  requirements: []
196
- rubygems_version: 3.2.5
201
+ rubygems_version: 3.3.5
197
202
  signing_key:
198
203
  specification_version: 4
199
204
  summary: Fluentd plugin for Apache Kafka > 0.8