fluent-plugin-kafka 0.12.4 → 0.14.2

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 8b9d681257730f610d10ef3f3ab0f219d3167df23482370bcc3c89b01cf12098
- data.tar.gz: f628e31d41fdc36f51d93a4fb4f7e68321ad0857765a6088a9b46ce593c435c8
+ metadata.gz: 14cde202b38079778e0d694692f05c19c9576be8622f35a8896df35f33ea9733
+ data.tar.gz: 80cbc1050e85239dabfe78dbffd97a100c939ec4e9227bd68956a7b3d15aa75e
  SHA512:
- metadata.gz: 57b996881b2981fb4366c31f25f27b4ca19b4d8ca2c4e27227ac4b5d603ae7a6b2719e7a049d8b668ace11620786832b7e970c90c3536de1ed6ba3da1723dd09
- data.tar.gz: 21ddc3ab9a074b3910719ae59cdf826423f3cd52c1b9179ab86ccdf6db60166776cd3b7218f6e04da1ac101dd130c87486e30658736e7f6347b161776c7bc7ee
+ metadata.gz: 5ec7ed5f16a7d78a0dcd5f6eb15ecd94dd47adc7ae77208896a0ede1d341ede1f5a668141e7ac4e5413734105e276bd483f3f2b8c2041d329a4d619a9c469a76
+ data.tar.gz: 11877dd67f3b0f714b38153368611c8f234b6a96976a00e21117e9523fa5bcde210fd13fa5e22086cf604ee61fc94a3472bc4946c69a6f5623eeace486e7eb75
data/ChangeLog CHANGED
@@ -1,3 +1,26 @@
+ Release 0.14.2 - 2020/08/26
+
+ * in_kafka_group: Add `add_headers` parameter
+ * out_kafka2/out_rdkafka2: Support `discard_kafka_delivery_failed` parameter
+
+ Release 0.14.1 - 2020/08/11
+
+ * kafka_producer_ext: Fix regression introduced by the v0.14.0 changes
+
+ Release 0.14.0 - 2020/08/07
+
+ * Update ruby-kafka dependency to v1.2.0 or later. Check https://github.com/zendesk/ruby-kafka#compatibility
+ * kafka_producer_ext: Follow Partitioner API change
+
+ Release 0.13.1 - 2020/07/17
+
+ * in_kafka_group: Support ssl_verify_hostname parameter
+ * out_kafka2/out_rdkafka2: Support topic parameter with placeholders
+
+ Release 0.13.0 - 2020/03/09
+
+ * Accept ruby-kafka v1 or later
+
  Release 0.12.4 - 2020/03/03
 
  * output: Follow rdkafka log level
data/README.md CHANGED
@@ -118,10 +118,13 @@ Consume events by kafka consumer group features..
  topics <listening topics(separate with comma ',')>
  format <input text type (text|json|ltsv|msgpack)> :default => json
  message_key <key (Optional, for text format only, default is message)>
+ kafka_message_key <key (Optional, if specified, set kafka's message key to this key)>
+ add_headers <If true, add kafka's message headers to record>
  add_prefix <tag prefix (Optional)>
  add_suffix <tag suffix (Optional)>
  retry_emit_limit <Wait retry_emit_limit x 1s when BufferQueueLimitError happens. The default is nil and it means waiting until BufferQueueLimitError is resolved>
- use_record_time <If true, replace event time with contents of 'time' field of fetched record>
+ use_record_time (Deprecated. Use 'time_source record' instead.) <If true, replace event time with contents of 'time' field of fetched record>
+ time_source <source for message timestamp (now|kafka|record)> :default => now
  time_format <string (Optional when use_record_time is used)>
 
  # ruby-kafka consumer options
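For reference, a minimal `kafka_group` source sketch that exercises the options listed above; broker addresses, the topic and the consumer group name are placeholder values, not taken from this release:

    <source>
      @type kafka_group
      brokers broker1:9092,broker2:9092
      consumer_group fluentd-consumer
      topics app_events
      format json
      # new in 0.14.2: copy Kafka message headers into the event record
      add_headers true
      # take the event time from a field of the record (record_time_key defaults to 'time')
      time_source record
      record_time_key time
    </source>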
@@ -140,7 +143,8 @@ Consuming topic name is used for event tag. So when the target topic name is `ap
 
  ### Output plugin
 
- This `kafka2` plugin is for fluentd v1.0 or later. This will be `out_kafka` plugin in the future.
+ This `kafka2` plugin is for fluentd v1 or later. This plugin uses the `ruby-kafka` producer for writing data.
+ If `ruby-kafka` doesn't fit your Kafka environment, check the `rdkafka2` plugin instead. This will become the `out_kafka` plugin in the future.
 
  <match app.**>
  @type kafka2
@@ -161,6 +165,7 @@ This `kafka2` plugin is for fluentd v1.0 or later. This will be `out_kafka` plug
  headers (hash) :default => {}
  headers_from_record (hash) :default => {}
  use_default_for_unknown_topic (bool) :default => false
+ discard_kafka_delivery_failed (bool) :default => false (No discard)
 
  <format>
  @type (json|ltsv|msgpack|attr:<record name>|<formatter name>) :default => json
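As a usage illustration for the new flag, a `kafka2` match block with delivery-failure discarding enabled; brokers and topic names are placeholders:

    <match app.**>
      @type kafka2
      brokers broker1:9092
      default_topic app-events
      # new in 0.14.2: drop the chunk instead of retrying when Kafka::DeliveryFailed is raised
      discard_kafka_delivery_failed true
      <format>
        @type json
      </format>
    </match>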
@@ -384,6 +389,7 @@ You need to install rdkafka gem.
  default_message_key (string) :default => nil
  exclude_topic_key (bool) :default => false
  exclude_partition_key (bool) :default => false
+ discard_kafka_delivery_failed (bool) :default => false (No discard)
 
  # same with kafka2
  headers (hash) :default => {}
@@ -443,7 +449,7 @@ See ruby-kafka README for more details: https://github.com/zendesk/ruby-kafka#co
 
  To avoid the problem, there are 2 approaches:
 
- - Upgrade your kafka cluster to latest version. This is better becase recent version is faster and robust.
+ - Upgrade your kafka cluster to latest version. This is better because recent version is faster and robust.
  - Downgrade ruby-kafka/fluent-plugin-kafka to work with your older kafka.
 
  ## Contributing
data/fluent-plugin-kafka.gemspec CHANGED
@@ -13,12 +13,12 @@ Gem::Specification.new do |gem|
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
  gem.name = "fluent-plugin-kafka"
  gem.require_paths = ["lib"]
- gem.version = '0.12.4'
+ gem.version = '0.14.2'
  gem.required_ruby_version = ">= 2.1.0"
 
  gem.add_dependency "fluentd", [">= 0.10.58", "< 2"]
  gem.add_dependency 'ltsv'
- gem.add_dependency 'ruby-kafka', '>= 0.7.8', '< 0.8.0'
+ gem.add_dependency 'ruby-kafka', '>= 1.2.0', '< 2'
  gem.add_development_dependency "rake", ">= 0.9.2"
  gem.add_development_dependency "test-unit", ">= 3.0.8"
  end
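With the relaxed constraint, Bundler can resolve any ruby-kafka 1.x release. A hypothetical Gemfile pairing the two; the version pins are examples only:

    # Gemfile (illustrative)
    source 'https://rubygems.org'

    gem 'fluentd', '~> 1.11'
    gem 'fluent-plugin-kafka', '0.14.2'
    # any 1.x release satisfies the gemspec constraint '>= 1.2.0', '< 2'
    gem 'ruby-kafka', '~> 1.2'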
data/lib/fluent/plugin/in_kafka.rb CHANGED
@@ -39,6 +39,8 @@ class Fluent::KafkaInput < Fluent::Input
  :deprecated => "Use 'time_source record' instead."
  config_param :time_source, :enum, :list => [:now, :kafka, :record], :default => :now,
  :desc => "Source for message timestamp."
+ config_param :record_time_key, :string, :default => 'time',
+ :desc => "Time field when time_source is 'record'"
  config_param :get_kafka_client_log, :bool, :default => false
  config_param :time_format, :string, :default => nil,
  :desc => "Time format to be used to parse 'time' field."
@@ -292,9 +294,9 @@ class Fluent::KafkaInput < Fluent::Input
  record_time = Fluent::Engine.now
  when :record
  if @time_format
- record_time = @time_parser.parse(record['time'])
+ record_time = @time_parser.parse(record[@record_time_key])
  else
- record_time = record['time']
+ record_time = record[@record_time_key]
  end
  else
  $log.fatal "BUG: invalid time_source: #{@time_source}"
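The same time-resolution logic as a standalone Ruby sketch; the record, field name and format are invented, and Ruby's `Time.strptime` stands in for Fluent's time parser:

    require 'time'

    record          = { 'logged_at' => '2020-08-26T12:34:56Z', 'message' => 'hi' }
    record_time_key = 'logged_at'              # the new config_param (default: 'time')
    time_format     = '%Y-%m-%dT%H:%M:%S%z'    # optional, as in the plugin

    record_time =
      if time_format
        Time.strptime(record[record_time_key], time_format).to_i
      else
        record[record_time_key]
      end
    puts record_time   # epoch seconds taken from the configured record field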
data/lib/fluent/plugin/in_kafka_group.rb CHANGED
@@ -18,6 +18,8 @@ class Fluent::KafkaGroupInput < Fluent::Input
  :desc => "Supported format: (json|text|ltsv|msgpack)"
  config_param :message_key, :string, :default => 'message',
  :desc => "For 'text' format only."
+ config_param :add_headers, :bool, :default => false,
+ :desc => "Add kafka's message headers to event record"
  config_param :add_prefix, :string, :default => nil,
  :desc => "Tag prefix (Optional)"
  config_param :add_suffix, :string, :default => nil,
@@ -29,6 +31,8 @@ class Fluent::KafkaGroupInput < Fluent::Input
  :deprecated => "Use 'time_source record' instead."
  config_param :time_source, :enum, :list => [:now, :kafka, :record], :default => :now,
  :desc => "Source for message timestamp."
+ config_param :record_time_key, :string, :default => 'time',
+ :desc => "Time field when time_source is 'record'"
  config_param :get_kafka_client_log, :bool, :default => false
  config_param :time_format, :string, :default => nil,
  :desc => "Time format to be used to parse 'time' field."
@@ -166,16 +170,17 @@ class Fluent::KafkaGroupInput < Fluent::Input
  @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: logger, connect_timeout: @connect_timeout, socket_timeout: @socket_timeout, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
  ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
  ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_scram_username: @username, sasl_scram_password: @password,
- sasl_scram_mechanism: @scram_mechanism, sasl_over_ssl: @sasl_over_ssl)
+ sasl_scram_mechanism: @scram_mechanism, sasl_over_ssl: @sasl_over_ssl, ssl_verify_hostname: @ssl_verify_hostname)
  elsif @username != nil && @password != nil
  @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: logger, connect_timeout: @connect_timeout, socket_timeout: @socket_timeout, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
  ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
  ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_plain_username: @username, sasl_plain_password: @password,
- sasl_over_ssl: @sasl_over_ssl)
+ sasl_over_ssl: @sasl_over_ssl, ssl_verify_hostname: @ssl_verify_hostname)
  else
  @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: logger, connect_timeout: @connect_timeout, socket_timeout: @socket_timeout, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
  ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
- ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_gssapi_principal: @principal, sasl_gssapi_keytab: @keytab)
+ ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_gssapi_principal: @principal, sasl_gssapi_keytab: @keytab,
+ ssl_verify_hostname: @ssl_verify_hostname)
  end
 
  @consumer = setup_consumer
@@ -198,7 +203,14 @@ class Fluent::KafkaGroupInput < Fluent::Input
  def setup_consumer
  consumer = @kafka.consumer(@consumer_opts)
  @topics.each { |topic|
- consumer.subscribe(topic, start_from_beginning: @start_from_beginning, max_bytes_per_partition: @max_bytes)
+ if m = /^\/(.+)\/$/.match(topic)
+ topic_or_regex = Regexp.new(m[1])
+ $log.info "Subscribe to topics matching the regex #{topic}"
+ else
+ topic_or_regex = topic
+ $log.info "Subscribe to topic #{topic}"
+ end
+ consumer.subscribe(topic_or_regex, start_from_beginning: @start_from_beginning, max_bytes_per_partition: @max_bytes)
  }
  consumer
  end
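The new subscribe logic treats a topic wrapped in slashes as a regular expression and anything else as a literal name; a quick standalone sketch with made-up topic strings:

    # Same detection pattern as setup_consumer above.
    ['app.events', '/^system\..+/'].each do |topic|
      if (m = /^\/(.+)\/$/.match(topic))
        puts "subscribe by regex: #{Regexp.new(m[1]).inspect}"
      else
        puts "subscribe to literal topic: #{topic}"
      end
    end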
@@ -243,9 +255,9 @@ class Fluent::KafkaGroupInput < Fluent::Input
  record_time = Fluent::Engine.now
  when :record
  if @time_format
- record_time = @time_parser.parse(record['time'].to_s)
+ record_time = @time_parser.parse(record[@record_time_key].to_s)
  else
- record_time = record['time']
+ record_time = record[@record_time_key]
  end
  else
  log.fatal "BUG: invalid time_source: #{@time_source}"
@@ -253,6 +265,11 @@ class Fluent::KafkaGroupInput < Fluent::Input
  if @kafka_message_key
  record[@kafka_message_key] = msg.key
  end
+ if @add_headers
+ msg.headers.each_pair { |k, v|
+ record[k] = v
+ }
+ end
  es.add(record_time, record)
  rescue => e
  log.warn "parser error in #{batch.topic}/#{batch.partition}", :error => e.to_s, :value => msg.value, :offset => msg.offset
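How the header merge behaves, shown as a standalone sketch; the message object and header values are invented. Note that a header whose key matches an existing record field overwrites that field:

    require 'ostruct'

    # Stand-in for a fetched Kafka message that carries headers.
    msg    = OpenStruct.new(headers: { 'trace_id' => 'abc123', 'source' => 'edge-1' })
    record = { 'message' => 'hello', 'source' => 'app' }

    msg.headers.each_pair { |k, v| record[k] = v }   # what add_headers enables
    p record  # => {"message"=>"hello", "source"=>"edge-1", "trace_id"=>"abc123"}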
data/lib/fluent/plugin/kafka_producer_ext.rb CHANGED
@@ -69,12 +69,13 @@ module Kafka
  retry_backoff: retry_backoff,
  max_buffer_size: max_buffer_size,
  max_buffer_bytesize: max_buffer_bytesize,
+ partitioner: @partitioner,
  )
  end
  end
 
  class TopicProducer
- def initialize(topic, cluster:, transaction_manager:, logger:, instrumenter:, compressor:, ack_timeout:, required_acks:, max_retries:, retry_backoff:, max_buffer_size:, max_buffer_bytesize:)
+ def initialize(topic, cluster:, transaction_manager:, logger:, instrumenter:, compressor:, ack_timeout:, required_acks:, max_retries:, retry_backoff:, max_buffer_size:, max_buffer_bytesize:, partitioner:)
  @cluster = cluster
  @transaction_manager = transaction_manager
  @logger = logger
@@ -86,6 +87,7 @@ module Kafka
  @max_buffer_size = max_buffer_size
  @max_buffer_bytesize = max_buffer_bytesize
  @compressor = compressor
+ @partitioner = partitioner
 
  @topic = topic
  @cluster.add_target_topics(Set.new([topic]))
@@ -250,7 +252,7 @@ module Kafka
 
  begin
  if partition.nil?
- partition = Partitioner.partition_for_key(partition_count, message)
+ partition = @partitioner.call(partition_count, message)
  end
 
  @buffer.write(
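The old `Partitioner.partition_for_key` class method is replaced by a partitioner object that responds to `call(partition_count, message)`, following the ruby-kafka 1.x Partitioner API mentioned in the ChangeLog. A minimal custom partitioner sketch along the lines of the default CRC32 strategy; the class name and message struct here are illustrative, not part of this gem:

    require 'zlib'

    class Crc32Partitioner
      # Interface used above: call(partition_count, message)
      def call(partition_count, message)
        key = message.partition_key || message.key
        key.nil? ? rand(partition_count) : Zlib.crc32(key) % partition_count
      end
    end

    Message = Struct.new(:key, :partition_key, keyword_init: true)
    partitioner = Crc32Partitioner.new
    puts partitioner.call(6, Message.new(key: 'user-42'))   # stable partition for the key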
data/lib/fluent/plugin/out_kafka2.rb CHANGED
@@ -15,6 +15,7 @@ module Fluent::Plugin
  Set brokers directly:
  <broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,..
  DESC
+ config_param :topic, :string, :default => nil, :desc => "kafka topic. Placeholders are supported"
  config_param :topic_key, :string, :default => 'topic', :desc => "Field for kafka topic"
  config_param :default_topic, :string, :default => nil,
  :desc => "Default output topic when record doesn't have topic field"
@@ -68,6 +69,7 @@ The codec the producer uses to compress messages.
  Supported codecs depends on ruby-kafka: https://github.com/zendesk/ruby-kafka#compression
  DESC
  config_param :max_send_limit_bytes, :size, :default => nil
+ config_param :discard_kafka_delivery_failed, :bool, :default => false
  config_param :active_support_notification_regex, :string, :default => nil,
  :desc => <<-DESC
  Add a regular expression to capture ActiveSupport notifications from the Kafka client
@@ -215,7 +217,11 @@ DESC
  # TODO: optimize write performance
  def write(chunk)
  tag = chunk.metadata.tag
- topic = (chunk.metadata.variables && chunk.metadata.variables[@topic_key_sym]) || @default_topic || tag
+ topic = if @topic
+ extract_placeholders(@topic, chunk)
+ else
+ (chunk.metadata.variables && chunk.metadata.variables[@topic_key_sym]) || @default_topic || tag
+ end
 
  messages = 0
  record_buf = nil
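Because the new `topic` parameter is run through `extract_placeholders`, tag and chunk-key placeholders can be embedded in it; a configuration sketch (topic pattern and buffer keys are examples):

    <match app.**>
      @type kafka2
      brokers broker1:9092
      # ${tag} resolves per chunk because tag is listed as a buffer chunk key
      topic "events-${tag}"
      <format>
        @type json
      </format>
      <buffer tag>
        flush_interval 3s
      </buffer>
    </match>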
@@ -262,7 +268,16 @@ DESC
 
  if messages > 0
  log.debug { "#{messages} messages send." }
- producer.deliver_messages
+ if @discard_kafka_delivery_failed
+ begin
+ producer.deliver_messages
+ rescue Kafka::DeliveryFailed => e
+ log.warn "DeliveryFailed occurred. Discard broken event:", :error => e.to_s, :error_class => e.class.to_s, :tag => tag
+ producer.clear_buffer
+ end
+ else
+ producer.deliver_messages
+ end
  end
  rescue Kafka::UnknownTopicOrPartition
  if @use_default_for_unknown_topic && topic != @default_topic
data/lib/fluent/plugin/out_rdkafka2.rb CHANGED
@@ -33,6 +33,7 @@ Set brokers directly:
  <broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,..
  Brokers: you can choose to use either brokers or zookeeper.
  DESC
+ config_param :topic, :string, :default => nil, :desc => "kafka topic. Placeholders are supported"
  config_param :topic_key, :string, :default => 'topic', :desc => "Field for kafka topic"
  config_param :default_topic, :string, :default => nil,
  :desc => "Default output topic when record doesn't have topic field"
@@ -72,6 +73,7 @@ The codec the producer uses to compress messages. Used for compression.codec
  Supported codecs: (gzip|snappy)
  DESC
  config_param :max_send_limit_bytes, :size, :default => nil
+ config_param :discard_kafka_delivery_failed, :bool, :default => false
  config_param :rdkafka_buffering_max_ms, :integer, :default => nil, :desc => 'Used for queue.buffering.max.ms'
  config_param :rdkafka_buffering_max_messages, :integer, :default => nil, :desc => 'Used for queue.buffering.max.messages'
  config_param :rdkafka_message_max_bytes, :integer, :default => nil, :desc => 'Used for message.max.bytes'
@@ -278,7 +280,11 @@ DESC
 
  def write(chunk)
  tag = chunk.metadata.tag
- topic = (chunk.metadata.variables && chunk.metadata.variables[@topic_key_sym]) || @default_topic || tag
+ topic = if @topic
+ extract_placeholders(@topic, chunk)
+ else
+ (chunk.metadata.variables && chunk.metadata.variables[@topic_key_sym]) || @default_topic || tag
+ end
 
  handlers = []
  record_buf = nil
@@ -320,9 +326,13 @@ DESC
  }
  end
  rescue Exception => e
- log.warn "Send exception occurred: #{e} at #{e.backtrace.first}"
- # Raise exception to retry sendind messages
- raise e
+ if @discard_kafka_delivery_failed
+ log.warn "Delivery failed. Discard events:", :error => e.to_s, :error_class => e.class.to_s, :tag => tag
+ else
+ log.warn "Send exception occurred: #{e} at #{e.backtrace.first}"
+ # Raise exception to retry sendind messages
+ raise e
+ end
  end
 
  def enqueue_with_retry(producer, topic, record_buf, message_key, partition, headers)
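The rdkafka2 output gains the same two parameters; a sketch combining them, with placeholder broker and topic values:

    <match app.**>
      @type rdkafka2
      brokers broker1:9092
      topic "events-${tag}"
      # log and drop the chunk instead of retrying on send errors
      discard_kafka_delivery_failed true
      <format>
        @type json
      </format>
      <buffer tag>
        flush_interval 3s
      </buffer>
    </match>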
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: fluent-plugin-kafka
  version: !ruby/object:Gem::Version
- version: 0.12.4
+ version: 0.14.2
  platform: ruby
  authors:
  - Hidemasa Togashi
@@ -9,7 +9,7 @@ authors:
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2020-03-03 00:00:00.000000000 Z
+ date: 2020-08-26 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: fluentd
@@ -51,20 +51,20 @@ dependencies:
  requirements:
  - - ">="
  - !ruby/object:Gem::Version
- version: 0.7.8
+ version: 1.2.0
  - - "<"
  - !ruby/object:Gem::Version
- version: 0.8.0
+ version: '2'
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
  - !ruby/object:Gem::Version
- version: 0.7.8
+ version: 1.2.0
  - - "<"
  - !ruby/object:Gem::Version
- version: 0.8.0
+ version: '2'
  - !ruby/object:Gem::Dependency
  name: rake
  requirement: !ruby/object:Gem::Requirement