fluent-plugin-kafka 0.8.4 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
-   metadata.gz: 89401c7dab61eaf4ca3275ab87f09cab16a628e5
-   data.tar.gz: c21453ce9d5aa335db22c4d24a49216e67923313
+   metadata.gz: 4ba39ffa7fe26f17e105ea227d72be24796dd751
+   data.tar.gz: b56e6e521aeeffadb78c48ce041de214a6c4d673
  SHA512:
-   metadata.gz: 7ec04e4653c1d0a4a28473a3c477a44a87c6c87ee9e544943e14adbb2eae342c1635339ad00799f55f05bb4319f25f49c16ed85406aa843936b0a6ca38462916
-   data.tar.gz: b705e3367f9d70933f07226a270aaf66cf7aa510c299a18f702620d5fabd059f18adc7ac1f7cda6aaa546b2e62789d89795a95b6024dab16512b218aa726dfa5
+   metadata.gz: 3489e648c43e82d9c2c67eaa933801014d6553c2df13cea6839a42b707d5d154765532f19a8620dcf1a95479513165d136874feb6a4729f53dda0aa04f389f31
+   data.tar.gz: 7e2b3d89558211394dfe6d2e32a865e5ed469e459f9756434b7fdbca59bd9ac31f520e3d52d17aa37cdd1c21a9fefceb34a5d632198a448479a5957ea6f52962
data/ChangeLog CHANGED
@@ -1,3 +1,8 @@
+ Release 0.9.0 - 2019/02/22
+
+   * Add v1 API based rdkafka2 output plugin
+   * out_kafka2: Add use_default_for_unknown_topic parameter
+
  Release 0.8.4 - 2019/01/18

    * in_kafka_group: Support integer/float time field with time_format
data/README.md CHANGED
@@ -249,6 +249,7 @@ This plugin is for fluentd v1.0 or later. This will be `out_kafka` plugin in the
  exclude_topic_key (bool) :default => false
  exclude_partition_key (bool) :default => false
  get_kafka_client_log (bool) :default => false
+ use_default_for_unknown_topic (bool) :default => false

  <format>
    @type (json|ltsv|msgpack|attr:<record name>|<formatter name>) :default => json
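For orientation, here is a minimal out_kafka2 configuration exercising the new parameter; the match pattern, broker address, and topic names are illustrative, not taken from the package:

    <match app.**>
      @type kafka2
      brokers localhost:9092
      # events whose 'topic' field points at a missing topic fall back here
      default_topic fallback_events
      use_default_for_unknown_topic true
      <format>
        @type json
      </format>
      <buffer topic>
        flush_interval 3s
      </buffer>
    </match>

Note that default_topic must be set whenever use_default_for_unknown_topic is enabled; the configure change below enforces this with a Fluent::ConfigError.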
data/fluent-plugin-kafka.gemspec CHANGED
@@ -13,7 +13,7 @@ Gem::Specification.new do |gem|
    gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
    gem.name = "fluent-plugin-kafka"
    gem.require_paths = ["lib"]
-   gem.version = '0.8.4'
+   gem.version = '0.9.0'
    gem.required_ruby_version = ">= 2.1.0"

    gem.add_dependency "fluentd", [">= 0.10.58", "< 2"]
data/lib/fluent/plugin/out_kafka2.rb CHANGED
@@ -24,6 +24,7 @@ DESC
    config_param :default_partition_key, :string, :default => nil
    config_param :partition_key, :string, :default => 'partition', :desc => "Field for kafka partition"
    config_param :default_partition, :integer, :default => nil
+   config_param :use_default_for_unknown_topic, :bool, :default => false, :desc => "If true, default_topic is used when topic not found"
    config_param :client_id, :string, :default => 'fluentd'
    config_param :idempotent, :bool, :default => false, :desc => 'Enable idempotent producer'
    config_param :sasl_over_ssl, :bool, :default => true,
@@ -126,6 +127,9 @@ DESC
      @formatter_proc = setup_formatter(formatter_conf)

      if @default_topic.nil?
+       if @use_default_for_unknown_topic
+         raise Fluent::ConfigError, "default_topic must be set when use_default_for_unknown_topic is true"
+       end
        if @chunk_keys.include?('topic') && !@chunk_key_tag
          log.warn "Use 'topic' field of event record for topic but no fallback. Recommend to set default_topic or set 'tag' in buffer chunk keys like <buffer topic,tag>"
        end
@@ -195,12 +199,12 @@ DESC
    def write(chunk)
      tag = chunk.metadata.tag
      topic = chunk.metadata.variables[@topic_key_sym] || @default_topic || tag
-     producer = @kafka.topic_producer(topic, @producer_opts)

      messages = 0
      record_buf = nil

      begin
+       producer = @kafka.topic_producer(topic, @producer_opts)
        chunk.msgpack_each { |time, record|
          begin
            record = inject_values_to_record(tag, time, record)
@@ -225,6 +229,14 @@ DESC
          log.debug { "#{messages} messages send." }
          producer.deliver_messages
        end
+     rescue Kafka::UnknownTopicOrPartition
+       if @use_default_for_unknown_topic && topic != @default_topic
+         producer.shutdown if producer
+         log.warn "'#{topic}' topic not found. Retry with '#{default_topic}' topic"
+         topic = @default_topic
+         retry
+       end
+       raise
      end
    rescue Exception => e
      ignore = @ignore_exceptions.include?(e.class.name)
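The control flow added above is the standard Ruby rescue/retry idiom: on Kafka::UnknownTopicOrPartition the producer is shut down, the topic variable is reassigned, and retry re-runs the whole begin block; the topic != @default_topic guard ensures at most one fallback before the error is re-raised. A stripped-down, runnable sketch of the same idiom (all names here are hypothetical stand-ins, not from the plugin):

    # Hypothetical stand-ins for Kafka::UnknownTopicOrPartition and delivery.
    class UnknownTopic < StandardError; end

    KNOWN_TOPICS = ['fallback_events']

    def deliver(topic, fallback)
      begin
        raise UnknownTopic unless KNOWN_TOPICS.include?(topic)
        puts "delivered to #{topic}"
      rescue UnknownTopic
        if fallback && topic != fallback
          topic = fallback # swap in the default topic
          retry            # re-run the begin block once with the fallback
        end
        raise # no fallback left; surface the error
      end
    end

    deliver('missing_topic', 'fallback_events') # => delivered to fallback_events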
data/lib/fluent/plugin/out_rdkafka2.rb ADDED
@@ -0,0 +1,293 @@
+ require 'thread'
+ require 'fluent/plugin/output'
+ require 'fluent/plugin/kafka_plugin_util'
+
+ require 'rdkafka'
+
+ class Rdkafka::Producer
+   # return false if producer is forcefully closed, otherwise return true
+   def close(timeout = nil)
+     @closing = true
+     # Wait for the polling thread to finish up
+     # If the broker isn't alive, the thread doesn't exit
+     if timeout
+       thr = @polling_thread.join(timeout)
+       return !!thr
+     else
+       @polling_thread.join
+       return true
+     end
+   end
+ end
+
+ module Fluent::Plugin
+   class Fluent::Rdkafka2Output < Output
+     Fluent::Plugin.register_output('rdkafka2', self)
+
+     helpers :inject, :formatter
+
+     config_param :brokers, :string, :default => 'localhost:9092',
+                  :desc => <<-DESC
+ Set brokers directly:
+ <broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,..
+ Brokers: you can choose to use either brokers or zookeeper.
+ DESC
+     config_param :topic_key, :string, :default => 'topic', :desc => "Field for kafka topic"
+     config_param :default_topic, :string, :default => nil,
+                  :desc => "Default output topic when record doesn't have topic field"
+     config_param :message_key_key, :string, :default => 'message_key', :desc => "Field for kafka message key"
+     config_param :default_message_key, :string, :default => nil
+     config_param :partition_key, :string, :default => 'partition', :desc => "Field for kafka partition"
+     config_param :default_partition, :integer, :default => nil
+     config_param :client_id, :string, :default => 'kafka'
+     config_param :output_data_type, :string, :default => 'json', :obsoleted => "Use <format> section instead"
+     config_param :output_include_tag, :bool, :default => false, :obsoleted => "Use <inject> section instead"
+     config_param :output_include_time, :bool, :default => false, :obsoleted => "Use <inject> section instead"
+     config_param :exclude_partition, :bool, :default => false,
+                  :desc => <<-DESC
+ Set true to remove partition from data
+ DESC
+     config_param :exclude_message_key, :bool, :default => false,
+                  :desc => <<-DESC
+ Set true to remove partition key from data
+ DESC
+     config_param :exclude_topic_key, :bool, :default => false,
+                  :desc => <<-DESC
+ Set true to remove topic name key from data
+ DESC
+     config_param :max_send_retries, :integer, :default => 2,
+                  :desc => "Number of times to retry sending of messages to a leader."
+     config_param :required_acks, :integer, :default => -1,
+                  :desc => "The number of acks required per request."
+     config_param :ack_timeout, :time, :default => nil,
+                  :desc => "How long the producer waits for acks."
+     config_param :compression_codec, :string, :default => nil,
+                  :desc => <<-DESC
+ The codec the producer uses to compress messages.
+ Supported codecs: (gzip|snappy)
+ DESC
+
+     config_param :rdkafka_buffering_max_ms, :integer, :default => nil
+     config_param :rdkafka_buffering_max_messages, :integer, :default => nil
+     config_param :rdkafka_message_max_bytes, :integer, :default => nil
+     config_param :rdkafka_message_max_num, :integer, :default => nil
+     config_param :rdkafka_delivery_handle_poll_timeout, :integer, :default => 30
+     config_param :rdkafka_options, :hash, :default => {}
+
+     config_param :max_enqueue_retries, :integer, :default => 3
+     config_param :enqueue_retry_backoff, :integer, :default => 3
+
+     config_param :service_name, :string, :default => nil
+     config_param :ssl_client_cert_key_password, :string, :default => nil
+
+     config_section :buffer do
+       config_set_default :chunk_keys, ["topic"]
+     end
+     config_section :format do
+       config_set_default :@type, 'json'
+       config_set_default :add_newline, false
+     end
+
+     include Fluent::KafkaPluginUtil::SSLSettings
+     include Fluent::KafkaPluginUtil::SaslSettings
+
+     def initialize
+       super
+       @producers = {}
+       @producers_mutex = Mutex.new
+     end
+
+     def configure(conf)
+       super
+       log.instance_eval {
+         def add(level, &block)
+           if block
+             self.info(block.call)
+           end
+         end
+       }
+       Rdkafka::Config.logger = log
+       config = build_config
+       @rdkafka = Rdkafka::Config.new(config)
+
+       if @default_topic.nil?
+         if @chunk_keys.include?(@topic_key) && !@chunk_key_tag
+           log.warn "Use '#{@topic_key}' field of event record for topic but no fallback. Recommend to set default_topic or set 'tag' in buffer chunk keys like <buffer #{@topic_key},tag>"
+         end
+       else
+         if @chunk_key_tag
+           log.warn "default_topic is set. Fluentd's event tag is not used for topic"
+         end
+       end
+
+       formatter_conf = conf.elements('format').first
+       unless formatter_conf
+         raise Fluent::ConfigError, "<format> section is required."
+       end
+       unless formatter_conf["@type"]
+         raise Fluent::ConfigError, "format/@type is required."
+       end
+       @formatter_proc = setup_formatter(formatter_conf)
+       @topic_key_sym = @topic_key.to_sym
+     end
+
+     def build_config
+       config = {:"bootstrap.servers" => @brokers}
+
+       if @ssl_ca_cert && @ssl_ca_cert[0]
+         ssl = true
+         config[:"ssl.ca.location"] = @ssl_ca_cert[0]
+         config[:"ssl.certificate.location"] = @ssl_client_cert if @ssl_client_cert
+         config[:"ssl.key.location"] = @ssl_client_cert_key if @ssl_client_cert_key
+         config[:"ssl.key.password"] = @ssl_client_cert_key_password if @ssl_client_cert_key_password
+       end
+
+       if @principal
+         sasl = true
+         config[:"sasl.mechanisms"] = "GSSAPI"
+         config[:"sasl.kerberos.principal"] = @principal
+         config[:"sasl.kerberos.service.name"] = @service_name if @service_name
+         config[:"sasl.kerberos.keytab"] = @keytab if @keytab
+       end
+
+       if ssl && sasl
+         security_protocol = "SASL_SSL"
+       elsif ssl && !sasl
+         security_protocol = "SSL"
+       elsif !ssl && sasl
+         security_protocol = "SASL_PLAINTEXT"
+       else
+         security_protocol = "PLAINTEXT"
+       end
+       config[:"security.protocol"] = security_protocol
+
+       config[:"compression.codec"] = @compression_codec if @compression_codec
+       config[:"message.send.max.retries"] = @max_send_retries if @max_send_retries
+       config[:"request.required.acks"] = @required_acks if @required_acks
+       config[:"request.timeout.ms"] = @ack_timeout * 1000 if @ack_timeout
+       config[:"queue.buffering.max.ms"] = @rdkafka_buffering_max_ms if @rdkafka_buffering_max_ms
+       config[:"queue.buffering.max.messages"] = @rdkafka_buffering_max_messages if @rdkafka_buffering_max_messages
+       config[:"message.max.bytes"] = @rdkafka_message_max_bytes if @rdkafka_message_max_bytes
+       config[:"batch.num.messages"] = @rdkafka_message_max_num if @rdkafka_message_max_num
+
+       @rdkafka_options.each { |k, v|
+         config[k.to_sym] = v
+       }
+
+       config
+     end
+
+     def start
+       super
+     end
+
+     def multi_workers_ready?
+       true
+     end
+
+     def shutdown
+       super
+       shutdown_producers
+     end
+
+     def shutdown_producers
+       @producers_mutex.synchronize {
+         shutdown_threads = @producers.map { |key, producer|
+           th = Thread.new {
+             unless producer.close(10)
+               log.warn("Queue is forcefully closed after 10 seconds wait")
+             end
+           }
+           th.abort_on_exception = true
+           th
+         }
+         shutdown_threads.each { |th| th.join }
+         @producers = {}
+       }
+     end
+
+     def get_producer
+       @producers_mutex.synchronize {
+         producer = @producers[Thread.current.object_id]
+         unless producer
+           producer = @rdkafka.producer
+           @producers[Thread.current.object_id] = producer
+         end
+         producer
+       }
+     end
+
+     def setup_formatter(conf)
+       type = conf['@type']
+       case type
+       when 'ltsv'
+         require 'ltsv'
+         Proc.new { |tag, time, record| LTSV.dump(record) }
+       else
+         @formatter = formatter_create(usage: 'rdkafka-plugin', conf: conf)
+         @formatter.method(:format)
+       end
+     end
+
+     def write(chunk)
+       tag = chunk.metadata.tag
+       topic = chunk.metadata.variables[@topic_key_sym] || @default_topic || tag
+
+       handlers = []
+       record_buf = nil
+       record_buf_bytes = nil
+
+       begin
+         producer = get_producer
+         chunk.msgpack_each { |time, record|
+           begin
+             record = inject_values_to_record(tag, time, record)
+             record.delete(@topic_key) if @exclude_topic_key
+             partition = (@exclude_partition ? record.delete(@partition_key) : record[@partition_key]) || @default_partition
+             message_key = (@exclude_message_key ? record.delete(@message_key_key) : record[@message_key_key]) || @default_message_key
+
+             record_buf = @formatter_proc.call(tag, time, record)
+             record_buf_bytes = record_buf.bytesize
+             if @max_send_limit_bytes && record_buf_bytes > @max_send_limit_bytes
+               log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :record => record
+               next
+             end
+           rescue StandardError => e
+             log.warn "unexpected error during format record. Skip broken event:", :error => e.to_s, :error_class => e.class.to_s, :time => time, :record => record
+             next
+           end
+
+           handlers << enqueue_with_retry(producer, topic, record_buf, message_key, partition)
+         }
+         handlers.each { |handler|
+           handler.wait(@rdkafka_delivery_handle_poll_timeout) if @rdkafka_delivery_handle_poll_timeout != 0
+         }
+       end
+     rescue Exception => e
+       log.warn "Send exception occurred: #{e} at #{e.backtrace.first}"
+       # Raise exception to retry sendind messages
+       raise e
+     end
+
+     def enqueue_with_retry(producer, topic, record_buf, message_key, partition)
+       attempt = 0
+       loop do
+         begin
+           return producer.produce(topic: topic, payload: record_buf, key: message_key, partition: partition)
+         rescue Exception => e
+           if e.code == :queue_full
+             if attempt <= @max_enqueue_retries
+               log.warn "Failed to enqueue message; attempting retry #{attempt} of #{@max_enqueue_retries} after #{@enqueue_retry_backoff}s"
+               sleep @enqueue_retry_backoff
+               attempt += 1
+             else
+               raise "Failed to enqueue message although tried retry #{@max_enqueue_retries} times"
+             end
+           else
+             raise e
+           end
+         end
+       end
+     end
+   end
+ end
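To make the new plugin concrete, a minimal rdkafka2 match section built from the defaults above might look like this; the match pattern, broker list, topic, and the sample rdkafka_options entry are illustrative only, not taken from the package:

    <match app.**>
      @type rdkafka2
      brokers localhost:9092
      default_topic app_events
      compression_codec gzip
      # extra librdkafka properties pass straight through rdkafka_options
      rdkafka_options {"log.connection.close": false}
      <format>
        @type json
      </format>
      <buffer topic>
        flush_interval 10s
      </buffer>
    </match>

Unlike out_kafka2, delivery goes through librdkafka via the rdkafka gem: get_producer lazily creates one producer per flush thread, and shutdown_producers closes each with a 10-second timeout using the Rdkafka::Producer#close patch at the top of the file.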
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: fluent-plugin-kafka
  version: !ruby/object:Gem::Version
-   version: 0.8.4
+   version: 0.9.0
  platform: ruby
  authors:
  - Hidemasa Togashi
@@ -9,7 +9,7 @@ authors:
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2019-01-19 00:00:00.000000000 Z
+ date: 2019-02-21 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: fluentd
@@ -117,6 +117,7 @@ files:
  - lib/fluent/plugin/out_kafka2.rb
  - lib/fluent/plugin/out_kafka_buffered.rb
  - lib/fluent/plugin/out_rdkafka.rb
+ - lib/fluent/plugin/out_rdkafka2.rb
  - test/helper.rb
  - test/plugin/test_out_kafka.rb
  homepage: https://github.com/fluent/fluent-plugin-kafka