fluent-plugin-kafka 0.8.4 → 0.9.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 89401c7dab61eaf4ca3275ab87f09cab16a628e5
-  data.tar.gz: c21453ce9d5aa335db22c4d24a49216e67923313
+  metadata.gz: 4ba39ffa7fe26f17e105ea227d72be24796dd751
+  data.tar.gz: b56e6e521aeeffadb78c48ce041de214a6c4d673
 SHA512:
-  metadata.gz: 7ec04e4653c1d0a4a28473a3c477a44a87c6c87ee9e544943e14adbb2eae342c1635339ad00799f55f05bb4319f25f49c16ed85406aa843936b0a6ca38462916
-  data.tar.gz: b705e3367f9d70933f07226a270aaf66cf7aa510c299a18f702620d5fabd059f18adc7ac1f7cda6aaa546b2e62789d89795a95b6024dab16512b218aa726dfa5
+  metadata.gz: 3489e648c43e82d9c2c67eaa933801014d6553c2df13cea6839a42b707d5d154765532f19a8620dcf1a95479513165d136874feb6a4729f53dda0aa04f389f31
+  data.tar.gz: 7e2b3d89558211394dfe6d2e32a865e5ed469e459f9756434b7fdbca59bd9ac31f520e3d52d17aa37cdd1c21a9fefceb34a5d632198a448479a5957ea6f52962
data/ChangeLog CHANGED
@@ -1,3 +1,8 @@
+Release 0.9.0 - 2019/02/22
+
+	* Add v1 API based rdkafka2 output plugin
+	* out_kafka2: Add use_default_for_unknown_topic parameter
+
 Release 0.8.4 - 2019/01/18
 
 	* in_kafka_group: Support integer/float time field with time_format
data/README.md CHANGED
@@ -249,6 +249,7 @@ This plugin is for fluentd v1.0 or later. This will be `out_kafka` plugin in the
     exclude_topic_key (bool) :default => false
     exclude_partition_key (bool) :default => false
     get_kafka_client_log (bool) :default => false
+    use_default_for_unknown_topic (bool) :default => false
 
     <format>
       @type (json|ltsv|msgpack|attr:<record name>|<formatter name>) :default => json
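A minimal out_kafka2 configuration sketch using the new option (broker address and topic names are hypothetical; `topic` must be a buffer chunk key so each chunk carries its destination). Records routed by their `topic` field fall back to `default_topic` when the cluster reports the topic as unknown:

    <match app.**>
      @type kafka2
      # assumed local broker
      brokers localhost:9092
      # route by the record's 'topic' field (the plugin default)
      topic_key topic
      # hypothetical fallback topic; must exist on the cluster
      default_topic fallback_events
      # new in 0.9.0: retry into default_topic on Kafka::UnknownTopicOrPartition
      use_default_for_unknown_topic true
      <format>
        @type json
      </format>
      <buffer topic>
        flush_interval 10s
      </buffer>
    </match>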
data/fluent-plugin-kafka.gemspec CHANGED
@@ -13,7 +13,7 @@ Gem::Specification.new do |gem|
   gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
   gem.name = "fluent-plugin-kafka"
   gem.require_paths = ["lib"]
-  gem.version = '0.8.4'
+  gem.version = '0.9.0'
   gem.required_ruby_version = ">= 2.1.0"
 
   gem.add_dependency "fluentd", [">= 0.10.58", "< 2"]
data/lib/fluent/plugin/out_kafka2.rb CHANGED
@@ -24,6 +24,7 @@ DESC
   config_param :default_partition_key, :string, :default => nil
   config_param :partition_key, :string, :default => 'partition', :desc => "Field for kafka partition"
   config_param :default_partition, :integer, :default => nil
+  config_param :use_default_for_unknown_topic, :bool, :default => false, :desc => "If true, default_topic is used when topic not found"
   config_param :client_id, :string, :default => 'fluentd'
   config_param :idempotent, :bool, :default => false, :desc => 'Enable idempotent producer'
   config_param :sasl_over_ssl, :bool, :default => true,
@@ -126,6 +127,9 @@ DESC
     @formatter_proc = setup_formatter(formatter_conf)
 
     if @default_topic.nil?
+      if @use_default_for_unknown_topic
+        raise Fluent::ConfigError, "default_topic must be set when use_default_for_unknown_topic is true"
+      end
       if @chunk_keys.include?('topic') && !@chunk_key_tag
         log.warn "Use 'topic' field of event record for topic but no fallback. Recommend to set default_topic or set 'tag' in buffer chunk keys like <buffer topic,tag>"
       end
@@ -195,12 +199,12 @@ DESC
   def write(chunk)
     tag = chunk.metadata.tag
     topic = chunk.metadata.variables[@topic_key_sym] || @default_topic || tag
-    producer = @kafka.topic_producer(topic, @producer_opts)
 
     messages = 0
     record_buf = nil
 
     begin
+      producer = @kafka.topic_producer(topic, @producer_opts)
       chunk.msgpack_each { |time, record|
         begin
           record = inject_values_to_record(tag, time, record)
@@ -225,6 +229,14 @@ DESC
         log.debug { "#{messages} messages send." }
         producer.deliver_messages
       end
+    rescue Kafka::UnknownTopicOrPartition
+      if @use_default_for_unknown_topic && topic != @default_topic
+        producer.shutdown if producer
+        log.warn "'#{topic}' topic not found. Retry with '#{default_topic}' topic"
+        topic = @default_topic
+        retry
+      end
+      raise
     end
   rescue Exception => e
     ignore = @ignore_exceptions.include?(e.class.name)
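The fallback above leans on Ruby's `rescue`/`retry`: `retry` re-runs the enclosing `begin` block, and because `topic` is reassigned first, the second pass opens a producer for the default topic; the `topic != @default_topic` guard keeps a missing default topic from looping forever. A standalone sketch of the same idiom (hypothetical names, not plugin code):

    # Simulated error type and send function, for illustration only.
    class UnknownTopicError < StandardError; end

    def deliver(topic)
      raise UnknownTopicError if topic == "missing_topic"
      puts "delivered to #{topic}"
    end

    target = "missing_topic"
    begin
      deliver(target)
    rescue UnknownTopicError
      if target != "fallback"
        target = "fallback" # switch destination, then re-run the begin block
        retry
      end
      raise # already on the fallback: give up and propagate
    end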
data/lib/fluent/plugin/out_rdkafka2.rb ADDED
@@ -0,0 +1,293 @@
+require 'thread'
+require 'fluent/plugin/output'
+require 'fluent/plugin/kafka_plugin_util'
+
+require 'rdkafka'
+
+class Rdkafka::Producer
+  # return false if producer is forcefully closed, otherwise return true
+  def close(timeout = nil)
+    @closing = true
+    # Wait for the polling thread to finish up
+    # If the broker isn't alive, the thread doesn't exit
+    if timeout
+      thr = @polling_thread.join(timeout)
+      return !!thr
+    else
+      @polling_thread.join
+      return true
+    end
+  end
+end
+
+module Fluent::Plugin
+  class Fluent::Rdkafka2Output < Output
+    Fluent::Plugin.register_output('rdkafka2', self)
+
+    helpers :inject, :formatter
+
+    config_param :brokers, :string, :default => 'localhost:9092',
+                 :desc => <<-DESC
+Set brokers directly:
+<broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,..
+Brokers: you can choose to use either brokers or zookeeper.
+DESC
+    config_param :topic_key, :string, :default => 'topic', :desc => "Field for kafka topic"
+    config_param :default_topic, :string, :default => nil,
+                 :desc => "Default output topic when record doesn't have topic field"
+    config_param :message_key_key, :string, :default => 'message_key', :desc => "Field for kafka message key"
+    config_param :default_message_key, :string, :default => nil
+    config_param :partition_key, :string, :default => 'partition', :desc => "Field for kafka partition"
+    config_param :default_partition, :integer, :default => nil
+    config_param :client_id, :string, :default => 'kafka'
+    config_param :output_data_type, :string, :default => 'json', :obsoleted => "Use <format> section instead"
+    config_param :output_include_tag, :bool, :default => false, :obsoleted => "Use <inject> section instead"
+    config_param :output_include_time, :bool, :default => false, :obsoleted => "Use <inject> section instead"
+    config_param :exclude_partition, :bool, :default => false,
+                 :desc => <<-DESC
+Set true to remove partition from data
+DESC
+    config_param :exclude_message_key, :bool, :default => false,
+                 :desc => <<-DESC
+Set true to remove partition key from data
+DESC
+    config_param :exclude_topic_key, :bool, :default => false,
+                 :desc => <<-DESC
+Set true to remove topic name key from data
+DESC
+    config_param :max_send_retries, :integer, :default => 2,
+                 :desc => "Number of times to retry sending of messages to a leader."
+    config_param :required_acks, :integer, :default => -1,
+                 :desc => "The number of acks required per request."
+    config_param :ack_timeout, :time, :default => nil,
+                 :desc => "How long the producer waits for acks."
+    config_param :compression_codec, :string, :default => nil,
+                 :desc => <<-DESC
+The codec the producer uses to compress messages.
+Supported codecs: (gzip|snappy)
+DESC
+
+    config_param :rdkafka_buffering_max_ms, :integer, :default => nil
+    config_param :rdkafka_buffering_max_messages, :integer, :default => nil
+    config_param :rdkafka_message_max_bytes, :integer, :default => nil
+    config_param :rdkafka_message_max_num, :integer, :default => nil
+    config_param :rdkafka_delivery_handle_poll_timeout, :integer, :default => 30
+    config_param :rdkafka_options, :hash, :default => {}
+
+    config_param :max_enqueue_retries, :integer, :default => 3
+    config_param :enqueue_retry_backoff, :integer, :default => 3
+
+    config_param :service_name, :string, :default => nil
+    config_param :ssl_client_cert_key_password, :string, :default => nil
+
+    config_section :buffer do
+      config_set_default :chunk_keys, ["topic"]
+    end
+    config_section :format do
+      config_set_default :@type, 'json'
+      config_set_default :add_newline, false
+    end
+
+    include Fluent::KafkaPluginUtil::SSLSettings
+    include Fluent::KafkaPluginUtil::SaslSettings
+
+    def initialize
+      super
+      @producers = {}
+      @producers_mutex = Mutex.new
+    end
+
+    def configure(conf)
+      super
+      log.instance_eval {
+        def add(level, &block)
+          if block
+            self.info(block.call)
+          end
+        end
+      }
+      Rdkafka::Config.logger = log
+      config = build_config
+      @rdkafka = Rdkafka::Config.new(config)
+
+      if @default_topic.nil?
+        if @chunk_keys.include?(@topic_key) && !@chunk_key_tag
+          log.warn "Use '#{@topic_key}' field of event record for topic but no fallback. Recommend to set default_topic or set 'tag' in buffer chunk keys like <buffer #{@topic_key},tag>"
+        end
+      else
+        if @chunk_key_tag
+          log.warn "default_topic is set. Fluentd's event tag is not used for topic"
+        end
+      end
+
+      formatter_conf = conf.elements('format').first
+      unless formatter_conf
+        raise Fluent::ConfigError, "<format> section is required."
+      end
+      unless formatter_conf["@type"]
+        raise Fluent::ConfigError, "format/@type is required."
+      end
+      @formatter_proc = setup_formatter(formatter_conf)
+      @topic_key_sym = @topic_key.to_sym
+    end
+
+    def build_config
+      config = {:"bootstrap.servers" => @brokers}
+
+      if @ssl_ca_cert && @ssl_ca_cert[0]
+        ssl = true
+        config[:"ssl.ca.location"] = @ssl_ca_cert[0]
+        config[:"ssl.certificate.location"] = @ssl_client_cert if @ssl_client_cert
+        config[:"ssl.key.location"] = @ssl_client_cert_key if @ssl_client_cert_key
+        config[:"ssl.key.password"] = @ssl_client_cert_key_password if @ssl_client_cert_key_password
+      end
+
+      if @principal
+        sasl = true
+        config[:"sasl.mechanisms"] = "GSSAPI"
+        config[:"sasl.kerberos.principal"] = @principal
+        config[:"sasl.kerberos.service.name"] = @service_name if @service_name
+        config[:"sasl.kerberos.keytab"] = @keytab if @keytab
+      end
+
+      if ssl && sasl
+        security_protocol = "SASL_SSL"
+      elsif ssl && !sasl
+        security_protocol = "SSL"
+      elsif !ssl && sasl
+        security_protocol = "SASL_PLAINTEXT"
+      else
+        security_protocol = "PLAINTEXT"
+      end
+      config[:"security.protocol"] = security_protocol
+
+      config[:"compression.codec"] = @compression_codec if @compression_codec
+      config[:"message.send.max.retries"] = @max_send_retries if @max_send_retries
+      config[:"request.required.acks"] = @required_acks if @required_acks
+      config[:"request.timeout.ms"] = @ack_timeout * 1000 if @ack_timeout
+      config[:"queue.buffering.max.ms"] = @rdkafka_buffering_max_ms if @rdkafka_buffering_max_ms
+      config[:"queue.buffering.max.messages"] = @rdkafka_buffering_max_messages if @rdkafka_buffering_max_messages
+      config[:"message.max.bytes"] = @rdkafka_message_max_bytes if @rdkafka_message_max_bytes
+      config[:"batch.num.messages"] = @rdkafka_message_max_num if @rdkafka_message_max_num
+
+      @rdkafka_options.each { |k, v|
+        config[k.to_sym] = v
+      }
+
+      config
+    end
+
+    def start
+      super
+    end
+
+    def multi_workers_ready?
+      true
+    end
+
+    def shutdown
+      super
+      shutdown_producers
+    end
+
+    def shutdown_producers
+      @producers_mutex.synchronize {
+        shutdown_threads = @producers.map { |key, producer|
+          th = Thread.new {
+            unless producer.close(10)
+              log.warn("Queue is forcefully closed after 10 seconds wait")
+            end
+          }
+          th.abort_on_exception = true
+          th
+        }
+        shutdown_threads.each { |th| th.join }
+        @producers = {}
+      }
+    end
+
+    def get_producer
+      @producers_mutex.synchronize {
+        producer = @producers[Thread.current.object_id]
+        unless producer
+          producer = @rdkafka.producer
+          @producers[Thread.current.object_id] = producer
+        end
+        producer
+      }
+    end
+
+    def setup_formatter(conf)
+      type = conf['@type']
+      case type
+      when 'ltsv'
+        require 'ltsv'
+        Proc.new { |tag, time, record| LTSV.dump(record) }
+      else
+        @formatter = formatter_create(usage: 'rdkafka-plugin', conf: conf)
+        @formatter.method(:format)
+      end
+    end
+
+    def write(chunk)
+      tag = chunk.metadata.tag
+      topic = chunk.metadata.variables[@topic_key_sym] || @default_topic || tag
+
+      handlers = []
+      record_buf = nil
+      record_buf_bytes = nil
+
+      begin
+        producer = get_producer
+        chunk.msgpack_each { |time, record|
+          begin
+            record = inject_values_to_record(tag, time, record)
+            record.delete(@topic_key) if @exclude_topic_key
+            partition = (@exclude_partition ? record.delete(@partition_key) : record[@partition_key]) || @default_partition
+            message_key = (@exclude_message_key ? record.delete(@message_key_key) : record[@message_key_key]) || @default_message_key
+
+            record_buf = @formatter_proc.call(tag, time, record)
+            record_buf_bytes = record_buf.bytesize
+            if @max_send_limit_bytes && record_buf_bytes > @max_send_limit_bytes
+              log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :record => record
+              next
+            end
+          rescue StandardError => e
+            log.warn "unexpected error during format record. Skip broken event:", :error => e.to_s, :error_class => e.class.to_s, :time => time, :record => record
+            next
+          end
+
+          handlers << enqueue_with_retry(producer, topic, record_buf, message_key, partition)
+        }
+        handlers.each { |handler|
+          handler.wait(@rdkafka_delivery_handle_poll_timeout) if @rdkafka_delivery_handle_poll_timeout != 0
+        }
+      end
+    rescue Exception => e
+      log.warn "Send exception occurred: #{e} at #{e.backtrace.first}"
+      # Raise exception to retry sending messages
+      raise e
+    end
+
+    def enqueue_with_retry(producer, topic, record_buf, message_key, partition)
+      attempt = 0
+      loop do
+        begin
+          return producer.produce(topic: topic, payload: record_buf, key: message_key, partition: partition)
+        rescue Exception => e
+          if e.code == :queue_full
+            if attempt <= @max_enqueue_retries
+              log.warn "Failed to enqueue message; attempting retry #{attempt} of #{@max_enqueue_retries} after #{@enqueue_retry_backoff}s"
+              sleep @enqueue_retry_backoff
+              attempt += 1
+            else
+              raise "Failed to enqueue message although tried retry #{@max_enqueue_retries} times"
+            end
+          else
+            raise e
+          end
+        end
+      end
+    end
+  end
+end
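A minimal configuration sketch for the new rdkafka2 output (broker address and topic name are hypothetical; the `<format>` and `<buffer topic>` sections mirror the defaults the plugin sets above). `rdkafka_options` entries are merged verbatim into the librdkafka config built by `build_config`, so any librdkafka property name should work there; `log.connection.close` is shown as one plausible example:

    <match app.**>
      @type rdkafka2
      # assumed local broker; comma-separate multiple hosts
      brokers localhost:9092
      # hypothetical topic for records without a 'topic' field
      default_topic app_events
      # maps to librdkafka's compression.codec (gzip|snappy)
      compression_codec gzip
      rdkafka_options {"log.connection.close": false}
      <format>
        @type json
      </format>
      <buffer topic>
        flush_interval 10s
      </buffer>
    </match>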
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: fluent-plugin-kafka
 version: !ruby/object:Gem::Version
-  version: 0.8.4
+  version: 0.9.0
 platform: ruby
 authors:
 - Hidemasa Togashi
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2019-01-19 00:00:00.000000000 Z
+date: 2019-02-21 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: fluentd
@@ -117,6 +117,7 @@ files:
 - lib/fluent/plugin/out_kafka2.rb
 - lib/fluent/plugin/out_kafka_buffered.rb
 - lib/fluent/plugin/out_rdkafka.rb
+- lib/fluent/plugin/out_rdkafka2.rb
 - test/helper.rb
 - test/plugin/test_out_kafka.rb
 homepage: https://github.com/fluent/fluent-plugin-kafka