fluent-plugin-kafka-custom-ruby-version 0.9.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,301 @@
1
+ require 'thread'
2
+ require 'fluent/output'
3
+ require 'fluent/plugin/kafka_plugin_util'
4
+
5
+ require 'rdkafka'
6
+ require 'fluent/plugin/kafka_producer_ext'
7
+
8
# Monkey-patch Rdkafka::Producer#close so shutdown can bound the wait for the
# background polling thread (the stock close blocks indefinitely when the
# broker is unreachable).
class Rdkafka::Producer
  # Returns false when the producer was abandoned because the polling thread
  # did not finish within +timeout+ seconds; returns true otherwise.
  def close(timeout = nil)
    @closing = true
    # The polling thread only exits once librdkafka drains its queue; if the
    # broker isn't alive it may never terminate, hence the bounded join.
    return true unless timeout
    # Thread#join returns nil on timeout, the thread itself on success.
    !@polling_thread.join(timeout).nil?
  end
end
23
+
24
+ class Fluent::KafkaOutputBuffered2 < Fluent::BufferedOutput
25
+ Fluent::Plugin.register_output('rdkafka', self)
26
+
27
+ config_param :brokers, :string, :default => 'localhost:9092',
28
+ :desc => <<-DESC
29
+ Set brokers directly:
30
+ <broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,..
31
+ Brokers: you can choose to use either brokers or zookeeper.
32
+ DESC
33
+ config_param :default_topic, :string, :default => nil,
34
+ :desc => "Output topic"
35
+ config_param :default_message_key, :string, :default => nil
36
+ config_param :default_partition, :integer, :default => nil
37
+ config_param :client_id, :string, :default => 'kafka'
38
+ config_param :output_data_type, :string, :default => 'json',
39
+ :desc => <<-DESC
40
+ Supported format: (json|ltsv|msgpack|attr:<record name>|<formatter name>)
41
+ DESC
42
+ config_param :output_include_tag, :bool, :default => false
43
+ config_param :output_include_time, :bool, :default => false
44
+ config_param :exclude_partition, :bool, :default => false,
45
+ :desc => <<-DESC
46
+ Set true to remove partition from data
47
+ DESC
48
+ config_param :exclude_message_key, :bool, :default => false,
49
+ :desc => <<-DESC
50
+ Set true to remove partition key from data
51
+ DESC
52
+ config_param :exclude_topic_key, :bool, :default => false,
53
+ :desc => <<-DESC
54
+ Set true to remove topic name key from data
55
+ DESC
56
+ config_param :max_send_retries, :integer, :default => 2,
57
+ :desc => "Number of times to retry sending of messages to a leader."
58
+ config_param :required_acks, :integer, :default => -1,
59
+ :desc => "The number of acks required per request."
60
+ config_param :ack_timeout, :time, :default => nil,
61
+ :desc => "How long the producer waits for acks."
62
+ config_param :compression_codec, :string, :default => nil,
63
+ :desc => <<-DESC
64
+ The codec the producer uses to compress messages.
65
+ Supported codecs: (gzip|snappy)
66
+ DESC
67
+
68
+ config_param :rdkafka_buffering_max_ms, :integer, :default => nil
69
+ config_param :rdkafka_buffering_max_messages, :integer, :default => nil
70
+ config_param :rdkafka_message_max_bytes, :integer, :default => nil
71
+ config_param :rdkafka_message_max_num, :integer, :default => nil
72
+ config_param :rdkafka_delivery_handle_poll_timeout, :integer, :default => 30
73
+ config_param :rdkafka_options, :hash, :default => {}
74
+
75
+ config_param :max_enqueue_retries, :integer, :default => 3
76
+ config_param :enqueue_retry_backoff, :integer, :default => 3
77
+
78
+ config_param :service_name, :string, :default => nil
79
+ config_param :ssl_client_cert_key_password, :string, :default => nil
80
+
81
+ include Fluent::KafkaPluginUtil::SSLSettings
82
+ include Fluent::KafkaPluginUtil::SaslSettings
83
+
84
+ def initialize
85
+ super
86
+ @producers = {}
87
+ @producers_mutex = Mutex.new
88
+ end
89
+
90
# Validate configuration, wire the Rdkafka logger into Fluentd's logger and
# prepare the rdkafka config plus the record formatter.
def configure(conf)
  super
  # Rdkafka logs through a stdlib-style Logger#add; route every level to
  # info on this logger instance only.
  log.instance_eval {
    def add(level, &block)
      self.info(block.call) if block
    end
  }
  Rdkafka::Config.logger = log
  @rdkafka = Rdkafka::Config.new(build_config)
  @formatter_proc = setup_formatter(conf)
end
104
+
105
# Translate plugin settings into a librdkafka configuration hash.
# Entries from rdkafka_options are applied last so users can override any
# derived setting.
def build_config
  config = { :"bootstrap.servers" => @brokers }

  ssl = false
  sasl = false

  if @ssl_ca_cert && @ssl_ca_cert[0]
    ssl = true
    config[:"ssl.ca.location"] = @ssl_ca_cert[0]
    config[:"ssl.certificate.location"] = @ssl_client_cert if @ssl_client_cert
    config[:"ssl.key.location"] = @ssl_client_cert_key if @ssl_client_cert_key
    config[:"ssl.key.password"] = @ssl_client_cert_key_password if @ssl_client_cert_key_password
  end

  if @principal
    sasl = true
    config[:"sasl.mechanisms"] = "GSSAPI"
    config[:"sasl.kerberos.principal"] = @principal
    config[:"sasl.kerberos.service.name"] = @service_name if @service_name
    config[:"sasl.kerberos.keytab"] = @keytab if @keytab
  end

  # Security protocol follows from which of SSL / SASL are enabled.
  config[:"security.protocol"] =
    if ssl && sasl
      "SASL_SSL"
    elsif ssl
      "SSL"
    elsif sasl
      "SASL_PLAINTEXT"
    else
      "PLAINTEXT"
    end

  config[:"compression.codec"] = @compression_codec if @compression_codec
  config[:"message.send.max.retries"] = @max_send_retries if @max_send_retries
  config[:"request.required.acks"] = @required_acks if @required_acks
  config[:"request.timeout.ms"] = @ack_timeout * 1000 if @ack_timeout
  config[:"queue.buffering.max.ms"] = @rdkafka_buffering_max_ms if @rdkafka_buffering_max_ms
  config[:"queue.buffering.max.messages"] = @rdkafka_buffering_max_messages if @rdkafka_buffering_max_messages
  config[:"message.max.bytes"] = @rdkafka_message_max_bytes if @rdkafka_message_max_bytes
  config[:"batch.num.messages"] = @rdkafka_message_max_num if @rdkafka_message_max_num

  # Raw passthrough wins over everything derived above.
  @rdkafka_options.each { |k, v| config[k.to_sym] = v }

  config
end
152
+
153
def start
  super
end

# Safe under <system workers N>: producers are kept strictly per thread.
def multi_workers_ready?
  true
end
160
+
161
def shutdown
  super
  shutdown_producers
end

# Close every cached producer in parallel, allowing each up to 10 seconds
# to flush its queue before it is abandoned.
def shutdown_producers
  @producers_mutex.synchronize {
    closers = @producers.map do |_thread_id, producer|
      worker = Thread.new do
        unless producer.close(10)
          log.warn("Queue is forcefully closed after 10 seconds wait")
        end
      end
      worker.abort_on_exception = true
      worker
    end
    closers.each(&:join)
    @producers = {}
  }
end
181
+
182
# Fetch (or lazily create) the rdkafka producer bound to the calling thread.
# Keyed by thread object_id so concurrent flush threads never share a handle.
def get_producer
  @producers_mutex.synchronize {
    @producers[Thread.current.object_id] ||= @rdkafka.producer
  }
end
192
+
193
# Pass the tag through as the chunk key so #write can recover it from
# chunk.key when routing records to a topic.
def emit(tag, es, chain)
  super(tag, es, chain, tag)
end

# Buffer chunks carry the raw msgpack stream; decoding happens in #write.
def format_stream(tag, es)
  es.to_msgpack_stream
end
200
+
201
# Build a callable (tag, time, record) -> String for the configured
# output_data_type; falls back to a registered Fluentd formatter plugin.
def setup_formatter(conf)
  case @output_data_type
  when 'json'
    begin
      require 'oj'
      Oj.default_options = Fluent::DEFAULT_OJ_OPTIONS
      Proc.new { |tag, time, record| Oj.dump(record) }
    rescue LoadError
      # Oj is optional; fall back to yajl when it is not installed.
      require 'yajl'
      Proc.new { |tag, time, record| Yajl::Encoder.encode(record) }
    end
  when 'ltsv'
    require 'ltsv'
    Proc.new { |tag, time, record| LTSV.dump(record) }
  when 'msgpack'
    require 'msgpack'
    Proc.new { |tag, time, record| record.to_msgpack }
  when /^attr:(.*)$/
    # Regexp#=== in `when` populates $1 with the attribute list.
    @custom_attributes = $1.split(',').map(&:strip).reject(&:empty?)
    @custom_attributes.unshift('time') if @output_include_time
    @custom_attributes.unshift('tag') if @output_include_tag
    Proc.new { |tag, time, record|
      @custom_attributes.map { |attr|
        record[attr].nil? ? '' : record[attr].to_s
      }.join(@f_separator)
    }
  else
    @formatter = Fluent::Plugin.new_formatter(@output_data_type)
    @formatter.configure(conf)
    @formatter.method(:format)
  end
end
232
+
233
# Deliver one buffer chunk: format each record, enqueue it to rdkafka, then
# wait on every delivery handle so a failed ack raises and Fluentd retries
# the chunk.
#
# BUGFIX: the original built the handle list with `msgpack_each.map`, so any
# skipped record (`next` on format error or oversized payload) left a nil in
# the array and `handler.wait` then crashed with NoMethodError on nil.
# Handles are now collected only for records that were actually enqueued.
def write(chunk)
  tag = chunk.key
  def_topic = @default_topic || tag

  record_buf = nil
  record_buf_bytes = nil

  handlers = []
  chunk.msgpack_each { |time, record|
    begin
      if @output_include_time
        if @time_format
          record['time'.freeze] = Time.at(time).strftime(@time_format)
        else
          record['time'.freeze] = time
        end
      end

      record['tag'] = tag if @output_include_tag
      # Per-record routing fields win over the configured defaults.
      topic = (@exclude_topic_key ? record.delete('topic'.freeze) : record['topic'.freeze]) || def_topic
      partition = (@exclude_partition ? record.delete('partition'.freeze) : record['partition'.freeze]) || @default_partition
      message_key = (@exclude_message_key ? record.delete('message_key'.freeze) : record['message_key'.freeze]) || @default_message_key

      record_buf = @formatter_proc.call(tag, time, record)
      record_buf_bytes = record_buf.bytesize
      if @max_send_limit_bytes && record_buf_bytes > @max_send_limit_bytes
        log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :record => record
        next
      end
    rescue StandardError => e
      log.warn "unexpected error during format record. Skip broken event:", :error => e.to_s, :error_class => e.class.to_s, :time => time, :record => record
      next
    end

    producer = get_producer
    handlers << enqueue_with_retry(producer, topic, record_buf, message_key, partition)
  }
  # A poll timeout of 0 means fire-and-forget: skip waiting entirely.
  handlers.each { |handler|
    handler.wait(@rdkafka_delivery_handle_poll_timeout) if @rdkafka_delivery_handle_poll_timeout != 0
  }
rescue Exception => e
  # Deliberately broad: any delivery failure must propagate so the buffer
  # retries this chunk.
  log.warn "Send exception occurred: #{e} at #{e.backtrace.first}"
  # Raise exception to retry sending messages
  raise e
end
279
+
280
# Enqueue one message on the producer's local queue, retrying up to
# @max_enqueue_retries times (sleeping @enqueue_retry_backoff seconds between
# attempts) when librdkafka reports the queue is full. Returns the delivery
# handle. Any other error is re-raised untouched.
def enqueue_with_retry(producer, topic, record_buf, message_key, partition)
  attempt = 0
  loop do
    begin
      return producer.produce(topic: topic, payload: record_buf, key: message_key, partition: partition)
    rescue Exception => e
      # BUGFIX: only Rdkafka::RdkafkaError responds to #code; calling it
      # unconditionally converted every unrelated failure (e.g. TypeError)
      # into a confusing NoMethodError.
      if e.respond_to?(:code) && e.code == :queue_full
        if attempt <= @max_enqueue_retries
          log.warn "Failed to enqueue message; attempting retry #{attempt} of #{@max_enqueue_retries} after #{@enqueue_retry_backoff}s"
          sleep @enqueue_retry_backoff
          attempt += 1
        else
          raise "Failed to enqueue message although tried retry #{@max_enqueue_retries} times"
        end
      else
        raise e
      end
    end
  end
end
301
+ end
@@ -0,0 +1,293 @@
1
+ require 'thread'
2
+ require 'fluent/plugin/output'
3
+ require 'fluent/plugin/kafka_plugin_util'
4
+
5
+ require 'rdkafka'
6
+
7
# Monkey-patch Rdkafka::Producer#close so shutdown can bound the wait for the
# background polling thread (the stock close blocks indefinitely when the
# broker is unreachable).
class Rdkafka::Producer
  # Returns false when the producer was abandoned because the polling thread
  # did not finish within +timeout+ seconds; returns true otherwise.
  def close(timeout = nil)
    @closing = true
    # The polling thread only exits once librdkafka drains its queue; if the
    # broker isn't alive it may never terminate, hence the bounded join.
    return true unless timeout
    # Thread#join returns nil on timeout, the thread itself on success.
    !@polling_thread.join(timeout).nil?
  end
end
22
+
23
+ module Fluent::Plugin
24
+ class Fluent::Rdkafka2Output < Output
25
+ Fluent::Plugin.register_output('rdkafka2', self)
26
+
27
+ helpers :inject, :formatter
28
+
29
+ config_param :brokers, :string, :default => 'localhost:9092',
30
+ :desc => <<-DESC
31
+ Set brokers directly:
32
+ <broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,..
33
+ Brokers: you can choose to use either brokers or zookeeper.
34
+ DESC
35
+ config_param :topic_key, :string, :default => 'topic', :desc => "Field for kafka topic"
36
+ config_param :default_topic, :string, :default => nil,
37
+ :desc => "Default output topic when record doesn't have topic field"
38
+ config_param :message_key_key, :string, :default => 'message_key', :desc => "Field for kafka message key"
39
+ config_param :default_message_key, :string, :default => nil
40
+ config_param :partition_key, :string, :default => 'partition', :desc => "Field for kafka partition"
41
+ config_param :default_partition, :integer, :default => nil
42
+ config_param :client_id, :string, :default => 'kafka'
43
+ config_param :output_data_type, :string, :default => 'json', :obsoleted => "Use <format> section instead"
44
+ config_param :output_include_tag, :bool, :default => false, :obsoleted => "Use <inject> section instead"
45
+ config_param :output_include_time, :bool, :default => false, :obsoleted => "Use <inject> section instead"
46
+ config_param :exclude_partition, :bool, :default => false,
47
+ :desc => <<-DESC
48
+ Set true to remove partition from data
49
+ DESC
50
+ config_param :exclude_message_key, :bool, :default => false,
51
+ :desc => <<-DESC
52
+ Set true to remove partition key from data
53
+ DESC
54
+ config_param :exclude_topic_key, :bool, :default => false,
55
+ :desc => <<-DESC
56
+ Set true to remove topic name key from data
57
+ DESC
58
+ config_param :max_send_retries, :integer, :default => 2,
59
+ :desc => "Number of times to retry sending of messages to a leader."
60
+ config_param :required_acks, :integer, :default => -1,
61
+ :desc => "The number of acks required per request."
62
+ config_param :ack_timeout, :time, :default => nil,
63
+ :desc => "How long the producer waits for acks."
64
+ config_param :compression_codec, :string, :default => nil,
65
+ :desc => <<-DESC
66
+ The codec the producer uses to compress messages.
67
+ Supported codecs: (gzip|snappy)
68
+ DESC
69
+
70
+ config_param :rdkafka_buffering_max_ms, :integer, :default => nil
71
+ config_param :rdkafka_buffering_max_messages, :integer, :default => nil
72
+ config_param :rdkafka_message_max_bytes, :integer, :default => nil
73
+ config_param :rdkafka_message_max_num, :integer, :default => nil
74
+ config_param :rdkafka_delivery_handle_poll_timeout, :integer, :default => 30
75
+ config_param :rdkafka_options, :hash, :default => {}
76
+
77
+ config_param :max_enqueue_retries, :integer, :default => 3
78
+ config_param :enqueue_retry_backoff, :integer, :default => 3
79
+
80
+ config_param :service_name, :string, :default => nil
81
+ config_param :ssl_client_cert_key_password, :string, :default => nil
82
+
83
+ config_section :buffer do
84
+ config_set_default :chunk_keys, ["topic"]
85
+ end
86
+ config_section :format do
87
+ config_set_default :@type, 'json'
88
+ config_set_default :add_newline, false
89
+ end
90
+
91
+ include Fluent::KafkaPluginUtil::SSLSettings
92
+ include Fluent::KafkaPluginUtil::SaslSettings
93
+
94
+ def initialize
95
+ super
96
+ @producers = {}
97
+ @producers_mutex = Mutex.new
98
+ end
99
+
100
# Validate configuration, wire the Rdkafka logger into Fluentd's logger,
# build the rdkafka config, sanity-check topic routing and require a
# <format> section for the record formatter.
def configure(conf)
  super
  # Rdkafka logs through a stdlib-style Logger#add; route every level to
  # info on this logger instance only.
  log.instance_eval {
    def add(level, &block)
      self.info(block.call) if block
    end
  }
  Rdkafka::Config.logger = log
  @rdkafka = Rdkafka::Config.new(build_config)

  # Surface topic-routing gaps at startup rather than at flush time.
  if @default_topic.nil?
    if @chunk_keys.include?(@topic_key) && !@chunk_key_tag
      log.warn "Use '#{@topic_key}' field of event record for topic but no fallback. Recommend to set default_topic or set 'tag' in buffer chunk keys like <buffer #{@topic_key},tag>"
    end
  elsif @chunk_key_tag
    log.warn "default_topic is set. Fluentd's event tag is not used for topic"
  end

  formatter_conf = conf.elements('format').first
  raise Fluent::ConfigError, "<format> section is required." unless formatter_conf
  raise Fluent::ConfigError, "format/@type is required." unless formatter_conf["@type"]
  @formatter_proc = setup_formatter(formatter_conf)
  @topic_key_sym = @topic_key.to_sym
end
133
+
134
# Translate plugin settings into a librdkafka configuration hash.
# Entries from rdkafka_options are applied last so users can override any
# derived setting.
def build_config
  config = { :"bootstrap.servers" => @brokers }

  ssl = false
  sasl = false

  if @ssl_ca_cert && @ssl_ca_cert[0]
    ssl = true
    config[:"ssl.ca.location"] = @ssl_ca_cert[0]
    config[:"ssl.certificate.location"] = @ssl_client_cert if @ssl_client_cert
    config[:"ssl.key.location"] = @ssl_client_cert_key if @ssl_client_cert_key
    config[:"ssl.key.password"] = @ssl_client_cert_key_password if @ssl_client_cert_key_password
  end

  if @principal
    sasl = true
    config[:"sasl.mechanisms"] = "GSSAPI"
    config[:"sasl.kerberos.principal"] = @principal
    config[:"sasl.kerberos.service.name"] = @service_name if @service_name
    config[:"sasl.kerberos.keytab"] = @keytab if @keytab
  end

  # Security protocol follows from which of SSL / SASL are enabled.
  config[:"security.protocol"] =
    if ssl && sasl
      "SASL_SSL"
    elsif ssl
      "SSL"
    elsif sasl
      "SASL_PLAINTEXT"
    else
      "PLAINTEXT"
    end

  config[:"compression.codec"] = @compression_codec if @compression_codec
  config[:"message.send.max.retries"] = @max_send_retries if @max_send_retries
  config[:"request.required.acks"] = @required_acks if @required_acks
  config[:"request.timeout.ms"] = @ack_timeout * 1000 if @ack_timeout
  config[:"queue.buffering.max.ms"] = @rdkafka_buffering_max_ms if @rdkafka_buffering_max_ms
  config[:"queue.buffering.max.messages"] = @rdkafka_buffering_max_messages if @rdkafka_buffering_max_messages
  config[:"message.max.bytes"] = @rdkafka_message_max_bytes if @rdkafka_message_max_bytes
  config[:"batch.num.messages"] = @rdkafka_message_max_num if @rdkafka_message_max_num

  # Raw passthrough wins over everything derived above.
  @rdkafka_options.each { |k, v| config[k.to_sym] = v }

  config
end
179
+
180
def start
  super
end

# Safe under <system workers N>: producers are kept strictly per thread.
def multi_workers_ready?
  true
end
187
+
188
def shutdown
  super
  shutdown_producers
end

# Close every cached producer in parallel, allowing each up to 10 seconds
# to flush its queue before it is abandoned.
def shutdown_producers
  @producers_mutex.synchronize {
    closers = @producers.map do |_thread_id, producer|
      worker = Thread.new do
        unless producer.close(10)
          log.warn("Queue is forcefully closed after 10 seconds wait")
        end
      end
      worker.abort_on_exception = true
      worker
    end
    closers.each(&:join)
    @producers = {}
  }
end
208
+
209
# Fetch (or lazily create) the rdkafka producer bound to the calling thread.
# Keyed by thread object_id so concurrent flush threads never share a handle.
def get_producer
  @producers_mutex.synchronize {
    @producers[Thread.current.object_id] ||= @rdkafka.producer
  }
end
219
+
220
# Build a callable (tag, time, record) -> String from the <format> section.
# ltsv is special-cased; everything else goes through the formatter helper.
def setup_formatter(conf)
  if conf['@type'] == 'ltsv'
    require 'ltsv'
    Proc.new { |tag, time, record| LTSV.dump(record) }
  else
    @formatter = formatter_create(usage: 'rdkafka-plugin', conf: conf)
    @formatter.method(:format)
  end
end
231
+
232
# Deliver one buffer chunk: the topic comes from the chunk key (falling back
# to default_topic, then the tag), each record is injected/formatted and
# enqueued, and every delivery handle is awaited so a failed ack raises and
# Fluentd retries the chunk.
def write(chunk)
  tag = chunk.metadata.tag
  topic = chunk.metadata.variables[@topic_key_sym] || @default_topic || tag

  delivery_handles = []
  record_buf = nil
  record_buf_bytes = nil

  producer = get_producer
  chunk.msgpack_each { |time, record|
    begin
      record = inject_values_to_record(tag, time, record)
      record.delete(@topic_key) if @exclude_topic_key
      # Per-record routing fields win over the configured defaults.
      partition = (@exclude_partition ? record.delete(@partition_key) : record[@partition_key]) || @default_partition
      message_key = (@exclude_message_key ? record.delete(@message_key_key) : record[@message_key_key]) || @default_message_key

      record_buf = @formatter_proc.call(tag, time, record)
      record_buf_bytes = record_buf.bytesize
      if @max_send_limit_bytes && record_buf_bytes > @max_send_limit_bytes
        log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :record => record
        next
      end
    rescue StandardError => e
      log.warn "unexpected error during format record. Skip broken event:", :error => e.to_s, :error_class => e.class.to_s, :time => time, :record => record
      next
    end

    delivery_handles << enqueue_with_retry(producer, topic, record_buf, message_key, partition)
  }
  # A poll timeout of 0 means fire-and-forget: skip waiting entirely.
  delivery_handles.each { |handle|
    handle.wait(@rdkafka_delivery_handle_poll_timeout) if @rdkafka_delivery_handle_poll_timeout != 0
  }
rescue Exception => e
  # Deliberately broad: any delivery failure must propagate so the buffer
  # retries this chunk.
  log.warn "Send exception occurred: #{e} at #{e.backtrace.first}"
  # Raise exception to retry sendind messages
  raise e
end
271
+
272
# Enqueue one message on the producer's local queue, retrying up to
# @max_enqueue_retries times (sleeping @enqueue_retry_backoff seconds between
# attempts) when librdkafka reports the queue is full. Returns the delivery
# handle. Any other error is re-raised untouched.
def enqueue_with_retry(producer, topic, record_buf, message_key, partition)
  attempt = 0
  loop do
    begin
      return producer.produce(topic: topic, payload: record_buf, key: message_key, partition: partition)
    rescue Exception => e
      # BUGFIX: only Rdkafka::RdkafkaError responds to #code; calling it
      # unconditionally converted every unrelated failure (e.g. TypeError)
      # into a confusing NoMethodError.
      if e.respond_to?(:code) && e.code == :queue_full
        if attempt <= @max_enqueue_retries
          log.warn "Failed to enqueue message; attempting retry #{attempt} of #{@max_enqueue_retries} after #{@enqueue_retry_backoff}s"
          sleep @enqueue_retry_backoff
          attempt += 1
        else
          raise "Failed to enqueue message although tried retry #{@max_enqueue_retries} times"
        end
      else
        raise e
      end
    end
  end
end
292
+ end
293
+ end