fluent-plugin-kafka-custom-ruby-version 0.9.3

@@ -0,0 +1,301 @@
+ require 'thread'
+ require 'fluent/output'
+ require 'fluent/plugin/kafka_plugin_util'
+
+ require 'rdkafka'
+ require 'fluent/plugin/kafka_producer_ext'
+
+ class Rdkafka::Producer
+   # return false if producer is forcefully closed, otherwise return true
+   def close(timeout = nil)
+     @closing = true
+     # Wait for the polling thread to finish up
+     # If the broker isn't alive, the thread doesn't exit
+     if timeout
+       thr = @polling_thread.join(timeout)
+       return !!thr
+     else
+       @polling_thread.join
+       return true
+     end
+   end
+ end
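Note: the bounded close above leans on Thread#join's timeout form. join(limit) returns nil if the limit elapses first and the thread object otherwise, so `!!thr` collapses that to false/true. A quick core-Ruby illustration:

    worker = Thread.new { sleep } # never finishes on its own
    worker.join(0.1)              #=> nil after 0.1s, so !!nil == false
    worker.kill
    worker.join                   #=> the (now dead) Thread object, truthy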
+
+ class Fluent::KafkaOutputBuffered2 < Fluent::BufferedOutput
+   Fluent::Plugin.register_output('rdkafka', self)
+
+   config_param :brokers, :string, :default => 'localhost:9092',
+                :desc => <<-DESC
+ Set brokers directly:
+ <broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,..
+ Brokers: you can choose to use either brokers or zookeeper.
+ DESC
+   config_param :default_topic, :string, :default => nil,
+                :desc => "Output topic"
+   config_param :default_message_key, :string, :default => nil
+   config_param :default_partition, :integer, :default => nil
+   config_param :client_id, :string, :default => 'kafka'
+   config_param :output_data_type, :string, :default => 'json',
+                :desc => <<-DESC
+ Supported format: (json|ltsv|msgpack|attr:<record name>|<formatter name>)
+ DESC
+   config_param :output_include_tag, :bool, :default => false
+   config_param :output_include_time, :bool, :default => false
+   config_param :exclude_partition, :bool, :default => false,
+                :desc => <<-DESC
+ Set true to remove partition from data
+ DESC
+   config_param :exclude_message_key, :bool, :default => false,
+                :desc => <<-DESC
+ Set true to remove message key from data
+ DESC
+   config_param :exclude_topic_key, :bool, :default => false,
+                :desc => <<-DESC
+ Set true to remove topic name key from data
+ DESC
+   config_param :max_send_retries, :integer, :default => 2,
+                :desc => "Number of times to retry sending messages to a leader."
+   config_param :required_acks, :integer, :default => -1,
+                :desc => "The number of acks required per request."
+   config_param :ack_timeout, :time, :default => nil,
+                :desc => "How long the producer waits for acks."
+   config_param :compression_codec, :string, :default => nil,
+                :desc => <<-DESC
+ The codec the producer uses to compress messages.
+ Supported codecs: (gzip|snappy)
+ DESC
+
+   config_param :rdkafka_buffering_max_ms, :integer, :default => nil
+   config_param :rdkafka_buffering_max_messages, :integer, :default => nil
+   config_param :rdkafka_message_max_bytes, :integer, :default => nil
+   config_param :rdkafka_message_max_num, :integer, :default => nil
+   config_param :rdkafka_delivery_handle_poll_timeout, :integer, :default => 30
+   config_param :rdkafka_options, :hash, :default => {}
+
+   config_param :max_enqueue_retries, :integer, :default => 3
+   config_param :enqueue_retry_backoff, :integer, :default => 3
+
+   config_param :max_send_limit_bytes, :size, :default => nil,
+                :desc => "Drop records larger than this size (bytes) instead of sending them. See #write."
+   config_param :time_format, :string, :default => nil,
+                :desc => "Format for the injected 'time' field when output_include_time is set."
+
+   config_param :service_name, :string, :default => nil
+   config_param :ssl_client_cert_key_password, :string, :default => nil
+
+   include Fluent::KafkaPluginUtil::SSLSettings
+   include Fluent::KafkaPluginUtil::SaslSettings
+
+   def initialize
+     super
+     @producers = {}
+     @producers_mutex = Mutex.new
+   end
+
+   def configure(conf)
+     super
+     log.instance_eval {
+       def add(level, &block)
+         if block
+           self.info(block.call)
+         end
+       end
+     }
+     Rdkafka::Config.logger = log
+     config = build_config
+     @rdkafka = Rdkafka::Config.new(config)
+     @formatter_proc = setup_formatter(conf)
+   end
+
+   def build_config
+     config = {
+       :"bootstrap.servers" => @brokers,
+     }
+
+     if @ssl_ca_cert && @ssl_ca_cert[0]
+       ssl = true
+       config[:"ssl.ca.location"] = @ssl_ca_cert[0]
+       config[:"ssl.certificate.location"] = @ssl_client_cert if @ssl_client_cert
+       config[:"ssl.key.location"] = @ssl_client_cert_key if @ssl_client_cert_key
+       config[:"ssl.key.password"] = @ssl_client_cert_key_password if @ssl_client_cert_key_password
+     end
+
+     if @principal
+       sasl = true
+       config[:"sasl.mechanisms"] = "GSSAPI"
+       config[:"sasl.kerberos.principal"] = @principal
+       config[:"sasl.kerberos.service.name"] = @service_name if @service_name
+       config[:"sasl.kerberos.keytab"] = @keytab if @keytab
+     end
+
+     if ssl && sasl
+       security_protocol = "SASL_SSL"
+     elsif ssl && !sasl
+       security_protocol = "SSL"
+     elsif !ssl && sasl
+       security_protocol = "SASL_PLAINTEXT"
+     else
+       security_protocol = "PLAINTEXT"
+     end
+     config[:"security.protocol"] = security_protocol
+
+     config[:"compression.codec"] = @compression_codec if @compression_codec
+     config[:"message.send.max.retries"] = @max_send_retries if @max_send_retries
+     config[:"request.required.acks"] = @required_acks if @required_acks
+     config[:"request.timeout.ms"] = @ack_timeout * 1000 if @ack_timeout
+     config[:"queue.buffering.max.ms"] = @rdkafka_buffering_max_ms if @rdkafka_buffering_max_ms
+     config[:"queue.buffering.max.messages"] = @rdkafka_buffering_max_messages if @rdkafka_buffering_max_messages
+     config[:"message.max.bytes"] = @rdkafka_message_max_bytes if @rdkafka_message_max_bytes
+     config[:"batch.num.messages"] = @rdkafka_message_max_num if @rdkafka_message_max_num
+
+     @rdkafka_options.each { |k, v|
+       config[k.to_sym] = v
+     }
+
+     config
+   end
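To make the mapping concrete, here is a hedged sketch of the hash build_config returns for an assumed plain setup (no TLS, no Kerberos) with one rdkafka_options override; the keys are the librdkafka configuration properties used above:

    # Assumed settings: brokers "kafka1:9092,kafka2:9092", compression_codec "gzip",
    # defaults for max_send_retries (2) and required_acks (-1), and
    # rdkafka_options {"queue.buffering.max.kbytes" => 2048}.
    {
      :"bootstrap.servers"          => "kafka1:9092,kafka2:9092",
      :"security.protocol"          => "PLAINTEXT", # neither ssl nor sasl branch taken
      :"compression.codec"          => "gzip",
      :"message.send.max.retries"   => 2,
      :"request.required.acks"      => -1,
      :"queue.buffering.max.kbytes" => 2048         # merged in from rdkafka_options
    }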
+
+   def start
+     super
+   end
+
+   def multi_workers_ready?
+     true
+   end
+
+   def shutdown
+     super
+     shutdown_producers
+   end
+
+   def shutdown_producers
+     @producers_mutex.synchronize {
+       shutdown_threads = @producers.map { |key, producer|
+         th = Thread.new {
+           unless producer.close(10)
+             log.warn("Producer was forcefully closed after a 10 second wait")
+           end
+         }
+         th.abort_on_exception = true
+         th
+       }
+       shutdown_threads.each { |th| th.join }
+       @producers = {}
+     }
+   end
+
+   def get_producer
+     @producers_mutex.synchronize {
+       producer = @producers[Thread.current.object_id]
+       unless producer
+         producer = @rdkafka.producer
+         @producers[Thread.current.object_id] = producer
+       end
+       producer
+     }
+   end
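get_producer caches one producer per flush thread, keyed by Thread.current.object_id, so concurrent flushes never share an rdkafka handle. The same pattern in isolation (plain Ruby; Object.new stands in for @rdkafka.producer):

    CLIENTS = {}
    CLIENTS_MUTEX = Mutex.new

    def client_for_current_thread
      CLIENTS_MUTEX.synchronize do
        # each live thread has a distinct object_id, so each gets its own client
        CLIENTS[Thread.current.object_id] ||= Object.new
      end
    end

    threads = 4.times.map { Thread.new { client_for_current_thread } }
    puts threads.map(&:value).uniq.size #=> 4 distinct clients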
+
+   def emit(tag, es, chain)
+     super(tag, es, chain, tag)
+   end
+
+   def format_stream(tag, es)
+     es.to_msgpack_stream
+   end
+
+   def setup_formatter(conf)
+     if @output_data_type == 'json'
+       begin
+         require 'oj'
+         Oj.default_options = Fluent::DEFAULT_OJ_OPTIONS
+         Proc.new { |tag, time, record| Oj.dump(record) }
+       rescue LoadError
+         require 'yajl'
+         Proc.new { |tag, time, record| Yajl::Encoder.encode(record) }
+       end
+     elsif @output_data_type == 'ltsv'
+       require 'ltsv'
+       Proc.new { |tag, time, record| LTSV.dump(record) }
+     elsif @output_data_type == 'msgpack'
+       require 'msgpack'
+       Proc.new { |tag, time, record| record.to_msgpack }
+     elsif @output_data_type =~ /^attr:(.*)$/
+       @custom_attributes = $1.split(',').map(&:strip).reject(&:empty?)
+       @custom_attributes.unshift('time') if @output_include_time
+       @custom_attributes.unshift('tag') if @output_include_tag
+       @f_separator ||= "\t" # separator between attr values; TAB by default
+       Proc.new { |tag, time, record|
+         @custom_attributes.map { |attr|
+           record[attr].nil? ? '' : record[attr].to_s
+         }.join(@f_separator)
+       }
+     else
+       @formatter = Fluent::Plugin.new_formatter(@output_data_type)
+       @formatter.configure(conf)
+       @formatter.method(:format)
+     end
+   end
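For the attr: branch, a record is flattened to the listed attributes joined by the separator (TAB, per the default assumed above). For example, with output_data_type attr:user,action:

    custom_attributes = "user,action".split(',').map(&:strip).reject(&:empty?)
    record = { "user" => "alice", "action" => "login", "extra" => "ignored" }

    line = custom_attributes.map { |attr|
      record[attr].nil? ? '' : record[attr].to_s
    }.join("\t")

    puts line.inspect #=> "alice\tlogin"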
+
+   def write(chunk)
+     tag = chunk.key
+     def_topic = @default_topic || tag
+
+     record_buf = nil
+     record_buf_bytes = nil
+
+     begin
+       chunk.msgpack_each.map { |time, record|
+         begin
+           if @output_include_time
+             if @time_format
+               record['time'.freeze] = Time.at(time).strftime(@time_format)
+             else
+               record['time'.freeze] = time
+             end
+           end
+
+           record['tag'] = tag if @output_include_tag
+           topic = (@exclude_topic_key ? record.delete('topic'.freeze) : record['topic'.freeze]) || def_topic
+           partition = (@exclude_partition ? record.delete('partition'.freeze) : record['partition'.freeze]) || @default_partition
+           message_key = (@exclude_message_key ? record.delete('message_key'.freeze) : record['message_key'.freeze]) || @default_message_key
+
+           record_buf = @formatter_proc.call(tag, time, record)
+           record_buf_bytes = record_buf.bytesize
+           if @max_send_limit_bytes && record_buf_bytes > @max_send_limit_bytes
+             log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :record => record
+             next
+           end
+         rescue StandardError => e
+           log.warn "unexpected error during format record. Skip broken event:", :error => e.to_s, :error_class => e.class.to_s, :time => time, :record => record
+           next
+         end
+
+         producer = get_producer
+         handler = enqueue_with_retry(producer, topic, record_buf, message_key, partition)
+         handler
+       }.compact.each { |handler|
+         # `next` in the block above maps skipped events to nil; drop them before waiting
+         handler.wait(@rdkafka_delivery_handle_poll_timeout) if @rdkafka_delivery_handle_poll_timeout != 0
+       }
+     end
+   rescue Exception => e
+     log.warn "Send exception occurred: #{e} at #{e.backtrace.first}"
+     # Raise exception to retry sending messages
+     raise e
+   end
+
+   def enqueue_with_retry(producer, topic, record_buf, message_key, partition)
+     attempt = 0
+     loop do
+       begin
+         handler = producer.produce(topic: topic, payload: record_buf, key: message_key, partition: partition)
+         return handler
+       rescue Exception => e
+         if e.respond_to?(:code) && e.code == :queue_full
+           if attempt <= @max_enqueue_retries
+             log.warn "Failed to enqueue message; attempting retry #{attempt} of #{@max_enqueue_retries} after #{@enqueue_retry_backoff}s"
+             sleep @enqueue_retry_backoff
+             attempt += 1
+           else
+             raise "Failed to enqueue message after #{@max_enqueue_retries} retries"
+           end
+         else
+           raise e
+         end
+       end
+     end
+   end
+ end
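Outside Fluentd, the produce-then-wait flow this plugin wraps looks roughly like the hedged sketch below. It assumes the rdkafka gem, a reachable broker at localhost:9092, and that a full local queue surfaces as Rdkafka::RdkafkaError with code :queue_full, which is what enqueue_with_retry's guard checks for:

    require 'rdkafka'

    producer = Rdkafka::Config.new(:"bootstrap.servers" => "localhost:9092").producer

    begin
      handle = producer.produce(topic: "events", payload: "hello", key: "k1")
      handle.wait # block until librdkafka reports delivery, or raise on failure
    rescue Rdkafka::RdkafkaError => e
      if e.code == :queue_full
        sleep 1 # local send queue full: back off and retry, as enqueue_with_retry does
        retry
      else
        raise
      end
    ensure
      producer.close
    end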
@@ -0,0 +1,293 @@
+ require 'thread'
+ require 'fluent/plugin/output'
+ require 'fluent/plugin/kafka_plugin_util'
+
+ require 'rdkafka'
+
+ class Rdkafka::Producer
+   # return false if producer is forcefully closed, otherwise return true
+   def close(timeout = nil)
+     @closing = true
+     # Wait for the polling thread to finish up
+     # If the broker isn't alive, the thread doesn't exit
+     if timeout
+       thr = @polling_thread.join(timeout)
+       return !!thr
+     else
+       @polling_thread.join
+       return true
+     end
+   end
+ end
+
+ module Fluent::Plugin
+   class Fluent::Rdkafka2Output < Output
+     Fluent::Plugin.register_output('rdkafka2', self)
+
+     helpers :inject, :formatter
+
+     config_param :brokers, :string, :default => 'localhost:9092',
+                  :desc => <<-DESC
+ Set brokers directly:
+ <broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,..
+ Brokers: you can choose to use either brokers or zookeeper.
+ DESC
+     config_param :topic_key, :string, :default => 'topic', :desc => "Field for kafka topic"
+     config_param :default_topic, :string, :default => nil,
+                  :desc => "Default output topic when record doesn't have topic field"
+     config_param :message_key_key, :string, :default => 'message_key', :desc => "Field for kafka message key"
+     config_param :default_message_key, :string, :default => nil
+     config_param :partition_key, :string, :default => 'partition', :desc => "Field for kafka partition"
+     config_param :default_partition, :integer, :default => nil
+     config_param :client_id, :string, :default => 'kafka'
+     config_param :output_data_type, :string, :default => 'json', :obsoleted => "Use <format> section instead"
+     config_param :output_include_tag, :bool, :default => false, :obsoleted => "Use <inject> section instead"
+     config_param :output_include_time, :bool, :default => false, :obsoleted => "Use <inject> section instead"
+     config_param :exclude_partition, :bool, :default => false,
+                  :desc => <<-DESC
+ Set true to remove partition from data
+ DESC
+     config_param :exclude_message_key, :bool, :default => false,
+                  :desc => <<-DESC
+ Set true to remove message key from data
+ DESC
+     config_param :exclude_topic_key, :bool, :default => false,
+                  :desc => <<-DESC
+ Set true to remove topic name key from data
+ DESC
+     config_param :max_send_retries, :integer, :default => 2,
+                  :desc => "Number of times to retry sending messages to a leader."
+     config_param :required_acks, :integer, :default => -1,
+                  :desc => "The number of acks required per request."
+     config_param :ack_timeout, :time, :default => nil,
+                  :desc => "How long the producer waits for acks."
+     config_param :compression_codec, :string, :default => nil,
+                  :desc => <<-DESC
+ The codec the producer uses to compress messages.
+ Supported codecs: (gzip|snappy)
+ DESC
+
+     config_param :rdkafka_buffering_max_ms, :integer, :default => nil
+     config_param :rdkafka_buffering_max_messages, :integer, :default => nil
+     config_param :rdkafka_message_max_bytes, :integer, :default => nil
+     config_param :rdkafka_message_max_num, :integer, :default => nil
+     config_param :rdkafka_delivery_handle_poll_timeout, :integer, :default => 30
+     config_param :rdkafka_options, :hash, :default => {}
+
+     config_param :max_enqueue_retries, :integer, :default => 3
+     config_param :enqueue_retry_backoff, :integer, :default => 3
+
+     config_param :max_send_limit_bytes, :size, :default => nil,
+                  :desc => "Drop records larger than this size (bytes) instead of sending them. See #write."
+
+     config_param :service_name, :string, :default => nil
+     config_param :ssl_client_cert_key_password, :string, :default => nil
+
+     config_section :buffer do
+       config_set_default :chunk_keys, ["topic"]
+     end
+     config_section :format do
+       config_set_default :@type, 'json'
+       config_set_default :add_newline, false
+     end
+
+     include Fluent::KafkaPluginUtil::SSLSettings
+     include Fluent::KafkaPluginUtil::SaslSettings
+
+     def initialize
+       super
+       @producers = {}
+       @producers_mutex = Mutex.new
+     end
+
+     def configure(conf)
+       super
+       log.instance_eval {
+         def add(level, &block)
+           if block
+             self.info(block.call)
+           end
+         end
+       }
+       Rdkafka::Config.logger = log
+       config = build_config
+       @rdkafka = Rdkafka::Config.new(config)
+
+       if @default_topic.nil?
+         if @chunk_keys.include?(@topic_key) && !@chunk_key_tag
+           log.warn "Using the '#{@topic_key}' field of each event record for the topic, with no fallback. Set default_topic or add 'tag' to the buffer chunk keys, e.g. <buffer #{@topic_key},tag>"
+         end
+       else
+         if @chunk_key_tag
+           log.warn "default_topic is set; Fluentd's event tag is not used for the topic"
+         end
+       end
+
+       formatter_conf = conf.elements('format').first
+       unless formatter_conf
+         raise Fluent::ConfigError, "<format> section is required."
+       end
+       unless formatter_conf["@type"]
+         raise Fluent::ConfigError, "format/@type is required."
+       end
+       @formatter_proc = setup_formatter(formatter_conf)
+       @topic_key_sym = @topic_key.to_sym
+     end
+
+     def build_config
+       config = {:"bootstrap.servers" => @brokers}
+
+       if @ssl_ca_cert && @ssl_ca_cert[0]
+         ssl = true
+         config[:"ssl.ca.location"] = @ssl_ca_cert[0]
+         config[:"ssl.certificate.location"] = @ssl_client_cert if @ssl_client_cert
+         config[:"ssl.key.location"] = @ssl_client_cert_key if @ssl_client_cert_key
+         config[:"ssl.key.password"] = @ssl_client_cert_key_password if @ssl_client_cert_key_password
+       end
+
+       if @principal
+         sasl = true
+         config[:"sasl.mechanisms"] = "GSSAPI"
+         config[:"sasl.kerberos.principal"] = @principal
+         config[:"sasl.kerberos.service.name"] = @service_name if @service_name
+         config[:"sasl.kerberos.keytab"] = @keytab if @keytab
+       end
+
+       if ssl && sasl
+         security_protocol = "SASL_SSL"
+       elsif ssl && !sasl
+         security_protocol = "SSL"
+       elsif !ssl && sasl
+         security_protocol = "SASL_PLAINTEXT"
+       else
+         security_protocol = "PLAINTEXT"
+       end
+       config[:"security.protocol"] = security_protocol
+
+       config[:"compression.codec"] = @compression_codec if @compression_codec
+       config[:"message.send.max.retries"] = @max_send_retries if @max_send_retries
+       config[:"request.required.acks"] = @required_acks if @required_acks
+       config[:"request.timeout.ms"] = @ack_timeout * 1000 if @ack_timeout
+       config[:"queue.buffering.max.ms"] = @rdkafka_buffering_max_ms if @rdkafka_buffering_max_ms
+       config[:"queue.buffering.max.messages"] = @rdkafka_buffering_max_messages if @rdkafka_buffering_max_messages
+       config[:"message.max.bytes"] = @rdkafka_message_max_bytes if @rdkafka_message_max_bytes
+       config[:"batch.num.messages"] = @rdkafka_message_max_num if @rdkafka_message_max_num
+
+       @rdkafka_options.each { |k, v|
+         config[k.to_sym] = v
+       }
+
+       config
+     end
+
+     def start
+       super
+     end
+
+     def multi_workers_ready?
+       true
+     end
+
+     def shutdown
+       super
+       shutdown_producers
+     end
+
+     def shutdown_producers
+       @producers_mutex.synchronize {
+         shutdown_threads = @producers.map { |key, producer|
+           th = Thread.new {
+             unless producer.close(10)
+               log.warn("Producer was forcefully closed after a 10 second wait")
+             end
+           }
+           th.abort_on_exception = true
+           th
+         }
+         shutdown_threads.each { |th| th.join }
+         @producers = {}
+       }
+     end
+
+     def get_producer
+       @producers_mutex.synchronize {
+         producer = @producers[Thread.current.object_id]
+         unless producer
+           producer = @rdkafka.producer
+           @producers[Thread.current.object_id] = producer
+         end
+         producer
+       }
+     end
+
+     def setup_formatter(conf)
+       type = conf['@type']
+       case type
+       when 'ltsv'
+         require 'ltsv'
+         Proc.new { |tag, time, record| LTSV.dump(record) }
+       else
+         @formatter = formatter_create(usage: 'rdkafka-plugin', conf: conf)
+         @formatter.method(:format)
+       end
+     end
+
+     def write(chunk)
+       tag = chunk.metadata.tag
+       topic = (chunk.metadata.variables && chunk.metadata.variables[@topic_key_sym]) || @default_topic || tag
+
+       handlers = []
+       record_buf = nil
+       record_buf_bytes = nil
+
+       begin
+         producer = get_producer
+         chunk.msgpack_each { |time, record|
+           begin
+             record = inject_values_to_record(tag, time, record)
+             record.delete(@topic_key) if @exclude_topic_key
+             partition = (@exclude_partition ? record.delete(@partition_key) : record[@partition_key]) || @default_partition
+             message_key = (@exclude_message_key ? record.delete(@message_key_key) : record[@message_key_key]) || @default_message_key
+
+             record_buf = @formatter_proc.call(tag, time, record)
+             record_buf_bytes = record_buf.bytesize
+             if @max_send_limit_bytes && record_buf_bytes > @max_send_limit_bytes
+               log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :record => record
+               next
+             end
+           rescue StandardError => e
+             log.warn "unexpected error during format record. Skip broken event:", :error => e.to_s, :error_class => e.class.to_s, :time => time, :record => record
+             next
+           end
+
+           handlers << enqueue_with_retry(producer, topic, record_buf, message_key, partition)
+         }
+         handlers.each { |handler|
+           handler.wait(@rdkafka_delivery_handle_poll_timeout) if @rdkafka_delivery_handle_poll_timeout != 0
+         }
+       end
+     rescue Exception => e
+       log.warn "Send exception occurred: #{e} at #{e.backtrace.first}"
+       # Raise exception to retry sending messages
+       raise e
+     end
+
+     def enqueue_with_retry(producer, topic, record_buf, message_key, partition)
+       attempt = 0
+       loop do
+         begin
+           return producer.produce(topic: topic, payload: record_buf, key: message_key, partition: partition)
+         rescue Exception => e
+           if e.respond_to?(:code) && e.code == :queue_full
+             if attempt <= @max_enqueue_retries
+               log.warn "Failed to enqueue message; attempting retry #{attempt} of #{@max_enqueue_retries} after #{@enqueue_retry_backoff}s"
+               sleep @enqueue_retry_backoff
+               attempt += 1
+             else
+               raise "Failed to enqueue message after #{@max_enqueue_retries} retries"
+             end
+           else
+             raise e
+           end
+         end
+       end
+     end
+   end
+ end
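The topic choice in rdkafka2's #write is a three-step fallback: the chunk's 'topic' variable (from the buffer chunk key), then default_topic, then the event tag. A minimal sketch, with a hypothetical Metadata struct standing in for the buffer chunk metadata:

    Metadata = Struct.new(:tag, :variables) # hypothetical stand-in

    def pick_topic(metadata, topic_key_sym, default_topic)
      (metadata.variables && metadata.variables[topic_key_sym]) ||
        default_topic ||
        metadata.tag
    end

    pick_topic(Metadata.new("app.access", { topic: "web" }), :topic, nil) #=> "web"
    pick_topic(Metadata.new("app.access", nil), :topic, "events")         #=> "events"
    pick_topic(Metadata.new("app.access", nil), :topic, nil)              #=> "app.access"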