sk-fluent-plugin-kafka 0.8.0

data/lib/fluent/plugin/out_rdkafka.rb ADDED
@@ -0,0 +1,301 @@
+ require 'thread'
+ require 'fluent/output'
+ require 'fluent/plugin/kafka_plugin_util'
+
+ require 'rdkafka'
+ require 'fluent/plugin/kafka_producer_ext'
+
+ class Rdkafka::Producer
+   # return false if producer is forcefully closed, otherwise return true
+   def close(timeout = nil)
+     @closing = true
+     # Wait for the polling thread to finish up
+     # If the broker isn't alive, the thread doesn't exit
+     if timeout
+       thr = @polling_thread.join(timeout)
+       return !!thr
+     else
+       @polling_thread.join
+       return true
+     end
+   end
+ end
+
+ class Fluent::KafkaOutputBuffered2 < Fluent::BufferedOutput
+   Fluent::Plugin.register_output('rdkafka', self)
+
+   config_param :brokers, :string, :default => 'localhost:9092',
+                :desc => <<-DESC
+ Set brokers directly:
+ <broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,..
+ Brokers: you can choose to use either brokers or zookeeper.
+ DESC
+   config_param :default_topic, :string, :default => nil,
+                :desc => "Output topic"
+   config_param :default_message_key, :string, :default => nil
+   config_param :default_partition, :integer, :default => nil
+   config_param :client_id, :string, :default => 'kafka'
+   config_param :output_data_type, :string, :default => 'json',
+                :desc => <<-DESC
+ Supported format: (json|ltsv|msgpack|attr:<record name>|<formatter name>)
+ DESC
+   config_param :output_include_tag, :bool, :default => false
+   config_param :output_include_time, :bool, :default => false
+   config_param :exclude_partition, :bool, :default => false,
+                :desc => <<-DESC
+ Set true to remove partition from data
+ DESC
+   config_param :exclude_message_key, :bool, :default => false,
+                :desc => <<-DESC
+ Set true to remove message key from data
+ DESC
+   config_param :exclude_topic_key, :bool, :default => false,
+                :desc => <<-DESC
+ Set true to remove topic name key from data
+ DESC
+   config_param :max_send_retries, :integer, :default => 2,
+                :desc => "Number of times to retry sending of messages to a leader."
+   config_param :required_acks, :integer, :default => -1,
+                :desc => "The number of acks required per request."
+   config_param :ack_timeout, :time, :default => nil,
+                :desc => "How long the producer waits for acks."
+   config_param :compression_codec, :string, :default => nil,
+                :desc => <<-DESC
+ The codec the producer uses to compress messages.
+ Supported codecs: (gzip|snappy)
+ DESC
+
+   config_param :rdkafka_buffering_max_ms, :integer, :default => nil
+   config_param :rdkafka_buffering_max_messages, :integer, :default => nil
+   config_param :rdkafka_message_max_bytes, :integer, :default => nil
+   config_param :rdkafka_message_max_num, :integer, :default => nil
+   config_param :rdkafka_delivery_handle_poll_timeout, :integer, :default => 30
+   config_param :rdkafka_options, :hash, :default => {}
+
+   config_param :max_enqueue_retries, :integer, :default => 3
+   config_param :enqueue_retry_backoff, :integer, :default => 3
+
+   config_param :service_name, :string, :default => nil
+   config_param :ssl_client_cert_key_password, :string, :default => nil
+
+   include Fluent::KafkaPluginUtil::SSLSettings
+   include Fluent::KafkaPluginUtil::SaslSettings
+
+   def initialize
+     super
+     @producers = {}
+     @producers_mutex = Mutex.new
+   end
+
+   def configure(conf)
+     super
+     # rdkafka logs through the Logger#add interface; route everything it
+     # emits to Fluentd's logger at info level.
+     log.instance_eval {
+       def add(level, &block)
+         if block
+           self.info(block.call)
+         end
+       end
+     }
+     Rdkafka::Config.logger = log
+     config = build_config
+     @rdkafka = Rdkafka::Config.new(config)
+     @formatter_proc = setup_formatter(conf)
+   end
+
+   def build_config
+     config = {
+       :"bootstrap.servers" => @brokers,
+     }
+
+     ssl = false
+     sasl = false
+
+     if @ssl_ca_cert && @ssl_ca_cert[0]
+       ssl = true
+       config[:"ssl.ca.location"] = @ssl_ca_cert[0]
+       config[:"ssl.certificate.location"] = @ssl_client_cert if @ssl_client_cert
+       config[:"ssl.key.location"] = @ssl_client_cert_key if @ssl_client_cert_key
+       config[:"ssl.key.password"] = @ssl_client_cert_key_password if @ssl_client_cert_key_password
+     end
+
+     if @principal
+       sasl = true
+       config[:"sasl.mechanisms"] = "GSSAPI"
+       config[:"sasl.kerberos.principal"] = @principal
+       config[:"sasl.kerberos.service.name"] = @service_name if @service_name
+       config[:"sasl.kerberos.keytab"] = @keytab if @keytab
+     end
+
+     if ssl && sasl
+       security_protocol = "SASL_SSL"
+     elsif ssl
+       security_protocol = "SSL"
+     elsif sasl
+       security_protocol = "SASL_PLAINTEXT"
+     else
+       security_protocol = "PLAINTEXT"
+     end
+     config[:"security.protocol"] = security_protocol
+
+     config[:"compression.codec"] = @compression_codec if @compression_codec
+     config[:"message.send.max.retries"] = @max_send_retries if @max_send_retries
+     config[:"request.required.acks"] = @required_acks if @required_acks
+     config[:"request.timeout.ms"] = @ack_timeout * 1000 if @ack_timeout
+     config[:"queue.buffering.max.ms"] = @rdkafka_buffering_max_ms if @rdkafka_buffering_max_ms
+     config[:"queue.buffering.max.messages"] = @rdkafka_buffering_max_messages if @rdkafka_buffering_max_messages
+     config[:"message.max.bytes"] = @rdkafka_message_max_bytes if @rdkafka_message_max_bytes
+     config[:"batch.num.messages"] = @rdkafka_message_max_num if @rdkafka_message_max_num
+
+     # Any remaining librdkafka settings pass through verbatim.
+     @rdkafka_options.each { |k, v|
+       config[k.to_sym] = v
+     }
+
+     config
+   end
+
+   def start
+     super
+   end
+
+   def multi_workers_ready?
+     true
+   end
+
+   def shutdown
+     super
+     shutdown_producers
+   end
+
+   def shutdown_producers
+     @producers_mutex.synchronize {
+       shutdown_threads = @producers.map { |key, producer|
+         th = Thread.new {
+           unless producer.close(10)
+             log.warn("Queue is forcefully closed after 10 seconds wait")
+           end
+         }
+         th.abort_on_exception = true
+         th
+       }
+       shutdown_threads.each { |th| th.join }
+       @producers = {}
+     }
+   end
+
+   # One producer per worker thread, created lazily.
+   def get_producer
+     @producers_mutex.synchronize {
+       producer = @producers[Thread.current.object_id]
+       unless producer
+         producer = @rdkafka.producer
+         @producers[Thread.current.object_id] = producer
+       end
+       producer
+     }
+   end
+
+   def emit(tag, es, chain)
+     super(tag, es, chain, tag)
+   end
+
+   def format_stream(tag, es)
+     es.to_msgpack_stream
+   end
+
+   def setup_formatter(conf)
+     if @output_data_type == 'json'
+       begin
+         require 'oj'
+         Oj.default_options = Fluent::DEFAULT_OJ_OPTIONS
+         Proc.new { |tag, time, record| Oj.dump(record) }
+       rescue LoadError
+         require 'yajl'
+         Proc.new { |tag, time, record| Yajl::Encoder.encode(record) }
+       end
+     elsif @output_data_type == 'ltsv'
+       require 'ltsv'
+       Proc.new { |tag, time, record| LTSV.dump(record) }
+     elsif @output_data_type == 'msgpack'
+       require 'msgpack'
+       Proc.new { |tag, time, record| record.to_msgpack }
+     elsif @output_data_type =~ /^attr:(.*)$/
+       @custom_attributes = $1.split(',').map(&:strip).reject(&:empty?)
+       @custom_attributes.unshift('time') if @output_include_time
+       @custom_attributes.unshift('tag') if @output_include_tag
+       Proc.new { |tag, time, record|
+         @custom_attributes.map { |attr|
+           record[attr].nil? ? '' : record[attr].to_s
+         }.join(@f_separator)
+         # NOTE: @f_separator has no config_param in this file; when unset,
+         # fields are joined with no separator.
+       }
+     else
+       @formatter = Fluent::Plugin.new_formatter(@output_data_type)
+       @formatter.configure(conf)
+       @formatter.method(:format)
+     end
+   end
+
+   def write(chunk)
+     tag = chunk.key
+     def_topic = @default_topic || tag
+
+     record_buf = nil
+     record_buf_bytes = nil
+
+     begin
+       chunk.msgpack_each.map { |time, record|
+         begin
+           if @output_include_time
+             if @time_format
+               record['time'.freeze] = Time.at(time).strftime(@time_format)
+             else
+               record['time'.freeze] = time
+             end
+           end
+
+           record['tag'] = tag if @output_include_tag
+           topic = (@exclude_topic_key ? record.delete('topic'.freeze) : record['topic'.freeze]) || def_topic
+           partition = (@exclude_partition ? record.delete('partition'.freeze) : record['partition'.freeze]) || @default_partition
+           message_key = (@exclude_message_key ? record.delete('message_key'.freeze) : record['message_key'.freeze]) || @default_message_key
+
+           record_buf = @formatter_proc.call(tag, time, record)
+           record_buf_bytes = record_buf.bytesize
+           if @max_send_limit_bytes && record_buf_bytes > @max_send_limit_bytes
+             log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :record => record
+             next
+           end
+         rescue StandardError => e
+           log.warn "unexpected error during format record. Skip broken event:", :error => e.to_s, :error_class => e.class.to_s, :time => time, :record => record
+           next
+         end
+
+         producer = get_producer
+         handler = enqueue_with_retry(producer, topic, record_buf, message_key, partition)
+         handler
+       }.each { |handler|
+         # Skipped events leave a nil in the mapped array; guard against it.
+         handler.wait(@rdkafka_delivery_handle_poll_timeout) if handler && @rdkafka_delivery_handle_poll_timeout != 0
+       }
+     end
+   rescue Exception => e
+     log.warn "Send exception occurred: #{e} at #{e.backtrace.first}"
+     # Raise exception to retry sending messages
+     raise e
+   end
+
+   def enqueue_with_retry(producer, topic, record_buf, message_key, partition)
+     attempt = 0
+     loop do
+       begin
+         handler = producer.produce(topic: topic, payload: record_buf, key: message_key, partition: partition)
+         return handler
+       rescue Exception => e
+         # Only a full librdkafka queue is worth retrying here; anything
+         # else propagates. Guard the :code check so non-rdkafka errors
+         # don't raise NoMethodError.
+         if e.respond_to?(:code) && e.code == :queue_full
+           if attempt <= @max_enqueue_retries
+             log.warn "Failed to enqueue message; attempting retry #{attempt} of #{@max_enqueue_retries} after #{@enqueue_retry_backoff}s"
+             sleep @enqueue_retry_backoff
+             attempt += 1
+           else
+             raise "Failed to enqueue message after #{@max_enqueue_retries} retries"
+           end
+         else
+           raise e
+         end
+       end
+     end
+   end
+ end
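
For context, the produce-and-wait cycle that write performs per record looks roughly like this as a standalone script. This is a minimal sketch, not part of the gem; the broker address, topic, and payload are placeholders, and the positional wait timeout matches the rdkafka-ruby versions this plugin targets.

    # sketch.rb -- illustrative only
    require 'rdkafka'

    config = Rdkafka::Config.new(:"bootstrap.servers" => "localhost:9092")
    producer = config.producer

    # Mirrors one iteration of write: topic/key/partition come from the
    # record (or the default_* params), payload from the formatter.
    handle = producer.produce(
      topic:     "test-topic",
      payload:   '{"a":1}',
      key:       nil,
      partition: nil
    )
    handle.wait(30)   # rdkafka_delivery_handle_poll_timeout, in seconds
    producer.close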
data/test/helper.rb ADDED
@@ -0,0 +1,27 @@
+ require 'rubygems'
+ require 'bundler'
+ begin
+   Bundler.setup(:default, :development)
+ rescue Bundler::BundlerError => e
+   $stderr.puts e.message
+   $stderr.puts "Run `bundle install` to install missing gems"
+   exit e.status_code
+ end
+ require 'test/unit'
+
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
+ require 'fluent/test'
+ unless ENV.has_key?('VERBOSE')
+   nulllogger = Object.new
+   nulllogger.instance_eval {
+     def method_missing(method, *args)
+     end
+   }
+   $log = nulllogger
+ end
+
+ require 'fluent/plugin/out_kafka'
+
+ class Test::Unit::TestCase
+ end
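
The helper silences Fluentd's global `$log` with a null object whose `method_missing` swallows every call unless VERBOSE is set. The same pattern in miniature:

    # Illustrative only: any method called on `quiet` is silently dropped.
    quiet = Object.new
    def quiet.method_missing(_name, *_args)
      nil
    end
    quiet.info("never printed")
    quiet.warn("also dropped")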
data/test/plugin/test_out_kafka.rb ADDED
@@ -0,0 +1,58 @@
+ require 'helper'
+ require 'fluent/output'
+
+ class KafkaOutputTest < Test::Unit::TestCase
+   def setup
+     Fluent::Test.setup
+   end
+
+   BASE_CONFIG = %[
+     type kafka_buffered
+   ]
+
+   CONFIG = BASE_CONFIG + %[
+     default_topic kitagawakeiko
+     brokers localhost:9092
+   ]
+
+   def create_driver(conf = CONFIG, tag = 'test')
+     Fluent::Test::BufferedOutputTestDriver.new(Fluent::KafkaOutput, tag).configure(conf)
+   end
+
+   def test_configure
+     assert_nothing_raised(Fluent::ConfigError) {
+       create_driver(BASE_CONFIG)
+     }
+
+     assert_nothing_raised(Fluent::ConfigError) {
+       create_driver(CONFIG)
+     }
+
+     assert_nothing_raised(Fluent::ConfigError) {
+       create_driver(CONFIG + %[
+         buffer_type memory
+       ])
+     }
+
+     d = create_driver
+     assert_equal 'kitagawakeiko', d.instance.default_topic
+     assert_equal 'localhost:9092', d.instance.brokers
+   end
+
+   def test_format
+     d = create_driver
+   end
+
+   def test_multi_worker_support
+     d = create_driver
+     assert_equal true, d.instance.multi_workers_ready?
+   end
+
+   def test_write
+     d = create_driver
+     time = Time.parse("2011-01-02 13:14:15 UTC").to_i
+     d.emit({"a"=>1}, time)
+     d.emit({"a"=>2}, time)
+   end
+ end
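
These tests exercise Fluent::KafkaOutput, the plain kafka output. A driver for the rdkafka output added in this release would follow the same pattern; the sketch below is hypothetical (not in the gem), using the class and params defined in out_rdkafka.rb above, and assumes the rdkafka gem is installed so configure can build the producer config.

    # Hypothetical: a test driver for the rdkafka output.
    def create_rdkafka_driver(tag = 'test')
      conf = %[
        type rdkafka
        default_topic kitagawakeiko
        brokers localhost:9092
      ]
      Fluent::Test::BufferedOutputTestDriver.new(Fluent::KafkaOutputBuffered2, tag).configure(conf)
    end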
metadata ADDED
@@ -0,0 +1,147 @@
+ --- !ruby/object:Gem::Specification
+ name: sk-fluent-plugin-kafka
+ version: !ruby/object:Gem::Version
+   version: 0.8.0
+ platform: ruby
+ authors:
+ - Hidemasa Togashi
+ - Masahiro Nakagawa
+ autorequire:
+ bindir: bin
+ cert_chain: []
+ date: 2018-10-30 00:00:00.000000000 Z
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   name: fluentd
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: 0.10.58
+     - - "<"
+       - !ruby/object:Gem::Version
+         version: '2'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: 0.10.58
+     - - "<"
+       - !ruby/object:Gem::Version
+         version: '2'
+ - !ruby/object:Gem::Dependency
+   name: ltsv
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: '0'
+ - !ruby/object:Gem::Dependency
+   name: ruby-kafka
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: 0.7.1
+     - - "<"
+       - !ruby/object:Gem::Version
+         version: 0.8.0
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: 0.7.1
+     - - "<"
+       - !ruby/object:Gem::Version
+         version: 0.8.0
+ - !ruby/object:Gem::Dependency
+   name: rake
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: 0.9.2
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: 0.9.2
+ - !ruby/object:Gem::Dependency
+   name: test-unit
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: 3.0.8
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: 3.0.8
+ description: Fluentd plugin for Apache Kafka > 0.8
+ email:
+ - sandeep.kotha@live.com
+ executables: []
+ extensions: []
+ extra_rdoc_files: []
+ files:
+ - ".gitignore"
+ - ".travis.yml"
+ - ChangeLog
+ - Gemfile
+ - LICENSE
+ - README.md
+ - Rakefile
+ - fluent-plugin-kafka.gemspec
+ - lib/fluent/plugin/in_kafka.rb
+ - lib/fluent/plugin/in_kafka_group.rb
+ - lib/fluent/plugin/kafka_plugin_util.rb
+ - lib/fluent/plugin/kafka_producer_ext.rb
+ - lib/fluent/plugin/out_kafka.rb
+ - lib/fluent/plugin/out_kafka2.rb
+ - lib/fluent/plugin/out_kafka_buffered.rb
+ - lib/fluent/plugin/out_rdkafka.rb
+ - test/helper.rb
+ - test/plugin/test_out_kafka.rb
+ homepage: https://github.com/fluent/fluent-plugin-kafka
+ licenses:
+ - Apache-2.0
+ metadata: {}
+ post_install_message:
+ rdoc_options: []
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: 2.1.0
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: '0'
+ requirements: []
+ rubyforge_project:
+ rubygems_version: 2.7.7
+ signing_key:
+ specification_version: 4
+ summary: Fluentd plugin for Apache Kafka > 0.8
+ test_files:
+ - test/helper.rb
+ - test/plugin/test_out_kafka.rb
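
Note that the gemspec declares fluentd, ltsv, and ruby-kafka as runtime dependencies but not rdkafka, so users of the rdkafka output must install that gem themselves. A Gemfile matching the constraints above might look like this (illustrative only):

    # Illustrative Gemfile; versions mirror the gemspec constraints above.
    source 'https://rubygems.org'

    gem 'sk-fluent-plugin-kafka', '0.8.0'
    gem 'rdkafka'   # needed only by the rdkafka output; not a declared dependency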