sk-fluent-plugin-kafka 0.8.0

data/lib/fluent/plugin/out_rdkafka.rb ADDED
@@ -0,0 +1,301 @@
+ require 'thread'
+ require 'fluent/output'
+ require 'fluent/plugin/kafka_plugin_util'
+
+ require 'rdkafka'
+ require 'fluent/plugin/kafka_producer_ext'
+
+ class Rdkafka::Producer
+   # Returns false if the producer was forcefully closed, otherwise true.
+   def close(timeout = nil)
+     @closing = true
+     # Wait for the polling thread to finish up.
+     # If the broker isn't alive, the thread doesn't exit.
+     if timeout
+       thr = @polling_thread.join(timeout)
+       return !!thr
+     else
+       @polling_thread.join
+       return true
+     end
+   end
+ end
+
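# --- Editorial sketch, not part of the package: the patched #close above
# --- relies on Thread#join's timeout form, which returns the thread on
# --- success and nil on timeout; `!!thr` turns that into true/false.
#
#   done = Thread.new { sleep 0.1 }
#   !!done.join(1)      #=> true  (thread finished; close returns true)
#   stuck = Thread.new { sleep 60 }
#   !!stuck.join(0.01)  #=> false (still running; close reports a forced close)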
+ class Fluent::KafkaOutputBuffered2 < Fluent::BufferedOutput
+   Fluent::Plugin.register_output('rdkafka', self)
+
+   config_param :brokers, :string, :default => 'localhost:9092',
+                :desc => <<-DESC
+ Set brokers directly:
+ <broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,..
+ DESC
+   config_param :default_topic, :string, :default => nil,
+                :desc => "Output topic"
+   config_param :default_message_key, :string, :default => nil
+   config_param :default_partition, :integer, :default => nil
+   config_param :client_id, :string, :default => 'kafka'
+   config_param :output_data_type, :string, :default => 'json',
+                :desc => "Supported format: (json|ltsv|msgpack|attr:<record name>|<formatter name>)"
+   config_param :output_include_tag, :bool, :default => false
+   config_param :output_include_time, :bool, :default => false
+   config_param :exclude_partition, :bool, :default => false,
+                :desc => "Set true to remove partition from data"
+   config_param :exclude_message_key, :bool, :default => false,
+                :desc => "Set true to remove message key from data"
+   config_param :exclude_topic_key, :bool, :default => false,
+                :desc => "Set true to remove topic name key from data"
+   config_param :max_send_retries, :integer, :default => 2,
+                :desc => "Number of times to retry sending of messages to a leader."
+   config_param :required_acks, :integer, :default => -1,
+                :desc => "The number of acks required per request."
+   config_param :ack_timeout, :time, :default => nil,
+                :desc => "How long the producer waits for acks."
+   config_param :compression_codec, :string, :default => nil,
+                :desc => "The codec the producer uses to compress messages. Supported codecs: (gzip|snappy)"
+
+   config_param :rdkafka_buffering_max_ms, :integer, :default => nil
+   config_param :rdkafka_buffering_max_messages, :integer, :default => nil
+   config_param :rdkafka_message_max_bytes, :integer, :default => nil
+   config_param :rdkafka_message_max_num, :integer, :default => nil
+   config_param :rdkafka_delivery_handle_poll_timeout, :integer, :default => 30
+   config_param :rdkafka_options, :hash, :default => {}
+
+   config_param :max_enqueue_retries, :integer, :default => 3
+   config_param :enqueue_retry_backoff, :integer, :default => 3
+
+   config_param :service_name, :string, :default => nil
+   config_param :ssl_client_cert_key_password, :string, :default => nil
+
+   include Fluent::KafkaPluginUtil::SSLSettings
+   include Fluent::KafkaPluginUtil::SaslSettings
+
+   def initialize
+     super
+     @producers = {}
+     @producers_mutex = Mutex.new
+   end
+
+   def configure(conf)
+     super
+     # Route rdkafka's Logger-style #add calls to Fluentd's logger
+     # (the severity argument is ignored; everything logs at info).
+     log.instance_eval {
+       def add(level, &block)
+         if block
+           self.info(block.call)
+         end
+       end
+     }
+     Rdkafka::Config.logger = log
+     config = build_config
+     @rdkafka = Rdkafka::Config.new(config)
+     @formatter_proc = setup_formatter(conf)
+   end
+
+   def build_config
+     config = {
+       :"bootstrap.servers" => @brokers,
+     }
+
+     if @ssl_ca_cert && @ssl_ca_cert[0]
+       ssl = true
+       config[:"ssl.ca.location"] = @ssl_ca_cert[0]
+       config[:"ssl.certificate.location"] = @ssl_client_cert if @ssl_client_cert
+       config[:"ssl.key.location"] = @ssl_client_cert_key if @ssl_client_cert_key
+       config[:"ssl.key.password"] = @ssl_client_cert_key_password if @ssl_client_cert_key_password
+     end
+
+     if @principal
+       sasl = true
+       config[:"sasl.mechanisms"] = "GSSAPI"
+       config[:"sasl.kerberos.principal"] = @principal
+       config[:"sasl.kerberos.service.name"] = @service_name if @service_name
+       config[:"sasl.kerberos.keytab"] = @keytab if @keytab
+     end
+
+     # ssl/sasl remain nil (falsy) when the branches above are skipped.
+     if ssl && sasl
+       security_protocol = "SASL_SSL"
+     elsif ssl && !sasl
+       security_protocol = "SSL"
+     elsif !ssl && sasl
+       security_protocol = "SASL_PLAINTEXT"
+     else
+       security_protocol = "PLAINTEXT"
+     end
+     config[:"security.protocol"] = security_protocol
+
+     config[:"compression.codec"] = @compression_codec if @compression_codec
+     config[:"message.send.max.retries"] = @max_send_retries if @max_send_retries
+     config[:"request.required.acks"] = @required_acks if @required_acks
+     config[:"request.timeout.ms"] = @ack_timeout * 1000 if @ack_timeout
+     config[:"queue.buffering.max.ms"] = @rdkafka_buffering_max_ms if @rdkafka_buffering_max_ms
+     config[:"queue.buffering.max.messages"] = @rdkafka_buffering_max_messages if @rdkafka_buffering_max_messages
+     config[:"message.max.bytes"] = @rdkafka_message_max_bytes if @rdkafka_message_max_bytes
+     config[:"batch.num.messages"] = @rdkafka_message_max_num if @rdkafka_message_max_num
+
+     @rdkafka_options.each { |k, v|
+       config[k.to_sym] = v
+     }
+
+     config
+   end
+
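# --- Editorial sketch, not part of the package: a standalone illustration of
# --- build_config's tail. String keys from rdkafka_options become librdkafka
# --- config symbols, and the security protocol follows the ssl/sasl flags.
# --- All values below are hypothetical.
#
#   ssl, sasl = true, false
#   demo = { :"bootstrap.servers" => "localhost:9092" }
#   { "queue.buffering.max.kbytes" => 2048 }.each { |k, v| demo[k.to_sym] = v }
#   demo[:"security.protocol"] =
#     if ssl && sasl then "SASL_SSL"
#     elsif ssl then "SSL"
#     elsif sasl then "SASL_PLAINTEXT"
#     else "PLAINTEXT"
#     end
#   p demo
#   # => {:"bootstrap.servers"=>"localhost:9092",
#   #     :"queue.buffering.max.kbytes"=>2048, :"security.protocol"=>"SSL"}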
+   def start
+     super
+   end
+
+   def multi_workers_ready?
+     true
+   end
+
+   def shutdown
+     super
+     shutdown_producers
+   end
+
+   def shutdown_producers
+     @producers_mutex.synchronize {
+       shutdown_threads = @producers.map { |key, producer|
+         th = Thread.new {
+           unless producer.close(10)
+             log.warn("Queue is forcefully closed after 10 seconds wait")
+           end
+         }
+         th.abort_on_exception = true
+         th
+       }
+       shutdown_threads.each { |th| th.join }
+       @producers = {}
+     }
+   end
+
+   def get_producer
+     @producers_mutex.synchronize {
+       producer = @producers[Thread.current.object_id]
+       unless producer
+         producer = @rdkafka.producer
+         @producers[Thread.current.object_id] = producer
+       end
+       producer
+     }
+   end
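# --- Editorial sketch, not part of the package: get_producer keeps one
# --- producer per worker thread, keyed by Thread.current.object_id, so flush
# --- threads never share an rdkafka handle. Plain objects stand in here.
#
#   producers, mutex = {}, Mutex.new
#   fetch = -> { mutex.synchronize { producers[Thread.current.object_id] ||= Object.new } }
#   a = Thread.new { fetch.call }.value
#   b = Thread.new { fetch.call }.value
#   p a.equal?(b)  # => false: different threads get different producers
#   x, y = Thread.new { [fetch.call, fetch.call] }.value
#   p x.equal?(y)  # => true: repeated calls on one thread reuse the cached one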
+
+   def emit(tag, es, chain)
+     super(tag, es, chain, tag)
+   end
+
+   def format_stream(tag, es)
+     es.to_msgpack_stream
+   end
+
+   def setup_formatter(conf)
+     if @output_data_type == 'json'
+       begin
+         require 'oj'
+         Oj.default_options = Fluent::DEFAULT_OJ_OPTIONS
+         Proc.new { |tag, time, record| Oj.dump(record) }
+       rescue LoadError
+         # Fall back to yajl when oj is not installed.
+         require 'yajl'
+         Proc.new { |tag, time, record| Yajl::Encoder.encode(record) }
+       end
+     elsif @output_data_type == 'ltsv'
+       require 'ltsv'
+       Proc.new { |tag, time, record| LTSV.dump(record) }
+     elsif @output_data_type == 'msgpack'
+       require 'msgpack'
+       Proc.new { |tag, time, record| record.to_msgpack }
+     elsif @output_data_type =~ /^attr:(.*)$/
+       @custom_attributes = $1.split(',').map(&:strip).reject(&:empty?)
+       @custom_attributes.unshift('time') if @output_include_time
+       @custom_attributes.unshift('tag') if @output_include_tag
+       Proc.new { |tag, time, record|
+         @custom_attributes.map { |attr|
+           record[attr].nil? ? '' : record[attr].to_s
+         }.join(@f_separator)
+       }
+     else
+       @formatter = Fluent::Plugin.new_formatter(@output_data_type)
+       @formatter.configure(conf)
+       @formatter.method(:format)
+     end
+   end
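# --- Editorial sketch, not part of the package: what the attr:<fields> branch
# --- yields for a hypothetical record. "\t" stands in for @f_separator, which
# --- is never assigned in this file (joining on nil concatenates the fields).
#
#   attrs  = 'host,level'.split(',').map(&:strip).reject(&:empty?)
#   record = { 'host' => 'web1', 'level' => 'info', 'msg' => 'boot' }
#   p attrs.map { |a| record[a].nil? ? '' : record[a].to_s }.join("\t")
#   # => "web1\tinfo"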
+
+   def write(chunk)
+     tag = chunk.key
+     def_topic = @default_topic || tag
+
+     record_buf = nil
+     record_buf_bytes = nil
+
+     begin
+       chunk.msgpack_each.map { |time, record|
+         begin
+           if @output_include_time
+             # NOTE: @time_format is never assigned in this file, so the raw
+             # time value is always emitted.
+             if @time_format
+               record['time'.freeze] = Time.at(time).strftime(@time_format)
+             else
+               record['time'.freeze] = time
+             end
+           end
+
+           record['tag'] = tag if @output_include_tag
+           topic = (@exclude_topic_key ? record.delete('topic'.freeze) : record['topic'.freeze]) || def_topic
+           partition = (@exclude_partition ? record.delete('partition'.freeze) : record['partition'.freeze]) || @default_partition
+           message_key = (@exclude_message_key ? record.delete('message_key'.freeze) : record['message_key'.freeze]) || @default_message_key
+
+           record_buf = @formatter_proc.call(tag, time, record)
+           record_buf_bytes = record_buf.bytesize
+           # NOTE: no config_param defines @max_send_limit_bytes in this file,
+           # so this size check is effectively disabled (nil is falsy).
+           if @max_send_limit_bytes && record_buf_bytes > @max_send_limit_bytes
+             log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :record => record
+             next
+           end
+         rescue StandardError => e
+           log.warn "unexpected error during format record. Skip broken event:", :error => e.to_s, :error_class => e.class.to_s, :time => time, :record => record
+           next
+         end
+
+         producer = get_producer
+         handler = enqueue_with_retry(producer, topic, record_buf, message_key, partition)
+         handler
+       }.compact.each { |handler|
+         # compact drops the nils produced by skipped records above, which
+         # would otherwise raise NoMethodError on #wait.
+         handler.wait(@rdkafka_delivery_handle_poll_timeout) if @rdkafka_delivery_handle_poll_timeout != 0
+       }
+     end
+   rescue Exception => e
+     log.warn "Send exception occurred: #{e} at #{e.backtrace.first}"
+     # Raise the exception so Fluentd retries sending the chunk.
+     raise e
+   end
+
+   def enqueue_with_retry(producer, topic, record_buf, message_key, partition)
+     attempt = 0
+     loop do
+       begin
+         handler = producer.produce(topic: topic, payload: record_buf, key: message_key, partition: partition)
+         return handler
+       rescue Exception => e
+         # Only rdkafka errors carry #code; guard so that other exception
+         # classes are re-raised instead of crashing on NoMethodError.
+         if e.respond_to?(:code) && e.code == :queue_full
+           if attempt <= @max_enqueue_retries
+             log.warn "Failed to enqueue message; attempting retry #{attempt} of #{@max_enqueue_retries} after #{@enqueue_retry_backoff}s"
+             sleep @enqueue_retry_backoff
+             attempt += 1
+           else
+             raise "Failed to enqueue message although tried retry #{@max_enqueue_retries} times"
+           end
+         else
+           raise e
+         end
+       end
+     end
+   end
+ end
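For reference, a minimal hypothetical Fluentd configuration for this output
(broker addresses, topic, and the rdkafka_options JSON are illustrative, not
taken from the package):

    <match app.**>
      type rdkafka
      brokers broker1:9092,broker2:9092
      default_topic logs
      output_data_type json
      rdkafka_options {"queue.buffering.max.kbytes":2048}
    </match>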
data/test/helper.rb ADDED
@@ -0,0 +1,27 @@
+ require 'rubygems'
+ require 'bundler'
+ begin
+   Bundler.setup(:default, :development)
+ rescue Bundler::BundlerError => e
+   $stderr.puts e.message
+   $stderr.puts "Run `bundle install` to install missing gems"
+   exit e.status_code
+ end
+ require 'test/unit'
+
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
+ require 'fluent/test'
+ unless ENV.has_key?('VERBOSE')
+   nulllogger = Object.new
+   nulllogger.instance_eval {|obj|
+     def method_missing(method, *args)
+     end
+   }
+   $log = nulllogger
+ end
+
+ require 'fluent/plugin/out_kafka'
+
+ class Test::Unit::TestCase
+ end
data/test/plugin/test_out_kafka.rb ADDED
@@ -0,0 +1,58 @@
+ require 'helper'
+ require 'fluent/output'
+
+ class KafkaOutputTest < Test::Unit::TestCase
+   def setup
+     Fluent::Test.setup
+   end
+
+   BASE_CONFIG = %[
+     type kafka_buffered
+   ]
+
+   CONFIG = BASE_CONFIG + %[
+     default_topic kitagawakeiko
+     brokers localhost:9092
+   ]
+
+   def create_driver(conf = CONFIG, tag='test')
+     Fluent::Test::BufferedOutputTestDriver.new(Fluent::KafkaOutput, tag).configure(conf)
+   end
+
+   def test_configure
+     assert_nothing_raised(Fluent::ConfigError) {
+       create_driver(BASE_CONFIG)
+     }
+
+     assert_nothing_raised(Fluent::ConfigError) {
+       create_driver(CONFIG)
+     }
+
+     assert_nothing_raised(Fluent::ConfigError) {
+       create_driver(CONFIG + %[
+         buffer_type memory
+       ])
+     }
+
+     d = create_driver
+     assert_equal 'kitagawakeiko', d.instance.default_topic
+     assert_equal 'localhost:9092', d.instance.brokers
+   end
+
+   def test_format
+     d = create_driver
+   end
+
+   def test_multi_worker_support
+     d = create_driver
+     assert_equal true, d.instance.multi_workers_ready?
+   end
+
+   def test_write
+     d = create_driver
+     time = Time.parse("2011-01-02 13:14:15 UTC").to_i
+     d.emit({"a"=>1}, time)
+     d.emit({"a"=>2}, time)
+   end
+ end
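Assuming the gem's standard Rakefile test task (the Rakefile itself is not
shown in this diff), the suite would typically be run with:

    bundle exec rake test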
metadata ADDED
@@ -0,0 +1,147 @@
+ --- !ruby/object:Gem::Specification
+ name: sk-fluent-plugin-kafka
+ version: !ruby/object:Gem::Version
+   version: 0.8.0
+ platform: ruby
+ authors:
+ - Hidemasa Togashi
+ - Masahiro Nakagawa
+ autorequire:
+ bindir: bin
+ cert_chain: []
+ date: 2018-10-30 00:00:00.000000000 Z
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   name: fluentd
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: 0.10.58
+     - - "<"
+       - !ruby/object:Gem::Version
+         version: '2'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: 0.10.58
+     - - "<"
+       - !ruby/object:Gem::Version
+         version: '2'
+ - !ruby/object:Gem::Dependency
+   name: ltsv
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: '0'
+ - !ruby/object:Gem::Dependency
+   name: ruby-kafka
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: 0.7.1
+     - - "<"
+       - !ruby/object:Gem::Version
+         version: 0.8.0
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: 0.7.1
+     - - "<"
+       - !ruby/object:Gem::Version
+         version: 0.8.0
+ - !ruby/object:Gem::Dependency
+   name: rake
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: 0.9.2
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: 0.9.2
+ - !ruby/object:Gem::Dependency
+   name: test-unit
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: 3.0.8
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: 3.0.8
+ description: Fluentd plugin for Apache Kafka > 0.8
+ email:
+ - sandeep.kotha@live.com
+ executables: []
+ extensions: []
+ extra_rdoc_files: []
+ files:
+ - ".gitignore"
+ - ".travis.yml"
+ - ChangeLog
+ - Gemfile
+ - LICENSE
+ - README.md
+ - Rakefile
+ - fluent-plugin-kafka.gemspec
+ - lib/fluent/plugin/in_kafka.rb
+ - lib/fluent/plugin/in_kafka_group.rb
+ - lib/fluent/plugin/kafka_plugin_util.rb
+ - lib/fluent/plugin/kafka_producer_ext.rb
+ - lib/fluent/plugin/out_kafka.rb
+ - lib/fluent/plugin/out_kafka2.rb
+ - lib/fluent/plugin/out_kafka_buffered.rb
+ - lib/fluent/plugin/out_rdkafka.rb
+ - test/helper.rb
+ - test/plugin/test_out_kafka.rb
+ homepage: https://github.com/fluent/fluent-plugin-kafka
+ licenses:
+ - Apache-2.0
+ metadata: {}
+ post_install_message:
+ rdoc_options: []
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: 2.1.0
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: '0'
+ requirements: []
+ rubyforge_project:
+ rubygems_version: 2.7.7
+ signing_key:
+ specification_version: 4
+ summary: Fluentd plugin for Apache Kafka > 0.8
+ test_files:
+ - test/helper.rb
+ - test/plugin/test_out_kafka.rb