roched-fluent-plugin-kafka 0.6.5

data/lib/fluent/plugin/out_kafka_buffered.rb ADDED
@@ -0,0 +1,330 @@
+ require 'thread'
+ require 'fluent/output'
+ require 'fluent/plugin/kafka_plugin_util'
+
+ class Fluent::KafkaOutputBuffered < Fluent::BufferedOutput
+   Fluent::Plugin.register_output('kafka_buffered', self)
+
+   config_param :brokers, :string, :default => 'localhost:9092',
+                :desc => <<-DESC
+ Set brokers directly:
+ <broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,..
+ Brokers: you can choose to use either brokers or zookeeper.
+ DESC
+   config_param :zookeeper, :string, :default => nil,
+                :desc => <<-DESC
+ Set brokers via Zookeeper:
+ <zookeeper_host>:<zookeeper_port>
+ DESC
+   config_param :zookeeper_path, :string, :default => '/brokers/ids',
+                :desc => "Path in Zookeeper for broker ids. Defaults to /brokers/ids"
+   config_param :default_topic, :string, :default => nil,
+                :desc => "Output topic"
+   config_param :default_message_key, :string, :default => nil
+   config_param :default_partition_key, :string, :default => nil
+   config_param :default_partition, :integer, :default => nil
+   config_param :client_id, :string, :default => 'kafka'
+   config_param :output_data_type, :string, :default => 'json',
+                :desc => <<-DESC
+ Supported format: (json|ltsv|msgpack|attr:<record name>|<formatter name>)
+ DESC
+   config_param :output_include_tag, :bool, :default => false
+   config_param :output_include_time, :bool, :default => false
+   config_param :exclude_partition_key, :bool, :default => false,
+                :desc => <<-DESC
+ Set true to remove partition key from data
+ DESC
+   config_param :exclude_partition, :bool, :default => false,
+                :desc => <<-DESC
+ Set true to remove partition from data
+ DESC
+   config_param :exclude_message_key, :bool, :default => false,
+                :desc => <<-DESC
+ Set true to remove message key from data
+ DESC
+   config_param :exclude_topic_key, :bool, :default => false,
+                :desc => <<-DESC
+ Set true to remove topic name key from data
+ DESC
+
+   config_param :kafka_agg_max_bytes, :size, :default => 4*1024 #4k
+   config_param :kafka_agg_max_messages, :integer, :default => nil
+   config_param :get_kafka_client_log, :bool, :default => false
+
+   # ruby-kafka producer options
+   config_param :max_send_retries, :integer, :default => 2,
+                :desc => "Number of times to retry sending of messages to a leader."
+   config_param :required_acks, :integer, :default => -1,
+                :desc => "The number of acks required per request."
+   config_param :ack_timeout, :time, :default => nil,
+                :desc => "How long the producer waits for acks."
+   config_param :compression_codec, :string, :default => nil,
+                :desc => <<-DESC
+ The codec the producer uses to compress messages.
+ Supported codecs: (gzip|snappy)
+ DESC
+   config_param :max_send_limit_bytes, :size, :default => nil
+   config_param :discard_kafka_delivery_failed, :bool, :default => false
+
+   config_param :time_format, :string, :default => nil
+
+   config_param :active_support_notification_regex, :string, :default => nil,
+                :desc => <<-DESC
+ Add a regular expression to capture ActiveSupport notifications from the Kafka client
+ requires activesupport gem - records will be generated under fluent_kafka_stats.**
+ DESC
+
+   config_param :monitoring_list, :array, :default => [],
+                :desc => "library to be used to monitor. statsd and datadog are supported"
+
+   include Fluent::KafkaPluginUtil::SSLSettings
+   include Fluent::KafkaPluginUtil::SaslSettings
+
+   attr_accessor :output_data_type
+   attr_accessor :field_separator
+
+   unless method_defined?(:log)
+     define_method("log") { $log }
+   end
+
+   def initialize
+     super
+
+     require 'kafka'
+     require 'fluent/plugin/kafka_producer_ext'
+
+     @kafka = nil
+     @producers = {}
+     @producers_mutex = Mutex.new
+   end
+
+   def refresh_client(raise_error = true)
+     if @zookeeper
+       @seed_brokers = []
+       z = Zookeeper.new(@zookeeper)
+       z.get_children(:path => @zookeeper_path)[:children].each do |id|
+         broker = Yajl.load(z.get(:path => @zookeeper_path + "/#{id}")[:data])
+         @seed_brokers.push("#{broker['host']}:#{broker['port']}")
+       end
+       z.close
+       log.info "brokers have been refreshed via Zookeeper: #{@seed_brokers}"
+     end
+     begin
+       if @seed_brokers.length > 0
+         logger = @get_kafka_client_log ? log : nil
+         @kafka = Kafka.new(seed_brokers: @seed_brokers, client_id: @client_id, logger: logger, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
+                            ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
+                            sasl_gssapi_principal: @principal, sasl_gssapi_keytab: @keytab)
+         log.info "initialized kafka producer: #{@client_id}"
+       else
+         log.warn "No brokers found on Zookeeper"
+       end
+     rescue Exception => e
+       if raise_error # During startup, error should be reported to engine and stop its phase for safety.
+         raise e
+       else
+         log.error e
+       end
+     end
+   end
+
+   def configure(conf)
+     super
+
+     if @zookeeper
+       require 'zookeeper'
+     else
+       @seed_brokers = @brokers.match(",").nil? ? [@brokers] : @brokers.split(",")
+       log.info "brokers have been set directly: #{@seed_brokers}"
+     end
+
+     if conf['ack_timeout_ms']
+       log.warn "'ack_timeout_ms' parameter is deprecated. Use second unit 'ack_timeout' instead"
+       @ack_timeout = conf['ack_timeout_ms'].to_i / 1000
+     end
+
+     @f_separator = case @field_separator
+                    when /SPACE/i then ' '
+                    when /COMMA/i then ','
+                    when /SOH/i then "\x01"
+                    else "\t"
+                    end
+
+     @formatter_proc = setup_formatter(conf)
+
+     @producer_opts = {max_retries: @max_send_retries, required_acks: @required_acks}
+     @producer_opts[:ack_timeout] = @ack_timeout if @ack_timeout
+     @producer_opts[:compression_codec] = @compression_codec.to_sym if @compression_codec
+
+     if @discard_kafka_delivery_failed
+       log.warn "'discard_kafka_delivery_failed' option discards events which cause delivery failure, e.g. invalid topic or something."
+       log.warn "If this is unexpected, you need to check your configuration or data."
+     end
+
+     if @active_support_notification_regex
+       require 'active_support/notifications'
+       require 'active_support/core_ext/hash/keys'
+       ActiveSupport::Notifications.subscribe(Regexp.new(@active_support_notification_regex)) do |*args|
+         event = ActiveSupport::Notifications::Event.new(*args)
+         message = event.payload.respond_to?(:stringify_keys) ? event.payload.stringify_keys : event.payload
+         @router.emit("fluent_kafka_stats.#{event.name}", Time.now.to_i, message)
+       end
+     end
+
+     @monitoring_list.each { |m|
+       require "kafka/#{m}"
+       log.info "#{m} monitoring started"
+     }
+   end
+
+   def start
+     super
+     refresh_client
+   end
+
+   def shutdown
+     super
+     shutdown_producers
+     @kafka = nil
+   end
+
+   def emit(tag, es, chain)
+     super(tag, es, chain, tag)
+   end
+
+   def format_stream(tag, es)
+     es.to_msgpack_stream
+   end
+
+   def shutdown_producers
+     @producers_mutex.synchronize {
+       @producers.each { |key, producer|
+         producer.shutdown
+       }
+       @producers = {}
+     }
+   end
+
+   def get_producer
+     @producers_mutex.synchronize {
+       producer = @producers[Thread.current.object_id]
+       unless producer
+         producer = @kafka.producer(@producer_opts)
+         @producers[Thread.current.object_id] = producer
+       end
+       producer
+     }
+   end
+
+   def setup_formatter(conf)
+     if @output_data_type == 'json'
+       require 'yajl'
+       Proc.new { |tag, time, record| Yajl::Encoder.encode(record) }
+     elsif @output_data_type == 'ltsv'
+       require 'ltsv'
+       Proc.new { |tag, time, record| LTSV.dump(record) }
+     elsif @output_data_type == 'msgpack'
+       require 'msgpack'
+       Proc.new { |tag, time, record| record.to_msgpack }
+     elsif @output_data_type =~ /^attr:(.*)$/
+       @custom_attributes = $1.split(',').map(&:strip).reject(&:empty?)
+       @custom_attributes.unshift('time') if @output_include_time
+       @custom_attributes.unshift('tag') if @output_include_tag
+       Proc.new { |tag, time, record|
+         @custom_attributes.map { |attr|
+           record[attr].nil? ? '' : record[attr].to_s
+         }.join(@f_separator)
+       }
+     else
+       @formatter = Fluent::Plugin.new_formatter(@output_data_type)
+       @formatter.configure(conf)
+       @formatter.method(:format)
+     end
+   end
+
+   def deliver_messages(producer, tag)
+     if @discard_kafka_delivery_failed
+       begin
+         producer.deliver_messages
+       rescue Kafka::DeliveryFailed => e
+         log.warn "DeliveryFailed occurred. Discard broken event:", :error => e.to_s, :error_class => e.class.to_s, :tag => tag
+         producer.clear_buffer
+       end
+     else
+       producer.deliver_messages
+     end
+   end
+
+   def write(chunk)
+     tag = chunk.key
+     def_topic = @default_topic || tag
+     producer = get_producer
+
+     records_by_topic = {}
+     bytes_by_topic = {}
+     messages = 0
+     messages_bytes = 0
+     record_buf = nil
+     record_buf_bytes = nil
+
+     begin
+       chunk.msgpack_each { |time, record|
+         begin
+           if @output_include_time
+             if @time_format
+               record['time'.freeze] = Time.at(time).strftime(@time_format)
+             else
+               record['time'.freeze] = time
+             end
+           end
+
+           record['tag'] = tag if @output_include_tag
+           topic = (@exclude_topic_key ? record.delete('topic'.freeze) : record['topic'.freeze]) || def_topic
+           partition_key = (@exclude_partition_key ? record.delete('partition_key'.freeze) : record['partition_key'.freeze]) || @default_partition_key
+           partition = (@exclude_partition ? record.delete('partition'.freeze) : record['partition'.freeze]) || @default_partition
+           message_key = (@exclude_message_key ? record.delete('message_key'.freeze) : record['message_key'.freeze]) || @default_message_key
+
+           records_by_topic[topic] ||= 0
+           bytes_by_topic[topic] ||= 0
+
+           record_buf = @formatter_proc.call(tag, time, record)
+           record_buf_bytes = record_buf.bytesize
+           if @max_send_limit_bytes && record_buf_bytes > @max_send_limit_bytes
+             log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :record => record
+             next
+           end
+         rescue StandardError => e
+           log.warn "unexpected error during format record. Skip broken event:", :error => e.to_s, :error_class => e.class.to_s, :time => time, :record => record
+           next
+         end
+
+         if (messages > 0) and (messages_bytes + record_buf_bytes > @kafka_agg_max_bytes) or (@kafka_agg_max_messages && messages >= @kafka_agg_max_messages)
+           log.debug { "#{messages} messages sent because the batch transmission limit has been reached." }
+           deliver_messages(producer, tag)
+           messages = 0
+           messages_bytes = 0
+         end
+         log.trace { "message will be sent to #{topic} with partition_key: #{partition_key}, partition: #{partition}, message_key: #{message_key} and value: #{record_buf}." }
+         messages += 1
+         producer.produce2(record_buf, topic: topic, key: message_key, partition_key: partition_key, partition: partition)
+         messages_bytes += record_buf_bytes
+
+         records_by_topic[topic] += 1
+         bytes_by_topic[topic] += record_buf_bytes
+       }
+       if messages > 0
+         log.debug { "#{messages} messages sent." }
+         deliver_messages(producer, tag)
+       end
+       log.debug { "(records|bytes) (#{records_by_topic}|#{bytes_by_topic})" }
+     end
+   rescue Exception => e
+     log.warn "Send exception occurred: #{e}"
+     log.warn "Exception Backtrace : #{e.backtrace.join("\n")}"
+     # For safety, refresh client and its producers
+     shutdown_producers
+     refresh_client(false)
+     # Raise exception to retry sending messages
+     raise e
+   end
+ end
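
For reference, a minimal Fluentd configuration exercising the parameters declared above might look like the sketch below. This is an illustrative example only, not part of the gem: the match pattern, broker addresses, and topic name are placeholders, and buffer_type, buffer_path, and flush_interval come from Fluentd's standard BufferedOutput rather than from this plugin.

  <match app.**>
    type kafka_buffered

    # connect to brokers directly (alternatively set `zookeeper`)
    brokers broker1.example.com:9092,broker2.example.com:9092
    default_topic app-logs

    # payload formatting
    output_data_type json
    output_include_tag true
    output_include_time true

    # ruby-kafka producer options
    compression_codec gzip
    required_acks -1
    max_send_retries 2

    # standard Fluentd buffered-output settings (placeholders)
    buffer_type file
    buffer_path /var/log/td-agent/buffer/kafka
    flush_interval 3s
  </match>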
data/test/helper.rb ADDED
@@ -0,0 +1,27 @@
+ require 'rubygems'
+ require 'bundler'
+ begin
+   Bundler.setup(:default, :development)
+ rescue Bundler::BundlerError => e
+   $stderr.puts e.message
+   $stderr.puts "Run `bundle install` to install missing gems"
+   exit e.status_code
+ end
+ require 'test/unit'
+
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
+ require 'fluent/test'
+ unless ENV.has_key?('VERBOSE')
+   nulllogger = Object.new
+   nulllogger.instance_eval {|obj|
+     def method_missing(method, *args)
+     end
+   }
+   $log = nulllogger
+ end
+
+ require 'fluent/plugin/out_kafka'
+
+ class Test::Unit::TestCase
+ end
data/test/plugin/test_out_kafka.rb ADDED
@@ -0,0 +1,52 @@
+ require 'helper'
+ require 'fluent/output'
+
+ class KafkaOutputTest < Test::Unit::TestCase
+   def setup
+     Fluent::Test.setup
+   end
+
+   BASE_CONFIG = %[
+     type kafka_buffered
+   ]
+
+   CONFIG = BASE_CONFIG + %[
+     default_topic kitagawakeiko
+     brokers localhost:9092
+   ]
+
+   def create_driver(conf = CONFIG, tag='test')
+     Fluent::Test::BufferedOutputTestDriver.new(Fluent::KafkaOutput, tag).configure(conf)
+   end
+
+   def test_configure
+     assert_nothing_raised(Fluent::ConfigError) {
+       create_driver(BASE_CONFIG)
+     }
+
+     assert_nothing_raised(Fluent::ConfigError) {
+       create_driver(CONFIG)
+     }
+
+     assert_nothing_raised(Fluent::ConfigError) {
+       create_driver(CONFIG + %[
+         buffer_type memory
+       ])
+     }
+
+     d = create_driver
+     assert_equal 'kitagawakeiko', d.instance.default_topic
+     assert_equal 'localhost:9092', d.instance.brokers
+   end
+
+   def test_format
+     d = create_driver
+   end
+
+   def test_write
+     d = create_driver
+     time = Time.parse("2011-01-02 13:14:15 UTC").to_i
+     d.emit({"a"=>1}, time)
+     d.emit({"a"=>2}, time)
+   end
+ end
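
Assuming the bundled Rakefile wires these tests into the usual Rake test task (the Rakefile itself is not shown on this page, so this is an assumption), the suite above would typically be run from a source checkout with:

  $ bundle install
  $ bundle exec rake test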
metadata ADDED
@@ -0,0 +1,141 @@
+ --- !ruby/object:Gem::Specification
+ name: roched-fluent-plugin-kafka
+ version: !ruby/object:Gem::Version
+   version: 0.6.5
+ platform: ruby
+ authors:
+ - Hidemasa Togashi
+ - Masahiro Nakagawa
+ autorequire:
+ bindir: bin
+ cert_chain: []
+ date: 2017-11-23 00:00:00.000000000 Z
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   name: fluentd
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - '>='
+       - !ruby/object:Gem::Version
+         version: 0.10.58
+     - - <
+       - !ruby/object:Gem::Version
+         version: '2'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - '>='
+       - !ruby/object:Gem::Version
+         version: 0.10.58
+     - - <
+       - !ruby/object:Gem::Version
+         version: '2'
+ - !ruby/object:Gem::Dependency
+   name: ltsv
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+ - !ruby/object:Gem::Dependency
+   name: ruby-kafka
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ~>
+       - !ruby/object:Gem::Version
+         version: 0.4.1
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ~>
+       - !ruby/object:Gem::Version
+         version: 0.4.1
+ - !ruby/object:Gem::Dependency
+   name: rake
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - '>='
+       - !ruby/object:Gem::Version
+         version: 0.9.2
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - '>='
+       - !ruby/object:Gem::Version
+         version: 0.9.2
+ - !ruby/object:Gem::Dependency
+   name: test-unit
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - '>='
+       - !ruby/object:Gem::Version
+         version: 3.0.8
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - '>='
+       - !ruby/object:Gem::Version
+         version: 3.0.8
+ description: Fluentd plugin for Apache Kafka > 0.8
+ email:
+ - togachiro@gmail.com
+ - repeatedly@gmail.com
+ executables: []
+ extensions: []
+ extra_rdoc_files: []
+ files:
+ - .gitignore
+ - .travis.yml
+ - ChangeLog
+ - Gemfile
+ - LICENSE
+ - README.md
+ - Rakefile
+ - fluent-plugin-kafka.gemspec
+ - lib/fluent/plugin/in_kafka.rb
+ - lib/fluent/plugin/in_kafka_group.rb
+ - lib/fluent/plugin/kafka_plugin_util.rb
+ - lib/fluent/plugin/kafka_producer_ext.rb
+ - lib/fluent/plugin/out_kafka.rb
+ - lib/fluent/plugin/out_kafka2.rb
+ - lib/fluent/plugin/out_kafka_buffered.rb
+ - test/helper.rb
+ - test/plugin/test_out_kafka.rb
+ homepage: https://github.com/roche-d/fluent-plugin-kafka
+ licenses:
+ - Apache-2.0
+ metadata: {}
+ post_install_message:
+ rdoc_options: []
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - '>='
+     - !ruby/object:Gem::Version
+       version: 2.1.0
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+ requirements: []
+ rubyforge_project:
+ rubygems_version: 2.0.14.1
+ signing_key:
+ specification_version: 4
+ summary: Fluentd plugin for Apache Kafka > 0.8
+ test_files:
+ - test/helper.rb
+ - test/plugin/test_out_kafka.rb
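
The dependency constraints above pin ruby-kafka to the 0.4.x series and accept any fluentd from 0.10.58 up to, but not including, 2.0. A Gemfile entry for pulling in this release could therefore look like the following sketch; the explicit fluentd line is optional and only illustrates the allowed range.

  # Gemfile sketch
  source 'https://rubygems.org'

  gem 'fluentd', '>= 0.10.58', '< 2'
  gem 'roched-fluent-plugin-kafka', '0.6.5' # depends on ruby-kafka ~> 0.4.1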