roched-fluent-plugin-kafka 0.6.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/fluent/plugin/out_kafka_buffered.rb ADDED
@@ -0,0 +1,330 @@
+ require 'thread'
+ require 'fluent/output'
+ require 'fluent/plugin/kafka_plugin_util'
+
+ class Fluent::KafkaOutputBuffered < Fluent::BufferedOutput
+   Fluent::Plugin.register_output('kafka_buffered', self)
+
+   config_param :brokers, :string, :default => 'localhost:9092',
+                :desc => <<-DESC
+ Set brokers directly:
+ <broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,..
+ Brokers: you can choose to use either brokers or zookeeper.
+ DESC
+   config_param :zookeeper, :string, :default => nil,
+                :desc => <<-DESC
+ Set brokers via Zookeeper:
+ <zookeeper_host>:<zookeeper_port>
+ DESC
+   config_param :zookeeper_path, :string, :default => '/brokers/ids',
+                :desc => "Path in Zookeeper for broker ids. Defaults to /brokers/ids"
+   config_param :default_topic, :string, :default => nil,
+                :desc => "Output topic"
+   config_param :default_message_key, :string, :default => nil
+   config_param :default_partition_key, :string, :default => nil
+   config_param :default_partition, :integer, :default => nil
+   config_param :client_id, :string, :default => 'kafka'
+   config_param :output_data_type, :string, :default => 'json',
+                :desc => <<-DESC
+ Supported format: (json|ltsv|msgpack|attr:<record name>|<formatter name>)
+ DESC
+   config_param :output_include_tag, :bool, :default => false
+   config_param :output_include_time, :bool, :default => false
+   config_param :exclude_partition_key, :bool, :default => false,
+                :desc => <<-DESC
+ Set true to remove partition key from data
+ DESC
+   config_param :exclude_partition, :bool, :default => false,
+                :desc => <<-DESC
+ Set true to remove partition from data
+ DESC
+   config_param :exclude_message_key, :bool, :default => false,
+                :desc => <<-DESC
+ Set true to remove message key from data
+ DESC
+   config_param :exclude_topic_key, :bool, :default => false,
+                :desc => <<-DESC
+ Set true to remove topic name key from data
+ DESC
+
+   config_param :kafka_agg_max_bytes, :size, :default => 4*1024 # 4k
+   config_param :kafka_agg_max_messages, :integer, :default => nil
+   config_param :get_kafka_client_log, :bool, :default => false
+
+   # ruby-kafka producer options
+   config_param :max_send_retries, :integer, :default => 2,
+                :desc => "Number of times to retry sending of messages to a leader."
+   config_param :required_acks, :integer, :default => -1,
+                :desc => "The number of acks required per request."
+   config_param :ack_timeout, :time, :default => nil,
+                :desc => "How long the producer waits for acks."
+   config_param :compression_codec, :string, :default => nil,
+                :desc => <<-DESC
+ The codec the producer uses to compress messages.
+ Supported codecs: (gzip|snappy)
+ DESC
+   config_param :max_send_limit_bytes, :size, :default => nil
+   config_param :discard_kafka_delivery_failed, :bool, :default => false
+
+   config_param :time_format, :string, :default => nil
+
+   config_param :active_support_notification_regex, :string, :default => nil,
+                :desc => <<-DESC
+ Add a regular expression to capture ActiveSupport notifications from the Kafka client.
+ Requires the activesupport gem - records will be generated under fluent_kafka_stats.**
+ DESC
+
+   config_param :monitoring_list, :array, :default => [],
+                :desc => "Libraries to use for monitoring. statsd and datadog are supported"
+
+   include Fluent::KafkaPluginUtil::SSLSettings
+   include Fluent::KafkaPluginUtil::SaslSettings
+
+   attr_accessor :output_data_type
+   attr_accessor :field_separator
+
+   unless method_defined?(:log)
+     define_method("log") { $log }
+   end
+
+   def initialize
+     super
+
+     require 'kafka'
+     require 'fluent/plugin/kafka_producer_ext'
+
+     @kafka = nil
+     @producers = {}
+     @producers_mutex = Mutex.new
+   end
+
+   def refresh_client(raise_error = true)
+     if @zookeeper
+       @seed_brokers = []
+       z = Zookeeper.new(@zookeeper)
+       z.get_children(:path => @zookeeper_path)[:children].each do |id|
+         broker = Yajl.load(z.get(:path => @zookeeper_path + "/#{id}")[:data])
+         @seed_brokers.push("#{broker['host']}:#{broker['port']}")
+       end
+       z.close
+       log.info "brokers have been refreshed via Zookeeper: #{@seed_brokers}"
+     end
+     begin
+       if @seed_brokers.length > 0
+         logger = @get_kafka_client_log ? log : nil
+         @kafka = Kafka.new(seed_brokers: @seed_brokers, client_id: @client_id, logger: logger, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
+                            ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
+                            sasl_gssapi_principal: @principal, sasl_gssapi_keytab: @keytab)
+         log.info "initialized kafka producer: #{@client_id}"
+       else
+         log.warn "No brokers found on Zookeeper"
+       end
+     rescue Exception => e
+       if raise_error # During startup, error should be reported to engine and stop its phase for safety.
+         raise e
+       else
+         log.error e
+       end
+     end
+   end
+
+   def configure(conf)
+     super
+
+     if @zookeeper
+       require 'zookeeper'
+     else
+       @seed_brokers = @brokers.match(",").nil? ? [@brokers] : @brokers.split(",")
+       log.info "brokers have been set directly: #{@seed_brokers}"
+     end
+
+     if conf['ack_timeout_ms']
+       log.warn "'ack_timeout_ms' parameter is deprecated. Use 'ack_timeout' in seconds instead"
+       @ack_timeout = conf['ack_timeout_ms'].to_i / 1000
+     end
+
+     @f_separator = case @field_separator
+                    when /SPACE/i then ' '
+                    when /COMMA/i then ','
+                    when /SOH/i then "\x01"
+                    else "\t"
+                    end
+
+     @formatter_proc = setup_formatter(conf)
+
+     @producer_opts = {max_retries: @max_send_retries, required_acks: @required_acks}
+     @producer_opts[:ack_timeout] = @ack_timeout if @ack_timeout
+     @producer_opts[:compression_codec] = @compression_codec.to_sym if @compression_codec
+
+     if @discard_kafka_delivery_failed
+       log.warn "'discard_kafka_delivery_failed' option discards events which cause delivery failure, e.g. invalid topic or something."
+       log.warn "If this is unexpected, you need to check your configuration or data."
+     end
+
+     if @active_support_notification_regex
+       require 'active_support/notifications'
+       require 'active_support/core_ext/hash/keys'
+       ActiveSupport::Notifications.subscribe(Regexp.new(@active_support_notification_regex)) do |*args|
+         event = ActiveSupport::Notifications::Event.new(*args)
+         message = event.payload.respond_to?(:stringify_keys) ? event.payload.stringify_keys : event.payload
+         @router.emit("fluent_kafka_stats.#{event.name}", Time.now.to_i, message)
+       end
+     end
+
+     @monitoring_list.each { |m|
+       require "kafka/#{m}"
+       log.info "#{m} monitoring started"
+     }
+   end
+
+   def start
+     super
+     refresh_client
+   end
+
+   def shutdown
+     super
+     shutdown_producers
+     @kafka = nil
+   end
+
+   def emit(tag, es, chain)
+     super(tag, es, chain, tag)
+   end
+
+   def format_stream(tag, es)
+     es.to_msgpack_stream
+   end
+
+   def shutdown_producers
+     @producers_mutex.synchronize {
+       @producers.each { |key, producer|
+         producer.shutdown
+       }
+       @producers = {}
+     }
+   end
+
+   def get_producer
+     @producers_mutex.synchronize {
+       producer = @producers[Thread.current.object_id]
+       unless producer
+         producer = @kafka.producer(@producer_opts)
+         @producers[Thread.current.object_id] = producer
+       end
+       producer
+     }
+   end
+
+   def setup_formatter(conf)
+     if @output_data_type == 'json'
+       require 'yajl'
+       Proc.new { |tag, time, record| Yajl::Encoder.encode(record) }
+     elsif @output_data_type == 'ltsv'
+       require 'ltsv'
+       Proc.new { |tag, time, record| LTSV.dump(record) }
+     elsif @output_data_type == 'msgpack'
+       require 'msgpack'
+       Proc.new { |tag, time, record| record.to_msgpack }
+     elsif @output_data_type =~ /^attr:(.*)$/
+       @custom_attributes = $1.split(',').map(&:strip).reject(&:empty?)
+       @custom_attributes.unshift('time') if @output_include_time
+       @custom_attributes.unshift('tag') if @output_include_tag
+       Proc.new { |tag, time, record|
+         @custom_attributes.map { |attr|
+           record[attr].nil? ? '' : record[attr].to_s
+         }.join(@f_separator)
+       }
+     else
+       @formatter = Fluent::Plugin.new_formatter(@output_data_type)
+       @formatter.configure(conf)
+       @formatter.method(:format)
+     end
+   end
+
+   def deliver_messages(producer, tag)
+     if @discard_kafka_delivery_failed
+       begin
+         producer.deliver_messages
+       rescue Kafka::DeliveryFailed => e
+         log.warn "DeliveryFailed occurred. Discard broken event:", :error => e.to_s, :error_class => e.class.to_s, :tag => tag
+         producer.clear_buffer
+       end
+     else
+       producer.deliver_messages
+     end
+   end
+
+   def write(chunk)
+     tag = chunk.key
+     def_topic = @default_topic || tag
+     producer = get_producer
+
+     records_by_topic = {}
+     bytes_by_topic = {}
+     messages = 0
+     messages_bytes = 0
+     record_buf = nil
+     record_buf_bytes = nil
+
+     begin
+       chunk.msgpack_each { |time, record|
+         begin
+           if @output_include_time
+             if @time_format
+               record['time'.freeze] = Time.at(time).strftime(@time_format)
+             else
+               record['time'.freeze] = time
+             end
+           end
+
+           record['tag'] = tag if @output_include_tag
+           topic = (@exclude_topic_key ? record.delete('topic'.freeze) : record['topic'.freeze]) || def_topic
+           partition_key = (@exclude_partition_key ? record.delete('partition_key'.freeze) : record['partition_key'.freeze]) || @default_partition_key
+           partition = (@exclude_partition ? record.delete('partition'.freeze) : record['partition'.freeze]) || @default_partition
+           message_key = (@exclude_message_key ? record.delete('message_key'.freeze) : record['message_key'.freeze]) || @default_message_key
+
+           records_by_topic[topic] ||= 0
+           bytes_by_topic[topic] ||= 0
+
+           record_buf = @formatter_proc.call(tag, time, record)
+           record_buf_bytes = record_buf.bytesize
+           if @max_send_limit_bytes && record_buf_bytes > @max_send_limit_bytes
+             log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :record => record
+             next
+           end
+         rescue StandardError => e
+           log.warn "unexpected error during record formatting. Skip broken event:", :error => e.to_s, :error_class => e.class.to_s, :time => time, :record => record
+           next
+         end
+
+         if (messages > 0) and (messages_bytes + record_buf_bytes > @kafka_agg_max_bytes) or (@kafka_agg_max_messages && messages >= @kafka_agg_max_messages)
+           log.debug { "#{messages} messages sent because the batch transmission limit was reached." }
+           deliver_messages(producer, tag)
+           messages = 0
+           messages_bytes = 0
+         end
+         log.trace { "message will be sent to #{topic} with partition_key: #{partition_key}, partition: #{partition}, message_key: #{message_key} and value: #{record_buf}." }
+         messages += 1
+         producer.produce2(record_buf, topic: topic, key: message_key, partition_key: partition_key, partition: partition)
+         messages_bytes += record_buf_bytes
+
+         records_by_topic[topic] += 1
+         bytes_by_topic[topic] += record_buf_bytes
+       }
+       if messages > 0
+         log.debug { "#{messages} messages sent." }
+         deliver_messages(producer, tag)
+       end
+       log.debug { "(records|bytes) (#{records_by_topic}|#{bytes_by_topic})" }
+     end
+   rescue Exception => e
+     log.warn "Send exception occurred: #{e}"
+     log.warn "Exception Backtrace : #{e.backtrace.join("\n")}"
+     # For safety, refresh client and its producers
+     shutdown_producers
+     refresh_client(false)
+     # Raise exception to retry sending messages
+     raise e
+   end
+ end
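
For reference, a minimal Fluentd match section exercising the kafka_buffered output added above could look like the sketch below. The broker addresses, match pattern, and topic name are illustrative placeholders, not part of the package; buffer_type and flush_interval come from Fluentd's standard BufferedOutput, not from this plugin.

    <match app.**>
      type kafka_buffered
      brokers broker1:9092,broker2:9092
      default_topic app-events
      output_data_type json
      output_include_tag true
      compression_codec gzip
      required_acks -1
      buffer_type memory
      flush_interval 10s
    </match>
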
data/test/helper.rb ADDED
@@ -0,0 +1,27 @@
+ require 'rubygems'
+ require 'bundler'
+ begin
+   Bundler.setup(:default, :development)
+ rescue Bundler::BundlerError => e
+   $stderr.puts e.message
+   $stderr.puts "Run `bundle install` to install missing gems"
+   exit e.status_code
+ end
+ require 'test/unit'
+
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
+ require 'fluent/test'
+ unless ENV.has_key?('VERBOSE')
+   nulllogger = Object.new
+   nulllogger.instance_eval {|obj|
+     def method_missing(method, *args)
+     end
+   }
+   $log = nulllogger
+ end
+
+ require 'fluent/plugin/out_kafka'
+
+ class Test::Unit::TestCase
+ end
data/test/plugin/test_out_kafka.rb ADDED
@@ -0,0 +1,52 @@
+ require 'helper'
+ require 'fluent/output'
+
+ class KafkaOutputTest < Test::Unit::TestCase
+   def setup
+     Fluent::Test.setup
+   end
+
+   BASE_CONFIG = %[
+     type kafka_buffered
+   ]
+
+   CONFIG = BASE_CONFIG + %[
+     default_topic kitagawakeiko
+     brokers localhost:9092
+   ]
+
+   def create_driver(conf = CONFIG, tag='test')
+     Fluent::Test::BufferedOutputTestDriver.new(Fluent::KafkaOutput, tag).configure(conf)
+   end
+
+   def test_configure
+     assert_nothing_raised(Fluent::ConfigError) {
+       create_driver(BASE_CONFIG)
+     }
+
+     assert_nothing_raised(Fluent::ConfigError) {
+       create_driver(CONFIG)
+     }
+
+     assert_nothing_raised(Fluent::ConfigError) {
+       create_driver(CONFIG + %[
+         buffer_type memory
+       ])
+     }
+
+     d = create_driver
+     assert_equal 'kitagawakeiko', d.instance.default_topic
+     assert_equal 'localhost:9092', d.instance.brokers
+   end
+
+   def test_format
+     d = create_driver
+   end
+
+   def test_write
+     d = create_driver
+     time = Time.parse("2011-01-02 13:14:15 UTC").to_i
+     d.emit({"a"=>1}, time)
+     d.emit({"a"=>2}, time)
+   end
+ end
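
As a usage note (not part of the package diff): with the development dependencies from the metadata below installed, this suite is normally driven through test-unit, for example `bundle install` followed by `bundle exec rake test`, assuming the gem's Rakefile defines the conventional test task.
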
metadata ADDED
@@ -0,0 +1,141 @@
+ --- !ruby/object:Gem::Specification
+ name: roched-fluent-plugin-kafka
+ version: !ruby/object:Gem::Version
+   version: 0.6.5
+ platform: ruby
+ authors:
+ - Hidemasa Togashi
+ - Masahiro Nakagawa
+ autorequire:
+ bindir: bin
+ cert_chain: []
+ date: 2017-11-23 00:00:00.000000000 Z
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   name: fluentd
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - '>='
+       - !ruby/object:Gem::Version
+         version: 0.10.58
+     - - <
+       - !ruby/object:Gem::Version
+         version: '2'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - '>='
+       - !ruby/object:Gem::Version
+         version: 0.10.58
+     - - <
+       - !ruby/object:Gem::Version
+         version: '2'
+ - !ruby/object:Gem::Dependency
+   name: ltsv
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+ - !ruby/object:Gem::Dependency
+   name: ruby-kafka
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ~>
+       - !ruby/object:Gem::Version
+         version: 0.4.1
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ~>
+       - !ruby/object:Gem::Version
+         version: 0.4.1
+ - !ruby/object:Gem::Dependency
+   name: rake
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - '>='
+       - !ruby/object:Gem::Version
+         version: 0.9.2
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - '>='
+       - !ruby/object:Gem::Version
+         version: 0.9.2
+ - !ruby/object:Gem::Dependency
+   name: test-unit
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - '>='
+       - !ruby/object:Gem::Version
+         version: 3.0.8
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - '>='
+       - !ruby/object:Gem::Version
+         version: 3.0.8
+ description: Fluentd plugin for Apache Kafka > 0.8
+ email:
+ - togachiro@gmail.com
+ - repeatedly@gmail.com
+ executables: []
+ extensions: []
+ extra_rdoc_files: []
+ files:
+ - .gitignore
+ - .travis.yml
+ - ChangeLog
+ - Gemfile
+ - LICENSE
+ - README.md
+ - Rakefile
+ - fluent-plugin-kafka.gemspec
+ - lib/fluent/plugin/in_kafka.rb
+ - lib/fluent/plugin/in_kafka_group.rb
+ - lib/fluent/plugin/kafka_plugin_util.rb
+ - lib/fluent/plugin/kafka_producer_ext.rb
+ - lib/fluent/plugin/out_kafka.rb
+ - lib/fluent/plugin/out_kafka2.rb
+ - lib/fluent/plugin/out_kafka_buffered.rb
+ - test/helper.rb
+ - test/plugin/test_out_kafka.rb
+ homepage: https://github.com/roche-d/fluent-plugin-kafka
+ licenses:
+ - Apache-2.0
+ metadata: {}
+ post_install_message:
+ rdoc_options: []
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - '>='
+     - !ruby/object:Gem::Version
+       version: 2.1.0
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+ requirements: []
+ rubyforge_project:
+ rubygems_version: 2.0.14.1
+ signing_key:
+ specification_version: 4
+ summary: Fluentd plugin for Apache Kafka > 0.8
+ test_files:
+ - test/helper.rb
+ - test/plugin/test_out_kafka.rb