sk-fluent-plugin-kafka 0.8.0
- checksums.yaml +7 -0
- data/.gitignore +2 -0
- data/.travis.yml +25 -0
- data/ChangeLog +161 -0
- data/Gemfile +4 -0
- data/LICENSE +14 -0
- data/README.md +319 -0
- data/Rakefile +12 -0
- data/fluent-plugin-kafka.gemspec +24 -0
- data/lib/fluent/plugin/in_kafka.rb +341 -0
- data/lib/fluent/plugin/in_kafka_group.rb +281 -0
- data/lib/fluent/plugin/kafka_plugin_util.rb +52 -0
- data/lib/fluent/plugin/kafka_producer_ext.rb +308 -0
- data/lib/fluent/plugin/out_kafka.rb +254 -0
- data/lib/fluent/plugin/out_kafka2.rb +243 -0
- data/lib/fluent/plugin/out_kafka_buffered.rb +361 -0
- data/lib/fluent/plugin/out_rdkafka.rb +301 -0
- data/test/helper.rb +27 -0
- data/test/plugin/test_out_kafka.rb +58 -0
- metadata +147 -0
data/Rakefile
ADDED
@@ -0,0 +1,12 @@
+require 'bundler'
+Bundler::GemHelper.install_tasks
+
+require 'rake/testtask'
+
+Rake::TestTask.new(:test) do |test|
+  test.libs << 'lib' << 'test'
+  test.test_files = FileList['test/**/test_*.rb']
+  test.verbose = true
+end
+
+task :default => [:build]
data/fluent-plugin-kafka.gemspec
ADDED
@@ -0,0 +1,24 @@
+# -*- encoding: utf-8 -*-
+
+Gem::Specification.new do |gem|
+  gem.authors = ["Hidemasa Togashi", "Masahiro Nakagawa"]
+  gem.email = ["sandeep.kotha@live.com"]
+  gem.description = %q{Fluentd plugin for Apache Kafka > 0.8}
+  gem.summary = %q{Fluentd plugin for Apache Kafka > 0.8}
+  gem.homepage = "https://github.com/fluent/fluent-plugin-kafka"
+  gem.license = "Apache-2.0"
+
+  gem.files = `git ls-files`.split($\)
+  gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
+  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
+  gem.name = "sk-fluent-plugin-kafka"
+  gem.require_paths = ["lib"]
+  gem.version = '0.8.0'
+  gem.required_ruby_version = ">= 2.1.0"
+
+  gem.add_dependency "fluentd", [">= 0.10.58", "< 2"]
+  gem.add_dependency 'ltsv'
+  gem.add_dependency 'ruby-kafka', '>= 0.7.1', '< 0.8.0'
+  gem.add_development_dependency "rake", ">= 0.9.2"
+  gem.add_development_dependency "test-unit", ">= 3.0.8"
+end
data/lib/fluent/plugin/in_kafka.rb
ADDED
@@ -0,0 +1,341 @@
+require 'fluent/input'
+require 'fluent/time'
+require 'fluent/plugin/kafka_plugin_util'
+
+class Fluent::KafkaInput < Fluent::Input
+  Fluent::Plugin.register_input('kafka', self)
+
+  config_param :format, :string, :default => 'json',
+               :desc => "Supported format: (json|text|ltsv|msgpack)"
+  config_param :message_key, :string, :default => 'message',
+               :desc => "For 'text' format only."
+  config_param :host, :string, :default => nil,
+               :desc => "Broker host"
+  config_param :port, :integer, :default => nil,
+               :desc => "Broker port"
+  config_param :brokers, :string, :default => 'localhost:9092',
+               :desc => "List of broker-host:port, separate with comma, must set."
+  config_param :interval, :integer, :default => 1, # seconds
+               :desc => "Interval (Unit: seconds)"
+  config_param :topics, :string, :default => nil,
+               :desc => "Listening topics(separate with comma',')"
+  config_param :client_id, :string, :default => 'kafka'
+  config_param :partition, :integer, :default => 0,
+               :desc => "Listening partition"
+  config_param :offset, :integer, :default => -1,
+               :desc => "Listening start offset"
+  config_param :add_prefix, :string, :default => nil,
+               :desc => "Tag prefix"
+  config_param :add_suffix, :string, :default => nil,
+               :desc => "tag suffix"
+  config_param :add_offset_in_record, :bool, :default => false
+
+  config_param :offset_zookeeper, :string, :default => nil
+  config_param :offset_zk_root_node, :string, :default => '/fluent-plugin-kafka'
+  config_param :use_record_time, :bool, :default => false,
+               :desc => "Replace message timestamp with contents of 'time' field."
+  config_param :time_format, :string, :default => nil,
+               :desc => "Time format to be used to parse 'time' filed."
+  config_param :kafka_message_key, :string, :default => nil,
+               :desc => "Set kafka's message key to this field"
+
+  # Kafka#fetch_messages options
+  config_param :max_bytes, :integer, :default => nil,
+               :desc => "Maximum number of bytes to fetch."
+  config_param :max_wait_time, :integer, :default => nil,
+               :desc => "How long to block until the server sends us data."
+  config_param :min_bytes, :integer, :default => nil,
+               :desc => "Smallest amount of data the server should send us."
+
+  include Fluent::KafkaPluginUtil::SSLSettings
+  include Fluent::KafkaPluginUtil::SaslSettings
+
+  unless method_defined?(:router)
+    define_method("router") { Fluent::Engine }
+  end
+
+  def initialize
+    super
+    require 'kafka'
+
+    @time_parser = nil
+  end
+
+  def configure(conf)
+    super
+
+    @topic_list = []
+    if @topics
+      @topic_list = @topics.split(',').map { |topic|
+        TopicEntry.new(topic.strip, @partition, @offset)
+      }
+    else
+      conf.elements.select { |element| element.name == 'topic' }.each do |element|
+        unless element.has_key?('topic')
+          raise Fluent::ConfigError, "kafka: 'topic' is a require parameter in 'topic element'."
+        end
+        partition = element.has_key?('partition') ? element['partition'].to_i : 0
+        offset = element.has_key?('offset') ? element['offset'].to_i : -1
+        @topic_list.push(TopicEntry.new(element['topic'], partition, offset))
+      end
+    end
+
+    if @topic_list.empty?
+      raise Fluent::ConfigError, "kafka: 'topics' or 'topic element' is a require parameter"
+    end
+
+    # For backward compatibility
+    @brokers = case
+    when @host && @port
+      ["#{@host}:#{@port}"]
+    when @host
+      ["#{@host}:9092"]
+    when @port
+      ["localhost:#{@port}"]
+    else
+      @brokers
+    end
+
+    if conf['max_wait_ms']
+      log.warn "'max_wait_ms' parameter is deprecated. Use second unit 'max_wait_time' instead"
+      @max_wait_time = conf['max_wait_ms'].to_i / 1000
+    end
+
+    @max_wait_time = @interval if @max_wait_time.nil?
+
+    require 'zookeeper' if @offset_zookeeper
+
+    @parser_proc = setup_parser
+
+    if @use_record_time and @time_format
+      if defined?(Fluent::TimeParser)
+        @time_parser = Fluent::TimeParser.new(@time_format)
+      else
+        @time_parser = Fluent::TextParser::TimeParser.new(@time_format)
+      end
+    end
+  end
+
+  def setup_parser
+    case @format
+    when 'json'
+      begin
+        require 'oj'
+        Oj.default_options = Fluent::DEFAULT_OJ_OPTIONS
+        Proc.new { |msg, te|
+          r = Oj.load(msg.value)
+          add_offset_in_hash(r, te, msg.offset) if @add_offset_in_record
+          r
+        }
+      rescue LoadError
+        require 'yajl'
+        Proc.new { |msg, te|
+          r = Yajl::Parser.parse(msg.value)
+          add_offset_in_hash(r, te, msg.offset) if @add_offset_in_record
+          r
+        }
+      end
+    when 'ltsv'
+      require 'ltsv'
+      Proc.new { |msg, te|
+        r = LTSV.parse(msg.value, {:symbolize_keys => false}).first
+        add_offset_in_hash(r, te, msg.offset) if @add_offset_in_record
+        r
+      }
+    when 'msgpack'
+      require 'msgpack'
+      Proc.new { |msg, te|
+        r = MessagePack.unpack(msg.value)
+        add_offset_in_hash(r, te, msg.offset) if @add_offset_in_record
+        r
+      }
+    when 'text'
+      Proc.new { |msg, te|
+        r = {@message_key => msg.value}
+        add_offset_in_hash(r, te, msg.offset) if @add_offset_in_record
+        r
+      }
+    end
+  end
+
+  def add_offset_in_hash(hash, te, offset)
+    hash['kafka_topic'.freeze] = te.topic
+    hash['kafka_partition'.freeze] = te.partition
+    hash['kafka_offset'.freeze] = offset
+  end
+
+  def start
+    super
+
+    @loop = Coolio::Loop.new
+    opt = {}
+    opt[:max_bytes] = @max_bytes if @max_bytes
+    opt[:max_wait_time] = @max_wait_time if @max_wait_time
+    opt[:min_bytes] = @min_bytes if @min_bytes
+
+    if @scram_mechanism != nil && @username != nil && @password != nil
+      @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: log, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
+                         ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
+                         ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_scram_username: @username, sasl_scram_password: @password,
+                         sasl_scram_mechanism: @scram_mechanism)
+    elsif @username != nil && @password != nil
+      @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: log, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
+                         ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
+                         ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_plain_username: @username, sasl_plain_password: @password)
+    else
+      @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: log, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
+                         ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
+                         ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_gssapi_principal: @principal, sasl_gssapi_keytab: @keytab)
+    end
+
+    @zookeeper = Zookeeper.new(@offset_zookeeper) if @offset_zookeeper
+
+    @topic_watchers = @topic_list.map {|topic_entry|
+      offset_manager = OffsetManager.new(topic_entry, @zookeeper, @offset_zk_root_node) if @offset_zookeeper
+      TopicWatcher.new(
+        topic_entry,
+        @kafka,
+        interval,
+        @parser_proc,
+        @add_prefix,
+        @add_suffix,
+        offset_manager,
+        router,
+        @kafka_message_key,
+        opt)
+    }
+    @topic_watchers.each {|tw|
+      tw.attach(@loop)
+    }
+    @thread = Thread.new(&method(:run))
+  end
+
+  def shutdown
+    @loop.stop
+    @zookeeper.close! if @zookeeper
+    @thread.join
+    @kafka.close
+    super
+  end
+
+  def run
+    @loop.run
+  rescue => e
+    $log.error "unexpected error", :error => e.to_s
+    $log.error_backtrace
+  end
+
+  class TopicWatcher < Coolio::TimerWatcher
+    def initialize(topic_entry, kafka, interval, parser, add_prefix, add_suffix, offset_manager, router, kafka_message_key, options={})
+      @topic_entry = topic_entry
+      @kafka = kafka
+      @callback = method(:consume)
+      @parser = parser
+      @add_prefix = add_prefix
+      @add_suffix = add_suffix
+      @options = options
+      @offset_manager = offset_manager
+      @router = router
+      @kafka_message_key = kafka_message_key
+
+      @next_offset = @topic_entry.offset
+      if @topic_entry.offset == -1 && offset_manager
+        @next_offset = offset_manager.next_offset
+      end
+      @fetch_args = {
+        topic: @topic_entry.topic,
+        partition: @topic_entry.partition,
+      }.merge(@options)
+
+      super(interval, true)
+    end
+
+    def on_timer
+      @callback.call
+    rescue => e
+      # TODO log?
+      $log.error e.to_s
+      $log.error_backtrace
+    end
+
+    def consume
+      offset = @next_offset
+      @fetch_args[:offset] = offset
+      messages = @kafka.fetch_messages(@fetch_args)
+
+      return if messages.size.zero?
+
+      es = Fluent::MultiEventStream.new
+      tag = @topic_entry.topic
+      tag = @add_prefix + "." + tag if @add_prefix
+      tag = tag + "." + @add_suffix if @add_suffix
+
+      messages.each { |msg|
+        begin
+          record = @parser.call(msg, @topic_entry)
+          if @use_record_time
+            if @time_format
+              record_time = @time_parser.parse(record['time'])
+            else
+              record_time = record['time']
+            end
+          else
+            record_time = Fluent::Engine.now
+          end
+          if @kafka_message_key
+            record[@kafka_message_key] = msg.key
+          end
+          es.add(record_time, record)
+        rescue => e
+          $log.warn "parser error in #{@topic_entry.topic}/#{@topic_entry.partition}", :error => e.to_s, :value => msg.value, :offset => msg.offset
+          $log.debug_backtrace
+        end
+      }
+      offset = messages.last.offset + 1
+
+      unless es.empty?
+        @router.emit_stream(tag, es)
+
+        if @offset_manager
+          @offset_manager.save_offset(offset)
+        end
+        @next_offset = offset
+      end
+    end
+  end
+
+  class TopicEntry
+    def initialize(topic, partition, offset)
+      @topic = topic
+      @partition = partition
+      @offset = offset
+    end
+    attr_reader :topic, :partition, :offset
+  end
+
+  class OffsetManager
+    def initialize(topic_entry, zookeeper, zk_root_node)
+      @zookeeper = zookeeper
+      @zk_path = "#{zk_root_node}/#{topic_entry.topic}/#{topic_entry.partition}/next_offset"
+      create_node(@zk_path, topic_entry.topic, topic_entry.partition)
+    end
+
+    def create_node(zk_path, topic, partition)
+      path = ""
+      zk_path.split(/(\/[^\/]+)/).reject(&:empty?).each { |dir|
+        path = path + dir
+        @zookeeper.create(:path => "#{path}")
+      }
+      $log.trace "use zk offset node : #{path}"
+    end
+
+    def next_offset
+      @zookeeper.get(:path => @zk_path)[:data].to_i
+    end
+
+    def save_offset(offset)
+      @zookeeper.set(:path => @zk_path, :data => offset.to_s)
+      $log.trace "update zk offset node : #{offset.to_s}"
+    end
+  end
+end
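
Note: the file above registers the single-partition 'kafka' input and exposes the config_params listed at its top. As an illustrative, hypothetical usage sketch only (assuming a v0.12+ style Fluentd config; the broker list, topic names, and tag prefix below are placeholders, not values shipped with this package), a source block exercising those parameters could look like:

<source>
  @type kafka
  # placeholder broker list and topics
  brokers broker1:9092,broker2:9092
  topics access_log,app_log
  format json
  add_prefix kafka
</source>

Each topic is polled on its own timer (the 'interval' parameter), and events are emitted with the topic name as the tag, optionally wrapped by add_prefix/add_suffix.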
data/lib/fluent/plugin/in_kafka_group.rb
ADDED
@@ -0,0 +1,281 @@
+require 'fluent/input'
+require 'fluent/time'
+require 'fluent/plugin/kafka_plugin_util'
+
+class Fluent::KafkaGroupInput < Fluent::Input
+  Fluent::Plugin.register_input('kafka_group', self)
+
+  config_param :brokers, :string, :default => 'localhost:9092',
+               :desc => "List of broker-host:port, separate with comma, must set."
+  config_param :consumer_group, :string,
+               :desc => "Consumer group name, must set."
+  config_param :topics, :string,
+               :desc => "Listening topics(separate with comma',')."
+  config_param :client_id, :string, :default => 'kafka'
+  config_param :format, :string, :default => 'json',
+               :desc => "Supported format: (json|text|ltsv|msgpack)"
+  config_param :message_key, :string, :default => 'message',
+               :desc => "For 'text' format only."
+  config_param :add_prefix, :string, :default => nil,
+               :desc => "Tag prefix (Optional)"
+  config_param :add_suffix, :string, :default => nil,
+               :desc => "Tag suffix (Optional)"
+  config_param :retry_emit_limit, :integer, :default => nil,
+               :desc => "How long to stop event consuming when BufferQueueLimitError happens. Wait retry_emit_limit x 1s. The default is waiting until BufferQueueLimitError is resolved"
+  config_param :use_record_time, :bool, :default => false,
+               :desc => "Replace message timestamp with contents of 'time' field."
+  config_param :time_format, :string, :default => nil,
+               :desc => "Time format to be used to parse 'time' filed."
+  config_param :kafka_message_key, :string, :default => nil,
+               :desc => "Set kafka's message key to this field"
+
+  config_param :retry_wait_seconds, :integer, :default => 30
+  config_param :disable_retry_limit, :bool, :default => false,
+               :desc => "If set true, it disables retry_limit and make Fluentd retry indefinitely (default: false)"
+  config_param :retry_limit, :integer, :default => 10,
+               :desc => "The maximum number of retries for connecting kafka (default: 10)"
+  # Kafka consumer options
+  config_param :max_bytes, :integer, :default => 1048576,
+               :desc => "Maximum number of bytes to fetch."
+  config_param :max_wait_time, :integer, :default => nil,
+               :desc => "How long to block until the server sends us data."
+  config_param :min_bytes, :integer, :default => nil,
+               :desc => "Smallest amount of data the server should send us."
+  config_param :session_timeout, :integer, :default => nil,
+               :desc => "The number of seconds after which, if a client hasn't contacted the Kafka cluster"
+  config_param :offset_commit_interval, :integer, :default => nil,
+               :desc => "The interval between offset commits, in seconds"
+  config_param :offset_commit_threshold, :integer, :default => nil,
+               :desc => "The number of messages that can be processed before their offsets are committed"
+  config_param :fetcher_max_queue_size, :integer, :default => nil,
+               :desc => "The number of fetched messages per partition that are queued in fetcher queue"
+  config_param :start_from_beginning, :bool, :default => true,
+               :desc => "Whether to start from the beginning of the topic or just subscribe to new messages being produced"
+
+  include Fluent::KafkaPluginUtil::SSLSettings
+  include Fluent::KafkaPluginUtil::SaslSettings
+
+  class ForShutdown < StandardError
+  end
+
+  BufferError = if defined?(Fluent::Plugin::Buffer::BufferOverflowError)
+                  Fluent::Plugin::Buffer::BufferOverflowError
+                else
+                  Fluent::BufferQueueLimitError
+                end
+
+  unless method_defined?(:router)
+    define_method("router") { Fluent::Engine }
+  end
+
+  def initialize
+    super
+    require 'kafka'
+
+    @time_parser = nil
+    @retry_count = 1
+  end
+
+  def _config_to_array(config)
+    config_array = config.split(',').map {|k| k.strip }
+    if config_array.empty?
+      raise Fluent::ConfigError, "kafka_group: '#{config}' is a required parameter"
+    end
+    config_array
+  end
+
+  def multi_workers_ready?
+    true
+  end
+
+  private :_config_to_array
+
+  def configure(conf)
+    super
+
+    $log.info "Will watch for topics #{@topics} at brokers " \
+              "#{@brokers} and '#{@consumer_group}' group"
+
+    @topics = _config_to_array(@topics)
+
+    if conf['max_wait_ms']
+      log.warn "'max_wait_ms' parameter is deprecated. Use second unit 'max_wait_time' instead"
+      @max_wait_time = conf['max_wait_ms'].to_i / 1000
+    end
+
+    @parser_proc = setup_parser
+
+    @consumer_opts = {:group_id => @consumer_group}
+    @consumer_opts[:session_timeout] = @session_timeout if @session_timeout
+    @consumer_opts[:offset_commit_interval] = @offset_commit_interval if @offset_commit_interval
+    @consumer_opts[:offset_commit_threshold] = @offset_commit_threshold if @offset_commit_threshold
+    @consumer_opts[:fetcher_max_queue_size] = @fetcher_max_queue_size if @fetcher_max_queue_size
+
+    @fetch_opts = {}
+    @fetch_opts[:max_wait_time] = @max_wait_time if @max_wait_time
+    @fetch_opts[:min_bytes] = @min_bytes if @min_bytes
+
+    if @use_record_time and @time_format
+      if defined?(Fluent::TimeParser)
+        @time_parser = Fluent::TimeParser.new(@time_format)
+      else
+        @time_parser = Fluent::TextParser::TimeParser.new(@time_format)
+      end
+    end
+  end
+
+  def setup_parser
+    case @format
+    when 'json'
+      begin
+        require 'oj'
+        Oj.default_options = Fluent::DEFAULT_OJ_OPTIONS
+        Proc.new { |msg| Oj.load(msg.value) }
+      rescue LoadError
+        require 'yajl'
+        Proc.new { |msg| Yajl::Parser.parse(msg.value) }
+      end
+    when 'ltsv'
+      require 'ltsv'
+      Proc.new { |msg| LTSV.parse(msg.value, {:symbolize_keys => false}).first }
+    when 'msgpack'
+      require 'msgpack'
+      Proc.new { |msg| MessagePack.unpack(msg.value) }
+    when 'text'
+      Proc.new { |msg| {@message_key => msg.value} }
+    end
+  end
+
+  def start
+    super
+
+    if @scram_mechanism != nil && @username != nil && @password != nil
+      @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: log, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
+                         ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
+                         ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_scram_username: @username, sasl_scram_password: @password,
+                         sasl_scram_mechanism: @scram_mechanism)
+    elsif @username != nil && @password != nil
+      @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: log, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
+                         ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
+                         ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_plain_username: @username, sasl_plain_password: @password)
+    else
+      @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: log, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
+                         ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
+                         ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_gssapi_principal: @principal, sasl_gssapi_keytab: @keytab)
+    end
+
+    @consumer = setup_consumer
+    @thread = Thread.new(&method(:run))
+  end
+
+  def shutdown
+    # This nil assignment should be guarded by mutex in multithread programming manner.
+    # But the situation is very low contention, so we don't use mutex for now.
+    # If the problem happens, we will add a guard for consumer.
+    consumer = @consumer
+    @consumer = nil
+    consumer.stop
+
+    @thread.join
+    @kafka.close
+    super
+  end
+
+  def setup_consumer
+    consumer = @kafka.consumer(@consumer_opts)
+    @topics.each { |topic|
+      consumer.subscribe(topic, start_from_beginning: @start_from_beginning, max_bytes_per_partition: @max_bytes)
+    }
+    consumer
+  end
+
+  def reconnect_consumer
+    log.warn "Stopping Consumer"
+    consumer = @consumer
+    @consumer = nil
+    if consumer
+      consumer.stop
+    end
+    log.warn "Could not connect to broker. retry_time:#{@retry_count}. Next retry will be in #{@retry_wait_seconds} seconds"
+    @retry_count = @retry_count + 1
+    sleep @retry_wait_seconds
+    @consumer = setup_consumer
+    log.warn "Re-starting consumer #{Time.now.to_s}"
+    @retry_count = 0
+  rescue => e
+    log.error "unexpected error during re-starting consumer object access", :error => e.to_s
+    log.error_backtrace
+    if @retry_count <= @retry_limit or disable_retry_limit
+      reconnect_consumer
+    end
+  end
+
+  def run
+    while @consumer
+      begin
+        @consumer.each_batch(@fetch_opts) { |batch|
+          es = Fluent::MultiEventStream.new
+          tag = batch.topic
+          tag = @add_prefix + "." + tag if @add_prefix
+          tag = tag + "." + @add_suffix if @add_suffix
+
+          batch.messages.each { |msg|
+            begin
+              record = @parser_proc.call(msg)
+              if @use_record_time
+                if @time_format
+                  record_time = @time_parser.parse(record['time'])
+                else
+                  record_time = record['time']
+                end
+              else
+                record_time = Fluent::Engine.now
+              end
+              if @kafka_message_key
+                record[@kafka_message_key] = msg.key
+              end
+              es.add(record_time, record)
+            rescue => e
+              log.warn "parser error in #{batch.topic}/#{batch.partition}", :error => e.to_s, :value => msg.value, :offset => msg.offset
+              log.debug_backtrace
+            end
+          }
+
+          unless es.empty?
+            emit_events(tag, es)
+          end
+        }
+      rescue ForShutdown
+      rescue => e
+        log.error "unexpected error during consuming events from kafka. Re-fetch events.", :error => e.to_s
+        log.error_backtrace
+        reconnect_consumer
+      end
+    end
+  rescue => e
+    log.error "unexpected error during consumer object access", :error => e.to_s
+    log.error_backtrace
+  end
+
+  def emit_events(tag, es)
+    retries = 0
+    begin
+      router.emit_stream(tag, es)
+    rescue BufferError
+      raise ForShutdown if @consumer.nil?
+
+      if @retry_emit_limit.nil?
+        sleep 1
+        retry
+      end
+
+      if retries < @retry_emit_limit
+        retries += 1
+        sleep 1
+        retry
+      else
+        raise RuntimeError, "Exceeds retry_emit_limit"
+      end
+    end
+  end
+end
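
Note: in_kafka_group.rb above registers the consumer-group based 'kafka_group' input, which uses ruby-kafka's group consumer and batch API instead of per-partition timers. As a hedged, illustrative sketch only (the broker list, group name, and topic names below are placeholders, not values from this package), a matching source block could look like:

<source>
  @type kafka_group
  # placeholder brokers, group, and topics
  brokers broker1:9092,broker2:9092
  consumer_group fluentd_consumer
  topics access_log,app_log
  format json
  start_from_beginning false
</source>

Offsets are committed through the consumer group, so multiple Fluentd workers can share the same group and split partitions between them (multi_workers_ready? returns true for this plugin).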