fluent-plugin-kafka-custom-ruby-version 0.9.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile ADDED
@@ -0,0 +1,12 @@
# Rakefile: wires up gem build tasks (via Bundler) and the unit-test task.
require 'bundler'
Bundler::GemHelper.install_tasks

require 'rake/testtask'

# `rake test` runs every test/**/test_*.rb file with lib/ and test/ on the load path.
Rake::TestTask.new(:test) do |t|
  t.libs << 'lib' << 'test'
  t.test_files = FileList['test/**/test_*.rb']
  t.verbose = true
end

# Default task builds the gem package.
task :default => [:build]
@@ -0,0 +1,24 @@
# -*- encoding: utf-8 -*-

# Gem specification for fluent-plugin-kafka-custom-ruby-version.
# File list comes from `git ls-files`, so the gem must be built from a git checkout.
Gem::Specification.new do |spec|
  spec.authors       = ["Hidemasa Togashi", "Masahiro Nakagawa"]
  spec.email         = ["togachiro@gmail.com", "repeatedly@gmail.com"]
  spec.description   = %q{Fluentd plugin for Apache Kafka > 0.8}
  spec.summary       = %q{Fluentd plugin for Apache Kafka > 0.8}
  spec.homepage      = "https://github.com/gozzip2009/fluent-plugin-kafka-custom-ruby-version"
  spec.license       = "Apache-2.0"

  spec.files         = `git ls-files`.split($\)
  spec.executables   = spec.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.name          = "fluent-plugin-kafka-custom-ruby-version"
  spec.require_paths = ["lib"]
  spec.version       = '0.9.3'
  spec.required_ruby_version = ">= 2.1.0"

  spec.add_dependency "fluentd", [">= 0.10.58", "< 2"]
  spec.add_dependency 'ltsv'
  # Pinned exactly: plugin code relies on ruby-kafka 0.6.x client behavior.
  spec.add_dependency 'ruby-kafka', '0.6.7'
  spec.add_development_dependency "rake", ">= 0.9.2"
  spec.add_development_dependency "test-unit", ">= 3.0.8"
end
@@ -0,0 +1,343 @@
require 'fluent/input'
require 'fluent/time'
require 'fluent/plugin/kafka_plugin_util'

# Fluentd input plugin that polls Kafka brokers directly, without consumer
# groups: each configured topic/partition pair is watched by a Coolio timer
# that calls Kafka#fetch_messages from an explicit offset. Next-offsets can
# optionally be persisted in ZooKeeper so consumption resumes across restarts.
class Fluent::KafkaInput < Fluent::Input
  Fluent::Plugin.register_input('kafka', self)

  config_param :format, :string, :default => 'json',
               :desc => "Supported format: (json|text|ltsv|msgpack)"
  config_param :message_key, :string, :default => 'message',
               :desc => "For 'text' format only."
  config_param :host, :string, :default => nil,
               :desc => "Broker host"
  config_param :port, :integer, :default => nil,
               :desc => "Broker port"
  config_param :brokers, :string, :default => 'localhost:9092',
               :desc => "List of broker-host:port, separate with comma, must set."
  config_param :interval, :integer, :default => 1, # seconds
               :desc => "Interval (Unit: seconds)"
  config_param :topics, :string, :default => nil,
               :desc => "Listening topics(separate with comma',')"
  config_param :client_id, :string, :default => 'kafka'
  config_param :sasl_over_ssl, :bool, :default => true,
               :desc => "Set to false to prevent SSL strict mode when using SASL authentication"
  config_param :partition, :integer, :default => 0,
               :desc => "Listening partition"
  config_param :offset, :integer, :default => -1,
               :desc => "Listening start offset"
  config_param :add_prefix, :string, :default => nil,
               :desc => "Tag prefix"
  config_param :add_suffix, :string, :default => nil,
               :desc => "tag suffix"
  config_param :add_offset_in_record, :bool, :default => false

  config_param :offset_zookeeper, :string, :default => nil
  config_param :offset_zk_root_node, :string, :default => '/fluent-plugin-kafka-custom-ruby-version'
  config_param :use_record_time, :bool, :default => false,
               :desc => "Replace message timestamp with contents of 'time' field."
  config_param :time_format, :string, :default => nil,
               :desc => "Time format to be used to parse 'time' filed."
  config_param :kafka_message_key, :string, :default => nil,
               :desc => "Set kafka's message key to this field"

  # Kafka#fetch_messages options
  config_param :max_bytes, :integer, :default => nil,
               :desc => "Maximum number of bytes to fetch."
  config_param :max_wait_time, :integer, :default => nil,
               :desc => "How long to block until the server sends us data."
  config_param :min_bytes, :integer, :default => nil,
               :desc => "Smallest amount of data the server should send us."

  include Fluent::KafkaPluginUtil::SSLSettings
  include Fluent::KafkaPluginUtil::SaslSettings

  # Older Fluentd cores do not expose #router; route through the engine there.
  unless method_defined?(:router)
    define_method("router") { Fluent::Engine }
  end

  def initialize
    super
    require 'kafka'

    @time_parser = nil
  end

  # Validates configuration and precomputes the topic list, broker list,
  # fetch timing, and the per-format parser proc.
  def configure(conf)
    super

    # Topics come either from the flat 'topics' parameter or from nested
    # <topic> elements, which additionally allow per-topic partition/offset.
    @topic_list = []
    if @topics
      @topic_list = @topics.split(',').map do |name|
        TopicEntry.new(name.strip, @partition, @offset)
      end
    else
      conf.elements.select { |e| e.name == 'topic' }.each do |e|
        unless e.has_key?('topic')
          raise Fluent::ConfigError, "kafka: 'topic' is a require parameter in 'topic element'."
        end
        part = e.has_key?('partition') ? e['partition'].to_i : 0
        off  = e.has_key?('offset') ? e['offset'].to_i : -1
        @topic_list.push(TopicEntry.new(e['topic'], part, off))
      end
    end

    if @topic_list.empty?
      raise Fluent::ConfigError, "kafka: 'topics' or 'topic element' is a require parameter"
    end

    # For backward compatibility
    @brokers = case
               when @host && @port
                 ["#{@host}:#{@port}"]
               when @host
                 ["#{@host}:9092"]
               when @port
                 ["localhost:#{@port}"]
               else
                 @brokers
               end

    if conf['max_wait_ms']
      log.warn "'max_wait_ms' parameter is deprecated. Use second unit 'max_wait_time' instead"
      @max_wait_time = conf['max_wait_ms'].to_i / 1000
    end

    @max_wait_time = @interval if @max_wait_time.nil?

    require 'zookeeper' if @offset_zookeeper

    @parser_proc = setup_parser

    if @use_record_time and @time_format
      if defined?(Fluent::TimeParser)
        @time_parser = Fluent::TimeParser.new(@time_format)
      else
        @time_parser = Fluent::TextParser::TimeParser.new(@time_format)
      end
    end
  end

  # Returns a Proc that converts one fetched Kafka message into a record hash
  # for the configured format, optionally annotated with topic/partition/offset.
  def setup_parser
    case @format
    when 'json'
      begin
        require 'oj'
        Oj.default_options = Fluent::DEFAULT_OJ_OPTIONS
        Proc.new do |msg, te|
          r = Oj.load(msg.value)
          add_offset_in_hash(r, te, msg.offset) if @add_offset_in_record
          r
        end
      rescue LoadError
        # oj is optional; yajl is the fallback JSON parser.
        require 'yajl'
        Proc.new do |msg, te|
          r = Yajl::Parser.parse(msg.value)
          add_offset_in_hash(r, te, msg.offset) if @add_offset_in_record
          r
        end
      end
    when 'ltsv'
      require 'ltsv'
      Proc.new do |msg, te|
        r = LTSV.parse(msg.value, {:symbolize_keys => false}).first
        add_offset_in_hash(r, te, msg.offset) if @add_offset_in_record
        r
      end
    when 'msgpack'
      require 'msgpack'
      Proc.new do |msg, te|
        r = MessagePack.unpack(msg.value)
        add_offset_in_hash(r, te, msg.offset) if @add_offset_in_record
        r
      end
    when 'text'
      Proc.new do |msg, te|
        r = {@message_key => msg.value}
        add_offset_in_hash(r, te, msg.offset) if @add_offset_in_record
        r
      end
    end
  end

  # Mutates +hash+ in place, stamping it with the source topic, partition
  # and message offset.
  def add_offset_in_hash(hash, te, offset)
    hash['kafka_topic'.freeze]     = te.topic
    hash['kafka_partition'.freeze] = te.partition
    hash['kafka_offset'.freeze]    = offset
  end

  # Builds the Kafka client (picking SCRAM, PLAIN or GSSAPI auth based on
  # which credentials are configured), attaches one TopicWatcher per topic
  # entry to a Coolio loop, and runs that loop on a background thread.
  def start
    super

    @loop = Coolio::Loop.new
    opt = {}
    opt[:max_bytes]     = @max_bytes if @max_bytes
    opt[:max_wait_time] = @max_wait_time if @max_wait_time
    opt[:min_bytes]     = @min_bytes if @min_bytes

    if @scram_mechanism != nil && @username != nil && @password != nil
      @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: log, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
                         ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
                         ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_scram_username: @username, sasl_scram_password: @password,
                         sasl_scram_mechanism: @scram_mechanism, sasl_over_ssl: @sasl_over_ssl)
    elsif @username != nil && @password != nil
      @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: log, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
                         ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
                         ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_plain_username: @username, sasl_plain_password: @password)
    else
      @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: log, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
                         ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
                         ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_gssapi_principal: @principal, sasl_gssapi_keytab: @keytab)
    end

    @zookeeper = Zookeeper.new(@offset_zookeeper) if @offset_zookeeper

    @topic_watchers = @topic_list.map do |topic_entry|
      offset_manager = OffsetManager.new(topic_entry, @zookeeper, @offset_zk_root_node) if @offset_zookeeper
      TopicWatcher.new(
        topic_entry,
        @kafka,
        interval,
        @parser_proc,
        @add_prefix,
        @add_suffix,
        offset_manager,
        router,
        @kafka_message_key,
        opt)
    end
    @topic_watchers.each do |tw|
      tw.attach(@loop)
    end
    @thread = Thread.new(&method(:run))
  end

  def shutdown
    @loop.stop
    @zookeeper.close! if @zookeeper
    @thread.join
    @kafka.close
    super
  end

  def run
    @loop.run
  rescue => e
    $log.error "unexpected error", :error => e.to_s
    $log.error_backtrace
  end

  # Timer that periodically fetches from one topic/partition, starting at the
  # configured offset (or the ZooKeeper-persisted one) and advancing only
  # after events are successfully emitted.
  class TopicWatcher < Coolio::TimerWatcher
    def initialize(topic_entry, kafka, interval, parser, add_prefix, add_suffix, offset_manager, router, kafka_message_key, options={})
      @topic_entry       = topic_entry
      @kafka             = kafka
      @callback          = method(:consume)
      @parser            = parser
      @add_prefix        = add_prefix
      @add_suffix        = add_suffix
      @options           = options
      @offset_manager    = offset_manager
      @router            = router
      @kafka_message_key = kafka_message_key

      # offset == -1 means "not pinned"; defer to ZooKeeper when available.
      @next_offset = @topic_entry.offset
      if @topic_entry.offset == -1 && offset_manager
        @next_offset = offset_manager.next_offset
      end
      @fetch_args = {
        topic:     @topic_entry.topic,
        partition: @topic_entry.partition,
      }.merge(@options)

      super(interval, true)
    end

    def on_timer
      @callback.call
    rescue => e
      # TODO log?
      $log.error e.to_s
      $log.error_backtrace
    end

    def consume
      offset = @next_offset
      @fetch_args[:offset] = offset
      messages = @kafka.fetch_messages(@fetch_args)

      return if messages.size.zero?

      es = Fluent::MultiEventStream.new
      tag = @topic_entry.topic
      tag = @add_prefix + "." + tag if @add_prefix
      tag = tag + "." + @add_suffix if @add_suffix

      messages.each do |msg|
        begin
          record = @parser.call(msg, @topic_entry)
          # NOTE(review): @use_record_time, @time_format and @time_parser are
          # plugin-level config ivars; inside TopicWatcher they are never
          # assigned, so this branch appears to always fall through to
          # Fluent::Engine.now — confirm whether record-time support was
          # intended to be wired through the constructor.
          if @use_record_time
            if @time_format
              record_time = @time_parser.parse(record['time'])
            else
              record_time = record['time']
            end
          else
            record_time = Fluent::Engine.now
          end
          if @kafka_message_key
            record[@kafka_message_key] = msg.key
          end
          es.add(record_time, record)
        rescue => e
          $log.warn "parser error in #{@topic_entry.topic}/#{@topic_entry.partition}", :error => e.to_s, :value => msg.value, :offset => msg.offset
          $log.debug_backtrace
        end
      end
      offset = messages.last.offset + 1

      unless es.empty?
        @router.emit_stream(tag, es)

        if @offset_manager
          @offset_manager.save_offset(offset)
        end
        @next_offset = offset
      end
    end
  end

  # Immutable description of one topic/partition/start-offset to consume.
  class TopicEntry
    def initialize(topic, partition, offset)
      @topic     = topic
      @partition = partition
      @offset    = offset
    end
    attr_reader :topic, :partition, :offset
  end

  # Persists the next offset for one topic/partition under a ZooKeeper node,
  # creating the node path on construction.
  class OffsetManager
    def initialize(topic_entry, zookeeper, zk_root_node)
      @zookeeper = zookeeper
      @zk_path = "#{zk_root_node}/#{topic_entry.topic}/#{topic_entry.partition}/next_offset"
      create_node(@zk_path, topic_entry.topic, topic_entry.partition)
    end

    def create_node(zk_path, topic, partition)
      path = ""
      # Create each path component in turn; existing nodes are a no-op.
      zk_path.split(/(\/[^\/]+)/).reject(&:empty?).each do |dir|
        path = path + dir
        @zookeeper.create(:path => "#{path}")
      end
      $log.trace "use zk offset node : #{path}"
    end

    def next_offset
      @zookeeper.get(:path => @zk_path)[:data].to_i
    end

    def save_offset(offset)
      @zookeeper.set(:path => @zk_path, :data => offset.to_s)
      $log.trace "update zk offset node : #{offset.to_s}"
    end
  end
end
@@ -0,0 +1,283 @@
require 'fluent/input'
require 'fluent/time'
require 'fluent/plugin/kafka_plugin_util'

# Fluentd input plugin that consumes Kafka topics through a consumer group
# (ruby-kafka Consumer API). Batches are parsed per the configured format and
# emitted as Fluentd event streams; on broker errors the consumer is torn
# down and reconnected with bounded (or unbounded) retries.
class Fluent::KafkaGroupInput < Fluent::Input
  Fluent::Plugin.register_input('kafka_group', self)

  config_param :brokers, :string, :default => 'localhost:9092',
               :desc => "List of broker-host:port, separate with comma, must set."
  config_param :consumer_group, :string,
               :desc => "Consumer group name, must set."
  config_param :topics, :string,
               :desc => "Listening topics(separate with comma',')."
  config_param :client_id, :string, :default => 'kafka'
  config_param :sasl_over_ssl, :bool, :default => true,
               :desc => "Set to false to prevent SSL strict mode when using SASL authentication"
  config_param :format, :string, :default => 'json',
               :desc => "Supported format: (json|text|ltsv|msgpack)"
  config_param :message_key, :string, :default => 'message',
               :desc => "For 'text' format only."
  config_param :add_prefix, :string, :default => nil,
               :desc => "Tag prefix (Optional)"
  config_param :add_suffix, :string, :default => nil,
               :desc => "Tag suffix (Optional)"
  config_param :retry_emit_limit, :integer, :default => nil,
               :desc => "How long to stop event consuming when BufferQueueLimitError happens. Wait retry_emit_limit x 1s. The default is waiting until BufferQueueLimitError is resolved"
  config_param :use_record_time, :bool, :default => false,
               :desc => "Replace message timestamp with contents of 'time' field."
  config_param :time_format, :string, :default => nil,
               :desc => "Time format to be used to parse 'time' filed."
  config_param :kafka_message_key, :string, :default => nil,
               :desc => "Set kafka's message key to this field"

  config_param :retry_wait_seconds, :integer, :default => 30
  config_param :disable_retry_limit, :bool, :default => false,
               :desc => "If set true, it disables retry_limit and make Fluentd retry indefinitely (default: false)"
  config_param :retry_limit, :integer, :default => 10,
               :desc => "The maximum number of retries for connecting kafka (default: 10)"
  # Kafka consumer options
  config_param :max_bytes, :integer, :default => 1048576,
               :desc => "Maximum number of bytes to fetch."
  config_param :max_wait_time, :integer, :default => nil,
               :desc => "How long to block until the server sends us data."
  config_param :min_bytes, :integer, :default => nil,
               :desc => "Smallest amount of data the server should send us."
  config_param :session_timeout, :integer, :default => nil,
               :desc => "The number of seconds after which, if a client hasn't contacted the Kafka cluster"
  config_param :offset_commit_interval, :integer, :default => nil,
               :desc => "The interval between offset commits, in seconds"
  config_param :offset_commit_threshold, :integer, :default => nil,
               :desc => "The number of messages that can be processed before their offsets are committed"
  config_param :fetcher_max_queue_size, :integer, :default => nil,
               :desc => "The number of fetched messages per partition that are queued in fetcher queue"
  config_param :start_from_beginning, :bool, :default => true,
               :desc => "Whether to start from the beginning of the topic or just subscribe to new messages being produced"

  include Fluent::KafkaPluginUtil::SSLSettings
  include Fluent::KafkaPluginUtil::SaslSettings

  # Raised internally to break out of emit retries during shutdown.
  class ForShutdown < StandardError
  end

  # Buffer-full exception class differs between Fluentd v0.14+ and older cores.
  BufferError = if defined?(Fluent::Plugin::Buffer::BufferOverflowError)
                  Fluent::Plugin::Buffer::BufferOverflowError
                else
                  Fluent::BufferQueueLimitError
                end

  # Older Fluentd cores do not expose #router; route through the engine there.
  unless method_defined?(:router)
    define_method("router") { Fluent::Engine }
  end

  def initialize
    super
    require 'kafka'

    @time_parser = nil
    @retry_count = 1
  end

  # Splits a comma-separated config value into stripped entries, raising a
  # ConfigError when nothing remains.
  def _config_to_array(config)
    entries = config.split(',').map { |k| k.strip }
    if entries.empty?
      raise Fluent::ConfigError, "kafka_group: '#{config}' is a required parameter"
    end
    entries
  end

  def multi_workers_ready?
    true
  end

  private :_config_to_array

  def configure(conf)
    super

    $log.info "Will watch for topics #{@topics} at brokers " \
              "#{@brokers} and '#{@consumer_group}' group"

    @topics = _config_to_array(@topics)

    if conf['max_wait_ms']
      log.warn "'max_wait_ms' parameter is deprecated. Use second unit 'max_wait_time' instead"
      @max_wait_time = conf['max_wait_ms'].to_i / 1000
    end

    @parser_proc = setup_parser

    # Consumer-level options, only set when explicitly configured.
    @consumer_opts = {:group_id => @consumer_group}
    @consumer_opts[:session_timeout]         = @session_timeout if @session_timeout
    @consumer_opts[:offset_commit_interval]  = @offset_commit_interval if @offset_commit_interval
    @consumer_opts[:offset_commit_threshold] = @offset_commit_threshold if @offset_commit_threshold
    @consumer_opts[:fetcher_max_queue_size]  = @fetcher_max_queue_size if @fetcher_max_queue_size

    # Per-fetch options passed to each_batch.
    @fetch_opts = {}
    @fetch_opts[:max_wait_time] = @max_wait_time if @max_wait_time
    @fetch_opts[:min_bytes]     = @min_bytes if @min_bytes

    if @use_record_time and @time_format
      if defined?(Fluent::TimeParser)
        @time_parser = Fluent::TimeParser.new(@time_format)
      else
        @time_parser = Fluent::TextParser::TimeParser.new(@time_format)
      end
    end
  end

  # Returns a Proc mapping one Kafka message to a record hash for the
  # configured format.
  def setup_parser
    case @format
    when 'json'
      begin
        require 'oj'
        Oj.default_options = Fluent::DEFAULT_OJ_OPTIONS
        Proc.new { |msg| Oj.load(msg.value) }
      rescue LoadError
        # oj is optional; yajl is the fallback JSON parser.
        require 'yajl'
        Proc.new { |msg| Yajl::Parser.parse(msg.value) }
      end
    when 'ltsv'
      require 'ltsv'
      Proc.new { |msg| LTSV.parse(msg.value, {:symbolize_keys => false}).first }
    when 'msgpack'
      require 'msgpack'
      Proc.new { |msg| MessagePack.unpack(msg.value) }
    when 'text'
      Proc.new { |msg| {@message_key => msg.value} }
    end
  end

  # Builds the Kafka client (SCRAM, PLAIN or GSSAPI auth depending on which
  # credentials are configured), subscribes the consumer, and starts the
  # consume loop on a background thread.
  def start
    super

    if @scram_mechanism != nil && @username != nil && @password != nil
      @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: log, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
                         ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
                         ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_scram_username: @username, sasl_scram_password: @password,
                         sasl_scram_mechanism: @scram_mechanism, sasl_over_ssl: @sasl_over_ssl)
    elsif @username != nil && @password != nil
      @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: log, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
                         ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
                         ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_plain_username: @username, sasl_plain_password: @password)
    else
      @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: log, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
                         ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
                         ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_gssapi_principal: @principal, sasl_gssapi_keytab: @keytab)
    end

    @consumer = setup_consumer
    @thread = Thread.new(&method(:run))
  end

  def shutdown
    # This nil assignment should be guarded by mutex in multithread programming manner.
    # But the situation is very low contention, so we don't use mutex for now.
    # If the problem happens, we will add a guard for consumer.
    consumer = @consumer
    @consumer = nil
    consumer.stop

    @thread.join
    @kafka.close
    super
  end

  # Creates the group consumer and subscribes every configured topic.
  def setup_consumer
    consumer = @kafka.consumer(@consumer_opts)
    @topics.each do |topic|
      consumer.subscribe(topic, start_from_beginning: @start_from_beginning, max_bytes_per_partition: @max_bytes)
    end
    consumer
  end

  # Tears down the current consumer, waits, and builds a fresh one. Recurses
  # until success or until @retry_limit is exceeded (unless disabled).
  def reconnect_consumer
    log.warn "Stopping Consumer"
    consumer = @consumer
    @consumer = nil
    if consumer
      consumer.stop
    end
    log.warn "Could not connect to broker. retry_time:#{@retry_count}. Next retry will be in #{@retry_wait_seconds} seconds"
    @retry_count = @retry_count + 1
    sleep @retry_wait_seconds
    @consumer = setup_consumer
    log.warn "Re-starting consumer #{Time.now.to_s}"
    @retry_count = 0
  rescue => e
    log.error "unexpected error during re-starting consumer object access", :error => e.to_s
    log.error_backtrace
    if @retry_count <= @retry_limit or disable_retry_limit
      reconnect_consumer
    end
  end

  # Main consume loop: fetches batches until @consumer is nilled at shutdown.
  def run
    while @consumer
      begin
        @consumer.each_batch(@fetch_opts) do |batch|
          es = Fluent::MultiEventStream.new
          tag = batch.topic
          tag = @add_prefix + "." + tag if @add_prefix
          tag = tag + "." + @add_suffix if @add_suffix

          batch.messages.each do |msg|
            begin
              record = @parser_proc.call(msg)
              if @use_record_time
                if @time_format
                  record_time = @time_parser.parse(record['time'].to_s)
                else
                  record_time = record['time']
                end
              else
                record_time = Fluent::Engine.now
              end
              if @kafka_message_key
                record[@kafka_message_key] = msg.key
              end
              es.add(record_time, record)
            rescue => e
              log.warn "parser error in #{batch.topic}/#{batch.partition}", :error => e.to_s, :value => msg.value, :offset => msg.offset
              log.debug_backtrace
            end
          end

          unless es.empty?
            emit_events(tag, es)
          end
        end
      rescue ForShutdown
      rescue => e
        log.error "unexpected error during consuming events from kafka. Re-fetch events.", :error => e.to_s
        log.error_backtrace
        reconnect_consumer
      end
    end
  rescue => e
    log.error "unexpected error during consumer object access", :error => e.to_s
    log.error_backtrace
  end

  # Emits one event stream, retrying on buffer-full: indefinitely when
  # retry_emit_limit is nil, otherwise up to retry_emit_limit times.
  def emit_events(tag, es)
    retries = 0
    begin
      router.emit_stream(tag, es)
    rescue BufferError
      raise ForShutdown if @consumer.nil?

      if @retry_emit_limit.nil?
        sleep 1
        retry
      end

      if retries < @retry_emit_limit
        retries += 1
        sleep 1
        retry
      else
        raise RuntimeError, "Exceeds retry_emit_limit"
      end
    end
  end
end