sk-fluent-plugin-kafka 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile ADDED
@@ -0,0 +1,12 @@
+ require 'bundler'
+ Bundler::GemHelper.install_tasks
+
+ require 'rake/testtask'
+
+ Rake::TestTask.new(:test) do |test|
+   test.libs << 'lib' << 'test'
+   test.test_files = FileList['test/**/test_*.rb']
+   test.verbose = true
+ end
+
+ task :default => [:build]
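The Rake::TestTask above collects every file matching test/**/test_*.rb; running `rake test` executes them, while `rake` alone runs the default :build task. As a hedged illustration only (the file name and contents below are assumed, not part of this release), a minimal test-unit file it would pick up could look like:

require 'test/unit'

# Hypothetical file, e.g. test/plugin/test_smoke.rb; any test/**/test_*.rb file
# is collected by the Rake::TestTask defined in the Rakefile above.
class SmokeTest < Test::Unit::TestCase
  def test_smoke
    assert true
  end
end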
@@ -0,0 +1,24 @@
+ # -*- encoding: utf-8 -*-
+
+ Gem::Specification.new do |gem|
+   gem.authors = ["Hidemasa Togashi", "Masahiro Nakagawa"]
+   gem.email = ["sandeep.kotha@live.com"]
+   gem.description = %q{Fluentd plugin for Apache Kafka > 0.8}
+   gem.summary = %q{Fluentd plugin for Apache Kafka > 0.8}
+   gem.homepage = "https://github.com/fluent/fluent-plugin-kafka"
+   gem.license = "Apache-2.0"
+
+   gem.files = `git ls-files`.split($\)
+   gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
+   gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
+   gem.name = "sk-fluent-plugin-kafka"
+   gem.require_paths = ["lib"]
+   gem.version = '0.8.0'
+   gem.required_ruby_version = ">= 2.1.0"
+
+   gem.add_dependency "fluentd", [">= 0.10.58", "< 2"]
+   gem.add_dependency 'ltsv'
+   gem.add_dependency 'ruby-kafka', '>= 0.7.1', '< 0.8.0'
+   gem.add_development_dependency "rake", ">= 0.9.2"
+   gem.add_development_dependency "test-unit", ">= 3.0.8"
+ end
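The gemspec pins ruby-kafka to the 0.7.x series and fluentd below 2. As a sketch of how an application would consume this release, a minimal Gemfile could mirror those constraints (the source URL is the standard public registry; the Gemfile itself is illustrative, not part of the package):

# Hypothetical Gemfile for an app pulling in this release; constraints follow the gemspec above.
source 'https://rubygems.org'

gem 'fluentd', '>= 0.10.58', '< 2'
gem 'sk-fluent-plugin-kafka', '0.8.0'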
@@ -0,0 +1,341 @@
+ require 'fluent/input'
+ require 'fluent/time'
+ require 'fluent/plugin/kafka_plugin_util'
+
+ class Fluent::KafkaInput < Fluent::Input
+   Fluent::Plugin.register_input('kafka', self)
+
+   config_param :format, :string, :default => 'json',
+                :desc => "Supported format: (json|text|ltsv|msgpack)"
+   config_param :message_key, :string, :default => 'message',
+                :desc => "For 'text' format only."
+   config_param :host, :string, :default => nil,
+                :desc => "Broker host"
+   config_param :port, :integer, :default => nil,
+                :desc => "Broker port"
+   config_param :brokers, :string, :default => 'localhost:9092',
+                :desc => "List of broker-host:port, separate with comma, must set."
+   config_param :interval, :integer, :default => 1, # seconds
+                :desc => "Interval (Unit: seconds)"
+   config_param :topics, :string, :default => nil,
+                :desc => "Listening topics(separate with comma',')"
+   config_param :client_id, :string, :default => 'kafka'
+   config_param :partition, :integer, :default => 0,
+                :desc => "Listening partition"
+   config_param :offset, :integer, :default => -1,
+                :desc => "Listening start offset"
+   config_param :add_prefix, :string, :default => nil,
+                :desc => "Tag prefix"
+   config_param :add_suffix, :string, :default => nil,
+                :desc => "tag suffix"
+   config_param :add_offset_in_record, :bool, :default => false
+
+   config_param :offset_zookeeper, :string, :default => nil
+   config_param :offset_zk_root_node, :string, :default => '/fluent-plugin-kafka'
+   config_param :use_record_time, :bool, :default => false,
+                :desc => "Replace message timestamp with contents of 'time' field."
+   config_param :time_format, :string, :default => nil,
+                :desc => "Time format to be used to parse 'time' filed."
+   config_param :kafka_message_key, :string, :default => nil,
+                :desc => "Set kafka's message key to this field"
+
+   # Kafka#fetch_messages options
+   config_param :max_bytes, :integer, :default => nil,
+                :desc => "Maximum number of bytes to fetch."
+   config_param :max_wait_time, :integer, :default => nil,
+                :desc => "How long to block until the server sends us data."
+   config_param :min_bytes, :integer, :default => nil,
+                :desc => "Smallest amount of data the server should send us."
+
+   include Fluent::KafkaPluginUtil::SSLSettings
+   include Fluent::KafkaPluginUtil::SaslSettings
+
+   unless method_defined?(:router)
+     define_method("router") { Fluent::Engine }
+   end
+
+   def initialize
+     super
+     require 'kafka'
+
+     @time_parser = nil
+   end
+
+   def configure(conf)
+     super
+
+     @topic_list = []
+     if @topics
+       @topic_list = @topics.split(',').map { |topic|
+         TopicEntry.new(topic.strip, @partition, @offset)
+       }
+     else
+       conf.elements.select { |element| element.name == 'topic' }.each do |element|
+         unless element.has_key?('topic')
+           raise Fluent::ConfigError, "kafka: 'topic' is a require parameter in 'topic element'."
+         end
+         partition = element.has_key?('partition') ? element['partition'].to_i : 0
+         offset = element.has_key?('offset') ? element['offset'].to_i : -1
+         @topic_list.push(TopicEntry.new(element['topic'], partition, offset))
+       end
+     end
+
+     if @topic_list.empty?
+       raise Fluent::ConfigError, "kafka: 'topics' or 'topic element' is a require parameter"
+     end
+
+     # For backward compatibility
+     @brokers = case
+                when @host && @port
+                  ["#{@host}:#{@port}"]
+                when @host
+                  ["#{@host}:9092"]
+                when @port
+                  ["localhost:#{@port}"]
+                else
+                  @brokers
+                end
+
+     if conf['max_wait_ms']
+       log.warn "'max_wait_ms' parameter is deprecated. Use second unit 'max_wait_time' instead"
+       @max_wait_time = conf['max_wait_ms'].to_i / 1000
+     end
+
+     @max_wait_time = @interval if @max_wait_time.nil?
+
+     require 'zookeeper' if @offset_zookeeper
+
+     @parser_proc = setup_parser
+
+     if @use_record_time and @time_format
+       if defined?(Fluent::TimeParser)
+         @time_parser = Fluent::TimeParser.new(@time_format)
+       else
+         @time_parser = Fluent::TextParser::TimeParser.new(@time_format)
+       end
+     end
+   end
+
+   def setup_parser
+     case @format
+     when 'json'
+       begin
+         require 'oj'
+         Oj.default_options = Fluent::DEFAULT_OJ_OPTIONS
+         Proc.new { |msg, te|
+           r = Oj.load(msg.value)
+           add_offset_in_hash(r, te, msg.offset) if @add_offset_in_record
+           r
+         }
+       rescue LoadError
+         require 'yajl'
+         Proc.new { |msg, te|
+           r = Yajl::Parser.parse(msg.value)
+           add_offset_in_hash(r, te, msg.offset) if @add_offset_in_record
+           r
+         }
+       end
+     when 'ltsv'
+       require 'ltsv'
+       Proc.new { |msg, te|
+         r = LTSV.parse(msg.value, {:symbolize_keys => false}).first
+         add_offset_in_hash(r, te, msg.offset) if @add_offset_in_record
+         r
+       }
+     when 'msgpack'
+       require 'msgpack'
+       Proc.new { |msg, te|
+         r = MessagePack.unpack(msg.value)
+         add_offset_in_hash(r, te, msg.offset) if @add_offset_in_record
+         r
+       }
+     when 'text'
+       Proc.new { |msg, te|
+         r = {@message_key => msg.value}
+         add_offset_in_hash(r, te, msg.offset) if @add_offset_in_record
+         r
+       }
+     end
+   end
+
+   def add_offset_in_hash(hash, te, offset)
+     hash['kafka_topic'.freeze] = te.topic
+     hash['kafka_partition'.freeze] = te.partition
+     hash['kafka_offset'.freeze] = offset
+   end
+
+   def start
+     super
+
+     @loop = Coolio::Loop.new
+     opt = {}
+     opt[:max_bytes] = @max_bytes if @max_bytes
+     opt[:max_wait_time] = @max_wait_time if @max_wait_time
+     opt[:min_bytes] = @min_bytes if @min_bytes
+
+     if @scram_mechanism != nil && @username != nil && @password != nil
+       @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: log, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
+                          ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
+                          ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_scram_username: @username, sasl_scram_password: @password,
+                          sasl_scram_mechanism: @scram_mechanism)
+     elsif @username != nil && @password != nil
+       @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: log, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
+                          ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
+                          ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_plain_username: @username, sasl_plain_password: @password)
+     else
+       @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: log, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
+                          ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
+                          ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_gssapi_principal: @principal, sasl_gssapi_keytab: @keytab)
+     end
+
+     @zookeeper = Zookeeper.new(@offset_zookeeper) if @offset_zookeeper
+
+     @topic_watchers = @topic_list.map {|topic_entry|
+       offset_manager = OffsetManager.new(topic_entry, @zookeeper, @offset_zk_root_node) if @offset_zookeeper
+       TopicWatcher.new(
+         topic_entry,
+         @kafka,
+         interval,
+         @parser_proc,
+         @add_prefix,
+         @add_suffix,
+         offset_manager,
+         router,
+         @kafka_message_key,
+         opt)
+     }
+     @topic_watchers.each {|tw|
+       tw.attach(@loop)
+     }
+     @thread = Thread.new(&method(:run))
+   end
+
+   def shutdown
+     @loop.stop
+     @zookeeper.close! if @zookeeper
+     @thread.join
+     @kafka.close
+     super
+   end
+
+   def run
+     @loop.run
+   rescue => e
+     $log.error "unexpected error", :error => e.to_s
+     $log.error_backtrace
+   end
+
+   class TopicWatcher < Coolio::TimerWatcher
+     def initialize(topic_entry, kafka, interval, parser, add_prefix, add_suffix, offset_manager, router, kafka_message_key, options={})
+       @topic_entry = topic_entry
+       @kafka = kafka
+       @callback = method(:consume)
+       @parser = parser
+       @add_prefix = add_prefix
+       @add_suffix = add_suffix
+       @options = options
+       @offset_manager = offset_manager
+       @router = router
+       @kafka_message_key = kafka_message_key
+
+       @next_offset = @topic_entry.offset
+       if @topic_entry.offset == -1 && offset_manager
+         @next_offset = offset_manager.next_offset
+       end
+       @fetch_args = {
+         topic: @topic_entry.topic,
+         partition: @topic_entry.partition,
+       }.merge(@options)
+
+       super(interval, true)
+     end
+
+     def on_timer
+       @callback.call
+     rescue => e
+       # TODO log?
+       $log.error e.to_s
+       $log.error_backtrace
+     end
+
+     def consume
+       offset = @next_offset
+       @fetch_args[:offset] = offset
+       messages = @kafka.fetch_messages(@fetch_args)
+
+       return if messages.size.zero?
+
+       es = Fluent::MultiEventStream.new
+       tag = @topic_entry.topic
+       tag = @add_prefix + "." + tag if @add_prefix
+       tag = tag + "." + @add_suffix if @add_suffix
+
+       messages.each { |msg|
+         begin
+           record = @parser.call(msg, @topic_entry)
+           if @use_record_time
+             if @time_format
+               record_time = @time_parser.parse(record['time'])
+             else
+               record_time = record['time']
+             end
+           else
+             record_time = Fluent::Engine.now
+           end
+           if @kafka_message_key
+             record[@kafka_message_key] = msg.key
+           end
+           es.add(record_time, record)
+         rescue => e
+           $log.warn "parser error in #{@topic_entry.topic}/#{@topic_entry.partition}", :error => e.to_s, :value => msg.value, :offset => msg.offset
+           $log.debug_backtrace
+         end
+       }
+       offset = messages.last.offset + 1
+
+       unless es.empty?
+         @router.emit_stream(tag, es)
+
+         if @offset_manager
+           @offset_manager.save_offset(offset)
+         end
+         @next_offset = offset
+       end
+     end
+   end
+
+   class TopicEntry
+     def initialize(topic, partition, offset)
+       @topic = topic
+       @partition = partition
+       @offset = offset
+     end
+     attr_reader :topic, :partition, :offset
+   end
+
+   class OffsetManager
+     def initialize(topic_entry, zookeeper, zk_root_node)
+       @zookeeper = zookeeper
+       @zk_path = "#{zk_root_node}/#{topic_entry.topic}/#{topic_entry.partition}/next_offset"
+       create_node(@zk_path, topic_entry.topic, topic_entry.partition)
+     end
+
+     def create_node(zk_path, topic, partition)
+       path = ""
+       zk_path.split(/(\/[^\/]+)/).reject(&:empty?).each { |dir|
+         path = path + dir
+         @zookeeper.create(:path => "#{path}")
+       }
+       $log.trace "use zk offset node : #{path}"
+     end
+
+     def next_offset
+       @zookeeper.get(:path => @zk_path)[:data].to_i
+     end
+
+     def save_offset(offset)
+       @zookeeper.set(:path => @zk_path, :data => offset.to_s)
+       $log.trace "update zk offset node : #{offset.to_s}"
+     end
+   end
+ end
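For reference, the per-topic polling that TopicWatcher#consume performs maps onto ruby-kafka's Kafka#fetch_messages. A minimal standalone sketch of that cycle, assuming a broker at localhost:9092 and a topic named 'logs' (both illustrative, not taken from the plugin):

require 'kafka'

# Sketch of the fetch loop in TopicWatcher#consume: fetch from an explicit offset,
# handle each message, then resume after the last consumed offset.
kafka  = Kafka.new(seed_brokers: ['localhost:9092'], client_id: 'kafka')
offset = :earliest

loop do
  messages = kafka.fetch_messages(topic: 'logs', partition: 0, offset: offset)
  messages.each { |msg| puts "#{msg.offset}: #{msg.value}" }
  offset = messages.last.offset + 1 unless messages.empty?
  sleep 1  # the plugin drives this cycle via a Coolio timer set to `interval`
end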
@@ -0,0 +1,281 @@
+ require 'fluent/input'
+ require 'fluent/time'
+ require 'fluent/plugin/kafka_plugin_util'
+
+ class Fluent::KafkaGroupInput < Fluent::Input
+   Fluent::Plugin.register_input('kafka_group', self)
+
+   config_param :brokers, :string, :default => 'localhost:9092',
+                :desc => "List of broker-host:port, separate with comma, must set."
+   config_param :consumer_group, :string,
+                :desc => "Consumer group name, must set."
+   config_param :topics, :string,
+                :desc => "Listening topics(separate with comma',')."
+   config_param :client_id, :string, :default => 'kafka'
+   config_param :format, :string, :default => 'json',
+                :desc => "Supported format: (json|text|ltsv|msgpack)"
+   config_param :message_key, :string, :default => 'message',
+                :desc => "For 'text' format only."
+   config_param :add_prefix, :string, :default => nil,
+                :desc => "Tag prefix (Optional)"
+   config_param :add_suffix, :string, :default => nil,
+                :desc => "Tag suffix (Optional)"
+   config_param :retry_emit_limit, :integer, :default => nil,
+                :desc => "How long to stop event consuming when BufferQueueLimitError happens. Wait retry_emit_limit x 1s. The default is waiting until BufferQueueLimitError is resolved"
+   config_param :use_record_time, :bool, :default => false,
+                :desc => "Replace message timestamp with contents of 'time' field."
+   config_param :time_format, :string, :default => nil,
+                :desc => "Time format to be used to parse 'time' filed."
+   config_param :kafka_message_key, :string, :default => nil,
+                :desc => "Set kafka's message key to this field"
+
+   config_param :retry_wait_seconds, :integer, :default => 30
+   config_param :disable_retry_limit, :bool, :default => false,
+                :desc => "If set true, it disables retry_limit and make Fluentd retry indefinitely (default: false)"
+   config_param :retry_limit, :integer, :default => 10,
+                :desc => "The maximum number of retries for connecting kafka (default: 10)"
+   # Kafka consumer options
+   config_param :max_bytes, :integer, :default => 1048576,
+                :desc => "Maximum number of bytes to fetch."
+   config_param :max_wait_time, :integer, :default => nil,
+                :desc => "How long to block until the server sends us data."
+   config_param :min_bytes, :integer, :default => nil,
+                :desc => "Smallest amount of data the server should send us."
+   config_param :session_timeout, :integer, :default => nil,
+                :desc => "The number of seconds after which, if a client hasn't contacted the Kafka cluster"
+   config_param :offset_commit_interval, :integer, :default => nil,
+                :desc => "The interval between offset commits, in seconds"
+   config_param :offset_commit_threshold, :integer, :default => nil,
+                :desc => "The number of messages that can be processed before their offsets are committed"
+   config_param :fetcher_max_queue_size, :integer, :default => nil,
+                :desc => "The number of fetched messages per partition that are queued in fetcher queue"
+   config_param :start_from_beginning, :bool, :default => true,
+                :desc => "Whether to start from the beginning of the topic or just subscribe to new messages being produced"
+
+   include Fluent::KafkaPluginUtil::SSLSettings
+   include Fluent::KafkaPluginUtil::SaslSettings
+
+   class ForShutdown < StandardError
+   end
+
+   BufferError = if defined?(Fluent::Plugin::Buffer::BufferOverflowError)
+                   Fluent::Plugin::Buffer::BufferOverflowError
+                 else
+                   Fluent::BufferQueueLimitError
+                 end
+
+   unless method_defined?(:router)
+     define_method("router") { Fluent::Engine }
+   end
+
+   def initialize
+     super
+     require 'kafka'
+
+     @time_parser = nil
+     @retry_count = 1
+   end
+
+   def _config_to_array(config)
+     config_array = config.split(',').map {|k| k.strip }
+     if config_array.empty?
+       raise Fluent::ConfigError, "kafka_group: '#{config}' is a required parameter"
+     end
+     config_array
+   end
+
+   def multi_workers_ready?
+     true
+   end
+
+   private :_config_to_array
+
+   def configure(conf)
+     super
+
+     $log.info "Will watch for topics #{@topics} at brokers " \
+               "#{@brokers} and '#{@consumer_group}' group"
+
+     @topics = _config_to_array(@topics)
+
+     if conf['max_wait_ms']
+       log.warn "'max_wait_ms' parameter is deprecated. Use second unit 'max_wait_time' instead"
+       @max_wait_time = conf['max_wait_ms'].to_i / 1000
+     end
+
+     @parser_proc = setup_parser
+
+     @consumer_opts = {:group_id => @consumer_group}
+     @consumer_opts[:session_timeout] = @session_timeout if @session_timeout
+     @consumer_opts[:offset_commit_interval] = @offset_commit_interval if @offset_commit_interval
+     @consumer_opts[:offset_commit_threshold] = @offset_commit_threshold if @offset_commit_threshold
+     @consumer_opts[:fetcher_max_queue_size] = @fetcher_max_queue_size if @fetcher_max_queue_size
+
+     @fetch_opts = {}
+     @fetch_opts[:max_wait_time] = @max_wait_time if @max_wait_time
+     @fetch_opts[:min_bytes] = @min_bytes if @min_bytes
+
+     if @use_record_time and @time_format
+       if defined?(Fluent::TimeParser)
+         @time_parser = Fluent::TimeParser.new(@time_format)
+       else
+         @time_parser = Fluent::TextParser::TimeParser.new(@time_format)
+       end
+     end
+   end
+
+   def setup_parser
+     case @format
+     when 'json'
+       begin
+         require 'oj'
+         Oj.default_options = Fluent::DEFAULT_OJ_OPTIONS
+         Proc.new { |msg| Oj.load(msg.value) }
+       rescue LoadError
+         require 'yajl'
+         Proc.new { |msg| Yajl::Parser.parse(msg.value) }
+       end
+     when 'ltsv'
+       require 'ltsv'
+       Proc.new { |msg| LTSV.parse(msg.value, {:symbolize_keys => false}).first }
+     when 'msgpack'
+       require 'msgpack'
+       Proc.new { |msg| MessagePack.unpack(msg.value) }
+     when 'text'
+       Proc.new { |msg| {@message_key => msg.value} }
+     end
+   end
+
+   def start
+     super
+
+     if @scram_mechanism != nil && @username != nil && @password != nil
+       @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: log, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
+                          ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
+                          ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_scram_username: @username, sasl_scram_password: @password,
+                          sasl_scram_mechanism: @scram_mechanism)
+     elsif @username != nil && @password != nil
+       @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: log, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
+                          ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
+                          ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_plain_username: @username, sasl_plain_password: @password)
+     else
+       @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: log, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
+                          ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
+                          ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_gssapi_principal: @principal, sasl_gssapi_keytab: @keytab)
+     end
+
+     @consumer = setup_consumer
+     @thread = Thread.new(&method(:run))
+   end
+
+   def shutdown
+     # This nil assignment should be guarded by mutex in multithread programming manner.
+     # But the situation is very low contention, so we don't use mutex for now.
+     # If the problem happens, we will add a guard for consumer.
+     consumer = @consumer
+     @consumer = nil
+     consumer.stop
+
+     @thread.join
+     @kafka.close
+     super
+   end
+
+   def setup_consumer
+     consumer = @kafka.consumer(@consumer_opts)
+     @topics.each { |topic|
+       consumer.subscribe(topic, start_from_beginning: @start_from_beginning, max_bytes_per_partition: @max_bytes)
+     }
+     consumer
+   end
+
+   def reconnect_consumer
+     log.warn "Stopping Consumer"
+     consumer = @consumer
+     @consumer = nil
+     if consumer
+       consumer.stop
+     end
+     log.warn "Could not connect to broker. retry_time:#{@retry_count}. Next retry will be in #{@retry_wait_seconds} seconds"
+     @retry_count = @retry_count + 1
+     sleep @retry_wait_seconds
+     @consumer = setup_consumer
+     log.warn "Re-starting consumer #{Time.now.to_s}"
+     @retry_count = 0
+   rescue =>e
+     log.error "unexpected error during re-starting consumer object access", :error => e.to_s
+     log.error_backtrace
+     if @retry_count <= @retry_limit or disable_retry_limit
+       reconnect_consumer
+     end
+   end
+
+   def run
+     while @consumer
+       begin
+         @consumer.each_batch(@fetch_opts) { |batch|
+           es = Fluent::MultiEventStream.new
+           tag = batch.topic
+           tag = @add_prefix + "." + tag if @add_prefix
+           tag = tag + "." + @add_suffix if @add_suffix
+
+           batch.messages.each { |msg|
+             begin
+               record = @parser_proc.call(msg)
+               if @use_record_time
+                 if @time_format
+                   record_time = @time_parser.parse(record['time'])
+                 else
+                   record_time = record['time']
+                 end
+               else
+                 record_time = Fluent::Engine.now
+               end
+               if @kafka_message_key
+                 record[@kafka_message_key] = msg.key
+               end
+               es.add(record_time, record)
+             rescue => e
+               log.warn "parser error in #{batch.topic}/#{batch.partition}", :error => e.to_s, :value => msg.value, :offset => msg.offset
+               log.debug_backtrace
+             end
+           }
+
+           unless es.empty?
+             emit_events(tag, es)
+           end
+         }
+       rescue ForShutdown
+       rescue => e
+         log.error "unexpected error during consuming events from kafka. Re-fetch events.", :error => e.to_s
+         log.error_backtrace
+         reconnect_consumer
+       end
+     end
+   rescue => e
+     log.error "unexpected error during consumer object access", :error => e.to_s
+     log.error_backtrace
+   end
+
+   def emit_events(tag, es)
+     retries = 0
+     begin
+       router.emit_stream(tag, es)
+     rescue BufferError
+       raise ForShutdown if @consumer.nil?
+
+       if @retry_emit_limit.nil?
+         sleep 1
+         retry
+       end
+
+       if retries < @retry_emit_limit
+         retries += 1
+         sleep 1
+         retry
+       else
+         raise RuntimeError, "Exceeds retry_emit_limit"
+       end
+     end
+   end
+ end
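Unlike the plain kafka input above, the group input delegates offset tracking to ruby-kafka's consumer-group API. A minimal sketch of what setup_consumer and run do together, assuming a broker at localhost:9092, a topic 'logs', and a group 'fluentd-consumer' (all illustrative, not taken from the plugin):

require 'kafka'

# Consumer-group sketch mirroring setup_consumer/run: subscribe, then let each_batch
# deliver message batches while the group coordinator tracks committed offsets.
kafka    = Kafka.new(seed_brokers: ['localhost:9092'], client_id: 'kafka')
consumer = kafka.consumer(group_id: 'fluentd-consumer')
consumer.subscribe('logs', start_from_beginning: true, max_bytes_per_partition: 1_048_576)

consumer.each_batch do |batch|
  batch.messages.each do |msg|
    puts "#{batch.topic}/#{batch.partition}@#{msg.offset}: #{msg.value}"
  end
end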