sk-fluent-plugin-kafka 0.8.0

data/Rakefile ADDED
@@ -0,0 +1,12 @@
+ require 'bundler'
+ Bundler::GemHelper.install_tasks
+
+ require 'rake/testtask'
+
+ Rake::TestTask.new(:test) do |test|
+   test.libs << 'lib' << 'test'
+   test.test_files = FileList['test/**/test_*.rb']
+   test.verbose = true
+ end
+
+ task :default => [:build]
@@ -0,0 +1,24 @@
+ # -*- encoding: utf-8 -*-
+
+ Gem::Specification.new do |gem|
+   gem.authors = ["Hidemasa Togashi", "Masahiro Nakagawa"]
+   gem.email = ["sandeep.kotha@live.com"]
+   gem.description = %q{Fluentd plugin for Apache Kafka > 0.8}
+   gem.summary = %q{Fluentd plugin for Apache Kafka > 0.8}
+   gem.homepage = "https://github.com/fluent/fluent-plugin-kafka"
+   gem.license = "Apache-2.0"
+
+   gem.files = `git ls-files`.split($\)
+   gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
+   gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
+   gem.name = "sk-fluent-plugin-kafka"
+   gem.require_paths = ["lib"]
+   gem.version = '0.8.0'
+   gem.required_ruby_version = ">= 2.1.0"
+
+   gem.add_dependency "fluentd", [">= 0.10.58", "< 2"]
+   gem.add_dependency 'ltsv'
+   gem.add_dependency 'ruby-kafka', '>= 0.7.1', '< 0.8.0'
+   gem.add_development_dependency "rake", ">= 0.9.2"
+   gem.add_development_dependency "test-unit", ">= 3.0.8"
+ end
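Note: the Rakefile and gemspec above are standard Bundler packaging; running "rake" builds the gem (the default task) and "rake test" runs the test suite. For context, a minimal Gemfile for a hypothetical downstream Fluentd project that would consume this release is sketched below; only the gem name, version, and the fluentd version range are taken from the gemspec, the rest is ordinary Bundler usage.

    # Gemfile of a hypothetical project installing this release
    source 'https://rubygems.org'

    gem 'fluentd', ['>= 0.10.58', '< 2']   # same range the gemspec allows
    gem 'sk-fluent-plugin-kafka', '0.8.0'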
@@ -0,0 +1,341 @@
+ require 'fluent/input'
+ require 'fluent/time'
+ require 'fluent/plugin/kafka_plugin_util'
+
+ class Fluent::KafkaInput < Fluent::Input
+   Fluent::Plugin.register_input('kafka', self)
+
+   config_param :format, :string, :default => 'json',
+                :desc => "Supported format: (json|text|ltsv|msgpack)"
+   config_param :message_key, :string, :default => 'message',
+                :desc => "For 'text' format only."
+   config_param :host, :string, :default => nil,
+                :desc => "Broker host"
+   config_param :port, :integer, :default => nil,
+                :desc => "Broker port"
+   config_param :brokers, :string, :default => 'localhost:9092',
+                :desc => "List of broker host:port pairs, separated by commas. Must be set."
+   config_param :interval, :integer, :default => 1, # seconds
+                :desc => "Interval (Unit: seconds)"
+   config_param :topics, :string, :default => nil,
+                :desc => "Listening topics (separated by comma ',')"
+   config_param :client_id, :string, :default => 'kafka'
+   config_param :partition, :integer, :default => 0,
+                :desc => "Listening partition"
+   config_param :offset, :integer, :default => -1,
+                :desc => "Listening start offset"
+   config_param :add_prefix, :string, :default => nil,
+                :desc => "Tag prefix"
+   config_param :add_suffix, :string, :default => nil,
+                :desc => "Tag suffix"
+   config_param :add_offset_in_record, :bool, :default => false
+
+   config_param :offset_zookeeper, :string, :default => nil
+   config_param :offset_zk_root_node, :string, :default => '/fluent-plugin-kafka'
+   config_param :use_record_time, :bool, :default => false,
+                :desc => "Replace message timestamp with contents of 'time' field."
+   config_param :time_format, :string, :default => nil,
+                :desc => "Time format to be used to parse the 'time' field."
+   config_param :kafka_message_key, :string, :default => nil,
+                :desc => "Set kafka's message key to this field"
+
+   # Kafka#fetch_messages options
+   config_param :max_bytes, :integer, :default => nil,
+                :desc => "Maximum number of bytes to fetch."
+   config_param :max_wait_time, :integer, :default => nil,
+                :desc => "How long to block until the server sends us data."
+   config_param :min_bytes, :integer, :default => nil,
+                :desc => "Smallest amount of data the server should send us."
+
+   include Fluent::KafkaPluginUtil::SSLSettings
+   include Fluent::KafkaPluginUtil::SaslSettings
+
+   unless method_defined?(:router)
+     define_method("router") { Fluent::Engine }
+   end
+
+   def initialize
+     super
+     require 'kafka'
+
+     @time_parser = nil
+   end
+
+   def configure(conf)
+     super
+
+     @topic_list = []
+     if @topics
+       @topic_list = @topics.split(',').map { |topic|
+         TopicEntry.new(topic.strip, @partition, @offset)
+       }
+     else
+       conf.elements.select { |element| element.name == 'topic' }.each do |element|
+         unless element.has_key?('topic')
+           raise Fluent::ConfigError, "kafka: 'topic' is a required parameter in the 'topic' element."
+         end
+         partition = element.has_key?('partition') ? element['partition'].to_i : 0
+         offset = element.has_key?('offset') ? element['offset'].to_i : -1
+         @topic_list.push(TopicEntry.new(element['topic'], partition, offset))
+       end
+     end
+
+     if @topic_list.empty?
+       raise Fluent::ConfigError, "kafka: 'topics' or a 'topic' element is a required parameter"
+     end
+
+     # For backward compatibility
+     @brokers = case
+                when @host && @port
+                  ["#{@host}:#{@port}"]
+                when @host
+                  ["#{@host}:9092"]
+                when @port
+                  ["localhost:#{@port}"]
+                else
+                  @brokers
+                end
+
+     if conf['max_wait_ms']
+       log.warn "'max_wait_ms' parameter is deprecated. Use second unit 'max_wait_time' instead"
+       @max_wait_time = conf['max_wait_ms'].to_i / 1000
+     end
+
+     @max_wait_time = @interval if @max_wait_time.nil?
+
+     require 'zookeeper' if @offset_zookeeper
+
+     @parser_proc = setup_parser
+
+     if @use_record_time and @time_format
+       if defined?(Fluent::TimeParser)
+         @time_parser = Fluent::TimeParser.new(@time_format)
+       else
+         @time_parser = Fluent::TextParser::TimeParser.new(@time_format)
+       end
+     end
+   end
+
+   def setup_parser
+     case @format
+     when 'json'
+       begin
+         require 'oj'
+         Oj.default_options = Fluent::DEFAULT_OJ_OPTIONS
+         Proc.new { |msg, te|
+           r = Oj.load(msg.value)
+           add_offset_in_hash(r, te, msg.offset) if @add_offset_in_record
+           r
+         }
+       rescue LoadError
+         require 'yajl'
+         Proc.new { |msg, te|
+           r = Yajl::Parser.parse(msg.value)
+           add_offset_in_hash(r, te, msg.offset) if @add_offset_in_record
+           r
+         }
+       end
+     when 'ltsv'
+       require 'ltsv'
+       Proc.new { |msg, te|
+         r = LTSV.parse(msg.value, {:symbolize_keys => false}).first
+         add_offset_in_hash(r, te, msg.offset) if @add_offset_in_record
+         r
+       }
+     when 'msgpack'
+       require 'msgpack'
+       Proc.new { |msg, te|
+         r = MessagePack.unpack(msg.value)
+         add_offset_in_hash(r, te, msg.offset) if @add_offset_in_record
+         r
+       }
+     when 'text'
+       Proc.new { |msg, te|
+         r = {@message_key => msg.value}
+         add_offset_in_hash(r, te, msg.offset) if @add_offset_in_record
+         r
+       }
+     end
+   end
+
+   def add_offset_in_hash(hash, te, offset)
+     hash['kafka_topic'.freeze] = te.topic
+     hash['kafka_partition'.freeze] = te.partition
+     hash['kafka_offset'.freeze] = offset
+   end
+
+   def start
+     super
+
+     @loop = Coolio::Loop.new
+     opt = {}
+     opt[:max_bytes] = @max_bytes if @max_bytes
+     opt[:max_wait_time] = @max_wait_time if @max_wait_time
+     opt[:min_bytes] = @min_bytes if @min_bytes
+
+     if @scram_mechanism != nil && @username != nil && @password != nil
+       @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: log, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
+                          ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
+                          ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_scram_username: @username, sasl_scram_password: @password,
+                          sasl_scram_mechanism: @scram_mechanism)
+     elsif @username != nil && @password != nil
+       @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: log, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
+                          ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
+                          ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_plain_username: @username, sasl_plain_password: @password)
+     else
+       @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: log, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
+                          ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
+                          ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_gssapi_principal: @principal, sasl_gssapi_keytab: @keytab)
+     end
+
+     @zookeeper = Zookeeper.new(@offset_zookeeper) if @offset_zookeeper
+
+     @topic_watchers = @topic_list.map {|topic_entry|
+       offset_manager = OffsetManager.new(topic_entry, @zookeeper, @offset_zk_root_node) if @offset_zookeeper
+       TopicWatcher.new(
+         topic_entry,
+         @kafka,
+         interval,
+         @parser_proc,
+         @add_prefix,
+         @add_suffix,
+         offset_manager,
+         router,
+         @kafka_message_key,
+         opt)
+     }
+     @topic_watchers.each {|tw|
+       tw.attach(@loop)
+     }
+     @thread = Thread.new(&method(:run))
+   end
+
+   def shutdown
+     @loop.stop
+     @zookeeper.close! if @zookeeper
+     @thread.join
+     @kafka.close
+     super
+   end
+
+   def run
+     @loop.run
+   rescue => e
+     $log.error "unexpected error", :error => e.to_s
+     $log.error_backtrace
+   end
+
+   class TopicWatcher < Coolio::TimerWatcher
+     def initialize(topic_entry, kafka, interval, parser, add_prefix, add_suffix, offset_manager, router, kafka_message_key, options={})
+       @topic_entry = topic_entry
+       @kafka = kafka
+       @callback = method(:consume)
+       @parser = parser
+       @add_prefix = add_prefix
+       @add_suffix = add_suffix
+       @options = options
+       @offset_manager = offset_manager
+       @router = router
+       @kafka_message_key = kafka_message_key
+
+       @next_offset = @topic_entry.offset
+       if @topic_entry.offset == -1 && offset_manager
+         @next_offset = offset_manager.next_offset
+       end
+       @fetch_args = {
+         topic: @topic_entry.topic,
+         partition: @topic_entry.partition,
+       }.merge(@options)
+
+       super(interval, true)
+     end
+
+     def on_timer
+       @callback.call
+     rescue => e
+       # TODO log?
+       $log.error e.to_s
+       $log.error_backtrace
+     end
+
+     def consume
+       offset = @next_offset
+       @fetch_args[:offset] = offset
+       messages = @kafka.fetch_messages(@fetch_args)
+
+       return if messages.size.zero?
+
+       es = Fluent::MultiEventStream.new
+       tag = @topic_entry.topic
+       tag = @add_prefix + "." + tag if @add_prefix
+       tag = tag + "." + @add_suffix if @add_suffix
+
+       messages.each { |msg|
+         begin
+           record = @parser.call(msg, @topic_entry)
+           if @use_record_time
+             if @time_format
+               record_time = @time_parser.parse(record['time'])
+             else
+               record_time = record['time']
+             end
+           else
+             record_time = Fluent::Engine.now
+           end
+           if @kafka_message_key
+             record[@kafka_message_key] = msg.key
+           end
+           es.add(record_time, record)
+         rescue => e
+           $log.warn "parser error in #{@topic_entry.topic}/#{@topic_entry.partition}", :error => e.to_s, :value => msg.value, :offset => msg.offset
+           $log.debug_backtrace
+         end
+       }
+       offset = messages.last.offset + 1
+
+       unless es.empty?
+         @router.emit_stream(tag, es)
+
+         if @offset_manager
+           @offset_manager.save_offset(offset)
+         end
+         @next_offset = offset
+       end
+     end
+   end
+
+   class TopicEntry
+     def initialize(topic, partition, offset)
+       @topic = topic
+       @partition = partition
+       @offset = offset
+     end
+     attr_reader :topic, :partition, :offset
+   end
+
+   class OffsetManager
+     def initialize(topic_entry, zookeeper, zk_root_node)
+       @zookeeper = zookeeper
+       @zk_path = "#{zk_root_node}/#{topic_entry.topic}/#{topic_entry.partition}/next_offset"
+       create_node(@zk_path, topic_entry.topic, topic_entry.partition)
+     end
+
+     def create_node(zk_path, topic, partition)
+       path = ""
+       zk_path.split(/(\/[^\/]+)/).reject(&:empty?).each { |dir|
+         path = path + dir
+         @zookeeper.create(:path => "#{path}")
+       }
+       $log.trace "use zk offset node : #{path}"
+     end
+
+     def next_offset
+       @zookeeper.get(:path => @zk_path)[:data].to_i
+     end
+
+     def save_offset(offset)
+       @zookeeper.set(:path => @zk_path, :data => offset.to_s)
+       $log.trace "update zk offset node : #{offset.to_s}"
+     end
+   end
+ end
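The 'kafka' input registered above polls fixed topic/partition pairs on a Cool.io timer and delegates each poll to ruby-kafka's Kafka#fetch_messages, the same call TopicWatcher#consume makes. Below is a rough standalone sketch of that underlying call; the broker address matches the plugin's default, but the topic name "logs" and the offset/wait values are illustrative assumptions only.

    require 'kafka'

    # Same client construction the plugin uses, minus the SSL/SASL options.
    kafka = Kafka.new(seed_brokers: ["localhost:9092"], client_id: "kafka")

    # One poll: read a single topic/partition from a starting offset,
    # blocking for at most 1 second (the plugin's :interval / :max_wait_time).
    messages = kafka.fetch_messages(topic: "logs", partition: 0,
                                    offset: :earliest, max_wait_time: 1)

    messages.each do |msg|
      puts "#{msg.offset}: #{msg.value}"
    end

The plugin repeats this kind of fetch on every timer tick, remembering messages.last.offset + 1 (optionally in ZooKeeper via OffsetManager) as the next starting offset.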
@@ -0,0 +1,281 @@
+ require 'fluent/input'
+ require 'fluent/time'
+ require 'fluent/plugin/kafka_plugin_util'
+
+ class Fluent::KafkaGroupInput < Fluent::Input
+   Fluent::Plugin.register_input('kafka_group', self)
+
+   config_param :brokers, :string, :default => 'localhost:9092',
+                :desc => "List of broker host:port pairs, separated by commas. Must be set."
+   config_param :consumer_group, :string,
+                :desc => "Consumer group name. Must be set."
+   config_param :topics, :string,
+                :desc => "Listening topics (separated by comma ',')."
+   config_param :client_id, :string, :default => 'kafka'
+   config_param :format, :string, :default => 'json',
+                :desc => "Supported format: (json|text|ltsv|msgpack)"
+   config_param :message_key, :string, :default => 'message',
+                :desc => "For 'text' format only."
+   config_param :add_prefix, :string, :default => nil,
+                :desc => "Tag prefix (Optional)"
+   config_param :add_suffix, :string, :default => nil,
+                :desc => "Tag suffix (Optional)"
+   config_param :retry_emit_limit, :integer, :default => nil,
+                :desc => "How long to stop event consuming when BufferQueueLimitError happens. Wait retry_emit_limit x 1s. The default is waiting until BufferQueueLimitError is resolved"
+   config_param :use_record_time, :bool, :default => false,
+                :desc => "Replace message timestamp with contents of 'time' field."
+   config_param :time_format, :string, :default => nil,
+                :desc => "Time format to be used to parse the 'time' field."
+   config_param :kafka_message_key, :string, :default => nil,
+                :desc => "Set kafka's message key to this field"
+
+   config_param :retry_wait_seconds, :integer, :default => 30
+   config_param :disable_retry_limit, :bool, :default => false,
+                :desc => "If set true, it disables retry_limit and makes Fluentd retry indefinitely (default: false)"
+   config_param :retry_limit, :integer, :default => 10,
+                :desc => "The maximum number of retries for connecting to Kafka (default: 10)"
+   # Kafka consumer options
+   config_param :max_bytes, :integer, :default => 1048576,
+                :desc => "Maximum number of bytes to fetch."
+   config_param :max_wait_time, :integer, :default => nil,
+                :desc => "How long to block until the server sends us data."
+   config_param :min_bytes, :integer, :default => nil,
+                :desc => "Smallest amount of data the server should send us."
+   config_param :session_timeout, :integer, :default => nil,
+                :desc => "The number of seconds after which, if a client hasn't contacted the Kafka cluster, it will be kicked out of the group."
+   config_param :offset_commit_interval, :integer, :default => nil,
+                :desc => "The interval between offset commits, in seconds"
+   config_param :offset_commit_threshold, :integer, :default => nil,
+                :desc => "The number of messages that can be processed before their offsets are committed"
+   config_param :fetcher_max_queue_size, :integer, :default => nil,
+                :desc => "The number of fetched messages per partition that are queued in the fetcher queue"
+   config_param :start_from_beginning, :bool, :default => true,
+                :desc => "Whether to start from the beginning of the topic or just subscribe to new messages being produced"
+
+   include Fluent::KafkaPluginUtil::SSLSettings
+   include Fluent::KafkaPluginUtil::SaslSettings
+
+   class ForShutdown < StandardError
+   end
+
+   BufferError = if defined?(Fluent::Plugin::Buffer::BufferOverflowError)
+                   Fluent::Plugin::Buffer::BufferOverflowError
+                 else
+                   Fluent::BufferQueueLimitError
+                 end
+
+   unless method_defined?(:router)
+     define_method("router") { Fluent::Engine }
+   end
+
+   def initialize
+     super
+     require 'kafka'
+
+     @time_parser = nil
+     @retry_count = 1
+   end
+
+   def _config_to_array(config)
+     config_array = config.split(',').map {|k| k.strip }
+     if config_array.empty?
+       raise Fluent::ConfigError, "kafka_group: '#{config}' is a required parameter"
+     end
+     config_array
+   end
+
+   def multi_workers_ready?
+     true
+   end
+
+   private :_config_to_array
+
+   def configure(conf)
+     super
+
+     $log.info "Will watch for topics #{@topics} at brokers " \
+               "#{@brokers} and '#{@consumer_group}' group"
+
+     @topics = _config_to_array(@topics)
+
+     if conf['max_wait_ms']
+       log.warn "'max_wait_ms' parameter is deprecated. Use second unit 'max_wait_time' instead"
+       @max_wait_time = conf['max_wait_ms'].to_i / 1000
+     end
+
+     @parser_proc = setup_parser
+
+     @consumer_opts = {:group_id => @consumer_group}
+     @consumer_opts[:session_timeout] = @session_timeout if @session_timeout
+     @consumer_opts[:offset_commit_interval] = @offset_commit_interval if @offset_commit_interval
+     @consumer_opts[:offset_commit_threshold] = @offset_commit_threshold if @offset_commit_threshold
+     @consumer_opts[:fetcher_max_queue_size] = @fetcher_max_queue_size if @fetcher_max_queue_size
+
+     @fetch_opts = {}
+     @fetch_opts[:max_wait_time] = @max_wait_time if @max_wait_time
+     @fetch_opts[:min_bytes] = @min_bytes if @min_bytes
+
+     if @use_record_time and @time_format
+       if defined?(Fluent::TimeParser)
+         @time_parser = Fluent::TimeParser.new(@time_format)
+       else
+         @time_parser = Fluent::TextParser::TimeParser.new(@time_format)
+       end
+     end
+   end
+
+   def setup_parser
+     case @format
+     when 'json'
+       begin
+         require 'oj'
+         Oj.default_options = Fluent::DEFAULT_OJ_OPTIONS
+         Proc.new { |msg| Oj.load(msg.value) }
+       rescue LoadError
+         require 'yajl'
+         Proc.new { |msg| Yajl::Parser.parse(msg.value) }
+       end
+     when 'ltsv'
+       require 'ltsv'
+       Proc.new { |msg| LTSV.parse(msg.value, {:symbolize_keys => false}).first }
+     when 'msgpack'
+       require 'msgpack'
+       Proc.new { |msg| MessagePack.unpack(msg.value) }
+     when 'text'
+       Proc.new { |msg| {@message_key => msg.value} }
+     end
+   end
+
+   def start
+     super
+
+     if @scram_mechanism != nil && @username != nil && @password != nil
+       @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: log, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
+                          ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
+                          ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_scram_username: @username, sasl_scram_password: @password,
+                          sasl_scram_mechanism: @scram_mechanism)
+     elsif @username != nil && @password != nil
+       @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: log, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
+                          ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
+                          ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_plain_username: @username, sasl_plain_password: @password)
+     else
+       @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: log, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
+                          ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
+                          ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_gssapi_principal: @principal, sasl_gssapi_keytab: @keytab)
+     end
+
+     @consumer = setup_consumer
+     @thread = Thread.new(&method(:run))
+   end
+
+   def shutdown
+     # This nil assignment should be guarded by a mutex in multithreaded code,
+     # but the situation is very low contention, so we don't use a mutex for now.
+     # If a problem happens, we will add a guard for consumer.
+     consumer = @consumer
+     @consumer = nil
+     consumer.stop
+
+     @thread.join
+     @kafka.close
+     super
+   end
+
+   def setup_consumer
+     consumer = @kafka.consumer(@consumer_opts)
+     @topics.each { |topic|
+       consumer.subscribe(topic, start_from_beginning: @start_from_beginning, max_bytes_per_partition: @max_bytes)
+     }
+     consumer
+   end
+
+   def reconnect_consumer
+     log.warn "Stopping Consumer"
+     consumer = @consumer
+     @consumer = nil
+     if consumer
+       consumer.stop
+     end
+     log.warn "Could not connect to broker. retry_time:#{@retry_count}. Next retry will be in #{@retry_wait_seconds} seconds"
+     @retry_count = @retry_count + 1
+     sleep @retry_wait_seconds
+     @consumer = setup_consumer
+     log.warn "Re-starting consumer #{Time.now.to_s}"
+     @retry_count = 0
+   rescue => e
+     log.error "unexpected error during re-starting consumer object access", :error => e.to_s
+     log.error_backtrace
+     if @retry_count <= @retry_limit or disable_retry_limit
+       reconnect_consumer
+     end
+   end
+
+   def run
+     while @consumer
+       begin
+         @consumer.each_batch(@fetch_opts) { |batch|
+           es = Fluent::MultiEventStream.new
+           tag = batch.topic
+           tag = @add_prefix + "." + tag if @add_prefix
+           tag = tag + "." + @add_suffix if @add_suffix
+
+           batch.messages.each { |msg|
+             begin
+               record = @parser_proc.call(msg)
+               if @use_record_time
+                 if @time_format
+                   record_time = @time_parser.parse(record['time'])
+                 else
+                   record_time = record['time']
+                 end
+               else
+                 record_time = Fluent::Engine.now
+               end
+               if @kafka_message_key
+                 record[@kafka_message_key] = msg.key
+               end
+               es.add(record_time, record)
+             rescue => e
+               log.warn "parser error in #{batch.topic}/#{batch.partition}", :error => e.to_s, :value => msg.value, :offset => msg.offset
+               log.debug_backtrace
+             end
+           }
+
+           unless es.empty?
+             emit_events(tag, es)
+           end
+         }
+       rescue ForShutdown
+       rescue => e
+         log.error "unexpected error during consuming events from kafka. Re-fetch events.", :error => e.to_s
+         log.error_backtrace
+         reconnect_consumer
+       end
+     end
+   rescue => e
+     log.error "unexpected error during consumer object access", :error => e.to_s
+     log.error_backtrace
+   end
+
+   def emit_events(tag, es)
+     retries = 0
+     begin
+       router.emit_stream(tag, es)
+     rescue BufferError
+       raise ForShutdown if @consumer.nil?
+
+       if @retry_emit_limit.nil?
+         sleep 1
+         retry
+       end
+
+       if retries < @retry_emit_limit
+         retries += 1
+         sleep 1
+         retry
+       else
+         raise RuntimeError, "Exceeds retry_emit_limit"
+       end
+     end
+   end
+ end
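The 'kafka_group' input above wraps ruby-kafka's consumer-group API (Kafka#consumer, Consumer#subscribe, Consumer#each_batch) instead of raw per-partition fetches, so partition assignment and offset commits are handled by Kafka itself. A minimal standalone sketch of that loop follows; the broker address is the plugin default, while the group id "fluentd" and topic "logs" are illustrative assumptions only.

    require 'kafka'

    kafka = Kafka.new(seed_brokers: ["localhost:9092"], client_id: "kafka")

    # Join a consumer group; committed offsets are stored per group in Kafka.
    consumer = kafka.consumer(group_id: "fluentd")
    consumer.subscribe("logs", start_from_beginning: true, max_bytes_per_partition: 1048576)

    # Blocks and yields one batch per topic/partition, mirroring the plugin's run loop.
    consumer.each_batch(max_wait_time: 1) do |batch|
      batch.messages.each { |msg| puts "#{batch.topic}/#{batch.partition}: #{msg.value}" }
    end

This is why the group input exposes no partition or offset parameters of its own: ruby-kafka rebalances partitions across group members and commits offsets according to the configured offset_commit_interval / offset_commit_threshold.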