madhawk57-log 0.1.0

@@ -0,0 +1,12 @@
+ require 'bundler'
+ Bundler::GemHelper.install_tasks
+
+ require 'rake/testtask'
+
+ Rake::TestTask.new(:test) do |test|
+   test.libs << 'lib' << 'test'
+   test.test_files = FileList['test/**/test_*.rb']
+   test.verbose = true
+ end
+
+ task :default => [:build]
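
With this Rakefile in place, the usual Bundler/Rake workflow applies; a minimal sketch of the expected invocations, assuming Bundler is installed:

  bundle install
  bundle exec rake test   # runs the Test::Unit suite under test/**/test_*.rb
  bundle exec rake        # default task: builds the gem via Bundler::GemHelper
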
@@ -0,0 +1,23 @@
+ # -*- encoding: utf-8 -*-
+
+ Gem::Specification.new do |gem|
+   gem.authors = ["Martin Falk"]
+   gem.email = ["martin.falk@netent.com"]
+   gem.description = %q{Fluentd input plugins for Apache Kafka}
+   gem.summary = %q{Fluentd input plugins for Apache Kafka}
+
+   gem.files = `git ls-files`.split($\)
+   gem.executables = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
+   gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
+   gem.name = "madhawk57-log"
+   gem.require_paths = ["lib"]
+   gem.version = '0.1.0'
+   gem.required_ruby_version = ">= 2.1.0"
+
+   gem.add_dependency "fluentd", [">= 0.10.58", "< 2"]
+   gem.add_dependency 'ltsv'
+   gem.add_dependency 'ruby-kafka', '>= 0.7.8', '< 2'
+   gem.add_dependency 'google-cloud-secret_manager', '~> 0.5.0'
+   gem.add_development_dependency "rake", ">= 0.9.2"
+   gem.add_development_dependency "test-unit", ">= 3.0.8"
+ end
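
A gem built from this gemspec would normally be installed into the Fluentd environment before the input plugins below can be loaded; a minimal sketch, assuming standard RubyGems and Fluentd tooling (the gemspec filename is not shown in this diff and is only illustrative):

  gem build madhawk57-log.gemspec
  fluent-gem install madhawk57-log-0.1.0.gem
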
@@ -0,0 +1,360 @@
+ require 'fluent/input'
+ require 'fluent/time'
+ require 'fluent/plugin/kafka_plugin_util'
+
+ class Fluent::KafkaInput < Fluent::Input
+   Fluent::Plugin.register_input('kafka', self)
+
+   config_param :format, :string, :default => 'json',
+                :desc => "Supported format: (json|text|ltsv|msgpack)"
+   config_param :message_key, :string, :default => 'message',
+                :desc => "For 'text' format only."
+   config_param :host, :string, :default => nil,
+                :desc => "Broker host"
+   config_param :port, :integer, :default => nil,
+                :desc => "Broker port"
+   config_param :brokers, :string, :default => 'localhost:9092',
+                :desc => "Comma-separated list of broker host:port pairs."
+   config_param :interval, :integer, :default => 1, # seconds
+                :desc => "Interval (Unit: seconds)"
+   config_param :topics, :string, :default => nil,
+                :desc => "Topics to listen on (comma-separated)."
+   config_param :client_id, :string, :default => 'kafka'
+   config_param :sasl_over_ssl, :bool, :default => true,
+                :desc => "Set to false to prevent SSL strict mode when using SASL authentication"
+   config_param :partition, :integer, :default => 0,
+                :desc => "Listening partition"
+   config_param :offset, :integer, :default => -1,
+                :desc => "Listening start offset"
+   config_param :add_prefix, :string, :default => nil,
+                :desc => "Tag prefix"
+   config_param :add_suffix, :string, :default => nil,
+                :desc => "Tag suffix"
+   config_param :add_offset_in_record, :bool, :default => false
+
+   config_param :offset_zookeeper, :string, :default => nil
+   config_param :offset_zk_root_node, :string, :default => '/fluent-plugin-kafka'
+   config_param :use_record_time, :bool, :default => false,
+                :desc => "Replace message timestamp with contents of 'time' field.",
+                :deprecated => "Use 'time_source record' instead."
+   config_param :time_source, :enum, :list => [:now, :kafka, :record], :default => :now,
+                :desc => "Source for message timestamp."
+   config_param :record_time_key, :string, :default => 'time',
+                :desc => "Time field when time_source is 'record'"
+   config_param :get_kafka_client_log, :bool, :default => false
+   config_param :time_format, :string, :default => nil,
+                :desc => "Time format to be used to parse 'time' field."
+   config_param :kafka_message_key, :string, :default => nil,
+                :desc => "Store Kafka's message key in this record field"
+
+   # Kafka#fetch_messages options
+   config_param :max_bytes, :integer, :default => nil,
+                :desc => "Maximum number of bytes to fetch."
+   config_param :max_wait_time, :integer, :default => nil,
+                :desc => "How long to block until the server sends us data."
+   config_param :min_bytes, :integer, :default => nil,
+                :desc => "Smallest amount of data the server should send us."
+
+   include Fluent::KafkaPluginUtil::SSLSettings
+   include Fluent::KafkaPluginUtil::SaslSettings
+
+   unless method_defined?(:router)
+     define_method("router") { Fluent::Engine }
+   end
+
+   def initialize
+     super
+     require 'kafka'
+
+     @time_parser = nil
+   end
+
+   def configure(conf)
+     super
+
+     @topic_list = []
+     if @topics
+       @topic_list = @topics.split(',').map { |topic|
+         TopicEntry.new(topic.strip, @partition, @offset)
+       }
+     else
+       conf.elements.select { |element| element.name == 'topic' }.each do |element|
+         unless element.has_key?('topic')
+           raise Fluent::ConfigError, "kafka: 'topic' is a required parameter in a 'topic' element."
+         end
+         partition = element.has_key?('partition') ? element['partition'].to_i : 0
+         offset = element.has_key?('offset') ? element['offset'].to_i : -1
+         @topic_list.push(TopicEntry.new(element['topic'], partition, offset))
+       end
+     end
+
+     if @topic_list.empty?
+       raise Fluent::ConfigError, "kafka: either 'topics' or a 'topic' element is a required parameter"
+     end
+
+     # For backward compatibility
+     @brokers = case
+                when @host && @port
+                  ["#{@host}:#{@port}"]
+                when @host
+                  ["#{@host}:9092"]
+                when @port
+                  ["localhost:#{@port}"]
+                else
+                  @brokers
+                end
+
+     if conf['max_wait_ms']
+       log.warn "'max_wait_ms' parameter is deprecated. Use 'max_wait_time' (in seconds) instead"
+       @max_wait_time = conf['max_wait_ms'].to_i / 1000
+     end
+
+     @max_wait_time = @interval if @max_wait_time.nil?
+
+     require 'zookeeper' if @offset_zookeeper
+
+     @parser_proc = setup_parser
+
+     @time_source = :record if @use_record_time
+
+     if @time_source == :record and @time_format
+       if defined?(Fluent::TimeParser)
+         @time_parser = Fluent::TimeParser.new(@time_format)
+       else
+         @time_parser = Fluent::TextParser::TimeParser.new(@time_format)
+       end
+     end
+   end
+
+   def setup_parser
+     case @format
+     when 'json'
+       begin
+         require 'oj'
+         Oj.default_options = Fluent::DEFAULT_OJ_OPTIONS
+         Proc.new { |msg, te|
+           r = Oj.load(msg.value)
+           add_offset_in_hash(r, te, msg.offset) if @add_offset_in_record
+           r
+         }
+       rescue LoadError
+         require 'yajl'
+         Proc.new { |msg, te|
+           r = Yajl::Parser.parse(msg.value)
+           add_offset_in_hash(r, te, msg.offset) if @add_offset_in_record
+           r
+         }
+       end
+     when 'ltsv'
+       require 'ltsv'
+       Proc.new { |msg, te|
+         r = LTSV.parse(msg.value, {:symbolize_keys => false}).first
+         add_offset_in_hash(r, te, msg.offset) if @add_offset_in_record
+         r
+       }
+     when 'msgpack'
+       require 'msgpack'
+       Proc.new { |msg, te|
+         r = MessagePack.unpack(msg.value)
+         add_offset_in_hash(r, te, msg.offset) if @add_offset_in_record
+         r
+       }
+     when 'text'
+       Proc.new { |msg, te|
+         r = {@message_key => msg.value}
+         add_offset_in_hash(r, te, msg.offset) if @add_offset_in_record
+         r
+       }
+     end
+   end
+
+   def add_offset_in_hash(hash, te, offset)
+     hash['kafka_topic'.freeze] = te.topic
+     hash['kafka_partition'.freeze] = te.partition
+     hash['kafka_offset'.freeze] = offset
+   end
+
+   def start
+     super
+
+     @loop = Coolio::Loop.new
+     opt = {}
+     opt[:max_bytes] = @max_bytes if @max_bytes
+     opt[:max_wait_time] = @max_wait_time if @max_wait_time
+     opt[:min_bytes] = @min_bytes if @min_bytes
+
+     logger = @get_kafka_client_log ? log : nil
+     if @scram_mechanism != nil && @username != nil && @password != nil
+       @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: logger, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
+                          ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
+                          ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_scram_username: @username, sasl_scram_password: @password,
+                          sasl_scram_mechanism: @scram_mechanism, sasl_over_ssl: @sasl_over_ssl)
+     elsif @username != nil && @password != nil
+       @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: logger, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
+                          ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
+                          ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_plain_username: @username, sasl_plain_password: @password,
+                          sasl_over_ssl: @sasl_over_ssl)
+     else
+       @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: logger, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
+                          ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
+                          ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_gssapi_principal: @principal, sasl_gssapi_keytab: @keytab)
+     end
+
+     @zookeeper = Zookeeper.new(@offset_zookeeper) if @offset_zookeeper
+
+     @topic_watchers = @topic_list.map {|topic_entry|
+       offset_manager = OffsetManager.new(topic_entry, @zookeeper, @offset_zk_root_node) if @offset_zookeeper
+       TopicWatcher.new(
+         topic_entry,
+         @kafka,
+         interval,
+         @parser_proc,
+         @add_prefix,
+         @add_suffix,
+         offset_manager,
+         router,
+         @kafka_message_key,
+         @time_source,
+         opt)
+     }
+     @topic_watchers.each {|tw|
+       tw.attach(@loop)
+     }
+     @thread = Thread.new(&method(:run))
+   end
+
+   def shutdown
+     @loop.stop
+     @zookeeper.close! if @zookeeper
+     @thread.join
+     @kafka.close
+     super
+   end
+
+   def run
+     @loop.run
+   rescue => e
+     $log.error "unexpected error", :error => e.to_s
+     $log.error_backtrace
+   end
+
+   class TopicWatcher < Coolio::TimerWatcher
+     def initialize(topic_entry, kafka, interval, parser, add_prefix, add_suffix, offset_manager, router, kafka_message_key, time_source, options={})
+       @topic_entry = topic_entry
+       @kafka = kafka
+       @callback = method(:consume)
+       @parser = parser
+       @add_prefix = add_prefix
+       @add_suffix = add_suffix
+       @options = options
+       @offset_manager = offset_manager
+       @router = router
+       @kafka_message_key = kafka_message_key
+       @time_source = time_source
+
+       @next_offset = @topic_entry.offset
+       if @topic_entry.offset == -1 && offset_manager
+         @next_offset = offset_manager.next_offset
+       end
+       @fetch_args = {
+         topic: @topic_entry.topic,
+         partition: @topic_entry.partition,
+       }.merge(@options)
+
+       super(interval, true)
+     end
+
+     def on_timer
+       @callback.call
+     rescue => e
+       # TODO log?
+       $log.error e.to_s
+       $log.error_backtrace
+     end
+
+     def consume
+       offset = @next_offset
+       @fetch_args[:offset] = offset
+       messages = @kafka.fetch_messages(@fetch_args)
+
+       return if messages.size.zero?
+
+       es = Fluent::MultiEventStream.new
+       tag = @topic_entry.topic
+       tag = @add_prefix + "." + tag if @add_prefix
+       tag = tag + "." + @add_suffix if @add_suffix
+
+       messages.each { |msg|
+         begin
+           record = @parser.call(msg, @topic_entry)
+           case @time_source
+           when :kafka
+             record_time = Fluent::EventTime.from_time(msg.create_time)
+           when :now
+             record_time = Fluent::Engine.now
+           when :record
+             if @time_format
+               record_time = @time_parser.parse(record[@record_time_key])
+             else
+               record_time = record[@record_time_key]
+             end
+           else
+             $log.fatal "BUG: invalid time_source: #{@time_source}"
+           end
+           if @kafka_message_key
+             record[@kafka_message_key] = msg.key
+           end
+           es.add(record_time, record)
+         rescue => e
+           $log.warn "parser error in #{@topic_entry.topic}/#{@topic_entry.partition}", :error => e.to_s, :value => msg.value, :offset => msg.offset
+           $log.debug_backtrace
+         end
+       }
+       offset = messages.last.offset + 1
+
+       unless es.empty?
+         @router.emit_stream(tag, es)
+
+         if @offset_manager
+           @offset_manager.save_offset(offset)
+         end
+         @next_offset = offset
+       end
+     end
+   end
+
+   class TopicEntry
+     def initialize(topic, partition, offset)
+       @topic = topic
+       @partition = partition
+       @offset = offset
+     end
+     attr_reader :topic, :partition, :offset
+   end
+
+   class OffsetManager
+     def initialize(topic_entry, zookeeper, zk_root_node)
+       @zookeeper = zookeeper
+       @zk_path = "#{zk_root_node}/#{topic_entry.topic}/#{topic_entry.partition}/next_offset"
+       create_node(@zk_path, topic_entry.topic, topic_entry.partition)
+     end
+
+     def create_node(zk_path, topic, partition)
+       path = ""
+       zk_path.split(/(\/[^\/]+)/).reject(&:empty?).each { |dir|
+         path = path + dir
+         @zookeeper.create(:path => "#{path}")
+       }
+       $log.trace "use zk offset node : #{path}"
+     end
+
+     def next_offset
+       @zookeeper.get(:path => @zk_path)[:data].to_i
+     end
+
+     def save_offset(offset)
+       @zookeeper.set(:path => @zk_path, :data => offset.to_s)
+       $log.trace "update zk offset node : #{offset.to_s}"
+     end
+   end
+ end
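
For reference, the parameters declared above translate directly into a Fluentd source section; a minimal sketch, assuming Fluentd v0.12+ configuration syntax (broker addresses, topic names, and the tag prefix are placeholder values, not part of this gem):

  <source>
    @type kafka
    brokers broker1:9092,broker2:9092
    topics app_logs,audit_logs
    format json
    add_prefix kafka
    time_source kafka
  </source>

Instead of the comma-separated topics parameter, configure also accepts per-topic <topic> sub-sections with topic, partition, and offset keys, optionally combined with offset_zookeeper for offset persistence.
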
@@ -0,0 +1,309 @@
+ require 'fluent/input'
+ require 'fluent/time'
+ require 'fluent/plugin/kafka_plugin_util'
+
+ class Fluent::KafkaGroupInput < Fluent::Input
+   Fluent::Plugin.register_input('kafka_group', self)
+
+   config_param :brokers, :string, :default => 'localhost:9092',
+                :desc => "Comma-separated list of broker host:port pairs."
+   config_param :consumer_group, :string,
+                :desc => "Consumer group name. Required."
+   config_param :topics, :string,
+                :desc => "Topics to listen on (comma-separated). Required."
+   config_param :client_id, :string, :default => 'kafka'
+   config_param :sasl_over_ssl, :bool, :default => true,
+                :desc => "Set to false to prevent SSL strict mode when using SASL authentication"
+   config_param :format, :string, :default => 'json',
+                :desc => "Supported format: (json|text|ltsv|msgpack)"
+   config_param :message_key, :string, :default => 'message',
+                :desc => "For 'text' format only."
+   config_param :add_prefix, :string, :default => nil,
+                :desc => "Tag prefix (Optional)"
+   config_param :add_suffix, :string, :default => nil,
+                :desc => "Tag suffix (Optional)"
+   config_param :retry_emit_limit, :integer, :default => nil,
+                :desc => "How long to stop event consuming when BufferQueueLimitError happens. Waits retry_emit_limit x 1s. The default is to wait until the BufferQueueLimitError is resolved."
+   config_param :use_record_time, :bool, :default => false,
+                :desc => "Replace message timestamp with contents of 'time' field.",
+                :deprecated => "Use 'time_source record' instead."
+   config_param :time_source, :enum, :list => [:now, :kafka, :record], :default => :now,
+                :desc => "Source for message timestamp."
+   config_param :record_time_key, :string, :default => 'time',
+                :desc => "Time field when time_source is 'record'"
+   config_param :get_kafka_client_log, :bool, :default => false
+   config_param :time_format, :string, :default => nil,
+                :desc => "Time format to be used to parse 'time' field."
+   config_param :kafka_message_key, :string, :default => nil,
+                :desc => "Store Kafka's message key in this record field"
+   config_param :connect_timeout, :integer, :default => nil,
+                :desc => "[Integer, nil] the timeout setting for connecting to brokers"
+   config_param :socket_timeout, :integer, :default => nil,
+                :desc => "[Integer, nil] the timeout setting for socket connection"
+
+   config_param :retry_wait_seconds, :integer, :default => 30
+   config_param :disable_retry_limit, :bool, :default => false,
+                :desc => "If set to true, disables retry_limit and makes Fluentd retry indefinitely (default: false)"
+   config_param :retry_limit, :integer, :default => 10,
+                :desc => "The maximum number of retries for connecting to Kafka (default: 10)"
+   # Kafka consumer options
+   config_param :max_bytes, :integer, :default => 1048576,
+                :desc => "Maximum number of bytes to fetch."
+   config_param :max_wait_time, :integer, :default => nil,
+                :desc => "How long to block until the server sends us data."
+   config_param :min_bytes, :integer, :default => nil,
+                :desc => "Smallest amount of data the server should send us."
+   config_param :session_timeout, :integer, :default => nil,
+                :desc => "The number of seconds after which, if a client hasn't contacted the Kafka cluster, it will be removed from the consumer group"
+   config_param :offset_commit_interval, :integer, :default => nil,
+                :desc => "The interval between offset commits, in seconds"
+   config_param :offset_commit_threshold, :integer, :default => nil,
+                :desc => "The number of messages that can be processed before their offsets are committed"
+   config_param :fetcher_max_queue_size, :integer, :default => nil,
+                :desc => "The number of fetched messages per partition that are queued in the fetcher queue"
+   config_param :start_from_beginning, :bool, :default => true,
+                :desc => "Whether to start from the beginning of the topic or just subscribe to new messages being produced"
+
+   include Fluent::KafkaPluginUtil::SSLSettings
+   include Fluent::KafkaPluginUtil::SaslSettings
+
+   class ForShutdown < StandardError
+   end
+
+   BufferError = if defined?(Fluent::Plugin::Buffer::BufferOverflowError)
+                   Fluent::Plugin::Buffer::BufferOverflowError
+                 else
+                   Fluent::BufferQueueLimitError
+                 end
+
+   unless method_defined?(:router)
+     define_method("router") { Fluent::Engine }
+   end
+
+   def initialize
+     super
+     require 'kafka'
+
+     @time_parser = nil
+     @retry_count = 1
+   end
+
+   def _config_to_array(config)
+     config_array = config.split(',').map {|k| k.strip }
+     if config_array.empty?
+       raise Fluent::ConfigError, "kafka_group: '#{config}' is a required parameter"
+     end
+     config_array
+   end
+
+   def multi_workers_ready?
+     true
+   end
+
+   private :_config_to_array
+
+   def configure(conf)
+     super
+
+     $log.info "Will watch for topics #{@topics} at brokers " \
+               "#{@brokers} and '#{@consumer_group}' group"
+
+     @topics = _config_to_array(@topics)
+
+     if conf['max_wait_ms']
+       log.warn "'max_wait_ms' parameter is deprecated. Use 'max_wait_time' (in seconds) instead"
+       @max_wait_time = conf['max_wait_ms'].to_i / 1000
+     end
+
+     @parser_proc = setup_parser
+
+     @consumer_opts = {:group_id => @consumer_group}
+     @consumer_opts[:session_timeout] = @session_timeout if @session_timeout
+     @consumer_opts[:offset_commit_interval] = @offset_commit_interval if @offset_commit_interval
+     @consumer_opts[:offset_commit_threshold] = @offset_commit_threshold if @offset_commit_threshold
+     @consumer_opts[:fetcher_max_queue_size] = @fetcher_max_queue_size if @fetcher_max_queue_size
+
+     @fetch_opts = {}
+     @fetch_opts[:max_wait_time] = @max_wait_time if @max_wait_time
+     @fetch_opts[:min_bytes] = @min_bytes if @min_bytes
+
+     @time_source = :record if @use_record_time
+
+     if @time_source == :record and @time_format
+       if defined?(Fluent::TimeParser)
+         @time_parser = Fluent::TimeParser.new(@time_format)
+       else
+         @time_parser = Fluent::TextParser::TimeParser.new(@time_format)
+       end
+     end
+   end
+
+   def setup_parser
+     case @format
+     when 'json'
+       begin
+         require 'oj'
+         Oj.default_options = Fluent::DEFAULT_OJ_OPTIONS
+         Proc.new { |msg| Oj.load(msg.value) }
+       rescue LoadError
+         require 'yajl'
+         Proc.new { |msg| Yajl::Parser.parse(msg.value) }
+       end
+     when 'ltsv'
+       require 'ltsv'
+       Proc.new { |msg| LTSV.parse(msg.value, {:symbolize_keys => false}).first }
+     when 'msgpack'
+       require 'msgpack'
+       Proc.new { |msg| MessagePack.unpack(msg.value) }
+     when 'text'
+       Proc.new { |msg| {@message_key => msg.value} }
+     end
+   end
+
+   def start
+     super
+
+     logger = @get_kafka_client_log ? log : nil
+     if @scram_mechanism != nil && @username != nil && @password != nil
+       @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: logger, connect_timeout: @connect_timeout, socket_timeout: @socket_timeout, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
+                          ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
+                          ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_scram_username: @username, sasl_scram_password: @password,
+                          sasl_scram_mechanism: @scram_mechanism, sasl_over_ssl: @sasl_over_ssl)
+     elsif @username != nil && @password != nil
+       @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: logger, connect_timeout: @connect_timeout, socket_timeout: @socket_timeout, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
+                          ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
+                          ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_plain_username: @username, sasl_plain_password: @password,
+                          sasl_over_ssl: @sasl_over_ssl)
+     else
+       @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: logger, connect_timeout: @connect_timeout, socket_timeout: @socket_timeout, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
+                          ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
+                          ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_gssapi_principal: @principal, sasl_gssapi_keytab: @keytab)
+     end
+
+     @consumer = setup_consumer
+     @thread = Thread.new(&method(:run))
+   end
+
+   def shutdown
+     # Strictly speaking, this nil assignment should be guarded by a mutex,
+     # but contention here is very low, so we don't use one for now.
+     # If a problem shows up, we will add a guard for @consumer.
+     consumer = @consumer
+     @consumer = nil
+     consumer.stop
+
+     @thread.join
+     @kafka.close
+     super
+   end
+
+   def setup_consumer
+     consumer = @kafka.consumer(@consumer_opts)
+     @topics.each { |topic|
+       if m = /^\/(.+)\/$/.match(topic)
+         topic_or_regex = Regexp.new(m[1])
+         $log.info "Subscribe to topics matching the regex #{topic}"
+       else
+         topic_or_regex = topic
+         $log.info "Subscribe to topic #{topic}"
+       end
+       consumer.subscribe(topic_or_regex, start_from_beginning: @start_from_beginning, max_bytes_per_partition: @max_bytes)
+     }
+     consumer
+   end
+
+   def reconnect_consumer
+     log.warn "Stopping Consumer"
+     consumer = @consumer
+     @consumer = nil
+     if consumer
+       consumer.stop
+     end
+     log.warn "Could not connect to broker. retry_time:#{@retry_count}. Next retry will be in #{@retry_wait_seconds} seconds"
+     @retry_count = @retry_count + 1
+     sleep @retry_wait_seconds
+     @consumer = setup_consumer
+     log.warn "Re-starting consumer #{Time.now.to_s}"
+     @retry_count = 0
+   rescue => e
+     log.error "unexpected error during re-starting consumer object access", :error => e.to_s
+     log.error_backtrace
+     if @retry_count <= @retry_limit or disable_retry_limit
+       reconnect_consumer
+     end
+   end
+
+   def run
+     while @consumer
+       begin
+         @consumer.each_batch(@fetch_opts) { |batch|
+           es = Fluent::MultiEventStream.new
+           tag = batch.topic
+           tag = @add_prefix + "." + tag if @add_prefix
+           tag = tag + "." + @add_suffix if @add_suffix
+
+           batch.messages.each { |msg|
+             begin
+               record = @parser_proc.call(msg)
+               case @time_source
+               when :kafka
+                 record_time = Fluent::EventTime.from_time(msg.create_time)
+               when :now
+                 record_time = Fluent::Engine.now
+               when :record
+                 if @time_format
+                   record_time = @time_parser.parse(record[@record_time_key].to_s)
+                 else
+                   record_time = record[@record_time_key]
+                 end
+               else
+                 log.fatal "BUG: invalid time_source: #{@time_source}"
+               end
+               if @kafka_message_key
+                 record[@kafka_message_key] = msg.key
+               end
+               es.add(record_time, record)
+             rescue => e
+               log.warn "parser error in #{batch.topic}/#{batch.partition}", :error => e.to_s, :value => msg.value, :offset => msg.offset
+               log.debug_backtrace
+             end
+           }
+
+           unless es.empty?
+             emit_events(tag, es)
+           end
+         }
+       rescue ForShutdown
+       rescue => e
+         log.error "unexpected error during consuming events from kafka. Re-fetch events.", :error => e.to_s
+         log.error_backtrace
+         reconnect_consumer
+       end
+     end
+   rescue => e
+     log.error "unexpected error during consumer object access", :error => e.to_s
+     log.error_backtrace
+   end
+
+   def emit_events(tag, es)
+     retries = 0
+     begin
+       router.emit_stream(tag, es)
+     rescue BufferError
+       raise ForShutdown if @consumer.nil?
+
+       if @retry_emit_limit.nil?
+         sleep 1
+         retry
+       end
+
+       if retries < @retry_emit_limit
+         retries += 1
+         sleep 1
+         retry
+       else
+         raise RuntimeError, "Exceeds retry_emit_limit"
+       end
+     end
+   end
+ end
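
As with the single-consumer input, the group consumer above is driven entirely by its config parameters; a minimal sketch of a matching Fluentd source section (broker addresses, group name, and topic are placeholders):

  <source>
    @type kafka_group
    brokers broker1:9092,broker2:9092
    consumer_group fluentd-consumers
    topics app_logs
    format json
    start_from_beginning false
  </source>

A topic written as /pattern/ is treated by setup_consumer as a regular expression and subscribes to every matching topic.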