madhawk57-log 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,12 @@
+ require 'bundler'
+ Bundler::GemHelper.install_tasks
+
+ require 'rake/testtask'
+
+ Rake::TestTask.new(:test) do |test|
+   test.libs << 'lib' << 'test'
+   test.test_files = FileList['test/**/test_*.rb']
+   test.verbose = true
+ end
+
+ task :default => [:build]
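With this Rakefile in place, "bundle exec rake test" runs the test-unit suite against test/**/test_*.rb, and a bare "rake" invocation maps to the :build task supplied by Bundler::GemHelper, which packages the gem from the gemspec in the next hunk.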
@@ -0,0 +1,23 @@
+ # -*- encoding: utf-8 -*-
+
+ Gem::Specification.new do |gem|
+   gem.authors       = ["Martin Falk"]
+   gem.email         = ["martin.falk@netent.com"]
+   gem.description   = %q{Fluentd}
+   gem.summary       = %q{Fluentd}
+
+   gem.files         = `git ls-files`.split($\)
+   gem.executables   = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
+   gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
+   gem.name          = "madhawk57-log"
+   gem.require_paths = ["lib"]
+   gem.version       = '0.1.0'
+   gem.required_ruby_version = ">= 2.1.0"
+
+   gem.add_dependency "fluentd", [">= 0.10.58", "< 2"]
+   gem.add_dependency 'ltsv'
+   gem.add_dependency 'ruby-kafka', '>= 0.7.8', '< 2'
+   gem.add_dependency 'google-cloud-secret_manager', '~> 0.5.0'
+   gem.add_development_dependency "rake", ">= 0.9.2"
+   gem.add_development_dependency "test-unit", ">= 3.0.8"
+ end
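For reference, a downstream project would pin this exact release with a Gemfile entry like the sketch below. The consumer-side Gemfile is hypothetical; only the gem name and version are taken from the gemspec above.

    # Hypothetical consumer Gemfile entry pinning the version shown in this diff.
    gem 'madhawk57-log', '0.1.0'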
@@ -0,0 +1,360 @@
+ require 'fluent/input'
+ require 'fluent/time'
+ require 'fluent/plugin/kafka_plugin_util'
+
+ class Fluent::KafkaInput < Fluent::Input
+   Fluent::Plugin.register_input('kafka', self)
+
+   config_param :format, :string, :default => 'json',
+                :desc => "Supported format: (json|text|ltsv|msgpack)"
+   config_param :message_key, :string, :default => 'message',
+                :desc => "For 'text' format only."
+   config_param :host, :string, :default => nil,
+                :desc => "Broker host"
+   config_param :port, :integer, :default => nil,
+                :desc => "Broker port"
+   config_param :brokers, :string, :default => 'localhost:9092',
+                :desc => "List of broker-host:port, separate with comma, must set."
+   config_param :interval, :integer, :default => 1, # seconds
+                :desc => "Interval (Unit: seconds)"
+   config_param :topics, :string, :default => nil,
+                :desc => "Listening topics (separate with comma ',')"
+   config_param :client_id, :string, :default => 'kafka'
+   config_param :sasl_over_ssl, :bool, :default => true,
+                :desc => "Set to false to prevent SSL strict mode when using SASL authentication"
+   config_param :partition, :integer, :default => 0,
+                :desc => "Listening partition"
+   config_param :offset, :integer, :default => -1,
+                :desc => "Listening start offset"
+   config_param :add_prefix, :string, :default => nil,
+                :desc => "Tag prefix"
+   config_param :add_suffix, :string, :default => nil,
+                :desc => "Tag suffix"
+   config_param :add_offset_in_record, :bool, :default => false
+
+   config_param :offset_zookeeper, :string, :default => nil
+   config_param :offset_zk_root_node, :string, :default => '/fluent-plugin-kafka'
+   config_param :use_record_time, :bool, :default => false,
+                :desc => "Replace message timestamp with contents of 'time' field.",
+                :deprecated => "Use 'time_source record' instead."
+   config_param :time_source, :enum, :list => [:now, :kafka, :record], :default => :now,
+                :desc => "Source for message timestamp."
+   config_param :record_time_key, :string, :default => 'time',
+                :desc => "Time field when time_source is 'record'"
+   config_param :get_kafka_client_log, :bool, :default => false
+   config_param :time_format, :string, :default => nil,
+                :desc => "Time format to be used to parse 'time' field."
+   config_param :kafka_message_key, :string, :default => nil,
+                :desc => "Set kafka's message key to this field"
+
+   # Kafka#fetch_messages options
+   config_param :max_bytes, :integer, :default => nil,
+                :desc => "Maximum number of bytes to fetch."
+   config_param :max_wait_time, :integer, :default => nil,
+                :desc => "How long to block until the server sends us data."
+   config_param :min_bytes, :integer, :default => nil,
+                :desc => "Smallest amount of data the server should send us."
+
+   include Fluent::KafkaPluginUtil::SSLSettings
+   include Fluent::KafkaPluginUtil::SaslSettings
+
+   unless method_defined?(:router)
+     define_method("router") { Fluent::Engine }
+   end
+
+   def initialize
+     super
+     require 'kafka'
+
+     @time_parser = nil
+   end
+
+   def configure(conf)
+     super
+
+     @topic_list = []
+     if @topics
+       @topic_list = @topics.split(',').map { |topic|
+         TopicEntry.new(topic.strip, @partition, @offset)
+       }
+     else
+       conf.elements.select { |element| element.name == 'topic' }.each do |element|
+         unless element.has_key?('topic')
+           raise Fluent::ConfigError, "kafka: 'topic' is a required parameter in 'topic element'."
+         end
+         partition = element.has_key?('partition') ? element['partition'].to_i : 0
+         offset = element.has_key?('offset') ? element['offset'].to_i : -1
+         @topic_list.push(TopicEntry.new(element['topic'], partition, offset))
+       end
+     end
+
+     if @topic_list.empty?
+       raise Fluent::ConfigError, "kafka: 'topics' or 'topic element' is a required parameter"
+     end
+
+     # For backward compatibility
+     @brokers = case
+                when @host && @port
+                  ["#{@host}:#{@port}"]
+                when @host
+                  ["#{@host}:9092"]
+                when @port
+                  ["localhost:#{@port}"]
+                else
+                  @brokers
+                end
+
+     if conf['max_wait_ms']
+       log.warn "'max_wait_ms' parameter is deprecated. Use second unit 'max_wait_time' instead"
+       @max_wait_time = conf['max_wait_ms'].to_i / 1000
+     end
+
+     @max_wait_time = @interval if @max_wait_time.nil?
+
+     require 'zookeeper' if @offset_zookeeper
+
+     @parser_proc = setup_parser
+
+     @time_source = :record if @use_record_time
+
+     if @time_source == :record and @time_format
+       if defined?(Fluent::TimeParser)
+         @time_parser = Fluent::TimeParser.new(@time_format)
+       else
+         @time_parser = Fluent::TextParser::TimeParser.new(@time_format)
+       end
+     end
+   end
+
+   def setup_parser
+     case @format
+     when 'json'
+       begin
+         require 'oj'
+         Oj.default_options = Fluent::DEFAULT_OJ_OPTIONS
+         Proc.new { |msg, te|
+           r = Oj.load(msg.value)
+           add_offset_in_hash(r, te, msg.offset) if @add_offset_in_record
+           r
+         }
+       rescue LoadError
+         require 'yajl'
+         Proc.new { |msg, te|
+           r = Yajl::Parser.parse(msg.value)
+           add_offset_in_hash(r, te, msg.offset) if @add_offset_in_record
+           r
+         }
+       end
+     when 'ltsv'
+       require 'ltsv'
+       Proc.new { |msg, te|
+         r = LTSV.parse(msg.value, {:symbolize_keys => false}).first
+         add_offset_in_hash(r, te, msg.offset) if @add_offset_in_record
+         r
+       }
+     when 'msgpack'
+       require 'msgpack'
+       Proc.new { |msg, te|
+         r = MessagePack.unpack(msg.value)
+         add_offset_in_hash(r, te, msg.offset) if @add_offset_in_record
+         r
+       }
+     when 'text'
+       Proc.new { |msg, te|
+         r = {@message_key => msg.value}
+         add_offset_in_hash(r, te, msg.offset) if @add_offset_in_record
+         r
+       }
+     end
+   end
+
+   def add_offset_in_hash(hash, te, offset)
+     hash['kafka_topic'.freeze] = te.topic
+     hash['kafka_partition'.freeze] = te.partition
+     hash['kafka_offset'.freeze] = offset
+   end
+
+   def start
+     super
+
+     @loop = Coolio::Loop.new
+     opt = {}
+     opt[:max_bytes] = @max_bytes if @max_bytes
+     opt[:max_wait_time] = @max_wait_time if @max_wait_time
+     opt[:min_bytes] = @min_bytes if @min_bytes
+
+     logger = @get_kafka_client_log ? log : nil
+     if @scram_mechanism != nil && @username != nil && @password != nil
+       @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: logger, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
+                          ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
+                          ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_scram_username: @username, sasl_scram_password: @password,
+                          sasl_scram_mechanism: @scram_mechanism, sasl_over_ssl: @sasl_over_ssl)
+     elsif @username != nil && @password != nil
+       @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: logger, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
+                          ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
+                          ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_plain_username: @username, sasl_plain_password: @password,
+                          sasl_over_ssl: @sasl_over_ssl)
+     else
+       @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: logger, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
+                          ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
+                          ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_gssapi_principal: @principal, sasl_gssapi_keytab: @keytab)
+     end
+
+     @zookeeper = Zookeeper.new(@offset_zookeeper) if @offset_zookeeper
+
+     @topic_watchers = @topic_list.map {|topic_entry|
+       offset_manager = OffsetManager.new(topic_entry, @zookeeper, @offset_zk_root_node) if @offset_zookeeper
+       TopicWatcher.new(
+         topic_entry,
+         @kafka,
+         interval,
+         @parser_proc,
+         @add_prefix,
+         @add_suffix,
+         offset_manager,
+         router,
+         @kafka_message_key,
+         @time_source,
+         opt)
+     }
+     @topic_watchers.each {|tw|
+       tw.attach(@loop)
+     }
+     @thread = Thread.new(&method(:run))
+   end
+
+   def shutdown
+     @loop.stop
+     @zookeeper.close! if @zookeeper
+     @thread.join
+     @kafka.close
+     super
+   end
+
+   def run
+     @loop.run
+   rescue => e
+     $log.error "unexpected error", :error => e.to_s
+     $log.error_backtrace
+   end
+
+   class TopicWatcher < Coolio::TimerWatcher
+     def initialize(topic_entry, kafka, interval, parser, add_prefix, add_suffix, offset_manager, router, kafka_message_key, time_source, options={})
+       @topic_entry = topic_entry
+       @kafka = kafka
+       @callback = method(:consume)
+       @parser = parser
+       @add_prefix = add_prefix
+       @add_suffix = add_suffix
+       @options = options
+       @offset_manager = offset_manager
+       @router = router
+       @kafka_message_key = kafka_message_key
+       @time_source = time_source
+
+       @next_offset = @topic_entry.offset
+       if @topic_entry.offset == -1 && offset_manager
+         @next_offset = offset_manager.next_offset
+       end
+       @fetch_args = {
+         topic: @topic_entry.topic,
+         partition: @topic_entry.partition,
+       }.merge(@options)
+
+       super(interval, true)
+     end
+
+     def on_timer
+       @callback.call
+     rescue => e
+       # TODO log?
+       $log.error e.to_s
+       $log.error_backtrace
+     end
+
+     def consume
+       offset = @next_offset
+       @fetch_args[:offset] = offset
+       messages = @kafka.fetch_messages(@fetch_args)
+
+       return if messages.size.zero?
+
+       es = Fluent::MultiEventStream.new
+       tag = @topic_entry.topic
+       tag = @add_prefix + "." + tag if @add_prefix
+       tag = tag + "." + @add_suffix if @add_suffix
+
+       messages.each { |msg|
+         begin
+           record = @parser.call(msg, @topic_entry)
+           case @time_source
+           when :kafka
+             record_time = Fluent::EventTime.from_time(msg.create_time)
+           when :now
+             record_time = Fluent::Engine.now
+           when :record
+             if @time_format
+               record_time = @time_parser.parse(record[@record_time_key])
+             else
+               record_time = record[@record_time_key]
+             end
+           else
+             $log.fatal "BUG: invalid time_source: #{@time_source}"
+           end
+           if @kafka_message_key
+             record[@kafka_message_key] = msg.key
+           end
+           es.add(record_time, record)
+         rescue => e
+           $log.warn "parser error in #{@topic_entry.topic}/#{@topic_entry.partition}", :error => e.to_s, :value => msg.value, :offset => msg.offset
+           $log.debug_backtrace
+         end
+       }
+       offset = messages.last.offset + 1
+
+       unless es.empty?
+         @router.emit_stream(tag, es)
+
+         if @offset_manager
+           @offset_manager.save_offset(offset)
+         end
+         @next_offset = offset
+       end
+     end
+   end
+
+   class TopicEntry
+     def initialize(topic, partition, offset)
+       @topic = topic
+       @partition = partition
+       @offset = offset
+     end
+     attr_reader :topic, :partition, :offset
+   end
+
+   class OffsetManager
+     def initialize(topic_entry, zookeeper, zk_root_node)
+       @zookeeper = zookeeper
+       @zk_path = "#{zk_root_node}/#{topic_entry.topic}/#{topic_entry.partition}/next_offset"
+       create_node(@zk_path, topic_entry.topic, topic_entry.partition)
+     end
+
+     def create_node(zk_path, topic, partition)
+       path = ""
+       zk_path.split(/(\/[^\/]+)/).reject(&:empty?).each { |dir|
+         path = path + dir
+         @zookeeper.create(:path => "#{path}")
+       }
+       $log.trace "use zk offset node : #{path}"
+     end
+
+     def next_offset
+       @zookeeper.get(:path => @zk_path)[:data].to_i
+     end
+
+     def save_offset(offset)
+       @zookeeper.set(:path => @zk_path, :data => offset.to_s)
+       $log.trace "update zk offset node : #{offset.to_s}"
+     end
+   end
+ end
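The file above registers a single-consumer input under the name 'kafka'. A minimal sketch of a Fluentd source block exercising it, assuming modern (v0.12+) "@type" directive syntax, a broker on localhost, and a hypothetical topic name; all parameter names come from the config_param declarations above:

    <source>
      @type kafka
      brokers localhost:9092
      topics app_logs
      format json
      add_prefix kafka
      time_source kafka
    </source>

Each configured topic/partition is polled on a Coolio timer every 'interval' seconds via Kafka#fetch_messages, so this input tracks offsets itself (optionally persisting them in ZooKeeper via offset_zookeeper) rather than through a Kafka consumer group.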
@@ -0,0 +1,309 @@
+ require 'fluent/input'
+ require 'fluent/time'
+ require 'fluent/plugin/kafka_plugin_util'
+
+ class Fluent::KafkaGroupInput < Fluent::Input
+   Fluent::Plugin.register_input('kafka_group', self)
+
+   config_param :brokers, :string, :default => 'localhost:9092',
+                :desc => "List of broker-host:port, separate with comma, must set."
+   config_param :consumer_group, :string,
+                :desc => "Consumer group name, must set."
+   config_param :topics, :string,
+                :desc => "Listening topics (separate with comma ',')."
+   config_param :client_id, :string, :default => 'kafka'
+   config_param :sasl_over_ssl, :bool, :default => true,
+                :desc => "Set to false to prevent SSL strict mode when using SASL authentication"
+   config_param :format, :string, :default => 'json',
+                :desc => "Supported format: (json|text|ltsv|msgpack)"
+   config_param :message_key, :string, :default => 'message',
+                :desc => "For 'text' format only."
+   config_param :add_prefix, :string, :default => nil,
+                :desc => "Tag prefix (Optional)"
+   config_param :add_suffix, :string, :default => nil,
+                :desc => "Tag suffix (Optional)"
+   config_param :retry_emit_limit, :integer, :default => nil,
+                :desc => "How long to stop event consuming when BufferQueueLimitError happens. Wait retry_emit_limit x 1s. The default is waiting until BufferQueueLimitError is resolved"
+   config_param :use_record_time, :bool, :default => false,
+                :desc => "Replace message timestamp with contents of 'time' field.",
+                :deprecated => "Use 'time_source record' instead."
+   config_param :time_source, :enum, :list => [:now, :kafka, :record], :default => :now,
+                :desc => "Source for message timestamp."
+   config_param :record_time_key, :string, :default => 'time',
+                :desc => "Time field when time_source is 'record'"
+   config_param :get_kafka_client_log, :bool, :default => false
+   config_param :time_format, :string, :default => nil,
+                :desc => "Time format to be used to parse 'time' field."
+   config_param :kafka_message_key, :string, :default => nil,
+                :desc => "Set kafka's message key to this field"
+   config_param :connect_timeout, :integer, :default => nil,
+                :desc => "[Integer, nil] the timeout setting for connecting to brokers"
+   config_param :socket_timeout, :integer, :default => nil,
+                :desc => "[Integer, nil] the timeout setting for socket connection"
+
+   config_param :retry_wait_seconds, :integer, :default => 30
+   config_param :disable_retry_limit, :bool, :default => false,
+                :desc => "If set true, it disables retry_limit and makes Fluentd retry indefinitely (default: false)"
+   config_param :retry_limit, :integer, :default => 10,
+                :desc => "The maximum number of retries for connecting kafka (default: 10)"
+   # Kafka consumer options
+   config_param :max_bytes, :integer, :default => 1048576,
+                :desc => "Maximum number of bytes to fetch."
+   config_param :max_wait_time, :integer, :default => nil,
+                :desc => "How long to block until the server sends us data."
+   config_param :min_bytes, :integer, :default => nil,
+                :desc => "Smallest amount of data the server should send us."
+   config_param :session_timeout, :integer, :default => nil,
+                :desc => "The number of seconds after which, if a client hasn't contacted the Kafka cluster"
+   config_param :offset_commit_interval, :integer, :default => nil,
+                :desc => "The interval between offset commits, in seconds"
+   config_param :offset_commit_threshold, :integer, :default => nil,
+                :desc => "The number of messages that can be processed before their offsets are committed"
+   config_param :fetcher_max_queue_size, :integer, :default => nil,
+                :desc => "The number of fetched messages per partition that are queued in fetcher queue"
+   config_param :start_from_beginning, :bool, :default => true,
+                :desc => "Whether to start from the beginning of the topic or just subscribe to new messages being produced"
+
+   include Fluent::KafkaPluginUtil::SSLSettings
+   include Fluent::KafkaPluginUtil::SaslSettings
+
+   class ForShutdown < StandardError
+   end
+
+   BufferError = if defined?(Fluent::Plugin::Buffer::BufferOverflowError)
+                   Fluent::Plugin::Buffer::BufferOverflowError
+                 else
+                   Fluent::BufferQueueLimitError
+                 end
+
+   unless method_defined?(:router)
+     define_method("router") { Fluent::Engine }
+   end
+
+   def initialize
+     super
+     require 'kafka'
+
+     @time_parser = nil
+     @retry_count = 1
+   end
+
+   def _config_to_array(config)
+     config_array = config.split(',').map {|k| k.strip }
+     if config_array.empty?
+       raise Fluent::ConfigError, "kafka_group: '#{config}' is a required parameter"
+     end
+     config_array
+   end
+
+   def multi_workers_ready?
+     true
+   end
+
+   private :_config_to_array
+
+   def configure(conf)
+     super
+
+     $log.info "Will watch for topics #{@topics} at brokers " \
+               "#{@brokers} and '#{@consumer_group}' group"
+
+     @topics = _config_to_array(@topics)
+
+     if conf['max_wait_ms']
+       log.warn "'max_wait_ms' parameter is deprecated. Use second unit 'max_wait_time' instead"
+       @max_wait_time = conf['max_wait_ms'].to_i / 1000
+     end
+
+     @parser_proc = setup_parser
+
+     @consumer_opts = {:group_id => @consumer_group}
+     @consumer_opts[:session_timeout] = @session_timeout if @session_timeout
+     @consumer_opts[:offset_commit_interval] = @offset_commit_interval if @offset_commit_interval
+     @consumer_opts[:offset_commit_threshold] = @offset_commit_threshold if @offset_commit_threshold
+     @consumer_opts[:fetcher_max_queue_size] = @fetcher_max_queue_size if @fetcher_max_queue_size
+
+     @fetch_opts = {}
+     @fetch_opts[:max_wait_time] = @max_wait_time if @max_wait_time
+     @fetch_opts[:min_bytes] = @min_bytes if @min_bytes
+
+     @time_source = :record if @use_record_time
+
+     if @time_source == :record and @time_format
+       if defined?(Fluent::TimeParser)
+         @time_parser = Fluent::TimeParser.new(@time_format)
+       else
+         @time_parser = Fluent::TextParser::TimeParser.new(@time_format)
+       end
+     end
+   end
+
+   def setup_parser
+     case @format
+     when 'json'
+       begin
+         require 'oj'
+         Oj.default_options = Fluent::DEFAULT_OJ_OPTIONS
+         Proc.new { |msg| Oj.load(msg.value) }
+       rescue LoadError
+         require 'yajl'
+         Proc.new { |msg| Yajl::Parser.parse(msg.value) }
+       end
+     when 'ltsv'
+       require 'ltsv'
+       Proc.new { |msg| LTSV.parse(msg.value, {:symbolize_keys => false}).first }
+     when 'msgpack'
+       require 'msgpack'
+       Proc.new { |msg| MessagePack.unpack(msg.value) }
+     when 'text'
+       Proc.new { |msg| {@message_key => msg.value} }
+     end
+   end
+
+   def start
+     super
+
+     logger = @get_kafka_client_log ? log : nil
+     if @scram_mechanism != nil && @username != nil && @password != nil
+       @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: logger, connect_timeout: @connect_timeout, socket_timeout: @socket_timeout, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
+                          ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
+                          ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_scram_username: @username, sasl_scram_password: @password,
+                          sasl_scram_mechanism: @scram_mechanism, sasl_over_ssl: @sasl_over_ssl)
+     elsif @username != nil && @password != nil
+       @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: logger, connect_timeout: @connect_timeout, socket_timeout: @socket_timeout, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
+                          ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
+                          ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_plain_username: @username, sasl_plain_password: @password,
+                          sasl_over_ssl: @sasl_over_ssl)
+     else
+       @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: logger, connect_timeout: @connect_timeout, socket_timeout: @socket_timeout, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
+                          ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
+                          ssl_ca_certs_from_system: @ssl_ca_certs_from_system, sasl_gssapi_principal: @principal, sasl_gssapi_keytab: @keytab)
+     end
+
+     @consumer = setup_consumer
+     @thread = Thread.new(&method(:run))
+   end
+
+   def shutdown
+     # This nil assignment should be guarded by a mutex in multithreaded code.
+     # But the situation is very low contention, so we don't use a mutex for now.
+     # If the problem happens, we will add a guard for consumer.
+     consumer = @consumer
+     @consumer = nil
+     consumer.stop
+
+     @thread.join
+     @kafka.close
+     super
+   end
+
+   def setup_consumer
+     consumer = @kafka.consumer(@consumer_opts)
+     @topics.each { |topic|
+       if m = /^\/(.+)\/$/.match(topic)
+         topic_or_regex = Regexp.new(m[1])
+         $log.info "Subscribe to topics matching the regex #{topic}"
+       else
+         topic_or_regex = topic
+         $log.info "Subscribe to topic #{topic}"
+       end
+       consumer.subscribe(topic_or_regex, start_from_beginning: @start_from_beginning, max_bytes_per_partition: @max_bytes)
+     }
+     consumer
+   end
+
+   def reconnect_consumer
+     log.warn "Stopping Consumer"
+     consumer = @consumer
+     @consumer = nil
+     if consumer
+       consumer.stop
+     end
+     log.warn "Could not connect to broker. retry_time:#{@retry_count}. Next retry will be in #{@retry_wait_seconds} seconds"
+     @retry_count = @retry_count + 1
+     sleep @retry_wait_seconds
+     @consumer = setup_consumer
+     log.warn "Re-starting consumer #{Time.now.to_s}"
+     @retry_count = 0
+   rescue => e
+     log.error "unexpected error during re-starting consumer object access", :error => e.to_s
+     log.error_backtrace
+     if @retry_count <= @retry_limit or disable_retry_limit
+       reconnect_consumer
+     end
+   end
+
+   def run
+     while @consumer
+       begin
+         @consumer.each_batch(@fetch_opts) { |batch|
+           es = Fluent::MultiEventStream.new
+           tag = batch.topic
+           tag = @add_prefix + "." + tag if @add_prefix
+           tag = tag + "." + @add_suffix if @add_suffix
+
+           batch.messages.each { |msg|
+             begin
+               record = @parser_proc.call(msg)
+               case @time_source
+               when :kafka
+                 record_time = Fluent::EventTime.from_time(msg.create_time)
+               when :now
+                 record_time = Fluent::Engine.now
+               when :record
+                 if @time_format
+                   record_time = @time_parser.parse(record[@record_time_key].to_s)
+                 else
+                   record_time = record[@record_time_key]
+                 end
+               else
+                 log.fatal "BUG: invalid time_source: #{@time_source}"
+               end
+               if @kafka_message_key
+                 record[@kafka_message_key] = msg.key
+               end
+               es.add(record_time, record)
+             rescue => e
+               log.warn "parser error in #{batch.topic}/#{batch.partition}", :error => e.to_s, :value => msg.value, :offset => msg.offset
+               log.debug_backtrace
+             end
+           }
+
+           unless es.empty?
+             emit_events(tag, es)
+           end
+         }
+       rescue ForShutdown
+       rescue => e
+         log.error "unexpected error during consuming events from kafka. Re-fetch events.", :error => e.to_s
+         log.error_backtrace
+         reconnect_consumer
+       end
+     end
+   rescue => e
+     log.error "unexpected error during consumer object access", :error => e.to_s
+     log.error_backtrace
+   end
+
+   def emit_events(tag, es)
+     retries = 0
+     begin
+       router.emit_stream(tag, es)
+     rescue BufferError
+       raise ForShutdown if @consumer.nil?
+
+       if @retry_emit_limit.nil?
+         sleep 1
+         retry
+       end
+
+       if retries < @retry_emit_limit
+         retries += 1
+         sleep 1
+         retry
+       else
+         raise RuntimeError, "Exceeds retry_emit_limit"
+       end
+     end
+   end
+ end
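The 'kafka_group' input above delegates partition assignment and offset commits to a Kafka consumer group instead of tracking offsets itself. A minimal sketch, again with hypothetical broker and topic values; parameter names come from the config_param declarations above:

    <source>
      @type kafka_group
      brokers broker1:9092,broker2:9092
      consumer_group fluentd_consumers
      topics app_logs
      format json
      start_from_beginning false
    </source>

Because offsets are committed to Kafka itself, multiple Fluentd workers can share the group (multi_workers_ready? returns true), and on consume errors reconnect_consumer rebuilds the consumer every retry_wait_seconds, up to retry_limit attempts unless disable_retry_limit is set.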