roched-fluent-plugin-kafka 0.6.5

@@ -0,0 +1,310 @@
+ require 'fluent/input'
+ require 'fluent/time'
+ require 'fluent/plugin/kafka_plugin_util'
+
+ class Fluent::KafkaInput < Fluent::Input
+   Fluent::Plugin.register_input('kafka', self)
+
+   config_param :format, :string, :default => 'json',
+                :desc => "Supported format: (json|text|ltsv|msgpack)"
+   config_param :message_key, :string, :default => 'message',
+                :desc => "For 'text' format only."
+   config_param :host, :string, :default => nil,
+                :desc => "Broker host"
+   config_param :port, :integer, :default => nil,
+                :desc => "Broker port"
+   config_param :brokers, :string, :default => 'localhost:9092',
+                :desc => "List of broker-host:port, separate with comma, must set."
+   config_param :interval, :integer, :default => 1, # seconds
+                :desc => "Interval (Unit: seconds)"
+   config_param :topics, :string, :default => nil,
+                :desc => "Listening topics (separate with comma ',')"
+   config_param :client_id, :string, :default => 'kafka'
+   config_param :partition, :integer, :default => 0,
+                :desc => "Listening partition"
+   config_param :offset, :integer, :default => -1,
+                :desc => "Listening start offset"
+   config_param :add_prefix, :string, :default => nil,
+                :desc => "Tag prefix"
+   config_param :add_suffix, :string, :default => nil,
+                :desc => "Tag suffix"
+   config_param :add_offset_in_record, :bool, :default => false
+
+   config_param :offset_zookeeper, :string, :default => nil
+   config_param :offset_zk_root_node, :string, :default => '/fluent-plugin-kafka'
+   config_param :use_record_time, :bool, :default => false,
+                :desc => "Replace message timestamp with contents of 'time' field."
+   config_param :time_format, :string, :default => nil,
+                :desc => "Time format to be used to parse 'time' field."
+
+   # Kafka#fetch_messages options
+   config_param :max_bytes, :integer, :default => nil,
+                :desc => "Maximum number of bytes to fetch."
+   config_param :max_wait_time, :integer, :default => nil,
+                :desc => "How long to block until the server sends us data."
+   config_param :min_bytes, :integer, :default => nil,
+                :desc => "Smallest amount of data the server should send us."
+
+   include Fluent::KafkaPluginUtil::SSLSettings
+   include Fluent::KafkaPluginUtil::SaslSettings
+
+   unless method_defined?(:router)
+     define_method("router") { Fluent::Engine }
+   end
+
+   def initialize
+     super
+     require 'kafka'
+
+     @time_parser = nil
+   end
+
+   def configure(conf)
+     super
+
+     @topic_list = []
+     if @topics
+       @topic_list = @topics.split(',').map { |topic|
+         TopicEntry.new(topic.strip, @partition, @offset)
+       }
+     else
+       conf.elements.select { |element| element.name == 'topic' }.each do |element|
+         unless element.has_key?('topic')
+           raise Fluent::ConfigError, "kafka: 'topic' is a required parameter in 'topic element'."
+         end
+         partition = element.has_key?('partition') ? element['partition'].to_i : 0
+         offset = element.has_key?('offset') ? element['offset'].to_i : -1
+         @topic_list.push(TopicEntry.new(element['topic'], partition, offset))
+       end
+     end
+
+     if @topic_list.empty?
+       raise Fluent::ConfigError, "kafka: 'topics' or 'topic element' is a required parameter"
+     end
+
+     # For backward compatibility
+     @brokers = case
+                when @host && @port
+                  ["#{@host}:#{@port}"]
+                when @host
+                  ["#{@host}:9092"]
+                when @port
+                  ["localhost:#{@port}"]
+                else
+                  @brokers
+                end
+
+     if conf['max_wait_ms']
+       log.warn "'max_wait_ms' parameter is deprecated. Use second unit 'max_wait_time' instead"
+       @max_wait_time = conf['max_wait_ms'].to_i / 1000
+     end
+
+     @max_wait_time = @interval if @max_wait_time.nil?
+
+     require 'zookeeper' if @offset_zookeeper
+
+     @parser_proc = setup_parser
+
+     if @use_record_time and @time_format
+       @time_parser = Fluent::TextParser::TimeParser.new(@time_format)
+     end
+   end
+
+   def setup_parser
+     case @format
+     when 'json'
+       require 'yajl'
+       Proc.new { |msg, te|
+         r = Yajl::Parser.parse(msg.value)
+         add_offset_in_hash(r, te, msg.offset) if @add_offset_in_record
+         r
+       }
+     when 'ltsv'
+       require 'ltsv'
+       Proc.new { |msg, te|
+         r = LTSV.parse(msg.value, {:symbolize_keys => false}).first
+         add_offset_in_hash(r, te, msg.offset) if @add_offset_in_record
+         r
+       }
+     when 'msgpack'
+       require 'msgpack'
+       Proc.new { |msg, te|
+         r = MessagePack.unpack(msg.value)
+         add_offset_in_hash(r, te, msg.offset) if @add_offset_in_record
+         r
+       }
+     when 'text'
+       Proc.new { |msg, te|
+         r = {@message_key => msg.value}
+         add_offset_in_hash(r, te, msg.offset) if @add_offset_in_record
+         r
+       }
+     end
+   end
+
+   def add_offset_in_hash(hash, te, offset)
+     hash['kafka_topic'.freeze] = te.topic
+     hash['kafka_partition'.freeze] = te.partition
+     hash['kafka_offset'.freeze] = offset
+   end
+
+   def start
+     super
+
+     @loop = Coolio::Loop.new
+     opt = {}
+     opt[:max_bytes] = @max_bytes if @max_bytes
+     opt[:max_wait_time] = @max_wait_time if @max_wait_time
+     opt[:min_bytes] = @min_bytes if @min_bytes
+
+     @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id,
+                        ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
+                        ssl_client_cert: read_ssl_file(@ssl_client_cert),
+                        ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
+                        sasl_gssapi_principal: @principal, sasl_gssapi_keytab: @keytab)
+     @zookeeper = Zookeeper.new(@offset_zookeeper) if @offset_zookeeper
+
+     @topic_watchers = @topic_list.map {|topic_entry|
+       offset_manager = OffsetManager.new(topic_entry, @zookeeper, @offset_zk_root_node) if @offset_zookeeper
+       TopicWatcher.new(
+         topic_entry,
+         @kafka,
+         interval,
+         @parser_proc,
+         @add_prefix,
+         @add_suffix,
+         offset_manager,
+         router,
+         opt)
+     }
+     @topic_watchers.each {|tw|
+       tw.attach(@loop)
+     }
+     @thread = Thread.new(&method(:run))
+   end
+
+   def shutdown
+     @loop.stop
+     @zookeeper.close! if @zookeeper
+     @thread.join
+     @kafka.close
+     super
+   end
+
+   def run
+     @loop.run
+   rescue => e
+     $log.error "unexpected error", :error => e.to_s
+     $log.error_backtrace
+   end
+
+   class TopicWatcher < Coolio::TimerWatcher
+     def initialize(topic_entry, kafka, interval, parser, add_prefix, add_suffix, offset_manager, router, options={})
+       @topic_entry = topic_entry
+       @kafka = kafka
+       @callback = method(:consume)
+       @parser = parser
+       @add_prefix = add_prefix
+       @add_suffix = add_suffix
+       @options = options
+       @offset_manager = offset_manager
+       @router = router
+
+       @next_offset = @topic_entry.offset
+       if @topic_entry.offset == -1 && offset_manager
+         @next_offset = offset_manager.next_offset
+       end
+       @fetch_args = {
+         topic: @topic_entry.topic,
+         partition: @topic_entry.partition,
+       }.merge(@options)
+
+       super(interval, true)
+     end
+
+     def on_timer
+       @callback.call
+     rescue => e
+       # TODO log?
+       $log.error e.to_s
+       $log.error_backtrace
+     end
+
+     def consume
+       offset = @next_offset
+       @fetch_args[:offset] = offset
+       messages = @kafka.fetch_messages(@fetch_args)
+
+       return if messages.size.zero?
+
+       es = Fluent::MultiEventStream.new
+       tag = @topic_entry.topic
+       tag = @add_prefix + "." + tag if @add_prefix
+       tag = tag + "." + @add_suffix if @add_suffix
+
+       messages.each { |msg|
+         begin
+           record = @parser.call(msg, @topic_entry)
+           if @use_record_time
+             if @time_format
+               record_time = @time_parser.parse(record['time'])
+             else
+               record_time = record['time']
+             end
+           else
+             record_time = Fluent::Engine.now
+           end
+           es.add(record_time, record)
+         rescue => e
+           $log.warn "parser error in #{@topic_entry.topic}/#{@topic_entry.partition}", :error => e.to_s, :value => msg.value, :offset => msg.offset
+           $log.debug_backtrace
+         end
+       }
+       offset = messages.last.offset + 1
+
+       unless es.empty?
+         @router.emit_stream(tag, es)
+
+         if @offset_manager
+           @offset_manager.save_offset(offset)
+         end
+         @next_offset = offset
+       end
+     end
+   end
+
+   class TopicEntry
+     def initialize(topic, partition, offset)
+       @topic = topic
+       @partition = partition
+       @offset = offset
+     end
+     attr_reader :topic, :partition, :offset
+   end
+
+   class OffsetManager
+     def initialize(topic_entry, zookeeper, zk_root_node)
+       @zookeeper = zookeeper
+       @zk_path = "#{zk_root_node}/#{topic_entry.topic}/#{topic_entry.partition}/next_offset"
+       create_node(@zk_path, topic_entry.topic, topic_entry.partition)
+     end
+
+     def create_node(zk_path, topic, partition)
+       path = ""
+       zk_path.split(/(\/[^\/]+)/).reject(&:empty?).each { |dir|
+         path = path + dir
+         @zookeeper.create(:path => "#{path}")
+       }
+       $log.trace "use zk offset node : #{path}"
+     end
+
+     def next_offset
+       @zookeeper.get(:path => @zk_path)[:data].to_i
+     end
+
+     def save_offset(offset)
+       @zookeeper.set(:path => @zk_path, :data => offset.to_s)
+       $log.trace "update zk offset node : #{offset.to_s}"
+     end
+   end
+ end
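
For orientation, a minimal Fluentd <source> block for the 'kafka' input added above might look like the sketch below. The parameter names come straight from the config_param declarations (brokers, topics, format, interval, add_prefix, offset_zookeeper); the broker addresses, topic names, and tag prefix are hypothetical values, not part of this release.

  <source>
    @type kafka
    # hypothetical broker list and topics
    brokers kafka1:9092,kafka2:9092
    topics app_events,app_errors
    format json
    # emitted events are tagged <add_prefix>.<topic>
    add_prefix kafka
    interval 1
    # optional: persist the next fetch offset in ZooKeeper
    # offset_zookeeper zk1:2181
    # offset_zk_root_node /fluent-plugin-kafka
  </source>
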
@@ -0,0 +1,236 @@
+ require 'fluent/input'
+ require 'fluent/time'
+ require 'fluent/plugin/kafka_plugin_util'
+
+ class Fluent::KafkaGroupInput < Fluent::Input
+   Fluent::Plugin.register_input('kafka_group', self)
+
+   config_param :brokers, :string, :default => 'localhost:9092',
+                :desc => "List of broker-host:port, separate with comma, must set."
+   config_param :consumer_group, :string,
+                :desc => "Consumer group name, must set."
+   config_param :topics, :string,
+                :desc => "Listening topics (separate with comma ',')."
+   config_param :format, :string, :default => 'json',
+                :desc => "Supported format: (json|text|ltsv|msgpack)"
+   config_param :message_key, :string, :default => 'message',
+                :desc => "For 'text' format only."
+   config_param :add_prefix, :string, :default => nil,
+                :desc => "Tag prefix (Optional)"
+   config_param :add_suffix, :string, :default => nil,
+                :desc => "Tag suffix (Optional)"
+   config_param :retry_emit_limit, :integer, :default => nil,
+                :desc => "How long to stop event consuming when BufferQueueLimitError happens. Wait retry_emit_limit x 1s. The default is waiting until BufferQueueLimitError is resolved"
+   config_param :use_record_time, :bool, :default => false,
+                :desc => "Replace message timestamp with contents of 'time' field."
+   config_param :time_format, :string, :default => nil,
+                :desc => "Time format to be used to parse 'time' field."
+
+   config_param :retry_wait_seconds, :integer, :default => 30
+   # Kafka consumer options
+   config_param :max_bytes, :integer, :default => 1048576,
+                :desc => "Maximum number of bytes to fetch."
+   config_param :max_wait_time, :integer, :default => nil,
+                :desc => "How long to block until the server sends us data."
+   config_param :min_bytes, :integer, :default => nil,
+                :desc => "Smallest amount of data the server should send us."
+   config_param :session_timeout, :integer, :default => nil,
+                :desc => "The number of seconds after which, if a client hasn't contacted the Kafka cluster"
+   config_param :offset_commit_interval, :integer, :default => nil,
+                :desc => "The interval between offset commits, in seconds"
+   config_param :offset_commit_threshold, :integer, :default => nil,
+                :desc => "The number of messages that can be processed before their offsets are committed"
+   config_param :start_from_beginning, :bool, :default => true,
+                :desc => "Whether to start from the beginning of the topic or just subscribe to new messages being produced"
+
+   include Fluent::KafkaPluginUtil::SSLSettings
+   include Fluent::KafkaPluginUtil::SaslSettings
+
+   class ForShutdown < StandardError
+   end
+
+   BufferError = if defined?(Fluent::Plugin::Buffer::BufferOverflowError)
+                   Fluent::Plugin::Buffer::BufferOverflowError
+                 else
+                   Fluent::BufferQueueLimitError
+                 end
+
+   unless method_defined?(:router)
+     define_method("router") { Fluent::Engine }
+   end
+
+   def initialize
+     super
+     require 'kafka'
+
+     @time_parser = nil
+   end
+
+   def _config_to_array(config)
+     config_array = config.split(',').map {|k| k.strip }
+     if config_array.empty?
+       raise Fluent::ConfigError, "kafka_group: '#{config}' is a required parameter"
+     end
+     config_array
+   end
+
+   private :_config_to_array
+
+   def configure(conf)
+     super
+
+     $log.info "Will watch for topics #{@topics} at brokers " \
+               "#{@brokers} and '#{@consumer_group}' group"
+
+     @topics = _config_to_array(@topics)
+
+     if conf['max_wait_ms']
+       log.warn "'max_wait_ms' parameter is deprecated. Use second unit 'max_wait_time' instead"
+       @max_wait_time = conf['max_wait_ms'].to_i / 1000
+     end
+
+     @parser_proc = setup_parser
+
+     @consumer_opts = {:group_id => @consumer_group}
+     @consumer_opts[:session_timeout] = @session_timeout if @session_timeout
+     @consumer_opts[:offset_commit_interval] = @offset_commit_interval if @offset_commit_interval
+     @consumer_opts[:offset_commit_threshold] = @offset_commit_threshold if @offset_commit_threshold
+
+     @fetch_opts = {}
+     @fetch_opts[:max_wait_time] = @max_wait_time if @max_wait_time
+     @fetch_opts[:min_bytes] = @min_bytes if @min_bytes
+
+     if @use_record_time and @time_format
+       @time_parser = Fluent::TextParser::TimeParser.new(@time_format)
+     end
+   end
+
+   def setup_parser
+     case @format
+     when 'json'
+       require 'yajl'
+       Proc.new { |msg| Yajl::Parser.parse(msg.value) }
+     when 'ltsv'
+       require 'ltsv'
+       Proc.new { |msg| LTSV.parse(msg.value, {:symbolize_keys => false}).first }
+     when 'msgpack'
+       require 'msgpack'
+       Proc.new { |msg| MessagePack.unpack(msg.value) }
+     when 'text'
+       Proc.new { |msg| {@message_key => msg.value} }
+     end
+   end
+
+   def start
+     super
+
+     @kafka = Kafka.new(seed_brokers: @brokers,
+                        ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
+                        ssl_client_cert: read_ssl_file(@ssl_client_cert),
+                        ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
+                        sasl_gssapi_principal: @principal, sasl_gssapi_keytab: @keytab)
+     @consumer = setup_consumer
+     @thread = Thread.new(&method(:run))
+   end
+
+   def shutdown
+     # This nil assignment should be guarded by mutex in multithread programming manner.
+     # But the situation is very low contention, so we don't use mutex for now.
+     # If the problem happens, we will add a guard for consumer.
+     consumer = @consumer
+     @consumer = nil
+     consumer.stop
+
+     @thread.join
+     @kafka.close
+     super
+   end
+
+   def setup_consumer
+     consumer = @kafka.consumer(@consumer_opts)
+     @topics.each { |topic|
+       consumer.subscribe(topic, start_from_beginning: @start_from_beginning, max_bytes_per_partition: @max_bytes)
+     }
+     consumer
+   end
+
+   def reconnect_consumer
+     log.warn "Stopping Consumer"
+     consumer = @consumer
+     @consumer = nil
+     consumer.stop
+     log.warn "Could not connect to broker. Next retry will be in #{@retry_wait_seconds} seconds"
+     sleep @retry_wait_seconds
+     @consumer = setup_consumer
+     log.warn "Re-starting consumer #{Time.now.to_s}"
+   rescue => e
+     log.error "unexpected error during re-starting consumer object access", :error => e.to_s
+     log.error_backtrace
+   end
+
+   def run
+     while @consumer
+       begin
+         @consumer.each_batch(@fetch_opts) { |batch|
+           es = Fluent::MultiEventStream.new
+           tag = batch.topic
+           tag = @add_prefix + "." + tag if @add_prefix
+           tag = tag + "." + @add_suffix if @add_suffix
+
+           batch.messages.each { |msg|
+             begin
+               record = @parser_proc.call(msg)
+               if @use_record_time
+                 if @time_format
+                   record_time = @time_parser.parse(record['time'])
+                 else
+                   record_time = record['time']
+                 end
+               else
+                 record_time = Fluent::Engine.now
+               end
+               es.add(record_time, record)
+             rescue => e
+               log.warn "parser error in #{batch.topic}/#{batch.partition}", :error => e.to_s, :value => msg.value, :offset => msg.offset
+               log.debug_backtrace
+             end
+           }
+
+           unless es.empty?
+             emit_events(tag, es)
+           end
+         }
+       rescue ForShutdown
+       rescue => e
+         log.error "unexpected error during consuming events from kafka. Re-fetch events.", :error => e.to_s
+         log.error_backtrace
+         reconnect_consumer
+       end
+     end
+   rescue => e
+     log.error "unexpected error during consumer object access", :error => e.to_s
+     log.error_backtrace
+   end
+
+   def emit_events(tag, es)
+     retries = 0
+     begin
+       router.emit_stream(tag, es)
+     rescue BufferError
+       raise ForShutdown if @consumer.nil?
+
+       if @retry_emit_limit.nil?
+         sleep 1
+         retry
+       end
+
+       if retries < @retry_emit_limit
+         retries += 1
+         sleep 1
+         retry
+       else
+         raise RuntimeError, "Exceeds retry_emit_limit"
+       end
+     end
+   end
+ end
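
A corresponding sketch for the 'kafka_group' input added above is shown below. consumer_group and topics have no defaults, so they must be set; the broker addresses, group name, topic, and time format are hypothetical values chosen for illustration, and the remaining names map directly to the config_param declarations.

  <source>
    @type kafka_group
    # hypothetical brokers, group, and topic
    brokers kafka1:9092,kafka2:9092
    consumer_group fluentd_consumers
    topics app_events
    format json
    # take the event time from each record's 'time' field
    use_record_time true
    time_format %Y-%m-%dT%H:%M:%S%z
    start_from_beginning false
  </source>
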
@@ -0,0 +1,44 @@
+ module Fluent
+   module KafkaPluginUtil
+     module SSLSettings
+       def self.included(klass)
+         klass.instance_eval {
+           # https://github.com/zendesk/ruby-kafka#encryption-and-authentication-using-ssl
+           config_param :ssl_ca_cert, :array, :value_type => :string, :default => nil,
+                        :desc => "a PEM encoded CA cert to use with an SSL connection."
+           config_param :ssl_client_cert, :string, :default => nil,
+                        :desc => "a PEM encoded client cert to use with an SSL connection. Must be used in combination with ssl_client_cert_key."
+           config_param :ssl_client_cert_key, :string, :default => nil,
+                        :desc => "a PEM encoded client cert key to use with an SSL connection. Must be used in combination with ssl_client_cert."
+         }
+       end
+
+       def read_ssl_file(path)
+         return nil if path.nil?
+
+         if path.is_a?(Array)
+           path.map { |fp| File.read(fp) }
+         else
+           File.read(path)
+         end
+       end
+     end
+
+     module SaslSettings
+       def self.included(klass)
+         klass.instance_eval {
+           config_param :principal, :string, :default => nil,
+                        :desc => "a Kerberos principal to use with SASL authentication (GSSAPI)."
+           config_param :keytab, :string, :default => nil,
+                        :desc => "a filepath to Kerberos keytab. Must be used with principal."
+           config_param :username, :string, :default => nil,
+                        :desc => "a username when using PLAIN/SCRAM SASL authentication"
+           config_param :password, :string, :default => nil,
+                        :desc => "a password when using PLAIN/SCRAM SASL authentication"
+           config_param :scram_mechanism, :string, :default => nil,
+                        :desc => "if set, use SCRAM authentication with specified mechanism. When unset, defaults to PLAIN authentication"
+         }
+       end
+     end
+   end
+ end
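
Both inputs mix in these SSL and SASL parameters. Note that in this version the inputs forward only the GSSAPI principal and keytab to Kafka.new; username, password, and scram_mechanism are declared here but not passed through. A hypothetical TLS plus Kerberos configuration, with made-up file paths and principal, might look like:

  <source>
    @type kafka_group
    # hypothetical TLS listener, certificate paths, and principal
    brokers kafka1:9093
    consumer_group fluentd_consumers
    topics app_events
    ssl_ca_cert ["/etc/fluent/certs/ca.crt"]
    ssl_client_cert /etc/fluent/certs/client.crt
    ssl_client_cert_key /etc/fluent/certs/client.key
    principal fluentd@EXAMPLE.COM
    keytab /etc/fluent/fluentd.keytab
  </source>
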