roched-fluent-plugin-kafka 0.6.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,310 @@
+ require 'fluent/input'
+ require 'fluent/time'
+ require 'fluent/plugin/kafka_plugin_util'
+
+ class Fluent::KafkaInput < Fluent::Input
+   Fluent::Plugin.register_input('kafka', self)
+
+   config_param :format, :string, :default => 'json',
+                :desc => "Supported format: (json|text|ltsv|msgpack)"
+   config_param :message_key, :string, :default => 'message',
+                :desc => "For 'text' format only."
+   config_param :host, :string, :default => nil,
+                :desc => "Broker host"
+   config_param :port, :integer, :default => nil,
+                :desc => "Broker port"
+   config_param :brokers, :string, :default => 'localhost:9092',
+                :desc => "List of broker-host:port, separated by commas. Must be set."
+   config_param :interval, :integer, :default => 1, # seconds
+                :desc => "Interval (Unit: seconds)"
+   config_param :topics, :string, :default => nil,
+                :desc => "Listening topics (separated by comma ',')"
+   config_param :client_id, :string, :default => 'kafka'
+   config_param :partition, :integer, :default => 0,
+                :desc => "Listening partition"
+   config_param :offset, :integer, :default => -1,
+                :desc => "Listening start offset"
+   config_param :add_prefix, :string, :default => nil,
+                :desc => "Tag prefix"
+   config_param :add_suffix, :string, :default => nil,
+                :desc => "Tag suffix"
+   config_param :add_offset_in_record, :bool, :default => false
+
+   config_param :offset_zookeeper, :string, :default => nil
+   config_param :offset_zk_root_node, :string, :default => '/fluent-plugin-kafka'
+   config_param :use_record_time, :bool, :default => false,
+                :desc => "Replace message timestamp with contents of 'time' field."
+   config_param :time_format, :string, :default => nil,
+                :desc => "Time format to be used to parse the 'time' field."
+
+   # Kafka#fetch_messages options
+   config_param :max_bytes, :integer, :default => nil,
+                :desc => "Maximum number of bytes to fetch."
+   config_param :max_wait_time, :integer, :default => nil,
+                :desc => "How long to block until the server sends us data."
+   config_param :min_bytes, :integer, :default => nil,
+                :desc => "Smallest amount of data the server should send us."
+
+   include Fluent::KafkaPluginUtil::SSLSettings
+   include Fluent::KafkaPluginUtil::SaslSettings
+
+   unless method_defined?(:router)
+     define_method("router") { Fluent::Engine }
+   end
+
+   def initialize
+     super
+     require 'kafka'
+
+     @time_parser = nil
+   end
+
+   def configure(conf)
+     super
+
+     @topic_list = []
+     if @topics
+       @topic_list = @topics.split(',').map { |topic|
+         TopicEntry.new(topic.strip, @partition, @offset)
+       }
+     else
+       conf.elements.select { |element| element.name == 'topic' }.each do |element|
+         unless element.has_key?('topic')
+           raise Fluent::ConfigError, "kafka: 'topic' is a required parameter in a 'topic' element."
+         end
+         partition = element.has_key?('partition') ? element['partition'].to_i : 0
+         offset = element.has_key?('offset') ? element['offset'].to_i : -1
+         @topic_list.push(TopicEntry.new(element['topic'], partition, offset))
+       end
+     end
+
+     if @topic_list.empty?
+       raise Fluent::ConfigError, "kafka: 'topics' or a 'topic' element is a required parameter"
+     end
+
+     # For backward compatibility
+     @brokers = case
+                when @host && @port
+                  ["#{@host}:#{@port}"]
+                when @host
+                  ["#{@host}:9092"]
+                when @port
+                  ["localhost:#{@port}"]
+                else
+                  @brokers
+                end
+
+     if conf['max_wait_ms']
+       log.warn "'max_wait_ms' parameter is deprecated. Use 'max_wait_time' (in seconds) instead"
+       @max_wait_time = conf['max_wait_ms'].to_i / 1000
+     end
+
+     @max_wait_time = @interval if @max_wait_time.nil?
+
+     require 'zookeeper' if @offset_zookeeper
+
+     @parser_proc = setup_parser
+
+     if @use_record_time and @time_format
+       @time_parser = Fluent::TextParser::TimeParser.new(@time_format)
+     end
+   end
+
+   def setup_parser
+     case @format
+     when 'json'
+       require 'yajl'
+       Proc.new { |msg, te|
+         r = Yajl::Parser.parse(msg.value)
+         add_offset_in_hash(r, te, msg.offset) if @add_offset_in_record
+         r
+       }
+     when 'ltsv'
+       require 'ltsv'
+       Proc.new { |msg, te|
+         r = LTSV.parse(msg.value, {:symbolize_keys => false}).first
+         add_offset_in_hash(r, te, msg.offset) if @add_offset_in_record
+         r
+       }
+     when 'msgpack'
+       require 'msgpack'
+       Proc.new { |msg, te|
+         r = MessagePack.unpack(msg.value)
+         add_offset_in_hash(r, te, msg.offset) if @add_offset_in_record
+         r
+       }
+     when 'text'
+       Proc.new { |msg, te|
+         r = {@message_key => msg.value}
+         add_offset_in_hash(r, te, msg.offset) if @add_offset_in_record
+         r
+       }
+     end
+   end
+
+   def add_offset_in_hash(hash, te, offset)
+     hash['kafka_topic'.freeze] = te.topic
+     hash['kafka_partition'.freeze] = te.partition
+     hash['kafka_offset'.freeze] = offset
+   end
+
+   def start
+     super
+
+     @loop = Coolio::Loop.new
+     opt = {}
+     opt[:max_bytes] = @max_bytes if @max_bytes
+     opt[:max_wait_time] = @max_wait_time if @max_wait_time
+     opt[:min_bytes] = @min_bytes if @min_bytes
+
+     @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id,
+                        ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
+                        ssl_client_cert: read_ssl_file(@ssl_client_cert),
+                        ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
+                        sasl_gssapi_principal: @principal, sasl_gssapi_keytab: @keytab)
+     @zookeeper = Zookeeper.new(@offset_zookeeper) if @offset_zookeeper
+
+     @topic_watchers = @topic_list.map {|topic_entry|
+       offset_manager = OffsetManager.new(topic_entry, @zookeeper, @offset_zk_root_node) if @offset_zookeeper
+       TopicWatcher.new(
+         topic_entry,
+         @kafka,
+         interval,
+         @parser_proc,
+         @add_prefix,
+         @add_suffix,
+         offset_manager,
+         router,
+         opt)
+     }
+     @topic_watchers.each {|tw|
+       tw.attach(@loop)
+     }
+     @thread = Thread.new(&method(:run))
+   end
+
+   def shutdown
+     @loop.stop
+     @zookeeper.close! if @zookeeper
+     @thread.join
+     @kafka.close
+     super
+   end
+
+   def run
+     @loop.run
+   rescue => e
+     $log.error "unexpected error", :error => e.to_s
+     $log.error_backtrace
+   end
+
+   class TopicWatcher < Coolio::TimerWatcher
+     def initialize(topic_entry, kafka, interval, parser, add_prefix, add_suffix, offset_manager, router, options={})
+       @topic_entry = topic_entry
+       @kafka = kafka
+       @callback = method(:consume)
+       @parser = parser
+       @add_prefix = add_prefix
+       @add_suffix = add_suffix
+       @options = options
+       @offset_manager = offset_manager
+       @router = router
+
+       @next_offset = @topic_entry.offset
+       if @topic_entry.offset == -1 && offset_manager
+         @next_offset = offset_manager.next_offset
+       end
+       @fetch_args = {
+         topic: @topic_entry.topic,
+         partition: @topic_entry.partition,
+       }.merge(@options)
+
+       super(interval, true)
+     end
+
+     def on_timer
+       @callback.call
+     rescue => e
+       # TODO log?
+       $log.error e.to_s
+       $log.error_backtrace
+     end
+
+     def consume
+       offset = @next_offset
+       @fetch_args[:offset] = offset
+       messages = @kafka.fetch_messages(@fetch_args)
+
+       return if messages.size.zero?
+
+       es = Fluent::MultiEventStream.new
+       tag = @topic_entry.topic
+       tag = @add_prefix + "." + tag if @add_prefix
+       tag = tag + "." + @add_suffix if @add_suffix
+
+       messages.each { |msg|
+         begin
+           record = @parser.call(msg, @topic_entry)
+           if @use_record_time
+             if @time_format
+               record_time = @time_parser.parse(record['time'])
+             else
+               record_time = record['time']
+             end
+           else
+             record_time = Fluent::Engine.now
+           end
+           es.add(record_time, record)
+         rescue => e
+           $log.warn "parser error in #{@topic_entry.topic}/#{@topic_entry.partition}", :error => e.to_s, :value => msg.value, :offset => msg.offset
+           $log.debug_backtrace
+         end
+       }
+       offset = messages.last.offset + 1
+
+       unless es.empty?
+         @router.emit_stream(tag, es)
+
+         if @offset_manager
+           @offset_manager.save_offset(offset)
+         end
+         @next_offset = offset
+       end
+     end
+   end
+
+   class TopicEntry
+     def initialize(topic, partition, offset)
+       @topic = topic
+       @partition = partition
+       @offset = offset
+     end
+     attr_reader :topic, :partition, :offset
+   end
+
+   class OffsetManager
+     def initialize(topic_entry, zookeeper, zk_root_node)
+       @zookeeper = zookeeper
+       @zk_path = "#{zk_root_node}/#{topic_entry.topic}/#{topic_entry.partition}/next_offset"
+       create_node(@zk_path, topic_entry.topic, topic_entry.partition)
+     end
+
+     def create_node(zk_path, topic, partition)
+       path = ""
+       zk_path.split(/(\/[^\/]+)/).reject(&:empty?).each { |dir|
+         path = path + dir
+         @zookeeper.create(:path => "#{path}")
+       }
+       $log.trace "use zk offset node : #{path}"
+     end
+
+     def next_offset
+       @zookeeper.get(:path => @zk_path)[:data].to_i
+     end
+
+     def save_offset(offset)
+       @zookeeper.set(:path => @zk_path, :data => offset.to_s)
+       $log.trace "update zk offset node : #{offset.to_s}"
+     end
+   end
+ end
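
The `kafka` input above polls one partition per topic on a Coolio timer: each tick fetches messages, runs the per-format parser proc from setup_parser, builds the tag from the topic name plus add_prefix/add_suffix, and emits a MultiEventStream. A minimal standalone sketch of that record and tag shaping (FakeMessage, FakeTopicEntry, the JSON payload and the prefix/suffix values are illustrative stand-ins, not part of the package):

    require 'yajl'   # same JSON parser the plugin uses for format 'json'

    # Stand-in for Kafka::FetchedMessage with only the fields the parser procs touch.
    FakeMessage = Struct.new(:value, :offset)
    FakeTopicEntry = Struct.new(:topic, :partition)

    te  = FakeTopicEntry.new('app-events', 0)
    msg = FakeMessage.new('{"user":"alice","action":"login"}', 42)

    # format 'json' with add_offset_in_record enabled: parse the value, then
    # annotate the record the same way add_offset_in_hash does.
    record = Yajl::Parser.parse(msg.value)
    record['kafka_topic']     = te.topic
    record['kafka_partition'] = te.partition
    record['kafka_offset']    = msg.offset

    # Tag construction as in TopicWatcher#consume, with add_prefix 'kafka' and add_suffix 'raw'.
    tag = te.topic
    tag = 'kafka' + '.' + tag
    tag = tag + '.' + 'raw'

    p tag      # => "kafka.app-events.raw"
    p record   # => {"user"=>"alice", "action"=>"login", "kafka_topic"=>"app-events", ...}
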
@@ -0,0 +1,236 @@
+ require 'fluent/input'
+ require 'fluent/time'
+ require 'fluent/plugin/kafka_plugin_util'
+
+ class Fluent::KafkaGroupInput < Fluent::Input
+   Fluent::Plugin.register_input('kafka_group', self)
+
+   config_param :brokers, :string, :default => 'localhost:9092',
+                :desc => "List of broker-host:port, separated by commas. Must be set."
+   config_param :consumer_group, :string,
+                :desc => "Consumer group name. Must be set."
+   config_param :topics, :string,
+                :desc => "Listening topics (separated by comma ',')."
+   config_param :format, :string, :default => 'json',
+                :desc => "Supported format: (json|text|ltsv|msgpack)"
+   config_param :message_key, :string, :default => 'message',
+                :desc => "For 'text' format only."
+   config_param :add_prefix, :string, :default => nil,
+                :desc => "Tag prefix (Optional)"
+   config_param :add_suffix, :string, :default => nil,
+                :desc => "Tag suffix (Optional)"
+   config_param :retry_emit_limit, :integer, :default => nil,
+                :desc => "How long to stop event consuming when BufferQueueLimitError happens. Waits retry_emit_limit x 1s. The default is to wait until the BufferQueueLimitError is resolved."
+   config_param :use_record_time, :bool, :default => false,
+                :desc => "Replace message timestamp with contents of 'time' field."
+   config_param :time_format, :string, :default => nil,
+                :desc => "Time format to be used to parse the 'time' field."
+
+   config_param :retry_wait_seconds, :integer, :default => 30
+   # Kafka consumer options
+   config_param :max_bytes, :integer, :default => 1048576,
+                :desc => "Maximum number of bytes to fetch."
+   config_param :max_wait_time, :integer, :default => nil,
+                :desc => "How long to block until the server sends us data."
+   config_param :min_bytes, :integer, :default => nil,
+                :desc => "Smallest amount of data the server should send us."
+   config_param :session_timeout, :integer, :default => nil,
+                :desc => "The number of seconds after which, if a client hasn't contacted the Kafka cluster, it will be kicked out of the group."
+   config_param :offset_commit_interval, :integer, :default => nil,
+                :desc => "The interval between offset commits, in seconds"
+   config_param :offset_commit_threshold, :integer, :default => nil,
+                :desc => "The number of messages that can be processed before their offsets are committed"
+   config_param :start_from_beginning, :bool, :default => true,
+                :desc => "Whether to start from the beginning of the topic or just subscribe to new messages being produced"
+
+   include Fluent::KafkaPluginUtil::SSLSettings
+   include Fluent::KafkaPluginUtil::SaslSettings
+
+   class ForShutdown < StandardError
+   end
+
+   BufferError = if defined?(Fluent::Plugin::Buffer::BufferOverflowError)
+                   Fluent::Plugin::Buffer::BufferOverflowError
+                 else
+                   Fluent::BufferQueueLimitError
+                 end
+
+   unless method_defined?(:router)
+     define_method("router") { Fluent::Engine }
+   end
+
+   def initialize
+     super
+     require 'kafka'
+
+     @time_parser = nil
+   end
+
+   def _config_to_array(config)
+     config_array = config.split(',').map {|k| k.strip }
+     if config_array.empty?
+       raise Fluent::ConfigError, "kafka_group: '#{config}' is a required parameter"
+     end
+     config_array
+   end
+
+   private :_config_to_array
+
+   def configure(conf)
+     super
+
+     $log.info "Will watch for topics #{@topics} at brokers " \
+               "#{@brokers} and '#{@consumer_group}' group"
+
+     @topics = _config_to_array(@topics)
+
+     if conf['max_wait_ms']
+       log.warn "'max_wait_ms' parameter is deprecated. Use 'max_wait_time' (in seconds) instead"
+       @max_wait_time = conf['max_wait_ms'].to_i / 1000
+     end
+
+     @parser_proc = setup_parser
+
+     @consumer_opts = {:group_id => @consumer_group}
+     @consumer_opts[:session_timeout] = @session_timeout if @session_timeout
+     @consumer_opts[:offset_commit_interval] = @offset_commit_interval if @offset_commit_interval
+     @consumer_opts[:offset_commit_threshold] = @offset_commit_threshold if @offset_commit_threshold
+
+     @fetch_opts = {}
+     @fetch_opts[:max_wait_time] = @max_wait_time if @max_wait_time
+     @fetch_opts[:min_bytes] = @min_bytes if @min_bytes
+
+     if @use_record_time and @time_format
+       @time_parser = Fluent::TextParser::TimeParser.new(@time_format)
+     end
+   end
+
+   def setup_parser
+     case @format
+     when 'json'
+       require 'yajl'
+       Proc.new { |msg| Yajl::Parser.parse(msg.value) }
+     when 'ltsv'
+       require 'ltsv'
+       Proc.new { |msg| LTSV.parse(msg.value, {:symbolize_keys => false}).first }
+     when 'msgpack'
+       require 'msgpack'
+       Proc.new { |msg| MessagePack.unpack(msg.value) }
+     when 'text'
+       Proc.new { |msg| {@message_key => msg.value} }
+     end
+   end
+
+   def start
+     super
+
+     @kafka = Kafka.new(seed_brokers: @brokers,
+                        ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
+                        ssl_client_cert: read_ssl_file(@ssl_client_cert),
+                        ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
+                        sasl_gssapi_principal: @principal, sasl_gssapi_keytab: @keytab)
+     @consumer = setup_consumer
+     @thread = Thread.new(&method(:run))
+   end
+
+   def shutdown
+     # In strict multithreaded style this nil assignment should be guarded by a mutex,
+     # but contention here is very low, so we skip the mutex for now.
+     # If that ever becomes a problem, we will add a guard around the consumer.
+     consumer = @consumer
+     @consumer = nil
+     consumer.stop
+
+     @thread.join
+     @kafka.close
+     super
+   end
+
+   def setup_consumer
+     consumer = @kafka.consumer(@consumer_opts)
+     @topics.each { |topic|
+       consumer.subscribe(topic, start_from_beginning: @start_from_beginning, max_bytes_per_partition: @max_bytes)
+     }
+     consumer
+   end
+
+   def reconnect_consumer
+     log.warn "Stopping Consumer"
+     consumer = @consumer
+     @consumer = nil
+     consumer.stop
+     log.warn "Could not connect to broker. Next retry will be in #{@retry_wait_seconds} seconds"
+     sleep @retry_wait_seconds
+     @consumer = setup_consumer
+     log.warn "Re-starting consumer #{Time.now.to_s}"
+   rescue => e
+     log.error "unexpected error during re-starting consumer object access", :error => e.to_s
+     log.error_backtrace
+   end
+
+   def run
+     while @consumer
+       begin
+         @consumer.each_batch(@fetch_opts) { |batch|
+           es = Fluent::MultiEventStream.new
+           tag = batch.topic
+           tag = @add_prefix + "." + tag if @add_prefix
+           tag = tag + "." + @add_suffix if @add_suffix
+
+           batch.messages.each { |msg|
+             begin
+               record = @parser_proc.call(msg)
+               if @use_record_time
+                 if @time_format
+                   record_time = @time_parser.parse(record['time'])
+                 else
+                   record_time = record['time']
+                 end
+               else
+                 record_time = Fluent::Engine.now
+               end
+               es.add(record_time, record)
+             rescue => e
+               log.warn "parser error in #{batch.topic}/#{batch.partition}", :error => e.to_s, :value => msg.value, :offset => msg.offset
+               log.debug_backtrace
+             end
+           }
+
+           unless es.empty?
+             emit_events(tag, es)
+           end
+         }
+       rescue ForShutdown
+       rescue => e
+         log.error "unexpected error during consuming events from kafka. Re-fetch events.", :error => e.to_s
+         log.error_backtrace
+         reconnect_consumer
+       end
+     end
+   rescue => e
+     log.error "unexpected error during consumer object access", :error => e.to_s
+     log.error_backtrace
+   end
+
+   def emit_events(tag, es)
+     retries = 0
+     begin
+       router.emit_stream(tag, es)
+     rescue BufferError
+       raise ForShutdown if @consumer.nil?
+
+       if @retry_emit_limit.nil?
+         sleep 1
+         retry
+       end
+
+       if retries < @retry_emit_limit
+         retries += 1
+         sleep 1
+         retry
+       else
+         raise RuntimeError, "Exceeds retry_emit_limit"
+       end
+     end
+   end
+ end
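
emit_events above throttles on Fluentd buffer overflow: with retry_emit_limit unset it sleeps one second and retries indefinitely, otherwise it gives up with "Exceeds retry_emit_limit" after that many attempts. A self-contained sketch of the same retry shape (BufferFullError and the emit block are illustrative stand-ins for the plugin's BufferError and router.emit_stream):

    class BufferFullError < StandardError; end

    # nil means retry forever; an integer caps the number of one-second waits,
    # mirroring retry_emit_limit in emit_events.
    def emit_with_retry(retry_emit_limit)
      retries = 0
      begin
        yield
      rescue BufferFullError
        if retry_emit_limit.nil?
          sleep 1
          retry
        end

        if retries < retry_emit_limit
          retries += 1
          sleep 1
          retry
        else
          raise "Exceeds retry_emit_limit"
        end
      end
    end

    attempts = 0
    emit_with_retry(3) do
      attempts += 1
      raise BufferFullError if attempts < 3   # simulate a full buffer on the first two tries
    end
    puts "emitted after #{attempts} attempts"  # => emitted after 3 attempts
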
@@ -0,0 +1,44 @@
+ module Fluent
+   module KafkaPluginUtil
+     module SSLSettings
+       def self.included(klass)
+         klass.instance_eval {
+           # https://github.com/zendesk/ruby-kafka#encryption-and-authentication-using-ssl
+           config_param :ssl_ca_cert, :array, :value_type => :string, :default => nil,
+                        :desc => "A PEM-encoded CA cert to use with an SSL connection."
+           config_param :ssl_client_cert, :string, :default => nil,
+                        :desc => "A PEM-encoded client cert to use with an SSL connection. Must be used in combination with ssl_client_cert_key."
+           config_param :ssl_client_cert_key, :string, :default => nil,
+                        :desc => "A PEM-encoded client cert key to use with an SSL connection. Must be used in combination with ssl_client_cert."
+         }
+       end
+
+       def read_ssl_file(path)
+         return nil if path.nil?
+
+         if path.is_a?(Array)
+           path.map { |fp| File.read(fp) }
+         else
+           File.read(path)
+         end
+       end
+     end
+
+     module SaslSettings
+       def self.included(klass)
+         klass.instance_eval {
+           config_param :principal, :string, :default => nil,
+                        :desc => "A Kerberos principal to use with SASL authentication (GSSAPI)."
+           config_param :keytab, :string, :default => nil,
+                        :desc => "A filepath to a Kerberos keytab. Must be used with principal."
+           config_param :username, :string, :default => nil,
+                        :desc => "A username when using PLAIN/SCRAM SASL authentication."
+           config_param :password, :string, :default => nil,
+                        :desc => "A password when using PLAIN/SCRAM SASL authentication."
+           config_param :scram_mechanism, :string, :default => nil,
+                        :desc => "If set, use SCRAM authentication with the specified mechanism. When unset, defaults to PLAIN authentication."
+         }
+       end
+     end
+   end
+ end
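
read_ssl_file above is what turns the configured paths into the PEM strings both inputs pass to Kafka.new: nil stays nil, a single path becomes one PEM string, and an array (ssl_ca_cert is declared as an :array parameter) becomes an array of PEM strings. A small usage sketch with hypothetical file names; the method body is copied from the module so it runs standalone:

    def read_ssl_file(path)
      return nil if path.nil?

      if path.is_a?(Array)
        path.map { |fp| File.read(fp) }   # :array param (ssl_ca_cert) => array of PEM strings
      else
        File.read(path)                   # single path (ssl_client_cert etc.) => one PEM string
      end
    end

    p read_ssl_file(nil)                      # => nil, so Kafka.new receives no cert material
    p read_ssl_file('client.pem')             # => contents of client.pem (file must exist)
    p read_ssl_file(['ca1.pem', 'ca2.pem'])   # => ["<ca1 PEM>", "<ca2 PEM>"]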