sk-fluent-plugin-kafka 0.8.0

@@ -0,0 +1,52 @@
+ module Fluent
+   module KafkaPluginUtil
+     module SSLSettings
+       def self.included(klass)
+         klass.instance_eval {
+           # https://github.com/zendesk/ruby-kafka#encryption-and-authentication-using-ssl
+           config_param :ssl_ca_cert, :array, :value_type => :string, :default => nil,
+                        :desc => "a PEM encoded CA cert to use with an SSL connection."
+           config_param :ssl_client_cert, :string, :default => nil,
+                        :desc => "a PEM encoded client cert to use with an SSL connection. Must be used in combination with ssl_client_cert_key."
+           config_param :ssl_client_cert_key, :string, :default => nil,
+                        :desc => "a PEM encoded client cert key to use with an SSL connection. Must be used in combination with ssl_client_cert."
+           config_param :ssl_ca_certs_from_system, :bool, :default => false,
+                        :desc => "this configures the store to look up CA certificates from the system default certificate store on an as-needed basis. The location of the store can usually be determined by: OpenSSL::X509::DEFAULT_CERT_FILE."
+         }
+       end
+
+       def read_ssl_file(path)
+         return nil if path.nil?
+
+         if path.is_a?(Array)
+           path.map { |fp| File.read(fp) }
+         else
+           File.read(path)
+         end
+       end
+
+       def pickup_ssl_endpoint(node)
+         ssl_endpoint = node['endpoints'].find { |e| e.start_with?('SSL') }
+         raise 'no SSL endpoint found on Zookeeper' unless ssl_endpoint
+         return [URI.parse(ssl_endpoint).host, URI.parse(ssl_endpoint).port].join(':')
+       end
+     end
+
+     module SaslSettings
+       def self.included(klass)
+         klass.instance_eval {
+           config_param :principal, :string, :default => nil,
+                        :desc => "a Kerberos principal to use with SASL authentication (GSSAPI)."
+           config_param :keytab, :string, :default => nil,
+                        :desc => "a filepath to Kerberos keytab. Must be used with principal."
+           config_param :username, :string, :default => nil,
+                        :desc => "a username when using PLAIN/SCRAM SASL authentication"
+           config_param :password, :string, :default => nil, secret: true,
+                        :desc => "a password when using PLAIN/SCRAM SASL authentication"
+           config_param :scram_mechanism, :string, :default => nil,
+                        :desc => "if set, use SCRAM authentication with the specified mechanism. When unset, default to PLAIN authentication"
+         }
+       end
+     end
+   end
+ end
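The two mixins above only declare configuration parameters and a pair of helpers. The standalone sketch below is not part of the gem; the FakePlugin class and the sample endpoint values are invented for illustration. It shows what the helpers do: read_ssl_file turns a path (or array of paths) into PEM contents and passes nil through unchanged, while pickup_ssl_endpoint extracts host:port from the first SSL listener advertised by a Zookeeper broker node.

require 'uri'

# Stand-in class that copies the two helpers above so the example runs without Fluentd.
class FakePlugin
  def read_ssl_file(path)
    return nil if path.nil?
    path.is_a?(Array) ? path.map { |fp| File.read(fp) } : File.read(path)
  end

  def pickup_ssl_endpoint(node)
    ssl_endpoint = node['endpoints'].find { |e| e.start_with?('SSL') }
    raise 'no SSL endpoint found on Zookeeper' unless ssl_endpoint
    [URI.parse(ssl_endpoint).host, URI.parse(ssl_endpoint).port].join(':')
  end
end

plugin = FakePlugin.new
p plugin.read_ssl_file(nil)   # => nil (unset cert params pass straight through to ruby-kafka)
p plugin.pickup_ssl_endpoint('endpoints' => ['PLAINTEXT://broker1:9092', 'SSL://broker1:9093'])
# => "broker1:9093" (only the SSL listener is picked)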
@@ -0,0 +1,308 @@
+ require "set"
+ require "kafka/partitioner"
+ require "kafka/message_buffer"
+ require "kafka/produce_operation"
+ require "kafka/pending_message_queue"
+ require "kafka/pending_message"
+ require "kafka/compressor"
+ require 'kafka/producer'
+
+ # for out_kafka_buffered
+ module Kafka
+   EMPTY_HEADER = {}
+
+   class Producer
+     def produce_for_buffered(value, key: nil, topic:, partition: nil, partition_key: nil)
+       create_time = Time.now
+
+       message = PendingMessage.new(
+         value: value,
+         key: key,
+         headers: EMPTY_HEADER,
+         topic: topic,
+         partition: partition,
+         partition_key: partition_key,
+         create_time: create_time
+       )
+
+       # If the producer is in transactional mode, all message production
+       # must happen inside a transaction
+       if @transaction_manager.transactional? && !@transaction_manager.in_transaction?
+         raise 'You must trigger begin_transaction before producing messages'
+       end
+
+       @target_topics.add(topic)
+       @pending_message_queue.write(message)
+
+       nil
+     end
+   end
+ end
+
+ # for out_kafka2
+ module Kafka
+   class Client
+     def topic_producer(topic, compression_codec: nil, compression_threshold: 1, ack_timeout: 5, required_acks: :all, max_retries: 2, retry_backoff: 1, max_buffer_size: 1000, max_buffer_bytesize: 10_000_000, idempotent: false, transactional: false, transactional_id: nil, transactional_timeout: 60)
+       cluster = initialize_cluster
+       compressor = Compressor.new(
+         codec_name: compression_codec,
+         threshold: compression_threshold,
+         instrumenter: @instrumenter,
+       )
+
+       transaction_manager = TransactionManager.new(
+         cluster: cluster,
+         logger: @logger,
+         idempotent: idempotent,
+         transactional: transactional,
+         transactional_id: transactional_id,
+         transactional_timeout: transactional_timeout,
+       )
+
+       TopicProducer.new(topic,
+         cluster: cluster,
+         transaction_manager: transaction_manager,
+         logger: @logger,
+         instrumenter: @instrumenter,
+         compressor: compressor,
+         ack_timeout: ack_timeout,
+         required_acks: required_acks,
+         max_retries: max_retries,
+         retry_backoff: retry_backoff,
+         max_buffer_size: max_buffer_size,
+         max_buffer_bytesize: max_buffer_bytesize,
+       )
+     end
+   end
+
+   class TopicProducer
+     def initialize(topic, cluster:, transaction_manager:, logger:, instrumenter:, compressor:, ack_timeout:, required_acks:, max_retries:, retry_backoff:, max_buffer_size:, max_buffer_bytesize:)
+       @cluster = cluster
+       @transaction_manager = transaction_manager
+       @logger = logger
+       @instrumenter = instrumenter
+       @required_acks = required_acks == :all ? -1 : required_acks
+       @ack_timeout = ack_timeout
+       @max_retries = max_retries
+       @retry_backoff = retry_backoff
+       @max_buffer_size = max_buffer_size
+       @max_buffer_bytesize = max_buffer_bytesize
+       @compressor = compressor
+
+       @topic = topic
+       @cluster.add_target_topics(Set.new([topic]))
+
+       # A buffer organized by topic/partition.
+       @buffer = MessageBuffer.new
+
+       # Messages added by `#produce` but not yet assigned a partition.
+       @pending_message_queue = PendingMessageQueue.new
+     end
+
+     def produce(value, key: nil, partition: nil, partition_key: nil)
+       create_time = Time.now
+
+       message = PendingMessage.new(
+         value: value,
+         key: key,
+         headers: EMPTY_HEADER,
+         topic: @topic,
+         partition: partition,
+         partition_key: partition_key,
+         create_time: create_time
+       )
+
+       # If the producer is in transactional mode, all message production
+       # must happen inside a transaction
+       if @transaction_manager.transactional? && !@transaction_manager.in_transaction?
+         raise 'You must trigger begin_transaction before producing messages'
+       end
+
+       @pending_message_queue.write(message)
+
+       nil
+     end
+
+     def deliver_messages
+       # There's no need to do anything if the buffer is empty.
+       return if buffer_size == 0
+
+       deliver_messages_with_retries
+     end
+
+     # Returns the number of messages currently held in the buffer.
+     #
+     # @return [Integer] buffer size.
+     def buffer_size
+       @pending_message_queue.size + @buffer.size
+     end
+
+     def buffer_bytesize
+       @pending_message_queue.bytesize + @buffer.bytesize
+     end
+
+     # Deletes all buffered messages.
+     #
+     # @return [nil]
+     def clear_buffer
+       @buffer.clear
+       @pending_message_queue.clear
+     end
+
+     # Closes all connections to the brokers.
+     #
+     # @return [nil]
+     def shutdown
+       @transaction_manager.close
+       @cluster.disconnect
+     end
+
+     def init_transactions
+       @transaction_manager.init_transactions
+     end
+
+     def begin_transaction
+       @transaction_manager.begin_transaction
+     end
+
+     def commit_transaction
+       @transaction_manager.commit_transaction
+     end
+
+     def abort_transaction
+       @transaction_manager.abort_transaction
+     end
+
+     def transaction
+       raise 'This method requires a block' unless block_given?
+       begin_transaction
+       yield
+       commit_transaction
+     rescue Kafka::Producer::AbortTransaction
+       abort_transaction
+     rescue
+       abort_transaction
+       raise
+     end
+
+     def deliver_messages_with_retries
+       attempt = 0
+
+       #@cluster.add_target_topics(@target_topics)
+
+       operation = ProduceOperation.new(
+         cluster: @cluster,
+         transaction_manager: @transaction_manager,
+         buffer: @buffer,
+         required_acks: @required_acks,
+         ack_timeout: @ack_timeout,
+         compressor: @compressor,
+         logger: @logger,
+         instrumenter: @instrumenter,
+       )
+
+       loop do
+         attempt += 1
+
+         begin
+           @cluster.refresh_metadata_if_necessary!
+         rescue ConnectionError => e
+           raise DeliveryFailed.new(e, buffer_messages)
+         end
+
+         assign_partitions!
+         operation.execute
+
+         if @required_acks.zero?
+           # No response is returned by the brokers, so we can't know which messages
+           # have been successfully written. Our only option is to assume that they all
+           # have.
+           @buffer.clear
+         end
+
+         if buffer_size.zero?
+           break
+         elsif attempt <= @max_retries
+           @logger.warn "Failed to send all messages; attempting retry #{attempt} of #{@max_retries} after #{@retry_backoff}s"
+
+           sleep @retry_backoff
+         else
+           @logger.error "Failed to send all messages; keeping remaining messages in buffer"
+           break
+         end
+       end
+
+       unless @pending_message_queue.empty?
+         # Mark the cluster as stale in order to force a cluster metadata refresh.
+         @cluster.mark_as_stale!
+         raise DeliveryFailed, "Failed to assign partitions to #{@pending_message_queue.size} messages"
+       end
+
+       unless @buffer.empty?
+         partitions = @buffer.map {|topic, partition, _| "#{topic}/#{partition}" }.join(", ")
+
+         raise DeliveryFailed, "Failed to send messages to #{partitions}"
+       end
+     end
+
+     def assign_partitions!
+       failed_messages = []
+       partition_count = @cluster.partitions_for(@topic).count
+
+       @pending_message_queue.each do |message|
+         partition = message.partition
+
+         begin
+           if partition.nil?
+             partition = Partitioner.partition_for_key(partition_count, message)
+           end
+
+           @buffer.write(
+             value: message.value,
+             key: message.key,
+             headers: message.headers,
+             topic: message.topic,
+             partition: partition,
+             create_time: message.create_time,
+           )
+         rescue Kafka::Error => e
+           failed_messages << message
+         end
+       end
+
+       if failed_messages.any?
+         failed_messages.group_by(&:topic).each do |topic, messages|
+           @logger.error "Failed to assign partitions to #{messages.count} messages in #{topic}"
+         end
+
+         @cluster.mark_as_stale!
+       end
+
+       @pending_message_queue.replace(failed_messages)
+     end
+
+     def buffer_messages
+       messages = []
+
+       @pending_message_queue.each do |message|
+         messages << message
+       end
+
+       @buffer.each do |topic, partition, messages_for_partition|
+         messages_for_partition.each do |message|
+           messages << PendingMessage.new(
+             value: message.value,
+             key: message.key,
+             headers: message.headers,
+             topic: topic,
+             partition: partition,
+             partition_key: nil,
+             create_time: message.create_time
+           )
+         end
+       end
+
+       messages
+     end
+   end
+ end
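For context, here is a rough usage sketch of the TopicProducer path added for out_kafka2. It assumes the extension above is already loaded, a broker is reachable at localhost:9092, and a topic named my-topic exists; none of those specifics come from the diff. The flow mirrors what the output plugin does: produce only queues messages, and deliver_messages assigns partitions and sends the whole batch in one ProduceOperation, retrying up to max_retries.

require 'kafka'
# (the producer extension shown above must be loaded so Kafka::Client#topic_producer exists)

kafka = Kafka.new(seed_brokers: ['localhost:9092'], client_id: 'example')  # assumed broker address
producer = kafka.topic_producer('my-topic', required_acks: -1, max_retries: 2, retry_backoff: 1)  # assumed topic

producer.produce('{"message":"hello"}', key: 'k1')             # queued, not yet sent
producer.produce('{"message":"world"}', partition_key: 'u42')  # partition chosen at delivery time
producer.deliver_messages                                      # assign_partitions! + ProduceOperation
producer.shutdown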
@@ -0,0 +1,254 @@
+ require 'fluent/output'
+ require 'fluent/plugin/kafka_plugin_util'
+
+ class Fluent::KafkaOutput < Fluent::Output
+   Fluent::Plugin.register_output('kafka', self)
+
+   config_param :brokers, :string, :default => 'localhost:9092',
+                :desc => <<-DESC
+ Set brokers directly
+ <broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,..
+ Note that you can choose to use either brokers or zookeeper.
+ DESC
+   config_param :zookeeper, :string, :default => nil,
+                :desc => "Set brokers via Zookeeper: <zookeeper_host>:<zookeeper_port>"
+   config_param :zookeeper_path, :string, :default => '/brokers/ids',
+                :desc => "Path in Zookeeper under which broker ids are stored. Defaults to /brokers/ids"
+   config_param :default_topic, :string, :default => nil,
+                :desc => "Output topic."
+   config_param :default_message_key, :string, :default => nil
+   config_param :default_partition_key, :string, :default => nil
+   config_param :default_partition, :integer, :default => nil
+   config_param :client_id, :string, :default => 'kafka'
+   config_param :sasl_over_ssl, :bool, :default => true,
+                :desc => <<-DESC
+ Set to false to prevent SSL strict mode when using SASL authentication
+ DESC
+   config_param :output_data_type, :string, :default => 'json',
+                :desc => "Supported format: (json|ltsv|msgpack|attr:<record name>|<formatter name>)"
+   config_param :output_include_tag, :bool, :default => false
+   config_param :output_include_time, :bool, :default => false
+   config_param :exclude_partition_key, :bool, :default => false,
+                :desc => <<-DESC
+ Set true to remove partition key from data
+ DESC
+   config_param :exclude_partition, :bool, :default => false,
+                :desc => <<-DESC
+ Set true to remove partition from data
+ DESC
+
+   config_param :exclude_message_key, :bool, :default => false,
+                :desc => <<-DESC
+ Set true to remove message key from data
+ DESC
+   config_param :exclude_topic_key, :bool, :default => false,
+                :desc => <<-DESC
+ Set true to remove topic name key from data
+ DESC
+
+   # ruby-kafka producer options
+   config_param :max_send_retries, :integer, :default => 2,
+                :desc => "Number of times to retry sending of messages to a leader."
+   config_param :required_acks, :integer, :default => -1,
+                :desc => "The number of acks required per request."
+   config_param :ack_timeout, :integer, :default => nil,
+                :desc => "How long the producer waits for acks."
+   config_param :compression_codec, :string, :default => nil,
+                :desc => "The codec the producer uses to compress messages."
+
+   config_param :time_format, :string, :default => nil
+
+   config_param :max_buffer_size, :integer, :default => nil,
+                :desc => "Number of messages to be buffered by the kafka producer."
+
+   config_param :max_buffer_bytesize, :integer, :default => nil,
+                :desc => "Maximum size in bytes to be buffered."
+
+   config_param :active_support_notification_regex, :string, :default => nil,
+                :desc => <<-DESC
+ Add a regular expression to capture ActiveSupport notifications from the Kafka client
+ requires activesupport gem - records will be generated under fluent_kafka_stats.**
+ DESC
+
+   include Fluent::KafkaPluginUtil::SSLSettings
+   include Fluent::KafkaPluginUtil::SaslSettings
+
+   attr_accessor :output_data_type
+   attr_accessor :field_separator
+
+   unless method_defined?(:log)
+     define_method("log") { $log }
+   end
+
+   def initialize
+     super
+
+     require 'kafka'
+
+     @kafka = nil
+   end
+
+   def refresh_client
+     if @zookeeper
+       @seed_brokers = []
+       z = Zookeeper.new(@zookeeper)
+       z.get_children(:path => @zookeeper_path)[:children].each do |id|
+         broker = Yajl.load(z.get(:path => @zookeeper_path + "/#{id}")[:data])
+         if @ssl_client_cert
+           @seed_brokers.push(pickup_ssl_endpoint(broker))
+         else
+           @seed_brokers.push("#{broker['host']}:#{broker['port']}")
+         end
+       end
+       z.close
+       log.info "brokers have been refreshed via Zookeeper: #{@seed_brokers}"
+     end
+     begin
+       if @seed_brokers.length > 0
+         if @scram_mechanism != nil && @username != nil && @password != nil
+           @kafka = Kafka.new(seed_brokers: @seed_brokers, client_id: @client_id, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
+                              ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key), ssl_ca_certs_from_system: @ssl_ca_certs_from_system,
+                              sasl_scram_username: @username, sasl_scram_password: @password, sasl_scram_mechanism: @scram_mechanism, sasl_over_ssl: @sasl_over_ssl)
+         elsif @username != nil && @password != nil
+           @kafka = Kafka.new(seed_brokers: @seed_brokers, client_id: @client_id, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
+                              ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key), ssl_ca_certs_from_system: @ssl_ca_certs_from_system,
+                              sasl_plain_username: @username, sasl_plain_password: @password)
+         else
+           @kafka = Kafka.new(seed_brokers: @seed_brokers, client_id: @client_id, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
+                              ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key), ssl_ca_certs_from_system: @ssl_ca_certs_from_system,
+                              sasl_gssapi_principal: @principal, sasl_gssapi_keytab: @keytab)
+         end
+         log.info "initialized kafka producer: #{@client_id}"
+       else
+         log.warn "No brokers found on Zookeeper"
+       end
+     rescue Exception => e
+       log.error e
+     end
+   end
+
+   def configure(conf)
+     super
+
+     if @zookeeper
+       require 'zookeeper'
+     else
+       @seed_brokers = @brokers.split(",")
+       log.info "brokers have been set directly: #{@seed_brokers}"
+     end
+
+     if conf['ack_timeout_ms']
+       log.warn "'ack_timeout_ms' parameter is deprecated. Use 'ack_timeout' (in seconds) instead"
+       @ack_timeout = conf['ack_timeout_ms'].to_i / 1000
+     end
+
+     @f_separator = case @field_separator
+                    when /SPACE/i then ' '
+                    when /COMMA/i then ','
+                    when /SOH/i then "\x01"
+                    else "\t"
+                    end
+
+     @formatter_proc = setup_formatter(conf)
+
+     @producer_opts = {max_retries: @max_send_retries, required_acks: @required_acks}
+     @producer_opts[:ack_timeout] = @ack_timeout if @ack_timeout
+     @producer_opts[:compression_codec] = @compression_codec.to_sym if @compression_codec
+     @producer_opts[:max_buffer_size] = @max_buffer_size if @max_buffer_size
+     @producer_opts[:max_buffer_bytesize] = @max_buffer_bytesize if @max_buffer_bytesize
+     if @active_support_notification_regex
+       require 'active_support/notifications'
+       require 'active_support/core_ext/hash/keys'
+       ActiveSupport::Notifications.subscribe(Regexp.new(@active_support_notification_regex)) do |*args|
+         event = ActiveSupport::Notifications::Event.new(*args)
+         message = event.payload.respond_to?(:stringify_keys) ? event.payload.stringify_keys : event.payload
+         @router.emit("fluent_kafka_stats.#{event.name}", Time.now.to_i, message)
+       end
+     end
+   end
+
+   def multi_workers_ready?
+     true
+   end
+
+   def start
+     super
+     refresh_client
+   end
+
+   def shutdown
+     super
+     @kafka = nil
+   end
+
+   def setup_formatter(conf)
+     if @output_data_type == 'json'
+       require 'yajl'
+       Proc.new { |tag, time, record| Yajl::Encoder.encode(record) }
+     elsif @output_data_type == 'ltsv'
+       require 'ltsv'
+       Proc.new { |tag, time, record| LTSV.dump(record) }
+     elsif @output_data_type == 'msgpack'
+       require 'msgpack'
+       Proc.new { |tag, time, record| record.to_msgpack }
+     elsif @output_data_type =~ /^attr:(.*)$/
+       @custom_attributes = $1.split(',').map(&:strip).reject(&:empty?)
+       @custom_attributes.unshift('time') if @output_include_time
+       @custom_attributes.unshift('tag') if @output_include_tag
+       Proc.new { |tag, time, record|
+         @custom_attributes.map { |attr|
+           record[attr].nil? ? '' : record[attr].to_s
+         }.join(@f_separator)
+       }
+     else
+       @formatter = Fluent::Plugin.new_formatter(@output_data_type)
+       @formatter.configure(conf)
+       @formatter.method(:format)
+     end
+   end
+
+   def emit(tag, es, chain)
+     begin
+       chain.next
+
+       # out_kafka is mainly for testing, so it doesn't need the performance of out_kafka_buffered.
+       producer = @kafka.producer(@producer_opts)
+
+       es.each do |time, record|
+         if @output_include_time
+           if @time_format
+             record['time'] = Time.at(time).strftime(@time_format)
+           else
+             record['time'] = time
+           end
+         end
+         record['tag'] = tag if @output_include_tag
+         topic = (@exclude_topic_key ? record.delete('topic') : record['topic']) || @default_topic || tag
+         partition_key = (@exclude_partition_key ? record.delete('partition_key') : record['partition_key']) || @default_partition_key
+         partition = (@exclude_partition ? record.delete('partition'.freeze) : record['partition'.freeze]) || @default_partition
+         message_key = (@exclude_message_key ? record.delete('message_key') : record['message_key']) || @default_message_key
+
+         value = @formatter_proc.call(tag, time, record)
+
+         log.trace { "message will be sent to #{topic} with partition_key: #{partition_key}, partition: #{partition}, message_key: #{message_key} and value: #{value}." }
+         begin
+           producer.produce(value, topic: topic, key: message_key, partition: partition, partition_key: partition_key)
+         rescue Kafka::BufferOverflow => e
+           log.warn "BufferOverflow occurred: #{e}"
+           log.info "Trying to deliver the messages to prevent the buffer from overflowing again."
+           producer.deliver_messages
+           log.info "Recovered from BufferOverflow successfully"
+         end
+       end
+
+       producer.deliver_messages
+       producer.shutdown
+     rescue Exception => e
+       log.warn "Send exception occurred: #{e}"
+       producer.shutdown if producer
+       refresh_client
+       raise e
+     end
+   end
+
+ end
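Finally, a small standalone sketch of the attr:<fields> branch of setup_formatter, whose behavior is the least obvious of the formatters: the listed record attributes are emitted in order, joined by the configured field separator (tab by default), with missing attributes rendered as empty strings. The field list and record below are invented for illustration.

# Mirrors the attr:<fields> branch above with a hard-coded field list.
custom_attributes = 'user,code'.split(',').map(&:strip).reject(&:empty?)
f_separator = "\t"  # default when field_separator is not SPACE/COMMA/SOH

formatter = Proc.new do |tag, time, record|
  custom_attributes.map { |attr| record[attr].nil? ? '' : record[attr].to_s }.join(f_separator)
end

puts formatter.call('app.access', Time.now.to_i, 'user' => 'alice', 'code' => 200, 'extra' => 'ignored')
# => "alice\t200" (printed with a literal tab; fields not listed in attr: are dropped)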