sk-fluent-plugin-kafka 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,52 @@
+ module Fluent
+   module KafkaPluginUtil
+     module SSLSettings
+       def self.included(klass)
+         klass.instance_eval {
+           # https://github.com/zendesk/ruby-kafka#encryption-and-authentication-using-ssl
+           config_param :ssl_ca_cert, :array, :value_type => :string, :default => nil,
+                        :desc => "a PEM encoded CA cert to use with an SSL connection."
+           config_param :ssl_client_cert, :string, :default => nil,
+                        :desc => "a PEM encoded client cert to use with an SSL connection. Must be used in combination with ssl_client_cert_key."
+           config_param :ssl_client_cert_key, :string, :default => nil,
+                        :desc => "a PEM encoded client cert key to use with an SSL connection. Must be used in combination with ssl_client_cert."
+           config_param :ssl_ca_certs_from_system, :bool, :default => false,
+                        :desc => "this configures the store to look up CA certificates from the system default certificate store on an as-needed basis. The location of the store can usually be determined by: OpenSSL::X509::DEFAULT_CERT_FILE."
+         }
+       end
+
+       def read_ssl_file(path)
+         return nil if path.nil?
+
+         if path.is_a?(Array)
+           path.map { |fp| File.read(fp) }
+         else
+           File.read(path)
+         end
+       end
+
+       def pickup_ssl_endpoint(node)
+         ssl_endpoint = node['endpoints'].find { |e| e.start_with?('SSL') }
+         raise 'no SSL endpoint found on Zookeeper' unless ssl_endpoint
+         return [URI.parse(ssl_endpoint).host, URI.parse(ssl_endpoint).port].join(':')
+       end
+     end
+
+     module SaslSettings
+       def self.included(klass)
+         klass.instance_eval {
+           config_param :principal, :string, :default => nil,
+                        :desc => "a Kerberos principal to use with SASL authentication (GSSAPI)."
+           config_param :keytab, :string, :default => nil,
+                        :desc => "a filepath to a Kerberos keytab. Must be used with principal."
+           config_param :username, :string, :default => nil,
+                        :desc => "a username when using PLAIN/SCRAM SASL authentication"
+           config_param :password, :string, :default => nil, secret: true,
+                        :desc => "a password when using PLAIN/SCRAM SASL authentication"
+           config_param :scram_mechanism, :string, :default => nil,
+                        :desc => "if set, use SCRAM authentication with the specified mechanism. When unset, defaults to PLAIN authentication"
+         }
+       end
+     end
+   end
+ end
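
As a rough illustration of how these mixins behave outside Fluentd, the sketch below exercises read_ssl_file and pickup_ssl_endpoint directly. The SslHolder class, the no-op config_param stub, and the file paths are hypothetical and exist only for this example; the require path is assumed, since file names are not shown in this diff.

    require 'uri'
    require 'fluent/plugin/kafka_plugin_util'  # path assumed; the file shown above

    class SslHolder
      # Outside Fluentd there is no config_param DSL, so stub it out
      # before the included hook runs.
      def self.config_param(*); end
      include Fluent::KafkaPluginUtil::SSLSettings
    end

    holder = SslHolder.new
    holder.read_ssl_file(nil)                             # => nil
    holder.read_ssl_file('/path/to/ca.pem')               # => one PEM file's contents (String)
    holder.read_ssl_file(['/path/a.pem', '/path/b.pem'])  # => Array of file contents

    node = { 'endpoints' => ['PLAINTEXT://broker1:9092', 'SSL://broker1:9093'] }
    holder.pickup_ssl_endpoint(node)                      # => "broker1:9093"

This mirrors how the out_kafka output below passes the loaded PEM contents (rather than paths) to ruby-kafka's ssl_* options, and how it resolves an SSL broker endpoint from a Zookeeper node entry.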
@@ -0,0 +1,308 @@
+ require "set"
+ require "kafka/partitioner"
+ require "kafka/message_buffer"
+ require "kafka/produce_operation"
+ require "kafka/pending_message_queue"
+ require "kafka/pending_message"
+ require "kafka/compressor"
+ require 'kafka/producer'
+
+ # for out_kafka_buffered
+ module Kafka
+   EMPTY_HEADER = {}
+
+   class Producer
+     def produce_for_buffered(value, key: nil, topic:, partition: nil, partition_key: nil)
+       create_time = Time.now
+
+       message = PendingMessage.new(
+         value: value,
+         key: key,
+         headers: EMPTY_HEADER,
+         topic: topic,
+         partition: partition,
+         partition_key: partition_key,
+         create_time: create_time
+       )
+
+       # If the producer is in transactional mode, messages may only be
+       # produced while a transaction is in progress.
+       if @transaction_manager.transactional? && !@transaction_manager.in_transaction?
+         raise 'You must trigger begin_transaction before producing messages'
+       end
+
+       @target_topics.add(topic)
+       @pending_message_queue.write(message)
+
+       nil
+     end
+   end
+ end
+
+ # for out_kafka2
+ module Kafka
+   class Client
+     def topic_producer(topic, compression_codec: nil, compression_threshold: 1, ack_timeout: 5, required_acks: :all, max_retries: 2, retry_backoff: 1, max_buffer_size: 1000, max_buffer_bytesize: 10_000_000, idempotent: false, transactional: false, transactional_id: nil, transactional_timeout: 60)
+       cluster = initialize_cluster
+       compressor = Compressor.new(
+         codec_name: compression_codec,
+         threshold: compression_threshold,
+         instrumenter: @instrumenter,
+       )
+
+       transaction_manager = TransactionManager.new(
+         cluster: cluster,
+         logger: @logger,
+         idempotent: idempotent,
+         transactional: transactional,
+         transactional_id: transactional_id,
+         transactional_timeout: transactional_timeout,
+       )
+
+       TopicProducer.new(topic,
+         cluster: cluster,
+         transaction_manager: transaction_manager,
+         logger: @logger,
+         instrumenter: @instrumenter,
+         compressor: compressor,
+         ack_timeout: ack_timeout,
+         required_acks: required_acks,
+         max_retries: max_retries,
+         retry_backoff: retry_backoff,
+         max_buffer_size: max_buffer_size,
+         max_buffer_bytesize: max_buffer_bytesize,
+       )
+     end
+   end
+
+   class TopicProducer
+     def initialize(topic, cluster:, transaction_manager:, logger:, instrumenter:, compressor:, ack_timeout:, required_acks:, max_retries:, retry_backoff:, max_buffer_size:, max_buffer_bytesize:)
+       @cluster = cluster
+       @transaction_manager = transaction_manager
+       @logger = logger
+       @instrumenter = instrumenter
+       @required_acks = required_acks == :all ? -1 : required_acks
+       @ack_timeout = ack_timeout
+       @max_retries = max_retries
+       @retry_backoff = retry_backoff
+       @max_buffer_size = max_buffer_size
+       @max_buffer_bytesize = max_buffer_bytesize
+       @compressor = compressor
+
+       @topic = topic
+       @cluster.add_target_topics(Set.new([topic]))
+
+       # A buffer organized by topic/partition.
+       @buffer = MessageBuffer.new
+
+       # Messages added by `#produce` but not yet assigned a partition.
+       @pending_message_queue = PendingMessageQueue.new
+     end
+
+     def produce(value, key: nil, partition: nil, partition_key: nil)
+       create_time = Time.now
+
+       message = PendingMessage.new(
+         value: value,
+         key: key,
+         headers: EMPTY_HEADER,
+         topic: @topic,
+         partition: partition,
+         partition_key: partition_key,
+         create_time: create_time
+       )
+
+       # If the producer is in transactional mode, messages may only be
+       # produced while a transaction is in progress.
+       if @transaction_manager.transactional? && !@transaction_manager.in_transaction?
+         raise 'You must trigger begin_transaction before producing messages'
+       end
+
+       @pending_message_queue.write(message)
+
+       nil
+     end
+
+     def deliver_messages
+       # There's no need to do anything if the buffer is empty.
+       return if buffer_size == 0
+
+       deliver_messages_with_retries
+     end
+
+     # Returns the number of messages currently held in the buffer.
+     #
+     # @return [Integer] buffer size.
+     def buffer_size
+       @pending_message_queue.size + @buffer.size
+     end
+
+     def buffer_bytesize
+       @pending_message_queue.bytesize + @buffer.bytesize
+     end
+
+     # Deletes all buffered messages.
+     #
+     # @return [nil]
+     def clear_buffer
+       @buffer.clear
+       @pending_message_queue.clear
+     end
+
+     # Closes all connections to the brokers.
+     #
+     # @return [nil]
+     def shutdown
+       @transaction_manager.close
+       @cluster.disconnect
+     end
+
+     def init_transactions
+       @transaction_manager.init_transactions
+     end
+
+     def begin_transaction
+       @transaction_manager.begin_transaction
+     end
+
+     def commit_transaction
+       @transaction_manager.commit_transaction
+     end
+
+     def abort_transaction
+       @transaction_manager.abort_transaction
+     end
+
+     def transaction
+       raise 'This method requires a block' unless block_given?
+       begin_transaction
+       yield
+       commit_transaction
+     rescue Kafka::Producer::AbortTransaction
+       abort_transaction
+     rescue
+       abort_transaction
+       raise
+     end
+
+     def deliver_messages_with_retries
+       attempt = 0
+
+       #@cluster.add_target_topics(@target_topics)
+
+       operation = ProduceOperation.new(
+         cluster: @cluster,
+         transaction_manager: @transaction_manager,
+         buffer: @buffer,
+         required_acks: @required_acks,
+         ack_timeout: @ack_timeout,
+         compressor: @compressor,
+         logger: @logger,
+         instrumenter: @instrumenter,
+       )
+
+       loop do
+         attempt += 1
+
+         begin
+           @cluster.refresh_metadata_if_necessary!
+         rescue ConnectionError => e
+           raise DeliveryFailed.new(e, buffer_messages)
+         end
+
+         assign_partitions!
+         operation.execute
+
+         if @required_acks.zero?
+           # No response is returned by the brokers, so we can't know which messages
+           # have been successfully written. Our only option is to assume that they all
+           # have.
+           @buffer.clear
+         end
+
+         if buffer_size.zero?
+           break
+         elsif attempt <= @max_retries
+           @logger.warn "Failed to send all messages; attempting retry #{attempt} of #{@max_retries} after #{@retry_backoff}s"
+
+           sleep @retry_backoff
+         else
+           @logger.error "Failed to send all messages; keeping remaining messages in buffer"
+           break
+         end
+       end
+
+       unless @pending_message_queue.empty?
+         # Mark the cluster as stale in order to force a cluster metadata refresh.
+         @cluster.mark_as_stale!
+         raise DeliveryFailed, "Failed to assign partitions to #{@pending_message_queue.size} messages"
+       end
+
+       unless @buffer.empty?
+         partitions = @buffer.map { |topic, partition, _| "#{topic}/#{partition}" }.join(", ")
+
+         raise DeliveryFailed, "Failed to send messages to #{partitions}"
+       end
+     end
+
+     def assign_partitions!
+       failed_messages = []
+       partition_count = @cluster.partitions_for(@topic).count
+
+       @pending_message_queue.each do |message|
+         partition = message.partition
+
+         begin
+           if partition.nil?
+             partition = Partitioner.partition_for_key(partition_count, message)
+           end
+
+           @buffer.write(
+             value: message.value,
+             key: message.key,
+             headers: message.headers,
+             topic: message.topic,
+             partition: partition,
+             create_time: message.create_time,
+           )
+         rescue Kafka::Error => e
+           failed_messages << message
+         end
+       end
+
+       if failed_messages.any?
+         failed_messages.group_by(&:topic).each do |topic, messages|
+           @logger.error "Failed to assign partitions to #{messages.count} messages in #{topic}"
+         end
+
+         @cluster.mark_as_stale!
+       end
+
+       @pending_message_queue.replace(failed_messages)
+     end
+
+     def buffer_messages
+       messages = []
+
+       @pending_message_queue.each do |message|
+         messages << message
+       end
+
+       @buffer.each do |topic, partition, messages_for_partition|
+         messages_for_partition.each do |message|
+           messages << PendingMessage.new(
+             value: message.value,
+             key: message.key,
+             headers: message.headers,
+             topic: topic,
+             partition: partition,
+             partition_key: nil,
+             create_time: message.create_time
+           )
+         end
+       end
+
+       messages
+     end
+   end
+ end
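
For orientation, here is a minimal usage sketch of the TopicProducer path that out_kafka2 relies on. The broker address and the 'logs' topic are placeholders, and the require path is assumed, since file names are not shown in this diff.

    require 'kafka'
    require 'fluent/plugin/kafka_producer_ext'  # path assumed; the extension shown above

    kafka = Kafka.new(seed_brokers: ['localhost:9092'], client_id: 'example')  # placeholder broker
    producer = kafka.topic_producer('logs', required_acks: -1, max_retries: 2, retry_backoff: 1)

    producer.produce('{"message":"hello"}', key: 'k1', partition_key: 'host-a')
    producer.produce('{"message":"world"}')  # partition chosen by the key partitioner
    producer.deliver_messages                # flush the single-topic buffer, retrying on failure
    producer.shutdown

Unlike ruby-kafka's stock Producer, this TopicProducer is bound to one topic and registers only that topic with the cluster, which keeps the buffered output path simpler.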
@@ -0,0 +1,254 @@
+ require 'fluent/output'
+ require 'fluent/plugin/kafka_plugin_util'
+
+ class Fluent::KafkaOutput < Fluent::Output
+   Fluent::Plugin.register_output('kafka', self)
+
+   config_param :brokers, :string, :default => 'localhost:9092',
+                :desc => <<-DESC
+ Set brokers directly:
+ <broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,..
+ Note that you can choose to use either brokers or zookeeper.
+ DESC
+   config_param :zookeeper, :string, :default => nil,
+                :desc => "Set brokers via Zookeeper: <zookeeper_host>:<zookeeper_port>"
+   config_param :zookeeper_path, :string, :default => '/brokers/ids',
+                :desc => "Zookeeper path where broker ids are registered. Defaults to /brokers/ids"
+   config_param :default_topic, :string, :default => nil,
+                :desc => "Output topic."
+   config_param :default_message_key, :string, :default => nil
+   config_param :default_partition_key, :string, :default => nil
+   config_param :default_partition, :integer, :default => nil
+   config_param :client_id, :string, :default => 'kafka'
+   config_param :sasl_over_ssl, :bool, :default => true,
+                :desc => <<-DESC
+ Set to false to prevent SSL strict mode when using SASL authentication
+ DESC
+   config_param :output_data_type, :string, :default => 'json',
+                :desc => "Supported format: (json|ltsv|msgpack|attr:<record name>|<formatter name>)"
+   config_param :output_include_tag, :bool, :default => false
+   config_param :output_include_time, :bool, :default => false
+   config_param :exclude_partition_key, :bool, :default => false,
+                :desc => <<-DESC
+ Set true to remove partition key from data
+ DESC
+   config_param :exclude_partition, :bool, :default => false,
+                :desc => <<-DESC
+ Set true to remove partition from data
+ DESC
+
+   config_param :exclude_message_key, :bool, :default => false,
+                :desc => <<-DESC
+ Set true to remove message key from data
+ DESC
+   config_param :exclude_topic_key, :bool, :default => false,
+                :desc => <<-DESC
+ Set true to remove topic name key from data
+ DESC
+
+   # ruby-kafka producer options
+   config_param :max_send_retries, :integer, :default => 2,
+                :desc => "Number of times to retry sending messages to a leader."
+   config_param :required_acks, :integer, :default => -1,
+                :desc => "The number of acks required per request."
+   config_param :ack_timeout, :integer, :default => nil,
+                :desc => "How long the producer waits for acks."
+   config_param :compression_codec, :string, :default => nil,
+                :desc => "The codec the producer uses to compress messages."
+
+   config_param :time_format, :string, :default => nil
+
+   config_param :max_buffer_size, :integer, :default => nil,
+                :desc => "Number of messages to be buffered by the kafka producer."
+
+   config_param :max_buffer_bytesize, :integer, :default => nil,
+                :desc => "Maximum size in bytes to be buffered."
+
+   config_param :active_support_notification_regex, :string, :default => nil,
+                :desc => <<-DESC
+ Add a regular expression to capture ActiveSupport notifications from the Kafka client.
+ Requires the activesupport gem - records will be generated under fluent_kafka_stats.**
+ DESC
+
+   include Fluent::KafkaPluginUtil::SSLSettings
+   include Fluent::KafkaPluginUtil::SaslSettings
+
+   attr_accessor :output_data_type
+   attr_accessor :field_separator
+
+   unless method_defined?(:log)
+     define_method("log") { $log }
+   end
+
+   def initialize
+     super
+
+     require 'kafka'
+
+     @kafka = nil
+   end
+
+   def refresh_client
+     if @zookeeper
+       @seed_brokers = []
+       z = Zookeeper.new(@zookeeper)
+       z.get_children(:path => @zookeeper_path)[:children].each do |id|
+         broker = Yajl.load(z.get(:path => @zookeeper_path + "/#{id}")[:data])
+         if @ssl_client_cert
+           @seed_brokers.push(pickup_ssl_endpoint(broker))
+         else
+           @seed_brokers.push("#{broker['host']}:#{broker['port']}")
+         end
+       end
+       z.close
+       log.info "brokers have been refreshed via Zookeeper: #{@seed_brokers}"
+     end
+     begin
+       if @seed_brokers.length > 0
+         if @scram_mechanism != nil && @username != nil && @password != nil
+           @kafka = Kafka.new(seed_brokers: @seed_brokers, client_id: @client_id, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
+                              ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key), ssl_ca_certs_from_system: @ssl_ca_certs_from_system,
+                              sasl_scram_username: @username, sasl_scram_password: @password, sasl_scram_mechanism: @scram_mechanism, sasl_over_ssl: @sasl_over_ssl)
+         elsif @username != nil && @password != nil
+           @kafka = Kafka.new(seed_brokers: @seed_brokers, client_id: @client_id, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
+                              ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key), ssl_ca_certs_from_system: @ssl_ca_certs_from_system,
+                              sasl_plain_username: @username, sasl_plain_password: @password)
+         else
+           @kafka = Kafka.new(seed_brokers: @seed_brokers, client_id: @client_id, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
+                              ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key), ssl_ca_certs_from_system: @ssl_ca_certs_from_system,
+                              sasl_gssapi_principal: @principal, sasl_gssapi_keytab: @keytab)
+         end
+         log.info "initialized kafka producer: #{@client_id}"
+       else
+         log.warn "No brokers found on Zookeeper"
+       end
+     rescue Exception => e
+       log.error e
+     end
+   end
+
+   def configure(conf)
+     super
+
+     if @zookeeper
+       require 'zookeeper'
+     else
+       @seed_brokers = @brokers.split(",")
+       log.info "brokers have been set directly: #{@seed_brokers}"
+     end
+
+     if conf['ack_timeout_ms']
+       log.warn "'ack_timeout_ms' parameter is deprecated. Use 'ack_timeout' (in seconds) instead"
+       @ack_timeout = conf['ack_timeout_ms'].to_i / 1000
+     end
+
+     @f_separator = case @field_separator
+                    when /SPACE/i then ' '
+                    when /COMMA/i then ','
+                    when /SOH/i then "\x01"
+                    else "\t"
+                    end
+
+     @formatter_proc = setup_formatter(conf)
+
+     @producer_opts = {max_retries: @max_send_retries, required_acks: @required_acks}
+     @producer_opts[:ack_timeout] = @ack_timeout if @ack_timeout
+     @producer_opts[:compression_codec] = @compression_codec.to_sym if @compression_codec
+     @producer_opts[:max_buffer_size] = @max_buffer_size if @max_buffer_size
+     @producer_opts[:max_buffer_bytesize] = @max_buffer_bytesize if @max_buffer_bytesize
+     if @active_support_notification_regex
+       require 'active_support/notifications'
+       require 'active_support/core_ext/hash/keys'
+       ActiveSupport::Notifications.subscribe(Regexp.new(@active_support_notification_regex)) do |*args|
+         event = ActiveSupport::Notifications::Event.new(*args)
+         message = event.payload.respond_to?(:stringify_keys) ? event.payload.stringify_keys : event.payload
+         @router.emit("fluent_kafka_stats.#{event.name}", Time.now.to_i, message)
+       end
+     end
+   end
+
+   def multi_workers_ready?
+     true
+   end
+
+   def start
+     super
+     refresh_client
+   end
+
+   def shutdown
+     super
+     @kafka = nil
+   end
+
+   def setup_formatter(conf)
+     if @output_data_type == 'json'
+       require 'yajl'
+       Proc.new { |tag, time, record| Yajl::Encoder.encode(record) }
+     elsif @output_data_type == 'ltsv'
+       require 'ltsv'
+       Proc.new { |tag, time, record| LTSV.dump(record) }
+     elsif @output_data_type == 'msgpack'
+       require 'msgpack'
+       Proc.new { |tag, time, record| record.to_msgpack }
+     elsif @output_data_type =~ /^attr:(.*)$/
+       @custom_attributes = $1.split(',').map(&:strip).reject(&:empty?)
+       @custom_attributes.unshift('time') if @output_include_time
+       @custom_attributes.unshift('tag') if @output_include_tag
+       Proc.new { |tag, time, record|
+         @custom_attributes.map { |attr|
+           record[attr].nil? ? '' : record[attr].to_s
+         }.join(@f_separator)
+       }
+     else
+       @formatter = Fluent::Plugin.new_formatter(@output_data_type)
+       @formatter.configure(conf)
+       @formatter.method(:format)
+     end
+   end
+
+   def emit(tag, es, chain)
+     begin
+       chain.next
+
+       # out_kafka is mainly for testing, so it doesn't need the performance of out_kafka_buffered.
+       producer = @kafka.producer(@producer_opts)
+
+       es.each do |time, record|
+         if @output_include_time
+           if @time_format
+             record['time'] = Time.at(time).strftime(@time_format)
+           else
+             record['time'] = time
+           end
+         end
+         record['tag'] = tag if @output_include_tag
+         topic = (@exclude_topic_key ? record.delete('topic') : record['topic']) || @default_topic || tag
+         partition_key = (@exclude_partition_key ? record.delete('partition_key') : record['partition_key']) || @default_partition_key
+         partition = (@exclude_partition ? record.delete('partition'.freeze) : record['partition'.freeze]) || @default_partition
+         message_key = (@exclude_message_key ? record.delete('message_key') : record['message_key']) || @default_message_key
+
+         value = @formatter_proc.call(tag, time, record)
+
+         log.trace { "message will be sent to #{topic} with partition_key: #{partition_key}, partition: #{partition}, message_key: #{message_key} and value: #{value}." }
+         begin
+           producer.produce(value, topic: topic, key: message_key, partition: partition, partition_key: partition_key)
+         rescue Kafka::BufferOverflow => e
+           log.warn "BufferOverflow occurred: #{e}"
+           log.info "Trying to deliver the messages to prevent the buffer from overflowing again."
+           producer.deliver_messages
+           log.info "Recovered from BufferOverflow successfully"
+         end
+       end
+
+       producer.deliver_messages
+       producer.shutdown
+     rescue Exception => e
+       log.warn "Send exception occurred: #{e}"
+       producer.shutdown if producer
+       refresh_client
+       raise e
+     end
+   end
+
+ end
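
Finally, a minimal Fluentd configuration exercising this out_kafka output; the match pattern, broker addresses, topic, certificate path, and credentials below are placeholders rather than values taken from the package:

    <match app.**>
      @type kafka
      brokers broker1:9092,broker2:9092
      default_topic logs
      output_data_type json
      output_include_tag true
      output_include_time true
      required_acks -1
      compression_codec gzip
      # SSL/SASL parameters come from the KafkaPluginUtil mixins above
      ssl_ca_cert /path/to/ca.pem
      username alice
      password secret
    </match>

With output_data_type set to attr:field1,field2 the listed record fields would instead be joined with the configured field separator, and any registered Fluentd formatter name is also accepted, as setup_formatter above shows.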