fluent-plugin-kafka-xst 0.19.1

Files changed (38)
  1. checksums.yaml +7 -0
  2. data/.github/ISSUE_TEMPLATE/bug_report.yaml +72 -0
  3. data/.github/ISSUE_TEMPLATE/config.yml +5 -0
  4. data/.github/ISSUE_TEMPLATE/feature_request.yaml +39 -0
  5. data/.github/dependabot.yml +6 -0
  6. data/.github/workflows/linux.yml +45 -0
  7. data/.github/workflows/stale-actions.yml +24 -0
  8. data/.gitignore +2 -0
  9. data/ChangeLog +344 -0
  10. data/Gemfile +6 -0
  11. data/LICENSE +14 -0
  12. data/README.md +594 -0
  13. data/Rakefile +12 -0
  14. data/ci/prepare-kafka-server.sh +33 -0
  15. data/examples/README.md +3 -0
  16. data/examples/out_kafka2/dynamic_topic_based_on_tag.conf +32 -0
  17. data/examples/out_kafka2/protobuf-formatter.conf +23 -0
  18. data/examples/out_kafka2/record_key.conf +31 -0
  19. data/fluent-plugin-kafka.gemspec +27 -0
  20. data/lib/fluent/plugin/in_kafka.rb +388 -0
  21. data/lib/fluent/plugin/in_kafka_group.rb +394 -0
  22. data/lib/fluent/plugin/in_rdkafka_group.rb +305 -0
  23. data/lib/fluent/plugin/kafka_plugin_util.rb +84 -0
  24. data/lib/fluent/plugin/kafka_producer_ext.rb +308 -0
  25. data/lib/fluent/plugin/out_kafka.rb +268 -0
  26. data/lib/fluent/plugin/out_kafka2.rb +427 -0
  27. data/lib/fluent/plugin/out_kafka_buffered.rb +374 -0
  28. data/lib/fluent/plugin/out_rdkafka.rb +324 -0
  29. data/lib/fluent/plugin/out_rdkafka2.rb +526 -0
  30. data/test/helper.rb +34 -0
  31. data/test/plugin/test_in_kafka.rb +66 -0
  32. data/test/plugin/test_in_kafka_group.rb +69 -0
  33. data/test/plugin/test_kafka_plugin_util.rb +44 -0
  34. data/test/plugin/test_out_kafka.rb +68 -0
  35. data/test/plugin/test_out_kafka2.rb +138 -0
  36. data/test/plugin/test_out_kafka_buffered.rb +68 -0
  37. data/test/plugin/test_out_rdkafka2.rb +182 -0
  38. metadata +214 -0
data/lib/fluent/plugin/kafka_plugin_util.rb
@@ -0,0 +1,84 @@
+ module Fluent
+   module KafkaPluginUtil
+     module AwsIamSettings
+       def self.included(klass)
+         klass.instance_eval do
+           config_param :sasl_aws_msk_iam_access_key_id, :string, :default => nil, secret: true,
+                        desc: "AWS access key id for IAM authentication to MSK."
+           config_param :sasl_aws_msk_iam_secret_key_id, :string, :default => nil, secret: true,
+                        desc: "AWS access key secret for IAM authentication to MSK."
+           config_param :sasl_aws_msk_iam_aws_region, :string, :default => nil,
+                        desc: "AWS region for IAM authentication to MSK."
+         end
+       end
+     end
+
+     module SSLSettings
+       def self.included(klass)
+         klass.instance_eval {
+           # https://github.com/zendesk/ruby-kafka#encryption-and-authentication-using-ssl
+           config_param :ssl_ca_cert, :array, :value_type => :string, :default => nil,
+                        :desc => "a PEM encoded CA cert to use with an SSL connection."
+           config_param :ssl_client_cert, :string, :default => nil,
+                        :desc => "a PEM encoded client cert to use with an SSL connection. Must be used in combination with ssl_client_cert_key."
+           config_param :ssl_client_cert_key, :string, :default => nil,
+                        :desc => "a PEM encoded client cert key to use with an SSL connection. Must be used in combination with ssl_client_cert."
+           config_param :ssl_client_cert_key_password, :string, :default => nil, secret: true,
+                        :desc => "a PEM encoded client cert key password to use with an SSL connection."
+           config_param :ssl_client_cert_chain, :string, :default => nil,
+                        :desc => "an extra PEM encoded cert to use with an SSL connection."
+           config_param :ssl_ca_certs_from_system, :bool, :default => false,
+                        :desc => "this configures the store to look up CA certificates from the system default certificate store on an as-needed basis. The location of the store can usually be determined by OpenSSL::X509::DEFAULT_CERT_FILE."
+           config_param :ssl_verify_hostname, :bool, :default => true,
+                        :desc => "this configures whether the hostname of the certificate should be verified or not."
+         }
+       end
+
+       DummyFormatter = Object.new
+
+       def start
+         super
+
+         # Not ideal, but an easy fix that works for all kafka plugins
+         unless log.respond_to?(:formatter)
+           def log.formatter
+             Fluent::KafkaPluginUtil::SSLSettings::DummyFormatter
+           end
+         end
+       end
+
+       def read_ssl_file(path)
+         return nil if path.nil? || path.respond_to?(:strip) && path.strip.empty?
+
+         if path.is_a?(Array)
+           path.map { |fp| File.read(fp) }
+         else
+           File.read(path)
+         end
+       end
+
+       def pickup_ssl_endpoint(node)
+         ssl_endpoint = node['endpoints'].find { |e| e.start_with?('SSL') }
+         raise 'no SSL endpoint found on Zookeeper' unless ssl_endpoint
+         return [URI.parse(ssl_endpoint).host, URI.parse(ssl_endpoint).port].join(':')
+       end
+     end
+
+     module SaslSettings
+       def self.included(klass)
+         klass.instance_eval {
+           config_param :principal, :string, :default => nil,
+                        :desc => "a Kerberos principal to use with SASL authentication (GSSAPI)."
+           config_param :keytab, :string, :default => nil,
+                        :desc => "a filepath to a Kerberos keytab. Must be used with principal."
+           config_param :username, :string, :default => nil,
+                        :desc => "a username when using PLAIN/SCRAM SASL authentication"
+           config_param :password, :string, :default => nil, secret: true,
+                        :desc => "a password when using PLAIN/SCRAM SASL authentication"
+           config_param :scram_mechanism, :string, :default => nil,
+                        :desc => "if set, use SCRAM authentication with the specified mechanism. When unset, default to PLAIN authentication"
+         }
+       end
+     end
+   end
+ end
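
These modules are Fluentd mixins: each `self.included` hook declares the corresponding `config_param`s on whatever plugin class includes it, and the instance methods (`read_ssl_file`, `pickup_ssl_endpoint`, the `start` patch) then operate on the resulting settings. Below is a minimal sketch of that pattern; the plugin class `ExampleKafkaOutput`, its registered name, and the `write` stub are hypothetical, and only the module names and helpers come from the file above.

require 'fluent/plugin/output'
require 'fluent/plugin/kafka_plugin_util'

# Hypothetical plugin class, shown only to illustrate the mixin pattern.
class ExampleKafkaOutput < Fluent::Plugin::Output
  Fluent::Plugin.register_output('example_kafka', self)

  include Fluent::KafkaPluginUtil::SSLSettings   # adds ssl_ca_cert, ssl_client_cert, ssl_verify_hostname, ...
  include Fluent::KafkaPluginUtil::SaslSettings  # adds principal, keytab, username, password, scram_mechanism

  def start
    super  # SSLSettings#start patches log.formatter when the logger lacks one

    # read_ssl_file returns nil for unset paths, the PEM contents for a single
    # path, and an array of PEM strings when ssl_ca_cert lists several files.
    @client_cert_pem = read_ssl_file(@ssl_client_cert)
  end

  def write(chunk)
    # producing to Kafka would go here; omitted in this sketch
  end
end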
data/lib/fluent/plugin/kafka_producer_ext.rb
@@ -0,0 +1,308 @@
+ require "set"
+ require "kafka/partitioner"
+ require "kafka/message_buffer"
+ require "kafka/produce_operation"
+ require "kafka/pending_message_queue"
+ require "kafka/pending_message"
+ require "kafka/compressor"
+ require 'kafka/producer'
+
+ # for out_kafka_buffered
+ module Kafka
+   EMPTY_HEADER = {}
+
+   class Producer
+     def produce_for_buffered(value, key: nil, topic:, partition: nil, partition_key: nil, create_time: Time.now)
+       message = PendingMessage.new(
+         value: value,
+         key: key,
+         headers: EMPTY_HEADER,
+         topic: topic,
+         partition: partition,
+         partition_key: partition_key,
+         create_time: create_time
+       )
+
+       # If the producer is in transactional mode, messages may only be
+       # produced while a transaction is in progress.
+       if @transaction_manager.transactional? && !@transaction_manager.in_transaction?
+         raise 'You must trigger begin_transaction before producing messages'
+       end
+
+       @target_topics.add(topic)
+       @pending_message_queue.write(message)
+
+       nil
+     end
+   end
+ end
+
+ # for out_kafka2
+ # The majority (if not all) of this code is lifted from https://github.com/zendesk/ruby-kafka/blob/master/lib/kafka/producer.rb,
+ # with the main difference that we have removed any checks regarding max_buffer_bytesize and max_buffer_size.
+ # The reason for doing this is to provide a better UX for our users, who only need to set those bounds in
+ # the Buffer section using `chunk_limit_size` and `chunk_limit_records`.
+ #
+ # We should reconsider this in the future in case the `ruby-kafka` library drastically changes its internals.
+ module Kafka
+   class Client
+     def custom_producer(compression_codec: nil, compression_threshold: 1, ack_timeout: 5, required_acks: :all, max_retries: 2, retry_backoff: 1, max_buffer_size: 1000, max_buffer_bytesize: 10_000_000, idempotent: false, transactional: false, transactional_id: nil, transactional_timeout: 60)
+       cluster = initialize_cluster
+       compressor = Compressor.new(
+         codec_name: compression_codec,
+         threshold: compression_threshold,
+         instrumenter: @instrumenter,
+       )
+
+       transaction_manager = TransactionManager.new(
+         cluster: cluster,
+         logger: @logger,
+         idempotent: idempotent,
+         transactional: transactional,
+         transactional_id: transactional_id,
+         transactional_timeout: transactional_timeout,
+       )
+
+       CustomProducer.new(cluster: cluster,
+                          transaction_manager: transaction_manager,
+                          logger: @logger,
+                          instrumenter: @instrumenter,
+                          compressor: compressor,
+                          ack_timeout: ack_timeout,
+                          required_acks: required_acks,
+                          max_retries: max_retries,
+                          retry_backoff: retry_backoff,
+                          max_buffer_size: max_buffer_size,
+                          max_buffer_bytesize: max_buffer_bytesize,
+                          partitioner: @partitioner,
+                         )
+     end
+   end
+
+   class CustomProducer
+     def initialize(cluster:, transaction_manager:, logger:, instrumenter:, compressor:, ack_timeout:, required_acks:, max_retries:, retry_backoff:, max_buffer_size:, max_buffer_bytesize:, partitioner:)
+       @cluster = cluster
+       @transaction_manager = transaction_manager
+       @logger = logger
+       @instrumenter = instrumenter
+       @required_acks = required_acks == :all ? -1 : required_acks
+       @ack_timeout = ack_timeout
+       @max_retries = max_retries
+       @retry_backoff = retry_backoff
+       @max_buffer_size = max_buffer_size
+       @max_buffer_bytesize = max_buffer_bytesize
+       @compressor = compressor
+       @partitioner = partitioner
+       # A buffer organized by topic/partition.
+       @buffer = MessageBuffer.new
+
+       # Messages added by `#produce` but not yet assigned a partition.
+       @pending_message_queue = PendingMessageQueue.new
+     end
+
+     def produce(value, key: nil, partition: nil, partition_key: nil, headers: EMPTY_HEADER, create_time: Time.now, topic: nil)
+       message = PendingMessage.new(
+         value: value,
+         key: key,
+         headers: headers,
+         topic: topic,
+         partition: partition,
+         partition_key: partition_key,
+         create_time: create_time
+       )
+
+       # If the producer is in transactional mode, messages may only be
+       # produced while a transaction is in progress.
+       if @transaction_manager.transactional? && !@transaction_manager.in_transaction?
+         raise 'You must trigger begin_transaction before producing messages'
+       end
+
+       @pending_message_queue.write(message)
+
+       nil
+     end
+
+     def deliver_messages
+       # There's no need to do anything if the buffer is empty.
+       return if buffer_size == 0
+
+       deliver_messages_with_retries
+     end
+
+     # Returns the number of messages currently held in the buffer.
+     #
+     # @return [Integer] buffer size.
+     def buffer_size
+       @pending_message_queue.size + @buffer.size
+     end
+
+     def buffer_bytesize
+       @pending_message_queue.bytesize + @buffer.bytesize
+     end
+
+     # Deletes all buffered messages.
+     #
+     # @return [nil]
+     def clear_buffer
+       @buffer.clear
+       @pending_message_queue.clear
+     end
+
+     # Closes all connections to the brokers.
+     #
+     # @return [nil]
+     def shutdown
+       @transaction_manager.close
+       @cluster.disconnect
+     end
+
+     def init_transactions
+       @transaction_manager.init_transactions
+     end
+
+     def begin_transaction
+       @transaction_manager.begin_transaction
+     end
+
+     def commit_transaction
+       @transaction_manager.commit_transaction
+     end
+
+     def abort_transaction
+       @transaction_manager.abort_transaction
+     end
+
+     def transaction
+       raise 'This method requires a block' unless block_given?
+       begin_transaction
+       yield
+       commit_transaction
+     rescue Kafka::Producer::AbortTransaction
+       abort_transaction
+     rescue
+       abort_transaction
+       raise
+     end
+
+     def deliver_messages_with_retries
+       attempt = 0
+
+       #@cluster.add_target_topics(@target_topics)
+
+       operation = ProduceOperation.new(
+         cluster: @cluster,
+         transaction_manager: @transaction_manager,
+         buffer: @buffer,
+         required_acks: @required_acks,
+         ack_timeout: @ack_timeout,
+         compressor: @compressor,
+         logger: @logger,
+         instrumenter: @instrumenter,
+       )
+
+       loop do
+         attempt += 1
+
+         begin
+           @cluster.refresh_metadata_if_necessary!
+         rescue ConnectionError => e
+           raise DeliveryFailed.new(e, buffer_messages)
+         end
+
+         assign_partitions!
+         operation.execute
+
+         if @required_acks.zero?
+           # No response is returned by the brokers, so we can't know which messages
+           # have been successfully written. Our only option is to assume that they all
+           # have.
+           @buffer.clear
+         end
+
+         if buffer_size.zero?
+           break
+         elsif attempt <= @max_retries
+           @logger.warn "Failed to send all messages; attempting retry #{attempt} of #{@max_retries} after #{@retry_backoff}s"
+
+           sleep @retry_backoff
+         else
+           @logger.error "Failed to send all messages; keeping remaining messages in buffer"
+           break
+         end
+       end
+
+       unless @pending_message_queue.empty?
+         # Mark the cluster as stale in order to force a cluster metadata refresh.
+         @cluster.mark_as_stale!
+         raise DeliveryFailed.new("Failed to assign partitions to #{@pending_message_queue.size} messages", buffer_messages)
+       end
+
+       unless @buffer.empty?
+         partitions = @buffer.map { |topic, partition, _| "#{topic}/#{partition}" }.join(", ")
+
+         raise DeliveryFailed.new("Failed to send messages to #{partitions}", buffer_messages)
+       end
+     end
+
+     def assign_partitions!
+       failed_messages = []
+
+       @pending_message_queue.each do |message|
+         partition = message.partition
+
+         begin
+           partition_count = @cluster.partitions_for(message.topic).count
+
+           if partition.nil?
+             partition = @partitioner.call(partition_count, message)
+           end
+
+           @buffer.write(
+             value: message.value,
+             key: message.key,
+             headers: message.headers,
+             topic: message.topic,
+             partition: partition,
+             create_time: message.create_time,
+           )
+         rescue Kafka::Error => e
+           failed_messages << message
+         end
+       end
+
+       if failed_messages.any?
+         failed_messages.group_by(&:topic).each do |topic, messages|
+           @logger.error "Failed to assign partitions to #{messages.count} messages in #{topic}"
+         end
+
+         @cluster.mark_as_stale!
+       end
+
+       @pending_message_queue.replace(failed_messages)
+     end
+
+     def buffer_messages
+       messages = []
+
+       @pending_message_queue.each do |message|
+         messages << message
+       end
+
+       @buffer.each do |topic, partition, messages_for_partition|
+         messages_for_partition.each do |message|
+           messages << PendingMessage.new(
+             value: message.value,
+             key: message.key,
+             headers: message.headers,
+             topic: topic,
+             partition: partition,
+             partition_key: nil,
+             create_time: message.create_time
+           )
+         end
+       end
+
+       messages
+     end
+   end
+ end
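
As the comment in this file notes, `CustomProducer` deliberately drops ruby-kafka's `max_buffer_size`/`max_buffer_bytesize` checks because out_kafka2 already bounds each write through the Fluentd buffer (`chunk_limit_size` / `chunk_limit_records`). The following is a rough, illustrative sketch of how an output plugin could drive this extension; the broker address, codec choice, and record loop are assumptions for the example, not the actual out_kafka2 code.

require 'kafka'
require 'fluent/plugin/kafka_producer_ext'

# Illustrative driver (not the real out_kafka2#write): one producer per chunk,
# produce every record, then a single deliver_messages at the end.
kafka = Kafka.new(seed_brokers: ['localhost:9092'], client_id: 'fluentd')  # example broker

producer = kafka.custom_producer(
  required_acks: -1,          # same meaning as ruby-kafka's :all
  compression_codec: :gzip,   # optional; any codec ruby-kafka supports
  max_retries: 2,
  retry_backoff: 1
)

begin
  records = [['{"msg":"a"}', 'key-1'], ['{"msg":"b"}', nil]]  # stand-in for a buffer chunk
  records.each do |value, key|
    # No buffer-size check happens here; the chunk itself is already bounded
    # by <buffer> chunk_limit_size / chunk_limit_records on the Fluentd side.
    producer.produce(value, key: key, topic: 'logs')
  end
  producer.deliver_messages   # assigns partitions and sends everything buffered
ensure
  producer.shutdown
end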
data/lib/fluent/plugin/out_kafka.rb
@@ -0,0 +1,268 @@
+ require 'fluent/output'
+ require 'fluent/plugin/kafka_plugin_util'
+
+ class Fluent::KafkaOutput < Fluent::Output
+   Fluent::Plugin.register_output('kafka', self)
+
+   config_param :brokers, :string, :default => 'localhost:9092',
+                :desc => <<-DESC
+ Set brokers directly:
+ <broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,..
+ Note that you can choose to use either brokers or zookeeper.
+ DESC
+   config_param :zookeeper, :string, :default => nil,
+                :desc => "Set brokers via Zookeeper: <zookeeper_host>:<zookeeper_port>"
+   config_param :zookeeper_path, :string, :default => '/brokers/ids',
+                :desc => "Path in Zookeeper for broker ids. Defaults to /brokers/ids"
+   config_param :default_topic, :string, :default => nil,
+                :desc => "Output topic."
+   config_param :default_message_key, :string, :default => nil
+   config_param :default_partition_key, :string, :default => nil
+   config_param :default_partition, :integer, :default => nil
+   config_param :partitioner_hash_function, :enum, list: [:crc32, :murmur2], :default => :crc32,
+                :desc => "Specify the kafka partitioner hash algorithm"
+   config_param :client_id, :string, :default => 'kafka'
+   config_param :sasl_over_ssl, :bool, :default => true,
+                :desc => <<-DESC
+ Set to false to prevent SSL strict mode when using SASL authentication
+ DESC
+   config_param :output_data_type, :string, :default => 'json',
+                :desc => "Supported format: (json|ltsv|msgpack|attr:<record name>|<formatter name>)"
+   config_param :output_include_tag, :bool, :default => false
+   config_param :output_include_time, :bool, :default => false
+   config_param :exclude_partition_key, :bool, :default => false,
+                :desc => <<-DESC
+ Set true to remove partition key from data
+ DESC
+   config_param :exclude_partition, :bool, :default => false,
+                :desc => <<-DESC
+ Set true to remove partition from data
+ DESC
+
+   config_param :exclude_message_key, :bool, :default => false,
+                :desc => <<-DESC
+ Set true to remove message key from data
+ DESC
+   config_param :exclude_topic_key, :bool, :default => false,
+                :desc => <<-DESC
+ Set true to remove topic name key from data
+ DESC
+
+   # ruby-kafka producer options
+   config_param :max_send_retries, :integer, :default => 2,
+                :desc => "Number of times to retry sending messages to a leader."
+   config_param :required_acks, :integer, :default => -1,
+                :desc => "The number of acks required per request."
+   config_param :ack_timeout, :integer, :default => nil,
+                :desc => "How long the producer waits for acks."
+   config_param :compression_codec, :string, :default => nil,
+                :desc => "The codec the producer uses to compress messages."
+   config_param :max_send_limit_bytes, :size, :default => nil
+   config_param :time_format, :string, :default => nil
+
+   config_param :max_buffer_size, :integer, :default => nil,
+                :desc => "Number of messages to be buffered by the kafka producer."
+
+   config_param :max_buffer_bytesize, :integer, :default => nil,
+                :desc => "Maximum size in bytes to be buffered."
+
+   config_param :active_support_notification_regex, :string, :default => nil,
+                :desc => <<-DESC
+ Add a regular expression to capture ActiveSupport notifications from the Kafka client.
+ Requires the activesupport gem - records will be generated under fluent_kafka_stats.**
+ DESC
+
+   include Fluent::KafkaPluginUtil::SSLSettings
+   include Fluent::KafkaPluginUtil::SaslSettings
+
+   attr_accessor :output_data_type
+   attr_accessor :field_separator
+
+   unless method_defined?(:log)
+     define_method("log") { $log }
+   end
+
+   def initialize
+     super
+
+     require 'kafka'
+
+     @kafka = nil
+     @field_separator = nil
+   end
+
+   def refresh_client
+     if @zookeeper
+       @seed_brokers = []
+       z = Zookeeper.new(@zookeeper)
+       z.get_children(:path => @zookeeper_path)[:children].each do |id|
+         broker = Yajl.load(z.get(:path => @zookeeper_path + "/#{id}")[:data])
+         if @ssl_client_cert
+           @seed_brokers.push(pickup_ssl_endpoint(broker))
+         else
+           @seed_brokers.push("#{broker['host']}:#{broker['port']}")
+         end
+       end
+       z.close
+       log.info "brokers have been refreshed via Zookeeper: #{@seed_brokers}"
+     end
+     begin
+       if @seed_brokers.length > 0
+         if @scram_mechanism != nil && @username != nil && @password != nil
+           @kafka = Kafka.new(seed_brokers: @seed_brokers, client_id: @client_id, ssl_ca_cert_file_path: @ssl_ca_cert,
+                              ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key), ssl_ca_certs_from_system: @ssl_ca_certs_from_system,
+                              sasl_scram_username: @username, sasl_scram_password: @password, sasl_scram_mechanism: @scram_mechanism, sasl_over_ssl: @sasl_over_ssl,
+                              ssl_verify_hostname: @ssl_verify_hostname,
+                              partitioner: Kafka::Partitioner.new(hash_function: @partitioner_hash_function))
+         elsif @username != nil && @password != nil
+           @kafka = Kafka.new(seed_brokers: @seed_brokers, client_id: @client_id, ssl_ca_cert_file_path: @ssl_ca_cert,
+                              ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key), ssl_ca_certs_from_system: @ssl_ca_certs_from_system,
+                              sasl_plain_username: @username, sasl_plain_password: @password, sasl_over_ssl: @sasl_over_ssl, ssl_verify_hostname: @ssl_verify_hostname,
+                              partitioner: Kafka::Partitioner.new(hash_function: @partitioner_hash_function))
+         else
+           @kafka = Kafka.new(seed_brokers: @seed_brokers, client_id: @client_id, ssl_ca_cert_file_path: @ssl_ca_cert,
+                              ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key), ssl_ca_certs_from_system: @ssl_ca_certs_from_system,
+                              sasl_gssapi_principal: @principal, sasl_gssapi_keytab: @keytab, sasl_over_ssl: @sasl_over_ssl, ssl_verify_hostname: @ssl_verify_hostname,
+                              partitioner: Kafka::Partitioner.new(hash_function: @partitioner_hash_function))
+         end
+         log.info "initialized kafka producer: #{@client_id}"
+       else
+         log.warn "No brokers found on Zookeeper"
+       end
+     rescue Exception => e
+       log.error e
+     end
+   end
+
+   def configure(conf)
+     super
+
+     log.warn "Support of fluentd v0.12 has ended. Use kafka2 instead. kafka will be an alias of kafka2"
+
+     if @zookeeper
+       require 'zookeeper'
+     else
+       @seed_brokers = @brokers.split(",")
+       log.info "brokers have been set directly: #{@seed_brokers}"
+     end
+
+     if conf['ack_timeout_ms']
+       log.warn "'ack_timeout_ms' parameter is deprecated. Use 'ack_timeout' in seconds instead"
+       @ack_timeout = conf['ack_timeout_ms'].to_i / 1000
+     end
+
+     @f_separator = case @field_separator
+                    when /SPACE/i then ' '
+                    when /COMMA/i then ','
+                    when /SOH/i then "\x01"
+                    else "\t"
+                    end
+
+     @formatter_proc = setup_formatter(conf)
+
+     @producer_opts = {max_retries: @max_send_retries, required_acks: @required_acks}
+     @producer_opts[:ack_timeout] = @ack_timeout if @ack_timeout
+     @producer_opts[:compression_codec] = @compression_codec.to_sym if @compression_codec
+     @producer_opts[:max_buffer_size] = @max_buffer_size if @max_buffer_size
+     @producer_opts[:max_buffer_bytesize] = @max_buffer_bytesize if @max_buffer_bytesize
+     if @active_support_notification_regex
+       require 'active_support/notifications'
+       require 'active_support/core_ext/hash/keys'
+       ActiveSupport::Notifications.subscribe(Regexp.new(@active_support_notification_regex)) do |*args|
+         event = ActiveSupport::Notifications::Event.new(*args)
+         message = event.payload.respond_to?(:stringify_keys) ? event.payload.stringify_keys : event.payload
+         @router.emit("fluent_kafka_stats.#{event.name}", Time.now.to_i, message)
+       end
+     end
+   end
+
+   def multi_workers_ready?
+     true
+   end
+
+   def start
+     super
+     refresh_client
+   end
+
+   def shutdown
+     super
+     @kafka = nil
+   end
+
+   def setup_formatter(conf)
+     if @output_data_type == 'json'
+       require 'yajl'
+       Proc.new { |tag, time, record| Yajl::Encoder.encode(record) }
+     elsif @output_data_type == 'ltsv'
+       require 'ltsv'
+       Proc.new { |tag, time, record| LTSV.dump(record) }
+     elsif @output_data_type == 'msgpack'
+       require 'msgpack'
+       Proc.new { |tag, time, record| record.to_msgpack }
+     elsif @output_data_type =~ /^attr:(.*)$/
+       @custom_attributes = $1.split(',').map(&:strip).reject(&:empty?)
+       @custom_attributes.unshift('time') if @output_include_time
+       @custom_attributes.unshift('tag') if @output_include_tag
+       Proc.new { |tag, time, record|
+         @custom_attributes.map { |attr|
+           record[attr].nil? ? '' : record[attr].to_s
+         }.join(@f_separator)
+       }
+     else
+       @formatter = Fluent::Plugin.new_formatter(@output_data_type)
+       @formatter.configure(conf)
+       @formatter.method(:format)
+     end
+   end
+
+   def emit(tag, es, chain)
+     begin
+       chain.next
+
+       # out_kafka is mainly for testing, so it doesn't need the performance of out_kafka_buffered.
+       producer = @kafka.producer(**@producer_opts)
+
+       es.each do |time, record|
+         if @output_include_time
+           if @time_format
+             record['time'] = Time.at(time).strftime(@time_format)
+           else
+             record['time'] = time
+           end
+         end
+         record['tag'] = tag if @output_include_tag
+         topic = (@exclude_topic_key ? record.delete('topic') : record['topic']) || @default_topic || tag
+         partition_key = (@exclude_partition_key ? record.delete('partition_key') : record['partition_key']) || @default_partition_key
+         partition = (@exclude_partition ? record.delete('partition'.freeze) : record['partition'.freeze]) || @default_partition
+         message_key = (@exclude_message_key ? record.delete('message_key') : record['message_key']) || @default_message_key
+
+         record_buf = @formatter_proc.call(tag, time, record)
+         record_buf_bytes = record_buf.bytesize
+         if @max_send_limit_bytes && record_buf_bytes > @max_send_limit_bytes
+           log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :record_size => record_buf_bytes
+           log.debug "Skipped event:", :record => record
+           next
+         end
+         log.trace { "message will be sent to #{topic} with partition_key: #{partition_key}, partition: #{partition}, message_key: #{message_key} and value: #{record_buf}." }
+         begin
+           producer.produce(record_buf, topic: topic, key: message_key, partition: partition, partition_key: partition_key)
+         rescue Kafka::BufferOverflow => e
+           log.warn "BufferOverflow occurred: #{e}"
+           log.info "Trying to deliver the messages to prevent the buffer from overflowing again."
+           producer.deliver_messages
+           log.info "Recovered from BufferOverflow successfully"
+         end
+       end
+
+       producer.deliver_messages
+       producer.shutdown
+     rescue Exception => e
+       log.warn "Send exception occurred: #{e}"
+       producer.shutdown if producer
+       refresh_client
+       raise e
+     end
+   end
+
+ end
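
For reference, the routing lookups in `emit` above resolve `topic`, `partition`, `partition_key`, and `message_key` from each record (removing the field when the matching `exclude_*` option is set), then fall back to the `default_*` parameters and, for the topic, finally to the tag. A small illustration with hypothetical record contents:

# Illustrative only; this mirrors the lookups in emit above for one event.
record = { 'topic' => 'app.events', 'message_key' => 'user-42', 'message' => 'hello' }
tag = 'td.app'
default_topic = nil
default_message_key = nil

exclude_topic_key = true      # remove the routing field from the payload
exclude_message_key = false   # keep message_key in the payload

topic = (exclude_topic_key ? record.delete('topic') : record['topic']) || default_topic || tag
# => "app.events"; if the record had no 'topic', the tag "td.app" would be used

message_key = (exclude_message_key ? record.delete('message_key') : record['message_key']) || default_message_key
# => "user-42", and the field stays in the record because exclude_message_key is false

# The remaining record is then serialized by the configured formatter (JSON by default),
# size-checked against max_send_limit_bytes, and handed to:
#   producer.produce(record_buf, topic: topic, key: message_key, partition: partition, partition_key: partition_key)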