fluent-plugin-kafka-xst 0.19.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. checksums.yaml +7 -0
  2. data/.github/ISSUE_TEMPLATE/bug_report.yaml +72 -0
  3. data/.github/ISSUE_TEMPLATE/config.yml +5 -0
  4. data/.github/ISSUE_TEMPLATE/feature_request.yaml +39 -0
  5. data/.github/dependabot.yml +6 -0
  6. data/.github/workflows/linux.yml +45 -0
  7. data/.github/workflows/stale-actions.yml +24 -0
  8. data/.gitignore +2 -0
  9. data/ChangeLog +344 -0
  10. data/Gemfile +6 -0
  11. data/LICENSE +14 -0
  12. data/README.md +594 -0
  13. data/Rakefile +12 -0
  14. data/ci/prepare-kafka-server.sh +33 -0
  15. data/examples/README.md +3 -0
  16. data/examples/out_kafka2/dynamic_topic_based_on_tag.conf +32 -0
  17. data/examples/out_kafka2/protobuf-formatter.conf +23 -0
  18. data/examples/out_kafka2/record_key.conf +31 -0
  19. data/fluent-plugin-kafka.gemspec +27 -0
  20. data/lib/fluent/plugin/in_kafka.rb +388 -0
  21. data/lib/fluent/plugin/in_kafka_group.rb +394 -0
  22. data/lib/fluent/plugin/in_rdkafka_group.rb +305 -0
  23. data/lib/fluent/plugin/kafka_plugin_util.rb +84 -0
  24. data/lib/fluent/plugin/kafka_producer_ext.rb +308 -0
  25. data/lib/fluent/plugin/out_kafka.rb +268 -0
  26. data/lib/fluent/plugin/out_kafka2.rb +427 -0
  27. data/lib/fluent/plugin/out_kafka_buffered.rb +374 -0
  28. data/lib/fluent/plugin/out_rdkafka.rb +324 -0
  29. data/lib/fluent/plugin/out_rdkafka2.rb +526 -0
  30. data/test/helper.rb +34 -0
  31. data/test/plugin/test_in_kafka.rb +66 -0
  32. data/test/plugin/test_in_kafka_group.rb +69 -0
  33. data/test/plugin/test_kafka_plugin_util.rb +44 -0
  34. data/test/plugin/test_out_kafka.rb +68 -0
  35. data/test/plugin/test_out_kafka2.rb +138 -0
  36. data/test/plugin/test_out_kafka_buffered.rb +68 -0
  37. data/test/plugin/test_out_rdkafka2.rb +182 -0
  38. metadata +214 -0
data/lib/fluent/plugin/kafka_plugin_util.rb
@@ -0,0 +1,84 @@
+ module Fluent
+   module KafkaPluginUtil
+     module AwsIamSettings
+       def self.included(klass)
+         klass.instance_eval do
+           config_param :sasl_aws_msk_iam_access_key_id, :string, :default => nil, secret: true,
+                        desc: "AWS access key Id for IAM authentication to MSK."
+           config_param :sasl_aws_msk_iam_secret_key_id, :string, :default => nil, secret: true,
+                        desc: "AWS access key secret for IAM authentication to MSK."
+           config_param :sasl_aws_msk_iam_aws_region, :string, :default => nil,
+                        desc: "AWS region for IAM authentication to MSK."
+         end
+       end
+     end
+
+     module SSLSettings
+       def self.included(klass)
+         klass.instance_eval {
+           # https://github.com/zendesk/ruby-kafka#encryption-and-authentication-using-ssl
+           config_param :ssl_ca_cert, :array, :value_type => :string, :default => nil,
+                        :desc => "a PEM encoded CA cert to use with an SSL connection."
+           config_param :ssl_client_cert, :string, :default => nil,
+                        :desc => "a PEM encoded client cert to use with an SSL connection. Must be used in combination with ssl_client_cert_key."
+           config_param :ssl_client_cert_key, :string, :default => nil,
+                        :desc => "a PEM encoded client cert key to use with an SSL connection. Must be used in combination with ssl_client_cert."
+           config_param :ssl_client_cert_key_password, :string, :default => nil, secret: true,
+                        :desc => "a PEM encoded client cert key password to use with an SSL connection."
+           config_param :ssl_client_cert_chain, :string, :default => nil,
+                        :desc => "an extra PEM encoded cert to use with an SSL connection."
+           config_param :ssl_ca_certs_from_system, :bool, :default => false,
+                        :desc => "this configures the store to look up CA certificates from the system default certificate store on an as-needed basis. The location of the store can usually be determined by: OpenSSL::X509::DEFAULT_CERT_FILE."
+           config_param :ssl_verify_hostname, :bool, :default => true,
+                        :desc => "this configures whether the hostname of the certificate should be verified or not."
+         }
+       end
+
+       DummyFormatter = Object.new
+
+       def start
+         super
+
+         # This is a hack, but it is an easy fix for all kafka plugins
+         unless log.respond_to?(:formatter)
+           def log.formatter
+             Fluent::KafkaPluginUtil::SSLSettings::DummyFormatter
+           end
+         end
+       end
+
+       def read_ssl_file(path)
+         return nil if path.nil? || path.respond_to?(:strip) && path.strip.empty?
+
+         if path.is_a?(Array)
+           path.map { |fp| File.read(fp) }
+         else
+           File.read(path)
+         end
+       end
+
+       def pickup_ssl_endpoint(node)
+         ssl_endpoint = node['endpoints'].find { |e| e.start_with?('SSL') }
+         raise 'no SSL endpoint found on Zookeeper' unless ssl_endpoint
+         return [URI.parse(ssl_endpoint).host, URI.parse(ssl_endpoint).port].join(':')
+       end
+     end
+
+     module SaslSettings
+       def self.included(klass)
+         klass.instance_eval {
+           config_param :principal, :string, :default => nil,
+                        :desc => "a Kerberos principal to use with SASL authentication (GSSAPI)."
+           config_param :keytab, :string, :default => nil,
+                        :desc => "a filepath to a Kerberos keytab. Must be used with principal."
+           config_param :username, :string, :default => nil,
+                        :desc => "a username when using PLAIN/SCRAM SASL authentication"
+           config_param :password, :string, :default => nil, secret: true,
+                        :desc => "a password when using PLAIN/SCRAM SASL authentication"
+           config_param :scram_mechanism, :string, :default => nil,
+                        :desc => "if set, use SCRAM authentication with the specified mechanism. When unset, default to PLAIN authentication"
+         }
+       end
+     end
+   end
+ end
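
These three mixins centralize the IAM, TLS, and SASL configuration shared by every input and output plugin in the gem: a plugin includes them, and the config_param declarations become part of that plugin's own configuration surface. A minimal sketch of the pattern, assuming a hypothetical MyKafkaOutput class and placeholder broker address that are not part of the gem:

    # Hypothetical plugin class; the real plugins (out_kafka, out_kafka2, ...) follow the same pattern.
    require 'kafka'
    require 'fluent/plugin/output'
    require 'fluent/plugin/kafka_plugin_util'

    class MyKafkaOutput < Fluent::Plugin::Output
      include Fluent::KafkaPluginUtil::SSLSettings   # adds ssl_ca_cert, ssl_client_cert, ssl_verify_hostname, ...
      include Fluent::KafkaPluginUtil::SaslSettings  # adds principal, keytab, username, password, scram_mechanism

      def build_client
        # read_ssl_file returns nil for unset/blank paths, the PEM contents for a single
        # path, and an array of PEM contents when given an array (as ssl_ca_cert is).
        Kafka.new(
          seed_brokers: ["localhost:9092"],            # placeholder broker list
          ssl_ca_cert_file_path: @ssl_ca_cert,
          ssl_client_cert: read_ssl_file(@ssl_client_cert),
          ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key),
          ssl_verify_hostname: @ssl_verify_hostname,
          sasl_plain_username: @username,
          sasl_plain_password: @password
        )
      end
    end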
data/lib/fluent/plugin/kafka_producer_ext.rb
@@ -0,0 +1,308 @@
+ require "set"
+ require "kafka/partitioner"
+ require "kafka/message_buffer"
+ require "kafka/produce_operation"
+ require "kafka/pending_message_queue"
+ require "kafka/pending_message"
+ require "kafka/compressor"
+ require 'kafka/producer'
+
+ # for out_kafka_buffered
+ module Kafka
+   EMPTY_HEADER = {}
+
+   class Producer
+     def produce_for_buffered(value, key: nil, topic:, partition: nil, partition_key: nil, create_time: Time.now)
+       message = PendingMessage.new(
+         value: value,
+         key: key,
+         headers: EMPTY_HEADER,
+         topic: topic,
+         partition: partition,
+         partition_key: partition_key,
+         create_time: create_time
+       )
+
+       # If the producer is in transactional mode, message production
+       # is only allowed while a transaction is in progress.
+       if @transaction_manager.transactional? && !@transaction_manager.in_transaction?
+         raise 'You must trigger begin_transaction before producing messages'
+       end
+
+       @target_topics.add(topic)
+       @pending_message_queue.write(message)
+
+       nil
+     end
+   end
+ end
+
+ # for out_kafka2
+ # The majority (if not all) of this code is lifted from https://github.com/zendesk/ruby-kafka/blob/master/lib/kafka/producer.rb
+ # with the main difference being that we have removed the checks on max_buffer_bytesize and max_buffer_size.
+ # The reason for doing this is to provide a better UX for our users, who then only need to set those bounds in
+ # the Buffer section using `chunk_limit_size` and `chunk_limit_records`.
+ #
+ # We should reconsider this in the future in case the `ruby-kafka` library drastically changes its internals.
+ module Kafka
+   class Client
+     def custom_producer(compression_codec: nil, compression_threshold: 1, ack_timeout: 5, required_acks: :all, max_retries: 2, retry_backoff: 1, max_buffer_size: 1000, max_buffer_bytesize: 10_000_000, idempotent: false, transactional: false, transactional_id: nil, transactional_timeout: 60)
+       cluster = initialize_cluster
+       compressor = Compressor.new(
+         codec_name: compression_codec,
+         threshold: compression_threshold,
+         instrumenter: @instrumenter,
+       )
+
+       transaction_manager = TransactionManager.new(
+         cluster: cluster,
+         logger: @logger,
+         idempotent: idempotent,
+         transactional: transactional,
+         transactional_id: transactional_id,
+         transactional_timeout: transactional_timeout,
+       )
+
+       CustomProducer.new(cluster: cluster,
+                          transaction_manager: transaction_manager,
+                          logger: @logger,
+                          instrumenter: @instrumenter,
+                          compressor: compressor,
+                          ack_timeout: ack_timeout,
+                          required_acks: required_acks,
+                          max_retries: max_retries,
+                          retry_backoff: retry_backoff,
+                          max_buffer_size: max_buffer_size,
+                          max_buffer_bytesize: max_buffer_bytesize,
+                          partitioner: @partitioner,
+                         )
+     end
+   end
+
+   class CustomProducer
+     def initialize(cluster:, transaction_manager:, logger:, instrumenter:, compressor:, ack_timeout:, required_acks:, max_retries:, retry_backoff:, max_buffer_size:, max_buffer_bytesize:, partitioner:)
+       @cluster = cluster
+       @transaction_manager = transaction_manager
+       @logger = logger
+       @instrumenter = instrumenter
+       @required_acks = required_acks == :all ? -1 : required_acks
+       @ack_timeout = ack_timeout
+       @max_retries = max_retries
+       @retry_backoff = retry_backoff
+       @max_buffer_size = max_buffer_size
+       @max_buffer_bytesize = max_buffer_bytesize
+       @compressor = compressor
+       @partitioner = partitioner
+       # A buffer organized by topic/partition.
+       @buffer = MessageBuffer.new
+
+       # Messages added by `#produce` but not yet assigned a partition.
+       @pending_message_queue = PendingMessageQueue.new
+     end
+
+     def produce(value, key: nil, partition: nil, partition_key: nil, headers: EMPTY_HEADER, create_time: Time.now, topic: nil)
+       message = PendingMessage.new(
+         value: value,
+         key: key,
+         headers: headers,
+         topic: topic,
+         partition: partition,
+         partition_key: partition_key,
+         create_time: create_time
+       )
+
+       # If the producer is in transactional mode, message production
+       # is only allowed while a transaction is in progress.
+       if @transaction_manager.transactional? && !@transaction_manager.in_transaction?
+         raise 'You must trigger begin_transaction before producing messages'
+       end
+
+       @pending_message_queue.write(message)
+
+       nil
+     end
+
+     def deliver_messages
+       # There's no need to do anything if the buffer is empty.
+       return if buffer_size == 0
+
+       deliver_messages_with_retries
+     end
+
+     # Returns the number of messages currently held in the buffer.
+     #
+     # @return [Integer] buffer size.
+     def buffer_size
+       @pending_message_queue.size + @buffer.size
+     end
+
+     def buffer_bytesize
+       @pending_message_queue.bytesize + @buffer.bytesize
+     end
+
+     # Deletes all buffered messages.
+     #
+     # @return [nil]
+     def clear_buffer
+       @buffer.clear
+       @pending_message_queue.clear
+     end
+
+     # Closes all connections to the brokers.
+     #
+     # @return [nil]
+     def shutdown
+       @transaction_manager.close
+       @cluster.disconnect
+     end
+
+     def init_transactions
+       @transaction_manager.init_transactions
+     end
+
+     def begin_transaction
+       @transaction_manager.begin_transaction
+     end
+
+     def commit_transaction
+       @transaction_manager.commit_transaction
+     end
+
+     def abort_transaction
+       @transaction_manager.abort_transaction
+     end
+
+     def transaction
+       raise 'This method requires a block' unless block_given?
+       begin_transaction
+       yield
+       commit_transaction
+     rescue Kafka::Producer::AbortTransaction
+       abort_transaction
+     rescue
+       abort_transaction
+       raise
+     end
+
+     def deliver_messages_with_retries
+       attempt = 0
+
+       # @cluster.add_target_topics(@target_topics)
+
+       operation = ProduceOperation.new(
+         cluster: @cluster,
+         transaction_manager: @transaction_manager,
+         buffer: @buffer,
+         required_acks: @required_acks,
+         ack_timeout: @ack_timeout,
+         compressor: @compressor,
+         logger: @logger,
+         instrumenter: @instrumenter,
+       )
+
+       loop do
+         attempt += 1
+
+         begin
+           @cluster.refresh_metadata_if_necessary!
+         rescue ConnectionError => e
+           raise DeliveryFailed.new(e, buffer_messages)
+         end
+
+         assign_partitions!
+         operation.execute
+
+         if @required_acks.zero?
+           # No response is returned by the brokers, so we can't know which messages
+           # have been successfully written. Our only option is to assume that they all
+           # have.
+           @buffer.clear
+         end
+
+         if buffer_size.zero?
+           break
+         elsif attempt <= @max_retries
+           @logger.warn "Failed to send all messages; attempting retry #{attempt} of #{@max_retries} after #{@retry_backoff}s"
+
+           sleep @retry_backoff
+         else
+           @logger.error "Failed to send all messages; keeping remaining messages in buffer"
+           break
+         end
+       end
+
+       unless @pending_message_queue.empty?
+         # Mark the cluster as stale in order to force a cluster metadata refresh.
+         @cluster.mark_as_stale!
+         raise DeliveryFailed.new("Failed to assign partitions to #{@pending_message_queue.size} messages", buffer_messages)
+       end
+
+       unless @buffer.empty?
+         partitions = @buffer.map { |topic, partition, _| "#{topic}/#{partition}" }.join(", ")
+
+         raise DeliveryFailed.new("Failed to send messages to #{partitions}", buffer_messages)
+       end
+     end
+
+     def assign_partitions!
+       failed_messages = []
+
+       @pending_message_queue.each do |message|
+         partition = message.partition
+
+         begin
+           partition_count = @cluster.partitions_for(message.topic).count
+
+           if partition.nil?
+             partition = @partitioner.call(partition_count, message)
+           end
+
+           @buffer.write(
+             value: message.value,
+             key: message.key,
+             headers: message.headers,
+             topic: message.topic,
+             partition: partition,
+             create_time: message.create_time,
+           )
+         rescue Kafka::Error => e
+           failed_messages << message
+         end
+       end
+
+       if failed_messages.any?
+         failed_messages.group_by(&:topic).each do |topic, messages|
+           @logger.error "Failed to assign partitions to #{messages.count} messages in #{topic}"
+         end
+
+         @cluster.mark_as_stale!
+       end
+
+       @pending_message_queue.replace(failed_messages)
+     end
+
+     def buffer_messages
+       messages = []
+
+       @pending_message_queue.each do |message|
+         messages << message
+       end
+
+       @buffer.each do |topic, partition, messages_for_partition|
+         messages_for_partition.each do |message|
+           messages << PendingMessage.new(
+             value: message.value,
+             key: message.key,
+             headers: message.headers,
+             topic: topic,
+             partition: partition,
+             partition_key: nil,
+             create_time: message.create_time
+           )
+         end
+       end
+
+       messages
+     end
+   end
+ end
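
CustomProducer mirrors ruby-kafka's own Producer but deliberately drops the max_buffer_size / max_buffer_bytesize enforcement, leaving those bounds to Fluentd's buffer settings (chunk_limit_size, chunk_limit_records). A rough sketch, assuming placeholder broker address, topic, and payload, of how the extension is meant to be driven: one produce call per formatted event, one deliver_messages per chunk, shutdown when the plugin stops.

    # Illustrative only; values and topology are placeholders, not taken from the plugin.
    require 'kafka'
    require 'fluent/plugin/kafka_producer_ext'

    kafka = Kafka.new(seed_brokers: ["localhost:9092"], client_id: "fluentd")
    producer = kafka.custom_producer(required_acks: -1, max_retries: 2, retry_backoff: 1)
    begin
      # No per-message buffer bound is enforced here; the Fluentd <buffer> section
      # already limits how much data a single chunk can carry.
      producer.produce("payload bytes", topic: "logs", key: "optional-key")
      producer.deliver_messages   # assigns partitions and sends, retrying up to max_retries times
    ensure
      producer.shutdown           # closes the transaction manager and broker connections
    end

In the plugin itself the producer is long-lived rather than created per chunk; the begin/ensure above just compresses that lifecycle into one place for illustration.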
data/lib/fluent/plugin/out_kafka.rb
@@ -0,0 +1,268 @@
+ require 'fluent/output'
+ require 'fluent/plugin/kafka_plugin_util'
+
+ class Fluent::KafkaOutput < Fluent::Output
+   Fluent::Plugin.register_output('kafka', self)
+
+   config_param :brokers, :string, :default => 'localhost:9092',
+                :desc => <<-DESC
+ Set brokers directly
+ <broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,..
+ Note that you can choose to use either brokers or zookeeper.
+ DESC
+   config_param :zookeeper, :string, :default => nil,
+                :desc => "Set brokers via Zookeeper: <zookeeper_host>:<zookeeper_port>"
+   config_param :zookeeper_path, :string, :default => '/brokers/ids',
+                :desc => "Zookeeper path where broker ids are stored. Defaults to /brokers/ids"
+   config_param :default_topic, :string, :default => nil,
+                :desc => "Output topic."
+   config_param :default_message_key, :string, :default => nil
+   config_param :default_partition_key, :string, :default => nil
+   config_param :default_partition, :integer, :default => nil
+   config_param :partitioner_hash_function, :enum, list: [:crc32, :murmur2], :default => :crc32,
+                :desc => "Specify the Kafka partitioner hash algorithm"
+   config_param :client_id, :string, :default => 'kafka'
+   config_param :sasl_over_ssl, :bool, :default => true,
+                :desc => <<-DESC
+ Set to false to prevent SSL strict mode when using SASL authentication
+ DESC
+   config_param :output_data_type, :string, :default => 'json',
+                :desc => "Supported format: (json|ltsv|msgpack|attr:<record name>|<formatter name>)"
+   config_param :output_include_tag, :bool, :default => false
+   config_param :output_include_time, :bool, :default => false
+   config_param :exclude_partition_key, :bool, :default => false,
+                :desc => <<-DESC
+ Set true to remove partition key from data
+ DESC
+   config_param :exclude_partition, :bool, :default => false,
+                :desc => <<-DESC
+ Set true to remove partition from data
+ DESC
+
+   config_param :exclude_message_key, :bool, :default => false,
+                :desc => <<-DESC
+ Set true to remove message key from data
+ DESC
+   config_param :exclude_topic_key, :bool, :default => false,
+                :desc => <<-DESC
+ Set true to remove topic name key from data
+ DESC
+
+   # ruby-kafka producer options
+   config_param :max_send_retries, :integer, :default => 2,
+                :desc => "Number of times to retry sending of messages to a leader."
+   config_param :required_acks, :integer, :default => -1,
+                :desc => "The number of acks required per request."
+   config_param :ack_timeout, :integer, :default => nil,
+                :desc => "How long the producer waits for acks."
+   config_param :compression_codec, :string, :default => nil,
+                :desc => "The codec the producer uses to compress messages."
+   config_param :max_send_limit_bytes, :size, :default => nil
+   config_param :time_format, :string, :default => nil
+
+   config_param :max_buffer_size, :integer, :default => nil,
+                :desc => "Number of messages to be buffered by the kafka producer."
+
+   config_param :max_buffer_bytesize, :integer, :default => nil,
+                :desc => "Maximum size in bytes to be buffered."
+
+   config_param :active_support_notification_regex, :string, :default => nil,
+                :desc => <<-DESC
+ Add a regular expression to capture ActiveSupport notifications from the Kafka client.
+ Requires the activesupport gem - records will be generated under fluent_kafka_stats.**
+ DESC
+
+   include Fluent::KafkaPluginUtil::SSLSettings
+   include Fluent::KafkaPluginUtil::SaslSettings
+
+   attr_accessor :output_data_type
+   attr_accessor :field_separator
+
+   unless method_defined?(:log)
+     define_method("log") { $log }
+   end
+
+   def initialize
+     super
+
+     require 'kafka'
+
+     @kafka = nil
+     @field_separator = nil
+   end
+
+   def refresh_client
+     if @zookeeper
+       @seed_brokers = []
+       z = Zookeeper.new(@zookeeper)
+       z.get_children(:path => @zookeeper_path)[:children].each do |id|
+         broker = Yajl.load(z.get(:path => @zookeeper_path + "/#{id}")[:data])
+         if @ssl_client_cert
+           @seed_brokers.push(pickup_ssl_endpoint(broker))
+         else
+           @seed_brokers.push("#{broker['host']}:#{broker['port']}")
+         end
+       end
+       z.close
+       log.info "brokers has been refreshed via Zookeeper: #{@seed_brokers}"
+     end
+     begin
+       if @seed_brokers.length > 0
+         if @scram_mechanism != nil && @username != nil && @password != nil
+           @kafka = Kafka.new(seed_brokers: @seed_brokers, client_id: @client_id, ssl_ca_cert_file_path: @ssl_ca_cert,
+                              ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key), ssl_ca_certs_from_system: @ssl_ca_certs_from_system,
+                              sasl_scram_username: @username, sasl_scram_password: @password, sasl_scram_mechanism: @scram_mechanism, sasl_over_ssl: @sasl_over_ssl,
+                              ssl_verify_hostname: @ssl_verify_hostname,
+                              partitioner: Kafka::Partitioner.new(hash_function: @partitioner_hash_function))
+         elsif @username != nil && @password != nil
+           @kafka = Kafka.new(seed_brokers: @seed_brokers, client_id: @client_id, ssl_ca_cert_file_path: @ssl_ca_cert,
+                              ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key), ssl_ca_certs_from_system: @ssl_ca_certs_from_system,
+                              sasl_plain_username: @username, sasl_plain_password: @password, sasl_over_ssl: @sasl_over_ssl, ssl_verify_hostname: @ssl_verify_hostname,
+                              partitioner: Kafka::Partitioner.new(hash_function: @partitioner_hash_function))
+         else
+           @kafka = Kafka.new(seed_brokers: @seed_brokers, client_id: @client_id, ssl_ca_cert_file_path: @ssl_ca_cert,
+                              ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key), ssl_ca_certs_from_system: @ssl_ca_certs_from_system,
+                              sasl_gssapi_principal: @principal, sasl_gssapi_keytab: @keytab, sasl_over_ssl: @sasl_over_ssl, ssl_verify_hostname: @ssl_verify_hostname,
+                              partitioner: Kafka::Partitioner.new(hash_function: @partitioner_hash_function))
+         end
+         log.info "initialized kafka producer: #{@client_id}"
+       else
+         log.warn "No brokers found on Zookeeper"
+       end
+     rescue Exception => e
+       log.error e
+     end
+   end
+
+   def configure(conf)
+     super
+
+     log.warn "Support of fluentd v0.12 has ended. Use kafka2 instead. kafka will be an alias of kafka2"
+
+     if @zookeeper
+       require 'zookeeper'
+     else
+       @seed_brokers = @brokers.split(",")
+       log.info "brokers has been set directly: #{@seed_brokers}"
+     end
+
+     if conf['ack_timeout_ms']
+       log.warn "'ack_timeout_ms' parameter is deprecated. Use second unit 'ack_timeout' instead"
+       @ack_timeout = conf['ack_timeout_ms'].to_i / 1000
+     end
+
+     @f_separator = case @field_separator
+                    when /SPACE/i then ' '
+                    when /COMMA/i then ','
+                    when /SOH/i then "\x01"
+                    else "\t"
+                    end
+
+     @formatter_proc = setup_formatter(conf)
+
+     @producer_opts = {max_retries: @max_send_retries, required_acks: @required_acks}
+     @producer_opts[:ack_timeout] = @ack_timeout if @ack_timeout
+     @producer_opts[:compression_codec] = @compression_codec.to_sym if @compression_codec
+     @producer_opts[:max_buffer_size] = @max_buffer_size if @max_buffer_size
+     @producer_opts[:max_buffer_bytesize] = @max_buffer_bytesize if @max_buffer_bytesize
+     if @active_support_notification_regex
+       require 'active_support/notifications'
+       require 'active_support/core_ext/hash/keys'
+       ActiveSupport::Notifications.subscribe(Regexp.new(@active_support_notification_regex)) do |*args|
+         event = ActiveSupport::Notifications::Event.new(*args)
+         message = event.payload.respond_to?(:stringify_keys) ? event.payload.stringify_keys : event.payload
+         @router.emit("fluent_kafka_stats.#{event.name}", Time.now.to_i, message)
+       end
+     end
+   end
+
+   def multi_workers_ready?
+     true
+   end
+
+   def start
+     super
+     refresh_client
+   end
+
+   def shutdown
+     super
+     @kafka = nil
+   end
+
+   def setup_formatter(conf)
+     if @output_data_type == 'json'
+       require 'yajl'
+       Proc.new { |tag, time, record| Yajl::Encoder.encode(record) }
+     elsif @output_data_type == 'ltsv'
+       require 'ltsv'
+       Proc.new { |tag, time, record| LTSV.dump(record) }
+     elsif @output_data_type == 'msgpack'
+       require 'msgpack'
+       Proc.new { |tag, time, record| record.to_msgpack }
+     elsif @output_data_type =~ /^attr:(.*)$/
+       @custom_attributes = $1.split(',').map(&:strip).reject(&:empty?)
+       @custom_attributes.unshift('time') if @output_include_time
+       @custom_attributes.unshift('tag') if @output_include_tag
+       Proc.new { |tag, time, record|
+         @custom_attributes.map { |attr|
+           record[attr].nil? ? '' : record[attr].to_s
+         }.join(@f_separator)
+       }
+     else
+       @formatter = Fluent::Plugin.new_formatter(@output_data_type)
+       @formatter.configure(conf)
+       @formatter.method(:format)
+     end
+   end
+
+   def emit(tag, es, chain)
+     begin
+       chain.next
+
+       # out_kafka is mainly for testing, so it doesn't need the performance of out_kafka_buffered.
+       producer = @kafka.producer(**@producer_opts)
+
+       es.each do |time, record|
+         if @output_include_time
+           if @time_format
+             record['time'] = Time.at(time).strftime(@time_format)
+           else
+             record['time'] = time
+           end
+         end
+         record['tag'] = tag if @output_include_tag
+         topic = (@exclude_topic_key ? record.delete('topic') : record['topic']) || @default_topic || tag
+         partition_key = (@exclude_partition_key ? record.delete('partition_key') : record['partition_key']) || @default_partition_key
+         partition = (@exclude_partition ? record.delete('partition'.freeze) : record['partition'.freeze]) || @default_partition
+         message_key = (@exclude_message_key ? record.delete('message_key') : record['message_key']) || @default_message_key
+
+         record_buf = @formatter_proc.call(tag, time, record)
+         record_buf_bytes = record_buf.bytesize
+         if @max_send_limit_bytes && record_buf_bytes > @max_send_limit_bytes
+           log.warn "record size exceeds max_send_limit_bytes. Skip event:", :time => time, :record_size => record_buf_bytes
+           log.debug "Skipped event:", :record => record
+           next
+         end
+         log.trace { "message will send to #{topic} with partition_key: #{partition_key}, partition: #{partition}, message_key: #{message_key} and value: #{record_buf}." }
+         begin
+           producer.produce(record_buf, topic: topic, key: message_key, partition: partition, partition_key: partition_key)
+         rescue Kafka::BufferOverflow => e
+           log.warn "BufferOverflow occurred: #{e}"
+           log.info "Trying to deliver the messages to prevent the buffer from overflowing again."
+           producer.deliver_messages
+           log.info "Recovered from BufferOverflow successfully"
+         end
+       end
+
+       producer.deliver_messages
+       producer.shutdown
+     rescue Exception => e
+       log.warn "Send exception occurred: #{e}"
+       producer.shutdown if producer
+       refresh_client
+       raise e
+     end
+   end
+
+ end
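
In #emit, the routing metadata for each record is resolved record-first, then from the configured defaults, and finally (for the topic) from the Fluentd tag; the exclude_* flags decide whether the consulted keys are deleted from the payload before formatting. A condensed restatement of that precedence, with illustrative record values that are not taken from the gem:

    # Illustrative values only; this mirrors the precedence used in #emit above.
    record = { "topic" => "audit", "message_key" => "user-42", "message" => "hello" }
    tag    = "app.access"

    topic       = record["topic"] || @default_topic || tag        # => "audit"
    message_key = record["message_key"] || @default_message_key   # => "user-42"
    partition   = record["partition"] || @default_partition       # => nil unless configured
    # With exclude_topic_key / exclude_message_key / exclude_partition set to true,
    # record.delete(...) is used instead of record[...], stripping the routing keys
    # from the serialized payload.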