roched-fluent-plugin-kafka 0.6.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +2 -0
- data/.travis.yml +18 -0
- data/ChangeLog +94 -0
- data/Gemfile +4 -0
- data/LICENSE +14 -0
- data/README.md +244 -0
- data/Rakefile +12 -0
- data/fluent-plugin-kafka.gemspec +24 -0
- data/lib/fluent/plugin/in_kafka.rb +310 -0
- data/lib/fluent/plugin/in_kafka_group.rb +236 -0
- data/lib/fluent/plugin/kafka_plugin_util.rb +44 -0
- data/lib/fluent/plugin/kafka_producer_ext.rb +223 -0
- data/lib/fluent/plugin/out_kafka.rb +242 -0
- data/lib/fluent/plugin/out_kafka2.rb +224 -0
- data/lib/fluent/plugin/out_kafka_buffered.rb +330 -0
- data/test/helper.rb +27 -0
- data/test/plugin/test_out_kafka.rb +52 -0
- metadata +141 -0
@@ -0,0 +1,223 @@
|
|
1
|
+
require "set"
|
2
|
+
require "kafka/partitioner"
|
3
|
+
require "kafka/message_buffer"
|
4
|
+
require "kafka/produce_operation"
|
5
|
+
require "kafka/pending_message_queue"
|
6
|
+
require "kafka/pending_message"
|
7
|
+
require "kafka/compressor"
|
8
|
+
require 'kafka/producer'
|
9
|
+
|
10
|
+
# Extension used by out_kafka_buffered.
module Kafka
  class Producer
    # Queues a message on the pending message queue and records its topic in
    # the set of target topics.
    #
    # @param value [Object] message payload.
    # @param key [Object, nil] message key.
    # @param topic [String] destination topic.
    # @param partition [Integer, nil] explicit partition, if any.
    # @param partition_key [Object, nil] key handed to the pending message for partitioning.
    # @return [nil]
    def produce2(value, key: nil, topic:, partition: nil, partition_key: nil)
      pending = PendingMessage.new(value, key, topic, partition, partition_key, Time.now)

      @target_topics.add(topic)
      @pending_message_queue.write(pending)

      nil
    end
  end
end
|
32
|
+
|
33
|
+
# for out_kafka2
|
34
|
+
module Kafka
|
35
|
+
class Client
  # Builds a TopicProducer for a single topic.
  #
  # A Compressor is created from the compression settings, then the cluster
  # returned by `initialize_cluster`, the client's logger and instrumenter,
  # and the remaining options are handed to TopicProducer.new.
  #
  # @param topic [String] topic the producer writes to.
  # @return [TopicProducer]
  def topic_producer(topic, compression_codec: nil, compression_threshold: 1, ack_timeout: 5, required_acks: :all, max_retries: 2, retry_backoff: 1, max_buffer_size: 1000, max_buffer_bytesize: 10_000_000)
    # The compressor entry comes first so it is built before the cluster,
    # matching the original evaluation order.
    producer_options = {
      compressor: Compressor.new(
        codec_name: compression_codec,
        threshold: compression_threshold,
        instrumenter: @instrumenter
      ),
      cluster: initialize_cluster,
      logger: @logger,
      instrumenter: @instrumenter,
      ack_timeout: ack_timeout,
      required_acks: required_acks,
      max_retries: max_retries,
      retry_backoff: retry_backoff,
      max_buffer_size: max_buffer_size,
      max_buffer_bytesize: max_buffer_bytesize
    }

    TopicProducer.new(topic, **producer_options)
  end
end
|
57
|
+
|
58
|
+
# Producer bound to one topic. Messages are queued as pending messages,
# assigned a partition when delivery starts, moved into a message buffer and
# sent through a ProduceOperation.
class TopicProducer
  # Stores the delivery settings and registers the topic with the cluster.
  #
  # @param topic [String] the topic every message of this producer goes to.
  def initialize(topic, cluster:, logger:, instrumenter:, compressor:, ack_timeout:, required_acks:, max_retries:, retry_backoff:, max_buffer_size:, max_buffer_bytesize:)
    @topic = topic
    @cluster = cluster
    @logger = logger
    @instrumenter = instrumenter
    @compressor = compressor

    # :all is stored as -1; the stored value is what ProduceOperation receives.
    @required_acks = (required_acks == :all) ? -1 : required_acks
    @ack_timeout = ack_timeout
    @max_retries = max_retries
    @retry_backoff = retry_backoff
    @max_buffer_size = max_buffer_size
    @max_buffer_bytesize = max_buffer_bytesize

    @cluster.add_target_topics(Set.new([topic]))

    # Messages that already have a partition, organized by topic/partition.
    @buffer = MessageBuffer.new

    # Messages added by `#produce` that still wait for a partition.
    @pending_message_queue = PendingMessageQueue.new
  end

  # Queues one message for this producer's topic.
  #
  # @return [nil]
  def produce(value, key, partition, partition_key)
    pending = PendingMessage.new(value, key, @topic, partition, partition_key, Time.now)
    @pending_message_queue.write(pending)

    nil
  end

  # Delivers everything that is queued or buffered; does nothing when both
  # are empty.
  def deliver_messages
    deliver_messages_with_retries unless buffer_size == 0
  end

  # Returns the number of messages currently held (pending plus buffered).
  #
  # @return [Integer] buffer size.
  def buffer_size
    pending_count = @pending_message_queue.size
    pending_count + @buffer.size
  end

  # Returns the byte size of pending plus buffered messages.
  def buffer_bytesize
    pending_bytes = @pending_message_queue.bytesize
    pending_bytes + @buffer.bytesize
  end

  # Deletes all buffered and pending messages.
  def clear_buffer
    @buffer.clear
    @pending_message_queue.clear
  end

  # Disconnects the cluster this producer uses.
  def shutdown
    @cluster.disconnect
  end

  private

  # Runs the produce operation until nothing is left or the retry budget is
  # used up, then raises DeliveryFailed if messages remain.
  def deliver_messages_with_retries
    operation = ProduceOperation.new(
      cluster: @cluster,
      buffer: @buffer,
      required_acks: @required_acks,
      ack_timeout: @ack_timeout,
      compressor: @compressor,
      logger: @logger,
      instrumenter: @instrumenter,
    )

    attempt = 0

    loop do
      attempt += 1

      @cluster.refresh_metadata_if_necessary!

      assign_partitions!
      operation.execute

      # With zero required acks the brokers send no response, so there is no
      # way to tell which messages were written; they are all assumed sent.
      @buffer.clear if @required_acks.zero?

      break if buffer_size.zero?

      if attempt > @max_retries
        @logger.error "Failed to send all messages; keeping remaining messages in buffer"
        break
      end

      @logger.warn "Failed to send all messages; attempting retry #{attempt} of #{@max_retries} after #{@retry_backoff}s"

      sleep @retry_backoff
    end

    unless @pending_message_queue.empty?
      # Mark the cluster as stale in order to force a cluster metadata refresh.
      @cluster.mark_as_stale!
      raise DeliveryFailed, "Failed to assign partitions to #{@pending_message_queue.size} messages"
    end

    return if @buffer.empty?

    partitions = @buffer.map { |buffered_topic, buffered_partition, _| "#{buffered_topic}/#{buffered_partition}" }.join(", ")

    raise DeliveryFailed, "Failed to send messages to #{partitions}"
  end

  # Moves pending messages into the buffer, choosing a partition for every
  # message that has none. Messages whose assignment raises Kafka::Error stay
  # in the pending queue.
  def assign_partitions!
    partition_count = @cluster.partitions_for(@topic).count
    unassigned = []

    @pending_message_queue.each do |message|
      target = message.partition

      begin
        target = Partitioner.partition_for_key(partition_count, message) if target.nil?

        @buffer.write(
          value: message.value,
          key: message.key,
          topic: message.topic,
          partition: target,
          create_time: message.create_time,
        )
      rescue Kafka::Error
        unassigned << message
      end
    end

    if unassigned.any?
      unassigned.group_by(&:topic).each do |topic, messages|
        @logger.error "Failed to assign partitions to #{messages.count} messages in #{topic}"
      end

      @cluster.mark_as_stale!
    end

    @pending_message_queue.replace(unassigned)
  end
end
|
223
|
+
end
|
@@ -0,0 +1,242 @@
|
|
1
|
+
require 'fluent/output'
|
2
|
+
require 'fluent/plugin/kafka_plugin_util'
|
3
|
+
|
4
|
+
class Fluent::KafkaOutput < Fluent::Output
|
5
|
+
Fluent::Plugin.register_output('kafka', self)
|
6
|
+
|
7
|
+
# --- Connection settings -------------------------------------------------
config_param :brokers, :string, :default => 'localhost:9092',
             :desc => <<-DESC
Set brokers directly
<broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,..
Note that you can choose to use either brokers or zookeeper.
DESC
config_param :zookeeper, :string, :default => nil,
             :desc => "Set brokers via Zookeeper: <zookeeper_host>:<zookeeper_port>"
config_param :zookeeper_path, :string, :default => '/brokers/ids',
             :desc => "Path in Zookeeper for broker ids. Defaults to /brokers/ids"

# --- Per-message defaults (used when the record has no matching field) ---
config_param :default_topic, :string, :default => nil,
             :desc => "Output topic."
config_param :default_message_key, :string, :default => nil
config_param :default_partition_key, :string, :default => nil
config_param :default_partition, :integer, :default => nil
config_param :client_id, :string, :default => 'kafka'
config_param :output_data_type, :string, :default => 'json',
             :desc => "Supported format: (json|ltsv|msgpack|attr:<record name>|<formatter name>)"
config_param :output_include_tag, :bool, :default => false
config_param :output_include_time, :bool, :default => false
config_param :exclude_partition_key, :bool, :default => false,
             :desc => <<-DESC
Set true to remove partition key from data
DESC
config_param :exclude_partition, :bool, :default => false,
             :desc => <<-DESC
Set true to remove partition from data
DESC

config_param :exclude_message_key, :bool, :default => false,
             :desc => <<-DESC
Set true to remove message key from data
DESC
config_param :exclude_topic_key, :bool, :default => false,
             :desc => <<-DESC
Set true to remove topic name key from data
DESC

# ruby-kafka producer options
config_param :max_send_retries, :integer, :default => 2,
             :desc => "Number of times to retry sending of messages to a leader."
config_param :required_acks, :integer, :default => -1,
             :desc => "The number of acks required per request."
config_param :ack_timeout, :integer, :default => nil,
             :desc => "How long the producer waits for acks."
config_param :compression_codec, :string, :default => nil,
             :desc => "The codec the producer uses to compress messages."

config_param :time_format, :string, :default => nil

config_param :max_buffer_size, :integer, :default => nil,
             :desc => "Number of messages to be buffered by the kafka producer."

config_param :max_buffer_bytesize, :integer, :default => nil,
             :desc => "Maximum size in bytes to be buffered."

config_param :active_support_notification_regex, :string, :default => nil,
             :desc => <<-DESC
Add a regular expression to capture ActiveSupport notifications from the Kafka client
requires activesupport gem - records will be generated under fluent_kafka_stats.**
DESC

include Fluent::KafkaPluginUtil::SSLSettings
include Fluent::KafkaPluginUtil::SaslSettings

attr_accessor :output_data_type
attr_accessor :field_separator

# Fall back to the global $log when the base class provides no `log` method.
unless method_defined?(:log)
  define_method("log") { $log }
end
|
78
|
+
|
79
|
+
# Sets up the plugin instance. The ruby-kafka library is loaded here, when
# an instance is created, and the client slot starts out empty.
def initialize
  super

  require 'kafka'

  @kafka = nil # assigned by refresh_client
end
|
86
|
+
|
87
|
+
# (Re)creates the Kafka client stored in @kafka.
#
# When a Zookeeper address is configured, the seed broker list is rebuilt
# first from the entries under @zookeeper_path. Errors raised while building
# the Kafka client are logged and not re-raised.
#
# Authentication is selected in this order:
#   1. SASL/SCRAM when @scram_mechanism, @username and @password are set
#   2. SASL/PLAIN when @username and @password are set
#   3. GSSAPI settings (@principal / @keytab) otherwise
def refresh_client
  if @zookeeper
    @seed_brokers = []
    z = Zookeeper.new(@zookeeper)
    begin
      z.get_children(:path => @zookeeper_path)[:children].each do |id|
        broker = Yajl.load(z.get(:path => @zookeeper_path + "/#{id}")[:data])
        @seed_brokers.push("#{broker['host']}:#{broker['port']}")
      end
    ensure
      # Release the Zookeeper handle even when a lookup fails.
      z.close
    end
    log.info "brokers has been refreshed via Zookeeper: #{@seed_brokers}"
  end

  begin
    if @seed_brokers.length > 0
      options = {
        seed_brokers: @seed_brokers,
        client_id: @client_id,
        ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
        ssl_client_cert: read_ssl_file(@ssl_client_cert),
        ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key)
      }

      if @scram_mechanism && @username && @password
        options.update(sasl_scram_username: @username, sasl_scram_password: @password,
                       sasl_scram_mechanism: @scram_mechanism)
      elsif @username && @password
        # `elseif` used to be parsed as a method call, so this branch was
        # unreachable, and the username keyword was misspelled.
        options.update(sasl_plain_username: @username, sasl_plain_password: @password)
      else
        options.update(sasl_gssapi_principal: @principal, sasl_gssapi_keytab: @keytab)
      end

      @kafka = Kafka.new(**options)
      log.info "initialized kafka producer: #{@client_id}"
    else
      log.warn "No brokers found on Zookeeper"
    end
  rescue StandardError => e
    # Best effort: report the failure. Signals and exit requests are no
    # longer swallowed.
    log.error e
  end
end
|
121
|
+
|
122
|
+
# Applies the plugin configuration: resolves the seed brokers (unless
# Zookeeper is used), handles the deprecated 'ack_timeout_ms' setting, picks
# the field separator, builds the formatter and the producer options, and
# optionally subscribes to ActiveSupport notifications.
#
# @param conf the configuration element passed in by Fluentd.
def configure(conf)
  super

  if @zookeeper
    require 'zookeeper'
  else
    @seed_brokers = @brokers.include?(",") ? @brokers.split(",") : [@brokers]
    log.info "brokers has been set directly: #{@seed_brokers}"
  end

  legacy_ack_timeout = conf['ack_timeout_ms']
  if legacy_ack_timeout
    log.warn "'ack_timeout_ms' parameter is deprecated. Use second unit 'ack_timeout' instead"
    # Integer division: values below 1000 ms become 0.
    @ack_timeout = legacy_ack_timeout.to_i / 1000
  end

  @f_separator =
    case @field_separator
    when /SPACE/i
      ' '
    when /COMMA/i
      ','
    when /SOH/i
      "\x01"
    else
      "\t"
    end

  @formatter_proc = setup_formatter(conf)

  # Optional settings are only added when they were configured.
  opts = { max_retries: @max_send_retries, required_acks: @required_acks }
  opts[:ack_timeout] = @ack_timeout if @ack_timeout
  opts[:compression_codec] = @compression_codec.to_sym if @compression_codec
  opts[:max_buffer_size] = @max_buffer_size if @max_buffer_size
  opts[:max_buffer_bytesize] = @max_buffer_bytesize if @max_buffer_bytesize
  @producer_opts = opts

  if @active_support_notification_regex
    require 'active_support/notifications'
    require 'active_support/core_ext/hash/keys'
    pattern = Regexp.new(@active_support_notification_regex)
    ActiveSupport::Notifications.subscribe(pattern) do |*args|
      event = ActiveSupport::Notifications::Event.new(*args)
      payload = event.payload
      message = payload.respond_to?(:stringify_keys) ? payload.stringify_keys : payload
      @router.emit("fluent_kafka_stats.#{event.name}", Time.now.to_i, message)
    end
  end
end
|
161
|
+
|
162
|
+
# Starts the plugin and builds the Kafka client via refresh_client.
def start
  super
  refresh_client
end
|
166
|
+
|
167
|
+
# Stops the plugin and drops the reference to the Kafka client.
def shutdown
  super
  @kafka = nil
end
|
171
|
+
|
172
|
+
# Returns a callable taking (tag, time, record) that serializes a record
# according to @output_data_type:
#   json     - Yajl encoding
#   ltsv     - LTSV dump
#   msgpack  - record.to_msgpack
#   attr:a,b - the listed record fields joined with @f_separator
#   other    - a Fluentd formatter plugin with that name
#
# @param conf configuration handed to a formatter plugin (last case only).
def setup_formatter(conf)
  case @output_data_type
  when 'json'
    require 'yajl'
    proc { |_tag, _time, record| Yajl::Encoder.encode(record) }
  when 'ltsv'
    require 'ltsv'
    proc { |_tag, _time, record| LTSV.dump(record) }
  when 'msgpack'
    require 'msgpack'
    proc { |_tag, _time, record| record.to_msgpack }
  when /^attr:(.*)$/
    names = Regexp.last_match(1).split(',').map(&:strip).reject(&:empty?)
    # 'time' is prepended first, so 'tag' ends up in front of it.
    names.unshift('time') if @output_include_time
    names.unshift('tag') if @output_include_tag
    @custom_attributes = names
    proc do |_tag, _time, record|
      fields = @custom_attributes.map do |attr|
        field = record[attr]
        field.nil? ? '' : field.to_s
      end
      fields.join(@f_separator)
    end
  else
    @formatter = Fluent::Plugin.new_formatter(@output_data_type)
    @formatter.configure(conf)
    @formatter.method(:format)
  end
end
|
197
|
+
|
198
|
+
# Sends every event of the stream to Kafka with a short-lived producer.
#
# For each record the topic, partition key, partition and message key are
# taken from the record (and removed from it when the matching exclude_*
# option is set), falling back to the configured defaults; the topic finally
# falls back to the tag.
#
# @param tag [String] event tag.
# @param es event stream yielding (time, record) pairs.
# @param chain output chain; `next` is called before anything is sent.
# @raise re-raises any error after shutting the producer down and
#   refreshing the client.
def emit(tag, es, chain)
  begin
    chain.next

    # out_kafka is mainly for testing so don't need the performance unlike out_kafka_buffered.
    # The options are splatted so they are passed as keywords on Ruby 3 too.
    producer = @kafka.producer(**@producer_opts)

    es.each do |time, record|
      if @output_include_time
        if @time_format
          record['time'] = Time.at(time).strftime(@time_format)
        else
          record['time'] = time
        end
      end
      record['tag'] = tag if @output_include_tag
      topic = (@exclude_topic_key ? record.delete('topic') : record['topic']) || @default_topic || tag
      partition_key = (@exclude_partition_key ? record.delete('partition_key') : record['partition_key']) || @default_partition_key
      partition = (@exclude_partition ? record.delete('partition'.freeze) : record['partition'.freeze]) || @default_partition
      message_key = (@exclude_message_key ? record.delete('message_key') : record['message_key']) || @default_message_key

      value = @formatter_proc.call(tag, time, record)

      log.trace { "message will send to #{topic} with partition_key: #{partition_key}, partition: #{partition}, message_key: #{message_key} and value: #{value}." }
      begin
        producer.produce(value, topic: topic, key: message_key, partition: partition, partition_key: partition_key)
      rescue Kafka::BufferOverflow => e
        log.warn "BufferOverflow occurred: #{e}"
        log.info "Trying to deliver the messages to prevent the buffer from overflowing again."
        producer.deliver_messages
        # The message that triggered the overflow was never buffered. Queue it
        # again now that the buffer is flushed, otherwise it would be lost.
        # A second overflow is not rescued here and reaches the outer handler.
        producer.produce(value, topic: topic, key: message_key, partition: partition, partition_key: partition_key)
        log.info "Recovered from BufferOverflow successfully"
      end
    end

    producer.deliver_messages
    producer.shutdown
  rescue Exception => e
    # Clean up and rebuild the client, then hand the error back to Fluentd.
    log.warn "Send exception occurred: #{e}"
    producer.shutdown if producer
    refresh_client
    raise e
  end
end
|
241
|
+
|
242
|
+
end
|
@@ -0,0 +1,224 @@
|
|
1
|
+
require 'fluent/plugin/output'
|
2
|
+
require 'fluent/plugin/kafka_plugin_util'
|
3
|
+
|
4
|
+
require 'kafka'
|
5
|
+
require 'fluent/plugin/kafka_producer_ext'
|
6
|
+
|
7
|
+
module Fluent::Plugin
|
8
|
+
class Fluent::Kafka2Output < Output
|
9
|
+
Fluent::Plugin.register_output('kafka2', self)
|
10
|
+
|
11
|
+
helpers :inject, :formatter

# --- Connection settings -------------------------------------------------
config_param :brokers, :array, :value_type => :string, :default => ['localhost:9092'],
             :desc => <<-DESC
Set brokers directly:
<broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,..
DESC

# --- Per-message defaults (used when the record has no matching field) ---
config_param :default_topic, :string, :default => nil,
             :desc => "Default output topic when record doesn't have topic field"
config_param :default_message_key, :string, :default => nil
config_param :default_partition_key, :string, :default => nil
config_param :default_partition, :integer, :default => nil
config_param :client_id, :string, :default => 'fluentd'
config_param :exclude_partition_key, :bool, :default => false,
             :desc => 'Set true to remove partition key from data'
config_param :exclude_partition, :bool, :default => false,
             :desc => 'Set true to remove partition from data'
# The description used to be a copy of the partition key one.
config_param :exclude_message_key, :bool, :default => false,
             :desc => 'Set true to remove message key from data'
config_param :exclude_topic_key, :bool, :default => false,
             :desc => 'Set true to remove topic name key from data'

config_param :get_kafka_client_log, :bool, :default => false

# ruby-kafka producer options
config_param :max_send_retries, :integer, :default => 2,
             :desc => "Number of times to retry sending of messages to a leader."
config_param :required_acks, :integer, :default => -1,
             :desc => "The number of acks required per request."
config_param :ack_timeout, :time, :default => nil,
             :desc => "How long the producer waits for acks."
config_param :compression_codec, :string, :default => nil,
             :desc => <<-DESC
The codec the producer uses to compress messages.
Supported codecs: (gzip|snappy)
DESC

config_param :active_support_notification_regex, :string, :default => nil,
             :desc => <<-DESC
Add a regular expression to capture ActiveSupport notifications from the Kafka client
requires activesupport gem - records will be generated under fluent_kafka_stats.**
DESC

# Chunks are keyed by topic unless the user configures other chunk keys.
config_section :buffer do
  config_set_default :chunk_keys, ["topic"]
end
config_section :format do
  config_set_default :@type, 'json'
end

include Fluent::KafkaPluginUtil::SSLSettings
include Fluent::KafkaPluginUtil::SaslSettings
|
63
|
+
|
64
|
+
# Sets up the plugin instance with no Kafka client yet.
def initialize
  super

  @kafka = nil # assigned by refresh_client
end
|
69
|
+
|
70
|
+
# (Re)creates the Kafka client stored in @kafka from the configured brokers.
#
# Authentication is selected in this order:
#   1. SASL/SCRAM when @scram_mechanism, @username and @password are set
#   2. SASL/PLAIN when @username and @password are set
#   3. GSSAPI settings (@principal / @keytab) otherwise
#
# @param raise_error [Boolean] when true (the default) errors are re-raised;
#   when false they are only logged.
def refresh_client(raise_error = true)
  begin
    # The plugin logger is handed to the client only when
    # get_kafka_client_log is enabled.
    logger = @get_kafka_client_log ? log : nil

    options = {
      # This class stores the broker list in @brokers; @seed_brokers is
      # never assigned here.
      seed_brokers: @brokers,
      client_id: @client_id,
      logger: logger,
      ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
      ssl_client_cert: read_ssl_file(@ssl_client_cert),
      ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key)
    }

    if @scram_mechanism && @username && @password
      options.update(sasl_scram_username: @username, sasl_scram_password: @password,
                     sasl_scram_mechanism: @scram_mechanism)
    elsif @username && @password
      # `elseif` used to be parsed as a method call, so this branch was
      # unreachable.
      options.update(sasl_plain_username: @username, sasl_plain_password: @password)
    else
      options.update(sasl_gssapi_principal: @principal, sasl_gssapi_keytab: @keytab)
    end

    @kafka = Kafka.new(**options)
    log.info "initialized kafka producer: #{@client_id}"
  rescue StandardError => e
    # During startup, error should be reported to engine and stop its phase for safety.
    raise e if raise_error

    log.error e
  end
end
|
95
|
+
|
96
|
+
# Validates the configuration and prepares the formatter and producer
# options.
#
# @param conf the configuration element passed in by Fluentd.
# @raise [Fluent::ConfigError] when no broker is configured, or when the
#   <format> section or its @type is missing.
def configure(conf)
  super

  if @brokers.size > 0
    log.info "brokers has been set: #{@brokers}"
  else
    # Fluent::Config is a module and cannot be raised; ConfigError is the
    # exception class used for configuration problems.
    raise Fluent::ConfigError, 'No brokers specified. Need one broker at least.'
  end

  formatter_conf = conf.elements('format').first
  unless formatter_conf
    raise Fluent::ConfigError, "<format> section is required."
  end
  unless formatter_conf["@type"]
    raise Fluent::ConfigError, "format/@type is required."
  end
  @formatter_proc = setup_formatter(formatter_conf)

  # Warn about chunk key / default_topic combinations that are likely mistakes.
  if @default_topic.nil?
    if @chunk_keys.include?('topic') && !@chunk_keys.include?('tag')
      log.warn "Use 'topic' field of event record for topic but no fallback. Recommend to set default_topic or set 'tag' in buffer chunk keys like <buffer topic,tag>"
    end
  else
    if @chunk_keys.include?('tag')
      log.warn "default_topic is set. Fluentd's event tag is not used for topic"
    end
  end

  # Optional settings are only added when they were configured.
  @producer_opts = {max_retries: @max_send_retries, required_acks: @required_acks}
  @producer_opts[:ack_timeout] = @ack_timeout if @ack_timeout
  @producer_opts[:compression_codec] = @compression_codec.to_sym if @compression_codec
  if @active_support_notification_regex
    require 'active_support/notifications'
    require 'active_support/core_ext/hash/keys'
    ActiveSupport::Notifications.subscribe(Regexp.new(@active_support_notification_regex)) do |*args|
      event = ActiveSupport::Notifications::Event.new(*args)
      message = event.payload.respond_to?(:stringify_keys) ? event.payload.stringify_keys : event.payload
      @router.emit("fluent_kafka_stats.#{event.name}", Time.now.to_i, message)
    end
  end
end
|
137
|
+
|
138
|
+
# Always reports true.
#
# @return [true]
def multi_workers_ready?
  true
end
|
141
|
+
|
142
|
+
# Starts the plugin and builds the Kafka client. refresh_client is called
# with its default, so a failure here is raised.
def start
  super
  refresh_client
end
|
146
|
+
|
147
|
+
# Closes the Kafka client if one was created.
def close
  super
  @kafka.close if @kafka
end
|
151
|
+
|
152
|
+
# Drops the reference to the Kafka client.
def terminate
  super
  @kafka = nil
end
|
156
|
+
|
157
|
+
# Returns a callable taking (tag, time, record) that serializes a record.
#
# 'json' uses Oj when it can be loaded and Yajl otherwise, 'ltsv' uses the
# LTSV library, and any other type is delegated to a formatter plugin
# created through the formatter helper.
#
# @param conf the <format> section; its '@type' selects the formatter.
def setup_formatter(conf)
  type = conf['@type']

  if type == 'json'
    begin
      require 'oj'
      Oj.default_options = Fluent::DEFAULT_OJ_OPTIONS
      proc { |_tag, _time, record| Oj.dump(record) }
    rescue LoadError
      require 'yajl'
      proc { |_tag, _time, record| Yajl::Encoder.encode(record) }
    end
  elsif type == 'ltsv'
    require 'ltsv'
    proc { |_tag, _time, record| LTSV.dump(record) }
  else
    @formatter = formatter_create(usage: 'kafka-plugin', conf: conf)
    @formatter.method(:format)
  end
end
|
177
|
+
|
178
|
+
# TODO: optimize write performance
#
# Writes one buffer chunk to Kafka with a producer dedicated to the chunk's
# topic (chunk variable :topic, then default_topic, then the chunk tag).
#
# Records that fail while being prepared or formatted are logged and
# skipped. Any other error triggers a client refresh and is re-raised so
# Fluentd retries the chunk. The producer is always shut down.
#
# @param chunk buffer chunk providing `metadata` and `msgpack_each`.
def write(chunk)
  tag = chunk.metadata.tag
  topic = chunk.metadata.variables[:topic] || @default_topic || tag
  # The options are splatted so they are passed as keywords on Ruby 3 too,
  # where a positional Hash is no longer converted.
  producer = @kafka.topic_producer(topic, **@producer_opts)

  messages = 0

  chunk.msgpack_each { |time, record|
    begin
      record = inject_values_to_record(tag, time, record)
      record.delete('topic'.freeze) if @exclude_topic_key
      partition_key = (@exclude_partition_key ? record.delete('partition_key'.freeze) : record['partition_key'.freeze]) || @default_partition_key
      partition = (@exclude_partition ? record.delete('partition'.freeze) : record['partition'.freeze]) || @default_partition
      message_key = (@exclude_message_key ? record.delete('message_key'.freeze) : record['message_key'.freeze]) || @default_message_key

      record_buf = @formatter_proc.call(tag, time, record)
    rescue StandardError => e
      log.warn "unexpected error during format record. Skip broken event:", :error => e.to_s, :error_class => e.class.to_s, :time => time, :record => record
      next
    end

    log.trace { "message will send to #{topic} with partition_key: #{partition_key}, partition: #{partition}, message_key: #{message_key} and value: #{record_buf}." }
    messages += 1

    producer.produce(record_buf, message_key, partition, partition_key)
  }

  if messages > 0
    log.debug { "#{messages} messages send." }
    producer.deliver_messages
  end
rescue Exception => e
  log.warn "Send exception occurred: #{e}"
  log.warn "Exception Backtrace : #{e.backtrace.join("\n")}"
  # For safety, refresh client and its producers
  refresh_client(false)
  # Raise exception to retry sending messages
  raise e
ensure
  producer.shutdown if producer
end
|
223
|
+
end
|
224
|
+
end
|