fluent-plugin-kafka-enchanced 0.5.1
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +2 -0
- data/.travis.yml +17 -0
- data/ChangeLog +49 -0
- data/Gemfile +4 -0
- data/LICENSE +22 -0
- data/README.md +221 -0
- data/Rakefile +12 -0
- data/fluent-plugin-kafka.gemspec +23 -0
- data/lib/fluent/plugin/in_kafka.rb +308 -0
- data/lib/fluent/plugin/in_kafka_group.rb +218 -0
- data/lib/fluent/plugin/kafka_plugin_util.rb +22 -0
- data/lib/fluent/plugin/kafka_producer_ext.rb +225 -0
- data/lib/fluent/plugin/out_kafka.rb +200 -0
- data/lib/fluent/plugin/out_kafka2.rb +187 -0
- data/lib/fluent/plugin/out_kafka_buffered.rb +279 -0
- data/test/helper.rb +27 -0
- data/test/plugin/test_out_kafka.rb +52 -0
- metadata +138 -0
data/lib/fluent/plugin/in_kafka_group.rb
@@ -0,0 +1,218 @@
+require 'fluent/input'
+require 'fluent/time'
+require 'fluent/plugin/kafka_plugin_util'
+
+class Fluent::KafkaGroupInput < Fluent::Input
+  Fluent::Plugin.register_input('kafka_group', self)
+
+  config_param :brokers, :string, :default => 'localhost:9092',
+               :desc => "List of broker host:port pairs, separated by commas. Must be set."
+  config_param :consumer_group, :string,
+               :desc => "Consumer group name. Must be set."
+  config_param :topics, :string,
+               :desc => "Topics to listen on, separated by commas (',')."
+  config_param :format, :string, :default => 'json',
+               :desc => "Supported formats: (json|text|ltsv|msgpack)"
+  config_param :message_key, :string, :default => 'message',
+               :desc => "For 'text' format only."
+  config_param :add_prefix, :string, :default => nil,
+               :desc => "Tag prefix (Optional)"
+  config_param :add_suffix, :string, :default => nil,
+               :desc => "Tag suffix (Optional)"
+  config_param :retry_emit_limit, :integer, :default => nil,
+               :desc => "How many times to retry emitting when BufferQueueLimitError happens, waiting 1s between attempts (retry_emit_limit x 1s in total). The default is to wait until the BufferQueueLimitError is resolved."
+  config_param :use_record_time, :bool, :default => false,
+               :desc => "Replace message timestamp with contents of 'time' field."
+  config_param :time_format, :string, :default => nil,
+               :desc => "Time format to be used to parse the 'time' field."
+
+  # Kafka consumer options
+  config_param :max_bytes, :integer, :default => 1048576,
+               :desc => "Maximum number of bytes to fetch."
+  config_param :max_wait_time, :integer, :default => nil,
+               :desc => "How long to block until the server sends us data."
+  config_param :min_bytes, :integer, :default => nil,
+               :desc => "Smallest amount of data the server should send us."
+  config_param :session_timeout, :integer, :default => nil,
+               :desc => "The number of seconds after which, if a client hasn't contacted the Kafka cluster, it will be kicked out of the group."
+  config_param :offset_commit_interval, :integer, :default => nil,
+               :desc => "The interval between offset commits, in seconds."
+  config_param :offset_commit_threshold, :integer, :default => nil,
+               :desc => "The number of messages that can be processed before their offsets are committed."
+  config_param :start_from_beginning, :bool, :default => true,
+               :desc => "Whether to start from the beginning of the topic or just subscribe to new messages being produced."
+
+  include Fluent::KafkaPluginUtil::SSLSettings
+
+  class ForShutdown < StandardError
+  end
+
+  BufferError = if defined?(Fluent::Plugin::Buffer::BufferOverflowError)
+                  Fluent::Plugin::Buffer::BufferOverflowError
+                else
+                  Fluent::BufferQueueLimitError
+                end
+
+  unless method_defined?(:router)
+    define_method("router") { Fluent::Engine }
+  end
+
+  def initialize
+    super
+    require 'kafka'
+
+    @time_parser = nil
+  end
+
+  def _config_to_array(config)
+    config_array = config.split(',').map {|k| k.strip }
+    if config_array.empty?
+      raise Fluent::ConfigError, "kafka_group: '#{config}' is a required parameter"
+    end
+    config_array
+  end
+
+  private :_config_to_array
+
+  def configure(conf)
+    super
+
+    $log.info "Will watch for topics #{@topics} at brokers " \
+              "#{@brokers} and '#{@consumer_group}' group"
+
+    @topics = _config_to_array(@topics)
+
+    if conf['max_wait_ms']
+      log.warn "'max_wait_ms' parameter is deprecated. Use second unit 'max_wait_time' instead"
+      @max_wait_time = conf['max_wait_ms'].to_i / 1000
+    end
+
+    @parser_proc = setup_parser
+
+    @consumer_opts = {:group_id => @consumer_group}
+    @consumer_opts[:session_timeout] = @session_timeout if @session_timeout
+    @consumer_opts[:offset_commit_interval] = @offset_commit_interval if @offset_commit_interval
+    @consumer_opts[:offset_commit_threshold] = @offset_commit_threshold if @offset_commit_threshold
+
+    @fetch_opts = {}
+    @fetch_opts[:max_wait_time] = @max_wait_time if @max_wait_time
+    @fetch_opts[:min_bytes] = @min_bytes if @min_bytes
+
+    if @use_record_time and @time_format
+      @time_parser = Fluent::TextParser::TimeParser.new(@time_format)
+    end
+  end
+
+  def setup_parser
+    case @format
+    when 'json'
+      require 'yajl'
+      Proc.new { |msg| Yajl::Parser.parse(msg.value) }
+    when 'ltsv'
+      require 'ltsv'
+      Proc.new { |msg| LTSV.parse(msg.value).first }
+    when 'msgpack'
+      require 'msgpack'
+      Proc.new { |msg| MessagePack.unpack(msg.value) }
+    when 'text'
+      Proc.new { |msg| {@message_key => msg.value} }
+    end
+  end
+
+  def start
+    super
+
+    @kafka = Kafka.new(seed_brokers: @brokers,
+                       ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
+                       ssl_client_cert: read_ssl_file(@ssl_client_cert),
+                       ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key))
+    @consumer = setup_consumer
+    @thread = Thread.new(&method(:run))
+  end
+
+  def shutdown
+    # This nil assignment should be guarded by a mutex, following proper
+    # multithreaded programming practice. But contention here is very low,
+    # so we don't use a mutex for now. If a problem arises, we will add a
+    # guard for the consumer.
+    consumer = @consumer
+    @consumer = nil
+    consumer.stop
+
+    @thread.join
+    @kafka.close
+    super
+  end
+
+  def setup_consumer
+    consumer = @kafka.consumer(@consumer_opts)
+    @topics.each { |topic|
+      consumer.subscribe(topic, start_from_beginning: @start_from_beginning, max_bytes_per_partition: @max_bytes)
+    }
+    consumer
+  end
+
+  def run
+    while @consumer
+      begin
+        @consumer.each_batch(@fetch_opts) { |batch|
+          es = Fluent::MultiEventStream.new
+          tag = batch.topic
+          tag = @add_prefix + "." + tag if @add_prefix
+          tag = tag + "." + @add_suffix if @add_suffix
+
+          batch.messages.each { |msg|
+            begin
+              record = @parser_proc.call(msg)
+              if @use_record_time
+                if @time_format
+                  record_time = @time_parser.parse(record['time'])
+                else
+                  record_time = record['time']
+                end
+              else
+                record_time = Fluent::Engine.now
+              end
+              es.add(record_time, record)
+            rescue => e
+              log.warn "parser error in #{batch.topic}/#{batch.partition}", :error => e.to_s, :value => msg.value, :offset => msg.offset
+              log.debug_backtrace
+            end
+          }
+
+          unless es.empty?
+            emit_events(tag, es)
+          end
+        }
+      rescue ForShutdown
+      rescue => e
+        log.error "unexpected error during consuming events from kafka. Re-fetch events.", :error => e.to_s
+        log.error_backtrace
+      end
+    end
+  rescue => e
+    log.error "unexpected error during consumer object access", :error => e.to_s
+    log.error_backtrace
+  end
+
+  def emit_events(tag, es)
+    retries = 0
+    begin
+      router.emit_stream(tag, es)
+    rescue BufferError
+      raise ForShutdown if @consumer.nil?
+
+      if @retry_emit_limit.nil?
+        sleep 1
+        retry
+      end
+
+      if retries < @retry_emit_limit
+        retries += 1
+        sleep 1
+        retry
+      else
+        raise RuntimeError, "Exceeds retry_emit_limit"
+      end
+    end
+  end
+end
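Usage sketch (not part of the diff): the parameters above map directly onto a Fluentd <source> block such as the following. Broker hosts, group name, and topic names are placeholders.

    <source>
      @type kafka_group
      brokers broker1:9092,broker2:9092
      consumer_group my_consumer_group
      topics web_logs,app_logs
      format json
      add_prefix kafka
      start_from_beginning false
    </source>

With add_prefix set as above, events consumed from topic web_logs are routed with tag kafka.web_logs, per the tag construction in the run method.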
data/lib/fluent/plugin/kafka_plugin_util.rb
@@ -0,0 +1,22 @@
+module Fluent
+  module KafkaPluginUtil
+    module SSLSettings
+      def self.included(klass)
+        klass.instance_eval {
+          # https://github.com/zendesk/ruby-kafka#encryption-and-authentication-using-ssl
+          config_param :ssl_ca_cert, :string, :default => nil,
+                       :desc => "A PEM encoded CA cert to use with an SSL connection."
+          config_param :ssl_client_cert, :string, :default => nil,
+                       :desc => "A PEM encoded client cert to use with an SSL connection. Must be used in combination with ssl_client_cert_key."
+          config_param :ssl_client_cert_key, :string, :default => nil,
+                       :desc => "A PEM encoded client cert key to use with an SSL connection. Must be used in combination with ssl_client_cert."
+        }
+      end
+
+      def read_ssl_file(path)
+        return nil if path.nil?
+        File.read(path)
+      end
+    end
+  end
+end
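For context, this mixin is consumed by the plugins in the gem roughly as follows (condensed from in_kafka_group.rb above; not a complete plugin). The self.included hook runs config_param in the including class, so every plugin that mixes in SSLSettings gains the three SSL parameters.

    class Fluent::KafkaGroupInput < Fluent::Input
      # Adds the ssl_ca_cert / ssl_client_cert / ssl_client_cert_key
      # config_params to the including plugin class.
      include Fluent::KafkaPluginUtil::SSLSettings

      def start
        super
        # read_ssl_file returns nil for unset paths, so a plain-text
        # connection is used unless the SSL options are configured.
        @kafka = Kafka.new(seed_brokers: @brokers,
                           ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
                           ssl_client_cert: read_ssl_file(@ssl_client_cert),
                           ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key))
      end
    end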
data/lib/fluent/plugin/kafka_producer_ext.rb
@@ -0,0 +1,225 @@
+require "set"
+require "kafka/partitioner"
+require "kafka/message_buffer"
+require "kafka/produce_operation"
+require "kafka/pending_message_queue"
+require "kafka/pending_message"
+require "kafka/compressor"
+require 'kafka/producer'
+
+# for out_kafka_buffered
+module Kafka
+  class Producer
+    def produce2(value, key: nil, topic:, partition: nil, partition_key: nil)
+      create_time = Time.now
+
+      message = PendingMessage.new(
+        value,
+        key,
+        topic,
+        partition,
+        partition_key,
+        create_time,
+        key.to_s.bytesize + value.to_s.bytesize
+      )
+
+      @target_topics.add(topic)
+      @pending_message_queue.write(message)
+
+      nil
+    end
+  end
+end
+
+# for out_kafka2
+module Kafka
+  class Client
+    def topic_producer(topic, compression_codec: nil, compression_threshold: 1, ack_timeout: 5, required_acks: :all, max_retries: 2, retry_backoff: 1, max_buffer_size: 1000, max_buffer_bytesize: 10_000_000)
+      compressor = Compressor.new(
+        codec_name: compression_codec,
+        threshold: compression_threshold,
+        instrumenter: @instrumenter,
+      )
+
+      TopicProducer.new(topic,
+        cluster: initialize_cluster,
+        logger: @logger,
+        instrumenter: @instrumenter,
+        compressor: compressor,
+        ack_timeout: ack_timeout,
+        required_acks: required_acks,
+        max_retries: max_retries,
+        retry_backoff: retry_backoff,
+        max_buffer_size: max_buffer_size,
+        max_buffer_bytesize: max_buffer_bytesize,
+      )
+    end
+  end
+
+  class TopicProducer
+    def initialize(topic, cluster:, logger:, instrumenter:, compressor:, ack_timeout:, required_acks:, max_retries:, retry_backoff:, max_buffer_size:, max_buffer_bytesize:)
+      @cluster = cluster
+      @logger = logger
+      @instrumenter = instrumenter
+      @required_acks = required_acks == :all ? -1 : required_acks
+      @ack_timeout = ack_timeout
+      @max_retries = max_retries
+      @retry_backoff = retry_backoff
+      @max_buffer_size = max_buffer_size
+      @max_buffer_bytesize = max_buffer_bytesize
+      @compressor = compressor
+
+      @topic = topic
+      @cluster.add_target_topics(Set.new([topic]))
+
+      # A buffer organized by topic/partition.
+      @buffer = MessageBuffer.new
+
+      # Messages added by `#produce` but not yet assigned a partition.
+      @pending_message_queue = PendingMessageQueue.new
+    end
+
+    def produce(value, key, partition, partition_key)
+      create_time = Time.now
+
+      message = PendingMessage.new(
+        value,
+        key,
+        @topic,
+        partition,
+        partition_key,
+        create_time,
+        key.to_s.bytesize + value.to_s.bytesize
+      )
+
+      @pending_message_queue.write(message)
+
+      nil
+    end
+
+    def deliver_messages
+      # There's no need to do anything if the buffer is empty.
+      return if buffer_size == 0
+
+      deliver_messages_with_retries
+    end
+
+    # Returns the number of messages currently held in the buffer.
+    #
+    # @return [Integer] buffer size.
+    def buffer_size
+      @pending_message_queue.size + @buffer.size
+    end
+
+    def buffer_bytesize
+      @pending_message_queue.bytesize + @buffer.bytesize
+    end
+
+    # Deletes all buffered messages.
+    #
+    # @return [nil]
+    def clear_buffer
+      @buffer.clear
+      @pending_message_queue.clear
+    end
+
+    # Closes all connections to the brokers.
+    #
+    # @return [nil]
+    def shutdown
+      @cluster.disconnect
+    end
+
+    private
+
+    def deliver_messages_with_retries
+      attempt = 0
+
+      #@cluster.add_target_topics(@target_topics)
+
+      operation = ProduceOperation.new(
+        cluster: @cluster,
+        buffer: @buffer,
+        required_acks: @required_acks,
+        ack_timeout: @ack_timeout,
+        compressor: @compressor,
+        logger: @logger,
+        instrumenter: @instrumenter,
+      )
+
+      loop do
+        attempt += 1
+
+        @cluster.refresh_metadata_if_necessary!
+
+        assign_partitions!
+        operation.execute
+
+        if @required_acks.zero?
+          # No response is returned by the brokers, so we can't know which messages
+          # have been successfully written. Our only option is to assume that they all
+          # have.
+          @buffer.clear
+        end
+
+        if buffer_size.zero?
+          break
+        elsif attempt <= @max_retries
+          @logger.warn "Failed to send all messages; attempting retry #{attempt} of #{@max_retries} after #{@retry_backoff}s"
+
+          sleep @retry_backoff
+        else
+          @logger.error "Failed to send all messages; keeping remaining messages in buffer"
+          break
+        end
+      end
+
+      unless @pending_message_queue.empty?
+        # Mark the cluster as stale in order to force a cluster metadata refresh.
+        @cluster.mark_as_stale!
+        raise DeliveryFailed, "Failed to assign partitions to #{@pending_message_queue.size} messages"
+      end
+
+      unless @buffer.empty?
+        partitions = @buffer.map {|topic, partition, _| "#{topic}/#{partition}" }.join(", ")
+
+        raise DeliveryFailed, "Failed to send messages to #{partitions}"
+      end
+    end
+
+    def assign_partitions!
+      failed_messages = []
+      partition_count = @cluster.partitions_for(@topic).count
+
+      @pending_message_queue.each do |message|
+        partition = message.partition
+
+        begin
+          if partition.nil?
+            partition = Partitioner.partition_for_key(partition_count, message)
+          end
+
+          @buffer.write(
+            value: message.value,
+            key: message.key,
+            topic: message.topic,
+            partition: partition,
+            create_time: message.create_time,
+          )
+        rescue Kafka::Error => e
+          failed_messages << message
+        end
+      end
+
+      if failed_messages.any?
+        failed_messages.group_by(&:topic).each do |topic, messages|
+          @logger.error "Failed to assign partitions to #{messages.count} messages in #{topic}"
+        end
+
+        @cluster.mark_as_stale!
+      end
+
+      @pending_message_queue.replace(failed_messages)
+    end
+  end
+end
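A minimal usage sketch of the single-topic producer added here, using only the methods defined in this extension (broker address, topic name, and payload are placeholders; error handling omitted):

    require "kafka"
    require "fluent/plugin/kafka_producer_ext"

    kafka = Kafka.new(seed_brokers: ["localhost:9092"])
    producer = kafka.topic_producer("my-topic", required_acks: 1)

    # produce(value, key, partition, partition_key): with partition nil,
    # assign_partitions! derives one via Partitioner from the partition
    # key (falling back to the message key).
    producer.produce('{"hello":"world"}', "key-1", nil, nil)

    producer.deliver_messages  # flush the pending queue, with retries
    producer.shutdown          # disconnect from the brokers

Unlike Kafka::Producer, TopicProducer is bound to one topic at construction time, which is what lets out_kafka2 skip per-message topic bookkeeping (the commented-out add_target_topics call in deliver_messages_with_retries).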