fluent-plugin-kafka 0.4.2 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ChangeLog +5 -1
- data/fluent-plugin-kafka.gemspec +1 -1
- data/lib/fluent/plugin/kafka_producer_ext.rb +201 -0
- data/lib/fluent/plugin/out_kafka2.rb +187 -0
- metadata +4 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: a09a4933e7d0f7a30094cd98900a03a80dac3c9a
|
|
4
|
+
data.tar.gz: 9421658f52091e37e39e32ae10e7e93132d9394f
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: e3a72bb6fecbe2dd0204e8bc84234d3d11c44017699eb7ce88ee4b154d04966f939487d85ef90b4811510c490fb733a922ba1f70012dc6fbf9490afebf843ed7
|
|
7
|
+
data.tar.gz: 5b633c21eadd8797a5a6672191a391468a5b8c8e2b4c7968f74b67d7e3edc2978f3db0df2c157b835a7dcd75e899f4b26baf1d62c02ac7cf47f55943dd72dba8
|
data/ChangeLog
CHANGED
data/fluent-plugin-kafka.gemspec
CHANGED
|
@@ -12,7 +12,7 @@ Gem::Specification.new do |gem|
|
|
|
12
12
|
gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
|
|
13
13
|
gem.name = "fluent-plugin-kafka"
|
|
14
14
|
gem.require_paths = ["lib"]
|
|
15
|
-
gem.version = '0.4.2'
|
|
15
|
+
gem.version = '0.5.0'
|
|
16
16
|
gem.required_ruby_version = ">= 2.1.0"
|
|
17
17
|
|
|
18
18
|
gem.add_dependency "fluentd", [">= 0.10.58", "< 2"]
|
|
data/lib/fluent/plugin/kafka_producer_ext.rb
CHANGED
|
@@ -1,5 +1,13 @@
|
|
|
1
|
+
require "set"
|
|
2
|
+
require "kafka/partitioner"
|
|
3
|
+
require "kafka/message_buffer"
|
|
4
|
+
require "kafka/produce_operation"
|
|
5
|
+
require "kafka/pending_message_queue"
|
|
6
|
+
require "kafka/pending_message"
|
|
7
|
+
require "kafka/compressor"
|
|
1
8
|
require 'kafka/producer'
|
|
2
9
|
|
|
10
|
+
# for out_kafka_buffered
|
|
3
11
|
module Kafka
|
|
4
12
|
class Producer
|
|
5
13
|
def produce2(value, key: nil, topic:, partition: nil, partition_key: nil)
|
|
@@ -22,3 +30,196 @@ module Kafka
|
|
|
22
30
|
end
|
|
23
31
|
end
|
|
24
32
|
end
|
|
33
|
+
|
|
34
|
+
# for out_kafka2
|
|
35
|
+
module Kafka
|
|
36
|
+
class Client
  # Builds a TopicProducer dedicated to a single topic.
  #
  # A Compressor is created from `compression_codec` / `compression_threshold`
  # and handed to the producer together with a cluster obtained from
  # `initialize_cluster` and this client's logger and instrumenter. All other
  # keyword arguments are forwarded to TopicProducer unchanged.
  #
  # @param topic [String] the topic every produced message is written to.
  # @param compression_codec [Symbol, nil] passed to Compressor as `codec_name`.
  # @param compression_threshold [Integer] passed to Compressor as `threshold`.
  # @param ack_timeout [Integer] forwarded to TopicProducer.
  # @param required_acks [Symbol, Integer] forwarded to TopicProducer.
  # @param max_retries [Integer] forwarded to TopicProducer.
  # @param retry_backoff [Integer] forwarded to TopicProducer.
  # @param max_buffer_size [Integer] forwarded to TopicProducer.
  # @param max_buffer_bytesize [Integer] forwarded to TopicProducer.
  # @return [TopicProducer]
  def topic_producer(topic,
                     compression_codec: nil,
                     compression_threshold: 1,
                     ack_timeout: 5,
                     required_acks: :all,
                     max_retries: 2,
                     retry_backoff: 1,
                     max_buffer_size: 1000,
                     max_buffer_bytesize: 10_000_000)
    # The hash literal is evaluated top to bottom, so the compressor is built
    # before the cluster is initialized.
    producer_options = {
      compressor: Compressor.new(
        codec_name: compression_codec,
        threshold: compression_threshold,
        instrumenter: @instrumenter
      ),
      cluster: initialize_cluster,
      logger: @logger,
      instrumenter: @instrumenter,
      ack_timeout: ack_timeout,
      required_acks: required_acks,
      max_retries: max_retries,
      retry_backoff: retry_backoff,
      max_buffer_size: max_buffer_size,
      max_buffer_bytesize: max_buffer_bytesize
    }

    TopicProducer.new(topic, **producer_options)
  end
end
|
|
58
|
+
|
|
59
|
+
# A producer bound to exactly one topic.
#
# Messages handed to #produce are parked in a pending queue; #deliver_messages
# assigns each of them a partition, moves them into a MessageBuffer and runs a
# ProduceOperation over that buffer, retrying up to `max_retries` times.
class TopicProducer
  # @param topic [String] the topic all messages are written to.
  # @param cluster [Object] must respond to add_target_topics,
  #   refresh_metadata_if_necessary!, partitions_for, mark_as_stale! and
  #   disconnect (presumably a Kafka::Cluster -- confirm against the caller).
  # @param logger [Object] receives warn/error messages.
  # @param instrumenter [Object] passed through to ProduceOperation.
  # @param compressor [Object] passed through to ProduceOperation.
  # @param ack_timeout [Integer] passed through to ProduceOperation.
  # @param required_acks [Symbol, Integer] `:all` is translated to -1.
  # @param max_retries [Integer] number of retries after the first attempt.
  # @param retry_backoff [Numeric] seconds slept between attempts.
  # @param max_buffer_size [Integer] stored; not consulted anywhere in this class.
  # @param max_buffer_bytesize [Integer] stored; not consulted anywhere in this class.
  def initialize(topic, cluster:, logger:, instrumenter:, compressor:, ack_timeout:, required_acks:, max_retries:, retry_backoff:, max_buffer_size:, max_buffer_bytesize:)
    @cluster = cluster
    @logger = logger
    @instrumenter = instrumenter
    @required_acks = (required_acks == :all) ? -1 : required_acks
    @ack_timeout = ack_timeout
    @max_retries = max_retries
    @retry_backoff = retry_backoff
    @max_buffer_size = max_buffer_size
    @max_buffer_bytesize = max_buffer_bytesize
    @compressor = compressor

    @topic = topic
    @cluster.add_target_topics(Set.new([topic]))

    # Messages that already have a partition, grouped by topic/partition.
    @buffer = MessageBuffer.new

    # Messages accepted by #produce that still need a partition.
    @pending_message_queue = PendingMessageQueue.new
  end

  # Queues one message for the producer's topic. Nothing is sent until
  # #deliver_messages is called.
  #
  # @param value [#to_s] message payload.
  # @param key [#to_s, nil] message key.
  # @param partition [Integer, nil] explicit partition; when nil one is chosen
  #   by the Partitioner at delivery time.
  # @param partition_key [Object, nil] stored on the pending message.
  # @return [nil]
  def produce(value, key, partition, partition_key)
    created_at = Time.now
    payload_bytes = key.to_s.bytesize + value.to_s.bytesize

    pending = PendingMessage.new(value, key, @topic, partition, partition_key, created_at, payload_bytes)
    @pending_message_queue.write(pending)

    nil
  end

  # Sends everything that is currently queued or buffered. Does nothing when
  # there are no messages.
  #
  # @raise [DeliveryFailed] when messages remain after all attempts.
  def deliver_messages
    deliver_messages_with_retries unless buffer_size == 0
  end

  # Number of messages held by the producer: pending plus partition-assigned.
  #
  # @return [Integer] buffer size.
  def buffer_size
    pending_count = @pending_message_queue.size
    pending_count + @buffer.size
  end

  # Byte size of the messages held by the producer: pending plus
  # partition-assigned.
  #
  # @return [Integer]
  def buffer_bytesize
    pending_bytes = @pending_message_queue.bytesize
    pending_bytes + @buffer.bytesize
  end

  # Drops all buffered and all pending messages.
  def clear_buffer
    @buffer.clear
    @pending_message_queue.clear
  end

  # Disconnects the underlying cluster.
  def shutdown
    @cluster.disconnect
  end

  private

  # Runs the assign-partitions / produce cycle until the buffers are empty or
  # the retry budget (1 + max_retries attempts) is used up, then raises if
  # anything is left over.
  def deliver_messages_with_retries
    attempt = 0

    operation = ProduceOperation.new(
      cluster: @cluster,
      buffer: @buffer,
      required_acks: @required_acks,
      ack_timeout: @ack_timeout,
      compressor: @compressor,
      logger: @logger,
      instrumenter: @instrumenter
    )

    loop do
      attempt += 1

      @cluster.refresh_metadata_if_necessary!

      assign_partitions!
      operation.execute

      # With zero required acks the brokers send no response, so there is no
      # way to tell which messages were written; assume all of them were.
      @buffer.clear if @required_acks.zero?

      break if buffer_size.zero?

      if attempt > @max_retries
        @logger.error "Failed to send all messages; keeping remaining messages in buffer"
        break
      end

      @logger.warn "Failed to send all messages; attempting retry #{attempt} of #{@max_retries} after #{@retry_backoff}s"

      sleep @retry_backoff
    end

    unless @pending_message_queue.empty?
      # Force a metadata refresh on the next use of the cluster.
      @cluster.mark_as_stale!
      raise DeliveryFailed, "Failed to assign partitions to #{@pending_message_queue.size} messages"
    end

    unless @buffer.empty?
      destinations = @buffer.map { |buffered_topic, buffered_partition, _| "#{buffered_topic}/#{buffered_partition}" }.join(", ")

      raise DeliveryFailed, "Failed to send messages to #{destinations}"
    end
  end

  # Moves every pending message into the buffer, picking a partition through
  # the Partitioner when the message has none. Messages whose assignment or
  # buffering raises Kafka::Error stay in the pending queue.
  def assign_partitions!
    unassigned = []
    partition_count = @cluster.partitions_for(@topic).count

    @pending_message_queue.each do |pending|
      target_partition = pending.partition

      begin
        target_partition = Partitioner.partition_for_key(partition_count, pending) if target_partition.nil?

        @buffer.write(
          value: pending.value,
          key: pending.key,
          topic: pending.topic,
          partition: target_partition,
          create_time: pending.create_time
        )
      rescue Kafka::Error
        unassigned << pending
      end
    end

    unless unassigned.empty?
      unassigned.group_by(&:topic).each do |failed_topic, failed_group|
        @logger.error "Failed to assign partitions to #{failed_group.count} messages in #{failed_topic}"
      end

      @cluster.mark_as_stale!
    end

    @pending_message_queue.replace(unassigned)
  end
end
|
|
225
|
+
end
|
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
require 'fluent/plugin/output'
|
|
2
|
+
require 'fluent/plugin/kafka_plugin_util'
|
|
3
|
+
|
|
4
|
+
require 'kafka'
|
|
5
|
+
require 'fluent/plugin/kafka_producer_ext'
|
|
6
|
+
|
|
7
|
+
module Fluent::Plugin
|
|
8
|
+
# Fluentd output plugin (registered as `kafka2`) that writes buffered chunks
# to Kafka through a per-chunk Kafka::TopicProducer.
#
# The destination topic is taken from the chunk's `topic` variable, then
# `default_topic`, then the chunk tag. Per-record `partition_key`,
# `partition` and `message_key` fields override the configured defaults.
class Fluent::Kafka2Output < Output
  Fluent::Plugin.register_output('kafka2', self)

  helpers :inject, :formatter

  config_param :brokers, :array, :value_type => :string, :default => ['localhost:9092'],
               :desc => <<-DESC
Set brokers directly:
<broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,..
DESC
  config_param :default_topic, :string, :default => nil,
               :desc => "Default output topic when record doesn't have topic field"
  config_param :default_message_key, :string, :default => nil
  config_param :default_partition_key, :string, :default => nil
  config_param :default_partition, :integer, :default => nil
  config_param :client_id, :string, :default => 'fluentd'
  config_param :exclude_partition_key, :bool, :default => false,
               :desc => 'Set true to remove partition key from data'
  config_param :exclude_partition, :bool, :default => false,
               :desc => 'Set true to remove partition from data'
  config_param :exclude_message_key, :bool, :default => false,
               :desc => 'Set true to remove message key from data'
  config_param :exclude_topic_key, :bool, :default => false,
               :desc => 'Set true to remove topic name key from data'

  config_param :get_kafka_client_log, :bool, :default => false

  # ruby-kafka producer options
  config_param :max_send_retries, :integer, :default => 2,
               :desc => "Number of times to retry sending of messages to a leader."
  config_param :required_acks, :integer, :default => -1,
               :desc => "The number of acks required per request."
  config_param :ack_timeout, :time, :default => nil,
               :desc => "How long the producer waits for acks."
  config_param :compression_codec, :string, :default => nil,
               :desc => <<-DESC
The codec the producer uses to compress messages.
Supported codecs: (gzip|snappy)
DESC

  config_section :buffer do
    config_set_default :chunk_keys, ["topic"]
  end
  config_section :format do
    config_set_default :@type, 'json'
  end

  include Fluent::KafkaPluginUtil::SSLSettings

  def initialize
    super

    @kafka = nil
  end

  # (Re)creates the Kafka client stored in @kafka.
  #
  # @param raise_error [Boolean] when true a failure is re-raised (used at
  #   startup so the engine sees it); when false the failure is only logged.
  def refresh_client(raise_error = true)
    begin
      # The plugin logger is handed to ruby-kafka only when explicitly enabled.
      logger = @get_kafka_client_log ? log : nil
      @kafka = Kafka.new(seed_brokers: @brokers, client_id: @client_id, logger: logger, ssl_ca_cert: read_ssl_file(@ssl_ca_cert),
                         ssl_client_cert: read_ssl_file(@ssl_client_cert), ssl_client_cert_key: read_ssl_file(@ssl_client_cert_key))
      log.info "initialized kafka producer: #{@client_id}"
    rescue StandardError => e
      # StandardError rather than Exception: with raise_error == false an
      # Exception rescue would silently swallow signals and exit requests.
      if raise_error # During startup, error should be reported to engine and stop its phase for safety.
        raise e
      else
        log.error e
      end
    end
  end

  # Validates the configuration and prepares the formatter and the options
  # passed to Kafka::Client#topic_producer.
  #
  # @param conf [Fluent::Config::Element]
  # @raise [Fluent::ConfigError] when no broker is configured, or when the
  #   <format> section or its @type is missing.
  def configure(conf)
    super

    if @brokers.size > 0
      log.info "brokers has been set: #{@brokers}"
    else
      # Fluent::Config is a module; raising it would produce a TypeError
      # instead of a configuration error.
      raise Fluent::ConfigError, 'No brokers specified. Need one broker at least.'
    end

    formatter_conf = conf.elements('format').first
    unless formatter_conf
      raise Fluent::ConfigError, "<format> section is required."
    end
    unless formatter_conf["@type"]
      raise Fluent::ConfigError, "format/@type is required."
    end
    @formatter_proc = setup_formatter(formatter_conf)

    @producer_opts = {max_retries: @max_send_retries, required_acks: @required_acks}
    # Optional settings are added only when configured, so that
    # topic_producer's own keyword defaults apply otherwise.
    @producer_opts[:ack_timeout] = @ack_timeout if @ack_timeout
    @producer_opts[:compression_codec] = @compression_codec.to_sym if @compression_codec
  end

  def multi_workers_ready?
    true
  end

  def start
    super
    refresh_client
  end

  def close
    super
    @kafka.close if @kafka
  end

  def terminate
    super
    @kafka = nil
  end

  # Returns a callable taking (tag, time, record) and returning the encoded
  # message value.
  #
  # 'json' uses Oj when it can be loaded and falls back to Yajl; 'ltsv' uses
  # the ltsv library; any other type is delegated to a Fluentd formatter
  # plugin created through the formatter helper.
  #
  # @param conf [Fluent::Config::Element] the <format> section.
  def setup_formatter(conf)
    type = conf['@type']
    case type
    when 'json'
      begin
        require 'oj'
        Oj.default_options = Fluent::DEFAULT_OJ_OPTIONS
        Proc.new { |tag, time, record| Oj.dump(record) }
      rescue LoadError
        require 'yajl'
        Proc.new { |tag, time, record| Yajl::Encoder.encode(record) }
      end
    when 'ltsv'
      require 'ltsv'
      Proc.new { |tag, time, record| LTSV.dump(record) }
    else
      @formatter = formatter_create(usage: 'kafka-plugin', conf: conf)
      @formatter.method(:format)
    end
  end

  # TODO: optimize write performance
  #
  # Formats every record of the chunk and delivers them to Kafka in one
  # deliver_messages call. Records that fail to format are logged and skipped.
  # Any delivery error is logged, the client is refreshed, and the error is
  # re-raised so that the chunk is retried. The producer is always shut down.
  #
  # @param chunk [Fluent::Plugin::Buffer::Chunk]
  def write(chunk)
    tag = chunk.metadata.tag
    topic = chunk.metadata.variables[:topic] || @default_topic || tag
    producer = @kafka.topic_producer(topic, @producer_opts)

    messages = 0

    begin
      chunk.msgpack_each { |time, record|
        begin
          record = inject_values_to_record(tag, time, record)
          record.delete('topic'.freeze) if @exclude_topic_key
          partition_key = (@exclude_partition_key ? record.delete('partition_key'.freeze) : record['partition_key'.freeze]) || @default_partition_key
          partition = (@exclude_partition ? record.delete('partition'.freeze) : record['partition'.freeze]) || @default_partition
          message_key = (@exclude_message_key ? record.delete('message_key'.freeze) : record['message_key'.freeze]) || @default_message_key

          record_buf = @formatter_proc.call(tag, time, record)
        rescue StandardError => e
          log.warn "unexpected error during format record. Skip broken event:", :error => e.to_s, :error_class => e.class.to_s, :time => time, :record => record
          next
        end

        # log.trace with a block builds the message lazily and emits it;
        # log.on_trace only yields, so a bare string block is never logged.
        log.trace { "message will send to #{topic} with partition_key: #{partition_key}, partition: #{partition}, message_key: #{message_key} and value: #{record_buf}." }
        messages += 1

        producer.produce(record_buf, message_key, partition, partition_key)
      }

      if messages > 0
        log.trace { "#{messages} messages send." }
        producer.deliver_messages
      end
    end
  rescue StandardError => e
    log.warn "Send exception occurred: #{e}"
    log.warn "Exception Backtrace : #{e.backtrace.join("\n")}"
    # For safety, refresh client and its producers
    refresh_client(false)
    # Raise exception to retry sending messages
    raise e
  ensure
    # producer is nil when topic_producer itself failed.
    producer.shutdown if producer
  end
end
|
|
187
|
+
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: fluent-plugin-kafka
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.4.2
|
|
4
|
+
version: 0.5.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Hidemasa Togashi
|
|
@@ -9,7 +9,7 @@ authors:
|
|
|
9
9
|
autorequire:
|
|
10
10
|
bindir: bin
|
|
11
11
|
cert_chain: []
|
|
12
|
-
date: 2017-01-
|
|
12
|
+
date: 2017-01-17 00:00:00.000000000 Z
|
|
13
13
|
dependencies:
|
|
14
14
|
- !ruby/object:Gem::Dependency
|
|
15
15
|
name: fluentd
|
|
@@ -108,6 +108,7 @@ files:
|
|
|
108
108
|
- lib/fluent/plugin/kafka_plugin_util.rb
|
|
109
109
|
- lib/fluent/plugin/kafka_producer_ext.rb
|
|
110
110
|
- lib/fluent/plugin/out_kafka.rb
|
|
111
|
+
- lib/fluent/plugin/out_kafka2.rb
|
|
111
112
|
- lib/fluent/plugin/out_kafka_buffered.rb
|
|
112
113
|
- test/helper.rb
|
|
113
114
|
- test/plugin/test_out_kafka.rb
|
|
@@ -130,7 +131,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
130
131
|
version: '0'
|
|
131
132
|
requirements: []
|
|
132
133
|
rubyforge_project:
|
|
133
|
-
rubygems_version: 2.
|
|
134
|
+
rubygems_version: 2.6.8
|
|
134
135
|
signing_key:
|
|
135
136
|
specification_version: 4
|
|
136
137
|
summary: Fluentd plugin for Apache Kafka > 0.8
|