ruby-kafka 0.1.6 → 0.1.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/kafka/async_producer.rb +22 -1
- data/lib/kafka/message_buffer.rb +10 -3
- data/lib/kafka/pending_message.rb +4 -0
- data/lib/kafka/pending_message_queue.rb +43 -0
- data/lib/kafka/produce_operation.rb +8 -2
- data/lib/kafka/producer.rb +84 -21
- data/lib/kafka/protocol/message.rb +4 -0
- data/lib/kafka/protocol/message_set.rb +13 -4
- data/lib/kafka/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cc49e2526024ee3dd1cb52b5abfef38dbc2e37ea
|
4
|
+
data.tar.gz: a3c0111bccd3e8daf83eec647baec2fa9921fbf8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 94547594275850a3bacd22758fc0ccce8956c83f0e6e7c46dd1c4a5fb4aa5b52d6199383862cb5087a90b89ee2e0ca63b486082b0d44da2e0994f6cc76e794a7
|
7
|
+
data.tar.gz: 69d7522e0ecd82057fd4b5879273026e6669441e0803c8d28bee83213a66bdbdd13f08051d4b3bb0725c3f19ad9df4dedde7c4a6c0236fac56079b1a666c7ed7
|
data/lib/kafka/async_producer.rb
CHANGED
@@ -18,6 +18,8 @@ module Kafka
|
|
18
18
|
# By default, automatic delivery is disabled and you'll have to call
|
19
19
|
# {#deliver_messages} manually.
|
20
20
|
#
|
21
|
+
# ## Buffer Overflow and Backpressure
|
22
|
+
#
|
21
23
|
# The calling thread communicates with the background thread doing the actual
|
22
24
|
# work using a thread safe queue. While the background thread is busy delivering
|
23
25
|
# messages, new messages will be buffered in the queue. In order to avoid
|
@@ -26,6 +28,17 @@ module Kafka
|
|
26
28
|
# number of messages that is allowed to be buffered. You can configure this
|
27
29
|
# value by setting `max_queue_size`.
|
28
30
|
#
|
31
|
+
# If you produce messages faster than the background producer thread can
|
32
|
+
# deliver them to Kafka you will eventually fill the producer's buffer. Once
|
33
|
+
# this happens, the background thread will stop popping messages off the
|
34
|
+
# queue until it can successfully deliver the buffered messages. The queue
|
35
|
+
# will therefore grow in size, potentially hitting the `max_queue_size` limit.
|
36
|
+
# Once this happens, calls to {#produce} will raise a {BufferOverflow} error.
|
37
|
+
#
|
38
|
+
# Depending on your use case you may want to slow down the rate of messages
|
39
|
+
# being produced or perhaps halt your application completely until the
|
40
|
+
# producer can deliver the buffered messages and clear the message queue.
|
41
|
+
#
|
29
42
|
# ## Example
|
30
43
|
#
|
31
44
|
# producer = kafka.async_producer(
|
@@ -91,6 +104,7 @@ module Kafka
|
|
91
104
|
# @return [nil]
|
92
105
|
def produce(*args)
|
93
106
|
raise BufferOverflow if @queue.size >= @max_queue_size
|
107
|
+
|
94
108
|
@queue << [:produce, args]
|
95
109
|
|
96
110
|
nil
|
@@ -146,7 +160,7 @@ module Kafka
|
|
146
160
|
|
147
161
|
case operation
|
148
162
|
when :produce
|
149
|
-
|
163
|
+
produce(*payload)
|
150
164
|
deliver_messages if threshold_reached?
|
151
165
|
when :deliver_messages
|
152
166
|
deliver_messages
|
@@ -166,6 +180,13 @@ module Kafka
|
|
166
180
|
|
167
181
|
private
|
168
182
|
|
183
|
+
def produce(*args)
|
184
|
+
@producer.produce(*args)
|
185
|
+
rescue BufferOverflow
|
186
|
+
deliver_messages
|
187
|
+
retry
|
188
|
+
end
|
189
|
+
|
169
190
|
def deliver_messages
|
170
191
|
@producer.deliver_messages
|
171
192
|
rescue DeliveryFailed
|
data/lib/kafka/message_buffer.rb
CHANGED
@@ -6,22 +6,28 @@ module Kafka
|
|
6
6
|
class MessageBuffer
|
7
7
|
include Enumerable
|
8
8
|
|
9
|
-
attr_reader :size
|
9
|
+
attr_reader :size, :bytesize
|
10
10
|
|
11
11
|
def initialize
|
12
12
|
@buffer = {}
|
13
13
|
@size = 0
|
14
|
+
@bytesize = 0
|
14
15
|
end
|
15
16
|
|
16
17
|
def write(value:, key:, topic:, partition:)
|
17
|
-
@size += 1
|
18
18
|
message = Protocol::Message.new(key: key, value: value)
|
19
|
+
|
19
20
|
buffer_for(topic, partition) << message
|
21
|
+
|
22
|
+
@size += 1
|
23
|
+
@bytesize += message.bytesize
|
20
24
|
end
|
21
25
|
|
22
26
|
def concat(messages, topic:, partition:)
|
23
|
-
@size += messages.count
|
24
27
|
buffer_for(topic, partition).concat(messages)
|
28
|
+
|
29
|
+
@size += messages.count
|
30
|
+
@bytesize += messages.map(&:bytesize).reduce(:+)
|
25
31
|
end
|
26
32
|
|
27
33
|
def to_h
|
@@ -48,6 +54,7 @@ module Kafka
|
|
48
54
|
# @return [nil]
|
49
55
|
def clear_messages(topic:, partition:)
|
50
56
|
@size -= @buffer[topic][partition].count
|
57
|
+
@bytesize -= @buffer[topic][partition].map(&:bytesize).reduce(:+)
|
51
58
|
|
52
59
|
@buffer[topic].delete(partition)
|
53
60
|
@buffer.delete(topic) if @buffer[topic].empty?
|
@@ -2,12 +2,16 @@ module Kafka
|
|
2
2
|
class PendingMessage
|
3
3
|
attr_reader :value, :key, :topic, :partition, :partition_key
|
4
4
|
|
5
|
+
attr_reader :bytesize
|
6
|
+
|
5
7
|
def initialize(value:, key:, topic:, partition:, partition_key:)
|
6
8
|
@key = key
|
7
9
|
@value = value
|
8
10
|
@topic = topic
|
9
11
|
@partition = partition
|
10
12
|
@partition_key = partition_key
|
13
|
+
|
14
|
+
@bytesize = key.to_s.bytesize + value.to_s.bytesize
|
11
15
|
end
|
12
16
|
end
|
13
17
|
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
module Kafka
|
2
|
+
|
3
|
+
# A pending message queue holds messages that have not yet been assigned to
|
4
|
+
# a partition. It's designed to only remove messages once they've been
|
5
|
+
# successfully handled.
|
6
|
+
class PendingMessageQueue
|
7
|
+
attr_reader :size, :bytesize
|
8
|
+
|
9
|
+
def initialize
|
10
|
+
@messages = []
|
11
|
+
@size = 0
|
12
|
+
@bytesize = 0
|
13
|
+
end
|
14
|
+
|
15
|
+
def write(message)
|
16
|
+
@messages << message
|
17
|
+
@size += 1
|
18
|
+
@bytesize += message.bytesize
|
19
|
+
end
|
20
|
+
|
21
|
+
def empty?
|
22
|
+
@messages.empty?
|
23
|
+
end
|
24
|
+
|
25
|
+
# Yields each message in the queue to the provided block, removing the
|
26
|
+
# message after the block has processed it. If the block raises an
|
27
|
+
# exception, the message will be retained in the queue.
|
28
|
+
#
|
29
|
+
# @yieldparam [PendingMessage] message
|
30
|
+
# @return [nil]
|
31
|
+
def dequeue_each(&block)
|
32
|
+
until @messages.empty?
|
33
|
+
message = @messages.first
|
34
|
+
|
35
|
+
yield message
|
36
|
+
|
37
|
+
@size -= 1
|
38
|
+
@bytesize -= message.bytesize
|
39
|
+
@messages.shift
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
@@ -25,12 +25,13 @@ module Kafka
|
|
25
25
|
# * `sent_message_count` – the number of messages that were successfully sent.
|
26
26
|
#
|
27
27
|
class ProduceOperation
|
28
|
-
def initialize(cluster:, buffer:, compression_codec:, required_acks:, ack_timeout:, logger:)
|
28
|
+
def initialize(cluster:, buffer:, compression_codec:, compression_threshold:, required_acks:, ack_timeout:, logger:)
|
29
29
|
@cluster = cluster
|
30
30
|
@buffer = buffer
|
31
31
|
@required_acks = required_acks
|
32
32
|
@ack_timeout = ack_timeout
|
33
33
|
@compression_codec = compression_codec
|
34
|
+
@compression_threshold = compression_threshold
|
34
35
|
@logger = logger
|
35
36
|
end
|
36
37
|
|
@@ -77,7 +78,12 @@ module Kafka
|
|
77
78
|
messages_for_topics = {}
|
78
79
|
|
79
80
|
message_buffer.each do |topic, partition, messages|
|
80
|
-
message_set = Protocol::MessageSet.new(
|
81
|
+
message_set = Protocol::MessageSet.new(
|
82
|
+
messages: messages,
|
83
|
+
compression_codec: @compression_codec,
|
84
|
+
compression_threshold: @compression_threshold,
|
85
|
+
)
|
86
|
+
|
81
87
|
messages_for_topics[topic] ||= {}
|
82
88
|
messages_for_topics[topic][partition] = message_set
|
83
89
|
end
|
data/lib/kafka/producer.rb
CHANGED
@@ -1,6 +1,8 @@
|
|
1
|
+
require "set"
|
1
2
|
require "kafka/partitioner"
|
2
3
|
require "kafka/message_buffer"
|
3
4
|
require "kafka/produce_operation"
|
5
|
+
require "kafka/pending_message_queue"
|
4
6
|
require "kafka/pending_message"
|
5
7
|
require "kafka/compression"
|
6
8
|
|
@@ -47,10 +49,42 @@ module Kafka
|
|
47
49
|
# not, we do another round of requests, this time with just the remaining messages.
|
48
50
|
# We do this for as long as `max_retries` permits.
|
49
51
|
#
|
52
|
+
# ## Compression
|
53
|
+
#
|
54
|
+
# Depending on what kind of data you produce, enabling compression may yield improved
|
55
|
+
# bandwidth and space usage. Compression in Kafka is done on entire message sets
|
56
|
+
# rather than on individual messages. This improves the compression rate and generally
|
57
|
+
# means that compression works better the larger your buffers get, since the message
|
58
|
+
# sets will be larger by the time they're compressed.
|
59
|
+
#
|
60
|
+
# Since many workloads have variations in throughput and distribution across partitions,
|
61
|
+
# it's possible to configure a threshold for when to enable compression by setting
|
62
|
+
# `compression_threshold`. Only if the defined number of messages are buffered for a
|
63
|
+
# partition will the messages be compressed.
|
64
|
+
#
|
65
|
+
# Compression is enabled by passing the `compression_codec` parameter with the
|
66
|
+
# name of one of the algorithms allowed by Kafka:
|
67
|
+
#
|
68
|
+
# * `:snappy` for [Snappy](http://google.github.io/snappy/) compression.
|
69
|
+
# * `:gzip` for [gzip](https://en.wikipedia.org/wiki/Gzip) compression.
|
70
|
+
#
|
71
|
+
# By default, all message sets will be compressed if you specify a compression
|
72
|
+
# codec. To increase the compression threshold, set `compression_threshold` to
|
73
|
+
# an integer value higher than one.
|
74
|
+
#
|
50
75
|
# ## Instrumentation
|
51
76
|
#
|
77
|
+
# Whenever {#produce} is called, the notification `produce_message.producer.kafka`
|
78
|
+
# will be emitted with the following payload:
|
79
|
+
#
|
80
|
+
# * `value` – the message value.
|
81
|
+
# * `key` – the message key.
|
82
|
+
# * `topic` – the topic that was produced to.
|
83
|
+
# * `buffer_size` – the buffer size after adding the message.
|
84
|
+
# * `max_buffer_size` – the maximum allowed buffer size for the producer.
|
85
|
+
#
|
52
86
|
# After {#deliver_messages} completes, the notification
|
53
|
-
# `deliver_messages.producer.kafka` will be emitted
|
87
|
+
# `deliver_messages.producer.kafka` will be emitted with the following payload:
|
54
88
|
#
|
55
89
|
# * `message_count` – the total number of messages that the producer tried to
|
56
90
|
# deliver. Note that not all messages may get delivered.
|
@@ -116,9 +150,21 @@ module Kafka
|
|
116
150
|
# @param retry_backoff [Integer] the number of seconds to wait between retries.
|
117
151
|
#
|
118
152
|
# @param max_buffer_size [Integer] the number of messages allowed in the buffer
|
119
|
-
# before new writes will raise BufferOverflow exceptions.
|
153
|
+
# before new writes will raise {BufferOverflow} exceptions.
|
154
|
+
#
|
155
|
+
# @param max_buffer_bytesize [Integer] the maximum size of the buffer in bytes.
|
156
|
+
# Attempting to produce messages when the buffer reaches this size will
|
157
|
+
# result in {BufferOverflow} being raised.
|
158
|
+
#
|
159
|
+
# @param compression_codec [Symbol, nil] the name of the compression codec to
|
160
|
+
# use, or nil if no compression should be performed. Valid codecs: `:snappy`
|
161
|
+
# and `:gzip`.
|
162
|
+
#
|
163
|
+
# @param compression_threshold [Integer] the number of messages that need to
|
164
|
+
# be in a message set before it should be compressed. Note that message sets
|
165
|
+
# are per-partition rather than per-topic or per-producer.
|
120
166
|
#
|
121
|
-
def initialize(cluster:, logger:, compression_codec: nil, ack_timeout: 5, required_acks: 1, max_retries: 2, retry_backoff: 1, max_buffer_size: 1000)
|
167
|
+
def initialize(cluster:, logger:, compression_codec: nil, compression_threshold: 1, ack_timeout: 5, required_acks: 1, max_retries: 2, retry_backoff: 1, max_buffer_size: 1000, max_buffer_bytesize: 10_000_000)
|
122
168
|
@cluster = cluster
|
123
169
|
@logger = logger
|
124
170
|
@required_acks = required_acks
|
@@ -126,13 +172,18 @@ module Kafka
|
|
126
172
|
@max_retries = max_retries
|
127
173
|
@retry_backoff = retry_backoff
|
128
174
|
@max_buffer_size = max_buffer_size
|
175
|
+
@max_buffer_bytesize = max_buffer_bytesize
|
129
176
|
@compression_codec = Compression.find_codec(compression_codec)
|
177
|
+
@compression_threshold = compression_threshold
|
178
|
+
|
179
|
+
# The set of topics that are produced to.
|
180
|
+
@target_topics = Set.new
|
130
181
|
|
131
182
|
# A buffer organized by topic/partition.
|
132
183
|
@buffer = MessageBuffer.new
|
133
184
|
|
134
185
|
# Messages added by `#produce` but not yet assigned a partition.
|
135
|
-
@
|
186
|
+
@pending_message_queue = PendingMessageQueue.new
|
136
187
|
end
|
137
188
|
|
138
189
|
# Produces a message to the specified topic. Note that messages are buffered in
|
@@ -165,11 +216,7 @@ module Kafka
|
|
165
216
|
# @raise [BufferOverflow] if the maximum buffer size has been reached.
|
166
217
|
# @return [nil]
|
167
218
|
def produce(value, key: nil, topic:, partition: nil, partition_key: nil)
|
168
|
-
|
169
|
-
raise BufferOverflow, "Max buffer size #{@max_buffer_size} exceeded"
|
170
|
-
end
|
171
|
-
|
172
|
-
@pending_messages << PendingMessage.new(
|
219
|
+
message = PendingMessage.new(
|
173
220
|
value: value,
|
174
221
|
key: key,
|
175
222
|
topic: topic,
|
@@ -177,6 +224,25 @@ module Kafka
|
|
177
224
|
partition_key: partition_key,
|
178
225
|
)
|
179
226
|
|
227
|
+
if buffer_size >= @max_buffer_size
|
228
|
+
raise BufferOverflow, "Max buffer size (#{@max_buffer_size} messages) exceeded"
|
229
|
+
end
|
230
|
+
|
231
|
+
if buffer_bytesize + message.bytesize >= @max_buffer_bytesize
|
232
|
+
raise BufferOverflow, "Max buffer bytesize (#{@max_buffer_bytesize} bytes) exceeded"
|
233
|
+
end
|
234
|
+
|
235
|
+
@target_topics.add(topic)
|
236
|
+
@pending_message_queue.write(message)
|
237
|
+
|
238
|
+
Instrumentation.instrument("produce_message.producer.kafka", {
|
239
|
+
value: value,
|
240
|
+
key: key,
|
241
|
+
topic: topic,
|
242
|
+
buffer_size: buffer_size,
|
243
|
+
max_buffer_size: @max_buffer_size,
|
244
|
+
})
|
245
|
+
|
180
246
|
nil
|
181
247
|
end
|
182
248
|
|
@@ -211,7 +277,11 @@ module Kafka
|
|
211
277
|
#
|
212
278
|
# @return [Integer] buffer size.
|
213
279
|
def buffer_size
|
214
|
-
@
|
280
|
+
@pending_message_queue.size + @buffer.size
|
281
|
+
end
|
282
|
+
|
283
|
+
def buffer_bytesize
|
284
|
+
@pending_message_queue.bytesize + @buffer.bytesize
|
215
285
|
end
|
216
286
|
|
217
287
|
# Closes all connections to the brokers.
|
@@ -226,9 +296,7 @@ module Kafka
|
|
226
296
|
def deliver_messages_with_retries(notification)
|
227
297
|
attempt = 0
|
228
298
|
|
229
|
-
|
230
|
-
target_topics = @pending_messages.map(&:topic).uniq
|
231
|
-
@cluster.add_target_topics(target_topics)
|
299
|
+
@cluster.add_target_topics(@target_topics)
|
232
300
|
|
233
301
|
operation = ProduceOperation.new(
|
234
302
|
cluster: @cluster,
|
@@ -236,6 +304,7 @@ module Kafka
|
|
236
304
|
required_acks: @required_acks,
|
237
305
|
ack_timeout: @ack_timeout,
|
238
306
|
compression_codec: @compression_codec,
|
307
|
+
compression_threshold: @compression_threshold,
|
239
308
|
logger: @logger,
|
240
309
|
)
|
241
310
|
|
@@ -249,7 +318,7 @@ module Kafka
|
|
249
318
|
assign_partitions!
|
250
319
|
operation.execute
|
251
320
|
|
252
|
-
if
|
321
|
+
if buffer_size.zero?
|
253
322
|
break
|
254
323
|
elsif attempt <= @max_retries
|
255
324
|
@logger.warn "Failed to send all messages; attempting retry #{attempt} of #{@max_retries} after #{@retry_backoff}s"
|
@@ -276,10 +345,7 @@ module Kafka
|
|
276
345
|
end
|
277
346
|
|
278
347
|
def assign_partitions!
|
279
|
-
|
280
|
-
# We want to keep the message in the first-stage buffer in case there's an error.
|
281
|
-
message = @pending_messages.first
|
282
|
-
|
348
|
+
@pending_message_queue.dequeue_each do |message|
|
283
349
|
partition = message.partition
|
284
350
|
|
285
351
|
if partition.nil?
|
@@ -293,9 +359,6 @@ module Kafka
|
|
293
359
|
topic: message.topic,
|
294
360
|
partition: partition,
|
295
361
|
)
|
296
|
-
|
297
|
-
# Now it's safe to remove the message from the first-stage buffer.
|
298
|
-
@pending_messages.shift
|
299
362
|
end
|
300
363
|
rescue Kafka::Error => e
|
301
364
|
@logger.error "Failed to assign pending message to a partition: #{e}"
|
@@ -18,11 +18,15 @@ module Kafka
|
|
18
18
|
|
19
19
|
attr_reader :key, :value, :attributes, :offset
|
20
20
|
|
21
|
+
attr_reader :bytesize
|
22
|
+
|
21
23
|
def initialize(value:, key: nil, attributes: 0, offset: -1)
|
22
24
|
@key = key
|
23
25
|
@value = value
|
24
26
|
@attributes = attributes
|
25
27
|
@offset = offset
|
28
|
+
|
29
|
+
@bytesize = @key.to_s.bytesize + @value.to_s.bytesize
|
26
30
|
end
|
27
31
|
|
28
32
|
def encode(encoder)
|
@@ -3,9 +3,14 @@ module Kafka
|
|
3
3
|
class MessageSet
|
4
4
|
attr_reader :messages
|
5
5
|
|
6
|
-
def initialize(messages: [], compression_codec: nil)
|
6
|
+
def initialize(messages: [], compression_codec: nil, compression_threshold: 1)
|
7
7
|
@messages = messages
|
8
8
|
@compression_codec = compression_codec
|
9
|
+
@compression_threshold = compression_threshold
|
10
|
+
end
|
11
|
+
|
12
|
+
def size
|
13
|
+
@messages.size
|
9
14
|
end
|
10
15
|
|
11
16
|
def ==(other)
|
@@ -13,10 +18,10 @@ module Kafka
|
|
13
18
|
end
|
14
19
|
|
15
20
|
def encode(encoder)
|
16
|
-
if
|
17
|
-
encode_without_compression(encoder)
|
18
|
-
else
|
21
|
+
if compress?
|
19
22
|
encode_with_compression(encoder)
|
23
|
+
else
|
24
|
+
encode_without_compression(encoder)
|
20
25
|
end
|
21
26
|
end
|
22
27
|
|
@@ -39,6 +44,10 @@ module Kafka
|
|
39
44
|
|
40
45
|
private
|
41
46
|
|
47
|
+
def compress?
|
48
|
+
!@compression_codec.nil? && size >= @compression_threshold
|
49
|
+
end
|
50
|
+
|
42
51
|
def encode_with_compression(encoder)
|
43
52
|
codec = @compression_codec
|
44
53
|
|
data/lib/kafka/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruby-kafka
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.6
|
4
|
+
version: 0.1.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Daniel Schierbeck
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-02-
|
11
|
+
date: 2016-02-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -179,6 +179,7 @@ files:
|
|
179
179
|
- lib/kafka/message_buffer.rb
|
180
180
|
- lib/kafka/partitioner.rb
|
181
181
|
- lib/kafka/pending_message.rb
|
182
|
+
- lib/kafka/pending_message_queue.rb
|
182
183
|
- lib/kafka/produce_operation.rb
|
183
184
|
- lib/kafka/producer.rb
|
184
185
|
- lib/kafka/protocol.rb
|