ruby-kafka 0.1.6 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/kafka/async_producer.rb +22 -1
- data/lib/kafka/message_buffer.rb +10 -3
- data/lib/kafka/pending_message.rb +4 -0
- data/lib/kafka/pending_message_queue.rb +43 -0
- data/lib/kafka/produce_operation.rb +8 -2
- data/lib/kafka/producer.rb +84 -21
- data/lib/kafka/protocol/message.rb +4 -0
- data/lib/kafka/protocol/message_set.rb +13 -4
- data/lib/kafka/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: cc49e2526024ee3dd1cb52b5abfef38dbc2e37ea
+  data.tar.gz: a3c0111bccd3e8daf83eec647baec2fa9921fbf8
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 94547594275850a3bacd22758fc0ccce8956c83f0e6e7c46dd1c4a5fb4aa5b52d6199383862cb5087a90b89ee2e0ca63b486082b0d44da2e0994f6cc76e794a7
+  data.tar.gz: 69d7522e0ecd82057fd4b5879273026e6669441e0803c8d28bee83213a66bdbdd13f08051d4b3bb0725c3f19ad9df4dedde7c4a6c0236fac56079b1a666c7ed7
data/lib/kafka/async_producer.rb
CHANGED
@@ -18,6 +18,8 @@ module Kafka
   # By default, automatic delivery is disabled and you'll have to call
   # {#deliver_messages} manually.
   #
+  # ## Buffer Overflow and Backpressure
+  #
   # The calling thread communicates with the background thread doing the actual
   # work using a thread safe queue. While the background thread is busy delivering
   # messages, new messages will be buffered in the queue. In order to avoid
@@ -26,6 +28,17 @@ module Kafka
   # number of messages that is allowed to be buffered. You can configure this
   # value by setting `max_queue_size`.
   #
+  # If you produce messages faster than the background producer thread can
+  # deliver them to Kafka you will eventually fill the producer's buffer. Once
+  # this happens, the background thread will stop popping messages off the
+  # queue until it can successfully deliver the buffered messages. The queue
+  # will therefore grow in size, potentially hitting the `max_queue_size` limit.
+  # Once this happens, calls to {#produce} will raise a {BufferOverflow} error.
+  #
+  # Depending on your use case you may want to slow down the rate of messages
+  # being produced or perhaps halt your application completely until the
+  # producer can deliver the buffered messages and clear the message queue.
+  #
   # ## Example
   #
   #     producer = kafka.async_producer(
@@ -91,6 +104,7 @@ module Kafka
     # @return [nil]
     def produce(*args)
       raise BufferOverflow if @queue.size >= @max_queue_size
+
      @queue << [:produce, args]

       nil
@@ -146,7 +160,7 @@ module Kafka

       case operation
       when :produce
-        @producer.produce(*payload)
+        produce(*payload)
         deliver_messages if threshold_reached?
       when :deliver_messages
         deliver_messages
@@ -166,6 +180,13 @@ module Kafka

     private

+    def produce(*args)
+      @producer.produce(*args)
+    rescue BufferOverflow
+      deliver_messages
+      retry
+    end
+
     def deliver_messages
       @producer.deliver_messages
     rescue DeliveryFailed
data/lib/kafka/message_buffer.rb
CHANGED
@@ -6,22 +6,28 @@ module Kafka
   class MessageBuffer
     include Enumerable

-    attr_reader :size
+    attr_reader :size, :bytesize

     def initialize
       @buffer = {}
       @size = 0
+      @bytesize = 0
     end

     def write(value:, key:, topic:, partition:)
-      @size += 1
       message = Protocol::Message.new(key: key, value: value)
+
       buffer_for(topic, partition) << message
+
+      @size += 1
+      @bytesize += message.bytesize
     end

     def concat(messages, topic:, partition:)
-      @size += messages.count
       buffer_for(topic, partition).concat(messages)
+
+      @size += messages.count
+      @bytesize += messages.map(&:bytesize).reduce(:+)
     end

     def to_h
@@ -48,6 +54,7 @@ module Kafka
     # @return [nil]
     def clear_messages(topic:, partition:)
       @size -= @buffer[topic][partition].count
+      @bytesize -= @buffer[topic][partition].map(&:bytesize).reduce(:+)

       @buffer[topic].delete(partition)
       @buffer.delete(topic) if @buffer[topic].empty?
data/lib/kafka/pending_message.rb
CHANGED
@@ -2,12 +2,16 @@ module Kafka
   class PendingMessage
     attr_reader :value, :key, :topic, :partition, :partition_key

+    attr_reader :bytesize
+
     def initialize(value:, key:, topic:, partition:, partition_key:)
       @key = key
       @value = value
       @topic = topic
       @partition = partition
       @partition_key = partition_key
+
+      @bytesize = key.to_s.bytesize + value.to_s.bytesize
     end
   end
 end
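
Because the byte size is computed with `to_s`, a nil key or value simply contributes zero bytes. A quick illustration (the values are assumed):

    message = Kafka::PendingMessage.new(
      value: "hello",
      key: nil,
      topic: "greetings",
      partition: nil,
      partition_key: nil,
    )

    message.bytesize #=> 5, since nil.to_s is the empty string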
data/lib/kafka/pending_message_queue.rb
ADDED
@@ -0,0 +1,43 @@
+module Kafka
+
+  # A pending message queue holds messages that have not yet been assigned to
+  # a partition. It's designed to only remove messages once they've been
+  # successfully handled.
+  class PendingMessageQueue
+    attr_reader :size, :bytesize
+
+    def initialize
+      @messages = []
+      @size = 0
+      @bytesize = 0
+    end
+
+    def write(message)
+      @messages << message
+      @size += 1
+      @bytesize += message.bytesize
+    end
+
+    def empty?
+      @messages.empty?
+    end
+
+    # Yields each message in the queue to the provided block, removing the
+    # message after the block has processed it. If the block raises an
+    # exception, the message will be retained in the queue.
+    #
+    # @yieldparam [PendingMessage] message
+    # @return [nil]
+    def dequeue_each(&block)
+      until @messages.empty?
+        message = @messages.first
+
+        yield message
+
+        @size -= 1
+        @bytesize -= message.bytesize
+        @messages.shift
+      end
+    end
+  end
+end
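
The point of `dequeue_each` is that a message only leaves the queue after the block returns normally, so a failed partition assignment leaves it in place for the next delivery attempt. A sketch, where `messages` and `assign_partition!` are assumed stand-ins:

    queue = Kafka::PendingMessageQueue.new
    messages.each {|message| queue.write(message) }

    begin
      queue.dequeue_each do |message|
        # If this raises, `message` stays at the head of the queue and will
        # be yielded again on the next call to dequeue_each.
        assign_partition!(message)
      end
    rescue Kafka::Error
      # Retry later; unprocessed messages are still queued.
    end

    queue.empty? #=> true only if every message was handled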
data/lib/kafka/produce_operation.rb
CHANGED
@@ -25,12 +25,13 @@ module Kafka
   # * `sent_message_count` – the number of messages that were successfully sent.
   #
   class ProduceOperation
-    def initialize(cluster:, buffer:, compression_codec:, required_acks:, ack_timeout:, logger:)
+    def initialize(cluster:, buffer:, compression_codec:, compression_threshold:, required_acks:, ack_timeout:, logger:)
       @cluster = cluster
       @buffer = buffer
       @required_acks = required_acks
       @ack_timeout = ack_timeout
       @compression_codec = compression_codec
+      @compression_threshold = compression_threshold
       @logger = logger
     end

@@ -77,7 +78,12 @@ module Kafka
       messages_for_topics = {}

       message_buffer.each do |topic, partition, messages|
-        message_set = Protocol::MessageSet.new(
+        message_set = Protocol::MessageSet.new(
+          messages: messages,
+          compression_codec: @compression_codec,
+          compression_threshold: @compression_threshold,
+        )
+
         messages_for_topics[topic] ||= {}
         messages_for_topics[topic][partition] = message_set
       end
data/lib/kafka/producer.rb
CHANGED
@@ -1,6 +1,8 @@
+require "set"
 require "kafka/partitioner"
 require "kafka/message_buffer"
 require "kafka/produce_operation"
+require "kafka/pending_message_queue"
 require "kafka/pending_message"
 require "kafka/compression"

@@ -47,10 +49,42 @@ module Kafka
  # not, we do another round of requests, this time with just the remaining messages.
  # We do this for as long as `max_retries` permits.
  #
+  # ## Compression
+  #
+  # Depending on what kind of data you produce, enabling compression may yield improved
+  # bandwidth and space usage. Compression in Kafka is done on entire message sets
+  # rather than on individual messages. This improves the compression rate and generally
+  # means that compression works better the larger your buffers get, since the message
+  # sets will be larger by the time they're compressed.
+  #
+  # Since many workloads have variations in throughput and distribution across partitions,
+  # it's possible to configure a threshold for when to enable compression by setting
+  # `compression_threshold`. Only if the defined number of messages are buffered for a
+  # partition will the messages be compressed.
+  #
+  # Compression is enabled by passing the `compression_codec` parameter with the
+  # name of one of the algorithms allowed by Kafka:
+  #
+  # * `:snappy` for [Snappy](http://google.github.io/snappy/) compression.
+  # * `:gzip` for [gzip](https://en.wikipedia.org/wiki/Gzip) compression.
+  #
+  # By default, all message sets will be compressed if you specify a compression
+  # codec. To increase the compression threshold, set `compression_threshold` to
+  # an integer value higher than one.
+  #
   # ## Instrumentation
   #
+  # Whenever {#produce} is called, the notification `produce_message.producer.kafka`
+  # will be emitted with the following payload:
+  #
+  # * `value` – the message value.
+  # * `key` – the message key.
+  # * `topic` – the topic that was produced to.
+  # * `buffer_size` – the buffer size after adding the message.
+  # * `max_buffer_size` – the maximum allowed buffer size for the producer.
+  #
   # After {#deliver_messages} completes, the notification
-  # `deliver_messages.producer.kafka` will be emitted
+  # `deliver_messages.producer.kafka` will be emitted with the following payload:
   #
   # * `message_count` – the total number of messages that the producer tried to
   #   deliver. Note that not all messages may get delivered.
@@ -116,9 +150,21 @@ module Kafka
   # @param retry_backoff [Integer] the number of seconds to wait between retries.
   #
   # @param max_buffer_size [Integer] the number of messages allowed in the buffer
-  #   before new writes will raise BufferOverflow exceptions.
+  #   before new writes will raise {BufferOverflow} exceptions.
+  #
+  # @param max_buffer_bytesize [Integer] the maximum size of the buffer in bytes.
+  #   Attempting to produce messages when the buffer reaches this size will
+  #   result in {BufferOverflow} being raised.
+  #
+  # @param compression_codec [Symbol, nil] the name of the compression codec to
+  #   use, or nil if no compression should be performed. Valid codecs: `:snappy`
+  #   and `:gzip`.
+  #
+  # @param compression_threshold [Integer] the number of messages that need to
+  #   be in a message set before it should be compressed. Note that message sets
+  #   are per-partition rather than per-topic or per-producer.
   #
-  def initialize(cluster:, logger:, compression_codec: nil, ack_timeout: 5, required_acks: 1, max_retries: 2, retry_backoff: 1, max_buffer_size: 1000)
+  def initialize(cluster:, logger:, compression_codec: nil, compression_threshold: 1, ack_timeout: 5, required_acks: 1, max_retries: 2, retry_backoff: 1, max_buffer_size: 1000, max_buffer_bytesize: 10_000_000)
     @cluster = cluster
     @logger = logger
     @required_acks = required_acks
@@ -126,13 +172,18 @@ module Kafka
     @max_retries = max_retries
     @retry_backoff = retry_backoff
     @max_buffer_size = max_buffer_size
+    @max_buffer_bytesize = max_buffer_bytesize
     @compression_codec = Compression.find_codec(compression_codec)
+    @compression_threshold = compression_threshold
+
+    # The set of topics that are produced to.
+    @target_topics = Set.new

     # A buffer organized by topic/partition.
     @buffer = MessageBuffer.new

     # Messages added by `#produce` but not yet assigned a partition.
-    @pending_messages = []
+    @pending_message_queue = PendingMessageQueue.new
   end

   # Produces a message to the specified topic. Note that messages are buffered in
@@ -165,11 +216,7 @@ module Kafka
   # @raise [BufferOverflow] if the maximum buffer size has been reached.
   # @return [nil]
   def produce(value, key: nil, topic:, partition: nil, partition_key: nil)
-    if buffer_size >= @max_buffer_size
-      raise BufferOverflow, "Max buffer size #{@max_buffer_size} exceeded"
-    end
-
-    @pending_messages << PendingMessage.new(
+    message = PendingMessage.new(
       value: value,
       key: key,
       topic: topic,
@@ -177,6 +224,25 @@ module Kafka
       partition_key: partition_key,
     )

+    if buffer_size >= @max_buffer_size
+      raise BufferOverflow, "Max buffer size (#{@max_buffer_size} messages) exceeded"
+    end
+
+    if buffer_bytesize + message.bytesize >= @max_buffer_bytesize
+      raise BufferOverflow, "Max buffer bytesize (#{@max_buffer_bytesize} bytes) exceeded"
+    end
+
+    @target_topics.add(topic)
+    @pending_message_queue.write(message)
+
+    Instrumentation.instrument("produce_message.producer.kafka", {
+      value: value,
+      key: key,
+      topic: topic,
+      buffer_size: buffer_size,
+      max_buffer_size: @max_buffer_size,
+    })
+
     nil
   end

@@ -211,7 +277,11 @@ module Kafka
   #
   # @return [Integer] buffer size.
   def buffer_size
-    @pending_messages.size + @buffer.size
+    @pending_message_queue.size + @buffer.size
+  end
+
+  def buffer_bytesize
+    @pending_message_queue.bytesize + @buffer.bytesize
   end

   # Closes all connections to the brokers.
@@ -226,9 +296,7 @@ module Kafka
   def deliver_messages_with_retries(notification)
     attempt = 0

-
-    target_topics = @pending_messages.map(&:topic).uniq
-    @cluster.add_target_topics(target_topics)
+    @cluster.add_target_topics(@target_topics)

     operation = ProduceOperation.new(
       cluster: @cluster,
@@ -236,6 +304,7 @@ module Kafka
       required_acks: @required_acks,
       ack_timeout: @ack_timeout,
       compression_codec: @compression_codec,
+      compression_threshold: @compression_threshold,
       logger: @logger,
     )

@@ -249,7 +318,7 @@ module Kafka
       assign_partitions!
       operation.execute

-      if @buffer.empty?
+      if buffer_size.zero?
         break
       elsif attempt <= @max_retries
         @logger.warn "Failed to send all messages; attempting retry #{attempt} of #{@max_retries} after #{@retry_backoff}s"
@@ -276,10 +345,7 @@ module Kafka
     end

     def assign_partitions!
-      until @pending_messages.empty?
-        # We want to keep the message in the first-stage buffer in case there's an error.
-        message = @pending_messages.first
-
+      @pending_message_queue.dequeue_each do |message|
         partition = message.partition

         if partition.nil?
@@ -293,9 +359,6 @@ module Kafka
           topic: message.topic,
           partition: partition,
         )
-
-        # Now it's safe to remove the message from the first-stage buffer.
-        @pending_messages.shift
       end
     rescue Kafka::Error => e
       @logger.error "Failed to assign pending message to a partition: #{e}"
data/lib/kafka/protocol/message.rb
CHANGED
@@ -18,11 +18,15 @@ module Kafka

     attr_reader :key, :value, :attributes, :offset

+    attr_reader :bytesize
+
     def initialize(value:, key: nil, attributes: 0, offset: -1)
       @key = key
       @value = value
       @attributes = attributes
       @offset = offset
+
+      @bytesize = @key.to_s.bytesize + @value.to_s.bytesize
     end

     def encode(encoder)
data/lib/kafka/protocol/message_set.rb
CHANGED
@@ -3,9 +3,14 @@ module Kafka
     class MessageSet
       attr_reader :messages

-      def initialize(messages: [], compression_codec: nil)
+      def initialize(messages: [], compression_codec: nil, compression_threshold: 1)
         @messages = messages
         @compression_codec = compression_codec
+        @compression_threshold = compression_threshold
+      end
+
+      def size
+        @messages.size
       end

       def ==(other)
@@ -13,10 +18,10 @@ module Kafka
       end

       def encode(encoder)
-        if @compression_codec.nil?
-          encode_without_compression(encoder)
-        else
+        if compress?
           encode_with_compression(encoder)
+        else
+          encode_without_compression(encoder)
         end
       end

@@ -39,6 +44,10 @@ module Kafka

       private

+      def compress?
+        !@compression_codec.nil? && size >= @compression_threshold
+      end
+
       def encode_with_compression(encoder)
         codec = @compression_codec

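
`compress?` is the single decision point: a set is compressed only when a codec is configured and the set holds at least `compression_threshold` messages. A sketch using the `Compression.find_codec` helper seen in producer.rb; the threshold and `messages` are illustrative:

    codec = Kafka::Compression.find_codec(:snappy)

    message_set = Kafka::Protocol::MessageSet.new(
      messages: messages, # assumed: an array of Kafka::Protocol::Message
      compression_codec: codec,
      compression_threshold: 10,
    )

    # On encode, fewer than 10 messages are written uncompressed;
    # 10 or more are wrapped in a compressed message set.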
data/lib/kafka/version.rb
CHANGED
@@ -1,3 +1,3 @@
 module Kafka
-  VERSION = "0.1.6"
+  VERSION = "0.1.7"
 end
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: ruby-kafka
 version: !ruby/object:Gem::Version
-  version: 0.1.6
+  version: 0.1.7
 platform: ruby
 authors:
 - Daniel Schierbeck
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2016-02-
+date: 2016-02-24 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -179,6 +179,7 @@ files:
 - lib/kafka/message_buffer.rb
 - lib/kafka/partitioner.rb
 - lib/kafka/pending_message.rb
+- lib/kafka/pending_message_queue.rb
 - lib/kafka/produce_operation.rb
 - lib/kafka/producer.rb
 - lib/kafka/protocol.rb