ruby-kafka 0.1.6 → 0.1.7

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: 50a1c28cf71285d37c57e3dbe7a5d156f891576c
- data.tar.gz: f8a139dc4061ec8a771f86c0a5ca280692ddd5cc
+ metadata.gz: cc49e2526024ee3dd1cb52b5abfef38dbc2e37ea
+ data.tar.gz: a3c0111bccd3e8daf83eec647baec2fa9921fbf8
  SHA512:
- metadata.gz: 6d3245db50893aba63b50b600903dc0baf345017c940dcaa36b0e2fbe0f50fe231bc15f9af2dc083d4969fd63ba0493781570811a3ba4ba0e7df21b06e0fd993
- data.tar.gz: d5cddd687cc85f02b0826e95205114e2f64cffd9dad48ef692e717dbb3f7cac41ce2b0afd71ee271e8590e27b9eac42d8f67f247c71fe760dec2915191361273
+ metadata.gz: 94547594275850a3bacd22758fc0ccce8956c83f0e6e7c46dd1c4a5fb4aa5b52d6199383862cb5087a90b89ee2e0ca63b486082b0d44da2e0994f6cc76e794a7
+ data.tar.gz: 69d7522e0ecd82057fd4b5879273026e6669441e0803c8d28bee83213a66bdbdd13f08051d4b3bb0725c3f19ad9df4dedde7c4a6c0236fac56079b1a666c7ed7
@@ -18,6 +18,8 @@ module Kafka
  # By default, automatic delivery is disabled and you'll have to call
  # {#deliver_messages} manually.
  #
+ # ## Buffer Overflow and Backpressure
+ #
  # The calling thread communicates with the background thread doing the actual
  # work using a thread safe queue. While the background thread is busy delivering
  # messages, new messages will be buffered in the queue. In order to avoid
@@ -26,6 +28,17 @@ module Kafka
  # number of messages that is allowed to be buffered. You can configure this
  # value by setting `max_queue_size`.
  #
+ # If you produce messages faster than the background producer thread can
+ # deliver them to Kafka you will eventually fill the producer's buffer. Once
+ # this happens, the background thread will stop popping messages off the
+ # queue until it can successfully deliver the buffered messages. The queue
+ # will therefore grow in size, potentially hitting the `max_queue_size` limit.
+ # Once this happens, calls to {#produce} will raise a {BufferOverflow} error.
+ #
+ # Depending on your use case you may want to slow down the rate of messages
+ # being produced or perhaps halt your application completely until the
+ # producer can deliver the buffered messages and clear the message queue.
+ #
  # ## Example
  #
  # producer = kafka.async_producer(
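The backpressure behaviour described in this hunk can be handled at the call site. A minimal sketch, assuming a producer built with `kafka.async_producer`; the topic, message, and one-second pause are illustrative, not part of the library:

    begin
      producer.produce("hello", topic: "greetings")
    rescue Kafka::BufferOverflow
      # The queue hit `max_queue_size`; give the background thread time to
      # drain it, then try again. Pick a pause/strategy suited to your app.
      sleep 1
      retry
    end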
@@ -91,6 +104,7 @@ module Kafka
  # @return [nil]
  def produce(*args)
  raise BufferOverflow if @queue.size >= @max_queue_size
+
  @queue << [:produce, args]

  nil
@@ -146,7 +160,7 @@ module Kafka

  case operation
  when :produce
- @producer.produce(*payload)
+ produce(*payload)
  deliver_messages if threshold_reached?
  when :deliver_messages
  deliver_messages
@@ -166,6 +180,13 @@ module Kafka

  private

+ def produce(*args)
+ @producer.produce(*args)
+ rescue BufferOverflow
+ deliver_messages
+ retry
+ end
+
  def deliver_messages
  @producer.deliver_messages
  rescue DeliveryFailed
@@ -6,22 +6,28 @@ module Kafka
  class MessageBuffer
  include Enumerable

- attr_reader :size
+ attr_reader :size, :bytesize

  def initialize
  @buffer = {}
  @size = 0
+ @bytesize = 0
  end

  def write(value:, key:, topic:, partition:)
- @size += 1
  message = Protocol::Message.new(key: key, value: value)
+
  buffer_for(topic, partition) << message
+
+ @size += 1
+ @bytesize += message.bytesize
  end

  def concat(messages, topic:, partition:)
- @size += messages.count
  buffer_for(topic, partition).concat(messages)
+
+ @size += messages.count
+ @bytesize += messages.map(&:bytesize).reduce(:+)
  end

  def to_h
@@ -48,6 +54,7 @@ module Kafka
  # @return [nil]
  def clear_messages(topic:, partition:)
  @size -= @buffer[topic][partition].count
+ @bytesize -= @buffer[topic][partition].map(&:bytesize).reduce(:+)

  @buffer[topic].delete(partition)
  @buffer.delete(topic) if @buffer[topic].empty?
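A small sketch of the new bytesize accounting; the topic, keys, and values are illustrative. `bytesize` tracks the combined key and value byte sizes of the buffered messages, and `clear_messages` subtracts the bytes of the partition it drops:

    buffer = Kafka::MessageBuffer.new

    buffer.write(value: "hello", key: nil, topic: "greetings", partition: 0)
    buffer.write(value: "world", key: "x", topic: "greetings", partition: 1)

    buffer.size     # => 2
    buffer.bytesize # => 11 ("hello" + "world" + "x")

    buffer.clear_messages(topic: "greetings", partition: 0)

    buffer.size     # => 1
    buffer.bytesize # => 6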
@@ -2,12 +2,16 @@ module Kafka
  class PendingMessage
  attr_reader :value, :key, :topic, :partition, :partition_key

+ attr_reader :bytesize
+
  def initialize(value:, key:, topic:, partition:, partition_key:)
  @key = key
  @value = value
  @topic = topic
  @partition = partition
  @partition_key = partition_key
+
+ @bytesize = key.to_s.bytesize + value.to_s.bytesize
  end
  end
  end
@@ -0,0 +1,43 @@
+ module Kafka
+
+ # A pending message queue holds messages that have not yet been assigned to
+ # a partition. It's designed to only remove messages once they've been
+ # successfully handled.
+ class PendingMessageQueue
+ attr_reader :size, :bytesize
+
+ def initialize
+ @messages = []
+ @size = 0
+ @bytesize = 0
+ end
+
+ def write(message)
+ @messages << message
+ @size += 1
+ @bytesize += message.bytesize
+ end
+
+ def empty?
+ @messages.empty?
+ end
+
+ # Yields each message in the queue to the provided block, removing the
+ # message after the block has processed it. If the block raises an
+ # exception, the message will be retained in the queue.
+ #
+ # @yieldparam [PendingMessage] message
+ # @return [nil]
+ def dequeue_each(&block)
+ until @messages.empty?
+ message = @messages.first
+
+ yield message
+
+ @size -= 1
+ @bytesize -= message.bytesize
+ @messages.shift
+ end
+ end
+ end
+ end
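A minimal sketch of the retention behaviour documented on `#dequeue_each`: if the block raises, the message stays in the queue. The message contents and the error are illustrative:

    queue = Kafka::PendingMessageQueue.new

    queue.write(Kafka::PendingMessage.new(
      value: "hello",
      key: nil,
      topic: "greetings",
      partition: nil,
      partition_key: nil,
    ))

    begin
      queue.dequeue_each do |message|
        raise Kafka::Error, "no partition could be assigned"
      end
    rescue Kafka::Error
      queue.size # => 1, the message was retained for the next attempt
    end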
@@ -25,12 +25,13 @@ module Kafka
  # * `sent_message_count` – the number of messages that were successfully sent.
  #
  class ProduceOperation
- def initialize(cluster:, buffer:, compression_codec:, required_acks:, ack_timeout:, logger:)
+ def initialize(cluster:, buffer:, compression_codec:, compression_threshold:, required_acks:, ack_timeout:, logger:)
  @cluster = cluster
  @buffer = buffer
  @required_acks = required_acks
  @ack_timeout = ack_timeout
  @compression_codec = compression_codec
+ @compression_threshold = compression_threshold
  @logger = logger
  end

@@ -77,7 +78,12 @@ module Kafka
  messages_for_topics = {}

  message_buffer.each do |topic, partition, messages|
- message_set = Protocol::MessageSet.new(messages: messages, compression_codec: @compression_codec)
+ message_set = Protocol::MessageSet.new(
+ messages: messages,
+ compression_codec: @compression_codec,
+ compression_threshold: @compression_threshold,
+ )
+
  messages_for_topics[topic] ||= {}
  messages_for_topics[topic][partition] = message_set
  end
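A message set built this way is only compressed once it contains at least `compression_threshold` messages (the `compress?` check appears later in this diff). A sketch using only the constructor arguments shown above; the messages, the gzip codec, and the threshold of 3 are illustrative:

    codec = Kafka::Compression.find_codec(:gzip)

    messages = [
      Kafka::Protocol::Message.new(value: "hello"),
      Kafka::Protocol::Message.new(value: "world"),
    ]

    message_set = Kafka::Protocol::MessageSet.new(
      messages: messages,
      compression_codec: codec,
      compression_threshold: 3,
    )

    message_set.size # => 2, below the threshold, so #encode skips compression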
@@ -1,6 +1,8 @@
+ require "set"
  require "kafka/partitioner"
  require "kafka/message_buffer"
  require "kafka/produce_operation"
+ require "kafka/pending_message_queue"
  require "kafka/pending_message"
  require "kafka/compression"

@@ -47,10 +49,42 @@ module Kafka
  # not, we do another round of requests, this time with just the remaining messages.
  # We do this for as long as `max_retries` permits.
  #
+ # ## Compression
+ #
+ # Depending on what kind of data you produce, enabling compression may yield improved
+ # bandwidth and space usage. Compression in Kafka is done on entire message sets
+ # rather than on individual messages. This improves the compression rate and generally
+ # means that compression works better the larger your buffers get, since the message
+ # sets will be larger by the time they're compressed.
+ #
+ # Since many workloads have variations in throughput and distribution across partitions,
+ # it's possible to configure a threshold for when to enable compression by setting
+ # `compression_threshold`. Only if the defined number of messages are buffered for a
+ # partition will the messages be compressed.
+ #
+ # Compression is enabled by passing the `compression_codec` parameter with the
+ # name of one of the algorithms allowed by Kafka:
+ #
+ # * `:snappy` for [Snappy](http://google.github.io/snappy/) compression.
+ # * `:gzip` for [gzip](https://en.wikipedia.org/wiki/Gzip) compression.
+ #
+ # By default, all message sets will be compressed if you specify a compression
+ # codec. To increase the compression threshold, set `compression_threshold` to
+ # an integer value higher than one.
+ #
  # ## Instrumentation
  #
+ # Whenever {#produce} is called, the notification `produce_message.producer.kafka`
+ # will be emitted with the following payload:
+ #
+ # * `value` – the message value.
+ # * `key` – the message key.
+ # * `topic` – the topic that was produced to.
+ # * `buffer_size` – the buffer size after adding the message.
+ # * `max_buffer_size` – the maximum allowed buffer size for the producer.
+ #
  # After {#deliver_messages} completes, the notification
- # `deliver_messages.producer.kafka` will be emitted.
+ # `deliver_messages.producer.kafka` will be emitted with the following payload:
  #
  # * `message_count` – the total number of messages that the producer tried to
  # deliver. Note that not all messages may get delivered.
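A sketch of consuming the new `produce_message.producer.kafka` notification, assuming `ActiveSupport::Notifications` is loaded to carry the gem's instrumentation events; the 90% warning threshold is illustrative:

    require "active_support/notifications"

    ActiveSupport::Notifications.subscribe("produce_message.producer.kafka") do |_name, _start, _finish, _id, payload|
      utilization = payload[:buffer_size].to_f / payload[:max_buffer_size]

      # Warn when the producer's buffer is close to overflowing.
      if utilization > 0.9
        puts "Buffer for #{payload[:topic]} is #{(utilization * 100).round}% full"
      end
    end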
@@ -116,9 +150,21 @@ module Kafka
  # @param retry_backoff [Integer] the number of seconds to wait between retries.
  #
  # @param max_buffer_size [Integer] the number of messages allowed in the buffer
- # before new writes will raise BufferOverflow exceptions.
+ # before new writes will raise {BufferOverflow} exceptions.
+ #
+ # @param max_buffer_bytesize [Integer] the maximum size of the buffer in bytes.
+ # Attempting to produce messages when the buffer reaches this size will
+ # result in {BufferOverflow} being raised.
+ #
+ # @param compression_codec [Symbol, nil] the name of the compression codec to
+ # use, or nil if no compression should be performed. Valid codecs: `:snappy`
+ # and `:gzip`.
+ #
+ # @param compression_threshold [Integer] the number of messages that needs to
+ # be in a message set before it should be compressed. Note that message sets
+ # are per-partition rather than per-topic or per-producer.
  #
- def initialize(cluster:, logger:, compression_codec: nil, ack_timeout: 5, required_acks: 1, max_retries: 2, retry_backoff: 1, max_buffer_size: 1000)
+ def initialize(cluster:, logger:, compression_codec: nil, compression_threshold: 1, ack_timeout: 5, required_acks: 1, max_retries: 2, retry_backoff: 1, max_buffer_size: 1000, max_buffer_bytesize: 10_000_000)
  @cluster = cluster
  @logger = logger
  @required_acks = required_acks
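A sketch of configuring the options documented above, assuming `Kafka::Client#producer` forwards these keyword arguments to the initializer shown in this hunk; the broker address, topic, and the specific limits are illustrative:

    require "kafka"

    kafka = Kafka.new(seed_brokers: ["kafka1:9092"])

    producer = kafka.producer(
      compression_codec: :snappy,      # compress message sets with Snappy
      compression_threshold: 10,       # only compress sets of 10+ messages per partition
      max_buffer_size: 5_000,          # raise BufferOverflow past 5,000 buffered messages
      max_buffer_bytesize: 50_000_000, # ...or past ~50 MB of buffered key/value data
    )

    producer.produce("hello", topic: "greetings")
    producer.deliver_messages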
@@ -126,13 +172,18 @@ module Kafka
  @max_retries = max_retries
  @retry_backoff = retry_backoff
  @max_buffer_size = max_buffer_size
+ @max_buffer_bytesize = max_buffer_bytesize
  @compression_codec = Compression.find_codec(compression_codec)
+ @compression_threshold = compression_threshold
+
+ # The set of topics that are produced to.
+ @target_topics = Set.new

  # A buffer organized by topic/partition.
  @buffer = MessageBuffer.new

  # Messages added by `#produce` but not yet assigned a partition.
- @pending_messages = []
+ @pending_message_queue = PendingMessageQueue.new
  end

  # Produces a message to the specified topic. Note that messages are buffered in
@@ -165,11 +216,7 @@ module Kafka
  # @raise [BufferOverflow] if the maximum buffer size has been reached.
  # @return [nil]
  def produce(value, key: nil, topic:, partition: nil, partition_key: nil)
- unless buffer_size < @max_buffer_size
- raise BufferOverflow, "Max buffer size #{@max_buffer_size} exceeded"
- end
-
- @pending_messages << PendingMessage.new(
+ message = PendingMessage.new(
  value: value,
  key: key,
  topic: topic,
@@ -177,6 +224,25 @@ module Kafka
  partition_key: partition_key,
  )

+ if buffer_size >= @max_buffer_size
+ raise BufferOverflow, "Max buffer size (#{@max_buffer_size} messages) exceeded"
+ end
+
+ if buffer_bytesize + message.bytesize >= @max_buffer_bytesize
+ raise BufferOverflow, "Max buffer bytesize (#{@max_buffer_bytesize} bytes) exceeded"
+ end
+
+ @target_topics.add(topic)
+ @pending_message_queue.write(message)
+
+ Instrumentation.instrument("produce_message.producer.kafka", {
+ value: value,
+ key: key,
+ topic: topic,
+ buffer_size: buffer_size,
+ max_buffer_size: @max_buffer_size,
+ })
+
  nil
  end

@@ -211,7 +277,11 @@ module Kafka
  #
  # @return [Integer] buffer size.
  def buffer_size
- @pending_messages.size + @buffer.size
+ @pending_message_queue.size + @buffer.size
+ end
+
+ def buffer_bytesize
+ @pending_message_queue.bytesize + @buffer.bytesize
  end

  # Closes all connections to the brokers.
@@ -226,9 +296,7 @@ module Kafka
  def deliver_messages_with_retries(notification)
  attempt = 0

- # Make sure we get metadata for this topic.
- target_topics = @pending_messages.map(&:topic).uniq
- @cluster.add_target_topics(target_topics)
+ @cluster.add_target_topics(@target_topics)

  operation = ProduceOperation.new(
  cluster: @cluster,
@@ -236,6 +304,7 @@ module Kafka
  required_acks: @required_acks,
  ack_timeout: @ack_timeout,
  compression_codec: @compression_codec,
+ compression_threshold: @compression_threshold,
  logger: @logger,
  )

@@ -249,7 +318,7 @@ module Kafka
  assign_partitions!
  operation.execute

- if @pending_messages.empty? && @buffer.empty?
+ if buffer_size.zero?
  break
  elsif attempt <= @max_retries
  @logger.warn "Failed to send all messages; attempting retry #{attempt} of #{@max_retries} after #{@retry_backoff}s"
@@ -276,10 +345,7 @@ module Kafka
  end

  def assign_partitions!
- until @pending_messages.empty?
- # We want to keep the message in the first-stage buffer in case there's an error.
- message = @pending_messages.first
-
+ @pending_message_queue.dequeue_each do |message|
  partition = message.partition

  if partition.nil?
@@ -293,9 +359,6 @@ module Kafka
  topic: message.topic,
  partition: partition,
  )
-
- # Now it's safe to remove the message from the first-stage buffer.
- @pending_messages.shift
  end
  rescue Kafka::Error => e
  @logger.error "Failed to assign pending message to a partition: #{e}"
@@ -18,11 +18,15 @@ module Kafka

  attr_reader :key, :value, :attributes, :offset

+ attr_reader :bytesize
+
  def initialize(value:, key: nil, attributes: 0, offset: -1)
  @key = key
  @value = value
  @attributes = attributes
  @offset = offset
+
+ @bytesize = @key.to_s.bytesize + @value.to_s.bytesize
  end

  def encode(encoder)
@@ -3,9 +3,14 @@ module Kafka
  class MessageSet
  attr_reader :messages

- def initialize(messages: [], compression_codec: nil)
+ def initialize(messages: [], compression_codec: nil, compression_threshold: 1)
  @messages = messages
  @compression_codec = compression_codec
+ @compression_threshold = compression_threshold
+ end
+
+ def size
+ @messages.size
  end

  def ==(other)
@@ -13,10 +18,10 @@ module Kafka
  end

  def encode(encoder)
- if @compression_codec.nil?
- encode_without_compression(encoder)
- else
+ if compress?
  encode_with_compression(encoder)
+ else
+ encode_without_compression(encoder)
  end
  end

@@ -39,6 +44,10 @@ module Kafka

  private

+ def compress?
+ !@compression_codec.nil? && size >= @compression_threshold
+ end
+
  def encode_with_compression(encoder)
  codec = @compression_codec

data/lib/kafka/version.rb CHANGED
@@ -1,3 +1,3 @@
  module Kafka
- VERSION = "0.1.6"
+ VERSION = "0.1.7"
  end
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: ruby-kafka
  version: !ruby/object:Gem::Version
- version: 0.1.6
+ version: 0.1.7
  platform: ruby
  authors:
  - Daniel Schierbeck
  autorequire:
  bindir: exe
  cert_chain: []
- date: 2016-02-22 00:00:00.000000000 Z
+ date: 2016-02-24 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: bundler
@@ -179,6 +179,7 @@ files:
  - lib/kafka/message_buffer.rb
  - lib/kafka/partitioner.rb
  - lib/kafka/pending_message.rb
+ - lib/kafka/pending_message_queue.rb
  - lib/kafka/produce_operation.rb
  - lib/kafka/producer.rb
  - lib/kafka/protocol.rb