ruby-kafka 0.3.12 → 0.3.13.beta1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +20 -2
- data/lib/kafka/async_producer.rb +11 -1
- data/lib/kafka/client.rb +1 -0
- data/lib/kafka/consumer.rb +15 -1
- data/lib/kafka/consumer_group.rb +1 -1
- data/lib/kafka/datadog.rb +22 -0
- data/lib/kafka/offset_manager.rb +19 -2
- data/lib/kafka/version.rb +1 -1
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: e6c48d1b7996a28caddd510f0daf032fdfca84fe
+  data.tar.gz: 302d2211024a2fdb904947c97cb0c258dc8583eb
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: d5524a95270673acb7e7e0ba2d6907eb774c715fb467925a052a633c1ab0e76de0fe5a16806ef3a7b95bbc29d54bd832a2b0a43d496753f4b5f1a162e4f91dde
+  data.tar.gz: 34f6417d7d0fdb987f1c6fdf69595a9de413ad010ca391f1d5d5fa3632fe781fe9514ae3568d20c2ed120887e288bb42848ebb22ab42125215d50ca16c0523da
data/CHANGELOG.md
CHANGED
@@ -4,6 +4,11 @@ Changes and additions to the library will be listed here.

 ## Unreleased

+## v0.3.13.beta1
+
+- Minimize the number of times messages are reprocessed after a consumer group resync.
+- Improve instrumentation of the async producer.
+
 ## v0.3.12

 - Fix a bug in the consumer.
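The "improve instrumentation" entry refers to the new `enqueue_message.async_producer` event shown in the async producer diff below. A minimal sketch of observing it, assuming ruby-kafka's usual ActiveSupport::Notifications backend and the `.kafka` event suffix implied by the Datadog subscriber further down:

```ruby
require "active_support/notifications"

# Fired on every AsyncProducer#produce call. The payload keys used here
# (topic, queue_size, max_queue_size) come from the diff below.
ActiveSupport::Notifications.subscribe("enqueue_message.async_producer.kafka") do |*args|
  event = ActiveSupport::Notifications::Event.new(*args)
  payload = event.payload

  fill_ratio = payload[:queue_size].to_f / payload[:max_queue_size]
  warn "async producer queue for #{payload[:topic]} is #{(fill_ratio * 100).round}% full" if fill_ratio > 0.8
end
```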
data/README.md
CHANGED
@@ -26,8 +26,9 @@ Although parts of this library work with Kafka 0.8 – specifically, the Produce
     1. [Consumer Groups](#consumer-groups)
     2. [Consumer Checkpointing](#consumer-checkpointing)
     3. [Topic Subscriptions](#topic-subscriptions)
-    4. [
-    5. [
+    4. [Shutting Down a Consumer](#shutting-down-a-consumer)
+    5. [Consuming Messages in Batches](#consuming-messages-in-batches)
+    6. [Balancing Throughput and Latency](#balancing-throughput-and-latency)
 4. [Thread Safety](#thread-safety)
 5. [Logging](#logging)
 6. [Instrumentation](#instrumentation)
@@ -526,6 +527,23 @@ consumer.subscribe("notifications", start_from_beginning: false)
 Once the consumer group has checkpointed its progress in the topic's partitions, the consumers will always start from the checkpointed offsets, regardless of `start_from_beginning`. As such, this setting only applies when the consumer initially starts consuming from a topic.


+#### Shutting Down a Consumer
+
+In order to shut down a running consumer process cleanly, call `#stop` on it. A common pattern is to trap a process signal and initiate the shutdown from there:
+
+```ruby
+consumer = kafka.consumer(...)
+
+# The consumer can be stopped from the command line by executing
+# `kill -s QUIT <process-id>`.
+trap("QUIT") { consumer.stop }
+
+consumer.each_message do |message|
+  ...
+end
+```
+
+
 #### Consuming Messages in Batches

 Sometimes it is easier to deal with messages in batches rather than individually. A _batch_ is a sequence of one or more Kafka messages that all belong to the same topic and partition. One common reason to want to use batches is when some external system has a batch or transactional API.
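The batch section referenced in that context line is only partially visible in this hunk. A short sketch of the batch-consuming style it describes, assuming the library's `each_batch` interface; `SomeDatabase.bulk_insert` is a hypothetical stand-in for an external batch API:

```ruby
require "kafka"

kafka = Kafka.new(seed_brokers: ["kafka1:9092"], client_id: "my-app")
consumer = kafka.consumer(group_id: "notifications-batch-importer")
consumer.subscribe("notifications")

consumer.each_batch do |batch|
  # Every message in a batch shares the same topic and partition, so one
  # bulk write per batch maps naturally onto a batch or transactional API.
  puts "Received #{batch.messages.count} messages from #{batch.topic}/#{batch.partition}"

  rows = batch.messages.map { |message| { offset: message.offset, body: message.value } }
  SomeDatabase.bulk_insert(rows) # hypothetical external system
end
```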
data/lib/kafka/async_producer.rb
CHANGED
@@ -69,7 +69,7 @@ module Kafka
     # @param delivery_interval [Integer] if greater than zero, the number of
     # seconds between automatic message deliveries.
     #
-    def initialize(sync_producer:, max_queue_size: 1000, delivery_threshold: 0, delivery_interval: 0, instrumenter:)
+    def initialize(sync_producer:, max_queue_size: 1000, delivery_threshold: 0, delivery_interval: 0, instrumenter:, logger:)
       raise ArgumentError unless max_queue_size > 0
       raise ArgumentError unless delivery_threshold >= 0
       raise ArgumentError unless delivery_interval >= 0
@@ -77,6 +77,7 @@ module Kafka
       @queue = Queue.new
       @max_queue_size = max_queue_size
       @instrumenter = instrumenter
+      @logger = logger

       @worker = Worker.new(
         queue: @queue,
@@ -102,6 +103,12 @@ module Kafka
       args = [value, **options.merge(topic: topic)]
       @queue << [:produce, args]

+      @instrumenter.instrument("enqueue_message.async_producer", {
+        topic: topic,
+        queue_size: @queue.size,
+        max_queue_size: @max_queue_size,
+      })
+
       nil
     end

@@ -122,6 +129,7 @@ module Kafka
     # @see Kafka::Producer#shutdown
     # @return [nil]
     def shutdown
+      @timer_thread && @timer_thread.exit
       @queue << [:shutdown, nil]
       @worker_thread && @worker_thread.join

@@ -149,6 +157,8 @@ module Kafka
         topic: topic,
       })

+      @logger.error "Buffer overflow: failed to enqueue message for #{topic}"
+
       raise BufferOverflow
     end

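For context on where the new `logger:` argument and `enqueue_message` instrumentation come into play, a sketch of typical async producer usage; the broker address and settings are placeholders, and the client is what passes its own logger and instrumenter into `AsyncProducer#initialize`:

```ruby
require "kafka"
require "logger"

kafka = Kafka.new(
  seed_brokers: ["kafka1:9092"],
  client_id: "my-app",
  logger: Logger.new($stderr)
)

producer = kafka.async_producer(
  max_queue_size: 1000,    # past this, #produce raises BufferOverflow (now also logged)
  delivery_threshold: 100, # deliver once 100 messages are queued...
  delivery_interval: 10    # ...or every 10 seconds, whichever comes first
)

# Each call enqueues the message and fires `enqueue_message.async_producer`
# with the current queue_size and max_queue_size.
producer.produce("hello", topic: "greetings")

producer.deliver_messages
producer.shutdown
```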
data/lib/kafka/client.rb
CHANGED
data/lib/kafka/consumer.rb
CHANGED
@@ -211,8 +211,22 @@ module Kafka
     end

     def join_group
-      @
+      old_generation_id = @group.generation_id
+
       @group.join
+
+      if old_generation_id && @group.generation_id != old_generation_id + 1
+        # We've been out of the group for at least an entire generation, no
+        # sense in trying to hold on to offset data
+        @offset_manager.clear_offsets
+      else
+        # After rejoining the group we may have been assigned a new set of
+        # partitions. Keeping the old offset commits around forever would risk
+        # having the consumer go back and reprocess messages if it's assigned
+        # a partition it used to be assigned to way back. For that reason, we
+        # only keep commits for the partitions that we're still assigned.
+        @offset_manager.clear_offsets_excluding(@group.assigned_partitions)
+      end
     end

     def fetch_batches(min_bytes:, max_wait_time:)
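The generation check above is what minimizes reprocessing after a resync. A toy method (not the library's API) that spells out the same decision:

```ruby
# If the consumer went straight from generation N to N + 1, it only drops
# offsets for partitions it no longer owns; if it skipped a generation, it has
# been out of the group too long for its offset data to be worth keeping.
def keep_assigned_offsets_only?(old_generation_id, new_generation_id)
  old_generation_id.nil? || new_generation_id == old_generation_id + 1
end

keep_assigned_offsets_only?(nil, 7) # => true  (first join)
keep_assigned_offsets_only?(6, 7)   # => true  (normal rebalance)
keep_assigned_offsets_only?(5, 7)   # => false (missed a generation, clear everything)
```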
data/lib/kafka/consumer_group.rb
CHANGED
data/lib/kafka/datadog.rb
CHANGED
@@ -206,5 +206,27 @@ module Kafka

       attach_to "producer.kafka"
     end
+
+    class AsyncProducerSubscriber < StatsdSubscriber
+      def enqueue_message(event)
+        client = event.payload.fetch(:client_id)
+        topic = event.payload.fetch(:topic)
+        queue_size = event.payload.fetch(:queue_size)
+        max_queue_size = event.payload.fetch(:max_queue_size)
+        queue_fill_ratio = queue_size.to_f / max_queue_size.to_f
+
+        tags = {
+          client: client,
+        }
+
+        # This gets us the avg/max queue size per producer.
+        histogram("producer.queue.size", queue_size, tags: tags)
+
+        # This gets us the avg/max queue fill ratio per producer.
+        histogram("producer.queue.fill_ratio", queue_fill_ratio, tags: tags)
+      end
+
+      attach_to "async_producer.kafka"
+    end
   end
 end
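A sketch of how these metrics get emitted in practice, assuming the dogstatsd-ruby gem and a local agent; requiring `kafka/datadog` defines the subscribers, and `attach_to` hooks them into the notification bus:

```ruby
require "kafka"
require "kafka/datadog" # defines the subscribers; needs the dogstatsd-ruby gem

kafka = Kafka.new(seed_brokers: ["kafka1:9092"], client_id: "my-app")
producer = kafka.async_producer(delivery_interval: 10)

# Each enqueue now records the "producer.queue.size" and
# "producer.queue.fill_ratio" histograms, tagged with client: "my-app"
# (under whatever statsd prefix Kafka::Datadog is configured with).
producer.produce("hello", topic: "greetings")
```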
data/lib/kafka/offset_manager.rb
CHANGED
@@ -59,14 +59,27 @@ module Kafka
     end

     def commit_offsets_if_necessary
-      if
+      if commit_timeout_reached? || commit_threshold_reached?
         commit_offsets
       end
     end

     def clear_offsets
-      @uncommitted_offsets = 0
       @processed_offsets.clear
+
+      # Clear the cached commits from the brokers.
+      @committed_offsets = nil
+    end
+
+    def clear_offsets_excluding(excluded)
+      # Clear all offsets that aren't in `excluded`.
+      @processed_offsets.each do |topic, partitions|
+        partitions.keep_if do |partition, _|
+          excluded.fetch(topic, []).include?(partition)
+        end
+      end
+
+      # Clear the cached commits from the brokers.
       @committed_offsets = nil
     end

@@ -81,6 +94,10 @@ module Kafka
       @committed_offsets.offset_for(topic, partition)
     end

+    def commit_timeout_reached?
+      @commit_interval != 0 && seconds_since_last_commit >= @commit_interval
+    end
+
     def commit_threshold_reached?
       @commit_threshold != 0 && @uncommitted_offsets >= @commit_threshold
     end
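A standalone illustration of the filtering in `clear_offsets_excluding`, using made-up offsets; `assigned` mirrors the shape of `group.assigned_partitions` (topic name => array of partition ids):

```ruby
processed_offsets = {
  "events" => { 0 => 42, 1 => 17, 3 => 9 },
  "logs"   => { 0 => 5 },
}

assigned = { "events" => [0, 3] }

# Same keep_if/fetch combination as the diff: partitions we are no longer
# assigned to are dropped, and topics we lost entirely end up with an empty hash.
processed_offsets.each do |topic, partitions|
  partitions.keep_if { |partition, _| assigned.fetch(topic, []).include?(partition) }
end

processed_offsets
# => {"events"=>{0=>42, 3=>9}, "logs"=>{}}
```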
data/lib/kafka/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: ruby-kafka
 version: !ruby/object:Gem::Version
-  version: 0.3.
+  version: 0.3.13.beta1
 platform: ruby
 authors:
 - Daniel Schierbeck
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2016-
+date: 2016-08-02 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -303,9 +303,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
       version: 2.1.0
 required_rubygems_version: !ruby/object:Gem::Requirement
   requirements:
-  - - "
+  - - ">"
     - !ruby/object:Gem::Version
-      version:
+      version: 1.3.1
 requirements: []
 rubyforge_project:
 rubygems_version: 2.4.5.1