ruby-kafka 0.3.12 → 0.3.13.beta1
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +20 -2
- data/lib/kafka/async_producer.rb +11 -1
- data/lib/kafka/client.rb +1 -0
- data/lib/kafka/consumer.rb +15 -1
- data/lib/kafka/consumer_group.rb +1 -1
- data/lib/kafka/datadog.rb +22 -0
- data/lib/kafka/offset_manager.rb +19 -2
- data/lib/kafka/version.rb +1 -1
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: …
-  data.tar.gz: …
+  metadata.gz: e6c48d1b7996a28caddd510f0daf032fdfca84fe
+  data.tar.gz: 302d2211024a2fdb904947c97cb0c258dc8583eb
 SHA512:
-  metadata.gz: …
-  data.tar.gz: …
+  metadata.gz: d5524a95270673acb7e7e0ba2d6907eb774c715fb467925a052a633c1ab0e76de0fe5a16806ef3a7b95bbc29d54bd832a2b0a43d496753f4b5f1a162e4f91dde
+  data.tar.gz: 34f6417d7d0fdb987f1c6fdf69595a9de413ad010ca391f1d5d5fa3632fe781fe9514ae3568d20c2ed120887e288bb42848ebb22ab42125215d50ca16c0523da
data/CHANGELOG.md
CHANGED
@@ -4,6 +4,11 @@ Changes and additions to the library will be listed here.
 
 ## Unreleased
 
+## v0.3.13.beta1
+
+- Minimize the number of times messages are reprocessed after a consumer group resync.
+- Improve instrumentation of the async producer.
+
 ## v0.3.12
 
 - Fix a bug in the consumer.
data/README.md
CHANGED
@@ -26,8 +26,9 @@ Although parts of this library work with Kafka 0.8 – specifically, the Produce
     1. [Consumer Groups](#consumer-groups)
     2. [Consumer Checkpointing](#consumer-checkpointing)
     3. [Topic Subscriptions](#topic-subscriptions)
-    4. [Consuming Messages in Batches](#consuming-messages-in-batches)
-    5. [Balancing Throughput and Latency](#balancing-throughput-and-latency)
+    4. [Shutting Down a Consumer](#shutting-down-a-consumer)
+    5. [Consuming Messages in Batches](#consuming-messages-in-batches)
+    6. [Balancing Throughput and Latency](#balancing-throughput-and-latency)
 4. [Thread Safety](#thread-safety)
 5. [Logging](#logging)
 6. [Instrumentation](#instrumentation)
@@ -526,6 +527,23 @@ consumer.subscribe("notifications", start_from_beginning: false)
 Once the consumer group has checkpointed its progress in the topic's partitions, the consumers will always start from the checkpointed offsets, regardless of `start_from_beginning`. As such, this setting only applies when the consumer initially starts consuming from a topic.
 
 
+#### Shutting Down a Consumer
+
+In order to shut down a running consumer process cleanly, call `#stop` on it. A common pattern is to trap a process signal and initiate the shutdown from there:
+
+```ruby
+consumer = kafka.consumer(...)
+
+# The consumer can be stopped from the command line by executing
+# `kill -s QUIT <process-id>`.
+trap("QUIT") { consumer.stop }
+
+consumer.each_message do |message|
+  ...
+end
+```
+
+
 #### Consuming Messages in Batches
 
 Sometimes it is easier to deal with messages in batches rather than individually. A _batch_ is a sequence of one or more Kafka messages that all belong to the same topic and partition. One common reason to want to use batches is when some external system has a batch or transactional API.
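The batch paragraph above pairs with the `#each_batch` API listed in the new table of contents. A minimal sketch of handing a batch to an external transactional system, assuming `batch` responds to `topic`, `partition`, and `messages`; the `database` client is hypothetical:

```ruby
consumer.each_batch do |batch|
  # `database` is a hypothetical client exposing a transactional API.
  database.transaction do
    batch.messages.each do |message|
      database.insert(
        topic: batch.topic,
        partition: batch.partition,
        offset: message.offset,
        value: message.value,
      )
    end
  end
end
```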
data/lib/kafka/async_producer.rb
CHANGED
@@ -69,7 +69,7 @@ module Kafka
     # @param delivery_interval [Integer] if greater than zero, the number of
     #   seconds between automatic message deliveries.
     #
-    def initialize(sync_producer:, max_queue_size: 1000, delivery_threshold: 0, delivery_interval: 0, instrumenter:)
+    def initialize(sync_producer:, max_queue_size: 1000, delivery_threshold: 0, delivery_interval: 0, instrumenter:, logger:)
       raise ArgumentError unless max_queue_size > 0
       raise ArgumentError unless delivery_threshold >= 0
       raise ArgumentError unless delivery_interval >= 0
@@ -77,6 +77,7 @@ module Kafka
       @queue = Queue.new
       @max_queue_size = max_queue_size
       @instrumenter = instrumenter
+      @logger = logger
 
       @worker = Worker.new(
         queue: @queue,
@@ -102,6 +103,12 @@ module Kafka
       args = [value, **options.merge(topic: topic)]
       @queue << [:produce, args]
 
+      @instrumenter.instrument("enqueue_message.async_producer", {
+        topic: topic,
+        queue_size: @queue.size,
+        max_queue_size: @max_queue_size,
+      })
+
       nil
     end
 
@@ -122,6 +129,7 @@ module Kafka
     # @see Kafka::Producer#shutdown
     # @return [nil]
     def shutdown
+      @timer_thread && @timer_thread.exit
       @queue << [:shutdown, nil]
       @worker_thread && @worker_thread.join
 
@@ -149,6 +157,8 @@ module Kafka
           topic: topic,
         })
 
+        @logger.error "Buffer overflow: failed to enqueue message for #{topic}"
+
         raise BufferOverflow
       end
 
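The new `enqueue_message.async_producer` event can also be consumed directly, without the bundled Datadog reporter. A minimal sketch, assuming the instrumenter publishes through ActiveSupport::Notifications with event names suffixed `.kafka` (consistent with the `attach_to "async_producer.kafka"` call in the Datadog subscriber below); the 90% threshold is illustrative:

```ruby
require "active_support/notifications"

# Hypothetical monitor: warn when the async producer's queue is nearly full.
ActiveSupport::Notifications.subscribe("enqueue_message.async_producer.kafka") do |_name, _start, _finish, _id, payload|
  fill_ratio = payload.fetch(:queue_size).to_f / payload.fetch(:max_queue_size)

  if fill_ratio >= 0.9
    warn "async producer queue for #{payload.fetch(:topic)} is #{(fill_ratio * 100).round}% full"
  end
end
```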
data/lib/kafka/client.rb
CHANGED
data/lib/kafka/consumer.rb
CHANGED
@@ -211,8 +211,22 @@ module Kafka
     end
 
     def join_group
-      @offset_manager.clear_offsets
+      old_generation_id = @group.generation_id
+
       @group.join
+
+      if old_generation_id && @group.generation_id != old_generation_id + 1
+        # We've been out of the group for at least an entire generation, no
+        # sense in trying to hold on to offset data
+        @offset_manager.clear_offsets
+      else
+        # After rejoining the group we may have been assigned a new set of
+        # partitions. Keeping the old offset commits around forever would risk
+        # having the consumer go back and reprocess messages if it's assigned
+        # a partition it used to be assigned to way back. For that reason, we
+        # only keep commits for the partitions that we're still assigned.
+        @offset_manager.clear_offsets_excluding(@group.assigned_partitions)
+      end
     end
 
     def fetch_batches(min_bytes:, max_wait_time:)
data/lib/kafka/consumer_group.rb
CHANGED
data/lib/kafka/datadog.rb
CHANGED
@@ -206,5 +206,27 @@ module Kafka
 
       attach_to "producer.kafka"
     end
+
+    class AsyncProducerSubscriber < StatsdSubscriber
+      def enqueue_message(event)
+        client = event.payload.fetch(:client_id)
+        topic = event.payload.fetch(:topic)
+        queue_size = event.payload.fetch(:queue_size)
+        max_queue_size = event.payload.fetch(:max_queue_size)
+        queue_fill_ratio = queue_size.to_f / max_queue_size.to_f
+
+        tags = {
+          client: client,
+        }
+
+        # This gets us the avg/max queue size per producer.
+        histogram("producer.queue.size", queue_size, tags: tags)
+
+        # This gets us the avg/max queue fill ratio per producer.
+        histogram("producer.queue.fill_ratio", queue_fill_ratio, tags: tags)
+      end
+
+      attach_to "async_producer.kafka"
+    end
   end
 end
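For Datadog users, the new subscriber is picked up the same way as the existing ones. A minimal sketch, assuming the integration is enabled simply by requiring `kafka/datadog` (the `attach_to` calls run at load time) and that a StatsD agent is already reachable:

```ruby
# Attaches the subscribers defined in kafka/datadog.rb, including the new
# AsyncProducerSubscriber.
require "kafka/datadog"

# From then on, every AsyncProducer#produce call reports two histograms,
# tagged with the producing client's id:
#
#   producer.queue.size        – current number of messages in the queue
#   producer.queue.fill_ratio  – queue size divided by max_queue_size
```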
data/lib/kafka/offset_manager.rb
CHANGED
@@ -59,14 +59,27 @@ module Kafka
     end
 
     def commit_offsets_if_necessary
-      if …
+      if commit_timeout_reached? || commit_threshold_reached?
         commit_offsets
       end
     end
 
     def clear_offsets
-      @uncommitted_offsets = 0
       @processed_offsets.clear
+
+      # Clear the cached commits from the brokers.
+      @committed_offsets = nil
+    end
+
+    def clear_offsets_excluding(excluded)
+      # Clear all offsets that aren't in `excluded`.
+      @processed_offsets.each do |topic, partitions|
+        partitions.keep_if do |partition, _|
+          excluded.fetch(topic, []).include?(partition)
+        end
+      end
+
+      # Clear the cached commits from the brokers.
       @committed_offsets = nil
     end
 
@@ -81,6 +94,10 @@ module Kafka
       @committed_offsets.offset_for(topic, partition)
     end
 
+    def commit_timeout_reached?
+      @commit_interval != 0 && seconds_since_last_commit >= @commit_interval
+    end
+
     def commit_threshold_reached?
       @commit_threshold != 0 && @uncommitted_offsets >= @commit_threshold
     end
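The pruning in `clear_offsets_excluding` keeps only offsets for partitions the consumer still owns after rejoining the group. A minimal sketch of the same logic in isolation, assuming assignments are keyed by topic name with an array of partition ids (the shape implied by `excluded.fetch(topic, []).include?(partition)`); the topic and offsets are illustrative:

```ruby
# Offsets processed before the group resync, keyed by topic and partition:
processed_offsets = {
  "events" => { 0 => 42, 1 => 17, 2 => 13 },
}

# Partitions still assigned to this consumer after rejoining the group:
assigned_partitions = { "events" => [0, 2] }

# The same pruning that clear_offsets_excluding performs:
processed_offsets.each do |topic, partitions|
  partitions.keep_if { |partition, _| assigned_partitions.fetch(topic, []).include?(partition) }
end

processed_offsets # => { "events" => { 0 => 42, 2 => 13 } }
```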
data/lib/kafka/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: ruby-kafka
 version: !ruby/object:Gem::Version
-  version: 0.3.12
+  version: 0.3.13.beta1
 platform: ruby
 authors:
 - Daniel Schierbeck
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2016-…
+date: 2016-08-02 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -303,9 +303,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
       version: 2.1.0
 required_rubygems_version: !ruby/object:Gem::Requirement
   requirements:
-  - - "…
+  - - ">"
     - !ruby/object:Gem::Version
-      version: …
+      version: 1.3.1
 requirements: []
 rubyforge_project:
 rubygems_version: 2.4.5.1