ruby-kafka 0.3.4 → 0.3.5
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +35 -0
- data/lib/kafka/async_producer.rb +42 -20
- data/lib/kafka/client.rb +1 -0
- data/lib/kafka/producer.rb +10 -2
- data/lib/kafka/protocol/offset_commit_request.rb +4 -1
- data/lib/kafka/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 99773c65ab50857fb0d09cabebd3d985a9d5b88c
+  data.tar.gz: 6f94dfae3f17778c3d4c73207545e194e7e5dedf
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 6c9b7268300b13d023fcf509041bf945f791aa145156a7fffa9dff6cf197fae9715562676df035ffe994e9c67fecfcb9056de4ffffae31fe4454cf43ec81b88a
+  data.tar.gz: 5fe45243f286bef386f7589252c5ea59e9bc011e56c9edf5d0b999ecd1e34dfab51d0578700b55f86de4039a59de4de630b14d18077065d8732c71d204bc5b12
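The checksums above cover the two members of the gem archive itself: a `.gem` file is a tar archive containing `metadata.gz` and `data.tar.gz`, and `checksums.yaml` records their SHA1 and SHA512 digests. A minimal sketch of recomputing one of them for comparison, assuming a locally downloaded gem (the file path is illustrative):

```ruby
require "digest"
require "rubygems/package"

# Read data.tar.gz out of the gem archive and print its SHA512, which should
# match the checksums.yaml entry shown above.
File.open("ruby-kafka-0.3.5.gem", "rb") do |file|
  Gem::Package::TarReader.new(file).each do |entry|
    if entry.full_name == "data.tar.gz"
      puts Digest::SHA512.hexdigest(entry.read)
    end
  end
end
```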
data/CHANGELOG.md
CHANGED
@@ -4,6 +4,12 @@ Changes and additions to the library will be listed here.
 
 ## Unreleased
 
+## v0.3.5
+
+- Fix bug that caused the async producer to not work with Unicorn (#166).
+- Fix bug that caused committed consumer offsets to be lost (#167).
+- Instrument buffer overflows in the producer.
+
 ## v0.3.4
 
 - Make the producer buffer more resilient in the face of isolated topic errors.
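The new instrumentation mentioned in the last entry emits a `buffer_overflow.producer` event (see the producer diffs below). A minimal sketch of observing it, assuming ruby-kafka's usual ActiveSupport::Notifications integration, where event names are suffixed with `.kafka`:

```ruby
require "active_support/notifications"

# Track how often the producer rejects messages, per topic. The :topic payload
# key matches what the producer passes to the instrumenter.
overflows = Hash.new(0)

ActiveSupport::Notifications.subscribe("buffer_overflow.producer.kafka") do |_name, _start, _finish, _id, payload|
  overflows[payload[:topic]] += 1
  warn "Producer buffer overflow for topic #{payload[:topic]}"
end
```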
data/README.md
CHANGED
@@ -19,6 +19,8 @@ Although parts of this library work with Kafka 0.8 – specifically, the Produce
     6. [Compression](#compression)
     7. [Producing Messages from a Rails Application](#producing-messages-from-a-rails-application)
 2. [Consuming Messages from Kafka](#consuming-messages-from-kafka)
+    1. [Consumer Checkpointing](#consumer-checkpointing)
+    2. [Consuming Messages in Batches](#consuming-messages-in-batches)
 3. [Logging](#logging)
 4. [Instrumentation](#instrumentation)
 5. [Understanding Timeouts](#understanding-timeouts)
@@ -372,6 +374,39 @@ end
 
 Each consumer process will be assigned one or more partitions from each topic that the group subscribes to. In order to handle more messages, simply start more processes.
 
+#### Consumer Checkpointing
+
+In order to be able to resume processing after a consumer crashes, each consumer will periodically _checkpoint_ its position within each partition it reads from. Since each partition has a monotonically increasing sequence of message offsets, this works by _committing_ the offset of the last message that was processed in a given partition. Kafka handles these commits and allows another consumer in a group to resume from the last commit when a member crashes or becomes unresponsive.
+
+
+#### Consuming Messages in Batches
+
+Sometimes it is easier to deal with messages in batches rather than individually. A _batch_ is a sequence of one or more Kafka messages that all belong to the same topic and partition. One common reason to want to use batches is when some external system has a batch or transactional API.
+
+```ruby
+# A mock search index that we'll be keeping up to date with new Kafka messages.
+index = SearchIndex.new
+
+consumer.subscribe("posts")
+
+consumer.each_batch do |batch|
+  puts "Received batch: #{batch.topic}/#{batch.partition}"
+
+  transaction = index.transaction
+
+  batch.messages.each do |message|
+    # Let's assume that adding a document is idempotent.
+    transaction.add(id: message.key, body: message.value)
+  end
+
+  # Once this method returns, the messages have been successfully written to the
+  # search index. The consumer will only checkpoint a batch *after* the block
+  # has completed without an exception.
+  transaction.commit!
+end
+```
+
+One important thing to note is that the client commits the offset of the batch's messages only after the _entire_ batch has been processed.
 
 ### Logging
 
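The checkpointing section added above implies at-least-once delivery: an offset is committed only after the message has been handled, so a crash before the commit means the message will be seen again. A minimal sketch using the consumer API (`process` is a hypothetical application helper):

```ruby
consumer.each_message do |message|
  # If this block raises, the offset is never committed, so the message will
  # be redelivered when this consumer (or another group member) resumes.
  process(message)
end
```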
data/lib/kafka/async_producer.rb
CHANGED
@@ -69,31 +69,23 @@ module Kafka
     # @param delivery_interval [Integer] if greater than zero, the number of
     #   seconds between automatic message deliveries.
     #
-    def initialize(sync_producer:, max_queue_size: 1000, delivery_threshold: 0, delivery_interval: 0)
+    def initialize(sync_producer:, max_queue_size: 1000, delivery_threshold: 0, delivery_interval: 0, instrumenter:)
       raise ArgumentError unless max_queue_size > 0
       raise ArgumentError unless delivery_threshold >= 0
       raise ArgumentError unless delivery_interval >= 0
 
       @queue = Queue.new
       @max_queue_size = max_queue_size
+      @instrumenter = instrumenter
 
-      @
-
-
-
-
-      )
+      @worker = Worker.new(
+        queue: @queue,
+        producer: sync_producer,
+        delivery_threshold: delivery_threshold,
+      )
 
-
-
-
-      @worker_thread.abort_on_exception = true
-
-      if delivery_interval > 0
-        Thread.new do
-          Timer.new(queue: @queue, interval: delivery_interval).run
-        end
-      end
+      # The timer will no-op if the delivery interval is zero.
+      @timer = Timer.new(queue: @queue, interval: delivery_interval)
     end
 
     # Produces a message to the specified topic.
@@ -102,9 +94,12 @@ module Kafka
     # @param (see Kafka::Producer#produce)
     # @raise [BufferOverflow] if the message queue is full.
    # @return [nil]
-    def produce(
-
+    def produce(value, topic:, **options)
+      ensure_threads_running!
+
+      buffer_overflow(topic) if @queue.size >= @max_queue_size
 
+      args = [value, **options.merge(topic: topic)]
       @queue << [:produce, args]
 
       nil
@@ -128,11 +123,35 @@ module Kafka
     # @return [nil]
     def shutdown
       @queue << [:shutdown, nil]
-      @worker_thread.join
+      @worker_thread && @worker_thread.join
 
       nil
     end
 
+    private
+
+    def ensure_threads_running!
+      @worker_thread = nil unless @worker_thread && @worker_thread.alive?
+      @worker_thread ||= start_thread { @worker.run }
+
+      @timer_thread = nil unless @timer_thread && @timer_thread.alive?
+      @timer_thread ||= start_thread { @timer.run }
+    end
+
+    def start_thread(&block)
+      thread = Thread.new(&block)
+      thread.abort_on_exception = true
+      thread
+    end
+
+    def buffer_overflow(topic)
+      @instrumenter.instrument("buffer_overflow.producer", {
+        topic: topic,
+      })
+
+      raise BufferOverflow
+    end
+
     class Timer
       def initialize(interval:, queue:)
         @queue = queue
@@ -140,6 +159,9 @@ module Kafka
       end
 
       def run
+        # Permanently sleep if the timer interval is zero.
+        Thread.stop if @interval.zero?
+
         loop do
           sleep(@interval)
           @queue << [:deliver_messages, nil]
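The rewrite above is the fix for #166: Ruby threads do not survive `fork(2)`, so worker and timer threads started when the producer was created in a Unicorn master process were dead in every forked worker, and queued messages were never delivered. Starting the threads lazily from `produce` via `ensure_threads_running!`, and restarting them whenever they are no longer alive, makes the producer fork-safe. A standalone sketch of the underlying Ruby behavior:

```ruby
# Only the thread that calls fork survives in the child process; any other
# thread exists as an object but is dead, so its work silently stops.
worker = Thread.new { loop { sleep 1 } }

fork do
  puts worker.alive? # => false in the child
end

Process.wait
```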
data/lib/kafka/client.rb
CHANGED
data/lib/kafka/producer.rb
CHANGED
@@ -194,11 +194,11 @@ module Kafka
       )
 
       if buffer_size >= @max_buffer_size
-
+        buffer_overflow topic, "Max buffer size (#{@max_buffer_size} messages) exceeded"
       end
 
       if buffer_bytesize + message.bytesize >= @max_buffer_bytesize
-
+        buffer_overflow topic, "Max buffer bytesize (#{@max_buffer_bytesize} bytes) exceeded"
       end
 
       @target_topics.add(topic)
@@ -362,5 +362,13 @@ module Kafka
 
       @pending_message_queue.replace(failed_messages)
     end
+
+    def buffer_overflow(topic, message)
+      @instrumenter.instrument("buffer_overflow.producer", {
+        topic: topic,
+      })
+
+      raise BufferOverflow, message
+    end
   end
 end
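Both overflow sites correspond to the producer's two buffer limits, which are set when the producer is built. A sketch of the configuration with illustrative values:

```ruby
require "kafka"

kafka = Kafka.new(seed_brokers: ["kafka1:9092"])

producer = kafka.producer(
  max_buffer_size: 10_000,         # raise BufferOverflow beyond 10k buffered messages
  max_buffer_bytesize: 10_000_000, # or beyond roughly 10 MB of buffered data
)
```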
data/lib/kafka/protocol/offset_commit_request.rb
CHANGED
@@ -1,6 +1,9 @@
 module Kafka
   module Protocol
     class OffsetCommitRequest
+      # This value signals to the broker that its default configuration should be used.
+      DEFAULT_RETENTION_TIME = -1
+
       def api_key
         8
       end
@@ -13,7 +16,7 @@ module Kafka
         OffsetCommitResponse
       end
 
-      def initialize(group_id:, generation_id:, member_id:, retention_time:
+      def initialize(group_id:, generation_id:, member_id:, retention_time: DEFAULT_RETENTION_TIME, offsets:)
        @group_id = group_id
        @generation_id = generation_id
        @member_id = member_id
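This is the fix for the lost-offsets bug (#167): every offset commit carries a retention time, and sending -1 tells the broker to fall back to its configured `offsets.retention.minutes` rather than expiring the commits prematurely. A sketch of building the request with the new default (the `offsets` layout shown is an assumption for illustration):

```ruby
# retention_time is omitted, so DEFAULT_RETENTION_TIME (-1) applies and the
# broker uses its own offset retention configuration.
request = Kafka::Protocol::OffsetCommitRequest.new(
  group_id: "my-group",
  generation_id: 1,
  member_id: "consumer-1",
  offsets: { "posts" => { 0 => 42 } }, # topic => partition => offset (assumed shape)
)
```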
data/lib/kafka/version.rb
CHANGED
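The version.rb change (a one-line bump, +1 -1, collapsed in this view) updates the gem's version constant, which can be checked at runtime after upgrading:

```ruby
require "kafka"

puts Kafka::VERSION # => "0.3.5"
```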
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: ruby-kafka
 version: !ruby/object:Gem::Version
-  version: 0.3.4
+  version: 0.3.5
 platform: ruby
 authors:
 - Daniel Schierbeck
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2016-04-
+date: 2016-04-27 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler