ruby-kafka 0.5.5 → 0.6.0.beta1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +2 -0
- data/examples/consumer-group.rb +8 -2
- data/lib/kafka/client.rb +7 -0
- data/lib/kafka/consumer.rb +73 -76
- data/lib/kafka/fetcher.rb +178 -0
- data/lib/kafka/version.rb +1 -1
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: cc9b975d79ef5be40b82d8f534995aa397d88a12b557f0b436bea4e42c0d7baa
+  data.tar.gz: f2e1fa46cbd12f0bc551f527aabd0469d5dd0bcc89bdcd27a71e05dedf9bd474
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 60d506503b7bb77cde82b6a606bbe73ea5f8da247b4acd676a933ca91daa8e421d8beb91642469b4847725a0b6c4cfa191c74fc1f12413846b5e4bf5dd14186c
+  data.tar.gz: 9b9a7733579b0ed9c81546ea6f451df85fdbf88c9aa9ab040d81ab49f36c3d029902ae69815d07f2118a93bdf410429cd019a96d90b79b0df47eb8cbc15c2daa
data/CHANGELOG.md
CHANGED
data/examples/consumer-group.rb
CHANGED
@@ -22,6 +22,12 @@ consumer.subscribe(topic)
 trap("TERM") { consumer.stop }
 trap("INT") { consumer.stop }
 
-
-
+begin
+  consumer.each_message do |message|
+  end
+rescue Kafka::ProcessingError => e
+  warn "Got #{e.cause}"
+  consumer.pause(e.topic, e.partition, timeout: 20)
+
+  retry
 end
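The updated example handles processing failures by pausing the offending partition and retrying the loop. A fuller sketch of the same pattern, where `process` is a hypothetical application method and the 30-second pause is an arbitrary choice, not part of the diff:

begin
  consumer.each_message do |message|
    # Any exception raised here is wrapped in a Kafka::ProcessingError that
    # carries the topic, partition, and offset of the failing message.
    process(message.value) # hypothetical application method
  end
rescue Kafka::ProcessingError => e
  warn "Failed at #{e.topic}/#{e.partition}, offset #{e.offset}: #{e.cause}"

  # Pause only the offending partition; other partitions keep being consumed.
  consumer.pause(e.topic, e.partition, timeout: 30)
  retry
end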
data/lib/kafka/client.rb
CHANGED
@@ -305,12 +305,19 @@ module Kafka
         interval: heartbeat_interval,
       )
 
+      fetcher = Fetcher.new(
+        cluster: initialize_cluster,
+        logger: @logger,
+        instrumenter: instrumenter,
+      )
+
       Consumer.new(
         cluster: cluster,
         logger: @logger,
         instrumenter: instrumenter,
         group: group,
         offset_manager: offset_manager,
+        fetcher: fetcher,
         session_timeout: session_timeout,
         heartbeat: heartbeat,
       )
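The new Fetcher is constructed inside Kafka::Client#consumer and injected into the Consumer, so application code keeps using the same entry points. A minimal sketch, assuming the usual ruby-kafka setup (broker address, client id, group id, and topic are placeholders):

require "kafka"

kafka = Kafka.new(seed_brokers: ["localhost:9092"], client_id: "my-app")

# The background fetcher is wired up for us here; nothing changes for callers.
consumer = kafka.consumer(group_id: "my-group")
consumer.subscribe("greetings")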
data/lib/kafka/consumer.rb
CHANGED
@@ -1,6 +1,6 @@
 require "kafka/consumer_group"
 require "kafka/offset_manager"
-require "kafka/fetch_operation"
+require "kafka/fetcher"
 
 module Kafka
 
@@ -40,13 +40,14 @@ module Kafka
   #
   class Consumer
 
-    def initialize(cluster:, logger:, instrumenter:, group:, offset_manager:, session_timeout:, heartbeat:)
+    def initialize(cluster:, logger:, instrumenter:, group:, fetcher:, offset_manager:, session_timeout:, heartbeat:)
       @cluster = cluster
       @logger = logger
       @instrumenter = instrumenter
       @group = group
       @offset_manager = offset_manager
       @session_timeout = session_timeout
+      @fetcher = fetcher
       @heartbeat = heartbeat
 
       # A list of partitions that have been paused, per topic.
@@ -55,9 +56,6 @@ module Kafka
       # Whether or not the consumer is currently consuming messages.
       @running = false
 
-      # The maximum number of bytes to fetch from a single partition, by topic.
-      @max_bytes = {}
-
       # Hash containing offsets for each topic and partition that has the
       # automatically_mark_as_processed feature disabled. Offset manager is only active
       # when everything is suppose to happen automatically. Otherwise we need to keep track of the
@@ -93,7 +91,7 @@ module Kafka
 
       @group.subscribe(topic)
       @offset_manager.set_default_offset(topic, default_offset)
-      @max_bytes[topic] = max_bytes_per_partition
+      @fetcher.subscribe(topic, max_bytes_per_partition: max_bytes_per_partition)
 
       nil
     end
@@ -136,6 +134,8 @@ module Kafka
     def resume(topic, partition)
       paused_partitions = @paused_partitions.fetch(topic, {})
       paused_partitions.delete(partition)
+
+      seek_to_next(topic, partition)
     end
 
     # Whether the topic partition is currently paused.
@@ -153,15 +153,7 @@ module Kafka
         # absolute point in time.
         timeout = partitions.fetch(partition)
 
-        if timeout.nil?
-          true
-        elsif Time.now < timeout
-          true
-        else
-          @logger.info "Automatically resuming partition #{topic}/#{partition}, pause timeout expired"
-          resume(topic, partition)
-          false
-        end
+        timeout.nil? || Time.now < timeout
       end
     end
 
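Behaviourally, paused? no longer resumes a partition as a side effect: expired pauses are now resumed up front by resume_paused_partitions! (see the fetch_batches hunk below), and resume seeks the fetcher to the next offset. Manual pause handling might look like this sketch (topic name and partition number are placeholders):

# Pause with no timeout: the partition stays paused until resume is called.
consumer.pause("greetings", 0)

# ... later, once the slow dependency has recovered ...
consumer.resume("greetings", 0) # also seeks the fetcher to the next offset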
@@ -193,24 +185,16 @@ module Kafka
     # {Kafka::ProcessingError} instance.
     # @return [nil]
     def each_message(min_bytes: 1, max_bytes: 10485760, max_wait_time: 1, automatically_mark_as_processed: true)
+      @fetcher.configure(
+        min_bytes: min_bytes,
+        max_bytes: max_bytes,
+        max_wait_time: max_wait_time,
+      )
+
       consumer_loop do
-        batches = fetch_batches(
-          min_bytes: min_bytes,
-          max_bytes: max_bytes,
-          max_wait_time: max_wait_time,
-          automatically_mark_as_processed: automatically_mark_as_processed
-        )
+        batches = fetch_batches
 
         batches.each do |batch|
-          unless batch.empty?
-            @instrumenter.instrument("fetch_batch.consumer", {
-              topic: batch.topic,
-              partition: batch.partition,
-              offset_lag: batch.offset_lag,
-              highwater_mark_offset: batch.highwater_mark_offset,
-              message_count: batch.messages.count,
-            })
-          end
           batch.messages.each do |message|
             notification = {
               topic: message.topic,
@@ -281,13 +265,14 @@ module Kafka
     # @yieldparam batch [Kafka::FetchedBatch] a message batch fetched from Kafka.
     # @return [nil]
     def each_batch(min_bytes: 1, max_bytes: 10485760, max_wait_time: 1, automatically_mark_as_processed: true)
+      @fetcher.configure(
+        min_bytes: min_bytes,
+        max_bytes: max_bytes,
+        max_wait_time: max_wait_time,
+      )
+
       consumer_loop do
-        batches = fetch_batches(
-          min_bytes: min_bytes,
-          max_bytes: max_bytes,
-          max_wait_time: max_wait_time,
-          automatically_mark_as_processed: automatically_mark_as_processed
-        )
+        batches = fetch_batches
 
         batches.each do |batch|
           unless batch.empty?
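The public signatures of each_message and each_batch are unchanged; the fetch tuning options are simply forwarded to the background fetcher before the loop starts. For reference, a typical each_batch call looks like this sketch, where handle is a hypothetical application method:

consumer.each_batch(min_bytes: 1024, max_wait_time: 5) do |batch|
  puts "#{batch.messages.count} messages from #{batch.topic}/#{batch.partition}"

  batch.messages.each do |message|
    handle(message.value) # hypothetical application method
  end
end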
@@ -369,6 +354,8 @@ module Kafka
     def consumer_loop
       @running = true
 
+      @fetcher.start
+
       while @running
         begin
           @instrumenter.instrument("loop.consumer") do
@@ -394,6 +381,8 @@ module Kafka
         end
       end
     ensure
+      @fetcher.stop
+
       # In order to quickly have the consumer group re-balance itself, it's
       # important that members explicitly tell Kafka when they're leaving.
       make_final_offsets_commit!
@@ -433,59 +422,67 @@ module Kafka
         # only keep commits for the partitions that we're still assigned.
         @offset_manager.clear_offsets_excluding(@group.assigned_partitions)
       end
-    end
-
-    def fetch_batches(min_bytes:, max_bytes:, max_wait_time:, automatically_mark_as_processed:)
-      # Return early if the consumer has been stopped.
-      return [] if !@running
-
-      join_group unless @group.member?
-
-      subscribed_partitions = @group.subscribed_partitions
 
-      @heartbeat.send_if_necessary
-
-      operation = FetchOperation.new(
-        cluster: @cluster,
-        logger: @logger,
-        min_bytes: min_bytes,
-        max_bytes: max_bytes,
-        max_wait_time: max_wait_time,
-      )
+      @fetcher.reset
 
-      subscribed_partitions.each do |topic, partitions|
+      @group.assigned_partitions.each do |topic, partitions|
         partitions.each do |partition|
-          if paused?(topic, partition)
-            @logger.warn "Not fetching from #{topic}/#{partition} due to pause"
+          if paused?(topic, partition)
+            @logger.warn "Not fetching from #{topic}/#{partition} due to pause"
           else
-            # When automatic marking is off, the first poll needs to be based on the last committed
-            # offset from Kafka, that's why we fallback in case of nil (it may not be 0)
-            if @current_offsets[topic].key?(partition)
-              offset = @current_offsets[topic][partition] + 1
-            else
-              offset = @offset_manager.next_offset_for(topic, partition)
-            end
+            seek_to_next(topic, partition)
           end
+        end
+      end
+    end
 
-
+    def seek_to_next(topic, partition)
+      # When automatic marking is off, the first poll needs to be based on the last committed
+      # offset from Kafka, that's why we fallback in case of nil (it may not be 0)
+      if @current_offsets[topic].key?(partition)
+        offset = @current_offsets[topic][partition] + 1
+      else
+        offset = @offset_manager.next_offset_for(topic, partition)
+      end
 
-
-
-
-
-
+      @fetcher.seek(topic, partition, offset)
+    end
+
+    def resume_paused_partitions!
+      @paused_partitions.each do |topic, partitions|
+        partitions.keys.each do |partition|
+          unless paused?(topic, partition)
+            @logger.info "Automatically resuming partition #{topic}/#{partition}, pause timeout expired"
+            resume(topic, partition)
           end
         end
       end
+    end
 
-
-
-
+    def fetch_batches
+      # Return early if the consumer has been stopped.
+      return [] if !@running
 
-      @logger.info "There are no partitions to fetch from, sleeping for #{backoff}s"
-      sleep backoff
+      join_group unless @group.member?
 
-
+      @heartbeat.send_if_necessary
+
+      resume_paused_partitions!
+
+      if !@fetcher.data?
+        @logger.debug "No batches to process"
+        sleep 2
+        []
+      else
+        tag, message = @fetcher.poll
+
+        case tag
+        when :batches
+          message
+        when :exception
+          raise message
+        end
+      end
     rescue OffsetOutOfRange => e
       @logger.error "Invalid offset for #{e.topic}/#{e.partition}, resetting to default offset"
 
data/lib/kafka/fetcher.rb
ADDED
@@ -0,0 +1,178 @@
+require "kafka/fetch_operation"
+
+module Kafka
+  class Fetcher
+    MAX_QUEUE_SIZE = 100
+
+    attr_reader :queue
+
+    def initialize(cluster:, logger:, instrumenter:)
+      @cluster = cluster
+      @logger = logger
+      @instrumenter = instrumenter
+
+      @queue = Queue.new
+      @commands = Queue.new
+      @next_offsets = Hash.new { |h, k| h[k] = {} }
+
+      # Long poll until at least this many bytes can be fetched.
+      @min_bytes = 1
+
+      # Long poll at most this number of seconds.
+      @max_wait_time = 1
+
+      # The maximum number of bytes to fetch for any given fetch request.
+      @max_bytes = 10485760
+
+      # The maximum number of bytes to fetch per partition, by topic.
+      @max_bytes_per_partition = {}
+
+      @thread = Thread.new do
+        loop while true
+      end
+
+      @thread.abort_on_exception = true
+    end
+
+    def subscribe(topic, max_bytes_per_partition:)
+      @commands << [:subscribe, [topic, max_bytes_per_partition]]
+    end
+
+    def seek(topic, partition, offset)
+      @commands << [:seek, [topic, partition, offset]]
+    end
+
+    def configure(min_bytes:, max_bytes:, max_wait_time:)
+      @commands << [:configure, [min_bytes, max_bytes, max_wait_time]]
+    end
+
+    def start
+      @commands << [:start, []]
+    end
+
+    def handle_start
+      raise "already started" if @running
+
+      @running = true
+    end
+
+    def stop
+      @commands << [:stop, []]
+    end
+
+    def reset
+      @commands << [:reset, []]
+    end
+
+    def data?
+      !@queue.empty?
+    end
+
+    def poll
+      @queue.deq
+    end
+
+    private
+
+    def loop
+      if !@commands.empty?
+        cmd, args = @commands.deq
+
+        @logger.debug "Handling fetcher command: #{cmd}"
+
+        send("handle_#{cmd}", *args)
+      elsif !@running
+        sleep 0.1
+      elsif @queue.size < MAX_QUEUE_SIZE
+        step
+      else
+        @logger.warn "Reached max fetcher queue size (#{MAX_QUEUE_SIZE}), sleeping 1s"
+        sleep 1
+      end
+    end
+
+    def handle_configure(min_bytes, max_bytes, max_wait_time)
+      @min_bytes = min_bytes
+      @max_bytes = max_bytes
+      @max_wait_time = max_wait_time
+    end
+
+    def handle_reset
+      @next_offsets.clear
+    end
+
+    def handle_stop(*)
+      @running = false
+
+      # After stopping, we need to reconfigure the topics and partitions to fetch
+      # from. Otherwise we'd keep fetching from a bunch of partitions we may no
+      # longer be assigned.
+      handle_reset
+    end
+
+    def handle_subscribe(topic, max_bytes_per_partition)
+      @logger.info "Will fetch at most #{max_bytes_per_partition} bytes at a time per partition from #{topic}"
+      @max_bytes_per_partition[topic] = max_bytes_per_partition
+    end
+
+    def handle_seek(topic, partition, offset)
+      @logger.info "Seeking #{topic}/#{partition} to offset #{offset}"
+      @next_offsets[topic][partition] = offset
+    end
+
+    def step
+      batches = fetch_batches
+
+      batches.each do |batch|
+        unless batch.empty?
+          @instrumenter.instrument("fetch_batch.consumer", {
+            topic: batch.topic,
+            partition: batch.partition,
+            offset_lag: batch.offset_lag,
+            highwater_mark_offset: batch.highwater_mark_offset,
+            message_count: batch.messages.count,
+          })
+        end
+
+        @next_offsets[batch.topic][batch.partition] = batch.last_offset + 1
+      end
+
+      @queue << [:batches, batches]
+    rescue Kafka::NoPartitionsToFetchFrom
+      @logger.warn "No partitions to fetch from, sleeping for 1s"
+      sleep 1
+    rescue Kafka::Error => e
+      @queue << [:exception, e]
+    end
+
+    def fetch_batches
+      @logger.debug "Fetching batches"
+
+      operation = FetchOperation.new(
+        cluster: @cluster,
+        logger: @logger,
+        min_bytes: @min_bytes,
+        max_bytes: @max_bytes,
+        max_wait_time: @max_wait_time,
+      )
+
+      @next_offsets.each do |topic, partitions|
+        # Fetch at most this many bytes from any single partition.
+        max_bytes = @max_bytes_per_partition[topic]
+
+        partitions.each do |partition, offset|
+          operation.fetch_from_partition(topic, partition, offset: offset, max_bytes: max_bytes)
+        end
+      end
+
+      operation.execute
+    rescue NoPartitionsToFetchFrom
+      backoff = @max_wait_time > 0 ? @max_wait_time : 1
+
+      @logger.info "There are no partitions to fetch from, sleeping for #{backoff}s"
+      sleep backoff
+
+      []
+    end
+  end
+end
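The new Fetcher is essentially a small actor: its public methods only enqueue commands, a dedicated thread dispatches them via send("handle_#{cmd}"), and fetched batches (or exceptions) are pushed onto an output queue for the consumer to poll. A stripped-down, standalone sketch of that command-queue pattern (not ruby-kafka code; Worker, do_work, and the queue limit of 10 are illustrative):

require "thread"

class Worker
  def initialize
    @commands = Queue.new # requests coming from the caller's thread
    @results = Queue.new  # data produced by the background thread
    @running = false

    @thread = Thread.new { run while true }
    @thread.abort_on_exception = true
  end

  def start
    @commands << [:start, []]
  end

  def stop
    @commands << [:stop, []]
  end

  def data?
    !@results.empty?
  end

  def poll
    @results.deq
  end

  private

  def run
    if !@commands.empty?
      cmd, args = @commands.deq
      send("handle_#{cmd}", *args) # dispatches to handle_start / handle_stop
    elsif !@running
      sleep 0.1                    # idle until started
    elsif @results.size < 10
      @results << [:data, do_work] # one unit of work per iteration
    else
      sleep 1                      # back off when the caller isn't keeping up
    end
  rescue => e
    @results << [:exception, e]
  end

  def handle_start(*)
    @running = true
  end

  def handle_stop(*)
    @running = false
  end

  def do_work
    Time.now # placeholder for the real fetch
  end
end

Usage mirrors the Consumer/Fetcher interaction: the caller starts the worker, polls tagged results, and raises when the tag is :exception, which is exactly what Consumer#fetch_batches does above.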
data/lib/kafka/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: ruby-kafka
 version: !ruby/object:Gem::Version
-  version: 0.5.5
+  version: 0.6.0.beta1
 platform: ruby
 authors:
 - Daniel Schierbeck
@@ -323,6 +323,7 @@ files:
 - lib/kafka/fetch_operation.rb
 - lib/kafka/fetched_batch.rb
 - lib/kafka/fetched_message.rb
+- lib/kafka/fetcher.rb
 - lib/kafka/gzip_codec.rb
 - lib/kafka/heartbeat.rb
 - lib/kafka/instrumenter.rb
@@ -404,9 +405,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
     version: 2.1.0
 required_rubygems_version: !ruby/object:Gem::Requirement
   requirements:
-  - - ">="
+  - - ">"
   - !ruby/object:Gem::Version
-    version: '0'
+    version: 1.3.1
 requirements: []
 rubyforge_project:
 rubygems_version: 2.7.6