ruby-kafka 0.5.5 → 0.6.0.beta1
- checksums.yaml +4 -4
- data/CHANGELOG.md +2 -0
- data/examples/consumer-group.rb +8 -2
- data/lib/kafka/client.rb +7 -0
- data/lib/kafka/consumer.rb +73 -76
- data/lib/kafka/fetcher.rb +178 -0
- data/lib/kafka/version.rb +1 -1
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: cc9b975d79ef5be40b82d8f534995aa397d88a12b557f0b436bea4e42c0d7baa
+  data.tar.gz: f2e1fa46cbd12f0bc551f527aabd0469d5dd0bcc89bdcd27a71e05dedf9bd474
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 60d506503b7bb77cde82b6a606bbe73ea5f8da247b4acd676a933ca91daa8e421d8beb91642469b4847725a0b6c4cfa191c74fc1f12413846b5e4bf5dd14186c
+  data.tar.gz: 9b9a7733579b0ed9c81546ea6f451df85fdbf88c9aa9ab040d81ab49f36c3d029902ae69815d07f2118a93bdf410429cd019a96d90b79b0df47eb8cbc15c2daa
data/CHANGELOG.md
CHANGED
data/examples/consumer-group.rb
CHANGED
@@ -22,6 +22,12 @@ consumer.subscribe(topic)
 trap("TERM") { consumer.stop }
 trap("INT") { consumer.stop }
 
-consumer.each_message do |message|
-
+begin
+  consumer.each_message do |message|
+  end
+rescue Kafka::ProcessingError => e
+  warn "Got #{e.cause}"
+  consumer.pause(e.topic, e.partition, timeout: 20)
+
+  retry
 end
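The updated example demonstrates the intended recovery pattern for `each_message`: an exception raised in the block surfaces as `Kafka::ProcessingError`, which carries the topic, partition, and offset of the failing message, so the caller can pause that partition and retry the loop. A slightly fuller sketch of the same pattern — the broker address, topic name, and `process` helper are illustrative, not part of the gem:

require "kafka"

kafka = Kafka.new(seed_brokers: ["localhost:9092"])
consumer = kafka.consumer(group_id: "my-group")
consumer.subscribe("my-topic")

begin
  consumer.each_message do |message|
    process(message) # any exception raised here is wrapped in Kafka::ProcessingError
  end
rescue Kafka::ProcessingError => e
  # e.topic/e.partition/e.offset identify the failing message; e.cause is the
  # original exception raised inside the block.
  warn "Failed at #{e.topic}/#{e.partition}, offset #{e.offset}: #{e.cause}"

  # Skip this partition for 20 seconds, then pick it back up.
  consumer.pause(e.topic, e.partition, timeout: 20)
  retry
end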
data/lib/kafka/client.rb
CHANGED
@@ -305,12 +305,19 @@ module Kafka
         interval: heartbeat_interval,
       )
 
+      fetcher = Fetcher.new(
+        cluster: initialize_cluster,
+        logger: @logger,
+        instrumenter: instrumenter,
+      )
+
       Consumer.new(
         cluster: cluster,
         logger: @logger,
         instrumenter: instrumenter,
         group: group,
         offset_manager: offset_manager,
+        fetcher: fetcher,
         session_timeout: session_timeout,
         heartbeat: heartbeat,
       )
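Note that the Fetcher is handed its own cluster object (`initialize_cluster`) rather than the `cluster` already created for the Consumer — presumably so the background fetch thread never shares broker connections with the consuming thread. The public API is unchanged; a sketch of the call that now wires all of this up internally (broker and group names are illustrative):

kafka = Kafka.new(seed_brokers: ["kafka1:9092"], client_id: "my-app")

# Same call as in 0.5.x; the Fetcher is created and injected behind the scenes.
consumer = kafka.consumer(group_id: "my-group", session_timeout: 30)
consumer.subscribe("my-topic")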
data/lib/kafka/consumer.rb
CHANGED
@@ -1,6 +1,6 @@
 require "kafka/consumer_group"
 require "kafka/offset_manager"
-require "kafka/fetch_operation"
+require "kafka/fetcher"
 
 module Kafka
 
@@ -40,13 +40,14 @@ module Kafka
   #
   class Consumer
 
-    def initialize(cluster:, logger:, instrumenter:, group:, offset_manager:, session_timeout:, heartbeat:)
+    def initialize(cluster:, logger:, instrumenter:, group:, fetcher:, offset_manager:, session_timeout:, heartbeat:)
       @cluster = cluster
       @logger = logger
       @instrumenter = instrumenter
       @group = group
       @offset_manager = offset_manager
       @session_timeout = session_timeout
+      @fetcher = fetcher
       @heartbeat = heartbeat
 
       # A list of partitions that have been paused, per topic.
@@ -55,9 +56,6 @@ module Kafka
       # Whether or not the consumer is currently consuming messages.
       @running = false
 
-      # The maximum number of bytes to fetch from a single partition, by topic.
-      @max_bytes = {}
-
       # Hash containing offsets for each topic and partition that has the
       # automatically_mark_as_processed feature disabled. Offset manager is only active
       # when everything is suppose to happen automatically. Otherwise we need to keep track of the
@@ -93,7 +91,7 @@ module Kafka
 
       @group.subscribe(topic)
       @offset_manager.set_default_offset(topic, default_offset)
-      @max_bytes[topic] = max_bytes_per_partition
+      @fetcher.subscribe(topic, max_bytes_per_partition: max_bytes_per_partition)
 
       nil
     end
@@ -136,6 +134,8 @@ module Kafka
     def resume(topic, partition)
       paused_partitions = @paused_partitions.fetch(topic, {})
       paused_partitions.delete(partition)
+
+      seek_to_next(topic, partition)
     end
 
     # Whether the topic partition is currently paused.
@@ -153,15 +153,7 @@ module Kafka
         # absolute point in time.
         timeout = partitions.fetch(partition)
 
-        if timeout.nil?
-          true
-        elsif Time.now < timeout
-          true
-        else
-          @logger.info "Automatically resuming partition #{topic}/#{partition}, pause timeout expired"
-          resume(topic, partition)
-          false
-        end
+        timeout.nil? || Time.now < timeout
       end
     end
 
@@ -193,24 +185,16 @@ module Kafka
     # {Kafka::ProcessingError} instance.
     # @return [nil]
     def each_message(min_bytes: 1, max_bytes: 10485760, max_wait_time: 1, automatically_mark_as_processed: true)
+      @fetcher.configure(
+        min_bytes: min_bytes,
+        max_bytes: max_bytes,
+        max_wait_time: max_wait_time,
+      )
+
       consumer_loop do
-        batches = fetch_batches(
-          min_bytes: min_bytes,
-          max_bytes: max_bytes,
-          max_wait_time: max_wait_time,
-          automatically_mark_as_processed: automatically_mark_as_processed
-        )
+        batches = fetch_batches
 
         batches.each do |batch|
-          unless batch.empty?
-            @instrumenter.instrument("fetch_batch.consumer", {
-              topic: batch.topic,
-              partition: batch.partition,
-              offset_lag: batch.offset_lag,
-              highwater_mark_offset: batch.highwater_mark_offset,
-              message_count: batch.messages.count,
-            })
-          end
           batch.messages.each do |message|
             notification = {
               topic: message.topic,
@@ -281,13 +265,14 @@ module Kafka
     # @yieldparam batch [Kafka::FetchedBatch] a message batch fetched from Kafka.
     # @return [nil]
    def each_batch(min_bytes: 1, max_bytes: 10485760, max_wait_time: 1, automatically_mark_as_processed: true)
+      @fetcher.configure(
+        min_bytes: min_bytes,
+        max_bytes: max_bytes,
+        max_wait_time: max_wait_time,
+      )
+
       consumer_loop do
-        batches = fetch_batches(
-          min_bytes: min_bytes,
-          max_bytes: max_bytes,
-          max_wait_time: max_wait_time,
-          automatically_mark_as_processed: automatically_mark_as_processed
-        )
+        batches = fetch_batches
 
         batches.each do |batch|
           unless batch.empty?
@@ -369,6 +354,8 @@ module Kafka
     def consumer_loop
       @running = true
 
+      @fetcher.start
+
       while @running
         begin
           @instrumenter.instrument("loop.consumer") do
@@ -394,6 +381,8 @@ module Kafka
         end
       end
     ensure
+      @fetcher.stop
+
       # In order to quickly have the consumer group re-balance itself, it's
       # important that members explicitly tell Kafka when they're leaving.
       make_final_offsets_commit!
@@ -433,59 +422,67 @@ module Kafka
         # only keep commits for the partitions that we're still assigned.
         @offset_manager.clear_offsets_excluding(@group.assigned_partitions)
       end
-    end
-
-    def fetch_batches(min_bytes:, max_bytes:, max_wait_time:, automatically_mark_as_processed:)
-      # Return early if the consumer has been stopped.
-      return [] if !@running
-
-      join_group unless @group.member?
-
-      subscribed_partitions = @group.subscribed_partitions
 
-      @heartbeat.send_if_necessary
-
-      operation = FetchOperation.new(
-        cluster: @cluster,
-        logger: @logger,
-        min_bytes: min_bytes,
-        max_bytes: max_bytes,
-        max_wait_time: max_wait_time,
-      )
+      @fetcher.reset
 
-      subscribed_partitions.each do |topic, partitions|
+      @group.assigned_partitions.each do |topic, partitions|
         partitions.each do |partition|
-          if paused?(topic, partition)
-
+          if paused?(topic, partition)
+            @logger.warn "Not fetching from #{topic}/#{partition} due to pause"
           else
-            # When automatic marking is off, the first poll needs to be based on the last committed
-            # offset from Kafka, that's why we fallback in case of nil (it may not be 0)
-            if @current_offsets[topic].key?(partition)
-              offset = @current_offsets[topic][partition] + 1
-            else
-              offset = @offset_manager.next_offset_for(topic, partition)
-            end
+            seek_to_next(topic, partition)
          end
+        end
+      end
+    end
 
-
+    def seek_to_next(topic, partition)
+      # When automatic marking is off, the first poll needs to be based on the last committed
+      # offset from Kafka, that's why we fallback in case of nil (it may not be 0)
+      if @current_offsets[topic].key?(partition)
+        offset = @current_offsets[topic][partition] + 1
+      else
+        offset = @offset_manager.next_offset_for(topic, partition)
+      end
 
-
-
-
-
-
+      @fetcher.seek(topic, partition, offset)
+    end
+
+    def resume_paused_partitions!
+      @paused_partitions.each do |topic, partitions|
+        partitions.keys.each do |partition|
+          unless paused?(topic, partition)
+            @logger.info "Automatically resuming partition #{topic}/#{partition}, pause timeout expired"
+            resume(topic, partition)
          end
        end
      end
+    end
 
-      operation.execute
-    rescue NoPartitionsToFetchFrom
-      backoff = max_wait_time > 0 ? max_wait_time : 1
+    def fetch_batches
+      # Return early if the consumer has been stopped.
+      return [] if !@running
 
-      @logger.info "There are no partitions to fetch from, sleeping for #{backoff}s"
-      sleep backoff
+      join_group unless @group.member?
 
-      []
+      @heartbeat.send_if_necessary
+
+      resume_paused_partitions!
+
+      if !@fetcher.data?
+        @logger.debug "No batches to process"
+        sleep 2
+        []
+      else
+        tag, message = @fetcher.poll
+
+        case tag
+        when :batches
+          message
+        when :exception
+          raise message
+        end
+      end
     rescue OffsetOutOfRange => e
      @logger.error "Invalid offset for #{e.topic}/#{e.partition}, resetting to default offset"
 
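With this change, `fetch_batches` no longer talks to Kafka itself: it polls the fetcher's queue and receives tagged tuples, either `[:batches, batches]` or `[:exception, e]`, re-raising the exception so that errors from the background thread still surface on the consuming thread. A minimal standalone sketch of that handoff (all names here are illustrative, not ruby-kafka API):

queue = Queue.new

# Stands in for the Fetcher's background thread.
fetcher_thread = Thread.new do
  queue << [:batches, ["batch-1", "batch-2"]]
  queue << [:exception, RuntimeError.new("broker went away")]
end

begin
  loop do
    tag, payload = queue.deq # blocks until the fetcher enqueues something

    case tag
    when :batches
      puts "processing #{payload.inspect}"
    when :exception
      raise payload # the fetcher's error surfaces on this thread
    end
  end
rescue RuntimeError => e
  puts "re-raised on consumer thread: #{e.message}"
ensure
  fetcher_thread.join
end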
data/lib/kafka/fetcher.rb
ADDED
@@ -0,0 +1,178 @@
+require "kafka/fetch_operation"
+
+module Kafka
+  class Fetcher
+    MAX_QUEUE_SIZE = 100
+
+    attr_reader :queue
+
+    def initialize(cluster:, logger:, instrumenter:)
+      @cluster = cluster
+      @logger = logger
+      @instrumenter = instrumenter
+
+      @queue = Queue.new
+      @commands = Queue.new
+      @next_offsets = Hash.new { |h, k| h[k] = {} }
+
+      # Long poll until at least this many bytes can be fetched.
+      @min_bytes = 1
+
+      # Long poll at most this number of seconds.
+      @max_wait_time = 1
+
+      # The maximum number of bytes to fetch for any given fetch request.
+      @max_bytes = 10485760
+
+      # The maximum number of bytes to fetch per partition, by topic.
+      @max_bytes_per_partition = {}
+
+      @thread = Thread.new do
+        loop while true
+      end
+
+      @thread.abort_on_exception = true
+    end
+
+    def subscribe(topic, max_bytes_per_partition:)
+      @commands << [:subscribe, [topic, max_bytes_per_partition]]
+    end
+
+    def seek(topic, partition, offset)
+      @commands << [:seek, [topic, partition, offset]]
+    end
+
+    def configure(min_bytes:, max_bytes:, max_wait_time:)
+      @commands << [:configure, [min_bytes, max_bytes, max_wait_time]]
+    end
+
+    def start
+      @commands << [:start, []]
+    end
+
+    def handle_start
+      raise "already started" if @running
+
+      @running = true
+    end
+
+    def stop
+      @commands << [:stop, []]
+    end
+
+    def reset
+      @commands << [:reset, []]
+    end
+
+    def data?
+      !@queue.empty?
+    end
+
+    def poll
+      @queue.deq
+    end
+
+    private
+
+    def loop
+      if !@commands.empty?
+        cmd, args = @commands.deq
+
+        @logger.debug "Handling fetcher command: #{cmd}"
+
+        send("handle_#{cmd}", *args)
+      elsif !@running
+        sleep 0.1
+      elsif @queue.size < MAX_QUEUE_SIZE
+        step
+      else
+        @logger.warn "Reached max fetcher queue size (#{MAX_QUEUE_SIZE}), sleeping 1s"
+        sleep 1
+      end
+    end
+
+    def handle_configure(min_bytes, max_bytes, max_wait_time)
+      @min_bytes = min_bytes
+      @max_bytes = max_bytes
+      @max_wait_time = max_wait_time
+    end
+
+    def handle_reset
+      @next_offsets.clear
+    end
+
+    def handle_stop(*)
+      @running = false
+
+      # After stopping, we need to reconfigure the topics and partitions to fetch
+      # from. Otherwise we'd keep fetching from a bunch of partitions we may no
+      # longer be assigned.
+      handle_reset
+    end
+
+    def handle_subscribe(topic, max_bytes_per_partition)
+      @logger.info "Will fetch at most #{max_bytes_per_partition} bytes at a time per partition from #{topic}"
+      @max_bytes_per_partition[topic] = max_bytes_per_partition
+    end
+
+    def handle_seek(topic, partition, offset)
+      @logger.info "Seeking #{topic}/#{partition} to offset #{offset}"
+      @next_offsets[topic][partition] = offset
+    end
+
+    def step
+      batches = fetch_batches
+
+      batches.each do |batch|
+        unless batch.empty?
+          @instrumenter.instrument("fetch_batch.consumer", {
+            topic: batch.topic,
+            partition: batch.partition,
+            offset_lag: batch.offset_lag,
+            highwater_mark_offset: batch.highwater_mark_offset,
+            message_count: batch.messages.count,
+          })
+        end
+
+        @next_offsets[batch.topic][batch.partition] = batch.last_offset + 1
+      end
+
+      @queue << [:batches, batches]
+    rescue Kafka::NoPartitionsToFetchFrom
+      @logger.warn "No partitions to fetch from, sleeping for 1s"
+      sleep 1
+    rescue Kafka::Error => e
+      @queue << [:exception, e]
+    end
+
+    def fetch_batches
+      @logger.debug "Fetching batches"
+
+      operation = FetchOperation.new(
+        cluster: @cluster,
+        logger: @logger,
+        min_bytes: @min_bytes,
+        max_bytes: @max_bytes,
+        max_wait_time: @max_wait_time,
+      )
+
+      @next_offsets.each do |topic, partitions|
+        # Fetch at most this many bytes from any single partition.
+        max_bytes = @max_bytes_per_partition[topic]
+
+        partitions.each do |partition, offset|
+          operation.fetch_from_partition(topic, partition, offset: offset, max_bytes: max_bytes)
+        end
+      end
+
+      operation.execute
+    rescue NoPartitionsToFetchFrom
+      backoff = @max_wait_time > 0 ? @max_wait_time : 1
+
+      @logger.info "There are no partitions to fetch from, sleeping for #{backoff}s"
+      sleep backoff
+
+      []
+    end
+  end
+end
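Fetcher is effectively a small actor: its public methods never touch internal state, they only enqueue commands, and the background thread dequeues each command and dispatches it to the matching handle_* method via send. All mutation therefore happens on one thread, while fetched batches flow back through a bounded queue (MAX_QUEUE_SIZE) that applies backpressure when the consumer falls behind. A stripped-down sketch of the same command-queue technique, using hypothetical names rather than ruby-kafka code:

# Not ruby-kafka code: a toy actor using the same command-queue technique.
class TinyActor
  def initialize
    @commands = Queue.new
    @counter = 0 # state touched only by the worker thread

    @thread = Thread.new { dispatch while true }
    @thread.abort_on_exception = true
  end

  # Public methods just enqueue; they are safe to call from any thread.
  def increment(by)
    @commands << [:increment, [by]]
  end

  def report
    @commands << [:report, []]
  end

  private

  def dispatch
    cmd, args = @commands.deq # blocks until a command arrives
    send("handle_#{cmd}", *args)
  end

  def handle_increment(by)
    @counter += by
  end

  def handle_report
    puts "counter is #{@counter}"
  end
end

actor = TinyActor.new
actor.increment(2)
actor.increment(3)
actor.report # prints "counter is 5" once the worker drains the queue
sleep 0.1

Unlike the toy above, Fetcher's loop also does work between commands (the step fetch) and stops fetching once @queue.size reaches MAX_QUEUE_SIZE.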
data/lib/kafka/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: ruby-kafka
 version: !ruby/object:Gem::Version
-  version: 0.5.5
+  version: 0.6.0.beta1
 platform: ruby
 authors:
 - Daniel Schierbeck
@@ -323,6 +323,7 @@ files:
 - lib/kafka/fetch_operation.rb
 - lib/kafka/fetched_batch.rb
 - lib/kafka/fetched_message.rb
+- lib/kafka/fetcher.rb
 - lib/kafka/gzip_codec.rb
 - lib/kafka/heartbeat.rb
 - lib/kafka/instrumenter.rb
@@ -404,9 +405,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
     version: 2.1.0
 required_rubygems_version: !ruby/object:Gem::Requirement
   requirements:
-  - - ">="
+  - - ">"
   - !ruby/object:Gem::Version
-    version: '0'
+    version: 1.3.1
 requirements: []
 rubyforge_project:
 rubygems_version: 2.7.6