ruby-kafka 0.5.5 → 0.6.0.beta1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 63ec54e4024c83bb5a73583700391db7b89fed005bb4b196cf912424351bba0e
4
- data.tar.gz: d567d51c5434f6ef034b769f78cc03286d91e875aa5ea9058623cd2e755cf28c
3
+ metadata.gz: cc9b975d79ef5be40b82d8f534995aa397d88a12b557f0b436bea4e42c0d7baa
4
+ data.tar.gz: f2e1fa46cbd12f0bc551f527aabd0469d5dd0bcc89bdcd27a71e05dedf9bd474
5
5
  SHA512:
6
- metadata.gz: 3f9f8269075c30eb60a3ee7afcca5c1fe8eab9aa6c5cf51c5512dc6b564299a5366d33bc97d08b61a17ecc4a299e066ecb48f86938c93643324a8083289ed3af
7
- data.tar.gz: 92767b83a152bef74992d1b4dc4b1818bdce8715a4e1e32c53671eaf6ee8d28d8e76d4800bf141c8cdd6cc44ef51aef78335464dbe504f8f72245ec8fc964a9a
6
+ metadata.gz: 60d506503b7bb77cde82b6a606bbe73ea5f8da247b4acd676a933ca91daa8e421d8beb91642469b4847725a0b6c4cfa191c74fc1f12413846b5e4bf5dd14186c
7
+ data.tar.gz: 9b9a7733579b0ed9c81546ea6f451df85fdbf88c9aa9ab040d81ab49f36c3d029902ae69815d07f2118a93bdf410429cd019a96d90b79b0df47eb8cbc15c2daa
data/CHANGELOG.md CHANGED
@@ -4,6 +4,8 @@ Changes and additions to the library will be listed here.
4
4
 
5
5
  ## Unreleased
6
6
 
7
+ - Fetch messages asynchronously (#526).
8
+
7
9
  ## v0.5.5
8
10
 
9
11
  - Support PLAINTEXT and SSL URI schemes (#550).
@@ -22,6 +22,12 @@ consumer.subscribe(topic)
22
22
  trap("TERM") { consumer.stop }
23
23
  trap("INT") { consumer.stop }
24
24
 
25
- consumer.each_message do |message|
26
- puts message.value
25
+ begin
26
+ consumer.each_message do |message|
27
+ end
28
+ rescue Kafka::ProcessingError => e
29
+ warn "Got #{e.cause}"
30
+ consumer.pause(e.topic, e.partition, timeout: 20)
31
+
32
+ retry
27
33
  end
data/lib/kafka/client.rb CHANGED
@@ -305,12 +305,19 @@ module Kafka
305
305
  interval: heartbeat_interval,
306
306
  )
307
307
 
308
+ fetcher = Fetcher.new(
309
+ cluster: initialize_cluster,
310
+ logger: @logger,
311
+ instrumenter: instrumenter,
312
+ )
313
+
308
314
  Consumer.new(
309
315
  cluster: cluster,
310
316
  logger: @logger,
311
317
  instrumenter: instrumenter,
312
318
  group: group,
313
319
  offset_manager: offset_manager,
320
+ fetcher: fetcher,
314
321
  session_timeout: session_timeout,
315
322
  heartbeat: heartbeat,
316
323
  )
@@ -1,6 +1,6 @@
1
1
  require "kafka/consumer_group"
2
2
  require "kafka/offset_manager"
3
- require "kafka/fetch_operation"
3
+ require "kafka/fetcher"
4
4
 
5
5
  module Kafka
6
6
 
@@ -40,13 +40,14 @@ module Kafka
40
40
  #
41
41
  class Consumer
42
42
 
43
- def initialize(cluster:, logger:, instrumenter:, group:, offset_manager:, session_timeout:, heartbeat:)
43
+ def initialize(cluster:, logger:, instrumenter:, group:, fetcher:, offset_manager:, session_timeout:, heartbeat:)
44
44
  @cluster = cluster
45
45
  @logger = logger
46
46
  @instrumenter = instrumenter
47
47
  @group = group
48
48
  @offset_manager = offset_manager
49
49
  @session_timeout = session_timeout
50
+ @fetcher = fetcher
50
51
  @heartbeat = heartbeat
51
52
 
52
53
  # A list of partitions that have been paused, per topic.
@@ -55,9 +56,6 @@ module Kafka
55
56
  # Whether or not the consumer is currently consuming messages.
56
57
  @running = false
57
58
 
58
- # The maximum number of bytes to fetch from a single partition, by topic.
59
- @max_bytes = {}
60
-
61
59
  # Hash containing offsets for each topic and partition that has the
62
60
  # automatically_mark_as_processed feature disabled. Offset manager is only active
63
61
  # when everything is supposed to happen automatically. Otherwise we need to keep track of the
@@ -93,7 +91,7 @@ module Kafka
93
91
 
94
92
  @group.subscribe(topic)
95
93
  @offset_manager.set_default_offset(topic, default_offset)
96
- @max_bytes[topic] = max_bytes_per_partition
94
+ @fetcher.subscribe(topic, max_bytes_per_partition: max_bytes_per_partition)
97
95
 
98
96
  nil
99
97
  end
@@ -136,6 +134,8 @@ module Kafka
136
134
  def resume(topic, partition)
137
135
  paused_partitions = @paused_partitions.fetch(topic, {})
138
136
  paused_partitions.delete(partition)
137
+
138
+ seek_to_next(topic, partition)
139
139
  end
140
140
 
141
141
  # Whether the topic partition is currently paused.
@@ -153,15 +153,7 @@ module Kafka
153
153
  # absolute point in time.
154
154
  timeout = partitions.fetch(partition)
155
155
 
156
- if timeout.nil?
157
- true
158
- elsif Time.now < timeout
159
- true
160
- else
161
- @logger.info "Automatically resuming partition #{topic}/#{partition}, pause timeout expired"
162
- resume(topic, partition)
163
- false
164
- end
156
+ timeout.nil? || Time.now < timeout
165
157
  end
166
158
  end
167
159
 
@@ -193,24 +185,16 @@ module Kafka
193
185
  # {Kafka::ProcessingError} instance.
194
186
  # @return [nil]
195
187
  def each_message(min_bytes: 1, max_bytes: 10485760, max_wait_time: 1, automatically_mark_as_processed: true)
188
+ @fetcher.configure(
189
+ min_bytes: min_bytes,
190
+ max_bytes: max_bytes,
191
+ max_wait_time: max_wait_time,
192
+ )
193
+
196
194
  consumer_loop do
197
- batches = fetch_batches(
198
- min_bytes: min_bytes,
199
- max_bytes: max_bytes,
200
- max_wait_time: max_wait_time,
201
- automatically_mark_as_processed: automatically_mark_as_processed
202
- )
195
+ batches = fetch_batches
203
196
 
204
197
  batches.each do |batch|
205
- unless batch.empty?
206
- @instrumenter.instrument("fetch_batch.consumer", {
207
- topic: batch.topic,
208
- partition: batch.partition,
209
- offset_lag: batch.offset_lag,
210
- highwater_mark_offset: batch.highwater_mark_offset,
211
- message_count: batch.messages.count,
212
- })
213
- end
214
198
  batch.messages.each do |message|
215
199
  notification = {
216
200
  topic: message.topic,
@@ -281,13 +265,14 @@ module Kafka
281
265
  # @yieldparam batch [Kafka::FetchedBatch] a message batch fetched from Kafka.
282
266
  # @return [nil]
283
267
  def each_batch(min_bytes: 1, max_bytes: 10485760, max_wait_time: 1, automatically_mark_as_processed: true)
268
+ @fetcher.configure(
269
+ min_bytes: min_bytes,
270
+ max_bytes: max_bytes,
271
+ max_wait_time: max_wait_time,
272
+ )
273
+
284
274
  consumer_loop do
285
- batches = fetch_batches(
286
- min_bytes: min_bytes,
287
- max_bytes: max_bytes,
288
- max_wait_time: max_wait_time,
289
- automatically_mark_as_processed: automatically_mark_as_processed
290
- )
275
+ batches = fetch_batches
291
276
 
292
277
  batches.each do |batch|
293
278
  unless batch.empty?
@@ -369,6 +354,8 @@ module Kafka
369
354
  def consumer_loop
370
355
  @running = true
371
356
 
357
+ @fetcher.start
358
+
372
359
  while @running
373
360
  begin
374
361
  @instrumenter.instrument("loop.consumer") do
@@ -394,6 +381,8 @@ module Kafka
394
381
  end
395
382
  end
396
383
  ensure
384
+ @fetcher.stop
385
+
397
386
  # In order to quickly have the consumer group re-balance itself, it's
398
387
  # important that members explicitly tell Kafka when they're leaving.
399
388
  make_final_offsets_commit!
@@ -433,59 +422,67 @@ module Kafka
433
422
  # only keep commits for the partitions that we're still assigned.
434
423
  @offset_manager.clear_offsets_excluding(@group.assigned_partitions)
435
424
  end
436
- end
437
-
438
- def fetch_batches(min_bytes:, max_bytes:, max_wait_time:, automatically_mark_as_processed:)
439
- # Return early if the consumer has been stopped.
440
- return [] if !@running
441
-
442
- join_group unless @group.member?
443
-
444
- subscribed_partitions = @group.subscribed_partitions
445
425
 
446
- @heartbeat.send_if_necessary
447
-
448
- operation = FetchOperation.new(
449
- cluster: @cluster,
450
- logger: @logger,
451
- min_bytes: min_bytes,
452
- max_bytes: max_bytes,
453
- max_wait_time: max_wait_time,
454
- )
426
+ @fetcher.reset
455
427
 
456
- subscribed_partitions.each do |topic, partitions|
428
+ @group.assigned_partitions.each do |topic, partitions|
457
429
  partitions.each do |partition|
458
- if automatically_mark_as_processed
459
- offset = @offset_manager.next_offset_for(topic, partition)
430
+ if paused?(topic, partition)
431
+ @logger.warn "Not fetching from #{topic}/#{partition} due to pause"
460
432
  else
461
- # When automatic marking is off, the first poll needs to be based on the last committed
462
- # offset from Kafka, that's why we fallback in case of nil (it may not be 0)
463
- if @current_offsets[topic].key?(partition)
464
- offset = @current_offsets[topic][partition] + 1
465
- else
466
- offset = @offset_manager.next_offset_for(topic, partition)
467
- end
433
+ seek_to_next(topic, partition)
468
434
  end
435
+ end
436
+ end
437
+ end
469
438
 
470
- max_bytes = @max_bytes.fetch(topic)
439
+ def seek_to_next(topic, partition)
440
+ # When automatic marking is off, the first poll needs to be based on the last committed
441
+ # offset from Kafka, that's why we fallback in case of nil (it may not be 0)
442
+ if @current_offsets[topic].key?(partition)
443
+ offset = @current_offsets[topic][partition] + 1
444
+ else
445
+ offset = @offset_manager.next_offset_for(topic, partition)
446
+ end
471
447
 
472
- if paused?(topic, partition)
473
- @logger.warn "Partition #{topic}/#{partition} is currently paused, skipping"
474
- else
475
- @logger.debug "Fetching batch from #{topic}/#{partition} starting at offset #{offset}"
476
- operation.fetch_from_partition(topic, partition, offset: offset, max_bytes: max_bytes)
448
+ @fetcher.seek(topic, partition, offset)
449
+ end
450
+
451
+ def resume_paused_partitions!
452
+ @paused_partitions.each do |topic, partitions|
453
+ partitions.keys.each do |partition|
454
+ unless paused?(topic, partition)
455
+ @logger.info "Automatically resuming partition #{topic}/#{partition}, pause timeout expired"
456
+ resume(topic, partition)
477
457
  end
478
458
  end
479
459
  end
460
+ end
480
461
 
481
- operation.execute
482
- rescue NoPartitionsToFetchFrom
483
- backoff = max_wait_time > 0 ? max_wait_time : 1
462
+ def fetch_batches
463
+ # Return early if the consumer has been stopped.
464
+ return [] if !@running
484
465
 
485
- @logger.info "There are no partitions to fetch from, sleeping for #{backoff}s"
486
- sleep backoff
466
+ join_group unless @group.member?
487
467
 
488
- retry
468
+ @heartbeat.send_if_necessary
469
+
470
+ resume_paused_partitions!
471
+
472
+ if !@fetcher.data?
473
+ @logger.debug "No batches to process"
474
+ sleep 2
475
+ []
476
+ else
477
+ tag, message = @fetcher.poll
478
+
479
+ case tag
480
+ when :batches
481
+ message
482
+ when :exception
483
+ raise message
484
+ end
485
+ end
489
486
  rescue OffsetOutOfRange => e
490
487
  @logger.error "Invalid offset for #{e.topic}/#{e.partition}, resetting to default offset"
491
488
 
@@ -0,0 +1,178 @@
1
+ require "kafka/fetch_operation"
2
+
3
+ module Kafka
4
+ class Fetcher
5
+ MAX_QUEUE_SIZE = 100
6
+
7
+ attr_reader :queue
8
+
9
+ def initialize(cluster:, logger:, instrumenter:)
10
+ @cluster = cluster
11
+ @logger = logger
12
+ @instrumenter = instrumenter
13
+
14
+ @queue = Queue.new
15
+ @commands = Queue.new
16
+ @next_offsets = Hash.new { |h, k| h[k] = {} }
17
+
18
+ # Long poll until at least this many bytes can be fetched.
19
+ @min_bytes = 1
20
+
21
+ # Long poll at most this number of seconds.
22
+ @max_wait_time = 1
23
+
24
+ # The maximum number of bytes to fetch for any given fetch request.
25
+ @max_bytes = 10485760
26
+
27
+ # The maximum number of bytes to fetch per partition, by topic.
28
+ @max_bytes_per_partition = {}
29
+
30
+ @thread = Thread.new do
31
+ loop while true
32
+ end
33
+
34
+ @thread.abort_on_exception = true
35
+ end
36
+
37
+ def subscribe(topic, max_bytes_per_partition:)
38
+ @commands << [:subscribe, [topic, max_bytes_per_partition]]
39
+ end
40
+
41
+ def seek(topic, partition, offset)
42
+ @commands << [:seek, [topic, partition, offset]]
43
+ end
44
+
45
+ def configure(min_bytes:, max_bytes:, max_wait_time:)
46
+ @commands << [:configure, [min_bytes, max_bytes, max_wait_time]]
47
+ end
48
+
49
+ def start
50
+ @commands << [:start, []]
51
+ end
52
+
53
+ def handle_start
54
+ raise "already started" if @running
55
+
56
+ @running = true
57
+ end
58
+
59
+ def stop
60
+ @commands << [:stop, []]
61
+ end
62
+
63
+ def reset
64
+ @commands << [:reset, []]
65
+ end
66
+
67
+ def data?
68
+ !@queue.empty?
69
+ end
70
+
71
+ def poll
72
+ @queue.deq
73
+ end
74
+
75
+ private
76
+
77
+ def loop
78
+ if !@commands.empty?
79
+ cmd, args = @commands.deq
80
+
81
+ @logger.debug "Handling fetcher command: #{cmd}"
82
+
83
+ send("handle_#{cmd}", *args)
84
+ elsif !@running
85
+ sleep 0.1
86
+ elsif @queue.size < MAX_QUEUE_SIZE
87
+ step
88
+ else
89
+ @logger.warn "Reached max fetcher queue size (#{MAX_QUEUE_SIZE}), sleeping 1s"
90
+ sleep 1
91
+ end
92
+ end
93
+
94
+ def handle_configure(min_bytes, max_bytes, max_wait_time)
95
+ @min_bytes = min_bytes
96
+ @max_bytes = max_bytes
97
+ @max_wait_time = max_wait_time
98
+ end
99
+
100
+ def handle_reset
101
+ @next_offsets.clear
102
+ end
103
+
104
+ def handle_stop(*)
105
+ @running = false
106
+
107
+ # After stopping, we need to reconfigure the topics and partitions to fetch
108
+ # from. Otherwise we'd keep fetching from a bunch of partitions we may no
109
+ # longer be assigned.
110
+ handle_reset
111
+ end
112
+
113
+ def handle_subscribe(topic, max_bytes_per_partition)
114
+ @logger.info "Will fetch at most #{max_bytes_per_partition} bytes at a time per partition from #{topic}"
115
+ @max_bytes_per_partition[topic] = max_bytes_per_partition
116
+ end
117
+
118
+ def handle_seek(topic, partition, offset)
119
+ @logger.info "Seeking #{topic}/#{partition} to offset #{offset}"
120
+ @next_offsets[topic][partition] = offset
121
+ end
122
+
123
+ def step
124
+ batches = fetch_batches
125
+
126
+ batches.each do |batch|
127
+ unless batch.empty?
128
+ @instrumenter.instrument("fetch_batch.consumer", {
129
+ topic: batch.topic,
130
+ partition: batch.partition,
131
+ offset_lag: batch.offset_lag,
132
+ highwater_mark_offset: batch.highwater_mark_offset,
133
+ message_count: batch.messages.count,
134
+ })
135
+ end
136
+
137
+ @next_offsets[batch.topic][batch.partition] = batch.last_offset + 1
138
+ end
139
+
140
+ @queue << [:batches, batches]
141
+ rescue Kafka::NoPartitionsToFetchFrom
142
+ @logger.warn "No partitions to fetch from, sleeping for 1s"
143
+ sleep 1
144
+ rescue Kafka::Error => e
145
+ @queue << [:exception, e]
146
+ end
147
+
148
+ def fetch_batches
149
+ @logger.debug "Fetching batches"
150
+
151
+ operation = FetchOperation.new(
152
+ cluster: @cluster,
153
+ logger: @logger,
154
+ min_bytes: @min_bytes,
155
+ max_bytes: @max_bytes,
156
+ max_wait_time: @max_wait_time,
157
+ )
158
+
159
+ @next_offsets.each do |topic, partitions|
160
+ # Fetch at most this many bytes from any single partition.
161
+ max_bytes = @max_bytes_per_partition[topic]
162
+
163
+ partitions.each do |partition, offset|
164
+ operation.fetch_from_partition(topic, partition, offset: offset, max_bytes: max_bytes)
165
+ end
166
+ end
167
+
168
+ operation.execute
169
+ rescue NoPartitionsToFetchFrom
170
+ backoff = @max_wait_time > 0 ? @max_wait_time : 1
171
+
172
+ @logger.info "There are no partitions to fetch from, sleeping for #{backoff}s"
173
+ sleep backoff
174
+
175
+ []
176
+ end
177
+ end
178
+ end
data/lib/kafka/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Kafka
2
- VERSION = "0.5.5"
2
+ VERSION = "0.6.0.beta1"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ruby-kafka
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.5
4
+ version: 0.6.0.beta1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Daniel Schierbeck
@@ -323,6 +323,7 @@ files:
323
323
  - lib/kafka/fetch_operation.rb
324
324
  - lib/kafka/fetched_batch.rb
325
325
  - lib/kafka/fetched_message.rb
326
+ - lib/kafka/fetcher.rb
326
327
  - lib/kafka/gzip_codec.rb
327
328
  - lib/kafka/heartbeat.rb
328
329
  - lib/kafka/instrumenter.rb
@@ -404,9 +405,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
404
405
  version: 2.1.0
405
406
  required_rubygems_version: !ruby/object:Gem::Requirement
406
407
  requirements:
407
- - - ">="
408
+ - - ">"
408
409
  - !ruby/object:Gem::Version
409
- version: '0'
410
+ version: 1.3.1
410
411
  requirements: []
411
412
  rubyforge_project:
412
413
  rubygems_version: 2.7.6