ruby-kafka 0.5.5 → 0.6.0.beta1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 63ec54e4024c83bb5a73583700391db7b89fed005bb4b196cf912424351bba0e
4
- data.tar.gz: d567d51c5434f6ef034b769f78cc03286d91e875aa5ea9058623cd2e755cf28c
3
+ metadata.gz: cc9b975d79ef5be40b82d8f534995aa397d88a12b557f0b436bea4e42c0d7baa
4
+ data.tar.gz: f2e1fa46cbd12f0bc551f527aabd0469d5dd0bcc89bdcd27a71e05dedf9bd474
5
5
  SHA512:
6
- metadata.gz: 3f9f8269075c30eb60a3ee7afcca5c1fe8eab9aa6c5cf51c5512dc6b564299a5366d33bc97d08b61a17ecc4a299e066ecb48f86938c93643324a8083289ed3af
7
- data.tar.gz: 92767b83a152bef74992d1b4dc4b1818bdce8715a4e1e32c53671eaf6ee8d28d8e76d4800bf141c8cdd6cc44ef51aef78335464dbe504f8f72245ec8fc964a9a
6
+ metadata.gz: 60d506503b7bb77cde82b6a606bbe73ea5f8da247b4acd676a933ca91daa8e421d8beb91642469b4847725a0b6c4cfa191c74fc1f12413846b5e4bf5dd14186c
7
+ data.tar.gz: 9b9a7733579b0ed9c81546ea6f451df85fdbf88c9aa9ab040d81ab49f36c3d029902ae69815d07f2118a93bdf410429cd019a96d90b79b0df47eb8cbc15c2daa
data/CHANGELOG.md CHANGED
@@ -4,6 +4,8 @@ Changes and additions to the library will be listed here.
4
4
 
5
5
  ## Unreleased
6
6
 
7
+ - Fetch messages asynchronously (#526).
8
+
7
9
  ## v0.5.5
8
10
 
9
11
  - Support PLAINTEXT and SSL URI schemes (#550).
@@ -22,6 +22,12 @@ consumer.subscribe(topic)
22
22
  trap("TERM") { consumer.stop }
23
23
  trap("INT") { consumer.stop }
24
24
 
25
- consumer.each_message do |message|
26
- puts message.value
25
+ begin
26
+ consumer.each_message do |message|
27
+ end
28
+ rescue Kafka::ProcessingError => e
29
+ warn "Got #{e.cause}"
30
+ consumer.pause(e.topic, e.partition, timeout: 20)
31
+
32
+ retry
27
33
  end
data/lib/kafka/client.rb CHANGED
@@ -305,12 +305,19 @@ module Kafka
305
305
  interval: heartbeat_interval,
306
306
  )
307
307
 
308
+ fetcher = Fetcher.new(
309
+ cluster: initialize_cluster,
310
+ logger: @logger,
311
+ instrumenter: instrumenter,
312
+ )
313
+
308
314
  Consumer.new(
309
315
  cluster: cluster,
310
316
  logger: @logger,
311
317
  instrumenter: instrumenter,
312
318
  group: group,
313
319
  offset_manager: offset_manager,
320
+ fetcher: fetcher,
314
321
  session_timeout: session_timeout,
315
322
  heartbeat: heartbeat,
316
323
  )
@@ -1,6 +1,6 @@
1
1
  require "kafka/consumer_group"
2
2
  require "kafka/offset_manager"
3
- require "kafka/fetch_operation"
3
+ require "kafka/fetcher"
4
4
 
5
5
  module Kafka
6
6
 
@@ -40,13 +40,14 @@ module Kafka
40
40
  #
41
41
  class Consumer
42
42
 
43
- def initialize(cluster:, logger:, instrumenter:, group:, offset_manager:, session_timeout:, heartbeat:)
43
+ def initialize(cluster:, logger:, instrumenter:, group:, fetcher:, offset_manager:, session_timeout:, heartbeat:)
44
44
  @cluster = cluster
45
45
  @logger = logger
46
46
  @instrumenter = instrumenter
47
47
  @group = group
48
48
  @offset_manager = offset_manager
49
49
  @session_timeout = session_timeout
50
+ @fetcher = fetcher
50
51
  @heartbeat = heartbeat
51
52
 
52
53
  # A list of partitions that have been paused, per topic.
@@ -55,9 +56,6 @@ module Kafka
55
56
  # Whether or not the consumer is currently consuming messages.
56
57
  @running = false
57
58
 
58
- # The maximum number of bytes to fetch from a single partition, by topic.
59
- @max_bytes = {}
60
-
61
59
  # Hash containing offsets for each topic and partition that has the
62
60
  # automatically_mark_as_processed feature disabled. Offset manager is only active
63
61
  # when everything is supposed to happen automatically. Otherwise we need to keep track of the
@@ -93,7 +91,7 @@ module Kafka
93
91
 
94
92
  @group.subscribe(topic)
95
93
  @offset_manager.set_default_offset(topic, default_offset)
96
- @max_bytes[topic] = max_bytes_per_partition
94
+ @fetcher.subscribe(topic, max_bytes_per_partition: max_bytes_per_partition)
97
95
 
98
96
  nil
99
97
  end
@@ -136,6 +134,8 @@ module Kafka
136
134
  def resume(topic, partition)
137
135
  paused_partitions = @paused_partitions.fetch(topic, {})
138
136
  paused_partitions.delete(partition)
137
+
138
+ seek_to_next(topic, partition)
139
139
  end
140
140
 
141
141
  # Whether the topic partition is currently paused.
@@ -153,15 +153,7 @@ module Kafka
153
153
  # absolute point in time.
154
154
  timeout = partitions.fetch(partition)
155
155
 
156
- if timeout.nil?
157
- true
158
- elsif Time.now < timeout
159
- true
160
- else
161
- @logger.info "Automatically resuming partition #{topic}/#{partition}, pause timeout expired"
162
- resume(topic, partition)
163
- false
164
- end
156
+ timeout.nil? || Time.now < timeout
165
157
  end
166
158
  end
167
159
 
@@ -193,24 +185,16 @@ module Kafka
193
185
  # {Kafka::ProcessingError} instance.
194
186
  # @return [nil]
195
187
  def each_message(min_bytes: 1, max_bytes: 10485760, max_wait_time: 1, automatically_mark_as_processed: true)
188
+ @fetcher.configure(
189
+ min_bytes: min_bytes,
190
+ max_bytes: max_bytes,
191
+ max_wait_time: max_wait_time,
192
+ )
193
+
196
194
  consumer_loop do
197
- batches = fetch_batches(
198
- min_bytes: min_bytes,
199
- max_bytes: max_bytes,
200
- max_wait_time: max_wait_time,
201
- automatically_mark_as_processed: automatically_mark_as_processed
202
- )
195
+ batches = fetch_batches
203
196
 
204
197
  batches.each do |batch|
205
- unless batch.empty?
206
- @instrumenter.instrument("fetch_batch.consumer", {
207
- topic: batch.topic,
208
- partition: batch.partition,
209
- offset_lag: batch.offset_lag,
210
- highwater_mark_offset: batch.highwater_mark_offset,
211
- message_count: batch.messages.count,
212
- })
213
- end
214
198
  batch.messages.each do |message|
215
199
  notification = {
216
200
  topic: message.topic,
@@ -281,13 +265,14 @@ module Kafka
281
265
  # @yieldparam batch [Kafka::FetchedBatch] a message batch fetched from Kafka.
282
266
  # @return [nil]
283
267
  def each_batch(min_bytes: 1, max_bytes: 10485760, max_wait_time: 1, automatically_mark_as_processed: true)
268
+ @fetcher.configure(
269
+ min_bytes: min_bytes,
270
+ max_bytes: max_bytes,
271
+ max_wait_time: max_wait_time,
272
+ )
273
+
284
274
  consumer_loop do
285
- batches = fetch_batches(
286
- min_bytes: min_bytes,
287
- max_bytes: max_bytes,
288
- max_wait_time: max_wait_time,
289
- automatically_mark_as_processed: automatically_mark_as_processed
290
- )
275
+ batches = fetch_batches
291
276
 
292
277
  batches.each do |batch|
293
278
  unless batch.empty?
@@ -369,6 +354,8 @@ module Kafka
369
354
  def consumer_loop
370
355
  @running = true
371
356
 
357
+ @fetcher.start
358
+
372
359
  while @running
373
360
  begin
374
361
  @instrumenter.instrument("loop.consumer") do
@@ -394,6 +381,8 @@ module Kafka
394
381
  end
395
382
  end
396
383
  ensure
384
+ @fetcher.stop
385
+
397
386
  # In order to quickly have the consumer group re-balance itself, it's
398
387
  # important that members explicitly tell Kafka when they're leaving.
399
388
  make_final_offsets_commit!
@@ -433,59 +422,67 @@ module Kafka
433
422
  # only keep commits for the partitions that we're still assigned.
434
423
  @offset_manager.clear_offsets_excluding(@group.assigned_partitions)
435
424
  end
436
- end
437
-
438
- def fetch_batches(min_bytes:, max_bytes:, max_wait_time:, automatically_mark_as_processed:)
439
- # Return early if the consumer has been stopped.
440
- return [] if !@running
441
-
442
- join_group unless @group.member?
443
-
444
- subscribed_partitions = @group.subscribed_partitions
445
425
 
446
- @heartbeat.send_if_necessary
447
-
448
- operation = FetchOperation.new(
449
- cluster: @cluster,
450
- logger: @logger,
451
- min_bytes: min_bytes,
452
- max_bytes: max_bytes,
453
- max_wait_time: max_wait_time,
454
- )
426
+ @fetcher.reset
455
427
 
456
- subscribed_partitions.each do |topic, partitions|
428
+ @group.assigned_partitions.each do |topic, partitions|
457
429
  partitions.each do |partition|
458
- if automatically_mark_as_processed
459
- offset = @offset_manager.next_offset_for(topic, partition)
430
+ if paused?(topic, partition)
431
+ @logger.warn "Not fetching from #{topic}/#{partition} due to pause"
460
432
  else
461
- # When automatic marking is off, the first poll needs to be based on the last committed
462
- # offset from Kafka, that's why we fallback in case of nil (it may not be 0)
463
- if @current_offsets[topic].key?(partition)
464
- offset = @current_offsets[topic][partition] + 1
465
- else
466
- offset = @offset_manager.next_offset_for(topic, partition)
467
- end
433
+ seek_to_next(topic, partition)
468
434
  end
435
+ end
436
+ end
437
+ end
469
438
 
470
- max_bytes = @max_bytes.fetch(topic)
439
+ def seek_to_next(topic, partition)
440
+ # When automatic marking is off, the first poll needs to be based on the last committed
441
+ # offset from Kafka, that's why we fallback in case of nil (it may not be 0)
442
+ if @current_offsets[topic].key?(partition)
443
+ offset = @current_offsets[topic][partition] + 1
444
+ else
445
+ offset = @offset_manager.next_offset_for(topic, partition)
446
+ end
471
447
 
472
- if paused?(topic, partition)
473
- @logger.warn "Partition #{topic}/#{partition} is currently paused, skipping"
474
- else
475
- @logger.debug "Fetching batch from #{topic}/#{partition} starting at offset #{offset}"
476
- operation.fetch_from_partition(topic, partition, offset: offset, max_bytes: max_bytes)
448
+ @fetcher.seek(topic, partition, offset)
449
+ end
450
+
451
+ def resume_paused_partitions!
452
+ @paused_partitions.each do |topic, partitions|
453
+ partitions.keys.each do |partition|
454
+ unless paused?(topic, partition)
455
+ @logger.info "Automatically resuming partition #{topic}/#{partition}, pause timeout expired"
456
+ resume(topic, partition)
477
457
  end
478
458
  end
479
459
  end
460
+ end
480
461
 
481
- operation.execute
482
- rescue NoPartitionsToFetchFrom
483
- backoff = max_wait_time > 0 ? max_wait_time : 1
462
+ def fetch_batches
463
+ # Return early if the consumer has been stopped.
464
+ return [] if !@running
484
465
 
485
- @logger.info "There are no partitions to fetch from, sleeping for #{backoff}s"
486
- sleep backoff
466
+ join_group unless @group.member?
487
467
 
488
- retry
468
+ @heartbeat.send_if_necessary
469
+
470
+ resume_paused_partitions!
471
+
472
+ if !@fetcher.data?
473
+ @logger.debug "No batches to process"
474
+ sleep 2
475
+ []
476
+ else
477
+ tag, message = @fetcher.poll
478
+
479
+ case tag
480
+ when :batches
481
+ message
482
+ when :exception
483
+ raise message
484
+ end
485
+ end
489
486
  rescue OffsetOutOfRange => e
490
487
  @logger.error "Invalid offset for #{e.topic}/#{e.partition}, resetting to default offset"
491
488
 
@@ -0,0 +1,178 @@
1
+ require "kafka/fetch_operation"
2
+
3
+ module Kafka
4
+ class Fetcher
5
+ MAX_QUEUE_SIZE = 100
6
+
7
+ attr_reader :queue
8
+
9
+ def initialize(cluster:, logger:, instrumenter:)
10
+ @cluster = cluster
11
+ @logger = logger
12
+ @instrumenter = instrumenter
13
+
14
+ @queue = Queue.new
15
+ @commands = Queue.new
16
+ @next_offsets = Hash.new { |h, k| h[k] = {} }
17
+
18
+ # Long poll until at least this many bytes can be fetched.
19
+ @min_bytes = 1
20
+
21
+ # Long poll at most this number of seconds.
22
+ @max_wait_time = 1
23
+
24
+ # The maximum number of bytes to fetch for any given fetch request.
25
+ @max_bytes = 10485760
26
+
27
+ # The maximum number of bytes to fetch per partition, by topic.
28
+ @max_bytes_per_partition = {}
29
+
30
+ @thread = Thread.new do
31
+ loop while true
32
+ end
33
+
34
+ @thread.abort_on_exception = true
35
+ end
36
+
37
+ def subscribe(topic, max_bytes_per_partition:)
38
+ @commands << [:subscribe, [topic, max_bytes_per_partition]]
39
+ end
40
+
41
+ def seek(topic, partition, offset)
42
+ @commands << [:seek, [topic, partition, offset]]
43
+ end
44
+
45
+ def configure(min_bytes:, max_bytes:, max_wait_time:)
46
+ @commands << [:configure, [min_bytes, max_bytes, max_wait_time]]
47
+ end
48
+
49
+ def start
50
+ @commands << [:start, []]
51
+ end
52
+
53
+ def handle_start
54
+ raise "already started" if @running
55
+
56
+ @running = true
57
+ end
58
+
59
+ def stop
60
+ @commands << [:stop, []]
61
+ end
62
+
63
+ def reset
64
+ @commands << [:reset, []]
65
+ end
66
+
67
+ def data?
68
+ !@queue.empty?
69
+ end
70
+
71
+ def poll
72
+ @queue.deq
73
+ end
74
+
75
+ private
76
+
77
+ def loop
78
+ if !@commands.empty?
79
+ cmd, args = @commands.deq
80
+
81
+ @logger.debug "Handling fetcher command: #{cmd}"
82
+
83
+ send("handle_#{cmd}", *args)
84
+ elsif !@running
85
+ sleep 0.1
86
+ elsif @queue.size < MAX_QUEUE_SIZE
87
+ step
88
+ else
89
+ @logger.warn "Reached max fetcher queue size (#{MAX_QUEUE_SIZE}), sleeping 1s"
90
+ sleep 1
91
+ end
92
+ end
93
+
94
+ def handle_configure(min_bytes, max_bytes, max_wait_time)
95
+ @min_bytes = min_bytes
96
+ @max_bytes = max_bytes
97
+ @max_wait_time = max_wait_time
98
+ end
99
+
100
+ def handle_reset
101
+ @next_offsets.clear
102
+ end
103
+
104
+ def handle_stop(*)
105
+ @running = false
106
+
107
+ # After stopping, we need to reconfigure the topics and partitions to fetch
108
+ # from. Otherwise we'd keep fetching from a bunch of partitions we may no
109
+ # longer be assigned.
110
+ handle_reset
111
+ end
112
+
113
+ def handle_subscribe(topic, max_bytes_per_partition)
114
+ @logger.info "Will fetch at most #{max_bytes_per_partition} bytes at a time per partition from #{topic}"
115
+ @max_bytes_per_partition[topic] = max_bytes_per_partition
116
+ end
117
+
118
+ def handle_seek(topic, partition, offset)
119
+ @logger.info "Seeking #{topic}/#{partition} to offset #{offset}"
120
+ @next_offsets[topic][partition] = offset
121
+ end
122
+
123
+ def step
124
+ batches = fetch_batches
125
+
126
+ batches.each do |batch|
127
+ unless batch.empty?
128
+ @instrumenter.instrument("fetch_batch.consumer", {
129
+ topic: batch.topic,
130
+ partition: batch.partition,
131
+ offset_lag: batch.offset_lag,
132
+ highwater_mark_offset: batch.highwater_mark_offset,
133
+ message_count: batch.messages.count,
134
+ })
135
+ end
136
+
137
+ @next_offsets[batch.topic][batch.partition] = batch.last_offset + 1
138
+ end
139
+
140
+ @queue << [:batches, batches]
141
+ rescue Kafka::NoPartitionsToFetchFrom
142
+ @logger.warn "No partitions to fetch from, sleeping for 1s"
143
+ sleep 1
144
+ rescue Kafka::Error => e
145
+ @queue << [:exception, e]
146
+ end
147
+
148
+ def fetch_batches
149
+ @logger.debug "Fetching batches"
150
+
151
+ operation = FetchOperation.new(
152
+ cluster: @cluster,
153
+ logger: @logger,
154
+ min_bytes: @min_bytes,
155
+ max_bytes: @max_bytes,
156
+ max_wait_time: @max_wait_time,
157
+ )
158
+
159
+ @next_offsets.each do |topic, partitions|
160
+ # Fetch at most this many bytes from any single partition.
161
+ max_bytes = @max_bytes_per_partition[topic]
162
+
163
+ partitions.each do |partition, offset|
164
+ operation.fetch_from_partition(topic, partition, offset: offset, max_bytes: max_bytes)
165
+ end
166
+ end
167
+
168
+ operation.execute
169
+ rescue NoPartitionsToFetchFrom
170
+ backoff = @max_wait_time > 0 ? @max_wait_time : 1
171
+
172
+ @logger.info "There are no partitions to fetch from, sleeping for #{backoff}s"
173
+ sleep backoff
174
+
175
+ []
176
+ end
177
+ end
178
+ end
data/lib/kafka/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Kafka
2
- VERSION = "0.5.5"
2
+ VERSION = "0.6.0.beta1"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ruby-kafka
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.5
4
+ version: 0.6.0.beta1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Daniel Schierbeck
@@ -323,6 +323,7 @@ files:
323
323
  - lib/kafka/fetch_operation.rb
324
324
  - lib/kafka/fetched_batch.rb
325
325
  - lib/kafka/fetched_message.rb
326
+ - lib/kafka/fetcher.rb
326
327
  - lib/kafka/gzip_codec.rb
327
328
  - lib/kafka/heartbeat.rb
328
329
  - lib/kafka/instrumenter.rb
@@ -404,9 +405,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
404
405
  version: 2.1.0
405
406
  required_rubygems_version: !ruby/object:Gem::Requirement
406
407
  requirements:
407
- - - ">="
408
+ - - ">"
408
409
  - !ruby/object:Gem::Version
409
- version: '0'
410
+ version: 1.3.1
410
411
  requirements: []
411
412
  rubyforge_project:
412
413
  rubygems_version: 2.7.6