ruby-kafka 0.5.0.beta5 → 0.5.0.beta6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: '049f2fa2e6a04ff652cffe3ed630dbfe69e87009'
-  data.tar.gz: ed051c1dbb21aaac9360e58c74fca58d7706a76a
+  metadata.gz: 9893c7609209a4d58c3ac399cbbdce89c8df0124
+  data.tar.gz: 820e9d1bcd07f6d7c8de1d0ba1d286f184b64d8c
 SHA512:
-  metadata.gz: '08b01eb42a5082deac74cd5ff3136aa50ec16ac34c5ec4818d5a3722b9b46273b3601bd355073d3165c8917a9daf51356a43ff5ec97eceedbfcc6e0450018b79'
-  data.tar.gz: 16114b7e4abe72d82f3ae37cc0cfba4cbfdf798073adc04f1eaea892ac64eed2d96b66f093df1e2b3f1c6ac40f7a5b87146efadbed888d46a32abc4113e1f289
+  metadata.gz: baf63463ad738c02a1ad07d60db31ff6546a3cc2e97ad411ea02b31dbad7654b003ae61836bc67f10db30650096e278a3e621f3456a44aac4a0c7f574f80b728
+  data.tar.gz: 54be82a578abcf241b387f70cd2a07b2ce069f2e9d80e0c24104780af4b1ae8b55dccf8f5962ac31456d19bd2770f2a2af6911fcd8ef7200f501633c36fe81be
README.md CHANGED
@@ -20,6 +20,7 @@ consumer = kafka.consumer(group_id: "test")
 consumer.subscribe(topic)
 
 trap("TERM") { consumer.stop }
+trap("INT") { consumer.stop }
 
 consumer.each_message do |message|
   puts message.value
lib/kafka/connection.rb CHANGED
@@ -109,10 +109,10 @@ module Kafka
 
         response
       end
-    rescue Errno::EPIPE, Errno::ECONNRESET, Errno::ETIMEDOUT, Errno::ESHUTDOWN, EOFError => e
+    rescue SystemCallError, EOFError => e
       close
 
-      raise ConnectionError, "Connection error: #{e}"
+      raise ConnectionError, "Connection error #{e.class}: #{e}"
     end
 
     private
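
Broadening the rescue from four specific errnos to SystemCallError works because SystemCallError is the superclass of every Errno::* exception, so the new clause still covers EPIPE, ECONNRESET, ETIMEDOUT, and ESHUTDOWN, plus any other socket-level errno (such as the EBADF/ECONNABORTED cases no longer raised explicitly by the socket code further down). A quick check:

    Errno::ECONNRESET.ancestors.include?(SystemCallError) # => true
    Errno::EPIPE < SystemCallError                        # => true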
lib/kafka/consumer.rb CHANGED
@@ -57,6 +57,17 @@ module Kafka
 
       # The maximum number of bytes to fetch from a single partition, by topic.
       @max_bytes = {}
+
+      # Offsets for each topic and partition when the
+      # automatically_mark_as_processed feature is disabled. The offset manager
+      # is only active when everything is supposed to happen automatically;
+      # otherwise we need to keep track of the offsets manually in memory the
+      # whole time. The hash is keyed by topic, then by partition, and each
+      # value is the offset of the last message we've received.
+      # @note It is not updated when the user marks a message as processed,
+      # because if the user committed a message other than the last one in a
+      # batch, that would make ruby-kafka refetch some already consumed messages.
+      @current_offsets = Hash.new { |h, k| h[k] = {} }
     end
 
     # Subscribes the consumer to a topic.
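
The Hash.new default block means the inner per-partition hash is created lazily on first access. A tiny illustration (topic names and offsets invented):

    offsets = Hash.new { |h, k| h[k] = {} }
    offsets["greetings"][0] = 42   # offset 42 for partition 0 of "greetings"
    offsets["greetings"]           # => {0=>42}
    offsets["unseen-topic"]        # => {} (created by the default block)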
@@ -181,7 +192,11 @@ module Kafka
     # @return [nil]
     def each_message(min_bytes: 1, max_wait_time: 1, automatically_mark_as_processed: true)
       consumer_loop do
-        batches = fetch_batches(min_bytes: min_bytes, max_wait_time: max_wait_time)
+        batches = fetch_batches(
+          min_bytes: min_bytes,
+          max_wait_time: max_wait_time,
+          automatically_mark_as_processed: automatically_mark_as_processed
+        )
 
         batches.each do |batch|
           batch.messages.each do |message|
@@ -197,6 +212,7 @@ module Kafka
 
             begin
               yield message
+              @current_offsets[message.topic][message.partition] = message.offset
             rescue => e
               location = "#{message.topic}/#{message.partition} at offset #{message.offset}"
               backtrace = e.backtrace.join("\n")
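
Taken together, the consumer.rb changes above let a consumer that disables automatic marking keep advancing past messages it has already received, instead of refetching them on every poll. A minimal usage sketch (the topic name and the process method are illustrative; mark_message_as_processed is the consumer's existing manual-marking API):

    consumer = kafka.consumer(group_id: "test")
    consumer.subscribe("greetings")

    consumer.each_message(automatically_mark_as_processed: false) do |message|
      process(message) # hypothetical application code

      # Nothing is committed to Kafka unless we mark it ourselves; within this
      # process, @current_offsets still moves the fetch position past the message.
      consumer.mark_message_as_processed(message)
    end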
@@ -247,7 +263,11 @@ module Kafka
     # @return [nil]
     def each_batch(min_bytes: 1, max_wait_time: 1, automatically_mark_as_processed: true)
       consumer_loop do
-        batches = fetch_batches(min_bytes: min_bytes, max_wait_time: max_wait_time)
+        batches = fetch_batches(
+          min_bytes: min_bytes,
+          max_wait_time: max_wait_time,
+          automatically_mark_as_processed: automatically_mark_as_processed
+        )
 
         batches.each do |batch|
           unless batch.empty?
@@ -262,6 +282,7 @@ module Kafka
 
           begin
             yield batch
+            @current_offsets[batch.topic][batch.partition] = batch.last_offset
           rescue => e
             offset_range = (batch.first_offset..batch.last_offset)
             location = "#{batch.topic}/#{batch.partition} in offset range #{offset_range}"
@@ -379,7 +400,7 @@ module Kafka
       end
     end
 
-    def fetch_batches(min_bytes:, max_wait_time:)
+    def fetch_batches(min_bytes:, max_wait_time:, automatically_mark_as_processed:)
       join_group unless @group.member?
 
       subscribed_partitions = @group.subscribed_partitions
@@ -395,7 +416,18 @@ module Kafka
 
       subscribed_partitions.each do |topic, partitions|
         partitions.each do |partition|
-          offset = @offset_manager.next_offset_for(topic, partition)
+          if automatically_mark_as_processed
+            offset = @offset_manager.next_offset_for(topic, partition)
+          else
+            # When automatic marking is off, the first poll must start from the last
+            # offset committed to Kafka, so we fall back to the offset manager (not 0).
+            if @current_offsets[topic].key?(partition)
+              offset = @current_offsets[topic][partition] + 1
+            else
+              offset = @offset_manager.next_offset_for(topic, partition)
+            end
+          end
+
           max_bytes = @max_bytes.fetch(topic)
 
           if paused?(topic, partition)
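
The branch above, restated as a standalone sketch (the helper name next_fetch_offset is hypothetical; next_offset_for is the offset manager's real lookup):

    # Resume from the in-memory offset when this process has already received
    # messages for the partition; otherwise defer to the offset manager, which
    # knows the last offset committed to Kafka.
    def next_fetch_offset(current_offsets, offset_manager, topic, partition)
      if current_offsets[topic].key?(partition)
        current_offsets[topic][partition] + 1
      else
        offset_manager.next_offset_for(topic, partition)
      end
    end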
lib/kafka/consumer_group.rb CHANGED
@@ -48,6 +48,7 @@ module Kafka
     rescue ConnectionError
       @logger.error "Connection error while trying to join group `#{@group_id}`; retrying..."
       sleep 1
+      @cluster.mark_as_stale!
       @coordinator = nil
       retry
     end
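
The retry already dropped the cached coordinator; marking the cluster as stale additionally forces a metadata refresh on the next attempt, presumably so the group lookup doesn't keep going through a dead broker.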
lib/kafka/socket_with_timeout.rb CHANGED
@@ -24,9 +24,6 @@ module Kafka
 
       @timeout = timeout
 
-      # This pipe is used to cancel IO.select calls when sockets are closed.
-      @cancel_reader, @cancel_writer = IO.pipe
-
       @socket = Socket.new(Socket.const_get(addr[0][0]), Socket::SOCK_STREAM, 0)
       @socket.setsockopt(Socket::IPPROTO_TCP, Socket::TCP_NODELAY, 1)
 
@@ -60,20 +57,11 @@ module Kafka
     # @raise [Errno::ETIMEDOUT] if the timeout is exceeded.
     # @return [String] the data that was read from the socket.
     def read(num_bytes)
-      rs, _, _ = IO.select([@socket, @cancel_reader], nil, nil, @timeout)
-
-      # The read timed out.
-      raise Errno::ETIMEDOUT if rs.nil?
-
-      # The socket has been closed.
-      raise Errno::ECONNABORTED if rs.include?(@cancel_reader)
+      unless IO.select([@socket], nil, nil, @timeout)
+        raise Errno::ETIMEDOUT
+      end
 
       @socket.read(num_bytes)
-    rescue Errno::EBADF
-      # We'll get EBADF if `select` is called with a closed socket, or
-      # if it's closed in the middle of things.
-      raise Errno::ESHUTDOWN if @socket.closed?
-      raise
     rescue IO::EAGAINWaitReadable
       retry
     end
@@ -92,7 +80,6 @@ module Kafka
     end
 
     def close
-      @cancel_writer.puts
       @socket.close
     end
 
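With the cancel pipe gone, read is a plain select-then-read with a timeout, and an abrupt close surfaces as an ordinary errno, which connection.rb now rescues via SystemCallError. A self-contained sketch of the same pattern (names and the broker address are illustrative):

    require "socket"

    # Raise Errno::ETIMEDOUT unless the socket becomes readable within
    # `timeout` seconds; otherwise read up to `num_bytes` bytes from it.
    def read_with_timeout(socket, num_bytes, timeout)
      unless IO.select([socket], nil, nil, timeout)
        raise Errno::ETIMEDOUT
      end
      socket.read(num_bytes)
    end

    socket = TCPSocket.new("localhost", 9092) # assumes a reachable broker
    data = read_with_timeout(socket, 4, 5)    # e.g. a 4-byte length prefix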
lib/kafka/version.rb CHANGED
@@ -1,3 +1,3 @@
 module Kafka
-  VERSION = "0.5.0.beta5"
+  VERSION = "0.5.0.beta6"
 end
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: ruby-kafka
 version: !ruby/object:Gem::Version
-  version: 0.5.0.beta5
+  version: 0.5.0.beta6
 platform: ruby
 authors:
 - Daniel Schierbeck
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2017-10-26 00:00:00.000000000 Z
+date: 2017-10-27 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler