ruby-kafka 0.5.0.beta5 → 0.5.0.beta6

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: '049f2fa2e6a04ff652cffe3ed630dbfe69e87009'
-  data.tar.gz: ed051c1dbb21aaac9360e58c74fca58d7706a76a
+  metadata.gz: 9893c7609209a4d58c3ac399cbbdce89c8df0124
+  data.tar.gz: 820e9d1bcd07f6d7c8de1d0ba1d286f184b64d8c
 SHA512:
-  metadata.gz: '08b01eb42a5082deac74cd5ff3136aa50ec16ac34c5ec4818d5a3722b9b46273b3601bd355073d3165c8917a9daf51356a43ff5ec97eceedbfcc6e0450018b79'
-  data.tar.gz: 16114b7e4abe72d82f3ae37cc0cfba4cbfdf798073adc04f1eaea892ac64eed2d96b66f093df1e2b3f1c6ac40f7a5b87146efadbed888d46a32abc4113e1f289
+  metadata.gz: baf63463ad738c02a1ad07d60db31ff6546a3cc2e97ad411ea02b31dbad7654b003ae61836bc67f10db30650096e278a3e621f3456a44aac4a0c7f574f80b728
+  data.tar.gz: 54be82a578abcf241b387f70cd2a07b2ce069f2e9d80e0c24104780af4b1ae8b55dccf8f5962ac31456d19bd2770f2a2af6911fcd8ef7200f501633c36fe81be
README.md CHANGED
@@ -20,6 +20,7 @@ consumer = kafka.consumer(group_id: "test")
 consumer.subscribe(topic)
 
 trap("TERM") { consumer.stop }
+trap("INT") { consumer.stop }
 
 consumer.each_message do |message|
   puts message.value
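The added trap("INT") mirrors the existing TERM handler, so Ctrl-C during local development also triggers a graceful shutdown instead of killing the process mid-iteration. A minimal sketch of the resulting pattern (broker address and topic name are placeholders):

    require "kafka"

    kafka = Kafka.new(seed_brokers: ["localhost:9092"]) # placeholder broker
    consumer = kafka.consumer(group_id: "test")
    consumer.subscribe("greetings") # placeholder topic

    # Both signals now request a clean stop: the consumer finishes the current
    # iteration, commits its offsets, and leaves the group before exiting.
    trap("TERM") { consumer.stop }
    trap("INT") { consumer.stop }

    consumer.each_message do |message|
      puts message.value
    end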
lib/kafka/connection.rb CHANGED
@@ -109,10 +109,10 @@ module Kafka
 
         response
       end
-    rescue Errno::EPIPE, Errno::ECONNRESET, Errno::ETIMEDOUT, Errno::ESHUTDOWN, EOFError => e
+    rescue SystemCallError, EOFError => e
       close
 
-      raise ConnectionError, "Connection error: #{e}"
+      raise ConnectionError, "Connection error #{e.class}: #{e}"
     end
 
     private
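The rescue now catches SystemCallError, the common superclass of every Errno:: exception, so it subsumes the old explicit list (EPIPE, ECONNRESET, ETIMEDOUT, ESHUTDOWN) and also covers OS-level errors that list missed, such as ECONNREFUSED. Including e.class in the message preserves the detail the narrower list used to convey. A quick sketch of the relationship:

    # Every Errno constant is a subclass of SystemCallError, so one rescue
    # clause covers them all.
    Errno::EPIPE.ancestors.include?(SystemCallError)        # => true
    Errno::ECONNREFUSED.ancestors.include?(SystemCallError) # => true

    begin
      raise Errno::ECONNREFUSED, "connecting to broker"
    rescue SystemCallError, EOFError => e
      # With the new format the class survives the re-raise, e.g.
      # "Connection error Errno::ECONNREFUSED: Connection refused - connecting to broker"
      puts "Connection error #{e.class}: #{e}"
    end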
lib/kafka/consumer.rb CHANGED
@@ -57,6 +57,17 @@ module Kafka
 
       # The maximum number of bytes to fetch from a single partition, by topic.
       @max_bytes = {}
+
+      # Hash containing the current offset for each topic and partition when the
+      # automatically_mark_as_processed feature is disabled. The offset manager is
+      # only authoritative when marking is supposed to happen automatically;
+      # otherwise we need to track the offset manually, in memory, for the entire
+      # lifetime of the consumer. The hash is keyed by topic, then by partition,
+      # and the value is the offset of the last message we've received.
+      # @note It is not updated when the user marks a message as processed, because
+      #   if the user commits a message other than the last one in a batch, that
+      #   would make ruby-kafka refetch some already consumed messages.
+      @current_offsets = Hash.new { |h, k| h[k] = {} }
     end
 
     # Subscribes the consumer to a topic.
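The default block makes the hash autovivify an inner hash per topic, so nested reads and writes like @current_offsets[topic][partition] need no explicit setup. A standalone sketch of the pattern:

    current_offsets = Hash.new { |hash, topic| hash[topic] = {} }

    # The inner hash is created on first access to a topic:
    current_offsets["greetings"][0] = 41
    current_offsets["greetings"][0] += 1

    current_offsets["greetings"].key?(0) # => true
    current_offsets["greetings"][0]      # => 42
    current_offsets["unseen-topic"]      # => {} (created empty on access)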
@@ -181,7 +192,11 @@ module Kafka
     # @return [nil]
     def each_message(min_bytes: 1, max_wait_time: 1, automatically_mark_as_processed: true)
       consumer_loop do
-        batches = fetch_batches(min_bytes: min_bytes, max_wait_time: max_wait_time)
+        batches = fetch_batches(
+          min_bytes: min_bytes,
+          max_wait_time: max_wait_time,
+          automatically_mark_as_processed: automatically_mark_as_processed
+        )
 
         batches.each do |batch|
           batch.messages.each do |message|
@@ -197,6 +212,7 @@ module Kafka
 
             begin
               yield message
+              @current_offsets[message.topic][message.partition] = message.offset
             rescue => e
               location = "#{message.topic}/#{message.partition} at offset #{message.offset}"
               backtrace = e.backtrace.join("\n")
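Recording the offset after every successful yield is what lets a consumer with marking disabled keep moving forward within a session, while leaving commits entirely to the application. A hedged usage sketch, reusing the consumer from the README example (process is a placeholder for application logic):

    consumer.each_message(automatically_mark_as_processed: false) do |message|
      # Only explicitly marked messages are committed back to Kafka, but
      # @current_offsets still advances the fetch position, so the session
      # does not refetch messages it has already yielded.
      consumer.mark_message_as_processed(message) if process(message)
    end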
@@ -247,7 +263,11 @@ module Kafka
     # @return [nil]
     def each_batch(min_bytes: 1, max_wait_time: 1, automatically_mark_as_processed: true)
       consumer_loop do
-        batches = fetch_batches(min_bytes: min_bytes, max_wait_time: max_wait_time)
+        batches = fetch_batches(
+          min_bytes: min_bytes,
+          max_wait_time: max_wait_time,
+          automatically_mark_as_processed: automatically_mark_as_processed
+        )
 
         batches.each do |batch|
           unless batch.empty?
@@ -262,6 +282,7 @@ module Kafka
 
             begin
               yield batch
+              @current_offsets[batch.topic][batch.partition] = batch.last_offset
             rescue => e
               offset_range = (batch.first_offset..batch.last_offset)
               location = "#{batch.topic}/#{batch.partition} in offset range #{offset_range}"
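The batch path records batch.last_offset, the offset of the batch's final message. An analogous sketch for batch processing with manual marking (process_batch is a placeholder for application logic):

    consumer.each_batch(automatically_mark_as_processed: false) do |batch|
      process_batch(batch.messages)

      # Offsets are cumulative per partition, so marking the last message
      # of the batch commits everything before it as well.
      consumer.mark_message_as_processed(batch.messages.last)
    end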
@@ -379,7 +400,7 @@ module Kafka
       end
     end
 
-    def fetch_batches(min_bytes:, max_wait_time:)
+    def fetch_batches(min_bytes:, max_wait_time:, automatically_mark_as_processed:)
       join_group unless @group.member?
 
       subscribed_partitions = @group.subscribed_partitions
@@ -395,7 +416,18 @@ module Kafka
 
       subscribed_partitions.each do |topic, partitions|
         partitions.each do |partition|
-          offset = @offset_manager.next_offset_for(topic, partition)
+          if automatically_mark_as_processed
+            offset = @offset_manager.next_offset_for(topic, partition)
+          else
+            # When automatic marking is off, the first poll must start from the last
+            # offset committed to Kafka, hence the fallback (the next offset may not be 0).
+            if @current_offsets[topic].key?(partition)
+              offset = @current_offsets[topic][partition] + 1
+            else
+              offset = @offset_manager.next_offset_for(topic, partition)
+            end
+          end
+
           max_bytes = @max_bytes.fetch(topic)
 
           if paused?(topic, partition)
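Pulled out of the loop, the offset-selection rule is compact enough to state on its own. A simplified sketch mirroring the diff (next_fetch_offset is a hypothetical helper, not a method in the library):

    # Returns the offset the next fetch should start from, for one partition.
    def next_fetch_offset(topic, partition, automatically_mark_as_processed:)
      if automatically_mark_as_processed
        # The offset manager is authoritative when marking is automatic.
        @offset_manager.next_offset_for(topic, partition)
      elsif @current_offsets[topic].key?(partition)
        # Already fetched in this session: continue one past the last
        # message handed to the application.
        @current_offsets[topic][partition] + 1
      else
        # First fetch of the session: resume from the last offset committed
        # to Kafka, which is not necessarily 0.
        @offset_manager.next_offset_for(topic, partition)
      end
    end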
lib/kafka/consumer_group.rb CHANGED
@@ -48,6 +48,7 @@ module Kafka
     rescue ConnectionError
       @logger.error "Connection error while trying to join group `#{@group_id}`; retrying..."
       sleep 1
+      @cluster.mark_as_stale!
       @coordinator = nil
       retry
     end
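Marking the cluster stale invalidates the cached broker metadata, so the retry resolves the group coordinator from a fresh metadata fetch instead of a possibly dead cached broker. The general shape of the pattern, as a sketch with stand-in names:

    begin
      join_group # stand-in for the call that raised ConnectionError
    rescue ConnectionError
      sleep 1
      @cluster.mark_as_stale! # force a metadata refresh before the next attempt
      @coordinator = nil      # drop the cached coordinator as well
      retry
    end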
lib/kafka/socket_with_timeout.rb CHANGED
@@ -24,9 +24,6 @@ module Kafka
 
       @timeout = timeout
 
-      # This pipe is used to cancel IO.select calls when sockets are closed.
-      @cancel_reader, @cancel_writer = IO.pipe
-
       @socket = Socket.new(Socket.const_get(addr[0][0]), Socket::SOCK_STREAM, 0)
       @socket.setsockopt(Socket::IPPROTO_TCP, Socket::TCP_NODELAY, 1)
 
@@ -60,20 +57,11 @@ module Kafka
     # @raise [Errno::ETIMEDOUT] if the timeout is exceeded.
     # @return [String] the data that was read from the socket.
     def read(num_bytes)
-      rs, _, _ = IO.select([@socket, @cancel_reader], nil, nil, @timeout)
-
-      # The read timed out.
-      raise Errno::ETIMEDOUT if rs.nil?
-
-      # The socket has been closed.
-      raise Errno::ECONNABORTED if rs.include?(@cancel_reader)
+      unless IO.select([@socket], nil, nil, @timeout)
+        raise Errno::ETIMEDOUT
+      end
 
       @socket.read(num_bytes)
-    rescue Errno::EBADF
-      # We'll get EBADF if `select` is called with a closed socket, or
-      # if it's closed in the middle of things.
-      raise Errno::ESHUTDOWN if @socket.closed?
-      raise
     rescue IO::EAGAINWaitReadable
       retry
     end
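IO.select returns nil when the timeout elapses with nothing readable, which is all the simplified guard needs now that the cancellation pipe (and the EBADF/ESHUTDOWN special-casing it required) is gone. A self-contained sketch of the same read-with-timeout idea:

    require "socket"

    # Reads up to num_bytes from socket, raising if no data becomes readable
    # within timeout seconds. A sketch of the simplified logic above.
    def read_with_timeout(socket, num_bytes, timeout)
      # IO.select returns nil if no IO object is ready before the timeout.
      unless IO.select([socket], nil, nil, timeout)
        raise Errno::ETIMEDOUT
      end

      socket.read(num_bytes)
    rescue IO::EAGAINWaitReadable
      retry
    end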
@@ -92,7 +80,6 @@ module Kafka
     end
 
     def close
-      @cancel_writer.puts
       @socket.close
     end
 
lib/kafka/version.rb CHANGED
@@ -1,3 +1,3 @@
 module Kafka
-  VERSION = "0.5.0.beta5"
+  VERSION = "0.5.0.beta6"
 end
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: ruby-kafka
 version: !ruby/object:Gem::Version
-  version: 0.5.0.beta5
+  version: 0.5.0.beta6
 platform: ruby
 authors:
 - Daniel Schierbeck
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2017-10-26 00:00:00.000000000 Z
+date: 2017-10-27 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler