ruby-kafka 0.5.0.beta5 → 0.5.0.beta6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/examples/consumer-group.rb +1 -0
- data/lib/kafka/connection.rb +2 -2
- data/lib/kafka/consumer.rb +36 -4
- data/lib/kafka/consumer_group.rb +1 -0
- data/lib/kafka/socket_with_timeout.rb +3 -16
- data/lib/kafka/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 9893c7609209a4d58c3ac399cbbdce89c8df0124
+  data.tar.gz: 820e9d1bcd07f6d7c8de1d0ba1d286f184b64d8c
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: baf63463ad738c02a1ad07d60db31ff6546a3cc2e97ad411ea02b31dbad7654b003ae61836bc67f10db30650096e278a3e621f3456a44aac4a0c7f574f80b728
+  data.tar.gz: 54be82a578abcf241b387f70cd2a07b2ce069f2e9d80e0c24104780af4b1ae8b55dccf8f5962ac31456d19bd2770f2a2af6911fcd8ef7200f501633c36fe81be
data/examples/consumer-group.rb
CHANGED
data/lib/kafka/connection.rb
CHANGED
@@ -109,10 +109,10 @@ module Kafka
 
         response
       end
-    rescue
+    rescue SystemCallError, EOFError => e
       close
 
-      raise ConnectionError, "Connection error: #{e}"
+      raise ConnectionError, "Connection error #{e.class}: #{e}"
     end
 
     private
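The connection now rescues SystemCallError (the parent class of every Errno error) together with EOFError and includes the exception class in the raised ConnectionError message, which makes it easier to tell a reset from a timeout in logs. A standalone sketch of that pattern, not taken from the gem (the read_reply helper and the locally defined ConnectionError below are illustrative):

# Illustrative sketch only; ruby-kafka's real method wraps a full request/response cycle.
ConnectionError = Class.new(StandardError)

def read_reply(socket)
  socket.read(4)
rescue SystemCallError, EOFError => e
  # SystemCallError covers OS-level failures such as Errno::ECONNRESET and
  # Errno::EPIPE; EOFError covers a peer that closed the connection cleanly.
  socket.close
  raise ConnectionError, "Connection error #{e.class}: #{e}"
end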
data/lib/kafka/consumer.rb
CHANGED
@@ -57,6 +57,17 @@ module Kafka
 
       # The maximum number of bytes to fetch from a single partition, by topic.
       @max_bytes = {}
+
+      # Hash containing offsets for each topic and partition that has the
+      # automatically_mark_as_processed feature disabled. Offset manager is only active
+      # when everything is suppose to happen automatically. Otherwise we need to keep track of the
+      # offset manually in memory for all the time
+      # The key structure for this equals an array with topic and partition [topic, partition]
+      # The value is equal to the offset of the last message we've received
+      # @note It won't be updated in case user marks message as processed, because for the case
+      # when user commits message other than last in a batch, this would make ruby-kafka refetch
+      # some already consumed messages
+      @current_offsets = Hash.new { |h, k| h[k] = {} }
     end
 
     # Subscribes the consumer to a topic.
@@ -181,7 +192,11 @@ module Kafka
     # @return [nil]
     def each_message(min_bytes: 1, max_wait_time: 1, automatically_mark_as_processed: true)
       consumer_loop do
-        batches = fetch_batches(min_bytes: min_bytes, max_wait_time: max_wait_time)
+        batches = fetch_batches(
+          min_bytes: min_bytes,
+          max_wait_time: max_wait_time,
+          automatically_mark_as_processed: automatically_mark_as_processed
+        )
 
         batches.each do |batch|
           batch.messages.each do |message|
@@ -197,6 +212,7 @@ module Kafka
 
             begin
               yield message
+              @current_offsets[message.topic][message.partition] = message.offset
             rescue => e
               location = "#{message.topic}/#{message.partition} at offset #{message.offset}"
               backtrace = e.backtrace.join("\n")
@@ -247,7 +263,11 @@ module Kafka
     # @return [nil]
     def each_batch(min_bytes: 1, max_wait_time: 1, automatically_mark_as_processed: true)
      consumer_loop do
-        batches = fetch_batches(min_bytes: min_bytes, max_wait_time: max_wait_time)
+        batches = fetch_batches(
+          min_bytes: min_bytes,
+          max_wait_time: max_wait_time,
+          automatically_mark_as_processed: automatically_mark_as_processed
+        )
 
         batches.each do |batch|
           unless batch.empty?
@@ -262,6 +282,7 @@ module Kafka
 
             begin
               yield batch
+              @current_offsets[batch.topic][batch.partition] = batch.last_offset
             rescue => e
               offset_range = (batch.first_offset..batch.last_offset)
               location = "#{batch.topic}/#{batch.partition} in offset range #{offset_range}"
@@ -379,7 +400,7 @@ module Kafka
       end
     end
 
-    def fetch_batches(min_bytes:, max_wait_time:)
+    def fetch_batches(min_bytes:, max_wait_time:, automatically_mark_as_processed:)
       join_group unless @group.member?
 
       subscribed_partitions = @group.subscribed_partitions
@@ -395,7 +416,18 @@ module Kafka
 
       subscribed_partitions.each do |topic, partitions|
         partitions.each do |partition|
-          offset = @offset_manager.next_offset_for(topic, partition)
+          if automatically_mark_as_processed
+            offset = @offset_manager.next_offset_for(topic, partition)
+          else
+            # When automatic marking is off, the first poll needs to be based on the last committed
+            # offset from Kafka, that's why we fallback in case of nil (it may not be 0)
+            if @current_offsets[topic].key?(partition)
+              offset = @current_offsets[topic][partition] + 1
+            else
+              offset = @offset_manager.next_offset_for(topic, partition)
+            end
+          end
+
           max_bytes = @max_bytes.fetch(topic)
 
           if paused?(topic, partition)
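Taken together, the consumer changes make automatically_mark_as_processed: false practical: after each message or batch is successfully yielded, the consumer records its offset in @current_offsets and resumes fetching at that offset plus one, falling back to the offset manager (the last committed offset) only on the first poll. A hedged usage sketch, assuming placeholder broker, topic, and group names and that mark_message_as_processed behaves as described in the ruby-kafka README of this era:

require "kafka"

kafka = Kafka.new(seed_brokers: ["localhost:9092"], client_id: "offset-demo")
consumer = kafka.consumer(group_id: "offset-demo-group")
consumer.subscribe("greetings")

# With automatic marking disabled, nothing is committed unless the caller marks it,
# but the consumer no longer refetches messages it already yielded in this session.
consumer.each_message(automatically_mark_as_processed: false) do |message|
  puts "#{message.topic}/#{message.partition}@#{message.offset}: #{message.value}"

  # Mark only after the message has really been handled; the offset manager commits
  # marked offsets on its normal schedule.
  consumer.mark_message_as_processed(message)
end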
data/lib/kafka/consumer_group.rb
CHANGED
data/lib/kafka/socket_with_timeout.rb
CHANGED
@@ -24,9 +24,6 @@ module Kafka
 
       @timeout = timeout
 
-      # This pipe is used to cancel IO.select calls when sockets are closed.
-      @cancel_reader, @cancel_writer = IO.pipe
-
       @socket = Socket.new(Socket.const_get(addr[0][0]), Socket::SOCK_STREAM, 0)
       @socket.setsockopt(Socket::IPPROTO_TCP, Socket::TCP_NODELAY, 1)
 
@@ -60,20 +57,11 @@ module Kafka
     # @raise [Errno::ETIMEDOUT] if the timeout is exceeded.
     # @return [String] the data that was read from the socket.
     def read(num_bytes)
-
-
-
-      raise Errno::ETIMEDOUT if rs.nil?
-
-      # The socket has been closed.
-      raise Errno::ECONNABORTED if rs.include?(@cancel_reader)
+      unless IO.select([@socket], nil, nil, @timeout)
+        raise Errno::ETIMEDOUT
+      end
 
       @socket.read(num_bytes)
-    rescue Errno::EBADF
-      # We'll get EBADF if `select` is called with a closed socket, or
-      # if it's closed in the middle of things.
-      raise Errno::ESHUTDOWN if @socket.closed?
-      raise
     rescue IO::EAGAINWaitReadable
       retry
     end
@@ -92,7 +80,6 @@ module Kafka
     end
 
     def close
-      @cancel_writer.puts
       @socket.close
     end
 
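The socket change removes the self-pipe that was previously used to interrupt IO.select when a connection was closed from another thread; read now relies on IO.select's own timeout alone. A standalone sketch of that pattern (the read_with_timeout helper is illustrative, not part of the gem):

require "socket"

# Illustrative helper, not ruby-kafka code: wait until the socket is readable or the
# timeout elapses, then read. IO.select returns nil when the timeout expires.
def read_with_timeout(socket, num_bytes, timeout)
  unless IO.select([socket], nil, nil, timeout)
    raise Errno::ETIMEDOUT
  end

  socket.read(num_bytes)
rescue IO::EAGAINWaitReadable
  # The socket looked readable but the read would still block; try again.
  retry
end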
data/lib/kafka/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: ruby-kafka
 version: !ruby/object:Gem::Version
-  version: 0.5.0.beta5
+  version: 0.5.0.beta6
 platform: ruby
 authors:
 - Daniel Schierbeck
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2017-10-
+date: 2017-10-27 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler