ruby-kafka 0.3.18.beta1 → 0.3.18.beta2
This diff shows the content of publicly available package versions as they appear in their respective public registries, and is provided for informational purposes only.
- checksums.yaml +4 -4
- data/README.md +14 -4
- data/lib/kafka/broker.rb +0 -16
- data/lib/kafka/connection.rb +23 -111
- data/lib/kafka/consumer.rb +5 -1
- data/lib/kafka/fetch_operation.rb +6 -14
- data/lib/kafka/instrumenter.rb +4 -28
- data/lib/kafka/version.rb +1 -1
- metadata +2 -3
- data/lib/kafka/protocol/null_response.rb +0 -11
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: …
-  data.tar.gz: …
+  metadata.gz: a315e2a5db26fa2430705e5dc25757593682703b
+  data.tar.gz: d80c0b9f184d4ec2da61139de39bf97177c7de4a
 SHA512:
-  metadata.gz: …
-  data.tar.gz: …
+  metadata.gz: 63f090c16636aff10749d7e20628996207bf07dcd68da7fc58a76f49113972eb35e995b46af91d4b418692effefdc737e652750a583b6d7c3f066d5e797ff1e6
+  data.tar.gz: d5e813fbdf4d9663ca7e7718493f6b6b34523828eefd1697f69da2fb715fa1b533f84681be2fbd38ecd023c9b70bfb5085b04873764ab6f3cc040cc95c2f1ef0
data/README.md
CHANGED
@@ -637,12 +637,13 @@ In order to optimize for throughput, you want to make sure to fetch as many mess
 
 In order to optimize for low latency, you want to process a message as soon as possible, even if that means fetching a smaller batch of messages.
 
-There are …
+There are three values that can be tuned in order to balance these two concerns.
 
 * `min_bytes` is the minimum number of bytes to return from a single message fetch. By setting this to a high value you can increase the processing throughput. The default value is one byte.
-* `max_wait_time` is the maximum number of seconds to wait before returning data from a single message fetch. By setting this high you also increase the processing throughput – and by setting it low you set a bound on latency. This configuration overrides `min_bytes`, so you'll _always_ get data back within the time specified. The default value is five seconds.
+* `max_wait_time` is the maximum number of seconds to wait before returning data from a single message fetch. By setting this high you also increase the processing throughput – and by setting it low you set a bound on latency. This configuration overrides `min_bytes`, so you'll _always_ get data back within the time specified. The default value is five seconds. If you want to have at most one second of latency, set `max_wait_time` to 1.
+* `max_bytes_per_partition` is the maximum amount of data a broker will return for a single partition when fetching new messages. The default is 1MB, but increasing this number may lead to better throughput since you'll need to fetch less frequently. Setting it to a lower value is not recommended unless you have so many partitions that it's causing network and latency issues to transfer a fetch response from a broker to a client. Setting the number too high may result in instability, so be careful.
 
-…
+The first two settings can be passed to either `#each_message` or `#each_batch`, e.g.
 
 ```ruby
 # Waits for data for up to 30 seconds, preferring to fetch at least 5KB at a time.
@@ -651,7 +652,16 @@ consumer.each_message(min_bytes: 1024 * 5, max_wait_time: 30) do |message|
 end
 ```
 
-…
+The last setting is configured when subscribing to a topic, and can vary between topics:
+
+```ruby
+# Fetches up to 5MB per partition at a time for better throughput.
+consumer.subscribe("greetings", max_bytes_per_partition: 5 * 1024 * 1024)
+
+consumer.each_message do |message|
+  # ...
+end
+```
 
 
 ### Thread Safety
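The README's new examples only show `#each_message`, but the text says the first two settings also apply to `#each_batch`. A minimal sketch, assuming a `consumer` built via `kafka.consumer(group_id: ...)` as elsewhere in the README:

```ruby
# Waits up to 10 seconds for at least 100KB to accumulate, then yields
# the fetched messages one batch per partition.
consumer.each_batch(min_bytes: 1024 * 100, max_wait_time: 10) do |batch|
  batch.messages.each do |message|
    # ...
  end
end
```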
data/lib/kafka/broker.rb
CHANGED
@@ -40,22 +40,6 @@ module Kafka
       @connection.send_request(request)
     end
 
-    # Fetches messages asynchronously.
-    #
-    # The fetch request is sent to the broker, but the response is not read.
-    # This allows the broker to process the request, wait for new messages,
-    # and send a response without the client having to wait. In order to
-    # read the response, call `#call` on the returned object. This will
-    # block the caller until the response is available.
-    #
-    # @param (see Kafka::Protocol::FetchRequest#initialize)
-    # @return [Kafka::AsyncResponse]
-    def fetch_messages_async(**options)
-      request = Protocol::FetchRequest.new(**options)
-
-      @connection.send_async_request(request)
-    end
-
     # Lists the offset of the specified topics and partitions.
     #
     # @param (see Kafka::Protocol::ListOffsetRequest#initialize)
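With `fetch_messages_async` gone, the only fetch path is the synchronous `Broker#fetch_messages`, which blocks until the decoded response is available (see connection.rb below). A sketch of the new call shape, mirroring how fetch_operation.rb now uses it; this is an internal API, `broker` is assumed to be a connected `Kafka::Broker`, and the `topics` layout is an assumption:

```ruby
# Sketch only: the layout of `topics` (topic => partition => fetch params)
# is assumed from how fetch_operation.rb builds its requests.
topics = {
  "greetings" => {
    0 => { fetch_offset: 0, max_bytes: 1024 * 1024 },
  },
}

# Blocks until the broker responds; no AsyncResponse, no #call.
response = broker.fetch_messages(
  max_wait_time: 5, # seconds the broker may wait for min_bytes
  min_bytes: 1,     # respond as soon as any data is available
  topics: topics,
)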
data/lib/kafka/connection.rb
CHANGED
@@ -2,41 +2,11 @@ require "stringio"
 require "kafka/socket_with_timeout"
 require "kafka/ssl_socket_with_timeout"
 require "kafka/protocol/request_message"
-require "kafka/protocol/null_response"
 require "kafka/protocol/encoder"
 require "kafka/protocol/decoder"
 
 module Kafka
 
-  # An asynchronous response object allows us to deliver a response at some
-  # later point in time.
-  #
-  # When instantiating an AsyncResponse, you provide a response decoder and
-  # a block that will force the caller to wait until a response is available.
-  class AsyncResponse
-    # Use a custom "nil" value so that nil can be an actual value.
-    MISSING = Object.new
-
-    def initialize(decoder, &block)
-      @decoder = decoder
-      @block = block
-      @response = MISSING
-    end
-
-    # Block until a response is available.
-    def call
-      @block.call if @response == MISSING
-      @response
-    end
-
-    # Deliver the response data.
-    #
-    # After calling this, `#call` will return the decoded response.
-    def deliver(data)
-      @response = @decoder.decode(data)
-    end
-  end
-
   # A connection to a single Kafka broker.
   #
   # Usually you'll need a separate connection to each broker in a cluster, since most
@@ -108,18 +78,6 @@ module Kafka
     #
     # @return [Object] the response.
     def send_request(request)
-      # Immediately block on the asynchronous request.
-      send_async_request(request).call
-    end
-
-    # Sends a request over the connection.
-    #
-    # @param request [#encode, #response_class] the request that should be
-    #   encoded and written.
-    #
-    # @return [AsyncResponse] the async response, allowing the caller to choose
-    #   when to block.
-    def send_async_request(request)
       # Default notification payload.
       notification = {
         broker_host: @host,
@@ -128,41 +86,15 @@ module Kafka
         response_size: 0,
       }
 
-      @instrumenter.…
-
-      open unless open?
-
-      @correlation_id += 1
+      @instrumenter.instrument("request.connection", notification) do
+        open unless open?
 
-…
+        @correlation_id += 1
 
-…
-      correlation_id = @correlation_id
+        write_request(request, notification)
 
-…
-…
-…
-        # Immediately deliver a nil value.
-        async_response.deliver(nil)
-
-        @instrumenter.finish("request.connection", notification)
-
-        async_response
-      else
-        async_response = AsyncResponse.new(response_class) {
-          # A caller is trying to read the response, so we have to wait for it
-          # before we can return.
-          wait_for_response(correlation_id, notification)
-
-          # Once done, we can finish the instrumentation.
-          @instrumenter.finish("request.connection", notification)
-        }
-
-        # Store the asynchronous response so that data can be delivered to it
-        # at a later time.
-        @pending_async_responses[correlation_id] = async_response
-
-        async_response
+        response_class = request.response_class
+        wait_for_response(response_class, notification) unless response_class.nil?
       end
     rescue Errno::EPIPE, Errno::ECONNRESET, Errno::ETIMEDOUT, EOFError => e
      close
@@ -186,9 +118,6 @@ module Kafka
 
       # Correlation id is initialized to zero and bumped for each request.
       @correlation_id = 0
-
-      # The pipeline of pending response futures must be reset.
-      @pending_async_responses = {}
     rescue Errno::ETIMEDOUT => e
       @logger.error "Timed out while trying to connect to #{self}: #{e}"
       raise ConnectionError, e
@@ -230,8 +159,8 @@ module Kafka
     # a given Decoder.
     #
     # @return [nil]
-    def read_response(…
-      @logger.debug "Waiting for response #{…
+    def read_response(response_class, notification)
+      @logger.debug "Waiting for response #{@correlation_id} from #{to_s}"
 
       data = @decoder.bytes
       notification[:response_size] = data.bytesize
@@ -240,49 +169,32 @@ module Kafka
       response_decoder = Kafka::Protocol::Decoder.new(buffer)
 
       correlation_id = response_decoder.int32
+      response = response_class.decode(response_decoder)
 
       @logger.debug "Received response #{correlation_id} from #{to_s}"
 
-      return correlation_id, …
+      return correlation_id, response
     rescue Errno::ETIMEDOUT
-      @logger.error "Timed out while waiting for response #{…
+      @logger.error "Timed out while waiting for response #{@correlation_id}"
       raise
-    rescue Errno::EPIPE, Errno::ECONNRESET, Errno::ETIMEDOUT, EOFError => e
-      close
-
-      raise ConnectionError, "Connection error: #{e}"
     end
 
-    def wait_for_response(…
+    def wait_for_response(response_class, notification)
       loop do
-        correlation_id, …
-…
-…
-…
-…
-…
-        #…
-…
-…
-        elsif correlation_id > expected_correlation_id
-          raise Kafka::Error, "Correlation id mismatch: expected #{expected_correlation_id} but got #{correlation_id}"
+        correlation_id, response = read_response(response_class, notification)
+
+        # There may have been a previous request that timed out before the client
+        # was able to read the response. In that case, the response will still be
+        # sitting in the socket waiting to be read. If the response we just read
+        # was to a previous request, we can safely skip it.
+        if correlation_id < @correlation_id
+          @logger.error "Received out-of-order response id #{correlation_id}, was expecting #{@correlation_id}"
+        elsif correlation_id > @correlation_id
+          raise Kafka::Error, "Correlation id mismatch: expected #{@correlation_id} but got #{correlation_id}"
         else
-…
-          # async response future.
-          async_response = @pending_async_responses.delete(correlation_id)
-          async_response.deliver(data)
-
-          return async_response.call
+          return response
         end
       end
-    rescue Errno::EPIPE, Errno::ECONNRESET, Errno::ETIMEDOUT, EOFError => e
-      notification[:exception] = [e.class.name, e.message]
-      notification[:exception_object] = e
-
-      close
-
-      raise ConnectionError, "Connection error: #{e}"
     end
   end
 end
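The interesting part of the new `wait_for_response` is the stale-response skip: a reply whose correlation id is lower than the current one belongs to a request that already timed out, so it is logged and discarded rather than mis-delivered. A self-contained toy model of that rule (illustrative only; the names do not match the gem's internals):

```ruby
# Replies carry the correlation id of the request they answer. Lower ids
# are leftovers from timed-out requests; higher ids mean we lost track.
def wait_for_reply(expected_id, replies)
  loop do
    id, payload = replies.shift
    if id < expected_id
      next # stale reply from a request that timed out; skip it
    elsif id > expected_id
      raise "correlation id mismatch: expected #{expected_id}, got #{id}"
    else
      return payload
    end
  end
end

replies = [[1, :stale], [2, :fresh]]
p wait_for_reply(2, replies) # => :fresh, after skipping the stale reply
```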
data/lib/kafka/consumer.rb
CHANGED
@@ -288,6 +288,10 @@ module Kafka
       @offset_manager.mark_as_processed(message.topic, message.partition, message.offset)
     end
 
+    def send_heartbeat_if_necessary
+      @heartbeat.send_if_necessary
+    end
+
     private
 
     def consumer_loop
@@ -316,7 +320,7 @@ module Kafka
 
     def make_final_offsets_commit!(attempts = 3)
       @offset_manager.commit_offsets
-    rescue ConnectionError
+    rescue ConnectionError
       # It's important to make sure final offsets commit is done
       # As otherwise messages that have been processed after last auto-commit
       # will be processed again and that may be huge amount of messages
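`#send_heartbeat_if_necessary` becomes public here, which lets a handler that processes a single message slowly heartbeat mid-work instead of waiting for the next loop iteration. A minimal usage sketch; the chunked work is hypothetical:

```ruby
consumer.each_message do |message|
  work_units_for(message).each do |unit| # hypothetical slow, chunked work
    unit.call
    # Only sends if the heartbeat interval has elapsed, so it is cheap to
    # call often; keeps the group coordinator from evicting this consumer
    # while a single message is being processed.
    consumer.send_heartbeat_if_necessary
  end
end
```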
data/lib/kafka/fetch_operation.rb
CHANGED
@@ -40,11 +40,7 @@ module Kafka
       }
     end
 
-    def execute
-      if block.nil?
-        return to_enum(:execute)
-      end
-
+    def execute
       @cluster.add_target_topics(@topics.keys)
       @cluster.refresh_metadata_if_necessary!
 
@@ -60,7 +56,7 @@ module Kafka
        end
      end
 
-…
+      topics_by_broker.flat_map {|broker, topics|
        resolve_offsets(broker, topics)
 
        options = {
@@ -69,14 +65,10 @@ module Kafka
          topics: topics,
        }
 
-        broker.…
-      }
-
-      responses.each {|response_future|
-        response = response_future.call
+        response = broker.fetch_messages(**options)
 
-        response.topics.…
-          fetched_topic.partitions.…
+        response.topics.flat_map {|fetched_topic|
+          fetched_topic.partitions.map {|fetched_partition|
            begin
              Protocol.handle_error(fetched_partition.error_code)
            rescue Kafka::OffsetOutOfRange => e
@@ -101,7 +93,7 @@ module Kafka
              )
            }
 
-…
+            FetchedBatch.new(
              topic: fetched_topic.name,
              partition: fetched_partition.partition,
              highwater_mark_offset: fetched_partition.highwater_mark_offset,
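With the response futures gone, `execute` is a single `flat_map` traversal: brokers → topics → partitions, flattened into one array of batches. The shape of that traversal in isolation, using plain hashes instead of the gem's response objects:

```ruby
# Two broker responses, three fetched partitions in total.
responses = [
  { topics: [{ name: "greetings", partitions: [0, 1] }] },
  { topics: [{ name: "farewells", partitions: [2] }] },
]

batches = responses.flat_map do |response|
  response[:topics].flat_map do |topic|
    topic[:partitions].map do |partition|
      { topic: topic[:name], partition: partition }
    end
  end
end

p batches.length # => 3: one entry per partition, no nesting left
```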
data/lib/kafka/instrumenter.rb
CHANGED
@@ -6,35 +6,19 @@ module Kafka
       @default_payload = default_payload
 
       if defined?(ActiveSupport::Notifications)
-        @backend = ActiveSupport::Notifications
+        @backend = ActiveSupport::Notifications
       else
         @backend = nil
       end
     end
 
-    def instrument(event_name, payload = {})
+    def instrument(event_name, payload = {}, &block)
       if @backend
         payload.update(@default_payload)
 
-        @backend.instrument("#{event_name}.#{NAMESPACE}", payload)
+        @backend.instrument("#{event_name}.#{NAMESPACE}", payload, &block)
       else
-…
-      end
-    end
-
-    def start(event_name, payload = {})
-      if @backend
-        payload.update(@default_payload)
-
-        @backend.start("#{event_name}.#{NAMESPACE}", payload)
-      end
-    end
-
-    def finish(event_name, payload = {})
-      if @backend
-        payload.update(@default_payload)
-
-        @backend.finish("#{event_name}.#{NAMESPACE}", payload)
+        block.call(payload) if block
       end
     end
   end
@@ -48,13 +32,5 @@ module Kafka
     def instrument(event_name, payload = {}, &block)
       @backend.instrument(event_name, @extra_payload.merge(payload), &block)
     end
-
-    def start(event_name, payload = {})
-      @backend.start(event_name, @extra_payload.merge(payload))
-    end
-
-    def finish(event_name, payload = {})
-      @backend.finish(event_name, @extra_payload.merge(payload))
-    end
   end
 end
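Since `instrument` now hands the block straight to `ActiveSupport::Notifications.instrument`, the whole request (including `open` and `wait_for_response`) is timed as one event instead of paired start/finish calls. A subscriber sketch; the full event name `"request.connection.kafka"` assumes `NAMESPACE` is `"kafka"`, and only the payload keys visible in this diff are used:

```ruby
require "active_support/notifications"

# Logs one line per Kafka request with its wall-clock duration and the
# response size recorded in the notification payload.
ActiveSupport::Notifications.subscribe("request.connection.kafka") do |name, start, finish, id, payload|
  duration_ms = (finish - start) * 1000.0
  puts format("%s took %.1fms, %d bytes received",
              payload[:broker_host], duration_ms, payload[:response_size])
end
```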
data/lib/kafka/version.rb
CHANGED
@@ -1,3 +1,3 @@
 module Kafka
-  VERSION = "0.3.18.beta1"
+  VERSION = "0.3.18.beta2"
 end
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: ruby-kafka
 version: !ruby/object:Gem::Version
-  version: 0.3.18.beta1
+  version: 0.3.18.beta2
 platform: ruby
 authors:
 - Daniel Schierbeck
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2017-06-…
+date: 2017-06-29 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: gssapi
@@ -295,7 +295,6 @@ files:
 - lib/kafka/protocol/message.rb
 - lib/kafka/protocol/message_set.rb
 - lib/kafka/protocol/metadata_response.rb
-- lib/kafka/protocol/null_response.rb
 - lib/kafka/protocol/offset_commit_request.rb
 - lib/kafka/protocol/offset_commit_response.rb
 - lib/kafka/protocol/offset_fetch_request.rb