ruby-kafka 0.3.17 → 0.3.18.beta1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/Gemfile.lock +6 -2
- data/README.md +76 -14
- data/lib/kafka.rb +72 -0
- data/lib/kafka/broker.rb +22 -0
- data/lib/kafka/client.rb +29 -1
- data/lib/kafka/cluster.rb +4 -1
- data/lib/kafka/connection.rb +114 -23
- data/lib/kafka/connection_builder.rb +30 -3
- data/lib/kafka/consumer.rb +35 -10
- data/lib/kafka/datadog.rb +1 -1
- data/lib/kafka/fetch_operation.rb +14 -6
- data/lib/kafka/instrumenter.rb +28 -4
- data/lib/kafka/protocol.rb +27 -9
- data/lib/kafka/protocol/null_response.rb +11 -0
- data/lib/kafka/protocol/sasl_handshake_request.rb +31 -0
- data/lib/kafka/protocol/sasl_handshake_response.rb +26 -0
- data/lib/kafka/sasl_gssapi_authenticator.rb +69 -0
- data/lib/kafka/version.rb +1 -1
- data/ruby-kafka.gemspec +2 -0
- data/vendor/bundle/bin/bundler +17 -0
- data/vendor/bundle/bin/coderay +17 -0
- data/vendor/bundle/bin/dotenv +17 -0
- data/vendor/bundle/bin/htmldiff +17 -0
- data/vendor/bundle/bin/ldiff +17 -0
- data/vendor/bundle/bin/pry +17 -0
- data/vendor/bundle/bin/rake +17 -0
- data/vendor/bundle/bin/rspec +17 -0
- data/vendor/bundle/bin/ruby-prof +17 -0
- data/vendor/bundle/bin/ruby-prof-check-trace +17 -0
- metadata +32 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 80c7dd382316d8fd5465ab7f73a4393cf3ed01cf
|
4
|
+
data.tar.gz: b30842098f9076127c33eb762dffc71424de2866
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 90b3ed06f47b6e2b22a7623bcf3825206b7745170207cb9e2226adcc2ee60988769e7ad498a241e30eb0f04dbf3daa56acef4c91c5334b04ded7d1cb429e4866
|
7
|
+
data.tar.gz: 72c20e866cd03e575bbae4d82a86fe56195463412929dbb4656c1cfc16ef4c9f109317897ad7773cc79ccd22da395c440522b174e786f2ff358629c4db897dc8
|
data/.gitignore
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,7 +1,8 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
ruby-kafka (0.3.
|
4
|
+
ruby-kafka (0.3.18.beta1)
|
5
|
+
gssapi (>= 1.2.0)
|
5
6
|
|
6
7
|
GEM
|
7
8
|
remote: https://rubygems.org/
|
@@ -23,6 +24,9 @@ GEM
|
|
23
24
|
dogstatsd-ruby (2.1.0)
|
24
25
|
dotenv (2.1.0)
|
25
26
|
excon (0.54.0)
|
27
|
+
ffi (1.9.18)
|
28
|
+
gssapi (1.2.0)
|
29
|
+
ffi (>= 1.0.1)
|
26
30
|
i18n (0.7.0)
|
27
31
|
json (1.8.3)
|
28
32
|
method_source (0.8.2)
|
@@ -83,4 +87,4 @@ RUBY VERSION
|
|
83
87
|
ruby 2.2.3p173
|
84
88
|
|
85
89
|
BUNDLED WITH
|
86
|
-
1.
|
90
|
+
1.14.6
|
data/README.md
CHANGED
@@ -33,7 +33,9 @@ Although parts of this library work with Kafka 0.8 – specifically, the Produce
|
|
33
33
|
7. [Monitoring](#monitoring)
|
34
34
|
1. [Reporting Metrics to Datadog](#reporting-metrics-to-datadog)
|
35
35
|
8. [Understanding Timeouts](#understanding-timeouts)
|
36
|
-
9. [
|
36
|
+
9. [Security](#security)
|
37
|
+
1. [Encryption and Authentication using SSL](#encryption-and-authentication-using-ssl)
|
38
|
+
2. [Authentication using SASL](#authentication-using-sasl)
|
37
39
|
4. [Design](#design)
|
38
40
|
1. [Producer Design](#producer-design)
|
39
41
|
2. [Asynchronous Producer Design](#asynchronous-producer-design)
|
@@ -101,7 +103,7 @@ require "kafka"
|
|
101
103
|
kafka = Kafka.new(
|
102
104
|
# At least one of these nodes must be available:
|
103
105
|
seed_brokers: ["kafka1:9092", "kafka2:9092"],
|
104
|
-
|
106
|
+
|
105
107
|
# Set an optional client id in order to identify the client to Kafka:
|
106
108
|
client_id: "my-application",
|
107
109
|
)
|
@@ -198,7 +200,7 @@ These policies can be used alone or in combination.
|
|
198
200
|
producer = kafka.async_producer(
|
199
201
|
# Trigger a delivery once 100 messages have been buffered.
|
200
202
|
delivery_threshold: 100,
|
201
|
-
|
203
|
+
|
202
204
|
# Trigger a delivery every 30 seconds.
|
203
205
|
delivery_interval: 30,
|
204
206
|
)
|
@@ -435,13 +437,13 @@ In your controllers, simply call the producer directly:
|
|
435
437
|
class OrdersController
|
436
438
|
def create
|
437
439
|
@order = Order.create!(params[:order])
|
438
|
-
|
440
|
+
|
439
441
|
event = {
|
440
442
|
order_id: @order.id,
|
441
443
|
amount: @order.amount,
|
442
444
|
timestamp: Time.now,
|
443
445
|
}
|
444
|
-
|
446
|
+
|
445
447
|
$kafka_producer.produce(event.to_json, topic: "order_events")
|
446
448
|
end
|
447
449
|
end
|
@@ -449,8 +451,6 @@ end
|
|
449
451
|
|
450
452
|
### Consuming Messages from Kafka
|
451
453
|
|
452
|
-
**Warning:** The Consumer API is still alpha level and will likely change. The consumer code should not be considered stable, as it hasn't been exhaustively tested in production environments yet.
|
453
|
-
|
454
454
|
Consuming messages from a Kafka topic is simple:
|
455
455
|
|
456
456
|
```ruby
|
@@ -501,9 +501,11 @@ Each consumer process will be assigned one or more partitions from each topic th
|
|
501
501
|
|
502
502
|
In order to be able to resume processing after a consumer crashes, each consumer will periodically _checkpoint_ its position within each partition it reads from. Since each partition has a monotonically increasing sequence of message offsets, this works by _committing_ the offset of the last message that was processed in a given partition. Kafka handles these commits and allows another consumer in a group to resume from the last commit when a member crashes or becomes unresponsive.
|
503
503
|
|
504
|
-
By default, offsets are committed every 10 seconds. You can increase the frequency, known as the _offset commit interval_, to limit the duration of double-processing scenarios, at the cost of a lower throughput due to the added coordination. If you want to improve throughput, and double-processing is of less concern to you, then you can decrease the frequency.
|
504
|
+
By default, offsets are committed every 10 seconds. You can increase the frequency, known as the _offset commit interval_, to limit the duration of double-processing scenarios, at the cost of a lower throughput due to the added coordination. If you want to improve throughput, and double-processing is of less concern to you, then you can decrease the frequency. Set the commit interval to zero in order to disable the timer-based commit trigger entirely.
|
505
|
+
|
506
|
+
In addition to the time based trigger it's possible to trigger checkpointing in response to _n_ messages having been processed, known as the _offset commit threshold_. This puts a bound on the number of messages that can be double-processed before the problem is detected. Setting this to 1 will cause an offset commit to take place every time a message has been processed. By default this trigger is disabled (set to zero).
|
505
507
|
|
506
|
-
|
508
|
+
It is possible to trigger an immediate offset commit by calling `Consumer#commit_offsets`. This blocks the caller until the Kafka cluster has acknowledged the commit.
|
507
509
|
|
508
510
|
Stale offsets are periodically purged by the broker. The broker setting `offsets.retention.minutes` controls the retention window for committed offsets, and defaults to 1 day. The length of the retention window, known as _offset retention time_, can be changed for the consumer.
|
509
511
|
|
@@ -524,6 +526,46 @@ consumer = kafka.consumer(
|
|
524
526
|
)
|
525
527
|
```
|
526
528
|
|
529
|
+
For some use cases it may be necessary to control when messages are marked as processed. Note that since only the consumer position within each partition can be saved, marking a message as processed implies that all messages in the partition with a lower offset should also be considered as having been processed.
|
530
|
+
|
531
|
+
The method `Consumer#mark_message_as_processed` marks a message (and all those that precede it in a partition) as having been processed. This is an advanced API that you should only use if you know what you're doing.
|
532
|
+
|
533
|
+
```ruby
|
534
|
+
# Manually controlling checkpointing:
|
535
|
+
|
536
|
+
# Typically you want to use this API in order to buffer messages until some
|
537
|
+
# special "commit" message is received, e.g. in order to group together
|
538
|
+
# transactions consisting of several items.
|
539
|
+
buffer = []
|
540
|
+
|
541
|
+
# Messages will not be marked as processed automatically. If you shut down the
|
542
|
+
# consumer without calling `#mark_message_as_processed` first, the consumer will
|
543
|
+
# not resume where you left off!
|
544
|
+
consumer.each_message(automatically_mark_as_processed: false) do |message|
|
545
|
+
# Our messages are JSON with a `type` field and other stuff.
|
546
|
+
event = JSON.parse(message.value)
|
547
|
+
|
548
|
+
case event.fetch("type")
|
549
|
+
when "add_to_cart"
|
550
|
+
buffer << event
|
551
|
+
when "complete_purchase"
|
552
|
+
# We've received all the messages we need, time to save the transaction.
|
553
|
+
save_transaction(buffer)
|
554
|
+
|
555
|
+
# Now we can set the checkpoint by marking the last message as processed.
|
556
|
+
consumer.mark_message_as_processed(message)
|
557
|
+
|
558
|
+
# We can optionally trigger an immediate, blocking offset commit in order
|
559
|
+
# to minimize the risk of crashing before the automatic triggers have
|
560
|
+
# kicked in.
|
561
|
+
consumer.commit_offsets
|
562
|
+
|
563
|
+
# Make the buffer ready for the next transaction.
|
564
|
+
buffer.clear
|
565
|
+
end
|
566
|
+
end
|
567
|
+
```
|
568
|
+
|
527
569
|
|
528
570
|
#### Topic Subscriptions
|
529
571
|
|
@@ -614,9 +656,9 @@ If you want to have at most one second of latency, set `max_wait_time: 1`.
|
|
614
656
|
|
615
657
|
### Thread Safety
|
616
658
|
|
617
|
-
You typically don't want to share a Kafka client between threads, since the network communication is not synchronized. Furthermore, you should avoid using threads in a consumer unless you're very careful about waiting for all work to complete before returning from the `#each_message` or `#each_batch` block. This is because _checkpointing_ assumes that returning from the block means that the messages that have been yielded have been successfully processed.
|
659
|
+
You typically don't want to share a Kafka client object between threads, since the network communication is not synchronized. Furthermore, you should avoid using threads in a consumer unless you're very careful about waiting for all work to complete before returning from the `#each_message` or `#each_batch` block. This is because _checkpointing_ assumes that returning from the block means that the messages that have been yielded have been successfully processed.
|
618
660
|
|
619
|
-
You should also avoid sharing a synchronous producer between threads, as the internal buffers are not thread safe. However, the _asynchronous_ producer should be safe to use in a multi-threaded environment.
|
661
|
+
You should also avoid sharing a synchronous producer between threads, as the internal buffers are not thread safe. However, the _asynchronous_ producer should be safe to use in a multi-threaded environment. This is because producers, when instantiated, get their own copy of any non-thread-safe data such as network sockets. Furthermore, the asynchronous producer has been designed in such a way that only a single background thread operates on this data, while any foreground thread with a reference to the producer object can only send messages to that background thread over a safe queue. Therefore it is safe to share an async producer object between many threads.
|
620
662
|
|
621
663
|
### Logging
|
622
664
|
|
@@ -721,11 +763,13 @@ When sending many messages, it's likely that the client needs to send some messa
|
|
721
763
|
|
722
764
|
Make sure your application can survive being blocked for so long.
|
723
765
|
|
724
|
-
###
|
766
|
+
### Security
|
767
|
+
|
768
|
+
#### Encryption and Authentication using SSL
|
725
769
|
|
726
770
|
By default, communication between Kafka clients and brokers is unencrypted and unauthenticated. Kafka 0.9 added optional support for [encryption and client authentication and authorization](http://kafka.apache.org/documentation.html#security_ssl). There are two layers of security made possible by this:
|
727
771
|
|
728
|
-
|
772
|
+
##### Encryption of Communication
|
729
773
|
|
730
774
|
By enabling SSL encryption you can have some confidence that messages can be sent to Kafka over an untrusted network without being intercepted.
|
731
775
|
|
@@ -740,7 +784,7 @@ kafka = Kafka.new(
|
|
740
784
|
|
741
785
|
Without passing the CA certificate to the client it would be impossible to protect against [man-in-the-middle attacks](https://en.wikipedia.org/wiki/Man-in-the-middle_attack).
|
742
786
|
|
743
|
-
|
787
|
+
##### Client Authentication
|
744
788
|
|
745
789
|
In order to authenticate the client to the cluster, you need to pass in a certificate and key created for the client and trusted by the brokers.
|
746
790
|
|
@@ -755,6 +799,24 @@ kafka = Kafka.new(
|
|
755
799
|
|
756
800
|
Once client authentication is set up, it is possible to configure the Kafka cluster to [authorize client requests](http://kafka.apache.org/documentation.html#security_authz).
|
757
801
|
|
802
|
+
##### Using JKS Certificates
|
803
|
+
|
804
|
+
Typically, Kafka certificates come in the JKS format, which isn't supported by ruby-kafka. There's [a wiki page](https://github.com/zendesk/ruby-kafka/wiki/Creating-X509-certificates-from-JKS-format) that describes how to generate valid X509 certificates from JKS certificates.
|
805
|
+
|
806
|
+
#### Authentication using SASL
|
807
|
+
|
808
|
+
Kafka has support for using SASL to authenticate clients. Currently only the GSSAPI mechanism is supported by ruby-kafka.
|
809
|
+
|
810
|
+
In order to authenticate using SASL, set your principal and optionally your keytab when initializing the Kafka client:
|
811
|
+
|
812
|
+
```ruby
|
813
|
+
kafka = Kafka.new(
|
814
|
+
sasl_gssapi_principal: 'kafka/kafka.example.com@EXAMPLE.COM',
|
815
|
+
sasl_gssapi_keytab: '/etc/keytabs/kafka.keytab',
|
816
|
+
# ...
|
817
|
+
)
|
818
|
+
```
|
819
|
+
|
758
820
|
## Design
|
759
821
|
|
760
822
|
The library has been designed as a layered system, with each layer having a clear responsibility:
|
data/lib/kafka.rb
CHANGED
@@ -79,6 +79,7 @@ module Kafka
|
|
79
79
|
class NotCoordinatorForGroup < ProtocolError
|
80
80
|
end
|
81
81
|
|
82
|
+
# 17
|
82
83
|
# For a request which attempts to access an invalid topic (e.g. one which has
|
83
84
|
# an illegal name), or if an attempt is made to write to an internal topic
|
84
85
|
# (such as the consumer offsets topic).
|
@@ -90,37 +91,105 @@ module Kafka
|
|
90
91
|
class RecordListTooLarge < ProtocolError
|
91
92
|
end
|
92
93
|
|
94
|
+
# 19
|
93
95
|
# Returned from a produce request when the number of in-sync replicas is
|
94
96
|
# lower than the configured minimum and requiredAcks is -1.
|
95
97
|
class NotEnoughReplicas < ProtocolError
|
96
98
|
end
|
97
99
|
|
100
|
+
# 20
|
98
101
|
# Returned from a produce request when the message was written to the log,
|
99
102
|
# but with fewer in-sync replicas than required.
|
100
103
|
class NotEnoughReplicasAfterAppend < ProtocolError
|
101
104
|
end
|
102
105
|
|
106
|
+
# 21
|
103
107
|
# Returned from a produce request if the requested requiredAcks is invalid
|
104
108
|
# (anything other than -1, 1, or 0).
|
105
109
|
class InvalidRequiredAcks < ProtocolError
|
106
110
|
end
|
107
111
|
|
112
|
+
# 9
|
108
113
|
# Raised if a replica is expected on a broker, but is not. Can be safely ignored.
|
109
114
|
class ReplicaNotAvailable < ProtocolError
|
110
115
|
end
|
111
116
|
|
117
|
+
#
|
112
118
|
class UnknownMemberId < ProtocolError
|
113
119
|
end
|
114
120
|
|
121
|
+
# 27
|
115
122
|
class RebalanceInProgress < ProtocolError
|
116
123
|
end
|
117
124
|
|
125
|
+
# 22
|
118
126
|
class IllegalGeneration < ProtocolError
|
119
127
|
end
|
120
128
|
|
129
|
+
# 26
|
121
130
|
class InvalidSessionTimeout < ProtocolError
|
122
131
|
end
|
123
132
|
|
133
|
+
# 28
|
134
|
+
class InvalidCommitOffsetSize < ProtocolError
|
135
|
+
end
|
136
|
+
|
137
|
+
# 29
|
138
|
+
class TopicAuthorizationCode < ProtocolError
|
139
|
+
end
|
140
|
+
|
141
|
+
# 30
|
142
|
+
class GroupAuthorizationCode < ProtocolError
|
143
|
+
end
|
144
|
+
|
145
|
+
# 31
|
146
|
+
class ClusterAuthorizationCode < ProtocolError
|
147
|
+
end
|
148
|
+
|
149
|
+
# 32
|
150
|
+
class InvalidTimestamp < ProtocolError
|
151
|
+
end
|
152
|
+
|
153
|
+
# 33
|
154
|
+
class UnsupportedSaslMechanism < ProtocolError
|
155
|
+
end
|
156
|
+
|
157
|
+
# 34
|
158
|
+
class InvalidSaslState < ProtocolError
|
159
|
+
end
|
160
|
+
|
161
|
+
# 35
|
162
|
+
class UnsupportedVersion < ProtocolError
|
163
|
+
end
|
164
|
+
|
165
|
+
# 36
|
166
|
+
class TopicAlreadyExists < Error
|
167
|
+
end
|
168
|
+
|
169
|
+
# 37
|
170
|
+
class InvalidPartitions < Error
|
171
|
+
end
|
172
|
+
|
173
|
+
# 38
|
174
|
+
class InvalidReplicationFactor < Error
|
175
|
+
end
|
176
|
+
|
177
|
+
# 39
|
178
|
+
class InvalidReplicaAssignment < Error
|
179
|
+
end
|
180
|
+
|
181
|
+
# 40
|
182
|
+
class InvalidConfig < Error
|
183
|
+
end
|
184
|
+
|
185
|
+
# 41
|
186
|
+
class NotController < Error
|
187
|
+
end
|
188
|
+
|
189
|
+
# 42
|
190
|
+
class InvalidRequest < Error
|
191
|
+
end
|
192
|
+
|
124
193
|
# Raised when there's a network connection error.
|
125
194
|
class ConnectionError < Error
|
126
195
|
end
|
@@ -142,6 +211,9 @@ module Kafka
|
|
142
211
|
class FetchError < Error
|
143
212
|
end
|
144
213
|
|
214
|
+
class NoPartitionsAssignedError < Error
|
215
|
+
end
|
216
|
+
|
145
217
|
# Initializes a new Kafka client.
|
146
218
|
#
|
147
219
|
# @see Client#initialize
|
data/lib/kafka/broker.rb
CHANGED
@@ -40,6 +40,22 @@ module Kafka
|
|
40
40
|
@connection.send_request(request)
|
41
41
|
end
|
42
42
|
|
43
|
+
# Fetches messages asynchronously.
|
44
|
+
#
|
45
|
+
# The fetch request is sent to the broker, but the response is not read.
|
46
|
+
# This allows the broker to process the request, wait for new messages,
|
47
|
+
# and send a response without the client having to wait. In order to
|
48
|
+
# read the response, call `#call` on the returned object. This will
|
49
|
+
# block the caller until the response is available.
|
50
|
+
#
|
51
|
+
# @param (see Kafka::Protocol::FetchRequest#initialize)
|
52
|
+
# @return [Kafka::AsyncResponse]
|
53
|
+
def fetch_messages_async(**options)
|
54
|
+
request = Protocol::FetchRequest.new(**options)
|
55
|
+
|
56
|
+
@connection.send_async_request(request)
|
57
|
+
end
|
58
|
+
|
43
59
|
# Lists the offset of the specified topics and partitions.
|
44
60
|
#
|
45
61
|
# @param (see Kafka::Protocol::ListOffsetRequest#initialize)
|
@@ -101,5 +117,11 @@ module Kafka
|
|
101
117
|
|
102
118
|
@connection.send_request(request)
|
103
119
|
end
|
120
|
+
|
121
|
+
def sasl_handshake(**options)
|
122
|
+
request = Protocol::SaslHandshakeRequest(**options)
|
123
|
+
|
124
|
+
@connection.send_request(request)
|
125
|
+
end
|
104
126
|
end
|
105
127
|
end
|
data/lib/kafka/client.rb
CHANGED
@@ -40,8 +40,14 @@ module Kafka
|
|
40
40
|
# @param ssl_client_cert_key [String, nil] a PEM encoded client cert key to use with an
|
41
41
|
# SSL connection. Must be used in combination with ssl_client_cert.
|
42
42
|
#
|
43
|
+
# @param sasl_gssapi_principal [String, nil] a KRB5 principal
|
44
|
+
#
|
45
|
+
# @param sasl_gssapi_keytab [String, nil] a KRB5 keytab filepath
|
46
|
+
#
|
43
47
|
# @return [Client]
|
44
|
-
def initialize(seed_brokers:, client_id: "ruby-kafka", logger: nil, connect_timeout: nil, socket_timeout: nil,
|
48
|
+
def initialize(seed_brokers:, client_id: "ruby-kafka", logger: nil, connect_timeout: nil, socket_timeout: nil,
|
49
|
+
ssl_ca_cert: nil, ssl_client_cert: nil, ssl_client_cert_key: nil,
|
50
|
+
sasl_gssapi_principal: nil, sasl_gssapi_keytab: nil)
|
45
51
|
@logger = logger || Logger.new(nil)
|
46
52
|
@instrumenter = Instrumenter.new(client_id: client_id)
|
47
53
|
@seed_brokers = normalize_seed_brokers(seed_brokers)
|
@@ -55,6 +61,8 @@ module Kafka
|
|
55
61
|
ssl_context: ssl_context,
|
56
62
|
logger: @logger,
|
57
63
|
instrumenter: @instrumenter,
|
64
|
+
sasl_gssapi_principal: sasl_gssapi_principal,
|
65
|
+
sasl_gssapi_keytab: sasl_gssapi_keytab
|
58
66
|
)
|
59
67
|
|
60
68
|
@cluster = initialize_cluster
|
@@ -414,6 +422,26 @@ module Kafka
|
|
414
422
|
@cluster.resolve_offset(topic, partition, :latest) - 1
|
415
423
|
end
|
416
424
|
|
425
|
+
|
426
|
+
# Retrieve the offset of the last message in each partition of the specified topics.
|
427
|
+
#
|
428
|
+
# @param topics [Array<String>] topic names.
|
429
|
+
# @return [Hash<String, Hash<Integer, Integer>>]
|
430
|
+
# @example
|
431
|
+
# last_offsets_for('topic-1', 'topic-2') # =>
|
432
|
+
# # {
|
433
|
+
# # 'topic-1' => { 0 => 100, 1 => 100 },
|
434
|
+
# # 'topic-2' => { 0 => 100, 1 => 100 }
|
435
|
+
# # }
|
436
|
+
def last_offsets_for(*topics)
|
437
|
+
@cluster.add_target_topics(topics)
|
438
|
+
topics.map {|topic|
|
439
|
+
partition_ids = @cluster.partitions_for(topic).collect(&:partition_id)
|
440
|
+
partition_offsets = @cluster.resolve_offsets(topic, partition_ids, :latest)
|
441
|
+
[topic, partition_offsets.collect { |k, v| [k, v - 1] }.to_h]
|
442
|
+
}.to_h
|
443
|
+
end
|
444
|
+
|
417
445
|
# Closes all connections to the Kafka brokers and frees up used resources.
|
418
446
|
#
|
419
447
|
# @return [nil]
|
data/lib/kafka/cluster.rb
CHANGED
@@ -157,6 +157,9 @@ module Kafka
|
|
157
157
|
end
|
158
158
|
|
159
159
|
offsets
|
160
|
+
rescue Kafka::ProtocolError
|
161
|
+
mark_as_stale!
|
162
|
+
raise
|
160
163
|
end
|
161
164
|
|
162
165
|
def resolve_offset(topic, partition, offset)
|
@@ -190,7 +193,7 @@ module Kafka
|
|
190
193
|
# @raise [ConnectionError] if none of the nodes in `seed_brokers` are available.
|
191
194
|
# @return [Protocol::MetadataResponse] the cluster metadata.
|
192
195
|
def fetch_cluster_info
|
193
|
-
@seed_brokers.each do |node|
|
196
|
+
@seed_brokers.shuffle.each do |node|
|
194
197
|
@logger.info "Fetching cluster metadata from #{node}"
|
195
198
|
|
196
199
|
begin
|
data/lib/kafka/connection.rb
CHANGED
@@ -2,11 +2,41 @@ require "stringio"
|
|
2
2
|
require "kafka/socket_with_timeout"
|
3
3
|
require "kafka/ssl_socket_with_timeout"
|
4
4
|
require "kafka/protocol/request_message"
|
5
|
+
require "kafka/protocol/null_response"
|
5
6
|
require "kafka/protocol/encoder"
|
6
7
|
require "kafka/protocol/decoder"
|
7
8
|
|
8
9
|
module Kafka
|
9
10
|
|
11
|
+
# An asynchronous response object allows us to deliver a response at some
|
12
|
+
# later point in time.
|
13
|
+
#
|
14
|
+
# When instantiating an AsyncResponse, you provide a response decoder and
|
15
|
+
# a block that will force the caller to wait until a response is available.
|
16
|
+
class AsyncResponse
|
17
|
+
# Use a custom "nil" value so that nil can be an actual value.
|
18
|
+
MISSING = Object.new
|
19
|
+
|
20
|
+
def initialize(decoder, &block)
|
21
|
+
@decoder = decoder
|
22
|
+
@block = block
|
23
|
+
@response = MISSING
|
24
|
+
end
|
25
|
+
|
26
|
+
# Block until a response is available.
|
27
|
+
def call
|
28
|
+
@block.call if @response == MISSING
|
29
|
+
@response
|
30
|
+
end
|
31
|
+
|
32
|
+
# Deliver the response data.
|
33
|
+
#
|
34
|
+
# After calling this, `#call` will return the decoded response.
|
35
|
+
def deliver(data)
|
36
|
+
@response = @decoder.decode(data)
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
10
40
|
# A connection to a single Kafka broker.
|
11
41
|
#
|
12
42
|
# Usually you'll need a separate connection to each broker in a cluster, since most
|
@@ -28,6 +58,9 @@ module Kafka
|
|
28
58
|
SOCKET_TIMEOUT = 10
|
29
59
|
CONNECT_TIMEOUT = 10
|
30
60
|
|
61
|
+
attr_reader :encoder
|
62
|
+
attr_reader :decoder
|
63
|
+
|
31
64
|
# Opens a connection to a Kafka broker.
|
32
65
|
#
|
33
66
|
# @param host [String] the hostname of the broker.
|
@@ -75,6 +108,18 @@ module Kafka
|
|
75
108
|
#
|
76
109
|
# @return [Object] the response.
|
77
110
|
def send_request(request)
|
111
|
+
# Immediately block on the asynchronous request.
|
112
|
+
send_async_request(request).call
|
113
|
+
end
|
114
|
+
|
115
|
+
# Sends a request over the connection.
|
116
|
+
#
|
117
|
+
# @param request [#encode, #response_class] the request that should be
|
118
|
+
# encoded and written.
|
119
|
+
#
|
120
|
+
# @return [AsyncResponse] the async response, allowing the caller to choose
|
121
|
+
# when to block.
|
122
|
+
def send_async_request(request)
|
78
123
|
# Default notification payload.
|
79
124
|
notification = {
|
80
125
|
broker_host: @host,
|
@@ -83,15 +128,41 @@ module Kafka
|
|
83
128
|
response_size: 0,
|
84
129
|
}
|
85
130
|
|
86
|
-
@instrumenter.
|
87
|
-
|
131
|
+
@instrumenter.start("request.connection", notification)
|
132
|
+
|
133
|
+
open unless open?
|
134
|
+
|
135
|
+
@correlation_id += 1
|
88
136
|
|
89
|
-
|
137
|
+
write_request(request, notification)
|
90
138
|
|
91
|
-
|
139
|
+
response_class = request.response_class
|
140
|
+
correlation_id = @correlation_id
|
92
141
|
|
93
|
-
|
94
|
-
|
142
|
+
if response_class.nil?
|
143
|
+
async_response = AsyncResponse.new(Protocol::NullResponse) { nil }
|
144
|
+
|
145
|
+
# Immediately deliver a nil value.
|
146
|
+
async_response.deliver(nil)
|
147
|
+
|
148
|
+
@instrumenter.finish("request.connection", notification)
|
149
|
+
|
150
|
+
async_response
|
151
|
+
else
|
152
|
+
async_response = AsyncResponse.new(response_class) {
|
153
|
+
# A caller is trying to read the response, so we have to wait for it
|
154
|
+
# before we can return.
|
155
|
+
wait_for_response(correlation_id, notification)
|
156
|
+
|
157
|
+
# Once done, we can finish the instrumentation.
|
158
|
+
@instrumenter.finish("request.connection", notification)
|
159
|
+
}
|
160
|
+
|
161
|
+
# Store the asynchronous response so that data can be delivered to it
|
162
|
+
# at a later time.
|
163
|
+
@pending_async_responses[correlation_id] = async_response
|
164
|
+
|
165
|
+
async_response
|
95
166
|
end
|
96
167
|
rescue Errno::EPIPE, Errno::ECONNRESET, Errno::ETIMEDOUT, EOFError => e
|
97
168
|
close
|
@@ -115,6 +186,9 @@ module Kafka
|
|
115
186
|
|
116
187
|
# Correlation id is initialized to zero and bumped for each request.
|
117
188
|
@correlation_id = 0
|
189
|
+
|
190
|
+
# The pipeline of pending response futures must be reset.
|
191
|
+
@pending_async_responses = {}
|
118
192
|
rescue Errno::ETIMEDOUT => e
|
119
193
|
@logger.error "Timed out while trying to connect to #{self}: #{e}"
|
120
194
|
raise ConnectionError, e
|
@@ -156,8 +230,8 @@ module Kafka
|
|
156
230
|
# a given Decoder.
|
157
231
|
#
|
158
232
|
# @return [nil]
|
159
|
-
def read_response(
|
160
|
-
@logger.debug "Waiting for response #{
|
233
|
+
def read_response(expected_correlation_id, notification)
|
234
|
+
@logger.debug "Waiting for response #{expected_correlation_id} from #{to_s}"
|
161
235
|
|
162
236
|
data = @decoder.bytes
|
163
237
|
notification[:response_size] = data.bytesize
|
@@ -166,32 +240,49 @@ module Kafka
|
|
166
240
|
response_decoder = Kafka::Protocol::Decoder.new(buffer)
|
167
241
|
|
168
242
|
correlation_id = response_decoder.int32
|
169
|
-
response = response_class.decode(response_decoder)
|
170
243
|
|
171
244
|
@logger.debug "Received response #{correlation_id} from #{to_s}"
|
172
245
|
|
173
|
-
return correlation_id,
|
246
|
+
return correlation_id, response_decoder
|
174
247
|
rescue Errno::ETIMEDOUT
|
175
|
-
@logger.error "Timed out while waiting for response #{
|
248
|
+
@logger.error "Timed out while waiting for response #{expected_correlation_id}"
|
176
249
|
raise
|
250
|
+
rescue Errno::EPIPE, Errno::ECONNRESET, Errno::ETIMEDOUT, EOFError => e
|
251
|
+
close
|
252
|
+
|
253
|
+
raise ConnectionError, "Connection error: #{e}"
|
177
254
|
end
|
178
255
|
|
179
|
-
def wait_for_response(
|
256
|
+
def wait_for_response(expected_correlation_id, notification)
|
180
257
|
loop do
|
181
|
-
correlation_id,
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
258
|
+
correlation_id, data = read_response(expected_correlation_id, notification)
|
259
|
+
|
260
|
+
if correlation_id < expected_correlation_id
|
261
|
+
# There may have been a previous request that timed out before the client
|
262
|
+
# was able to read the response. In that case, the response will still be
|
263
|
+
# sitting in the socket waiting to be read. If the response we just read
|
264
|
+
# was to a previous request, we deliver it to the pending async response
|
265
|
+
# future.
|
266
|
+
async_response = @pending_async_responses.delete(correlation_id)
|
267
|
+
async_response.deliver(data) if async_response
|
268
|
+
elsif correlation_id > expected_correlation_id
|
269
|
+
raise Kafka::Error, "Correlation id mismatch: expected #{expected_correlation_id} but got #{correlation_id}"
|
191
270
|
else
|
192
|
-
|
271
|
+
# If the request was asynchronous, deliver the response to the pending
|
272
|
+
# async response future.
|
273
|
+
async_response = @pending_async_responses.delete(correlation_id)
|
274
|
+
async_response.deliver(data)
|
275
|
+
|
276
|
+
return async_response.call
|
193
277
|
end
|
194
278
|
end
|
279
|
+
rescue Errno::EPIPE, Errno::ECONNRESET, Errno::ETIMEDOUT, EOFError => e
|
280
|
+
notification[:exception] = [e.class.name, e.message]
|
281
|
+
notification[:exception_object] = e
|
282
|
+
|
283
|
+
close
|
284
|
+
|
285
|
+
raise ConnectionError, "Connection error: #{e}"
|
195
286
|
end
|
196
287
|
end
|
197
288
|
end
|