ruby-kafka 0.3.17 → 0.3.18.beta1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/Gemfile.lock +6 -2
- data/README.md +76 -14
- data/lib/kafka.rb +72 -0
- data/lib/kafka/broker.rb +22 -0
- data/lib/kafka/client.rb +29 -1
- data/lib/kafka/cluster.rb +4 -1
- data/lib/kafka/connection.rb +114 -23
- data/lib/kafka/connection_builder.rb +30 -3
- data/lib/kafka/consumer.rb +35 -10
- data/lib/kafka/datadog.rb +1 -1
- data/lib/kafka/fetch_operation.rb +14 -6
- data/lib/kafka/instrumenter.rb +28 -4
- data/lib/kafka/protocol.rb +27 -9
- data/lib/kafka/protocol/null_response.rb +11 -0
- data/lib/kafka/protocol/sasl_handshake_request.rb +31 -0
- data/lib/kafka/protocol/sasl_handshake_response.rb +26 -0
- data/lib/kafka/sasl_gssapi_authenticator.rb +69 -0
- data/lib/kafka/version.rb +1 -1
- data/ruby-kafka.gemspec +2 -0
- data/vendor/bundle/bin/bundler +17 -0
- data/vendor/bundle/bin/coderay +17 -0
- data/vendor/bundle/bin/dotenv +17 -0
- data/vendor/bundle/bin/htmldiff +17 -0
- data/vendor/bundle/bin/ldiff +17 -0
- data/vendor/bundle/bin/pry +17 -0
- data/vendor/bundle/bin/rake +17 -0
- data/vendor/bundle/bin/rspec +17 -0
- data/vendor/bundle/bin/ruby-prof +17 -0
- data/vendor/bundle/bin/ruby-prof-check-trace +17 -0
- metadata +32 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 80c7dd382316d8fd5465ab7f73a4393cf3ed01cf
|
4
|
+
data.tar.gz: b30842098f9076127c33eb762dffc71424de2866
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 90b3ed06f47b6e2b22a7623bcf3825206b7745170207cb9e2226adcc2ee60988769e7ad498a241e30eb0f04dbf3daa56acef4c91c5334b04ded7d1cb429e4866
|
7
|
+
data.tar.gz: 72c20e866cd03e575bbae4d82a86fe56195463412929dbb4656c1cfc16ef4c9f109317897ad7773cc79ccd22da395c440522b174e786f2ff358629c4db897dc8
|
data/.gitignore
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,7 +1,8 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
ruby-kafka (0.3.17)
|
4
|
+
ruby-kafka (0.3.18.beta1)
|
5
|
+
gssapi (>= 1.2.0)
|
5
6
|
|
6
7
|
GEM
|
7
8
|
remote: https://rubygems.org/
|
@@ -23,6 +24,9 @@ GEM
|
|
23
24
|
dogstatsd-ruby (2.1.0)
|
24
25
|
dotenv (2.1.0)
|
25
26
|
excon (0.54.0)
|
27
|
+
ffi (1.9.18)
|
28
|
+
gssapi (1.2.0)
|
29
|
+
ffi (>= 1.0.1)
|
26
30
|
i18n (0.7.0)
|
27
31
|
json (1.8.3)
|
28
32
|
method_source (0.8.2)
|
@@ -83,4 +87,4 @@ RUBY VERSION
|
|
83
87
|
ruby 2.2.3p173
|
84
88
|
|
85
89
|
BUNDLED WITH
|
86
|
-
1.
|
90
|
+
1.14.6
|
data/README.md
CHANGED
@@ -33,7 +33,9 @@ Although parts of this library work with Kafka 0.8 – specifically, the Produce
|
|
33
33
|
7. [Monitoring](#monitoring)
|
34
34
|
1. [Reporting Metrics to Datadog](#reporting-metrics-to-datadog)
|
35
35
|
8. [Understanding Timeouts](#understanding-timeouts)
|
36
|
-
9. [
|
36
|
+
9. [Security](#security)
|
37
|
+
1. [Encryption and Authentication using SSL](#encryption-and-authentication-using-ssl)
|
38
|
+
2. [Authentication using SASL](#authentication-using-sasl)
|
37
39
|
4. [Design](#design)
|
38
40
|
1. [Producer Design](#producer-design)
|
39
41
|
2. [Asynchronous Producer Design](#asynchronous-producer-design)
|
@@ -101,7 +103,7 @@ require "kafka"
|
|
101
103
|
kafka = Kafka.new(
|
102
104
|
# At least one of these nodes must be available:
|
103
105
|
seed_brokers: ["kafka1:9092", "kafka2:9092"],
|
104
|
-
|
106
|
+
|
105
107
|
# Set an optional client id in order to identify the client to Kafka:
|
106
108
|
client_id: "my-application",
|
107
109
|
)
|
@@ -198,7 +200,7 @@ These policies can be used alone or in combination.
|
|
198
200
|
producer = kafka.async_producer(
|
199
201
|
# Trigger a delivery once 100 messages have been buffered.
|
200
202
|
delivery_threshold: 100,
|
201
|
-
|
203
|
+
|
202
204
|
# Trigger a delivery every 30 seconds.
|
203
205
|
delivery_interval: 30,
|
204
206
|
)
|
@@ -435,13 +437,13 @@ In your controllers, simply call the producer directly:
|
|
435
437
|
class OrdersController
|
436
438
|
def create
|
437
439
|
@order = Order.create!(params[:order])
|
438
|
-
|
440
|
+
|
439
441
|
event = {
|
440
442
|
order_id: @order.id,
|
441
443
|
amount: @order.amount,
|
442
444
|
timestamp: Time.now,
|
443
445
|
}
|
444
|
-
|
446
|
+
|
445
447
|
$kafka_producer.produce(event.to_json, topic: "order_events")
|
446
448
|
end
|
447
449
|
end
|
@@ -449,8 +451,6 @@ end
|
|
449
451
|
|
450
452
|
### Consuming Messages from Kafka
|
451
453
|
|
452
|
-
**Warning:** The Consumer API is still alpha level and will likely change. The consumer code should not be considered stable, as it hasn't been exhaustively tested in production environments yet.
|
453
|
-
|
454
454
|
Consuming messages from a Kafka topic is simple:
|
455
455
|
|
456
456
|
```ruby
|
@@ -501,9 +501,11 @@ Each consumer process will be assigned one or more partitions from each topic th
|
|
501
501
|
|
502
502
|
In order to be able to resume processing after a consumer crashes, each consumer will periodically _checkpoint_ its position within each partition it reads from. Since each partition has a monotonically increasing sequence of message offsets, this works by _committing_ the offset of the last message that was processed in a given partition. Kafka handles these commits and allows another consumer in a group to resume from the last commit when a member crashes or becomes unresponsive.
|
503
503
|
|
504
|
-
By default, offsets are committed every 10 seconds. You can increase the frequency, known as the _offset commit interval_, to limit the duration of double-processing scenarios, at the cost of a lower throughput due to the added coordination. If you want to improve throughput, and double-processing is of less concern to you, then you can decrease the frequency.
|
504
|
+
By default, offsets are committed every 10 seconds. You can increase the frequency, known as the _offset commit interval_, to limit the duration of double-processing scenarios, at the cost of a lower throughput due to the added coordination. If you want to improve throughput, and double-processing is of less concern to you, then you can decrease the frequency. Set the commit interval to zero in order to disable the timer-based commit trigger entirely.
|
505
|
+
|
506
|
+
In addition to the time-based trigger, it's possible to trigger checkpointing in response to _n_ messages having been processed, known as the _offset commit threshold_. This puts a bound on the number of messages that can be double-processed before the problem is detected. Setting this to 1 will cause an offset commit to take place every time a message has been processed. By default this trigger is disabled (set to zero).
|
505
507
|
|
506
|
-
|
508
|
+
It is possible to trigger an immediate offset commit by calling `Consumer#commit_offsets`. This blocks the caller until the Kafka cluster has acknowledged the commit.
|
507
509
|
|
508
510
|
Stale offsets are periodically purged by the broker. The broker setting `offsets.retention.minutes` controls the retention window for committed offsets, and defaults to 1 day. The length of the retention window, known as _offset retention time_, can be changed for the consumer.
|
509
511
|
|
@@ -524,6 +526,46 @@ consumer = kafka.consumer(
|
|
524
526
|
)
|
525
527
|
```
|
526
528
|
|
529
|
+
For some use cases it may be necessary to control when messages are marked as processed. Note that since only the consumer position within each partition can be saved, marking a message as processed implies that all messages in the partition with a lower offset should also be considered as having been processed.
|
530
|
+
|
531
|
+
The method `Consumer#mark_message_as_processed` marks a message (and all those that precede it in a partition) as having been processed. This is an advanced API that you should only use if you know what you're doing.
|
532
|
+
|
533
|
+
```ruby
|
534
|
+
# Manually controlling checkpointing:
|
535
|
+
|
536
|
+
# Typically you want to use this API in order to buffer messages until some
|
537
|
+
# special "commit" message is received, e.g. in order to group together
|
538
|
+
# transactions consisting of several items.
|
539
|
+
buffer = []
|
540
|
+
|
541
|
+
# Messages will not be marked as processed automatically. If you shut down the
|
542
|
+
# consumer without calling `#mark_message_as_processed` first, the consumer will
|
543
|
+
# not resume where you left off!
|
544
|
+
consumer.each_message(automatically_mark_as_processed: false) do |message|
|
545
|
+
# Our messages are JSON with a `type` field and other stuff.
|
546
|
+
event = JSON.parse(message.value)
|
547
|
+
|
548
|
+
case event.fetch("type")
|
549
|
+
when "add_to_cart"
|
550
|
+
buffer << event
|
551
|
+
when "complete_purchase"
|
552
|
+
# We've received all the messages we need, time to save the transaction.
|
553
|
+
save_transaction(buffer)
|
554
|
+
|
555
|
+
# Now we can set the checkpoint by marking the last message as processed.
|
556
|
+
consumer.mark_message_as_processed(message)
|
557
|
+
|
558
|
+
# We can optionally trigger an immediate, blocking offset commit in order
|
559
|
+
# to minimize the risk of crashing before the automatic triggers have
|
560
|
+
# kicked in.
|
561
|
+
consumer.commit_offsets
|
562
|
+
|
563
|
+
# Make the buffer ready for the next transaction.
|
564
|
+
buffer.clear
|
565
|
+
end
|
566
|
+
end
|
567
|
+
```
|
568
|
+
|
527
569
|
|
528
570
|
#### Topic Subscriptions
|
529
571
|
|
@@ -614,9 +656,9 @@ If you want to have at most one second of latency, set `max_wait_time: 1`.
|
|
614
656
|
|
615
657
|
### Thread Safety
|
616
658
|
|
617
|
-
You typically don't want to share a Kafka client between threads, since the network communication is not synchronized. Furthermore, you should avoid using threads in a consumer unless you're very careful about waiting for all work to complete before returning from the `#each_message` or `#each_batch` block. This is because _checkpointing_ assumes that returning from the block means that the messages that have been yielded have been successfully processed.
|
659
|
+
You typically don't want to share a Kafka client object between threads, since the network communication is not synchronized. Furthermore, you should avoid using threads in a consumer unless you're very careful about waiting for all work to complete before returning from the `#each_message` or `#each_batch` block. This is because _checkpointing_ assumes that returning from the block means that the messages that have been yielded have been successfully processed.
|
618
660
|
|
619
|
-
You should also avoid sharing a synchronous producer between threads, as the internal buffers are not thread safe. However, the _asynchronous_ producer should be safe to use in a multi-threaded environment.
|
661
|
+
You should also avoid sharing a synchronous producer between threads, as the internal buffers are not thread safe. However, the _asynchronous_ producer should be safe to use in a multi-threaded environment. This is because producers, when instantiated, get their own copy of any non-thread-safe data such as network sockets. Furthermore, the asynchronous producer has been designed in such a way that only a single background thread operates on this data, while any foreground thread with a reference to the producer object can only send messages to that background thread over a safe queue. Therefore, it is safe to share an async producer object between many threads.
|
620
662
|
|
621
663
|
### Logging
|
622
664
|
|
@@ -721,11 +763,13 @@ When sending many messages, it's likely that the client needs to send some messa
|
|
721
763
|
|
722
764
|
Make sure your application can survive being blocked for so long.
|
723
765
|
|
724
|
-
###
|
766
|
+
### Security
|
767
|
+
|
768
|
+
#### Encryption and Authentication using SSL
|
725
769
|
|
726
770
|
By default, communication between Kafka clients and brokers is unencrypted and unauthenticated. Kafka 0.9 added optional support for [encryption and client authentication and authorization](http://kafka.apache.org/documentation.html#security_ssl). There are two layers of security made possible by this:
|
727
771
|
|
728
|
-
|
772
|
+
##### Encryption of Communication
|
729
773
|
|
730
774
|
By enabling SSL encryption you can have some confidence that messages can be sent to Kafka over an untrusted network without being intercepted.
|
731
775
|
|
@@ -740,7 +784,7 @@ kafka = Kafka.new(
|
|
740
784
|
|
741
785
|
Without passing the CA certificate to the client it would be impossible to protect against [man-in-the-middle attacks](https://en.wikipedia.org/wiki/Man-in-the-middle_attack).
|
742
786
|
|
743
|
-
|
787
|
+
##### Client Authentication
|
744
788
|
|
745
789
|
In order to authenticate the client to the cluster, you need to pass in a certificate and key created for the client and trusted by the brokers.
|
746
790
|
|
@@ -755,6 +799,24 @@ kafka = Kafka.new(
|
|
755
799
|
|
756
800
|
Once client authentication is set up, it is possible to configure the Kafka cluster to [authorize client requests](http://kafka.apache.org/documentation.html#security_authz).
|
757
801
|
|
802
|
+
##### Using JKS Certificates
|
803
|
+
|
804
|
+
Typically, Kafka certificates come in the JKS format, which isn't supported by ruby-kafka. There's [a wiki page](https://github.com/zendesk/ruby-kafka/wiki/Creating-X509-certificates-from-JKS-format) that describes how to generate valid X509 certificates from JKS certificates.
|
805
|
+
|
806
|
+
#### Authentication using SASL
|
807
|
+
|
808
|
+
Kafka has support for using SASL to authenticate clients. Currently only the GSSAPI mechanism is supported by ruby-kafka.
|
809
|
+
|
810
|
+
In order to authenticate using SASL, set your principal and optionally your keytab when initializing the Kafka client:
|
811
|
+
|
812
|
+
```ruby
|
813
|
+
kafka = Kafka.new(
|
814
|
+
sasl_gssapi_principal: 'kafka/kafka.example.com@EXAMPLE.COM',
|
815
|
+
sasl_gssapi_keytab: '/etc/keytabs/kafka.keytab',
|
816
|
+
# ...
|
817
|
+
)
|
818
|
+
```
|
819
|
+
|
758
820
|
## Design
|
759
821
|
|
760
822
|
The library has been designed as a layered system, with each layer having a clear responsibility:
|
data/lib/kafka.rb
CHANGED
@@ -79,6 +79,7 @@ module Kafka
|
|
79
79
|
class NotCoordinatorForGroup < ProtocolError
|
80
80
|
end
|
81
81
|
|
82
|
+
# 17
|
82
83
|
# For a request which attempts to access an invalid topic (e.g. one which has
|
83
84
|
# an illegal name), or if an attempt is made to write to an internal topic
|
84
85
|
# (such as the consumer offsets topic).
|
@@ -90,37 +91,105 @@ module Kafka
|
|
90
91
|
class RecordListTooLarge < ProtocolError
|
91
92
|
end
|
92
93
|
|
94
|
+
# 19
|
93
95
|
# Returned from a produce request when the number of in-sync replicas is
|
94
96
|
# lower than the configured minimum and requiredAcks is -1.
|
95
97
|
class NotEnoughReplicas < ProtocolError
|
96
98
|
end
|
97
99
|
|
100
|
+
# 20
|
98
101
|
# Returned from a produce request when the message was written to the log,
|
99
102
|
# but with fewer in-sync replicas than required.
|
100
103
|
class NotEnoughReplicasAfterAppend < ProtocolError
|
101
104
|
end
|
102
105
|
|
106
|
+
# 21
|
103
107
|
# Returned from a produce request if the requested requiredAcks is invalid
|
104
108
|
# (anything other than -1, 1, or 0).
|
105
109
|
class InvalidRequiredAcks < ProtocolError
|
106
110
|
end
|
107
111
|
|
112
|
+
# 9
|
108
113
|
# Raised if a replica is expected on a broker, but is not. Can be safely ignored.
|
109
114
|
class ReplicaNotAvailable < ProtocolError
|
110
115
|
end
|
111
116
|
|
117
|
+
#
|
112
118
|
class UnknownMemberId < ProtocolError
|
113
119
|
end
|
114
120
|
|
121
|
+
# 27
|
115
122
|
class RebalanceInProgress < ProtocolError
|
116
123
|
end
|
117
124
|
|
125
|
+
# 22
|
118
126
|
class IllegalGeneration < ProtocolError
|
119
127
|
end
|
120
128
|
|
129
|
+
# 26
|
121
130
|
class InvalidSessionTimeout < ProtocolError
|
122
131
|
end
|
123
132
|
|
133
|
+
# 28
|
134
|
+
class InvalidCommitOffsetSize < ProtocolError
|
135
|
+
end
|
136
|
+
|
137
|
+
# 29
|
138
|
+
class TopicAuthorizationCode < ProtocolError
|
139
|
+
end
|
140
|
+
|
141
|
+
# 30
|
142
|
+
class GroupAuthorizationCode < ProtocolError
|
143
|
+
end
|
144
|
+
|
145
|
+
# 31
|
146
|
+
class ClusterAuthorizationCode < ProtocolError
|
147
|
+
end
|
148
|
+
|
149
|
+
# 32
|
150
|
+
class InvalidTimestamp < ProtocolError
|
151
|
+
end
|
152
|
+
|
153
|
+
# 33
|
154
|
+
class UnsupportedSaslMechanism < ProtocolError
|
155
|
+
end
|
156
|
+
|
157
|
+
# 34
|
158
|
+
class InvalidSaslState < ProtocolError
|
159
|
+
end
|
160
|
+
|
161
|
+
# 35
|
162
|
+
class UnsupportedVersion < ProtocolError
|
163
|
+
end
|
164
|
+
|
165
|
+
# 36
|
166
|
+
class TopicAlreadyExists < Error
|
167
|
+
end
|
168
|
+
|
169
|
+
# 37
|
170
|
+
class InvalidPartitions < Error
|
171
|
+
end
|
172
|
+
|
173
|
+
# 38
|
174
|
+
class InvalidReplicationFactor < Error
|
175
|
+
end
|
176
|
+
|
177
|
+
# 39
|
178
|
+
class InvalidReplicaAssignment < Error
|
179
|
+
end
|
180
|
+
|
181
|
+
# 40
|
182
|
+
class InvalidConfig < Error
|
183
|
+
end
|
184
|
+
|
185
|
+
# 41
|
186
|
+
class NotController < Error
|
187
|
+
end
|
188
|
+
|
189
|
+
# 42
|
190
|
+
class InvalidRequest < Error
|
191
|
+
end
|
192
|
+
|
124
193
|
# Raised when there's a network connection error.
|
125
194
|
class ConnectionError < Error
|
126
195
|
end
|
@@ -142,6 +211,9 @@ module Kafka
|
|
142
211
|
class FetchError < Error
|
143
212
|
end
|
144
213
|
|
214
|
+
class NoPartitionsAssignedError < Error
|
215
|
+
end
|
216
|
+
|
145
217
|
# Initializes a new Kafka client.
|
146
218
|
#
|
147
219
|
# @see Client#initialize
|
data/lib/kafka/broker.rb
CHANGED
@@ -40,6 +40,22 @@ module Kafka
|
|
40
40
|
@connection.send_request(request)
|
41
41
|
end
|
42
42
|
|
43
|
+
# Fetches messages asynchronously.
|
44
|
+
#
|
45
|
+
# The fetch request is sent to the broker, but the response is not read.
|
46
|
+
# This allows the broker to process the request, wait for new messages,
|
47
|
+
# and send a response without the client having to wait. In order to
|
48
|
+
# read the response, call `#call` on the returned object. This will
|
49
|
+
# block the caller until the response is available.
|
50
|
+
#
|
51
|
+
# @param (see Kafka::Protocol::FetchRequest#initialize)
|
52
|
+
# @return [Kafka::AsyncResponse]
|
53
|
+
def fetch_messages_async(**options)
|
54
|
+
request = Protocol::FetchRequest.new(**options)
|
55
|
+
|
56
|
+
@connection.send_async_request(request)
|
57
|
+
end
|
58
|
+
|
43
59
|
# Lists the offset of the specified topics and partitions.
|
44
60
|
#
|
45
61
|
# @param (see Kafka::Protocol::ListOffsetRequest#initialize)
|
@@ -101,5 +117,11 @@ module Kafka
|
|
101
117
|
|
102
118
|
@connection.send_request(request)
|
103
119
|
end
|
120
|
+
|
121
|
+
def sasl_handshake(**options)
|
122
|
+
request = Protocol::SaslHandshakeRequest(**options)
|
123
|
+
|
124
|
+
@connection.send_request(request)
|
125
|
+
end
|
104
126
|
end
|
105
127
|
end
|
data/lib/kafka/client.rb
CHANGED
@@ -40,8 +40,14 @@ module Kafka
|
|
40
40
|
# @param ssl_client_cert_key [String, nil] a PEM encoded client cert key to use with an
|
41
41
|
# SSL connection. Must be used in combination with ssl_client_cert.
|
42
42
|
#
|
43
|
+
# @param sasl_gssapi_principal [String, nil] a KRB5 principal
|
44
|
+
#
|
45
|
+
# @param sasl_gssapi_keytab [String, nil] a KRB5 keytab filepath
|
46
|
+
#
|
43
47
|
# @return [Client]
|
44
|
-
def initialize(seed_brokers:, client_id: "ruby-kafka", logger: nil, connect_timeout: nil, socket_timeout: nil,
|
48
|
+
def initialize(seed_brokers:, client_id: "ruby-kafka", logger: nil, connect_timeout: nil, socket_timeout: nil,
|
49
|
+
ssl_ca_cert: nil, ssl_client_cert: nil, ssl_client_cert_key: nil,
|
50
|
+
sasl_gssapi_principal: nil, sasl_gssapi_keytab: nil)
|
45
51
|
@logger = logger || Logger.new(nil)
|
46
52
|
@instrumenter = Instrumenter.new(client_id: client_id)
|
47
53
|
@seed_brokers = normalize_seed_brokers(seed_brokers)
|
@@ -55,6 +61,8 @@ module Kafka
|
|
55
61
|
ssl_context: ssl_context,
|
56
62
|
logger: @logger,
|
57
63
|
instrumenter: @instrumenter,
|
64
|
+
sasl_gssapi_principal: sasl_gssapi_principal,
|
65
|
+
sasl_gssapi_keytab: sasl_gssapi_keytab
|
58
66
|
)
|
59
67
|
|
60
68
|
@cluster = initialize_cluster
|
@@ -414,6 +422,26 @@ module Kafka
|
|
414
422
|
@cluster.resolve_offset(topic, partition, :latest) - 1
|
415
423
|
end
|
416
424
|
|
425
|
+
|
426
|
+
# Retrieve the offset of the last message in each partition of the specified topics.
|
427
|
+
#
|
428
|
+
# @param topics [Array<String>] topic names.
|
429
|
+
# @return [Hash<String, Hash<Integer, Integer>>]
|
430
|
+
# @example
|
431
|
+
# last_offsets_for('topic-1', 'topic-2') # =>
|
432
|
+
# # {
|
433
|
+
# # 'topic-1' => { 0 => 100, 1 => 100 },
|
434
|
+
# # 'topic-2' => { 0 => 100, 1 => 100 }
|
435
|
+
# # }
|
436
|
+
def last_offsets_for(*topics)
|
437
|
+
@cluster.add_target_topics(topics)
|
438
|
+
topics.map {|topic|
|
439
|
+
partition_ids = @cluster.partitions_for(topic).collect(&:partition_id)
|
440
|
+
partition_offsets = @cluster.resolve_offsets(topic, partition_ids, :latest)
|
441
|
+
[topic, partition_offsets.collect { |k, v| [k, v - 1] }.to_h]
|
442
|
+
}.to_h
|
443
|
+
end
|
444
|
+
|
417
445
|
# Closes all connections to the Kafka brokers and frees up used resources.
|
418
446
|
#
|
419
447
|
# @return [nil]
|
data/lib/kafka/cluster.rb
CHANGED
@@ -157,6 +157,9 @@ module Kafka
|
|
157
157
|
end
|
158
158
|
|
159
159
|
offsets
|
160
|
+
rescue Kafka::ProtocolError
|
161
|
+
mark_as_stale!
|
162
|
+
raise
|
160
163
|
end
|
161
164
|
|
162
165
|
def resolve_offset(topic, partition, offset)
|
@@ -190,7 +193,7 @@ module Kafka
|
|
190
193
|
# @raise [ConnectionError] if none of the nodes in `seed_brokers` are available.
|
191
194
|
# @return [Protocol::MetadataResponse] the cluster metadata.
|
192
195
|
def fetch_cluster_info
|
193
|
-
@seed_brokers.each do |node|
|
196
|
+
@seed_brokers.shuffle.each do |node|
|
194
197
|
@logger.info "Fetching cluster metadata from #{node}"
|
195
198
|
|
196
199
|
begin
|
data/lib/kafka/connection.rb
CHANGED
@@ -2,11 +2,41 @@ require "stringio"
|
|
2
2
|
require "kafka/socket_with_timeout"
|
3
3
|
require "kafka/ssl_socket_with_timeout"
|
4
4
|
require "kafka/protocol/request_message"
|
5
|
+
require "kafka/protocol/null_response"
|
5
6
|
require "kafka/protocol/encoder"
|
6
7
|
require "kafka/protocol/decoder"
|
7
8
|
|
8
9
|
module Kafka
|
9
10
|
|
11
|
+
# An asynchronous response object allows us to deliver a response at some
|
12
|
+
# later point in time.
|
13
|
+
#
|
14
|
+
# When instantiating an AsyncResponse, you provide a response decoder and
|
15
|
+
# a block that will force the caller to wait until a response is available.
|
16
|
+
class AsyncResponse
|
17
|
+
# Use a custom "nil" value so that nil can be an actual value.
|
18
|
+
MISSING = Object.new
|
19
|
+
|
20
|
+
def initialize(decoder, &block)
|
21
|
+
@decoder = decoder
|
22
|
+
@block = block
|
23
|
+
@response = MISSING
|
24
|
+
end
|
25
|
+
|
26
|
+
# Block until a response is available.
|
27
|
+
def call
|
28
|
+
@block.call if @response == MISSING
|
29
|
+
@response
|
30
|
+
end
|
31
|
+
|
32
|
+
# Deliver the response data.
|
33
|
+
#
|
34
|
+
# After calling this, `#call` will return the decoded response.
|
35
|
+
def deliver(data)
|
36
|
+
@response = @decoder.decode(data)
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
10
40
|
# A connection to a single Kafka broker.
|
11
41
|
#
|
12
42
|
# Usually you'll need a separate connection to each broker in a cluster, since most
|
@@ -28,6 +58,9 @@ module Kafka
|
|
28
58
|
SOCKET_TIMEOUT = 10
|
29
59
|
CONNECT_TIMEOUT = 10
|
30
60
|
|
61
|
+
attr_reader :encoder
|
62
|
+
attr_reader :decoder
|
63
|
+
|
31
64
|
# Opens a connection to a Kafka broker.
|
32
65
|
#
|
33
66
|
# @param host [String] the hostname of the broker.
|
@@ -75,6 +108,18 @@ module Kafka
|
|
75
108
|
#
|
76
109
|
# @return [Object] the response.
|
77
110
|
def send_request(request)
|
111
|
+
# Immediately block on the asynchronous request.
|
112
|
+
send_async_request(request).call
|
113
|
+
end
|
114
|
+
|
115
|
+
# Sends a request over the connection.
|
116
|
+
#
|
117
|
+
# @param request [#encode, #response_class] the request that should be
|
118
|
+
# encoded and written.
|
119
|
+
#
|
120
|
+
# @return [AsyncResponse] the async response, allowing the caller to choose
|
121
|
+
# when to block.
|
122
|
+
def send_async_request(request)
|
78
123
|
# Default notification payload.
|
79
124
|
notification = {
|
80
125
|
broker_host: @host,
|
@@ -83,15 +128,41 @@ module Kafka
|
|
83
128
|
response_size: 0,
|
84
129
|
}
|
85
130
|
|
86
|
-
@instrumenter.
|
87
|
-
|
131
|
+
@instrumenter.start("request.connection", notification)
|
132
|
+
|
133
|
+
open unless open?
|
134
|
+
|
135
|
+
@correlation_id += 1
|
88
136
|
|
89
|
-
|
137
|
+
write_request(request, notification)
|
90
138
|
|
91
|
-
|
139
|
+
response_class = request.response_class
|
140
|
+
correlation_id = @correlation_id
|
92
141
|
|
93
|
-
|
94
|
-
|
142
|
+
if response_class.nil?
|
143
|
+
async_response = AsyncResponse.new(Protocol::NullResponse) { nil }
|
144
|
+
|
145
|
+
# Immediately deliver a nil value.
|
146
|
+
async_response.deliver(nil)
|
147
|
+
|
148
|
+
@instrumenter.finish("request.connection", notification)
|
149
|
+
|
150
|
+
async_response
|
151
|
+
else
|
152
|
+
async_response = AsyncResponse.new(response_class) {
|
153
|
+
# A caller is trying to read the response, so we have to wait for it
|
154
|
+
# before we can return.
|
155
|
+
wait_for_response(correlation_id, notification)
|
156
|
+
|
157
|
+
# Once done, we can finish the instrumentation.
|
158
|
+
@instrumenter.finish("request.connection", notification)
|
159
|
+
}
|
160
|
+
|
161
|
+
# Store the asynchronous response so that data can be delivered to it
|
162
|
+
# at a later time.
|
163
|
+
@pending_async_responses[correlation_id] = async_response
|
164
|
+
|
165
|
+
async_response
|
95
166
|
end
|
96
167
|
rescue Errno::EPIPE, Errno::ECONNRESET, Errno::ETIMEDOUT, EOFError => e
|
97
168
|
close
|
@@ -115,6 +186,9 @@ module Kafka
|
|
115
186
|
|
116
187
|
# Correlation id is initialized to zero and bumped for each request.
|
117
188
|
@correlation_id = 0
|
189
|
+
|
190
|
+
# The pipeline of pending response futures must be reset.
|
191
|
+
@pending_async_responses = {}
|
118
192
|
rescue Errno::ETIMEDOUT => e
|
119
193
|
@logger.error "Timed out while trying to connect to #{self}: #{e}"
|
120
194
|
raise ConnectionError, e
|
@@ -156,8 +230,8 @@ module Kafka
|
|
156
230
|
# a given Decoder.
|
157
231
|
#
|
158
232
|
# @return [nil]
|
159
|
-
def read_response(
|
160
|
-
@logger.debug "Waiting for response #{
|
233
|
+
def read_response(expected_correlation_id, notification)
|
234
|
+
@logger.debug "Waiting for response #{expected_correlation_id} from #{to_s}"
|
161
235
|
|
162
236
|
data = @decoder.bytes
|
163
237
|
notification[:response_size] = data.bytesize
|
@@ -166,32 +240,49 @@ module Kafka
|
|
166
240
|
response_decoder = Kafka::Protocol::Decoder.new(buffer)
|
167
241
|
|
168
242
|
correlation_id = response_decoder.int32
|
169
|
-
response = response_class.decode(response_decoder)
|
170
243
|
|
171
244
|
@logger.debug "Received response #{correlation_id} from #{to_s}"
|
172
245
|
|
173
|
-
return correlation_id,
|
246
|
+
return correlation_id, response_decoder
|
174
247
|
rescue Errno::ETIMEDOUT
|
175
|
-
@logger.error "Timed out while waiting for response #{
|
248
|
+
@logger.error "Timed out while waiting for response #{expected_correlation_id}"
|
176
249
|
raise
|
250
|
+
rescue Errno::EPIPE, Errno::ECONNRESET, Errno::ETIMEDOUT, EOFError => e
|
251
|
+
close
|
252
|
+
|
253
|
+
raise ConnectionError, "Connection error: #{e}"
|
177
254
|
end
|
178
255
|
|
179
|
-
def wait_for_response(
|
256
|
+
def wait_for_response(expected_correlation_id, notification)
|
180
257
|
loop do
|
181
|
-
correlation_id,
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
258
|
+
correlation_id, data = read_response(expected_correlation_id, notification)
|
259
|
+
|
260
|
+
if correlation_id < expected_correlation_id
|
261
|
+
# There may have been a previous request that timed out before the client
|
262
|
+
# was able to read the response. In that case, the response will still be
|
263
|
+
# sitting in the socket waiting to be read. If the response we just read
|
264
|
+
# was to a previous request, we deliver it to the pending async response
|
265
|
+
# future.
|
266
|
+
async_response = @pending_async_responses.delete(correlation_id)
|
267
|
+
async_response.deliver(data) if async_response
|
268
|
+
elsif correlation_id > expected_correlation_id
|
269
|
+
raise Kafka::Error, "Correlation id mismatch: expected #{expected_correlation_id} but got #{correlation_id}"
|
191
270
|
else
|
192
|
-
|
271
|
+
# If the request was asynchronous, deliver the response to the pending
|
272
|
+
# async response future.
|
273
|
+
async_response = @pending_async_responses.delete(correlation_id)
|
274
|
+
async_response.deliver(data)
|
275
|
+
|
276
|
+
return async_response.call
|
193
277
|
end
|
194
278
|
end
|
279
|
+
rescue Errno::EPIPE, Errno::ECONNRESET, Errno::ETIMEDOUT, EOFError => e
|
280
|
+
notification[:exception] = [e.class.name, e.message]
|
281
|
+
notification[:exception_object] = e
|
282
|
+
|
283
|
+
close
|
284
|
+
|
285
|
+
raise ConnectionError, "Connection error: #{e}"
|
195
286
|
end
|
196
287
|
end
|
197
288
|
end
|