ruby-kafka 0.7.0 → 0.7.1.beta1
This diff covers publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.circleci/config.yml +3 -3
- data/.gitignore +1 -0
- data/CHANGELOG.md +4 -0
- data/lib/kafka.rb +32 -0
- data/lib/kafka/broker.rb +18 -0
- data/lib/kafka/client.rb +38 -4
- data/lib/kafka/cluster.rb +60 -37
- data/lib/kafka/consumer.rb +2 -2
- data/lib/kafka/fetch_operation.rb +18 -59
- data/lib/kafka/fetched_batch.rb +9 -9
- data/lib/kafka/fetched_batch_generator.rb +114 -0
- data/lib/kafka/fetched_offset_resolver.rb +48 -0
- data/lib/kafka/fetcher.rb +2 -2
- data/lib/kafka/produce_operation.rb +52 -14
- data/lib/kafka/producer.rb +82 -2
- data/lib/kafka/protocol.rb +68 -48
- data/lib/kafka/protocol/add_partitions_to_txn_request.rb +34 -0
- data/lib/kafka/protocol/add_partitions_to_txn_response.rb +47 -0
- data/lib/kafka/protocol/decoder.rb +3 -6
- data/lib/kafka/protocol/encoder.rb +6 -11
- data/lib/kafka/protocol/end_txn_request.rb +29 -0
- data/lib/kafka/protocol/end_txn_response.rb +19 -0
- data/lib/kafka/protocol/fetch_request.rb +3 -1
- data/lib/kafka/protocol/fetch_response.rb +37 -18
- data/lib/kafka/protocol/init_producer_id_request.rb +26 -0
- data/lib/kafka/protocol/init_producer_id_response.rb +27 -0
- data/lib/kafka/protocol/list_offset_request.rb +8 -2
- data/lib/kafka/protocol/list_offset_response.rb +11 -6
- data/lib/kafka/protocol/record.rb +9 -0
- data/lib/kafka/protocol/record_batch.rb +17 -1
- data/lib/kafka/ssl_context.rb +19 -5
- data/lib/kafka/transaction_manager.rb +261 -0
- data/lib/kafka/transaction_state_machine.rb +72 -0
- data/lib/kafka/version.rb +1 -1
- data/ruby-kafka.gemspec +1 -1
- metadata +20 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: c6a5d1dd1969107f9107675cad6848630f8cf13ce4ce223b0fa7c84bc4facf03
+  data.tar.gz: 2797042cbef4be8b219d2103dc5f63cbe36bb5c68aae738178ed02b197579254
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: d3dbfbb842fbc7b370d197a6f8020d4c88814c64383fe9cacd8e4338e4cafbdf222573476ba6bcf4d4ab05263b30ab0b63de2b7bc861d06730a303aec0a8d7a9
+  data.tar.gz: 12aba0aeacb8ceb731591f037ef075b3218d6c44710c5852fdfa1e5c769649bea95b4d609a9920ba48ce80b207a82628c488759d4c7cb48317ce71bbee36bfd0
data/.circleci/config.yml
CHANGED
@@ -41,7 +41,7 @@ jobs:
       - run: bundle install --path vendor/bundle
       - run: bundle exec rspec --profile --tag functional spec/functional

-  kafka-1.0:
+  kafka-1.0.0:
     docker:
       - image: circleci/ruby:2.5.1-node
         environment:
@@ -111,5 +111,5 @@ workflows:
     jobs:
       - unit
       - kafka-0.11
-      - kafka-1.0
-      - kafka-1.1
+      - kafka-1.0.0
+      - kafka-1.1
data/.gitignore
CHANGED
data/CHANGELOG.md
CHANGED
data/lib/kafka.rb
CHANGED
@@ -245,6 +245,38 @@ module Kafka
   class FailedScramAuthentication < SaslScramError
   end

+  # The broker received an out of order sequence number
+  class OutOfOrderSequenceNumberError < Error
+  end
+
+  # The broker received a duplicate sequence number
+  class DuplicateSequenceNumberError < Error
+  end
+
+  # Producer attempted an operation with an old epoch. Either there is a newer producer with the same transactionalId, or the producer's transaction has been expired by the broker.
+  class InvalidProducerEpochError < Error
+  end
+
+  # The producer attempted a transactional operation in an invalid state
+  class InvalidTxnStateError < Error
+  end
+
+  # The producer attempted to use a producer id which is not currently assigned to its transactional id
+  class InvalidProducerIDMappingError < Error
+  end
+
+  # The transaction timeout is larger than the maximum value allowed by the broker (as configured by transaction.max.timeout.ms).
+  class InvalidTransactionTimeoutError < Error
+  end
+
+  # The producer attempted to update a transaction while another concurrent operation on the same transaction was ongoing
+  class ConcurrentTransactionError < Error
+  end
+
+  # Indicates that the transaction coordinator sending a WriteTxnMarker is no longer the current coordinator for a given producer
+  class TransactionCoordinatorFencedError < Error
+  end
+
   # Initializes a new Kafka client.
   #
   # @see Client#initialize
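These classes give the broker's transactional error codes distinct exception types under the existing Kafka::Error hierarchy. A minimal sketch of rescuing them around a delivery call; the broker address, topic, and retry policy below are assumptions for illustration, not part of this diff:

  require "kafka"

  kafka = Kafka.new(["localhost:9092"], client_id: "error-handling-example") # assumed broker address
  producer = kafka.producer(idempotent: true)

  begin
    producer.produce("event payload", topic: "events") # assumed topic
    producer.deliver_messages
  rescue Kafka::ConcurrentTransactionError
    # Another operation on the same transaction is still in flight; backing off and retrying is usually safe.
    sleep 1
    retry
  rescue Kafka::InvalidProducerEpochError, Kafka::TransactionCoordinatorFencedError => e
    # A newer producer owns this transactional id, or the coordinator changed; re-raise so the caller can rebuild the producer.
    raise e
  ensure
    producer.shutdown
  end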
data/lib/kafka/broker.rb
CHANGED
@@ -164,6 +164,24 @@ module Kafka
       send_request(request)
     end

+    def init_producer_id(**options)
+      request = Protocol::InitProducerIDRequest.new(**options)
+
+      send_request(request)
+    end
+
+    def add_partitions_to_txn(**options)
+      request = Protocol::AddPartitionsToTxnRequest.new(**options)
+
+      send_request(request)
+    end
+
+    def end_txn(**options)
+      request = Protocol::EndTxnRequest.new(**options)
+
+      send_request(request)
+    end
+
     private

     def send_request(request)
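Each new Broker method is a thin wrapper that builds the corresponding protocol request from its keyword arguments and sends it over the existing connection. A sketch of how a caller might invoke one of them; the broker variable and the keyword names are assumptions drawn from Kafka's InitProducerId API, since this hunk only shows that **options is forwarded:

  # Ask the broker for a producer id/epoch pair before idempotent or transactional writes.
  # Keyword names below are assumptions; the diff only shows `**options` being passed through.
  response = broker.init_producer_id(
    transactional_id: "order-service-1",  # assumed id
    transactional_timeout: 60_000         # assumed value, in milliseconds
  )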
data/lib/kafka/client.rb
CHANGED
@@ -2,6 +2,7 @@

 require "kafka/ssl_context"
 require "kafka/cluster"
+require "kafka/transaction_manager"
 require "kafka/broker_info"
 require "kafka/producer"
 require "kafka/consumer"
@@ -60,7 +61,7 @@ module Kafka
     # @return [Client]
     def initialize(seed_brokers:, client_id: "ruby-kafka", logger: nil, connect_timeout: nil, socket_timeout: nil,
                    ssl_ca_cert_file_path: nil, ssl_ca_cert: nil, ssl_client_cert: nil, ssl_client_cert_key: nil,
-                   sasl_gssapi_principal: nil, sasl_gssapi_keytab: nil,
+                   ssl_client_cert_chain: nil, sasl_gssapi_principal: nil, sasl_gssapi_keytab: nil,
                    sasl_plain_authzid: '', sasl_plain_username: nil, sasl_plain_password: nil,
                    sasl_scram_username: nil, sasl_scram_password: nil, sasl_scram_mechanism: nil,
                    sasl_over_ssl: true, ssl_ca_certs_from_system: false)
@@ -73,6 +74,7 @@ module Kafka
        ca_cert: ssl_ca_cert,
        client_cert: ssl_client_cert,
        client_cert_key: ssl_client_cert_key,
+       client_cert_chain: ssl_client_cert_chain,
        ca_certs_from_system: ssl_ca_certs_from_system,
      )

@@ -158,8 +160,16 @@ module Kafka
        instrumenter: @instrumenter,
      )

+     transaction_manager = TransactionManager.new(
+       cluster: @cluster,
+       logger: @logger,
+       idempotent: false,
+       transactional: false
+     )
+
      operation = ProduceOperation.new(
        cluster: @cluster,
+       transaction_manager: transaction_manager,
        buffer: buffer,
        required_acks: 1,
        ack_timeout: 10,
@@ -222,15 +232,39 @@ module Kafka
    # are per-partition rather than per-topic or per-producer.
    #
    # @return [Kafka::Producer] the Kafka producer.
-    def producer(
+    def producer(
+      compression_codec: nil,
+      compression_threshold: 1,
+      ack_timeout: 5,
+      required_acks: :all,
+      max_retries: 2,
+      retry_backoff: 1,
+      max_buffer_size: 1000,
+      max_buffer_bytesize: 10_000_000,
+      idempotent: false,
+      transactional: false,
+      transactional_id: nil,
+      transactional_timeout: 60
+    )
+      cluster = initialize_cluster
      compressor = Compressor.new(
        codec_name: compression_codec,
        threshold: compression_threshold,
        instrumenter: @instrumenter,
      )

+      transaction_manager = TransactionManager.new(
+        cluster: cluster,
+        logger: @logger,
+        idempotent: idempotent,
+        transactional: transactional,
+        transactional_id: transactional_id,
+        transactional_timeout: transactional_timeout,
+      )
+
      Producer.new(
-        cluster:
+        cluster: cluster,
+        transaction_manager: transaction_manager,
        logger: @logger,
        instrumenter: @instrumenter,
        compressor: compressor,
@@ -478,7 +512,7 @@ module Kafka

      batches.each do |batch|
        batch.messages.each(&block)
-        offsets[batch.partition] = batch.last_offset + 1 unless batch.
+        offsets[batch.partition] = batch.last_offset + 1 unless batch.unknown_last_offset?
      end
    end
  end
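The expanded `#producer` signature is the public entry point for the new idempotent and transactional delivery modes. A minimal usage sketch built only from the keyword arguments visible above; the broker address, topic, and transactional id are assumptions:

  require "kafka"

  kafka = Kafka.new(["localhost:9092"], client_id: "txn-example") # assumed broker address

  # Idempotent producer: broker-side deduplication of retried writes.
  idempotent_producer = kafka.producer(idempotent: true)
  idempotent_producer.produce("order created", topic: "orders") # assumed topic
  idempotent_producer.deliver_messages

  # Transactional producer: needs a stable transactional id; the timeout defaults to 60.
  # The transaction lifecycle itself is handled by Producer and TransactionManager
  # (see producer.rb and transaction_manager.rb in the file list above).
  txn_producer = kafka.producer(
    transactional: true,
    transactional_id: "order-service-1", # assumed id
    transactional_timeout: 60
  )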
data/lib/kafka/cluster.rb
CHANGED
@@ -111,48 +111,32 @@ module Kafka
       connect_to_broker(get_leader_id(topic, partition))
     end

+    # Finds the broker acting as the coordinator of the given group.
+    #
+    # @param group_id: [String]
+    # @return [Broker] the broker that's currently coordinator.
     def get_group_coordinator(group_id:)
       @logger.debug "Getting group coordinator for `#{group_id}`"
-
       refresh_metadata_if_necessary!
+      get_coordinator(Kafka::Protocol::COORDINATOR_TYPE_GROUP, group_id)
+    end

-
-
-
-
-
-
-      )
-
-      Protocol.handle_error(response.error_code, response.error_message)
-
-      coordinator_id = response.coordinator_id
-
-      @logger.debug "Coordinator for group `#{group_id}` is #{coordinator_id}. Connecting..."
-
-      # It's possible that a new broker is introduced to the cluster and
-      # becomes the coordinator before we have a chance to refresh_metadata.
-      coordinator = begin
-        connect_to_broker(coordinator_id)
-      rescue Kafka::NoSuchBroker
-        @logger.debug "Broker #{coordinator_id} missing from broker cache, refreshing"
-        refresh_metadata!
-        connect_to_broker(coordinator_id)
-      end
+    # Finds the broker acting as the coordinator of the given transaction.
+    #
+    # @param transactional_id: [String]
+    # @return [Broker] the broker that's currently coordinator.
+    def get_transaction_coordinator(transactional_id:)
+      @logger.debug "Getting transaction coordinator for `#{transactional_id}`"

-
+      refresh_metadata_if_necessary!

-
-
-
-
-
-
-        @logger.error "Failed to get group coordinator info from #{broker}: #{e}"
-      end
+      if transactional_id.nil?
+        # Get a random_broker
+        @logger.debug "Transaction ID is not available. Choose a random broker."
+        return random_broker
+      else
+        get_coordinator(Kafka::Protocol::COORDINATOR_TYPE_TRANSACTION, transactional_id)
       end
-
-      raise Kafka::Error, "Failed to find group coordinator"
     end

     def partitions_for(topic)
@@ -313,8 +297,7 @@ module Kafka
         topic => broker_partitions.map {|partition|
           {
             partition: partition,
-            time: offset
-            max_offsets: 1,
+            time: offset
           }
         }
       }
@@ -427,5 +410,45 @@ module Kafka
     def controller_broker
       connect_to_broker(cluster_info.controller_id)
     end
+
+    def get_coordinator(coordinator_type, coordinator_key)
+      cluster_info.brokers.each do |broker_info|
+        begin
+          broker = connect_to_broker(broker_info.node_id)
+          response = broker.find_coordinator(
+            coordinator_type: coordinator_type,
+            coordinator_key: coordinator_key
+          )
+
+          Protocol.handle_error(response.error_code, response.error_message)
+
+          coordinator_id = response.coordinator_id
+
+          @logger.debug "Coordinator for `#{coordinator_key}` is #{coordinator_id}. Connecting..."
+
+          # It's possible that a new broker is introduced to the cluster and
+          # becomes the coordinator before we have a chance to refresh_metadata.
+          coordinator = begin
+            connect_to_broker(coordinator_id)
+          rescue Kafka::NoSuchBroker
+            @logger.debug "Broker #{coordinator_id} missing from broker cache, refreshing"
+            refresh_metadata!
+            connect_to_broker(coordinator_id)
+          end
+
+          @logger.debug "Connected to coordinator: #{coordinator} for `#{coordinator_key}`"
+
+          return coordinator
+        rescue CoordinatorNotAvailable
+          @logger.debug "Coordinator not available; retrying in 1s"
+          sleep 1
+          retry
+        rescue ConnectionError => e
+          @logger.error "Failed to get coordinator info from #{broker}: #{e}"
+        end
+      end
+
+      raise Kafka::Error, "Failed to find coordinator"
+    end
   end
 end
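Both coordinator lookups now delegate to the shared `get_coordinator` helper, which walks the known brokers and retries while the coordinator is unavailable. A short sketch of the two entry points; the `cluster` instance and the ids are purely illustrative:

  # Consumer-group coordinator and transaction coordinator share the same retry/refresh logic.
  group_coordinator = cluster.get_group_coordinator(group_id: "my-consumer-group")        # assumed id
  txn_coordinator   = cluster.get_transaction_coordinator(transactional_id: "my-txn-id")  # assumed id

  # Passing nil falls back to a random broker, per the branch added above.
  fallback_broker = cluster.get_transaction_coordinator(transactional_id: nil)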
data/lib/kafka/consumer.rb
CHANGED
@@ -310,9 +310,9 @@ module Kafka
       @instrumenter.instrument("process_batch.consumer", notification) do
         begin
           yield batch
-          @current_offsets[batch.topic][batch.partition] = batch.last_offset
+          @current_offsets[batch.topic][batch.partition] = batch.last_offset unless batch.unknown_last_offset?
         rescue => e
-          offset_range = (batch.first_offset..batch.last_offset)
+          offset_range = (batch.first_offset..batch.last_offset || batch.highwater_mark_offset)
           location = "#{batch.topic}/#{batch.partition} in offset range #{offset_range}"
           backtrace = e.backtrace.join("\n")

data/lib/kafka/fetch_operation.rb
CHANGED
@@ -1,6 +1,7 @@
 # frozen_string_literal: true

-require "kafka/
+require "kafka/fetched_offset_resolver"
+require "kafka/fetched_batch_generator"

 module Kafka

@@ -27,6 +28,10 @@ module Kafka
       @max_bytes = max_bytes
       @max_wait_time = max_wait_time
       @topics = {}
+
+      @offset_resolver = Kafka::FetchedOffsetResolver.new(
+        logger: logger
+      )
     end

     def fetch_from_partition(topic, partition, offset: :latest, max_bytes: 1048576)
@@ -63,8 +68,8 @@ module Kafka
         end
       end

-      topics_by_broker.flat_map
-
+      topics_by_broker.flat_map do |broker, topics|
+        @offset_resolver.resolve!(broker, topics)

         options = {
           max_wait_time: @max_wait_time * 1000, # Kafka expects ms, not secs
@@ -75,8 +80,8 @@ module Kafka

         response = broker.fetch_messages(**options)

-        response.topics.flat_map
-        fetched_topic.partitions.map
+        response.topics.flat_map do |fetched_topic|
+          fetched_topic.partitions.map do |fetched_partition|
             begin
               Protocol.handle_error(fetched_partition.error_code)
             rescue Kafka::OffsetOutOfRange => e
@@ -92,64 +97,18 @@ module Kafka
               raise e
             end

-
-
-
-
-
-
-
-
-          FetchedBatch.new(
-            topic: fetched_topic.name,
-            partition: fetched_partition.partition,
-            highwater_mark_offset: fetched_partition.highwater_mark_offset,
-            messages: messages,
-          )
-        }
-        }
-      }
+            Kafka::FetchedBatchGenerator.new(
+              fetched_topic.name,
+              fetched_partition,
+              logger: @logger
+            ).generate
+          end
+        end
+      end
     rescue Kafka::ConnectionError, Kafka::LeaderNotAvailable, Kafka::NotLeaderForPartition
       @cluster.mark_as_stale!

       raise
     end
-
-    private
-
-    def resolve_offsets(broker, topics)
-      pending_topics = {}
-
-      topics.each do |topic, partitions|
-        partitions.each do |partition, options|
-          offset = options.fetch(:fetch_offset)
-          next if offset >= 0
-
-          @logger.debug "Resolving offset `#{offset}` for #{topic}/#{partition}..."
-
-          pending_topics[topic] ||= []
-          pending_topics[topic] << {
-            partition: partition,
-            time: offset,
-            max_offsets: 1,
-          }
-        end
-      end
-
-      return topics if pending_topics.empty?
-
-      response = broker.list_offsets(topics: pending_topics)
-
-      pending_topics.each do |topic, partitions|
-        partitions.each do |options|
-          partition = options.fetch(:partition)
-          resolved_offset = response.offset_for(topic, partition)
-
-          @logger.debug "Offset for #{topic}/#{partition} is #{resolved_offset.inspect}"
-
-          topics[topic][partition][:fetch_offset] = resolved_offset || 0
-        end
-      end
-    end
   end
 end