ruby-kafka 0.7.0 → 0.7.1.beta1
- checksums.yaml +4 -4
- data/.circleci/config.yml +3 -3
- data/.gitignore +1 -0
- data/CHANGELOG.md +4 -0
- data/lib/kafka.rb +32 -0
- data/lib/kafka/broker.rb +18 -0
- data/lib/kafka/client.rb +38 -4
- data/lib/kafka/cluster.rb +60 -37
- data/lib/kafka/consumer.rb +2 -2
- data/lib/kafka/fetch_operation.rb +18 -59
- data/lib/kafka/fetched_batch.rb +9 -9
- data/lib/kafka/fetched_batch_generator.rb +114 -0
- data/lib/kafka/fetched_offset_resolver.rb +48 -0
- data/lib/kafka/fetcher.rb +2 -2
- data/lib/kafka/produce_operation.rb +52 -14
- data/lib/kafka/producer.rb +82 -2
- data/lib/kafka/protocol.rb +68 -48
- data/lib/kafka/protocol/add_partitions_to_txn_request.rb +34 -0
- data/lib/kafka/protocol/add_partitions_to_txn_response.rb +47 -0
- data/lib/kafka/protocol/decoder.rb +3 -6
- data/lib/kafka/protocol/encoder.rb +6 -11
- data/lib/kafka/protocol/end_txn_request.rb +29 -0
- data/lib/kafka/protocol/end_txn_response.rb +19 -0
- data/lib/kafka/protocol/fetch_request.rb +3 -1
- data/lib/kafka/protocol/fetch_response.rb +37 -18
- data/lib/kafka/protocol/init_producer_id_request.rb +26 -0
- data/lib/kafka/protocol/init_producer_id_response.rb +27 -0
- data/lib/kafka/protocol/list_offset_request.rb +8 -2
- data/lib/kafka/protocol/list_offset_response.rb +11 -6
- data/lib/kafka/protocol/record.rb +9 -0
- data/lib/kafka/protocol/record_batch.rb +17 -1
- data/lib/kafka/ssl_context.rb +19 -5
- data/lib/kafka/transaction_manager.rb +261 -0
- data/lib/kafka/transaction_state_machine.rb +72 -0
- data/lib/kafka/version.rb +1 -1
- data/ruby-kafka.gemspec +1 -1
- metadata +20 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: c6a5d1dd1969107f9107675cad6848630f8cf13ce4ce223b0fa7c84bc4facf03
+  data.tar.gz: 2797042cbef4be8b219d2103dc5f63cbe36bb5c68aae738178ed02b197579254
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: d3dbfbb842fbc7b370d197a6f8020d4c88814c64383fe9cacd8e4338e4cafbdf222573476ba6bcf4d4ab05263b30ab0b63de2b7bc861d06730a303aec0a8d7a9
+  data.tar.gz: 12aba0aeacb8ceb731591f037ef075b3218d6c44710c5852fdfa1e5c769649bea95b4d609a9920ba48ce80b207a82628c488759d4c7cb48317ce71bbee36bfd0
data/.circleci/config.yml
CHANGED
@@ -41,7 +41,7 @@ jobs:
       - run: bundle install --path vendor/bundle
       - run: bundle exec rspec --profile --tag functional spec/functional
 
-  kafka-1.0:
+  kafka-1.0.0:
     docker:
       - image: circleci/ruby:2.5.1-node
         environment:
@@ -111,5 +111,5 @@ workflows:
     jobs:
       - unit
       - kafka-0.11
-      - kafka-1.0
-      - kafka-1.1
+      - kafka-1.0.0
+      - kafka-1.1
data/.gitignore
CHANGED
data/CHANGELOG.md
CHANGED
data/lib/kafka.rb
CHANGED
@@ -245,6 +245,38 @@ module Kafka
   class FailedScramAuthentication < SaslScramError
   end
 
+  # The broker received an out of order sequence number
+  class OutOfOrderSequenceNumberError < Error
+  end
+
+  # The broker received a duplicate sequence number
+  class DuplicateSequenceNumberError < Error
+  end
+
+  # Producer attempted an operation with an old epoch. Either there is a newer producer with the same transactionalId, or the producer's transaction has been expired by the broker.
+  class InvalidProducerEpochError < Error
+  end
+
+  # The producer attempted a transactional operation in an invalid state
+  class InvalidTxnStateError < Error
+  end
+
+  # The producer attempted to use a producer id which is not currently assigned to its transactional id
+  class InvalidProducerIDMappingError < Error
+  end
+
+  # The transaction timeout is larger than the maximum value allowed by the broker (as configured by transaction.max.timeout.ms).
+  class InvalidTransactionTimeoutError < Error
+  end
+
+  # The producer attempted to update a transaction while another concurrent operation on the same transaction was ongoing
+  class ConcurrentTransactionError < Error
+  end
+
+  # Indicates that the transaction coordinator sending a WriteTxnMarker is no longer the current coordinator for a given producer
+  class TransactionCoordinatorFencedError < Error
+  end
+
   # Initializes a new Kafka client.
   #
   # @see Client#initialize
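These new error classes mirror the transactional-producer error codes in the Kafka protocol. A minimal sketch of how application code might react to two of them; only the class names come from this diff, while the delivery call and retry policy are illustrative:

    require "kafka"

    kafka = Kafka.new(seed_brokers: ["localhost:9092"])

    begin
      kafka.deliver_message("hello", topic: "greetings")
    rescue Kafka::ConcurrentTransactionError
      # Another operation on the same transaction is still in flight;
      # backing off and retrying is reasonable.
      sleep 1
      retry
    rescue Kafka::InvalidProducerEpochError
      # A newer producer with the same transactional ID has fenced this
      # one; retrying cannot succeed, so surface the error.
      raise
    end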
data/lib/kafka/broker.rb
CHANGED
@@ -164,6 +164,24 @@ module Kafka
       send_request(request)
     end
 
+    def init_producer_id(**options)
+      request = Protocol::InitProducerIDRequest.new(**options)
+
+      send_request(request)
+    end
+
+    def add_partitions_to_txn(**options)
+      request = Protocol::AddPartitionsToTxnRequest.new(**options)
+
+      send_request(request)
+    end
+
+    def end_txn(**options)
+      request = Protocol::EndTxnRequest.new(**options)
+
+      send_request(request)
+    end
+
     private
 
     def send_request(request)
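Each new method follows the existing Broker pattern: build a request object, then delegate to send_request. A sketch of the transactional round trip they enable; the keyword arguments below are assumptions inferred from the request class names, not confirmed by this diff (the actual fields live in the new protocol files):

    # Assumed keyword arguments; see the new protocol request classes.
    response = broker.init_producer_id(
      transactional_id: "my-app-producer",
      transactional_timeout: 60
    )

    broker.add_partitions_to_txn(
      transactional_id: "my-app-producer",
      producer_id: response.producer_id,
      producer_epoch: response.producer_epoch,
      topics: { "greetings" => [0] }
    )

    broker.end_txn(
      transactional_id: "my-app-producer",
      producer_id: response.producer_id,
      producer_epoch: response.producer_epoch,
      transaction_result: true # true = commit, false = abort (assumed)
    )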
data/lib/kafka/client.rb
CHANGED
@@ -2,6 +2,7 @@
 
 require "kafka/ssl_context"
 require "kafka/cluster"
+require "kafka/transaction_manager"
 require "kafka/broker_info"
 require "kafka/producer"
 require "kafka/consumer"
@@ -60,7 +61,7 @@ module Kafka
     # @return [Client]
     def initialize(seed_brokers:, client_id: "ruby-kafka", logger: nil, connect_timeout: nil, socket_timeout: nil,
                    ssl_ca_cert_file_path: nil, ssl_ca_cert: nil, ssl_client_cert: nil, ssl_client_cert_key: nil,
-                   sasl_gssapi_principal: nil, sasl_gssapi_keytab: nil,
+                   ssl_client_cert_chain: nil, sasl_gssapi_principal: nil, sasl_gssapi_keytab: nil,
                    sasl_plain_authzid: '', sasl_plain_username: nil, sasl_plain_password: nil,
                    sasl_scram_username: nil, sasl_scram_password: nil, sasl_scram_mechanism: nil,
                    sasl_over_ssl: true, ssl_ca_certs_from_system: false)
@@ -73,6 +74,7 @@ module Kafka
       ca_cert: ssl_ca_cert,
       client_cert: ssl_client_cert,
       client_cert_key: ssl_client_cert_key,
+      client_cert_chain: ssl_client_cert_chain,
       ca_certs_from_system: ssl_ca_certs_from_system,
     )
 
@@ -158,8 +160,16 @@ module Kafka
        instrumenter: @instrumenter,
      )
 
+      transaction_manager = TransactionManager.new(
+        cluster: @cluster,
+        logger: @logger,
+        idempotent: false,
+        transactional: false
+      )
+
       operation = ProduceOperation.new(
         cluster: @cluster,
+        transaction_manager: transaction_manager,
         buffer: buffer,
         required_acks: 1,
         ack_timeout: 10,
@@ -222,15 +232,39 @@ module Kafka
     # are per-partition rather than per-topic or per-producer.
     #
     # @return [Kafka::Producer] the Kafka producer.
-    def producer(
+    def producer(
+      compression_codec: nil,
+      compression_threshold: 1,
+      ack_timeout: 5,
+      required_acks: :all,
+      max_retries: 2,
+      retry_backoff: 1,
+      max_buffer_size: 1000,
+      max_buffer_bytesize: 10_000_000,
+      idempotent: false,
+      transactional: false,
+      transactional_id: nil,
+      transactional_timeout: 60
+    )
+      cluster = initialize_cluster
       compressor = Compressor.new(
         codec_name: compression_codec,
         threshold: compression_threshold,
        instrumenter: @instrumenter,
      )
 
+      transaction_manager = TransactionManager.new(
+        cluster: cluster,
+        logger: @logger,
+        idempotent: idempotent,
+        transactional: transactional,
+        transactional_id: transactional_id,
+        transactional_timeout: transactional_timeout,
+      )
+
       Producer.new(
-        cluster:
+        cluster: cluster,
+        transaction_manager: transaction_manager,
         logger: @logger,
         instrumenter: @instrumenter,
         compressor: compressor,
@@ -478,7 +512,7 @@ module Kafka
 
       batches.each do |batch|
         batch.messages.each(&block)
-        offsets[batch.partition] = batch.last_offset + 1 unless batch.
+        offsets[batch.partition] = batch.last_offset + 1 unless batch.unknown_last_offset?
       end
     end
   end
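With these changes, Client#producer wires a TransactionManager into every Producer, and deliver_message gets a no-op manager. Constructing the two new producer flavors uses only keyword arguments visible in this diff; the broker address and transactional ID are placeholders:

    require "kafka"

    kafka = Kafka.new(seed_brokers: ["localhost:9092"])

    # Idempotent producer: broker-side deduplication, no transactions.
    idempotent_producer = kafka.producer(idempotent: true)

    # Transactional producer: the transactional_id should stay stable
    # across restarts so the broker can fence zombie instances.
    txn_producer = kafka.producer(
      transactional: true,
      transactional_id: "billing-exporter",
      transactional_timeout: 60
    )

The transactional send/commit flow itself lives in producer.rb (+82 lines, not expanded here).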
data/lib/kafka/cluster.rb
CHANGED
@@ -111,48 +111,32 @@ module Kafka
       connect_to_broker(get_leader_id(topic, partition))
     end
 
+    # Finds the broker acting as the coordinator of the given group.
+    #
+    # @param group_id: [String]
+    # @return [Broker] the broker that's currently coordinator.
     def get_group_coordinator(group_id:)
       @logger.debug "Getting group coordinator for `#{group_id}`"
-
       refresh_metadata_if_necessary!
+      get_coordinator(Kafka::Protocol::COORDINATOR_TYPE_GROUP, group_id)
+    end
 
-
-
-
-
-
-
-      )
-
-      Protocol.handle_error(response.error_code, response.error_message)
-
-      coordinator_id = response.coordinator_id
-
-      @logger.debug "Coordinator for group `#{group_id}` is #{coordinator_id}. Connecting..."
-
-      # It's possible that a new broker is introduced to the cluster and
-      # becomes the coordinator before we have a chance to refresh_metadata.
-      coordinator = begin
-        connect_to_broker(coordinator_id)
-      rescue Kafka::NoSuchBroker
-        @logger.debug "Broker #{coordinator_id} missing from broker cache, refreshing"
-        refresh_metadata!
-        connect_to_broker(coordinator_id)
-      end
+    # Finds the broker acting as the coordinator of the given transaction.
+    #
+    # @param transactional_id: [String]
+    # @return [Broker] the broker that's currently coordinator.
+    def get_transaction_coordinator(transactional_id:)
+      @logger.debug "Getting transaction coordinator for `#{transactional_id}`"
 
-
+      refresh_metadata_if_necessary!
 
-
-
-
-
-
-
-      @logger.error "Failed to get group coordinator info from #{broker}: #{e}"
-      end
+      if transactional_id.nil?
+        # Get a random_broker
+        @logger.debug "Transaction ID is not available. Choose a random broker."
+        return random_broker
+      else
+        get_coordinator(Kafka::Protocol::COORDINATOR_TYPE_TRANSACTION, transactional_id)
       end
-
-      raise Kafka::Error, "Failed to find group coordinator"
     end
 
     def partitions_for(topic)
@@ -313,8 +297,7 @@ module Kafka
         topic => broker_partitions.map {|partition|
           {
             partition: partition,
-            time: offset
-            max_offsets: 1,
+            time: offset
           }
         }
       }
@@ -427,5 +410,45 @@ module Kafka
     def controller_broker
       connect_to_broker(cluster_info.controller_id)
     end
+
+    def get_coordinator(coordinator_type, coordinator_key)
+      cluster_info.brokers.each do |broker_info|
+        begin
+          broker = connect_to_broker(broker_info.node_id)
+          response = broker.find_coordinator(
+            coordinator_type: coordinator_type,
+            coordinator_key: coordinator_key
+          )
+
+          Protocol.handle_error(response.error_code, response.error_message)
+
+          coordinator_id = response.coordinator_id
+
+          @logger.debug "Coordinator for `#{coordinator_key}` is #{coordinator_id}. Connecting..."
+
+          # It's possible that a new broker is introduced to the cluster and
+          # becomes the coordinator before we have a chance to refresh_metadata.
+          coordinator = begin
+            connect_to_broker(coordinator_id)
+          rescue Kafka::NoSuchBroker
+            @logger.debug "Broker #{coordinator_id} missing from broker cache, refreshing"
+            refresh_metadata!
+            connect_to_broker(coordinator_id)
+          end
+
+          @logger.debug "Connected to coordinator: #{coordinator} for `#{coordinator_key}`"
+
+          return coordinator
+        rescue CoordinatorNotAvailable
+          @logger.debug "Coordinator not available; retrying in 1s"
+          sleep 1
+          retry
+        rescue ConnectionError => e
+          @logger.error "Failed to get coordinator info from #{broker}: #{e}"
+        end
+      end
+
+      raise Kafka::Error, "Failed to find coordinator"
+    end
   end
 end
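Both public lookups now share get_coordinator, which walks the broker list, retries the same broker on CoordinatorNotAvailable, and falls through to the next broker on ConnectionError. Illustrative calls against the client's internal Cluster object; the IDs are placeholders:

    group_coordinator = cluster.get_group_coordinator(group_id: "my-consumer-group")
    txn_coordinator = cluster.get_transaction_coordinator(transactional_id: "billing-exporter")

    # A nil transactional_id (idempotent but non-transactional producer)
    # short-circuits to a random broker instead of a coordinator lookup.
    any_broker = cluster.get_transaction_coordinator(transactional_id: nil)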
data/lib/kafka/consumer.rb
CHANGED
@@ -310,9 +310,9 @@ module Kafka
       @instrumenter.instrument("process_batch.consumer", notification) do
         begin
           yield batch
-          @current_offsets[batch.topic][batch.partition] = batch.last_offset
+          @current_offsets[batch.topic][batch.partition] = batch.last_offset unless batch.unknown_last_offset?
         rescue => e
-          offset_range = (batch.first_offset..batch.last_offset)
+          offset_range = (batch.first_offset..batch.last_offset || batch.highwater_mark_offset)
           location = "#{batch.topic}/#{batch.partition} in offset range #{offset_range}"
           backtrace = e.backtrace.join("\n")
 
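The consumer now tolerates batches whose last offset is unknown. A sketch of the defensive pattern this enables in application code; `process` is a placeholder, and the conditions under which last_offset is unknown are an assumption based on this diff:

    require "kafka"

    kafka = Kafka.new(seed_brokers: ["localhost:9092"])
    consumer = kafka.consumer(group_id: "my-group")
    consumer.subscribe("greetings")

    consumer.each_batch do |batch|
      process(batch.messages)

      # batch.last_offset may be unknown (e.g. a batch carrying no
      # consumable messages), so fall back to the high-water mark.
      upper = batch.unknown_last_offset? ? batch.highwater_mark_offset : batch.last_offset
      puts "Processed #{batch.topic}/#{batch.partition} up to offset #{upper}"
    end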
data/lib/kafka/fetch_operation.rb
CHANGED
@@ -1,6 +1,7 @@
 # frozen_string_literal: true
 
-require "kafka/
+require "kafka/fetched_offset_resolver"
+require "kafka/fetched_batch_generator"
 
 module Kafka
@@ -27,6 +28,10 @@ module Kafka
       @max_bytes = max_bytes
       @max_wait_time = max_wait_time
       @topics = {}
+
+      @offset_resolver = Kafka::FetchedOffsetResolver.new(
+        logger: logger
+      )
     end
 
     def fetch_from_partition(topic, partition, offset: :latest, max_bytes: 1048576)
@@ -63,8 +68,8 @@ module Kafka
         end
       end
 
-      topics_by_broker.flat_map
-
+      topics_by_broker.flat_map do |broker, topics|
+        @offset_resolver.resolve!(broker, topics)
 
         options = {
           max_wait_time: @max_wait_time * 1000, # Kafka expects ms, not secs
@@ -75,8 +80,8 @@ module Kafka
 
         response = broker.fetch_messages(**options)
 
-        response.topics.flat_map
-          fetched_topic.partitions.map
+        response.topics.flat_map do |fetched_topic|
+          fetched_topic.partitions.map do |fetched_partition|
             begin
               Protocol.handle_error(fetched_partition.error_code)
             rescue Kafka::OffsetOutOfRange => e
@@ -92,64 +97,18 @@ module Kafka
               raise e
             end
 
-
-
-
-
-
-
-
-
-            FetchedBatch.new(
-              topic: fetched_topic.name,
-              partition: fetched_partition.partition,
-              highwater_mark_offset: fetched_partition.highwater_mark_offset,
-              messages: messages,
-            )
-          }
-        }
-      }
+            Kafka::FetchedBatchGenerator.new(
+              fetched_topic.name,
+              fetched_partition,
+              logger: @logger
+            ).generate
+          end
+        end
+      end
     rescue Kafka::ConnectionError, Kafka::LeaderNotAvailable, Kafka::NotLeaderForPartition
       @cluster.mark_as_stale!
 
       raise
     end
-
-    private
-
-    def resolve_offsets(broker, topics)
-      pending_topics = {}
-
-      topics.each do |topic, partitions|
-        partitions.each do |partition, options|
-          offset = options.fetch(:fetch_offset)
-          next if offset >= 0
-
-          @logger.debug "Resolving offset `#{offset}` for #{topic}/#{partition}..."
-
-          pending_topics[topic] ||= []
-          pending_topics[topic] << {
-            partition: partition,
-            time: offset,
-            max_offsets: 1,
-          }
-        end
-      end
-
-      return topics if pending_topics.empty?
-
-      response = broker.list_offsets(topics: pending_topics)
-
-      pending_topics.each do |topic, partitions|
-        partitions.each do |options|
-          partition = options.fetch(:partition)
-          resolved_offset = response.offset_for(topic, partition)
-
-          @logger.debug "Offset for #{topic}/#{partition} is #{resolved_offset.inspect}"
-
-          topics[topic][partition][:fetch_offset] = resolved_offset || 0
-        end
-      end
-    end
   end
 end
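The fetch path thus delegates its two fiddly steps to the new collaborators. A sketch of how they compose, following the call sites above; `broker`, `topics`, `fetched_topic`, and `fetched_partition` stand in for the local variables inside FetchOperation:

    require "logger"

    logger = Logger.new($stderr)

    # Resolve logical offsets (negative placeholder values such as those
    # behind :earliest/:latest) into concrete ones, updating `topics`.
    resolver = Kafka::FetchedOffsetResolver.new(logger: logger)
    resolver.resolve!(broker, topics)

    # Wrap one fetched partition in a FetchedBatch, including the
    # unknown-last-offset handling the consumer now relies on.
    batch = Kafka::FetchedBatchGenerator.new(
      fetched_topic.name,
      fetched_partition,
      logger: logger
    ).generate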