ruby-kafka 0.7.0 → 0.7.1.beta1

Files changed (37)
  1. checksums.yaml +4 -4
  2. data/.circleci/config.yml +3 -3
  3. data/.gitignore +1 -0
  4. data/CHANGELOG.md +4 -0
  5. data/lib/kafka.rb +32 -0
  6. data/lib/kafka/broker.rb +18 -0
  7. data/lib/kafka/client.rb +38 -4
  8. data/lib/kafka/cluster.rb +60 -37
  9. data/lib/kafka/consumer.rb +2 -2
  10. data/lib/kafka/fetch_operation.rb +18 -59
  11. data/lib/kafka/fetched_batch.rb +9 -9
  12. data/lib/kafka/fetched_batch_generator.rb +114 -0
  13. data/lib/kafka/fetched_offset_resolver.rb +48 -0
  14. data/lib/kafka/fetcher.rb +2 -2
  15. data/lib/kafka/produce_operation.rb +52 -14
  16. data/lib/kafka/producer.rb +82 -2
  17. data/lib/kafka/protocol.rb +68 -48
  18. data/lib/kafka/protocol/add_partitions_to_txn_request.rb +34 -0
  19. data/lib/kafka/protocol/add_partitions_to_txn_response.rb +47 -0
  20. data/lib/kafka/protocol/decoder.rb +3 -6
  21. data/lib/kafka/protocol/encoder.rb +6 -11
  22. data/lib/kafka/protocol/end_txn_request.rb +29 -0
  23. data/lib/kafka/protocol/end_txn_response.rb +19 -0
  24. data/lib/kafka/protocol/fetch_request.rb +3 -1
  25. data/lib/kafka/protocol/fetch_response.rb +37 -18
  26. data/lib/kafka/protocol/init_producer_id_request.rb +26 -0
  27. data/lib/kafka/protocol/init_producer_id_response.rb +27 -0
  28. data/lib/kafka/protocol/list_offset_request.rb +8 -2
  29. data/lib/kafka/protocol/list_offset_response.rb +11 -6
  30. data/lib/kafka/protocol/record.rb +9 -0
  31. data/lib/kafka/protocol/record_batch.rb +17 -1
  32. data/lib/kafka/ssl_context.rb +19 -5
  33. data/lib/kafka/transaction_manager.rb +261 -0
  34. data/lib/kafka/transaction_state_machine.rb +72 -0
  35. data/lib/kafka/version.rb +1 -1
  36. data/ruby-kafka.gemspec +1 -1
  37. metadata +20 -4
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 1d27b2212172f3f44a3402ad32c7b30413d8e3b0eef28e9d46161c977740fb27
-  data.tar.gz: 58345eea3931d45849a46913d03e29887e2257bffafbb9a1ca531b700ed6960f
+  metadata.gz: c6a5d1dd1969107f9107675cad6848630f8cf13ce4ce223b0fa7c84bc4facf03
+  data.tar.gz: 2797042cbef4be8b219d2103dc5f63cbe36bb5c68aae738178ed02b197579254
 SHA512:
-  metadata.gz: 1b64e322958a28c917efcd9a66f1bf3f2a1b17403830b915467670a0cedad9a810eed2d472c5359c8af963c4d890f0ca8d20175831cb628a9406548b23834607
-  data.tar.gz: 5dfb9df17ca7cc57b6435f03cf690e39517132ac3170c78e090781af20f72b91893785b03f864c9e460e2f9f1fca93149bd916975351c38bbd32606a6ce1b323
+  metadata.gz: d3dbfbb842fbc7b370d197a6f8020d4c88814c64383fe9cacd8e4338e4cafbdf222573476ba6bcf4d4ab05263b30ab0b63de2b7bc861d06730a303aec0a8d7a9
+  data.tar.gz: 12aba0aeacb8ceb731591f037ef075b3218d6c44710c5852fdfa1e5c769649bea95b4d609a9920ba48ce80b207a82628c488759d4c7cb48317ce71bbee36bfd0
data/.circleci/config.yml CHANGED
@@ -41,7 +41,7 @@ jobs:
       - run: bundle install --path vendor/bundle
       - run: bundle exec rspec --profile --tag functional spec/functional
 
-  kafka-1.0:
+  kafka-1.0.0:
    docker:
       - image: circleci/ruby:2.5.1-node
         environment:
@@ -111,5 +111,5 @@ workflows:
     jobs:
       - unit
       - kafka-0.11
-      - kafka-1.0
-      - kafka-1.1
+      - kafka-1.0.0
+      - kafka-1.1
data/.gitignore CHANGED
@@ -10,3 +10,4 @@
 .env
 *.log
 *.swp
+.byebug_history
data/CHANGELOG.md CHANGED
@@ -4,6 +4,10 @@ Changes and additions to the library will be listed here.
 
 ## Unreleased
 
+## 0.7.1
+
+- Exactly Once Delivery and Transactional Messaging Support (#608).
+
 ## 0.7.0
 
 - Drop support for Kafka 0.10 in favor of native support for Kafka 0.11.
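
The headline change is exactly-once delivery and transactional messaging. A minimal sketch of driving the new transactional producer, using the keyword arguments visible in the `client.rb` diff below; the `transaction` block helper is assumed from the `producer.rb` changes (+82 lines, not expanded on this page), and the broker address and transactional ID are placeholders:

```ruby
require "kafka"

kafka = Kafka.new(seed_brokers: ["kafka1:9092"], client_id: "my-app")

# `transactional:` and `transactional_id:` come from the new
# Client#producer signature; the `transaction` block is assumed
# from the producer.rb changes in #608.
producer = kafka.producer(
  transactional: true,
  transactional_id: "order-service-1" # hypothetical ID
)

producer.transaction do
  producer.produce("order created", topic: "orders")
  producer.produce("charge pending", topic: "billing")
  producer.deliver_messages
end # commits atomically; an exception inside the block aborts

producer.shutdown
```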
data/lib/kafka.rb CHANGED
@@ -245,6 +245,38 @@ module Kafka
   class FailedScramAuthentication < SaslScramError
   end
 
+  # The broker received an out of order sequence number
+  class OutOfOrderSequenceNumberError < Error
+  end
+
+  # The broker received a duplicate sequence number
+  class DuplicateSequenceNumberError < Error
+  end
+
+  # Producer attempted an operation with an old epoch. Either there is a newer producer with the same transactionalId, or the producer's transaction has been expired by the broker.
+  class InvalidProducerEpochError < Error
+  end
+
+  # The producer attempted a transactional operation in an invalid state
+  class InvalidTxnStateError < Error
+  end
+
+  # The producer attempted to use a producer id which is not currently assigned to its transactional id
+  class InvalidProducerIDMappingError < Error
+  end
+
+  # The transaction timeout is larger than the maximum value allowed by the broker (as configured by transaction.max.timeout.ms).
+  class InvalidTransactionTimeoutError < Error
+  end
+
+  # The producer attempted to update a transaction while another concurrent operation on the same transaction was ongoing
+  class ConcurrentTransactionError < Error
+  end
+
+  # Indicates that the transaction coordinator sending a WriteTxnMarker is no longer the current coordinator for a given producer
+  class TransactionCoordinatorFencedError < Error
+  end
+
   # Initializes a new Kafka client.
   #
   # @see Client#initialize
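
Each new class maps one of the transaction-related protocol error codes onto a Ruby exception. A hedged sketch of how calling code might distinguish a retriable error from a fatal one (the retry policy here is illustrative, not part of the library):

```ruby
begin
  producer.deliver_messages
rescue Kafka::ConcurrentTransactionError
  # Another operation on the same transaction is still in flight;
  # backing off briefly and retrying is a plausible response.
  sleep 0.5
  retry
rescue Kafka::InvalidProducerEpochError
  # A newer producer has claimed this transactional ID (or the broker
  # expired the transaction); this producer instance is fenced and
  # must be rebuilt rather than retried.
  raise
end
```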
data/lib/kafka/broker.rb CHANGED
@@ -164,6 +164,24 @@ module Kafka
       send_request(request)
     end
 
+    def init_producer_id(**options)
+      request = Protocol::InitProducerIDRequest.new(**options)
+
+      send_request(request)
+    end
+
+    def add_partitions_to_txn(**options)
+      request = Protocol::AddPartitionsToTxnRequest.new(**options)
+
+      send_request(request)
+    end
+
+    def end_txn(**options)
+      request = Protocol::EndTxnRequest.new(**options)
+
+      send_request(request)
+    end
+
     private
 
     def send_request(request)
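
These three wrappers give the new `TransactionManager` its protocol round-trips against the transaction coordinator. A sketch of the flow they enable; the keyword names are inferred from the request classes added in this release (items 18, 22, and 26 in the file list) and should be treated as assumptions:

```ruby
# 1. Obtain a producer ID and epoch (keyword names assumed).
response = broker.init_producer_id(
  transactional_id: "order-service-1",
  transactional_timeout: 60_000
)
producer_id    = response.producer_id
producer_epoch = response.producer_epoch

# 2. Register the partitions this transaction will write to.
broker.add_partitions_to_txn(
  transactional_id: "order-service-1",
  producer_id: producer_id,
  producer_epoch: producer_epoch,
  topics: { "orders" => [0] }
)

# 3. Close the transaction with a commit (true) or abort (false) marker.
broker.end_txn(
  transactional_id: "order-service-1",
  producer_id: producer_id,
  producer_epoch: producer_epoch,
  transaction_result: true
)
```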
data/lib/kafka/client.rb CHANGED
@@ -2,6 +2,7 @@
 
 require "kafka/ssl_context"
 require "kafka/cluster"
+require "kafka/transaction_manager"
 require "kafka/broker_info"
 require "kafka/producer"
 require "kafka/consumer"
@@ -60,7 +61,7 @@ module Kafka
     # @return [Client]
     def initialize(seed_brokers:, client_id: "ruby-kafka", logger: nil, connect_timeout: nil, socket_timeout: nil,
                    ssl_ca_cert_file_path: nil, ssl_ca_cert: nil, ssl_client_cert: nil, ssl_client_cert_key: nil,
-                   sasl_gssapi_principal: nil, sasl_gssapi_keytab: nil,
+                   ssl_client_cert_chain: nil, sasl_gssapi_principal: nil, sasl_gssapi_keytab: nil,
                    sasl_plain_authzid: '', sasl_plain_username: nil, sasl_plain_password: nil,
                    sasl_scram_username: nil, sasl_scram_password: nil, sasl_scram_mechanism: nil,
                    sasl_over_ssl: true, ssl_ca_certs_from_system: false)
@@ -73,6 +74,7 @@ module Kafka
         ca_cert: ssl_ca_cert,
         client_cert: ssl_client_cert,
         client_cert_key: ssl_client_cert_key,
+        client_cert_chain: ssl_client_cert_chain,
         ca_certs_from_system: ssl_ca_certs_from_system,
       )
 
@@ -158,8 +160,16 @@ module Kafka
         instrumenter: @instrumenter,
       )
 
+      transaction_manager = TransactionManager.new(
+        cluster: @cluster,
+        logger: @logger,
+        idempotent: false,
+        transactional: false
+      )
+
      operation = ProduceOperation.new(
        cluster: @cluster,
+       transaction_manager: transaction_manager,
        buffer: buffer,
        required_acks: 1,
        ack_timeout: 10,
@@ -222,15 +232,39 @@ module Kafka
     # are per-partition rather than per-topic or per-producer.
     #
     # @return [Kafka::Producer] the Kafka producer.
-    def producer(compression_codec: nil, compression_threshold: 1, ack_timeout: 5, required_acks: :all, max_retries: 2, retry_backoff: 1, max_buffer_size: 1000, max_buffer_bytesize: 10_000_000)
+    def producer(
+      compression_codec: nil,
+      compression_threshold: 1,
+      ack_timeout: 5,
+      required_acks: :all,
+      max_retries: 2,
+      retry_backoff: 1,
+      max_buffer_size: 1000,
+      max_buffer_bytesize: 10_000_000,
+      idempotent: false,
+      transactional: false,
+      transactional_id: nil,
+      transactional_timeout: 60
+    )
+      cluster = initialize_cluster
       compressor = Compressor.new(
         codec_name: compression_codec,
         threshold: compression_threshold,
         instrumenter: @instrumenter,
       )
 
+      transaction_manager = TransactionManager.new(
+        cluster: cluster,
+        logger: @logger,
+        idempotent: idempotent,
+        transactional: transactional,
+        transactional_id: transactional_id,
+        transactional_timeout: transactional_timeout,
+      )
+
       Producer.new(
-        cluster: initialize_cluster,
+        cluster: cluster,
+        transaction_manager: transaction_manager,
         logger: @logger,
         instrumenter: @instrumenter,
         compressor: compressor,
@@ -478,7 +512,7 @@ module Kafka
 
       batches.each do |batch|
         batch.messages.each(&block)
-        offsets[batch.partition] = batch.last_offset + 1 unless batch.empty?
+        offsets[batch.partition] = batch.last_offset + 1 unless batch.unknown_last_offset?
       end
     end
   end
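
The practical upshot of the `client.rb` changes is the four new `producer` options. Idempotence alone needs no transactional ID; a minimal sketch (the broker address is a placeholder):

```ruby
kafka = Kafka.new(seed_brokers: ["kafka1:9092"])

# `idempotent: true` has the broker de-duplicate retried batches per
# partition, so a producer session delivers each message at most once.
producer = kafka.producer(idempotent: true, required_acks: :all)

producer.produce("event payload", topic: "events")
producer.deliver_messages
producer.shutdown
```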
data/lib/kafka/cluster.rb CHANGED
@@ -111,48 +111,32 @@ module Kafka
       connect_to_broker(get_leader_id(topic, partition))
     end
 
+    # Finds the broker acting as the coordinator of the given group.
+    #
+    # @param group_id: [String]
+    # @return [Broker] the broker that's currently coordinator.
     def get_group_coordinator(group_id:)
       @logger.debug "Getting group coordinator for `#{group_id}`"
-
       refresh_metadata_if_necessary!
+      get_coordinator(Kafka::Protocol::COORDINATOR_TYPE_GROUP, group_id)
+    end
 
-      cluster_info.brokers.each do |broker_info|
-        begin
-          broker = connect_to_broker(broker_info.node_id)
-          response = broker.find_coordinator(
-            coordinator_type: Kafka::Protocol::COORDINATOR_TYPE_GROUP,
-            coordinator_key: group_id
-          )
-
-          Protocol.handle_error(response.error_code, response.error_message)
-
-          coordinator_id = response.coordinator_id
-
-          @logger.debug "Coordinator for group `#{group_id}` is #{coordinator_id}. Connecting..."
-
-          # It's possible that a new broker is introduced to the cluster and
-          # becomes the coordinator before we have a chance to refresh_metadata.
-          coordinator = begin
-            connect_to_broker(coordinator_id)
-          rescue Kafka::NoSuchBroker
-            @logger.debug "Broker #{coordinator_id} missing from broker cache, refreshing"
-            refresh_metadata!
-            connect_to_broker(coordinator_id)
-          end
+    # Finds the broker acting as the coordinator of the given transaction.
+    #
+    # @param transactional_id: [String]
+    # @return [Broker] the broker that's currently coordinator.
+    def get_transaction_coordinator(transactional_id:)
+      @logger.debug "Getting transaction coordinator for `#{transactional_id}`"
 
-          @logger.debug "Connected to coordinator: #{coordinator} for group `#{group_id}`"
+      refresh_metadata_if_necessary!
 
-          return coordinator
-        rescue CoordinatorNotAvailable
-          @logger.debug "Coordinator not available; retrying in 1s"
-          sleep 1
-          retry
-        rescue ConnectionError => e
-          @logger.error "Failed to get group coordinator info from #{broker}: #{e}"
-        end
+      if transactional_id.nil?
+        # Get a random_broker
+        @logger.debug "Transaction ID is not available. Choose a random broker."
+        return random_broker
+      else
+        get_coordinator(Kafka::Protocol::COORDINATOR_TYPE_TRANSACTION, transactional_id)
       end
-
-      raise Kafka::Error, "Failed to find group coordinator"
     end
 
     def partitions_for(topic)
@@ -313,8 +297,7 @@ module Kafka
         topic => broker_partitions.map {|partition|
           {
             partition: partition,
-            time: offset,
-            max_offsets: 1,
+            time: offset
           }
         }
       }
@@ -427,5 +410,45 @@ module Kafka
     def controller_broker
       connect_to_broker(cluster_info.controller_id)
     end
+
+    def get_coordinator(coordinator_type, coordinator_key)
+      cluster_info.brokers.each do |broker_info|
+        begin
+          broker = connect_to_broker(broker_info.node_id)
+          response = broker.find_coordinator(
+            coordinator_type: coordinator_type,
+            coordinator_key: coordinator_key
+          )
+
+          Protocol.handle_error(response.error_code, response.error_message)
+
+          coordinator_id = response.coordinator_id
+
+          @logger.debug "Coordinator for `#{coordinator_key}` is #{coordinator_id}. Connecting..."
+
+          # It's possible that a new broker is introduced to the cluster and
+          # becomes the coordinator before we have a chance to refresh_metadata.
+          coordinator = begin
+            connect_to_broker(coordinator_id)
+          rescue Kafka::NoSuchBroker
+            @logger.debug "Broker #{coordinator_id} missing from broker cache, refreshing"
+            refresh_metadata!
+            connect_to_broker(coordinator_id)
+          end
+
+          @logger.debug "Connected to coordinator: #{coordinator} for `#{coordinator_key}`"
+
+          return coordinator
+        rescue CoordinatorNotAvailable
+          @logger.debug "Coordinator not available; retrying in 1s"
+          sleep 1
+          retry
+        rescue ConnectionError => e
+          @logger.error "Failed to get coordinator info from #{broker}: #{e}"
+        end
+      end
+
+      raise Kafka::Error, "Failed to find coordinator"
+    end
   end
 end
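
With the lookup generalized into the private `get_coordinator`, the transaction manager can resolve its coordinator through the new public method. Usage as implied by the diff; the transactional ID is a placeholder:

```ruby
# Transactional producers must send AddPartitionsToTxn/EndTxn requests
# to this broker:
coordinator = cluster.get_transaction_coordinator(
  transactional_id: "order-service-1"
)

# A purely idempotent producer has no transactional ID, in which case
# the method falls back to a random broker, as the diff above shows.
broker = cluster.get_transaction_coordinator(transactional_id: nil)
```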
data/lib/kafka/consumer.rb CHANGED
@@ -310,9 +310,9 @@ module Kafka
       @instrumenter.instrument("process_batch.consumer", notification) do
         begin
           yield batch
-          @current_offsets[batch.topic][batch.partition] = batch.last_offset
+          @current_offsets[batch.topic][batch.partition] = batch.last_offset unless batch.unknown_last_offset?
         rescue => e
-          offset_range = (batch.first_offset..batch.last_offset)
+          offset_range = (batch.first_offset..batch.last_offset || batch.highwater_mark_offset)
           location = "#{batch.topic}/#{batch.partition} in offset range #{offset_range}"
           backtrace = e.backtrace.join("\n")
 
data/lib/kafka/fetch_operation.rb CHANGED
@@ -1,6 +1,7 @@
 # frozen_string_literal: true
 
-require "kafka/fetched_batch"
+require "kafka/fetched_offset_resolver"
+require "kafka/fetched_batch_generator"
 
 module Kafka
 
@@ -27,6 +28,10 @@ module Kafka
       @max_bytes = max_bytes
       @max_wait_time = max_wait_time
      @topics = {}
+
+      @offset_resolver = Kafka::FetchedOffsetResolver.new(
+        logger: logger
+      )
     end
 
     def fetch_from_partition(topic, partition, offset: :latest, max_bytes: 1048576)
@@ -63,8 +68,8 @@ module Kafka
         end
       end
 
-      topics_by_broker.flat_map {|broker, topics|
-        resolve_offsets(broker, topics)
+      topics_by_broker.flat_map do |broker, topics|
+        @offset_resolver.resolve!(broker, topics)
 
         options = {
           max_wait_time: @max_wait_time * 1000, # Kafka expects ms, not secs
@@ -75,8 +80,8 @@ module Kafka
 
         response = broker.fetch_messages(**options)
 
-        response.topics.flat_map {|fetched_topic|
-          fetched_topic.partitions.map {|fetched_partition|
+        response.topics.flat_map do |fetched_topic|
+          fetched_topic.partitions.map do |fetched_partition|
             begin
               Protocol.handle_error(fetched_partition.error_code)
             rescue Kafka::OffsetOutOfRange => e
@@ -92,64 +97,18 @@ module Kafka
               raise e
             end
 
-            messages = fetched_partition.messages.map {|message|
-              FetchedMessage.new(
-                message: message,
-                topic: fetched_topic.name,
-                partition: fetched_partition.partition,
-              )
-            }
-
-            FetchedBatch.new(
-              topic: fetched_topic.name,
-              partition: fetched_partition.partition,
-              highwater_mark_offset: fetched_partition.highwater_mark_offset,
-              messages: messages,
-            )
-          }
-        }
-      }
+            Kafka::FetchedBatchGenerator.new(
+              fetched_topic.name,
+              fetched_partition,
+              logger: @logger
+            ).generate
+          end
+        end
+      end
     rescue Kafka::ConnectionError, Kafka::LeaderNotAvailable, Kafka::NotLeaderForPartition
       @cluster.mark_as_stale!
 
       raise
     end
-
-    private
-
-    def resolve_offsets(broker, topics)
-      pending_topics = {}
-
-      topics.each do |topic, partitions|
-        partitions.each do |partition, options|
-          offset = options.fetch(:fetch_offset)
-          next if offset >= 0
-
-          @logger.debug "Resolving offset `#{offset}` for #{topic}/#{partition}..."
-
-          pending_topics[topic] ||= []
-          pending_topics[topic] << {
-            partition: partition,
-            time: offset,
-            max_offsets: 1,
-          }
-        end
-      end
-
-      return topics if pending_topics.empty?
-
-      response = broker.list_offsets(topics: pending_topics)
-
-      pending_topics.each do |topic, partitions|
-        partitions.each do |options|
-          partition = options.fetch(:partition)
-          resolved_offset = response.offset_for(topic, partition)
-
-          @logger.debug "Offset for #{topic}/#{partition} is #{resolved_offset.inspect}"
-
-          topics[topic][partition][:fetch_offset] = resolved_offset || 0
-        end
-      end
-    end
   end
 end