ruby-kafka 0.7.0 → 0.7.1.beta1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. checksums.yaml +4 -4
  2. data/.circleci/config.yml +3 -3
  3. data/.gitignore +1 -0
  4. data/CHANGELOG.md +4 -0
  5. data/lib/kafka.rb +32 -0
  6. data/lib/kafka/broker.rb +18 -0
  7. data/lib/kafka/client.rb +38 -4
  8. data/lib/kafka/cluster.rb +60 -37
  9. data/lib/kafka/consumer.rb +2 -2
  10. data/lib/kafka/fetch_operation.rb +18 -59
  11. data/lib/kafka/fetched_batch.rb +9 -9
  12. data/lib/kafka/fetched_batch_generator.rb +114 -0
  13. data/lib/kafka/fetched_offset_resolver.rb +48 -0
  14. data/lib/kafka/fetcher.rb +2 -2
  15. data/lib/kafka/produce_operation.rb +52 -14
  16. data/lib/kafka/producer.rb +82 -2
  17. data/lib/kafka/protocol.rb +68 -48
  18. data/lib/kafka/protocol/add_partitions_to_txn_request.rb +34 -0
  19. data/lib/kafka/protocol/add_partitions_to_txn_response.rb +47 -0
  20. data/lib/kafka/protocol/decoder.rb +3 -6
  21. data/lib/kafka/protocol/encoder.rb +6 -11
  22. data/lib/kafka/protocol/end_txn_request.rb +29 -0
  23. data/lib/kafka/protocol/end_txn_response.rb +19 -0
  24. data/lib/kafka/protocol/fetch_request.rb +3 -1
  25. data/lib/kafka/protocol/fetch_response.rb +37 -18
  26. data/lib/kafka/protocol/init_producer_id_request.rb +26 -0
  27. data/lib/kafka/protocol/init_producer_id_response.rb +27 -0
  28. data/lib/kafka/protocol/list_offset_request.rb +8 -2
  29. data/lib/kafka/protocol/list_offset_response.rb +11 -6
  30. data/lib/kafka/protocol/record.rb +9 -0
  31. data/lib/kafka/protocol/record_batch.rb +17 -1
  32. data/lib/kafka/ssl_context.rb +19 -5
  33. data/lib/kafka/transaction_manager.rb +261 -0
  34. data/lib/kafka/transaction_state_machine.rb +72 -0
  35. data/lib/kafka/version.rb +1 -1
  36. data/ruby-kafka.gemspec +1 -1
  37. metadata +20 -4
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 1d27b2212172f3f44a3402ad32c7b30413d8e3b0eef28e9d46161c977740fb27
-  data.tar.gz: 58345eea3931d45849a46913d03e29887e2257bffafbb9a1ca531b700ed6960f
+  metadata.gz: c6a5d1dd1969107f9107675cad6848630f8cf13ce4ce223b0fa7c84bc4facf03
+  data.tar.gz: 2797042cbef4be8b219d2103dc5f63cbe36bb5c68aae738178ed02b197579254
 SHA512:
-  metadata.gz: 1b64e322958a28c917efcd9a66f1bf3f2a1b17403830b915467670a0cedad9a810eed2d472c5359c8af963c4d890f0ca8d20175831cb628a9406548b23834607
-  data.tar.gz: 5dfb9df17ca7cc57b6435f03cf690e39517132ac3170c78e090781af20f72b91893785b03f864c9e460e2f9f1fca93149bd916975351c38bbd32606a6ce1b323
+  metadata.gz: d3dbfbb842fbc7b370d197a6f8020d4c88814c64383fe9cacd8e4338e4cafbdf222573476ba6bcf4d4ab05263b30ab0b63de2b7bc861d06730a303aec0a8d7a9
+  data.tar.gz: 12aba0aeacb8ceb731591f037ef075b3218d6c44710c5852fdfa1e5c769649bea95b4d609a9920ba48ce80b207a82628c488759d4c7cb48317ce71bbee36bfd0
data/.circleci/config.yml CHANGED
@@ -41,7 +41,7 @@ jobs:
       - run: bundle install --path vendor/bundle
       - run: bundle exec rspec --profile --tag functional spec/functional
 
-  kafka-1.0:
+  kafka-1.0.0:
     docker:
       - image: circleci/ruby:2.5.1-node
         environment:
@@ -111,5 +111,5 @@ workflows:
     jobs:
       - unit
       - kafka-0.11
-      - kafka-1.0
-      - kafka-1.1
+      - kafka-1.0.0
+      - kafka-1.1
data/.gitignore CHANGED
@@ -10,3 +10,4 @@
 .env
 *.log
 *.swp
+.byebug_history
data/CHANGELOG.md CHANGED
@@ -4,6 +4,10 @@ Changes and additions to the library will be listed here.
 
 ## Unreleased
 
+## 0.7.1
+
+- Exactly Once Delivery and Transactional Messaging Support (#608).
+
 ## 0.7.0
 
 - Drop support for Kafka 0.10 in favor of native support for Kafka 0.11.
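
As a rough illustration of the feature named in that changelog entry, here is a minimal sketch of how the new delivery guarantees are switched on through `Client#producer`. The keyword arguments come from the `data/lib/kafka/client.rb` change below; the broker address, client ID, and transactional ID are placeholder values.

  require "kafka"

  # Placeholder brokers and IDs; only the keyword arguments are taken from this release.
  kafka = Kafka.new(seed_brokers: ["localhost:9092"], client_id: "example-app")

  # Idempotent producer: retries cannot introduce duplicates within a partition.
  idempotent_producer = kafka.producer(idempotent: true)

  # Transactional producer: writes are grouped into transactions identified by a stable ID.
  transactional_producer = kafka.producer(
    transactional: true,
    transactional_id: "example-app-txn",
    transactional_timeout: 60
  )
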
data/lib/kafka.rb CHANGED
@@ -245,6 +245,38 @@ module Kafka
   class FailedScramAuthentication < SaslScramError
   end
 
+  # The broker received an out of order sequence number
+  class OutOfOrderSequenceNumberError < Error
+  end
+
+  # The broker received a duplicate sequence number
+  class DuplicateSequenceNumberError < Error
+  end
+
+  # Producer attempted an operation with an old epoch. Either there is a newer producer with the same transactionalId, or the producer's transaction has been expired by the broker.
+  class InvalidProducerEpochError < Error
+  end
+
+  # The producer attempted a transactional operation in an invalid state
+  class InvalidTxnStateError < Error
+  end
+
+  # The producer attempted to use a producer id which is not currently assigned to its transactional id
+  class InvalidProducerIDMappingError < Error
+  end
+
+  # The transaction timeout is larger than the maximum value allowed by the broker (as configured by transaction.max.timeout.ms).
+  class InvalidTransactionTimeoutError < Error
+  end
+
+  # The producer attempted to update a transaction while another concurrent operation on the same transaction was ongoing
+  class ConcurrentTransactionError < Error
+  end
+
+  # Indicates that the transaction coordinator sending a WriteTxnMarker is no longer the current coordinator for a given producer
+  class TransactionCoordinatorFencedError < Error
+  end
+
   # Initializes a new Kafka client.
   #
   # @see Client#initialize
@@ -164,6 +164,24 @@ module Kafka
164
164
  send_request(request)
165
165
  end
166
166
 
167
+ def init_producer_id(**options)
168
+ request = Protocol::InitProducerIDRequest.new(**options)
169
+
170
+ send_request(request)
171
+ end
172
+
173
+ def add_partitions_to_txn(**options)
174
+ request = Protocol::AddPartitionsToTxnRequest.new(**options)
175
+
176
+ send_request(request)
177
+ end
178
+
179
+ def end_txn(**options)
180
+ request = Protocol::EndTxnRequest.new(**options)
181
+
182
+ send_request(request)
183
+ end
184
+
167
185
  private
168
186
 
169
187
  def send_request(request)
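
For orientation, a sketch of how one of these new helpers might be invoked, given a connected Kafka::Broker instance `broker`. The keyword names are assumptions based on the request classes listed in this release (their signatures are not shown in this hunk), and the ID and timeout values are placeholders.

  # Assumed keywords; the actual signature lives in
  # data/lib/kafka/protocol/init_producer_id_request.rb (not shown here).
  response = broker.init_producer_id(
    transactional_id: "example-app-txn",   # nil for purely idempotent producers
    transactional_timeout: 60
  )
  Protocol.handle_error(response.error_code)
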
data/lib/kafka/client.rb CHANGED
@@ -2,6 +2,7 @@
 
 require "kafka/ssl_context"
 require "kafka/cluster"
+require "kafka/transaction_manager"
 require "kafka/broker_info"
 require "kafka/producer"
 require "kafka/consumer"
@@ -60,7 +61,7 @@ module Kafka
     # @return [Client]
     def initialize(seed_brokers:, client_id: "ruby-kafka", logger: nil, connect_timeout: nil, socket_timeout: nil,
                    ssl_ca_cert_file_path: nil, ssl_ca_cert: nil, ssl_client_cert: nil, ssl_client_cert_key: nil,
-                   sasl_gssapi_principal: nil, sasl_gssapi_keytab: nil,
+                   ssl_client_cert_chain: nil, sasl_gssapi_principal: nil, sasl_gssapi_keytab: nil,
                    sasl_plain_authzid: '', sasl_plain_username: nil, sasl_plain_password: nil,
                    sasl_scram_username: nil, sasl_scram_password: nil, sasl_scram_mechanism: nil,
                    sasl_over_ssl: true, ssl_ca_certs_from_system: false)
@@ -73,6 +74,7 @@ module Kafka
         ca_cert: ssl_ca_cert,
         client_cert: ssl_client_cert,
         client_cert_key: ssl_client_cert_key,
+        client_cert_chain: ssl_client_cert_chain,
         ca_certs_from_system: ssl_ca_certs_from_system,
       )
 
@@ -158,8 +160,16 @@ module Kafka
         instrumenter: @instrumenter,
       )
 
+      transaction_manager = TransactionManager.new(
+        cluster: @cluster,
+        logger: @logger,
+        idempotent: false,
+        transactional: false
+      )
+
       operation = ProduceOperation.new(
         cluster: @cluster,
+        transaction_manager: transaction_manager,
         buffer: buffer,
         required_acks: 1,
         ack_timeout: 10,
@@ -222,15 +232,39 @@ module Kafka
     # are per-partition rather than per-topic or per-producer.
     #
     # @return [Kafka::Producer] the Kafka producer.
-    def producer(compression_codec: nil, compression_threshold: 1, ack_timeout: 5, required_acks: :all, max_retries: 2, retry_backoff: 1, max_buffer_size: 1000, max_buffer_bytesize: 10_000_000)
+    def producer(
+      compression_codec: nil,
+      compression_threshold: 1,
+      ack_timeout: 5,
+      required_acks: :all,
+      max_retries: 2,
+      retry_backoff: 1,
+      max_buffer_size: 1000,
+      max_buffer_bytesize: 10_000_000,
+      idempotent: false,
+      transactional: false,
+      transactional_id: nil,
+      transactional_timeout: 60
+    )
+      cluster = initialize_cluster
       compressor = Compressor.new(
         codec_name: compression_codec,
         threshold: compression_threshold,
        instrumenter: @instrumenter,
       )
 
+      transaction_manager = TransactionManager.new(
+        cluster: cluster,
+        logger: @logger,
+        idempotent: idempotent,
+        transactional: transactional,
+        transactional_id: transactional_id,
+        transactional_timeout: transactional_timeout,
+      )
+
       Producer.new(
-        cluster: initialize_cluster,
+        cluster: cluster,
+        transaction_manager: transaction_manager,
         logger: @logger,
         instrumenter: @instrumenter,
         compressor: compressor,
@@ -478,7 +512,7 @@ module Kafka
 
       batches.each do |batch|
         batch.messages.each(&block)
-        offsets[batch.partition] = batch.last_offset + 1 unless batch.empty?
+        offsets[batch.partition] = batch.last_offset + 1 unless batch.unknown_last_offset?
       end
     end
   end
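
The new `ssl_client_cert_chain` option slots in next to the existing SSL settings on `Client#initialize`. A brief sketch, with placeholder file paths:

  kafka = Kafka.new(
    seed_brokers: ["kafka1:9092"],
    ssl_ca_cert: File.read("ca_cert.pem"),
    ssl_client_cert: File.read("client_cert.pem"),
    ssl_client_cert_key: File.read("client_cert.key"),
    ssl_client_cert_chain: File.read("client_cert_chain.pem")
  )
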
data/lib/kafka/cluster.rb CHANGED
@@ -111,48 +111,32 @@ module Kafka
       connect_to_broker(get_leader_id(topic, partition))
     end
 
+    # Finds the broker acting as the coordinator of the given group.
+    #
+    # @param group_id: [String]
+    # @return [Broker] the broker that's currently coordinator.
     def get_group_coordinator(group_id:)
       @logger.debug "Getting group coordinator for `#{group_id}`"
-
       refresh_metadata_if_necessary!
+      get_coordinator(Kafka::Protocol::COORDINATOR_TYPE_GROUP, group_id)
+    end
 
-      cluster_info.brokers.each do |broker_info|
-        begin
-          broker = connect_to_broker(broker_info.node_id)
-          response = broker.find_coordinator(
-            coordinator_type: Kafka::Protocol::COORDINATOR_TYPE_GROUP,
-            coordinator_key: group_id
-          )
-
-          Protocol.handle_error(response.error_code, response.error_message)
-
-          coordinator_id = response.coordinator_id
-
-          @logger.debug "Coordinator for group `#{group_id}` is #{coordinator_id}. Connecting..."
-
-          # It's possible that a new broker is introduced to the cluster and
-          # becomes the coordinator before we have a chance to refresh_metadata.
-          coordinator = begin
-            connect_to_broker(coordinator_id)
-          rescue Kafka::NoSuchBroker
-            @logger.debug "Broker #{coordinator_id} missing from broker cache, refreshing"
-            refresh_metadata!
-            connect_to_broker(coordinator_id)
-          end
+    # Finds the broker acting as the coordinator of the given transaction.
+    #
+    # @param transactional_id: [String]
+    # @return [Broker] the broker that's currently coordinator.
+    def get_transaction_coordinator(transactional_id:)
+      @logger.debug "Getting transaction coordinator for `#{transactional_id}`"
 
-          @logger.debug "Connected to coordinator: #{coordinator} for group `#{group_id}`"
+      refresh_metadata_if_necessary!
 
-          return coordinator
-        rescue CoordinatorNotAvailable
-          @logger.debug "Coordinator not available; retrying in 1s"
-          sleep 1
-          retry
-        rescue ConnectionError => e
-          @logger.error "Failed to get group coordinator info from #{broker}: #{e}"
-        end
+      if transactional_id.nil?
+        # Get a random_broker
+        @logger.debug "Transaction ID is not available. Choose a random broker."
+        return random_broker
+      else
+        get_coordinator(Kafka::Protocol::COORDINATOR_TYPE_TRANSACTION, transactional_id)
       end
-
-      raise Kafka::Error, "Failed to find group coordinator"
     end
 
     def partitions_for(topic)
@@ -313,8 +297,7 @@ module Kafka
         topic => broker_partitions.map {|partition|
           {
             partition: partition,
-            time: offset,
-            max_offsets: 1,
+            time: offset
           }
         }
       }
@@ -427,5 +410,45 @@ module Kafka
     def controller_broker
       connect_to_broker(cluster_info.controller_id)
     end
+
+    def get_coordinator(coordinator_type, coordinator_key)
+      cluster_info.brokers.each do |broker_info|
+        begin
+          broker = connect_to_broker(broker_info.node_id)
+          response = broker.find_coordinator(
+            coordinator_type: coordinator_type,
+            coordinator_key: coordinator_key
+          )
+
+          Protocol.handle_error(response.error_code, response.error_message)
+
+          coordinator_id = response.coordinator_id
+
+          @logger.debug "Coordinator for `#{coordinator_key}` is #{coordinator_id}. Connecting..."
+
+          # It's possible that a new broker is introduced to the cluster and
+          # becomes the coordinator before we have a chance to refresh_metadata.
+          coordinator = begin
+            connect_to_broker(coordinator_id)
+          rescue Kafka::NoSuchBroker
+            @logger.debug "Broker #{coordinator_id} missing from broker cache, refreshing"
+            refresh_metadata!
+            connect_to_broker(coordinator_id)
+          end
+
+          @logger.debug "Connected to coordinator: #{coordinator} for `#{coordinator_key}`"
+
+          return coordinator
+        rescue CoordinatorNotAvailable
+          @logger.debug "Coordinator not available; retrying in 1s"
+          sleep 1
+          retry
+        rescue ConnectionError => e
+          @logger.error "Failed to get coordinator info from #{broker}: #{e}"
+        end
+      end
+
+      raise Kafka::Error, "Failed to find coordinator"
+    end
   end
 end
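
Both public lookups now funnel into the shared `get_coordinator` helper, with only the coordinator type and key differing. A sketch of the call shapes, given an internal Kafka::Cluster instance `cluster` (the group and transactional IDs are placeholders):

  # Group coordinator lookup, as before:
  group_coordinator = cluster.get_group_coordinator(group_id: "example-group")

  # New transaction coordinator lookup; a nil transactional_id falls back to a random broker:
  txn_coordinator = cluster.get_transaction_coordinator(transactional_id: "example-app-txn")
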
data/lib/kafka/consumer.rb CHANGED
@@ -310,9 +310,9 @@ module Kafka
         @instrumenter.instrument("process_batch.consumer", notification) do
           begin
             yield batch
-            @current_offsets[batch.topic][batch.partition] = batch.last_offset
+            @current_offsets[batch.topic][batch.partition] = batch.last_offset unless batch.unknown_last_offset?
           rescue => e
-            offset_range = (batch.first_offset..batch.last_offset)
+            offset_range = (batch.first_offset..batch.last_offset || batch.highwater_mark_offset)
             location = "#{batch.topic}/#{batch.partition} in offset range #{offset_range}"
             backtrace = e.backtrace.join("\n")
 
data/lib/kafka/fetch_operation.rb CHANGED
@@ -1,6 +1,7 @@
 # frozen_string_literal: true
 
-require "kafka/fetched_batch"
+require "kafka/fetched_offset_resolver"
+require "kafka/fetched_batch_generator"
 
 module Kafka
 
@@ -27,6 +28,10 @@ module Kafka
       @max_bytes = max_bytes
       @max_wait_time = max_wait_time
       @topics = {}
+
+      @offset_resolver = Kafka::FetchedOffsetResolver.new(
+        logger: logger
+      )
     end
 
     def fetch_from_partition(topic, partition, offset: :latest, max_bytes: 1048576)
@@ -63,8 +68,8 @@ module Kafka
         end
       end
 
-      topics_by_broker.flat_map {|broker, topics|
-        resolve_offsets(broker, topics)
+      topics_by_broker.flat_map do |broker, topics|
+        @offset_resolver.resolve!(broker, topics)
 
         options = {
           max_wait_time: @max_wait_time * 1000, # Kafka expects ms, not secs
@@ -75,8 +80,8 @@ module Kafka
 
         response = broker.fetch_messages(**options)
 
-        response.topics.flat_map {|fetched_topic|
-          fetched_topic.partitions.map {|fetched_partition|
+        response.topics.flat_map do |fetched_topic|
+          fetched_topic.partitions.map do |fetched_partition|
             begin
               Protocol.handle_error(fetched_partition.error_code)
             rescue Kafka::OffsetOutOfRange => e
@@ -92,64 +97,18 @@ module Kafka
              raise e
            end
 
-            messages = fetched_partition.messages.map {|message|
-              FetchedMessage.new(
-                message: message,
-                topic: fetched_topic.name,
-                partition: fetched_partition.partition,
-              )
-            }
-
-            FetchedBatch.new(
-              topic: fetched_topic.name,
-              partition: fetched_partition.partition,
-              highwater_mark_offset: fetched_partition.highwater_mark_offset,
-              messages: messages,
-            )
-          }
-        }
-      }
+            Kafka::FetchedBatchGenerator.new(
+              fetched_topic.name,
+              fetched_partition,
+              logger: @logger
+            ).generate
+          end
+        end
+      end
     rescue Kafka::ConnectionError, Kafka::LeaderNotAvailable, Kafka::NotLeaderForPartition
       @cluster.mark_as_stale!
 
       raise
     end
-
-    private
-
-    def resolve_offsets(broker, topics)
-      pending_topics = {}
-
-      topics.each do |topic, partitions|
-        partitions.each do |partition, options|
-          offset = options.fetch(:fetch_offset)
-          next if offset >= 0
-
-          @logger.debug "Resolving offset `#{offset}` for #{topic}/#{partition}..."
-
-          pending_topics[topic] ||= []
-          pending_topics[topic] << {
-            partition: partition,
-            time: offset,
-            max_offsets: 1,
-          }
-        end
-      end
-
-      return topics if pending_topics.empty?
-
-      response = broker.list_offsets(topics: pending_topics)
-
-      pending_topics.each do |topic, partitions|
-        partitions.each do |options|
-          partition = options.fetch(:partition)
-          resolved_offset = response.offset_for(topic, partition)
-
-          @logger.debug "Offset for #{topic}/#{partition} is #{resolved_offset.inspect}"
-
-          topics[topic][partition][:fetch_offset] = resolved_offset || 0
-        end
-      end
-    end
   end
 end