ruby-kafka 0.7.0 → 0.7.1.beta1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. checksums.yaml +4 -4
  2. data/.circleci/config.yml +3 -3
  3. data/.gitignore +1 -0
  4. data/CHANGELOG.md +4 -0
  5. data/lib/kafka.rb +32 -0
  6. data/lib/kafka/broker.rb +18 -0
  7. data/lib/kafka/client.rb +38 -4
  8. data/lib/kafka/cluster.rb +60 -37
  9. data/lib/kafka/consumer.rb +2 -2
  10. data/lib/kafka/fetch_operation.rb +18 -59
  11. data/lib/kafka/fetched_batch.rb +9 -9
  12. data/lib/kafka/fetched_batch_generator.rb +114 -0
  13. data/lib/kafka/fetched_offset_resolver.rb +48 -0
  14. data/lib/kafka/fetcher.rb +2 -2
  15. data/lib/kafka/produce_operation.rb +52 -14
  16. data/lib/kafka/producer.rb +82 -2
  17. data/lib/kafka/protocol.rb +68 -48
  18. data/lib/kafka/protocol/add_partitions_to_txn_request.rb +34 -0
  19. data/lib/kafka/protocol/add_partitions_to_txn_response.rb +47 -0
  20. data/lib/kafka/protocol/decoder.rb +3 -6
  21. data/lib/kafka/protocol/encoder.rb +6 -11
  22. data/lib/kafka/protocol/end_txn_request.rb +29 -0
  23. data/lib/kafka/protocol/end_txn_response.rb +19 -0
  24. data/lib/kafka/protocol/fetch_request.rb +3 -1
  25. data/lib/kafka/protocol/fetch_response.rb +37 -18
  26. data/lib/kafka/protocol/init_producer_id_request.rb +26 -0
  27. data/lib/kafka/protocol/init_producer_id_response.rb +27 -0
  28. data/lib/kafka/protocol/list_offset_request.rb +8 -2
  29. data/lib/kafka/protocol/list_offset_response.rb +11 -6
  30. data/lib/kafka/protocol/record.rb +9 -0
  31. data/lib/kafka/protocol/record_batch.rb +17 -1
  32. data/lib/kafka/ssl_context.rb +19 -5
  33. data/lib/kafka/transaction_manager.rb +261 -0
  34. data/lib/kafka/transaction_state_machine.rb +72 -0
  35. data/lib/kafka/version.rb +1 -1
  36. data/ruby-kafka.gemspec +1 -1
  37. metadata +20 -4
@@ -10,23 +10,31 @@ module Kafka
10
10
  # @return [Integer]
11
11
  attr_reader :partition
12
12
 
13
+ # @return [Integer]
14
+ attr_reader :last_offset
15
+
13
16
  # @return [Integer] the offset of the most recent message in the partition.
14
17
  attr_reader :highwater_mark_offset
15
18
 
16
19
  # @return [Array<Kafka::FetchedMessage>]
17
20
  attr_accessor :messages
18
21
 
19
- def initialize(topic:, partition:, highwater_mark_offset:, messages:)
22
+ def initialize(topic:, partition:, highwater_mark_offset:, messages:, last_offset: nil)
20
23
  @topic = topic
21
24
  @partition = partition
22
25
  @highwater_mark_offset = highwater_mark_offset
23
26
  @messages = messages
27
+ @last_offset = last_offset
24
28
  end
25
29
 
26
30
  def empty?
27
31
  @messages.empty?
28
32
  end
29
33
 
34
+ def unknown_last_offset?
35
+ @last_offset.nil?
36
+ end
37
+
30
38
  def first_offset
31
39
  if empty?
32
40
  nil
@@ -35,14 +43,6 @@ module Kafka
35
43
  end
36
44
  end
37
45
 
38
- def last_offset
39
- if empty?
40
- highwater_mark_offset - 1
41
- else
42
- messages.last.offset
43
- end
44
- end
45
-
46
46
  def offset_lag
47
47
  if empty?
48
48
  0
@@ -0,0 +1,114 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "kafka/fetched_batch"
4
+
5
+ module Kafka
6
+ class FetchedBatchGenerator
7
+ COMMITTED_TRANSACTION_SIGNAL = "\x00\x00\x00\x01".freeze
8
+ ABORTED_TRANSACTION_SIGNAL = "\x00\x00\x00\x00".freeze
9
+
10
+ def initialize(topic, fetched_partition, logger:)
11
+ @topic = topic
12
+ @fetched_partition = fetched_partition
13
+ @logger = logger
14
+ end
15
+
16
+ def generate
17
+ if @fetched_partition.messages.empty?
18
+ empty_fetched_batch
19
+ elsif @fetched_partition.messages.first.is_a?(Kafka::Protocol::MessageSet)
20
+ extract_messages
21
+ else
22
+ extract_records
23
+ end
24
+ end
25
+
26
+ private
27
+
28
+ def empty_fetched_batch
29
+ FetchedBatch.new(
30
+ topic: @topic,
31
+ partition: @fetched_partition.partition,
32
+ last_offset: nil,
33
+ highwater_mark_offset: @fetched_partition.highwater_mark_offset,
34
+ messages: []
35
+ )
36
+ end
37
+
38
+ def extract_messages
39
+ last_offset = nil
40
+ messages = @fetched_partition.messages.flat_map do |message_set|
41
+ message_set.messages.map do |message|
42
+ last_offset = message.offset if last_offset.nil? || last_offset < message.offset
43
+ FetchedMessage.new(
44
+ message: message,
45
+ topic: @topic,
46
+ partition: @fetched_partition.partition
47
+ )
48
+ end
49
+ end
50
+ FetchedBatch.new(
51
+ topic: @topic,
52
+ partition: @fetched_partition.partition,
53
+ last_offset: last_offset,
54
+ highwater_mark_offset: @fetched_partition.highwater_mark_offset,
55
+ messages: messages
56
+ )
57
+ end
58
+
59
+ def extract_records
60
+ records = []
61
+ last_offset = nil
62
+ aborted_transactions = @fetched_partition.aborted_transactions.sort_by(&:first_offset)
63
+ aborted_producer_ids = {}
64
+
65
+ @fetched_partition.messages.each do |record_batch|
66
+ last_offset = record_batch.last_offset if last_offset.nil? || last_offset < record_batch.last_offset
67
+ # Find the list of aborted producer IDs less than current offset
68
+ unless aborted_transactions.empty?
69
+ if aborted_transactions.first.first_offset <= record_batch.last_offset
70
+ aborted_transaction = aborted_transactions.shift
71
+ aborted_producer_ids[aborted_transaction.producer_id] = aborted_transaction.first_offset
72
+ end
73
+ end
74
+
75
+ if abort_marker?(record_batch)
76
+ # Abort marker, remove the producer from the aborted list
77
+ aborted_producer_ids.delete(record_batch.producer_id)
78
+ elsif aborted_producer_ids.key?(record_batch.producer_id) && record_batch.in_transaction
79
+ # Reject aborted record batch
80
+ @logger.info("Reject #{record_batch.records.size} aborted records of topic '#{@topic}', partition #{@fetched_partition.partition}, from offset #{record_batch.first_offset}")
81
+ next
82
+ end
83
+
84
+ record_batch.records.each do |record|
85
+ unless record.is_control_record
86
+ records << FetchedMessage.new(
87
+ message: record,
88
+ topic: @topic,
89
+ partition: @fetched_partition.partition
90
+ )
91
+ end
92
+ end
93
+ end
94
+
95
+ FetchedBatch.new(
96
+ topic: @topic,
97
+ partition: @fetched_partition.partition,
98
+ last_offset: last_offset,
99
+ highwater_mark_offset: @fetched_partition.highwater_mark_offset,
100
+ messages: records
101
+ )
102
+ end
103
+
104
+ def abort_marker?(record_batch)
105
+ return false unless record_batch.is_control_batch
106
+
107
+ if record_batch.records.empty?
108
+ raise "Invalid control record batch at topic '#{@topic}', partition #{@fetched_partition}"
109
+ end
110
+
111
+ record_batch.records.first.key == ABORTED_TRANSACTION_SIGNAL
112
+ end
113
+ end
114
+ end
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Kafka
4
+ class FetchedOffsetResolver
5
+ def initialize(logger:)
6
+ @logger = logger
7
+ end
8
+
9
+ def resolve!(broker, topics)
10
+ pending_topics = filter_pending_topics(topics)
11
+ return topics if pending_topics.empty?
12
+
13
+ response = broker.list_offsets(topics: pending_topics)
14
+
15
+ pending_topics.each do |topic, partitions|
16
+ partitions.each do |options|
17
+ partition = options.fetch(:partition)
18
+ resolved_offset = response.offset_for(topic, partition)
19
+
20
+ @logger.debug "Offset for #{topic}/#{partition} is #{resolved_offset.inspect}"
21
+
22
+ topics[topic][partition][:fetch_offset] = resolved_offset || 0
23
+ end
24
+ end
25
+ end
26
+
27
+ private
28
+
29
+ def filter_pending_topics(topics)
30
+ pending_topics = {}
31
+ topics.each do |topic, partitions|
32
+ partitions.each do |partition, options|
33
+ offset = options.fetch(:fetch_offset)
34
+ next if offset >= 0
35
+
36
+ @logger.debug "Resolving offset `#{offset}` for #{topic}/#{partition}..."
37
+
38
+ pending_topics[topic] ||= []
39
+ pending_topics[topic] << {
40
+ partition: partition,
41
+ time: offset
42
+ }
43
+ end
44
+ end
45
+ pending_topics
46
+ end
47
+ end
48
+ end
data/lib/kafka/fetcher.rb CHANGED
@@ -144,9 +144,9 @@ module Kafka
144
144
  highwater_mark_offset: batch.highwater_mark_offset,
145
145
  message_count: batch.messages.count,
146
146
  })
147
-
148
- @next_offsets[batch.topic][batch.partition] = batch.last_offset + 1
149
147
  end
148
+
149
+ @next_offsets[batch.topic][batch.partition] = batch.last_offset + 1 unless batch.unknown_last_offset?
150
150
  end
151
151
 
152
152
  @queue << [:batches, batches]
@@ -30,8 +30,9 @@ module Kafka
30
30
  # * `:sent_message_count` – the number of messages that were successfully sent.
31
31
  #
32
32
  class ProduceOperation
33
- def initialize(cluster:, buffer:, compressor:, required_acks:, ack_timeout:, logger:, instrumenter:)
33
+ def initialize(cluster:, transaction_manager:, buffer:, compressor:, required_acks:, ack_timeout:, logger:, instrumenter:)
34
34
  @cluster = cluster
35
+ @transaction_manager = transaction_manager
35
36
  @buffer = buffer
36
37
  @required_acks = required_acks
37
38
  @ack_timeout = ack_timeout
@@ -41,12 +42,23 @@ module Kafka
41
42
  end
42
43
 
43
44
  def execute
45
+ if (@transaction_manager.idempotent? || @transaction_manager.transactional?) && @required_acks != -1
46
+ raise 'You must set required_acks option to :all to use idempotent / transactional production'
47
+ end
48
+
49
+ if @transaction_manager.transactional? && !@transaction_manager.in_transaction?
50
+ raise "Produce operation can only be executed in a pending transaction"
51
+ end
52
+
44
53
  @instrumenter.instrument("send_messages.producer") do |notification|
45
54
  message_count = @buffer.size
46
55
 
47
56
  notification[:message_count] = message_count
48
57
 
49
58
  begin
59
+ if @transaction_manager.idempotent? || @transaction_manager.transactional?
60
+ @transaction_manager.init_producer_id
61
+ end
50
62
  send_buffered_messages
51
63
  ensure
52
64
  notification[:sent_message_count] = message_count - @buffer.size
@@ -58,6 +70,7 @@ module Kafka
58
70
 
59
71
  def send_buffered_messages
60
72
  messages_for_broker = {}
73
+ topic_partitions = {}
61
74
 
62
75
  @buffer.each do |topic, partition, messages|
63
76
  begin
@@ -65,6 +78,9 @@ module Kafka
65
78
 
66
79
  @logger.debug "Current leader for #{topic}/#{partition} is node #{broker}"
67
80
 
81
+ topic_partitions[topic] ||= Set.new
82
+ topic_partitions[topic].add(partition)
83
+
68
84
  messages_for_broker[broker] ||= MessageBuffer.new
69
85
  messages_for_broker[broker].concat(messages, topic: topic, partition: partition)
70
86
  rescue Kafka::Error => e
@@ -81,25 +97,40 @@ module Kafka
81
97
  end
82
98
  end
83
99
 
100
+ # Add topic and partition to transaction
101
+ if @transaction_manager.transactional?
102
+ @transaction_manager.add_partitions_to_transaction(topic_partitions)
103
+ end
104
+
84
105
  messages_for_broker.each do |broker, message_buffer|
85
106
  begin
86
107
  @logger.info "Sending #{message_buffer.size} messages to #{broker}"
87
108
 
88
- messages_for_topics = {}
109
+ records_for_topics = {}
89
110
 
90
111
  message_buffer.each do |topic, partition, records|
91
- record_batch = Protocol::RecordBatch.new(records: records)
92
- messages_for_topics[topic] ||= {}
93
- messages_for_topics[topic][partition] = record_batch
112
+ record_batch = Protocol::RecordBatch.new(
113
+ records: records,
114
+ first_sequence: @transaction_manager.next_sequence_for(
115
+ topic, partition
116
+ ),
117
+ in_transaction: @transaction_manager.transactional?,
118
+ producer_id: @transaction_manager.producer_id,
119
+ producer_epoch: @transaction_manager.producer_epoch
120
+ )
121
+ records_for_topics[topic] ||= {}
122
+ records_for_topics[topic][partition] = record_batch
94
123
  end
95
124
 
96
125
  response = broker.produce(
97
- messages_for_topics: messages_for_topics,
126
+ messages_for_topics: records_for_topics,
127
+ compressor: @compressor,
98
128
  required_acks: @required_acks,
99
129
  timeout: @ack_timeout * 1000, # Kafka expects the timeout in milliseconds.
130
+ transactional_id: @transaction_manager.transactional_id
100
131
  )
101
132
 
102
- handle_response(broker, response) if response
133
+ handle_response(broker, response, records_for_topics) if response
103
134
  rescue ConnectionError => e
104
135
  @logger.error "Could not connect to broker #{broker}: #{e}"
105
136
 
@@ -109,11 +140,12 @@ module Kafka
109
140
  end
110
141
  end
111
142
 
112
- def handle_response(broker, response)
143
+ def handle_response(broker, response, records_for_topics)
113
144
  response.each_partition do |topic_info, partition_info|
114
145
  topic = topic_info.topic
115
146
  partition = partition_info.partition
116
- messages = @buffer.messages_for(topic: topic, partition: partition)
147
+ record_batch = records_for_topics[topic][partition]
148
+ records = record_batch.records
117
149
  ack_time = Time.now
118
150
 
119
151
  begin
@@ -128,14 +160,20 @@ module Kafka
128
160
  raise e
129
161
  end
130
162
 
131
- messages.each_with_index do |message, index|
163
+ if @transaction_manager.idempotent? || @transaction_manager.transactional?
164
+ @transaction_manager.update_sequence_for(
165
+ topic, partition, record_batch.first_sequence + record_batch.size
166
+ )
167
+ end
168
+
169
+ records.each_with_index do |record, index|
132
170
  @instrumenter.instrument("ack_message.producer", {
133
- key: message.key,
134
- value: message.value,
171
+ key: record.key,
172
+ value: record.value,
135
173
  topic: topic,
136
174
  partition: partition,
137
175
  offset: partition_info.offset + index,
138
- delay: ack_time - message.create_time,
176
+ delay: ack_time - record.create_time,
139
177
  })
140
178
  end
141
179
  rescue Kafka::CorruptMessage
@@ -156,7 +194,7 @@ module Kafka
156
194
  rescue Kafka::NotEnoughReplicasAfterAppend
157
195
  @logger.error "Messages written, but to fewer in-sync replicas than required for #{topic}/#{partition}"
158
196
  else
159
- @logger.debug "Successfully appended #{messages.count} messages to #{topic}/#{partition} on #{broker}"
197
+ @logger.debug "Successfully appended #{records.count} messages to #{topic}/#{partition} on #{broker}"
160
198
 
161
199
  # The messages were successfully written; clear them from the buffer.
162
200
  @buffer.clear_messages(topic: topic, partition: partition)
@@ -9,7 +9,6 @@ require "kafka/pending_message"
9
9
  require "kafka/compressor"
10
10
 
11
11
  module Kafka
12
-
13
12
  # Allows sending messages to a Kafka cluster.
14
13
  #
15
14
  # Typically you won't instantiate this class yourself, but rather have {Kafka::Client}
@@ -126,9 +125,11 @@ module Kafka
126
125
  # end
127
126
  #
128
127
  class Producer
128
+ class AbortTransaction < StandardError; end
129
129
 
130
- def initialize(cluster:, logger:, instrumenter:, compressor:, ack_timeout:, required_acks:, max_retries:, retry_backoff:, max_buffer_size:, max_buffer_bytesize:)
130
+ def initialize(cluster:, transaction_manager:, logger:, instrumenter:, compressor:, ack_timeout:, required_acks:, max_retries:, retry_backoff:, max_buffer_size:, max_buffer_bytesize:)
131
131
  @cluster = cluster
132
+ @transaction_manager = transaction_manager
132
133
  @logger = logger
133
134
  @instrumenter = instrumenter
134
135
  @required_acks = required_acks == :all ? -1 : required_acks
@@ -201,6 +202,12 @@ module Kafka
201
202
  "Cannot produce to #{topic}, max buffer bytesize (#{@max_buffer_bytesize} bytes) reached"
202
203
  end
203
204
 
205
+ # If the producer is in transactional mode, all the message production
206
+ # must be used when the producer is currently in transaction
207
+ if @transaction_manager.transactional? && !@transaction_manager.in_transaction?
208
+ raise 'You must trigger begin_transaction before producing messages'
209
+ end
210
+
204
211
  @target_topics.add(topic)
205
212
  @pending_message_queue.write(message)
206
213
 
@@ -267,9 +274,81 @@ module Kafka
267
274
  #
268
275
  # @return [nil]
269
276
  def shutdown
277
+ @transaction_manager.close
270
278
  @cluster.disconnect
271
279
  end
272
280
 
281
+ # Initializes the producer to ready for future transactions. This method
282
+ # should be triggered once, before any transactions are created.
283
+ #
284
+ # @return [nil]
285
+ def init_transactions
286
+ @transaction_manager.init_transactions
287
+ end
288
+
289
+ # Mark the beginning of a transaction. This method transitions the state
290
+ # of the transaction to IN_TRANSACTION.
291
+ #
292
+ # All producing operations can only be executed while the transaction is
293
+ # in this state. The records are persisted by Kafka brokers, but not visible
294
+ # to the consumers until the #commit_transaction method is triggered. After a
295
+ # timeout period without being committed, the transaction times out and is
296
+ # considered as aborted.
297
+ #
298
+ # @return [nil]
299
+ def begin_transaction
300
+ @transaction_manager.begin_transaction
301
+ end
302
+
303
+ # This method commits the pending transaction, marks all the produced
304
+ # records committed. After that, they are visible to the consumers.
305
+ #
306
+ # This method can only be called if and only if the current transaction
307
+ # is at IN_TRANSACTION state.
308
+ #
309
+ # @return [nil]
310
+ def commit_transaction
311
+ @transaction_manager.commit_transaction
312
+ end
313
+
314
+ # This method aborts the pending transaction, marking all the produced
315
+ # records aborted. All the records will be wiped out by the brokers and the
316
+ # consumers don't have a chance to consume those messages, unless they enable
317
+ # consuming uncommitted option.
318
+ #
319
+ # This method can only be called if and only if the current transaction
320
+ # is at IN_TRANSACTION state.
321
+ #
322
+ # @return [nil]
323
+ def abort_transaction
324
+ @transaction_manager.abort_transaction
325
+ end
326
+
327
+ # Syntactic sugar to enable easier transaction usage. Do the following steps
328
+ #
329
+ # - Start the transaction (with Producer#begin_transaction)
330
+ # - Yield the given block
331
+ # - Commit the transaction (with Producer#commit_transaction)
332
+ #
333
+ # If the block raises exception, the transaction is automatically aborted
334
+ # *before* bubble up the exception.
335
+ #
336
+ # If the block raises Kafka::Producer::AbortTransaction indicator exception,
337
+ # it aborts the transaction silently, without throwing up that exception.
338
+ #
339
+ # @return [nil]
340
+ def transaction
341
+ raise 'This method requires a block' unless block_given?
342
+ begin_transaction
343
+ yield
344
+ commit_transaction
345
+ rescue Kafka::Producer::AbortTransaction
346
+ abort_transaction
347
+ rescue
348
+ abort_transaction
349
+ raise
350
+ end
351
+
273
352
  private
274
353
 
275
354
  def deliver_messages_with_retries(notification)
@@ -279,6 +358,7 @@ module Kafka
279
358
 
280
359
  operation = ProduceOperation.new(
281
360
  cluster: @cluster,
361
+ transaction_manager: @transaction_manager,
282
362
  buffer: @buffer,
283
363
  required_acks: @required_acks,
284
364
  ack_timeout: @ack_timeout,