ruby-kafka 0.7.0 → 0.7.1.beta1

Files changed (37)
  1. checksums.yaml +4 -4
  2. data/.circleci/config.yml +3 -3
  3. data/.gitignore +1 -0
  4. data/CHANGELOG.md +4 -0
  5. data/lib/kafka.rb +32 -0
  6. data/lib/kafka/broker.rb +18 -0
  7. data/lib/kafka/client.rb +38 -4
  8. data/lib/kafka/cluster.rb +60 -37
  9. data/lib/kafka/consumer.rb +2 -2
  10. data/lib/kafka/fetch_operation.rb +18 -59
  11. data/lib/kafka/fetched_batch.rb +9 -9
  12. data/lib/kafka/fetched_batch_generator.rb +114 -0
  13. data/lib/kafka/fetched_offset_resolver.rb +48 -0
  14. data/lib/kafka/fetcher.rb +2 -2
  15. data/lib/kafka/produce_operation.rb +52 -14
  16. data/lib/kafka/producer.rb +82 -2
  17. data/lib/kafka/protocol.rb +68 -48
  18. data/lib/kafka/protocol/add_partitions_to_txn_request.rb +34 -0
  19. data/lib/kafka/protocol/add_partitions_to_txn_response.rb +47 -0
  20. data/lib/kafka/protocol/decoder.rb +3 -6
  21. data/lib/kafka/protocol/encoder.rb +6 -11
  22. data/lib/kafka/protocol/end_txn_request.rb +29 -0
  23. data/lib/kafka/protocol/end_txn_response.rb +19 -0
  24. data/lib/kafka/protocol/fetch_request.rb +3 -1
  25. data/lib/kafka/protocol/fetch_response.rb +37 -18
  26. data/lib/kafka/protocol/init_producer_id_request.rb +26 -0
  27. data/lib/kafka/protocol/init_producer_id_response.rb +27 -0
  28. data/lib/kafka/protocol/list_offset_request.rb +8 -2
  29. data/lib/kafka/protocol/list_offset_response.rb +11 -6
  30. data/lib/kafka/protocol/record.rb +9 -0
  31. data/lib/kafka/protocol/record_batch.rb +17 -1
  32. data/lib/kafka/ssl_context.rb +19 -5
  33. data/lib/kafka/transaction_manager.rb +261 -0
  34. data/lib/kafka/transaction_state_machine.rb +72 -0
  35. data/lib/kafka/version.rb +1 -1
  36. data/ruby-kafka.gemspec +1 -1
  37. metadata +20 -4
data/lib/kafka/fetched_batch.rb CHANGED
@@ -10,23 +10,31 @@ module Kafka
     # @return [Integer]
     attr_reader :partition
 
+    # @return [Integer]
+    attr_reader :last_offset
+
     # @return [Integer] the offset of the most recent message in the partition.
     attr_reader :highwater_mark_offset
 
     # @return [Array<Kafka::FetchedMessage>]
     attr_accessor :messages
 
-    def initialize(topic:, partition:, highwater_mark_offset:, messages:)
+    def initialize(topic:, partition:, highwater_mark_offset:, messages:, last_offset: nil)
       @topic = topic
       @partition = partition
       @highwater_mark_offset = highwater_mark_offset
       @messages = messages
+      @last_offset = last_offset
     end
 
     def empty?
       @messages.empty?
     end
 
+    def unknown_last_offset?
+      @last_offset.nil?
+    end
+
     def first_offset
       if empty?
         nil
@@ -35,14 +43,6 @@ module Kafka
       end
     end
 
-    def last_offset
-      if empty?
-        highwater_mark_offset - 1
-      else
-        messages.last.offset
-      end
-    end
-
     def offset_lag
       if empty?
         0
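
The net effect: `last_offset` is no longer derived from the messages (or from `highwater_mark_offset - 1` for empty batches). It is supplied by whoever builds the batch and may be `nil` when unknown. A minimal sketch of the new surface, using only the constructor and predicate shown in this diff:

  batch = Kafka::FetchedBatch.new(
    topic: "events",
    partition: 0,
    highwater_mark_offset: 42,
    messages: [],
    last_offset: nil  # not known for this fetch
  )

  batch.unknown_last_offset? #=> true, so callers must skip offset bookkeeping

The fetcher.rb change further down is the consumer of this predicate: it only advances its next-offset table when the batch actually knows its last offset.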
data/lib/kafka/fetched_batch_generator.rb ADDED
@@ -0,0 +1,114 @@
+# frozen_string_literal: true
+
+require "kafka/fetched_batch"
+
+module Kafka
+  class FetchedBatchGenerator
+    COMMITTED_TRANSACTION_SIGNAL = "\x00\x00\x00\x01".freeze
+    ABORTED_TRANSACTION_SIGNAL = "\x00\x00\x00\x00".freeze
+
+    def initialize(topic, fetched_partition, logger:)
+      @topic = topic
+      @fetched_partition = fetched_partition
+      @logger = logger
+    end
+
+    def generate
+      if @fetched_partition.messages.empty?
+        empty_fetched_batch
+      elsif @fetched_partition.messages.first.is_a?(Kafka::Protocol::MessageSet)
+        extract_messages
+      else
+        extract_records
+      end
+    end
+
+    private
+
+    def empty_fetched_batch
+      FetchedBatch.new(
+        topic: @topic,
+        partition: @fetched_partition.partition,
+        last_offset: nil,
+        highwater_mark_offset: @fetched_partition.highwater_mark_offset,
+        messages: []
+      )
+    end
+
+    def extract_messages
+      last_offset = nil
+      messages = @fetched_partition.messages.flat_map do |message_set|
+        message_set.messages.map do |message|
+          last_offset = message.offset if last_offset.nil? || last_offset < message.offset
+          FetchedMessage.new(
+            message: message,
+            topic: @topic,
+            partition: @fetched_partition.partition
+          )
+        end
+      end
+      FetchedBatch.new(
+        topic: @topic,
+        partition: @fetched_partition.partition,
+        last_offset: last_offset,
+        highwater_mark_offset: @fetched_partition.highwater_mark_offset,
+        messages: messages
+      )
+    end
+
+    def extract_records
+      records = []
+      last_offset = nil
+      aborted_transactions = @fetched_partition.aborted_transactions.sort_by(&:first_offset)
+      aborted_producer_ids = {}
+
+      @fetched_partition.messages.each do |record_batch|
+        last_offset = record_batch.last_offset if last_offset.nil? || last_offset < record_batch.last_offset
+        # Find the list of aborted producer IDs less than current offset
+        unless aborted_transactions.empty?
+          if aborted_transactions.first.first_offset <= record_batch.last_offset
+            aborted_transaction = aborted_transactions.shift
+            aborted_producer_ids[aborted_transaction.producer_id] = aborted_transaction.first_offset
+          end
+        end
+
+        if abort_marker?(record_batch)
+          # Abort marker, remove the producer from the aborted list
+          aborted_producer_ids.delete(record_batch.producer_id)
+        elsif aborted_producer_ids.key?(record_batch.producer_id) && record_batch.in_transaction
+          # Reject aborted record batch
+          @logger.info("Reject #{record_batch.records.size} aborted records of topic '#{@topic}', partition #{@fetched_partition.partition}, from offset #{record_batch.first_offset}")
+          next
+        end
+
+        record_batch.records.each do |record|
+          unless record.is_control_record
+            records << FetchedMessage.new(
+              message: record,
+              topic: @topic,
+              partition: @fetched_partition.partition
+            )
+          end
+        end
+      end
+
+      FetchedBatch.new(
+        topic: @topic,
+        partition: @fetched_partition.partition,
+        last_offset: last_offset,
+        highwater_mark_offset: @fetched_partition.highwater_mark_offset,
+        messages: records
+      )
+    end
+
+    def abort_marker?(record_batch)
+      return false unless record_batch.is_control_batch
+
+      if record_batch.records.empty?
+        raise "Invalid control record batch at topic '#{@topic}', partition #{@fetched_partition}"
+      end
+
+      record_batch.records.first.key == ABORTED_TRANSACTION_SIGNAL
+    end
+  end
+end
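
FetchedBatchGenerator now owns the logic of turning one partition's fetch data into a FetchedBatch: it handles both the legacy MessageSet format and the v2 record-batch format, and in the latter case filters out records belonging to aborted transactions. A hedged usage sketch; `fetched_partition` stands for a single partition's data as decoded from a fetch response, which is an assumption about how fetch_operation.rb wires this up:

  batch = Kafka::FetchedBatchGenerator.new(
    "events",          # topic name (positional)
    fetched_partition, # one partition from the fetch response
    logger: logger
  ).generate

  batch.messages    # aborted transactional records are already filtered out
  batch.last_offset # highest offset seen, even if every record was rejected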
data/lib/kafka/fetched_offset_resolver.rb ADDED
@@ -0,0 +1,48 @@
+# frozen_string_literal: true
+
+module Kafka
+  class FetchedOffsetResolver
+    def initialize(logger:)
+      @logger = logger
+    end
+
+    def resolve!(broker, topics)
+      pending_topics = filter_pending_topics(topics)
+      return topics if pending_topics.empty?
+
+      response = broker.list_offsets(topics: pending_topics)
+
+      pending_topics.each do |topic, partitions|
+        partitions.each do |options|
+          partition = options.fetch(:partition)
+          resolved_offset = response.offset_for(topic, partition)
+
+          @logger.debug "Offset for #{topic}/#{partition} is #{resolved_offset.inspect}"
+
+          topics[topic][partition][:fetch_offset] = resolved_offset || 0
+        end
+      end
+    end
+
+    private
+
+    def filter_pending_topics(topics)
+      pending_topics = {}
+      topics.each do |topic, partitions|
+        partitions.each do |partition, options|
+          offset = options.fetch(:fetch_offset)
+          next if offset >= 0
+
+          @logger.debug "Resolving offset `#{offset}` for #{topic}/#{partition}..."
+
+          pending_topics[topic] ||= []
+          pending_topics[topic] << {
+            partition: partition,
+            time: offset
+          }
+        end
+      end
+      pending_topics
+    end
+  end
+end
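
The resolver mutates the topics hash in place: any partition whose `:fetch_offset` is negative is treated as a logical offset (in the Kafka list-offsets API, -1 means latest and -2 means earliest) and is swapped for a concrete offset from the broker. A sketch under the assumption that `broker` is an already-connected Kafka::Broker:

  topics = {
    "events" => {
      0 => { fetch_offset: -2 },  # :earliest, sent to the broker for resolution
      1 => { fetch_offset: 512 }  # already concrete, left untouched
    }
  }

  Kafka::FetchedOffsetResolver.new(logger: logger).resolve!(broker, topics)
  topics["events"][0][:fetch_offset] #=> earliest available offset, per the broker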
data/lib/kafka/fetcher.rb CHANGED
@@ -144,9 +144,9 @@ module Kafka
           highwater_mark_offset: batch.highwater_mark_offset,
           message_count: batch.messages.count,
         })
-
-        @next_offsets[batch.topic][batch.partition] = batch.last_offset + 1
       end
+
+      @next_offsets[batch.topic][batch.partition] = batch.last_offset + 1 unless batch.unknown_last_offset?
     end
 
     @queue << [:batches, batches]
data/lib/kafka/produce_operation.rb CHANGED
@@ -30,8 +30,9 @@ module Kafka
   # * `:sent_message_count` – the number of messages that were successfully sent.
   #
   class ProduceOperation
-    def initialize(cluster:, buffer:, compressor:, required_acks:, ack_timeout:, logger:, instrumenter:)
+    def initialize(cluster:, transaction_manager:, buffer:, compressor:, required_acks:, ack_timeout:, logger:, instrumenter:)
       @cluster = cluster
+      @transaction_manager = transaction_manager
       @buffer = buffer
       @required_acks = required_acks
       @ack_timeout = ack_timeout
@@ -41,12 +42,23 @@ module Kafka
     end
 
     def execute
+      if (@transaction_manager.idempotent? || @transaction_manager.transactional?) && @required_acks != -1
+        raise 'You must set required_acks option to :all to use idempotent / transactional production'
+      end
+
+      if @transaction_manager.transactional? && !@transaction_manager.in_transaction?
+        raise "Produce operation can only be executed in a pending transaction"
+      end
+
       @instrumenter.instrument("send_messages.producer") do |notification|
         message_count = @buffer.size
 
         notification[:message_count] = message_count
 
         begin
+          if @transaction_manager.idempotent? || @transaction_manager.transactional?
+            @transaction_manager.init_producer_id
+          end
           send_buffered_messages
         ensure
           notification[:sent_message_count] = message_count - @buffer.size
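
The first guard encodes a protocol rule: idempotent and transactional writes are only safe when every in-sync replica acknowledges them, i.e. acks = -1. Since Producer#initialize maps :all to -1 (see the producer.rb diff below), a conforming configuration looks roughly like this; treat the `idempotent:` option as an assumption about the Client#producer signature (client.rb also changed in this release, but its diff isn't shown here):

  producer = kafka.producer(
    idempotent: true,     # assumed option name; enables the transaction manager
    required_acks: :all   # mapped to -1; anything else raises in ProduceOperation#execute
  )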
@@ -58,6 +70,7 @@ module Kafka
 
     def send_buffered_messages
       messages_for_broker = {}
+      topic_partitions = {}
 
       @buffer.each do |topic, partition, messages|
         begin
@@ -65,6 +78,9 @@ module Kafka
 
           @logger.debug "Current leader for #{topic}/#{partition} is node #{broker}"
 
+          topic_partitions[topic] ||= Set.new
+          topic_partitions[topic].add(partition)
+
           messages_for_broker[broker] ||= MessageBuffer.new
           messages_for_broker[broker].concat(messages, topic: topic, partition: partition)
         rescue Kafka::Error => e
@@ -81,25 +97,40 @@ module Kafka
         end
       end
 
+      # Add topic and partition to transaction
+      if @transaction_manager.transactional?
+        @transaction_manager.add_partitions_to_transaction(topic_partitions)
+      end
+
       messages_for_broker.each do |broker, message_buffer|
         begin
           @logger.info "Sending #{message_buffer.size} messages to #{broker}"
 
-          messages_for_topics = {}
+          records_for_topics = {}
 
           message_buffer.each do |topic, partition, records|
-            record_batch = Protocol::RecordBatch.new(records: records)
-            messages_for_topics[topic] ||= {}
-            messages_for_topics[topic][partition] = record_batch
+            record_batch = Protocol::RecordBatch.new(
+              records: records,
+              first_sequence: @transaction_manager.next_sequence_for(
+                topic, partition
+              ),
+              in_transaction: @transaction_manager.transactional?,
+              producer_id: @transaction_manager.producer_id,
+              producer_epoch: @transaction_manager.producer_epoch
+            )
+            records_for_topics[topic] ||= {}
+            records_for_topics[topic][partition] = record_batch
          end
 
           response = broker.produce(
-            messages_for_topics: messages_for_topics,
+            messages_for_topics: records_for_topics,
+            compressor: @compressor,
             required_acks: @required_acks,
             timeout: @ack_timeout * 1000, # Kafka expects the timeout in milliseconds.
+            transactional_id: @transaction_manager.transactional_id
           )
 
-          handle_response(broker, response) if response
+          handle_response(broker, response, records_for_topics) if response
        rescue ConnectionError => e
          @logger.error "Could not connect to broker #{broker}: #{e}"
 
@@ -109,11 +140,12 @@ module Kafka
       end
     end
 
-    def handle_response(broker, response)
+    def handle_response(broker, response, records_for_topics)
       response.each_partition do |topic_info, partition_info|
         topic = topic_info.topic
         partition = partition_info.partition
-        messages = @buffer.messages_for(topic: topic, partition: partition)
+        record_batch = records_for_topics[topic][partition]
+        records = record_batch.records
         ack_time = Time.now
 
         begin
@@ -128,14 +160,20 @@ module Kafka
            raise e
          end
 
-          messages.each_with_index do |message, index|
+          if @transaction_manager.idempotent? || @transaction_manager.transactional?
+            @transaction_manager.update_sequence_for(
+              topic, partition, record_batch.first_sequence + record_batch.size
+            )
+          end
+
+          records.each_with_index do |record, index|
            @instrumenter.instrument("ack_message.producer", {
-              key: message.key,
-              value: message.value,
+              key: record.key,
+              value: record.value,
              topic: topic,
              partition: partition,
              offset: partition_info.offset + index,
-              delay: ack_time - message.create_time,
+              delay: ack_time - record.create_time,
            })
          end
        rescue Kafka::CorruptMessage
@@ -156,7 +194,7 @@ module Kafka
        rescue Kafka::NotEnoughReplicasAfterAppend
          @logger.error "Messages written, but to fewer in-sync replicas than required for #{topic}/#{partition}"
        else
-          @logger.debug "Successfully appended #{messages.count} messages to #{topic}/#{partition} on #{broker}"
+          @logger.debug "Successfully appended #{records.count} messages to #{topic}/#{partition} on #{broker}"
 
          # The messages were successfully written; clear them from the buffer.
          @buffer.clear_messages(topic: topic, partition: partition)
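
The sequence bookkeeping in handle_response is what makes idempotence work: each record batch carries the first sequence number for its topic/partition, and a successful append advances the counter by the batch size, so a retried batch re-sends the same (producer_id, producer_epoch, sequence) triple and the broker can discard the duplicate instead of appending it twice. A worked sketch of the arithmetic (the manager's internals are not shown in this diff; only the update expression above is):

  # next_sequence_for("events", 0)                  #=> 120
  # batch of 5 records => first_sequence = 120, size = 5
  # broker acks => update_sequence_for("events", 0, 120 + 5)
  # next_sequence_for("events", 0)                  #=> 125
  # a retry of the same batch still carries sequence 120 and is deduplicated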
data/lib/kafka/producer.rb CHANGED
@@ -9,7 +9,6 @@ require "kafka/pending_message"
 require "kafka/compressor"
 
 module Kafka
-
   # Allows sending messages to a Kafka cluster.
   #
   # Typically you won't instantiate this class yourself, but rather have {Kafka::Client}
@@ -126,9 +125,11 @@ module Kafka
   # end
   #
   class Producer
+    class AbortTransaction < StandardError; end
 
-    def initialize(cluster:, logger:, instrumenter:, compressor:, ack_timeout:, required_acks:, max_retries:, retry_backoff:, max_buffer_size:, max_buffer_bytesize:)
+    def initialize(cluster:, transaction_manager:, logger:, instrumenter:, compressor:, ack_timeout:, required_acks:, max_retries:, retry_backoff:, max_buffer_size:, max_buffer_bytesize:)
       @cluster = cluster
+      @transaction_manager = transaction_manager
       @logger = logger
       @instrumenter = instrumenter
       @required_acks = required_acks == :all ? -1 : required_acks
@@ -201,6 +202,12 @@ module Kafka
         "Cannot produce to #{topic}, max buffer bytesize (#{@max_buffer_bytesize} bytes) reached"
       end
 
+      # In transactional mode, messages may only be produced while a
+      # transaction is in progress
+      if @transaction_manager.transactional? && !@transaction_manager.in_transaction?
+        raise 'You must trigger begin_transaction before producing messages'
+      end
+
       @target_topics.add(topic)
       @pending_message_queue.write(message)
 
@@ -267,9 +274,81 @@ module Kafka
   #
   # @return [nil]
   def shutdown
+    @transaction_manager.close
     @cluster.disconnect
   end
 
+  # Initializes the producer so that it is ready for future transactions.
+  # This method should be called once, before any transactions are created.
+  #
+  # @return [nil]
+  def init_transactions
+    @transaction_manager.init_transactions
+  end
+
+  # Marks the beginning of a transaction. This method transitions the
+  # transaction's state to IN_TRANSACTION.
+  #
+  # All producing operations can only be executed while the transaction is
+  # in this state. The records are persisted by the Kafka brokers but are
+  # not visible to consumers until #commit_transaction is called. If the
+  # transaction is not committed within the timeout period, it times out
+  # and is considered aborted.
+  #
+  # @return [nil]
+  def begin_transaction
+    @transaction_manager.begin_transaction
+  end
+
+  # Commits the pending transaction and marks all the produced records as
+  # committed. After that, they are visible to consumers.
+  #
+  # This method can only be called while the current transaction is in
+  # the IN_TRANSACTION state.
+  #
+  # @return [nil]
+  def commit_transaction
+    @transaction_manager.commit_transaction
+  end
+
+  # Aborts the pending transaction and marks all the produced records as
+  # aborted. The records are wiped out by the brokers, and consumers never
+  # see them, unless they have enabled the option to consume uncommitted
+  # messages.
+  #
+  # This method can only be called while the current transaction is in
+  # the IN_TRANSACTION state.
+  #
+  # @return [nil]
+  def abort_transaction
+    @transaction_manager.abort_transaction
+  end
+
+  # Syntactic sugar that makes transactions easier to use. It does the following:
+  #
+  # - Starts the transaction (with Producer#begin_transaction)
+  # - Yields the given block
+  # - Commits the transaction (with Producer#commit_transaction)
+  #
+  # If the block raises an exception, the transaction is automatically
+  # aborted *before* the exception bubbles up.
+  #
+  # If the block raises the Kafka::Producer::AbortTransaction indicator
+  # exception, the transaction is aborted silently, without re-raising it.
+  #
+  # @return [nil]
+  def transaction
+    raise 'This method requires a block' unless block_given?
+    begin_transaction
+    yield
+    commit_transaction
+  rescue Kafka::Producer::AbortTransaction
+    abort_transaction
+  rescue
+    abort_transaction
+    raise
+  end
+
   private
 
   def deliver_messages_with_retries(notification)
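
Put together, the new methods form the producer-side transactional workflow. A usage sketch built only from the methods added above (producer construction is elided; `something_wrong?` is a hypothetical application check):

  producer.init_transactions  # once, before the first transaction

  producer.transaction do
    producer.produce("charge accepted", topic: "payments")
    producer.produce("receipt queued", topic: "notifications")
    producer.deliver_messages

    # aborts silently, without re-raising:
    raise Kafka::Producer::AbortTransaction if something_wrong?
  end
  # any other exception aborts the transaction and then bubbles up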
@@ -279,6 +358,7 @@ module Kafka
 
      operation = ProduceOperation.new(
        cluster: @cluster,
+       transaction_manager: @transaction_manager,
        buffer: @buffer,
        required_acks: @required_acks,
        ack_timeout: @ack_timeout,