ruby-kafka 0.7.0.alpha1 → 0.7.0.alpha2

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 3a913d7cca1f36c5d37398c39620db551399b18073ae1cc11eb110e0d32f8a39
- data.tar.gz: 5ccef5d293d0d8732eb592496468f8f7840a4a8a62ca1982c0c81d7b224a2f93
+ metadata.gz: 2ddcaacf16a7990d53463e5302a389cae33c7782d279ada36a7b32d229a302a5
+ data.tar.gz: 872a5d08ec43a2e4ad550b362bacbcf3c5a523de8bcea49ebc240086b7bba74d
  SHA512:
- metadata.gz: e21b28dd124d83ca1c470f807e21ceb1d4c117a6aa9f22103f405752f4212fc5119d969b09e58a6bd35dfe56ea7611274dd79cb7c10fe8c573ce6031b3a8a547
- data.tar.gz: dab421eb3da87aa62e134e16a0cafb73825df1ca09b7dd3c702d12e9235c303ec286b581b4f5c87fafc95ae19983d76db3995fb1eda5ef922ba1bae4f224bbbc
+ metadata.gz: 4352915740c56ff947cf91e951a9298f39a78db7c51421456a8db2c037f642ab6f791bfe3eb1bbeff01700d573e342a020b0ed7896c481996cf59d6e21d3b4f2
+ data.tar.gz: 8dec7bfdb51ff8321c176b29c6a3cf378b837c807aaf050b8cefe8ce5224044f016f16234f9dfae5104946882fe7817d69650206219e6640f44738c60bff682d
data/.circleci/config.yml CHANGED
@@ -11,38 +11,6 @@ jobs:
  - run: bundle exec rspec
  - run: bundle exec rubocop

- kafka-0.10:
- docker:
- - image: circleci/ruby:2.4.1-node
- environment:
- LOG_LEVEL: DEBUG
- - image: wurstmeister/zookeeper
- - image: wurstmeister/kafka:0.10.2.1
- environment:
- KAFKA_ADVERTISED_HOST_NAME: localhost
- KAFKA_ADVERTISED_PORT: 9092
- KAFKA_PORT: 9092
- KAFKA_ZOOKEEPER_CONNECT: localhost:2181
- KAFKA_DELETE_TOPIC_ENABLE: true
- - image: wurstmeister/kafka:0.10.2.1
- environment:
- KAFKA_ADVERTISED_HOST_NAME: localhost
- KAFKA_ADVERTISED_PORT: 9093
- KAFKA_PORT: 9093
- KAFKA_ZOOKEEPER_CONNECT: localhost:2181
- KAFKA_DELETE_TOPIC_ENABLE: true
- - image: wurstmeister/kafka:0.10.2.1
- environment:
- KAFKA_ADVERTISED_HOST_NAME: localhost
- KAFKA_ADVERTISED_PORT: 9094
- KAFKA_PORT: 9094
- KAFKA_ZOOKEEPER_CONNECT: localhost:2181
- KAFKA_DELETE_TOPIC_ENABLE: true
- steps:
- - checkout
- - run: bundle install --path vendor/bundle
- - run: bundle exec rspec --profile --tag functional spec/functional
-
  kafka-0.11:
  docker:
  - image: circleci/ruby:2.4.1-node
@@ -110,6 +78,5 @@ workflows:
  test:
  jobs:
  - unit
- - kafka-0.10
  - kafka-0.11
  - kafka-1.0
data/lib/kafka.rb CHANGED
@@ -42,6 +42,10 @@ module Kafka
  class CorruptMessage < ProtocolError
  end

+ # Raised when the record array length doesn't match the real number of received records
+ class InsufficientDataMessage < Error
+ end
+
  class UnknownError < ProtocolError
  end

@@ -30,13 +30,31 @@ module Kafka
  @instrumenter = instrumenter
  end

- # @param message_set [Protocol::MessageSet]
+ # @param record_batch [Protocol::RecordBatch]
  # @param offset [Integer] used to simulate broker behaviour in tests
- # @return [Protocol::MessageSet]
- def compress(message_set, offset: -1)
+ # @return [Protocol::RecordBatch]
+ def compress(record_batch, offset: -1)
+ if record_batch.is_a?(Protocol::RecordBatch)
+ compress_record_batch(record_batch)
+ else
+ # Deprecated message set format
+ compress_message_set(record_batch, offset)
+ end
+ end
+
+ private
+
+ def compress_message_set(message_set, offset)
  return message_set if @codec.nil? || message_set.size < @threshold

+ data = Protocol::Encoder.encode_with(message_set)
+ compressed_data = @codec.compress(data)
+
+ @instrumenter.instrument("compress.compressor") do |notification|
+ notification[:message_count] = message_set.size
+ notification[:uncompressed_bytesize] = data.bytesize
+ notification[:compressed_bytesize] = compressed_data.bytesize
+ end

  wrapper_message = Protocol::Message.new(
  value: compressed_data,
@@ -47,20 +65,21 @@ module Kafka
  Protocol::MessageSet.new(messages: [wrapper_message])
  end

- private
+ def compress_record_batch(record_batch)
+ if @codec.nil? || record_batch.size < @threshold
+ record_batch.codec_id = 0
+ return Protocol::Encoder.encode_with(record_batch)
+ end

- def compress_data(message_set)
- data = Protocol::Encoder.encode_with(message_set)
+ record_batch.codec_id = @codec.codec_id
+ data = Protocol::Encoder.encode_with(record_batch)

  @instrumenter.instrument("compress.compressor") do |notification|
- compressed_data = @codec.compress(data)
-
- notification[:message_count] = message_set.size
- notification[:uncompressed_bytesize] = data.bytesize
- notification[:compressed_bytesize] = compressed_data.bytesize
-
- compressed_data
+ notification[:message_count] = record_batch.size
+ notification[:compressed_bytesize] = data.bytesize
  end
+
+ data
  end
  end
  end
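
With this change Compressor#compress dispatches on the argument type: a Protocol::RecordBatch gets its codec_id set and comes back as already-encoded wire bytes (the batch compresses its own records while encoding), while a legacy Protocol::MessageSet still comes back wrapped in a single compressed message. A rough usage sketch; the constructor keywords codec_name:, threshold: and instrumenter: are assumed to be unchanged from 0.6.x, and the instrumenter below is a no-op stand-in:

    require "kafka"

    # No-op stand-in for the Kafka::Instrumenter the producer normally injects.
    instrumenter = Class.new {
      def instrument(*)
        yield({}) if block_given?
      end
    }.new

    compressor = Kafka::Compressor.new(codec_name: :gzip, threshold: 1, instrumenter: instrumenter)

    # New path: a RecordBatch comes back as encoded bytes with codec_id set to gzip.
    batch = Kafka::Protocol::RecordBatch.new(records: [Kafka::Protocol::Record.new(value: "hi")])
    compressor.compress(batch)  # => String of wire bytes

    # Deprecated path: a MessageSet comes back as a MessageSet wrapping one compressed message.
    set = Kafka::Protocol::MessageSet.new(messages: [Kafka::Protocol::Message.new(value: "hi")])
    compressor.compress(set)    # => Kafka::Protocol::MessageSet
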
@@ -221,7 +221,7 @@ module Kafka

  @instrumenter.instrument("process_message.consumer", notification) do
  begin
- yield message
+ yield message unless message.is_control_record
  @current_offsets[message.topic][message.partition] = message.offset
  rescue => e
  location = "#{message.topic}/#{message.partition} at offset #{message.offset}"
@@ -289,6 +289,9 @@ module Kafka

  batches.each do |batch|
  unless batch.empty?
+ raw_messages = batch.messages
+ batch.messages = raw_messages.reject(&:is_control_record)
+
  notification = {
  topic: batch.topic,
  partition: batch.partition,
@@ -314,9 +317,10 @@ module Kafka
  @logger.error "Exception raised when processing #{location} -- #{e.class}: #{e}\n#{backtrace}"

  raise ProcessingError.new(batch.topic, batch.partition, offset_range)
+ ensure
+ batch.messages = raw_messages
  end
  end
-
  mark_message_as_processed(batch.messages.last) if automatically_mark_as_processed

  # We've successfully processed a batch from the partition, so we can clear
@@ -14,7 +14,7 @@ module Kafka
  attr_reader :highwater_mark_offset

  # @return [Array<Kafka::FetchedMessage>]
- attr_reader :messages
+ attr_accessor :messages

  def initialize(topic:, partition:, highwater_mark_offset:, messages:)
  @topic = topic
@@ -33,5 +33,10 @@ module Kafka
  def create_time
  @message.create_time
  end
+
+ # @return [Boolean] whether this record is a control record
+ def is_control_record
+ @message.is_control_record
+ end
  end
  end
@@ -17,7 +17,7 @@ module Kafka
  end

  def write(value:, key:, topic:, partition:, create_time: Time.now)
- message = Protocol::Message.new(key: key, value: value, create_time: create_time)
+ message = Protocol::Record.new(key: key, value: value, create_time: create_time)
  buffer_for(topic, partition) << message

@@ -1,6 +1,7 @@
  # frozen_string_literal: true

  require "kafka/protocol/message_set"
+ require "kafka/protocol/record_batch"

  module Kafka
  # A produce operation attempts to send all messages in a buffer to the Kafka cluster.
@@ -86,12 +87,10 @@ module Kafka

  messages_for_topics = {}

- message_buffer.each do |topic, partition, messages|
- message_set = Protocol::MessageSet.new(messages: messages)
- message_set = @compressor.compress(message_set)
-
+ message_buffer.each do |topic, partition, records|
+ record_batch = Protocol::RecordBatch.new(records: records)
  messages_for_topics[topic] ||= {}
- messages_for_topics[topic][partition] = message_set
+ messages_for_topics[topic][partition] = record_batch
  end

  response = broker.produce(
@@ -2,11 +2,12 @@

  module Kafka
  module Protocol
-
  # A decoder wraps an IO object, making it easy to read specific data types
  # from it. The Kafka protocol is not self-describing, so a client must call
  # these methods in just the right order for things to work.
  class Decoder
+ VARINT_MASK = 0b10000000
+
  def self.from_string(str)
  new(StringIO.new(str))
  end
@@ -22,6 +23,16 @@ module Kafka
  @io.eof?
  end

+ # Peeks at bytes ahead of the current position without advancing the IO offset
+ #
+ # @return [Array<Integer>] the peeked bytes
+ def peek(offset, length)
+ data = @io.read(offset + length)
+ return [] if data.nil?
+ @io.ungetc(data)
+ data.bytes[offset, offset + length] || []
+ end
+
  # Decodes an 8-bit boolean from the IO object.
  #
  # @return [Boolean]
@@ -70,6 +81,15 @@ module Kafka
  size.times.map(&block)
  end

+ # Decodes an array from the IO object.
+ # Just like #array except the size is in varint format
+ #
+ # @return [Array]
+ def varint_array(&block)
+ size = varint
+ size.times.map(&block)
+ end
+
  # Decodes a string from the IO object.
  #
  # @return [String]
@@ -83,6 +103,35 @@ module Kafka
  end
  end

+ # Decodes a string from the IO object. The size is in varint format
+ #
+ # @return [String]
+ def varint_string
+ size = varint
+
+ if size == -1
+ nil
+ else
+ read(size)
+ end
+ end
+
+ # Reads an integer encoded as a zig-zag varint from the IO object.
+ # https://developers.google.com/protocol-buffers/docs/encoding#varints
+ #
+ # @return [Integer]
+ def varint
+ group = 0
+ data = 0
+ loop do
+ chunk = int8
+ data |= (chunk & (~VARINT_MASK)) << group
+ group += 7
+ break if (chunk & VARINT_MASK) == 0
+ end
+ data & 0b1 != 0 ? ~(data >> 1) : (data >> 1)
+ end
+
  # Decodes a list of bytes from the IO object.
  #
  # @return [String]
@@ -96,6 +145,19 @@ module Kafka
  end
  end

+ # Decodes a list of bytes from the IO object. The size is in varint format
+ #
+ # @return [String]
+ def varint_bytes
+ size = varint
+
+ if size == -1
+ nil
+ else
+ read(size)
+ end
+ end
+
  # Reads the specified number of bytes from the IO object, returning them
  # as a String.
  #
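
The varint reader above accumulates 7-bit groups little-endian and then undoes Kafka's zig-zag mapping (0, -1, 1, -2, 2, ... encode to 0, 1, 2, 3, 4, ...). A standalone sketch of the same arithmetic, useful for checking byte sequences by hand:

    # Illustrative re-implementation of Decoder#varint for a plain byte array.
    def decode_varint(bytes)
      data  = 0
      group = 0
      bytes.each do |byte|
        data |= (byte & 0x7F) << group  # the low 7 bits carry the payload
        group += 7
        break if (byte & 0x80).zero?    # a clear high bit marks the last byte
      end
      data.odd? ? ~(data >> 1) : data >> 1  # undo the zig-zag mapping
    end

    decode_varint([0x02])        # => 1
    decode_varint([0x01])        # => -1
    decode_varint([0xAC, 0x02])  # => 150 (0x2C | 0x02 << 7 = 300, and 300 >> 1 = 150)
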
@@ -4,10 +4,10 @@ require "stringio"

  module Kafka
  module Protocol
-
  # An encoder wraps an IO object, making it easy to write specific data types
  # to it.
  class Encoder
+ VARINT_MASK = 0b10000000

  # Initializes a new encoder.
  #
@@ -85,6 +85,20 @@ module Kafka
  end
  end

+ # Writes an array to the IO object.
+ # Just like #write_array, except the size is encoded as a varint
+ #
+ # @param array [Array]
+ # @return [nil]
+ def write_varint_array(array, &block)
+ if array.nil?
+ write_varint(-1)
+ else
+ write_varint(array.size)
+ array.each(&block)
+ end
+ end
+
  # Writes a string to the IO object.
  #
  # @param string [String]
@@ -98,6 +112,40 @@ module Kafka
  end
  end

+ # Writes a string to the IO object. The size is encoded as a varint
+ #
+ # @param string [String]
+ # @return [nil]
+ def write_varint_string(string)
+ if string.nil?
+ write_varint(-1)
+ else
+ write_varint(string.bytesize)
+ write(string)
+ end
+ end
+
+ # Writes an integer encoded as a zig-zag varint to the IO object.
+ # https://developers.google.com/protocol-buffers/docs/encoding#varints
+ #
+ # @param int [Integer]
+ # @return [nil]
+ def write_varint(int)
+ int = int << 1
+ int = ~int | 1 if int < 0
+
+ loop do
+ chunk = int & (~VARINT_MASK)
+ int = int >> 7
+ if int == 0
+ write_int8(chunk)
+ return
+ else
+ write_int8(chunk | VARINT_MASK)
+ end
+ end
+ end
+
  # Writes a byte string to the IO object.
  #
  # @param bytes [String]
@@ -111,6 +159,19 @@ module Kafka
  end
  end

+ # Writes a byte string to the IO object. The size is encoded as a varint
+ #
+ # @param bytes [String]
+ # @return [nil]
+ def write_varint_bytes(bytes)
+ if bytes.nil?
+ write_varint(-1)
+ else
+ write_varint(bytes.bytesize)
+ write(bytes)
+ end
+ end
+
  # Encodes an object into a new buffer.
  #
  # @param object [#encode] the object that will encode itself.
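
The new Encoder and Decoder helpers are symmetric, so a value written with write_varint* reads back with the matching varint* method. A minimal round-trip sketch over a StringIO buffer, assuming the gem is loaded:

    require "kafka"
    require "stringio"

    buffer  = StringIO.new
    encoder = Kafka::Protocol::Encoder.new(buffer)

    encoder.write_varint(150)           # zig-zag 150 -> 300 -> bytes 0xAC 0x02
    encoder.write_varint(-1)            # zig-zag -1  -> 1   -> byte 0x01
    encoder.write_varint_string("key")  # varint length prefix, then the raw bytes
    encoder.write_varint_bytes(nil)     # nil is written as length -1

    buffer.rewind
    decoder = Kafka::Protocol::Decoder.new(buffer)
    decoder.varint         # => 150
    decoder.varint         # => -1
    decoder.varint_string  # => "key"
    decoder.varint_bytes   # => nil
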
@@ -17,6 +17,8 @@ module Kafka
  # MaxBytes => int32
  #
  class FetchRequest
+ ISOLATION_READ_UNCOMMITTED = 0
+ ISOLATION_READ_COMMITTED = 1

  # @param max_wait_time [Integer]
  # @param min_bytes [Integer]
@@ -34,7 +36,7 @@ module Kafka
  end

  def api_version
- 3
+ 4
  end

  def response_class
@@ -46,6 +48,7 @@ module Kafka
  encoder.write_int32(@max_wait_time)
  encoder.write_int32(@min_bytes)
  encoder.write_int32(@max_bytes)
+ encoder.write_int8(ISOLATION_READ_COMMITTED)

  encoder.write_array(@topics) do |topic, partitions|
  encoder.write_string(topic)
@@ -1,6 +1,7 @@
  # frozen_string_literal: true

  require "kafka/protocol/message_set"
+ require "kafka/protocol/record_batch"

  module Kafka
  module Protocol
@@ -17,15 +18,20 @@ module Kafka
  # MessageSetSize => int32
  #
  class FetchResponse
+ MAGIC_BYTE_OFFSET = 16
+ MAGIC_BYTE_LENGTH = 1
+
  class FetchedPartition
  attr_reader :partition, :error_code
- attr_reader :highwater_mark_offset, :messages
+ attr_reader :highwater_mark_offset, :last_stable_offset, :aborted_transactions, :messages

- def initialize(partition:, error_code:, highwater_mark_offset:, messages:)
+ def initialize(partition:, error_code:, highwater_mark_offset:, last_stable_offset:, aborted_transactions:, messages:)
  @partition = partition
  @error_code = error_code
  @highwater_mark_offset = highwater_mark_offset
  @messages = messages
+ @last_stable_offset = last_stable_offset
+ @aborted_transactions = aborted_transactions
  end
  end

@@ -55,15 +61,36 @@ module Kafka
  partition = decoder.int32
  error_code = decoder.int16
  highwater_mark_offset = decoder.int64
+ last_stable_offset = decoder.int64
+
+ aborted_transactions = decoder.array do
+ producer_id = decoder.int64
+ first_offset = decoder.int64
+ {
+ producer_id: producer_id,
+ first_offset: first_offset
+ }
+ end
+
+ messages_decoder = Decoder.from_string(decoder.bytes)
+ messages = []
+ magic_byte = messages_decoder.peek(MAGIC_BYTE_OFFSET, MAGIC_BYTE_LENGTH)[0].to_i

- message_set_decoder = Decoder.from_string(decoder.bytes)
- message_set = MessageSet.decode(message_set_decoder)
+ if magic_byte == RecordBatch::MAGIC_BYTE
+ record_batch = RecordBatch.decode(messages_decoder)
+ messages = record_batch.records
+ else
+ message_set = MessageSet.decode(messages_decoder)
+ messages = message_set.messages
+ end

  FetchedPartition.new(
  partition: partition,
  error_code: error_code,
  highwater_mark_offset: highwater_mark_offset,
- messages: message_set.messages,
+ last_stable_offset: last_stable_offset,
+ aborted_transactions: aborted_transactions,
+ messages: messages
  )
  end
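
The peek at byte 16 works because both wire formats happen to place their magic byte at the same position: a legacy MessageSet starts with offset (8 bytes), message size (4) and CRC (4), while a v2 RecordBatch starts with first offset (8), batch length (4) and partition leader epoch (4). A small sketch that encodes a RecordBatch and checks the byte the decoder looks for:

    require "kafka"
    require "stringio"

    batch = Kafka::Protocol::RecordBatch.new(
      records: [Kafka::Protocol::Record.new(value: "x")]
    )

    io = StringIO.new
    batch.encode(Kafka::Protocol::Encoder.new(io))

    decoder = Kafka::Protocol::Decoder.from_string(io.string)
    decoder.peek(16, 1)[0]                    # => 2
    Kafka::Protocol::RecordBatch::MAGIC_BYTE  # => 2
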
 
@@ -27,15 +27,17 @@ module Kafka
  # Value => bytes
  #
  class ProduceRequest
- attr_reader :required_acks, :timeout, :messages_for_topics
+ attr_reader :transactional_id, :required_acks, :timeout, :messages_for_topics, :compressor

  # @param required_acks [Integer]
  # @param timeout [Integer]
  # @param messages_for_topics [Hash]
- def initialize(required_acks:, timeout:, messages_for_topics:)
+ def initialize(transactional_id: nil, required_acks:, timeout:, messages_for_topics:, compressor: nil)
+ @transactional_id = transactional_id
  @required_acks = required_acks
  @timeout = timeout
  @messages_for_topics = messages_for_topics
+ @compressor = compressor
  end

  def api_key
@@ -43,7 +45,7 @@ module Kafka
  end

  def api_version
- 2
+ 3
  end

  def response_class
@@ -59,24 +61,32 @@ module Kafka
  end

  def encode(encoder)
+ encoder.write_string(@transactional_id)
  encoder.write_int16(@required_acks)
  encoder.write_int32(@timeout)

  encoder.write_array(@messages_for_topics) do |topic, messages_for_partition|
  encoder.write_string(topic)

- encoder.write_array(messages_for_partition) do |partition, message_set|
+ encoder.write_array(messages_for_partition) do |partition, record_batch|
  encoder.write_int32(partition)

- # When encoding the message set into the request, the bytesize of the message
- # set must precede the actual data. Therefore we need to encode the entire
- # message set into a separate buffer first.
- encoded_message_set = Encoder.encode_with(message_set)
-
- encoder.write_bytes(encoded_message_set)
+ record_batch.fulfill_relative_data
+ encoded_record_batch = compress(record_batch)
+ encoder.write_bytes(encoded_record_batch)
  end
  end
  end
+
+ private
+
+ def compress(record_batch)
+ if @compressor.nil?
+ Protocol::Encoder.encode_with(record_batch)
+ else
+ @compressor.compress(record_batch)
+ end
+ end
  end
  end
  end
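
A rough sketch of how the v3 request is now assembled; the topic/partition hash mirrors what ProduceOperation builds above, and passing compressor: nil simply encodes the batch uncompressed:

    require "kafka"
    require "stringio"

    batch = Kafka::Protocol::RecordBatch.new(
      records: [Kafka::Protocol::Record.new(value: "hello")]
    )

    request = Kafka::Protocol::ProduceRequest.new(
      transactional_id: nil,     # non-transactional produce
      required_acks: -1,         # wait for all in-sync replicas
      timeout: 10_000,
      messages_for_topics: { "greetings" => { 0 => batch } },
      compressor: nil
    )

    io = StringIO.new
    request.encode(Kafka::Protocol::Encoder.new(io))  # fills relative offsets, then writes the wire bytes
    io.string.bytesize                                # size of the encoded request body
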
@@ -0,0 +1,79 @@
+ module Kafka
+ module Protocol
+ class Record
+ attr_reader :key, :value, :headers, :attributes, :bytesize
+ attr_accessor :offset_delta, :timestamp_delta, :offset, :create_time, :is_control_record
+
+ def initialize(
+ key: nil,
+ value:,
+ headers: {},
+ attributes: 0,
+ offset_delta: 0,
+ timestamp_delta: 0,
+ create_time: Time.now,
+ is_control_record: false
+ )
+ @key = key
+ @value = value
+ @headers = headers
+ @attributes = attributes
+
+ @offset_delta = offset_delta
+ @timestamp_delta = timestamp_delta
+ @create_time = create_time
+ @is_control_record = is_control_record
+
+ @bytesize = @key.to_s.bytesize + @value.to_s.bytesize
+ end
+
+ def encode(encoder)
+ record_buffer = StringIO.new
+
+ record_encoder = Encoder.new(record_buffer)
+
+ record_encoder.write_int8(@attributes)
+ record_encoder.write_varint(@timestamp_delta)
+ record_encoder.write_varint(@offset_delta)
+
+ record_encoder.write_varint_string(@key)
+ record_encoder.write_varint_bytes(@value)
+
+ record_encoder.write_varint_array(@headers.to_a) do |header_key, header_value|
+ record_encoder.write_varint_string(header_key)
+ record_encoder.write_varint_bytes(header_value)
+ end
+
+ encoder.write_varint_bytes(record_buffer.string)
+ end
+
+ def self.decode(decoder)
+ record_decoder = Decoder.from_string(decoder.varint_bytes)
+
+ attributes = record_decoder.int8
+ timestamp_delta = record_decoder.varint
+ offset_delta = record_decoder.varint
+
+ key = record_decoder.varint_string
+ value = record_decoder.varint_bytes
+
+ headers = {}
+ record_decoder.varint_array do
+ header_key = record_decoder.varint_string
+ header_value = record_decoder.varint_bytes
+
+ headers[header_key] = header_value
+ end
+
+ new(
+ key: key,
+ value: value,
+ headers: headers,
+ attributes: attributes,
+ offset_delta: offset_delta,
+ timestamp_delta: timestamp_delta
+ )
+ end
+ end
+ end
+ end
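
Each record is written as one varint-length-prefixed blob containing attributes (int8), timestamp delta (varint), offset delta (varint), key (varint string), value (varint bytes) and headers (varint array). A small sketch encoding a single record on its own:

    require "kafka"
    require "stringio"

    record = Kafka::Protocol::Record.new(
      key: "greeting",
      value: "hello",
      headers: { "trace-id" => "abc123" }
    )

    io = StringIO.new
    record.encode(Kafka::Protocol::Encoder.new(io))
    io.string.bytesize  # varint length prefix plus the record body described above
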
@@ -0,0 +1,202 @@
+ require 'digest/crc32'
+ require 'kafka/protocol/record'
+
+ module Kafka
+ module Protocol
+ class RecordBatch
+ MAGIC_BYTE = 2
+ # The size of metadata before the real record data
+ RECORD_BATCH_OVERHEAD = 49
+ # Masks to extract information from attributes
+ CODEC_ID_MASK = 0b00000111
+ IN_TRANSACTION_MASK = 0b00010000
+ IS_CONTROL_BATCH_MASK = 0b00100000
+
+ attr_reader :records, :first_offset, :first_timestamp, :partition_leader_epoch, :in_transaction, :is_control_batch, :last_offset_delta, :max_timestamp, :producer_id, :producer_epoch, :first_sequence
+
+ attr_accessor :codec_id
+
+ def initialize(
+ records: [],
+ first_offset: 0,
+ first_timestamp: Time.now,
+ partition_leader_epoch: 0,
+ codec_id: 0,
+ in_transaction: false,
+ is_control_batch: false,
+ last_offset_delta: 0,
+ producer_id: -1,
+ producer_epoch: 0,
+ first_sequence: 0,
+ max_timestamp: Time.now
+ )
+ @records = records
+ @first_offset = first_offset
+ @first_timestamp = first_timestamp
+ @codec_id = codec_id
+
+ # Records verification
+ @last_offset_delta = last_offset_delta
+ @max_timestamp = max_timestamp
+
+ # Transaction information
+ @producer_id = producer_id
+ @producer_epoch = producer_epoch
+
+ @first_sequence = first_sequence
+ @partition_leader_epoch = partition_leader_epoch
+ @in_transaction = in_transaction
+ @is_control_batch = is_control_batch
+
+ mark_control_record
+ end
+
+ def size
+ @records.size
+ end
+
+ def attributes
+ 0x0000 | @codec_id |
+ (@in_transaction ? IN_TRANSACTION_MASK : 0x0) |
+ (@is_control_batch ? IS_CONTROL_BATCH_MASK : 0x0)
+ end
+
+ def encode(encoder)
+ encoder.write_int64(@first_offset)
+
+ record_batch_buffer = StringIO.new
+ record_batch_encoder = Encoder.new(record_batch_buffer)
+
+ record_batch_encoder.write_int32(@partition_leader_epoch)
+ record_batch_encoder.write_int8(MAGIC_BYTE)
+
+ body = encode_record_batch_body
+ crc = Digest::CRC32c.checksum(body)
+
+ record_batch_encoder.write_int32(crc)
+ record_batch_encoder.write(body)
+
+ encoder.write_bytes(record_batch_buffer.string)
+ end
+
+ def encode_record_batch_body
+ buffer = StringIO.new
+ encoder = Encoder.new(buffer)
+
+ encoder.write_int16(attributes)
+ encoder.write_int32(@last_offset_delta)
+ encoder.write_int64((@first_timestamp.to_f * 1000).to_i)
+ encoder.write_int64((@max_timestamp.to_f * 1000).to_i)
+
+ encoder.write_int64(@producer_id)
+ encoder.write_int16(@producer_epoch)
+ encoder.write_int32(@first_sequence)
+
+ encoder.write_int32(@records.length)
+
+ records_array = encode_record_array
+ if compressed?
+ codec = Compression.find_codec_by_id(@codec_id)
+ records_array = codec.compress(records_array)
+ end
+ encoder.write(records_array)
+
+ buffer.string
+ end
+
+ def encode_record_array
+ buffer = StringIO.new
+ encoder = Encoder.new(buffer)
+ @records.each do |record|
+ record.encode(encoder)
+ end
+ buffer.string
+ end
+
+ def compressed?
+ @codec_id != 0
+ end
+
+ def fulfill_relative_data
+ first_record = records.min_by { |record| record.create_time }
+ @first_timestamp = first_record.nil? ? Time.now : first_record.create_time
+
+ last_record = records.max_by { |record| record.create_time }
+ @max_timestamp = last_record.nil? ? Time.now : last_record.create_time
+
+ records.each_with_index do |record, index|
+ record.offset_delta = index
+ record.timestamp_delta = (record.create_time - first_timestamp).to_i
+ end
+ @last_offset_delta = records.length - 1
+ end
+
+ def self.decode(decoder)
+ first_offset = decoder.int64
+
+ record_batch_raw = decoder.bytes
+ record_batch_decoder = Decoder.from_string(record_batch_raw)
+
+ partition_leader_epoch = record_batch_decoder.int32
+ # Currently, the magic byte is only used to distinguish the legacy MessageSet
+ # format from the new RecordBatch format, so we don't need to check it again here.
+ _magic_byte = record_batch_decoder.int8
+ _crc = record_batch_decoder.int32
+
+ attributes = record_batch_decoder.int16
+ codec_id = attributes & CODEC_ID_MASK
+ in_transaction = (attributes & IN_TRANSACTION_MASK) > 0
+ is_control_batch = (attributes & IS_CONTROL_BATCH_MASK) > 0
+
+ last_offset_delta = record_batch_decoder.int32
+ first_timestamp = Time.at(record_batch_decoder.int64 / 1000)
+ max_timestamp = Time.at(record_batch_decoder.int64 / 1000)
+
+ producer_id = record_batch_decoder.int64
+ producer_epoch = record_batch_decoder.int16
+ first_sequence = record_batch_decoder.int32
+
+ records_array_length = record_batch_decoder.int32
+ records_array_raw = record_batch_decoder.read(
+ record_batch_raw.size - RECORD_BATCH_OVERHEAD
+ )
+ if codec_id != 0
+ codec = Compression.find_codec_by_id(codec_id)
+ records_array_raw = codec.decompress(records_array_raw)
+ end
+
+ records_array_decoder = Decoder.from_string(records_array_raw)
+ records_array = []
+ until records_array_decoder.eof?
+ record = Record.decode(records_array_decoder)
+ record.offset = first_offset + record.offset_delta
+ record.create_time = first_timestamp + record.timestamp_delta
+ records_array << record
+ end
+
+ raise InsufficientDataMessage if records_array.length != records_array_length
+
+ new(
+ records: records_array,
+ first_offset: first_offset,
+ first_timestamp: first_timestamp,
+ partition_leader_epoch: partition_leader_epoch,
+ in_transaction: in_transaction,
+ is_control_batch: is_control_batch,
+ last_offset_delta: last_offset_delta,
+ producer_id: producer_id,
+ producer_epoch: producer_epoch,
+ first_sequence: first_sequence,
+ max_timestamp: max_timestamp
+ )
+ end
+
+ def mark_control_record
+ if in_transaction && is_control_batch
+ record = @records.first
+ record.is_control_record = true unless record.nil?
+ end
+ end
+ end
+ end
+ end
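
RecordBatch#encode and RecordBatch.decode are symmetric, so an uncompressed batch can be round-tripped in isolation, which is a quick way to sanity-check the new format (the digest-crc dependency added below must be installed):

    require "kafka"
    require "stringio"

    batch = Kafka::Protocol::RecordBatch.new(
      records: [Kafka::Protocol::Record.new(key: "k", value: "hello")]
    )
    batch.fulfill_relative_data  # fill offset/timestamp deltas, as ProduceRequest#encode does

    io = StringIO.new
    batch.encode(Kafka::Protocol::Encoder.new(io))

    decoded = Kafka::Protocol::RecordBatch.decode(
      Kafka::Protocol::Decoder.from_string(io.string)
    )
    decoded.records.first.value   # => "hello"
    decoded.records.first.offset  # => 0 (first_offset + offset_delta)
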
data/lib/kafka/version.rb CHANGED
@@ -1,5 +1,5 @@
  # frozen_string_literal: true

  module Kafka
- VERSION = "0.7.0.alpha1"
+ VERSION = "0.7.0.alpha2"
  end
data/ruby-kafka.gemspec CHANGED
@@ -27,6 +27,8 @@ Gem::Specification.new do |spec|
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

+ spec.add_dependency 'digest-crc'
+
  spec.add_development_dependency "bundler", ">= 1.9.5"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec"
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: ruby-kafka
  version: !ruby/object:Gem::Version
- version: 0.7.0.alpha1
+ version: 0.7.0.alpha2
  platform: ruby
  authors:
  - Daniel Schierbeck
@@ -10,6 +10,20 @@ bindir: exe
  cert_chain: []
  date: 2018-05-24 00:00:00.000000000 Z
  dependencies:
+ - !ruby/object:Gem::Dependency
+ name: digest-crc
+ requirement: !ruby/object:Gem::Requirement
+ requirements:
+ - - ">="
+ - !ruby/object:Gem::Version
+ version: '0'
+ type: :runtime
+ prerelease: false
+ version_requirements: !ruby/object:Gem::Requirement
+ requirements:
+ - - ">="
+ - !ruby/object:Gem::Version
+ version: '0'
  - !ruby/object:Gem::Dependency
  name: bundler
  requirement: !ruby/object:Gem::Requirement
@@ -379,6 +393,8 @@ files:
  - lib/kafka/protocol/offset_fetch_response.rb
  - lib/kafka/protocol/produce_request.rb
  - lib/kafka/protocol/produce_response.rb
+ - lib/kafka/protocol/record.rb
+ - lib/kafka/protocol/record_batch.rb
  - lib/kafka/protocol/request_message.rb
  - lib/kafka/protocol/sasl_handshake_request.rb
  - lib/kafka/protocol/sasl_handshake_response.rb