ruby-kafka 0.7.0.alpha1 → 0.7.0.alpha2

This diff shows the publicly released contents of the two package versions as they appear in their respective registries, and is provided for informational purposes only.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 3a913d7cca1f36c5d37398c39620db551399b18073ae1cc11eb110e0d32f8a39
-  data.tar.gz: 5ccef5d293d0d8732eb592496468f8f7840a4a8a62ca1982c0c81d7b224a2f93
+  metadata.gz: 2ddcaacf16a7990d53463e5302a389cae33c7782d279ada36a7b32d229a302a5
+  data.tar.gz: 872a5d08ec43a2e4ad550b362bacbcf3c5a523de8bcea49ebc240086b7bba74d
 SHA512:
-  metadata.gz: e21b28dd124d83ca1c470f807e21ceb1d4c117a6aa9f22103f405752f4212fc5119d969b09e58a6bd35dfe56ea7611274dd79cb7c10fe8c573ce6031b3a8a547
-  data.tar.gz: dab421eb3da87aa62e134e16a0cafb73825df1ca09b7dd3c702d12e9235c303ec286b581b4f5c87fafc95ae19983d76db3995fb1eda5ef922ba1bae4f224bbbc
+  metadata.gz: 4352915740c56ff947cf91e951a9298f39a78db7c51421456a8db2c037f642ab6f791bfe3eb1bbeff01700d573e342a020b0ed7896c481996cf59d6e21d3b4f2
+  data.tar.gz: 8dec7bfdb51ff8321c176b29c6a3cf378b837c807aaf050b8cefe8ce5224044f016f16234f9dfae5104946882fe7817d69650206219e6640f44738c60bff682d
data/.circleci/config.yml CHANGED
@@ -11,38 +11,6 @@ jobs:
       - run: bundle exec rspec
       - run: bundle exec rubocop
 
-  kafka-0.10:
-    docker:
-      - image: circleci/ruby:2.4.1-node
-        environment:
-          LOG_LEVEL: DEBUG
-      - image: wurstmeister/zookeeper
-      - image: wurstmeister/kafka:0.10.2.1
-        environment:
-          KAFKA_ADVERTISED_HOST_NAME: localhost
-          KAFKA_ADVERTISED_PORT: 9092
-          KAFKA_PORT: 9092
-          KAFKA_ZOOKEEPER_CONNECT: localhost:2181
-          KAFKA_DELETE_TOPIC_ENABLE: true
-      - image: wurstmeister/kafka:0.10.2.1
-        environment:
-          KAFKA_ADVERTISED_HOST_NAME: localhost
-          KAFKA_ADVERTISED_PORT: 9093
-          KAFKA_PORT: 9093
-          KAFKA_ZOOKEEPER_CONNECT: localhost:2181
-          KAFKA_DELETE_TOPIC_ENABLE: true
-      - image: wurstmeister/kafka:0.10.2.1
-        environment:
-          KAFKA_ADVERTISED_HOST_NAME: localhost
-          KAFKA_ADVERTISED_PORT: 9094
-          KAFKA_PORT: 9094
-          KAFKA_ZOOKEEPER_CONNECT: localhost:2181
-          KAFKA_DELETE_TOPIC_ENABLE: true
-    steps:
-      - checkout
-      - run: bundle install --path vendor/bundle
-      - run: bundle exec rspec --profile --tag functional spec/functional
-
   kafka-0.11:
     docker:
       - image: circleci/ruby:2.4.1-node
@@ -110,6 +78,5 @@ workflows:
   test:
     jobs:
       - unit
-      - kafka-0.10
       - kafka-0.11
       - kafka-1.0
data/lib/kafka.rb CHANGED
@@ -42,6 +42,10 @@ module Kafka
   class CorruptMessage < ProtocolError
   end
 
+  # Raised when the record array length doesn't match the number of records actually received
+  class InsufficientDataMessage < Error
+  end
+
   class UnknownError < ProtocolError
   end
 
@@ -30,13 +30,31 @@ module Kafka
       @instrumenter = instrumenter
     end
 
-    # @param message_set [Protocol::MessageSet]
+    # @param record_batch [Protocol::RecordBatch]
     # @param offset [Integer] used to simulate broker behaviour in tests
-    # @return [Protocol::MessageSet]
-    def compress(message_set, offset: -1)
+    # @return [Protocol::RecordBatch]
+    def compress(record_batch, offset: -1)
+      if record_batch.is_a?(Protocol::RecordBatch)
+        compress_record_batch(record_batch)
+      else
+        # Deprecated message set format
+        compress_message_set(record_batch, offset)
+      end
+    end
+
+    private
+
+    def compress_message_set(message_set, offset)
       return message_set if @codec.nil? || message_set.size < @threshold
 
-      compressed_data = compress_data(message_set)
+      data = Protocol::Encoder.encode_with(message_set)
+      compressed_data = @codec.compress(data)
+
+      @instrumenter.instrument("compress.compressor") do |notification|
+        notification[:message_count] = message_set.size
+        notification[:uncompressed_bytesize] = data.bytesize
+        notification[:compressed_bytesize] = compressed_data.bytesize
+      end
 
       wrapper_message = Protocol::Message.new(
         value: compressed_data,
@@ -47,20 +65,21 @@ module Kafka
       Protocol::MessageSet.new(messages: [wrapper_message])
     end
 
-    private
+    def compress_record_batch(record_batch)
+      if @codec.nil? || record_batch.size < @threshold
+        record_batch.codec_id = 0
+        return Protocol::Encoder.encode_with(record_batch)
+      end
 
-    def compress_data(message_set)
-      data = Protocol::Encoder.encode_with(message_set)
+      record_batch.codec_id = @codec.codec_id
+      data = Protocol::Encoder.encode_with(record_batch)
 
       @instrumenter.instrument("compress.compressor") do |notification|
-        compressed_data = @codec.compress(data)
-
-        notification[:message_count] = message_set.size
-        notification[:uncompressed_bytesize] = data.bytesize
-        notification[:compressed_bytesize] = compressed_data.bytesize
-
-        compressed_data
+        notification[:message_count] = record_batch.size
+        notification[:compressed_bytesize] = data.bytesize
       end
+
+      data
     end
   end
 end
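With this change, Compressor#compress accepts either a Protocol::RecordBatch (returned as an encoded byte string with codec_id stamped into the batch) or a legacy Protocol::MessageSet (still returned as a MessageSet wrapping a single compressed message). A rough usage sketch follows; the constructor keyword arguments for Compressor and Instrumenter are assumptions about the gem's API rather than something shown in this diff, so verify them against the source.

require "kafka"

# Hypothetical setup; the keyword arguments here are assumptions, not taken from this diff.
compressor = Kafka::Compressor.new(
  codec_name: :gzip,
  threshold: 1,
  instrumenter: Kafka::Instrumenter.new(client_id: "example")
)

# New path: a RecordBatch comes back as an encoded byte string with codec_id set.
batch = Kafka::Protocol::RecordBatch.new(
  records: [Kafka::Protocol::Record.new(value: "hello")]
)
encoded_batch = compressor.compress(batch)        # => String (encoded, compressed batch)

# Deprecated path: a MessageSet still comes back as a MessageSet wrapping one compressed message.
message_set = Kafka::Protocol::MessageSet.new(
  messages: [Kafka::Protocol::Message.new(value: "hello")]
)
compressed_set = compressor.compress(message_set) # => Kafka::Protocol::MessageSet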
@@ -221,7 +221,7 @@ module Kafka
 
           @instrumenter.instrument("process_message.consumer", notification) do
             begin
-              yield message
+              yield message unless message.is_control_record
               @current_offsets[message.topic][message.partition] = message.offset
             rescue => e
               location = "#{message.topic}/#{message.partition} at offset #{message.offset}"
@@ -289,6 +289,9 @@ module Kafka
 
         batches.each do |batch|
           unless batch.empty?
+            raw_messages = batch.messages
+            batch.messages = raw_messages.reject(&:is_control_record)
+
             notification = {
               topic: batch.topic,
               partition: batch.partition,
@@ -314,9 +317,10 @@ module Kafka
               @logger.error "Exception raised when processing #{location} -- #{e.class}: #{e}\n#{backtrace}"
 
               raise ProcessingError.new(batch.topic, batch.partition, offset_range)
+            ensure
+              batch.messages = raw_messages
             end
           end
-
           mark_message_as_processed(batch.messages.last) if automatically_mark_as_processed
 
           # We've successfully processed a batch from the partition, so we can clear
@@ -14,7 +14,7 @@ module Kafka
     attr_reader :highwater_mark_offset
 
     # @return [Array<Kafka::FetchedMessage>]
-    attr_reader :messages
+    attr_accessor :messages
 
     def initialize(topic:, partition:, highwater_mark_offset:, messages:)
       @topic = topic
@@ -33,5 +33,10 @@ module Kafka
     def create_time
       @message.create_time
     end
+
+    # @return [Boolean] whether this record is a control record
+    def is_control_record
+      @message.is_control_record
+    end
   end
 end
@@ -17,7 +17,7 @@ module Kafka
     end
 
     def write(value:, key:, topic:, partition:, create_time: Time.now)
-      message = Protocol::Message.new(key: key, value: value, create_time: create_time)
+      message = Protocol::Record.new(key: key, value: value, create_time: create_time)
 
       buffer_for(topic, partition) << message
 
@@ -1,6 +1,7 @@
 # frozen_string_literal: true
 
 require "kafka/protocol/message_set"
+require "kafka/protocol/record_batch"
 
 module Kafka
   # A produce operation attempts to send all messages in a buffer to the Kafka cluster.
@@ -86,12 +87,10 @@ module Kafka
 
       messages_for_topics = {}
 
-      message_buffer.each do |topic, partition, messages|
-        message_set = Protocol::MessageSet.new(messages: messages)
-        message_set = @compressor.compress(message_set)
-
+      message_buffer.each do |topic, partition, records|
+        record_batch = Protocol::RecordBatch.new(records: records)
         messages_for_topics[topic] ||= {}
-        messages_for_topics[topic][partition] = message_set
+        messages_for_topics[topic][partition] = record_batch
       end
 
       response = broker.produce(
@@ -2,11 +2,12 @@
 
 module Kafka
   module Protocol
-
     # A decoder wraps an IO object, making it easy to read specific data types
     # from it. The Kafka protocol is not self-describing, so a client must call
     # these methods in just the right order for things to work.
     class Decoder
+      VARINT_MASK = 0b10000000
+
       def self.from_string(str)
         new(StringIO.new(str))
       end
@@ -22,6 +23,16 @@ module Kafka
         @io.eof?
       end
 
+      # Reads some upcoming bytes without moving the current IO offset.
+      #
+      # @return [Array<Integer>]
+      def peek(offset, length)
+        data = @io.read(offset + length)
+        return [] if data.nil?
+        @io.ungetc(data)
+        data.bytes[offset, offset + length] || []
+      end
+
       # Decodes an 8-bit boolean from the IO object.
       #
       # @return [Boolean]
@@ -70,6 +81,15 @@ module Kafka
         size.times.map(&block)
       end
 
+      # Decodes an array from the IO object.
+      # Just like #array, except the size is encoded as a varint.
+      #
+      # @return [Array]
+      def varint_array(&block)
+        size = varint
+        size.times.map(&block)
+      end
+
       # Decodes a string from the IO object.
       #
       # @return [String]
@@ -83,6 +103,35 @@ module Kafka
         end
       end
 
+      # Decodes a string from the IO object; the size is encoded as a varint.
+      #
+      # @return [String]
+      def varint_string
+        size = varint
+
+        if size == -1
+          nil
+        else
+          read(size)
+        end
+      end
+
+      # Reads a zig-zag encoded varint from the IO object.
+      # https://developers.google.com/protocol-buffers/docs/encoding#varints
+      #
+      # @return [Integer]
+      def varint
+        group = 0
+        data = 0
+        loop do
+          chunk = int8
+          data |= (chunk & (~VARINT_MASK)) << group
+          group += 7
+          break if (chunk & VARINT_MASK) == 0
+        end
+        data & 0b1 != 0 ? ~(data >> 1) : (data >> 1)
+      end
+
       # Decodes a list of bytes from the IO object.
       #
       # @return [String]
@@ -96,6 +145,19 @@ module Kafka
         end
       end
 
+      # Decodes a list of bytes from the IO object; the size is encoded as a varint.
+      #
+      # @return [String]
+      def varint_bytes
+        size = varint
+
+        if size == -1
+          nil
+        else
+          read(size)
+        end
+      end
+
       # Reads the specified number of bytes from the IO object, returning them
       # as a String.
       #
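Decoder#varint implements the zig-zag varint scheme used by Kafka's v2 record format (and by protobuf): each byte carries 7 payload bits, the high bit marks continuation, and the accumulated value is zig-zag decoded at the end. A minimal standalone sketch of the same decoding, using an unsigned byte read instead of the signed int8 the Decoder relies on internally (stdlib only, not part of the gem):

require "stringio"

# Each byte contributes 7 payload bits, the high bit signals "more bytes follow",
# and the accumulated value is zig-zag decoded at the end.
def decode_varint(io)
  shift = 0
  data = 0
  loop do
    byte = io.readbyte             # unsigned byte, unlike Decoder#int8
    data |= (byte & 0x7f) << shift
    shift += 7
    break if (byte & 0x80).zero?
  end
  data.odd? ? ~(data >> 1) : (data >> 1)
end

decode_varint(StringIO.new([0x96, 0x01].pack("C*")))  # => 75
decode_varint(StringIO.new([0x01].pack("C*")))        # => -1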
@@ -4,10 +4,10 @@ require "stringio"
 
 module Kafka
   module Protocol
-
     # An encoder wraps an IO object, making it easy to write specific data types
     # to it.
     class Encoder
+      VARINT_MASK = 0b10000000
 
       # Initializes a new encoder.
       #
@@ -85,6 +85,20 @@ module Kafka
         end
       end
 
+      # Writes an array to the IO object.
+      # Just like #write_array, except the size is written as a varint.
+      #
+      # @param array [Array]
+      # @return [nil]
+      def write_varint_array(array, &block)
+        if array.nil?
+          write_varint(-1)
+        else
+          write_varint(array.size)
+          array.each(&block)
+        end
+      end
+
       # Writes a string to the IO object.
       #
       # @param string [String]
@@ -98,6 +112,40 @@ module Kafka
         end
       end
 
+      # Writes a string to the IO object; the size is written as a varint.
+      #
+      # @param string [String]
+      # @return [nil]
+      def write_varint_string(string)
+        if string.nil?
+          write_varint(-1)
+        else
+          write_varint(string.bytesize)
+          write(string)
+        end
+      end
+
+      # Writes a zig-zag encoded varint to the IO object.
+      # https://developers.google.com/protocol-buffers/docs/encoding#varints
+      #
+      # @param int [Integer]
+      # @return [nil]
+      def write_varint(int)
+        int = int << 1
+        int = ~int | 1 if int < 0
+
+        loop do
+          chunk = int & (~VARINT_MASK)
+          int = int >> 7
+          if int == 0
+            write_int8(chunk)
+            return
+          else
+            write_int8(chunk | VARINT_MASK)
+          end
+        end
+      end
+
       # Writes a byte string to the IO object.
       #
       # @param bytes [String]
@@ -111,6 +159,19 @@ module Kafka
         end
       end
 
+      # Writes a byte string to the IO object; the size is written as a varint.
+      #
+      # @param bytes [String]
+      # @return [nil]
+      def write_varint_bytes(bytes)
+        if bytes.nil?
+          write_varint(-1)
+        else
+          write_varint(bytes.bytesize)
+          write(bytes)
+        end
+      end
+
       # Encodes an object into a new buffer.
       #
       # @param object [#encode] the object that will encode itself.
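Encoder#write_varint is the inverse operation: zig-zag the integer first, then emit it 7 bits per byte with the high bit as a continuation flag. A sketch of the byte layout it produces (stdlib only, not part of the gem):

# Returns the bytes write_varint would emit for a given integer.
def zigzag_varint_bytes(int)
  int = int << 1
  int = ~int | 1 if int < 0       # zig-zag: -1 => 1, 1 => 2, -2 => 3, ...
  bytes = []
  loop do
    chunk = int & 0x7f
    int >>= 7
    if int.zero?
      bytes << chunk
      return bytes
    end
    bytes << (chunk | 0x80)       # high bit set: more bytes follow
  end
end

zigzag_varint_bytes(75)   # => [0x96, 0x01]
zigzag_varint_bytes(-1)   # => [0x01]
zigzag_varint_bytes(300)  # => [0xd8, 0x04]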
@@ -17,6 +17,8 @@ module Kafka
     # MaxBytes => int32
     #
     class FetchRequest
+      ISOLATION_READ_UNCOMMITTED = 0
+      ISOLATION_READ_COMMITTED = 1
 
       # @param max_wait_time [Integer]
       # @param min_bytes [Integer]
@@ -34,7 +36,7 @@ module Kafka
       end
 
       def api_version
-        3
+        4
       end
 
       def response_class
@@ -46,6 +48,7 @@ module Kafka
         encoder.write_int32(@max_wait_time)
         encoder.write_int32(@min_bytes)
         encoder.write_int32(@max_bytes)
+        encoder.write_int8(ISOLATION_READ_COMMITTED)
 
         encoder.write_array(@topics) do |topic, partitions|
           encoder.write_string(topic)
@@ -1,6 +1,7 @@
 # frozen_string_literal: true
 
 require "kafka/protocol/message_set"
+require "kafka/protocol/record_batch"
 
 module Kafka
   module Protocol
@@ -17,15 +18,20 @@ module Kafka
     # MessageSetSize => int32
     #
     class FetchResponse
+      MAGIC_BYTE_OFFSET = 16
+      MAGIC_BYTE_LENGTH = 1
+
       class FetchedPartition
         attr_reader :partition, :error_code
-        attr_reader :highwater_mark_offset, :messages
+        attr_reader :highwater_mark_offset, :last_stable_offset, :aborted_transactions, :messages
 
-        def initialize(partition:, error_code:, highwater_mark_offset:, messages:)
+        def initialize(partition:, error_code:, highwater_mark_offset:, last_stable_offset:, aborted_transactions:, messages:)
           @partition = partition
           @error_code = error_code
           @highwater_mark_offset = highwater_mark_offset
           @messages = messages
+          @last_stable_offset = last_stable_offset
+          @aborted_transactions = aborted_transactions
         end
       end
 
@@ -55,15 +61,36 @@ module Kafka
           partition = decoder.int32
           error_code = decoder.int16
           highwater_mark_offset = decoder.int64
+          last_stable_offset = decoder.int64
+
+          aborted_transactions = decoder.array do
+            producer_id = decoder.int64
+            first_offset = decoder.int64
+            {
+              producer_id: producer_id,
+              first_offset: first_offset
+            }
+          end
+
+          messages_decoder = Decoder.from_string(decoder.bytes)
+          messages = []
+          magic_byte = messages_decoder.peek(MAGIC_BYTE_OFFSET, MAGIC_BYTE_LENGTH)[0].to_i
 
-          message_set_decoder = Decoder.from_string(decoder.bytes)
-          message_set = MessageSet.decode(message_set_decoder)
+          if magic_byte == RecordBatch::MAGIC_BYTE
+            record_batch = RecordBatch.decode(messages_decoder)
+            messages = record_batch.records
+          else
+            message_set = MessageSet.decode(messages_decoder)
+            messages = message_set.messages
+          end
 
           FetchedPartition.new(
             partition: partition,
             error_code: error_code,
             highwater_mark_offset: highwater_mark_offset,
-            messages: message_set.messages,
+            last_stable_offset: last_stable_offset,
+            aborted_transactions: aborted_transactions,
+            messages: messages
           )
         end
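The peek(MAGIC_BYTE_OFFSET, MAGIC_BYTE_LENGTH) trick works because both wire formats place the magic byte at byte 16 of the partition payload: a legacy message set starts with an 8-byte offset, a 4-byte message size and a 4-byte CRC, while a v2 record batch starts with an 8-byte first offset, a 4-byte batch length and a 4-byte partition leader epoch. An illustrative check in plain Ruby (not part of the gem):

# Given the raw partition payload, decide which decoder applies.
def record_batch_payload?(raw)
  magic = raw.bytes[16]
  !magic.nil? && magic == 2   # RecordBatch::MAGIC_BYTE is 2; legacy message sets use 0 or 1
end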
@@ -27,15 +27,17 @@ module Kafka
     # Value => bytes
     #
     class ProduceRequest
-      attr_reader :required_acks, :timeout, :messages_for_topics
+      attr_reader :transactional_id, :required_acks, :timeout, :messages_for_topics, :compressor
 
       # @param required_acks [Integer]
       # @param timeout [Integer]
       # @param messages_for_topics [Hash]
-      def initialize(required_acks:, timeout:, messages_for_topics:)
+      def initialize(transactional_id: nil, required_acks:, timeout:, messages_for_topics:, compressor: nil)
+        @transactional_id = transactional_id
         @required_acks = required_acks
         @timeout = timeout
         @messages_for_topics = messages_for_topics
+        @compressor = compressor
       end
 
       def api_key
@@ -43,7 +45,7 @@ module Kafka
       end
 
       def api_version
-        2
+        3
       end
 
       def response_class
@@ -59,24 +61,32 @@ module Kafka
       end
 
       def encode(encoder)
+        encoder.write_string(@transactional_id)
         encoder.write_int16(@required_acks)
         encoder.write_int32(@timeout)
 
         encoder.write_array(@messages_for_topics) do |topic, messages_for_partition|
           encoder.write_string(topic)
 
-          encoder.write_array(messages_for_partition) do |partition, message_set|
+          encoder.write_array(messages_for_partition) do |partition, record_batch|
             encoder.write_int32(partition)
 
-            # When encoding the message set into the request, the bytesize of the message
-            # set must precede the actual data. Therefore we need to encode the entire
-            # message set into a separate buffer first.
-            encoded_message_set = Encoder.encode_with(message_set)
-
-            encoder.write_bytes(encoded_message_set)
+            record_batch.fulfill_relative_data
+            encoded_record_batch = compress(record_batch)
+            encoder.write_bytes(encoded_record_batch)
           end
         end
       end
+
+      private
+
+      def compress(record_batch)
+        if @compressor.nil?
+          Protocol::Encoder.encode_with(record_batch)
+        else
+          @compressor.compress(record_batch)
+        end
+      end
     end
   end
 end
@@ -0,0 +1,79 @@
+module Kafka
+  module Protocol
+    class Record
+      attr_reader :key, :value, :headers, :attributes, :bytesize
+      attr_accessor :offset_delta, :timestamp_delta, :offset, :create_time, :is_control_record
+
+      def initialize(
+        key: nil,
+        value:,
+        headers: {},
+        attributes: 0,
+        offset_delta: 0,
+        timestamp_delta: 0,
+        create_time: Time.now,
+        is_control_record: false
+      )
+        @key = key
+        @value = value
+        @headers = headers
+        @attributes = attributes
+
+        @offset_delta = offset_delta
+        @timestamp_delta = timestamp_delta
+        @create_time = create_time
+        @is_control_record = is_control_record
+
+        @bytesize = @key.to_s.bytesize + @value.to_s.bytesize
+      end
+
+      def encode(encoder)
+        record_buffer = StringIO.new
+
+        record_encoder = Encoder.new(record_buffer)
+
+        record_encoder.write_int8(@attributes)
+        record_encoder.write_varint(@timestamp_delta)
+        record_encoder.write_varint(@offset_delta)
+
+        record_encoder.write_varint_string(@key)
+        record_encoder.write_varint_bytes(@value)
+
+        record_encoder.write_varint_array(@headers.to_a) do |header_key, header_value|
+          record_encoder.write_varint_string(header_key)
+          record_encoder.write_varint_bytes(header_value)
+        end
+
+        encoder.write_varint_bytes(record_buffer.string)
+      end
+
+      def self.decode(decoder)
+        record_decoder = Decoder.from_string(decoder.varint_bytes)
+
+        attributes = record_decoder.int8
+        timestamp_delta = record_decoder.varint
+        offset_delta = record_decoder.varint
+
+        key = record_decoder.varint_string
+        value = record_decoder.varint_bytes
+
+        headers = {}
+        record_decoder.varint_array do
+          header_key = record_decoder.varint_string
+          header_value = record_decoder.varint_bytes
+
+          headers[header_key] = header_value
+        end
+
+        new(
+          key: key,
+          value: value,
+          headers: headers,
+          attributes: attributes,
+          offset_delta: offset_delta,
+          timestamp_delta: timestamp_delta
+        )
+      end
+    end
+  end
+end
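A hypothetical round-trip through the new Record class, using only the encode and decode paths shown above; the require line is an assumption about how the gem exposes Kafka::Protocol::Encoder and Kafka::Protocol::Decoder, not something shown in this diff:

require "stringio"
require "kafka"

record = Kafka::Protocol::Record.new(
  key: "greeting",
  value: "hello",
  headers: { "trace_id" => "abc123" }
)

buffer = StringIO.new
record.encode(Kafka::Protocol::Encoder.new(buffer))

decoded = Kafka::Protocol::Record.decode(
  Kafka::Protocol::Decoder.from_string(buffer.string)
)
decoded.value    # => "hello"
decoded.headers  # => { "trace_id" => "abc123" }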
@@ -0,0 +1,202 @@
+require 'digest/crc32'
+require 'kafka/protocol/record'
+
+module Kafka
+  module Protocol
+    class RecordBatch
+      MAGIC_BYTE = 2
+      # The size of metadata before the real record data
+      RECORD_BATCH_OVERHEAD = 49
+      # Masks to extract information from attributes
+      CODEC_ID_MASK = 0b00000111
+      IN_TRANSACTION_MASK = 0b00010000
+      IS_CONTROL_BATCH_MASK = 0b00100000
+
+      attr_reader :records, :first_offset, :first_timestamp, :partition_leader_epoch, :in_transaction, :is_control_batch, :last_offset_delta, :max_timestamp, :producer_id, :producer_epoch, :first_sequence
+
+      attr_accessor :codec_id
+
+      def initialize(
+        records: [],
+        first_offset: 0,
+        first_timestamp: Time.now,
+        partition_leader_epoch: 0,
+        codec_id: 0,
+        in_transaction: false,
+        is_control_batch: false,
+        last_offset_delta: 0,
+        producer_id: -1,
+        producer_epoch: 0,
+        first_sequence: 0,
+        max_timestamp: Time.now
+      )
+        @records = records
+        @first_offset = first_offset
+        @first_timestamp = first_timestamp
+        @codec_id = codec_id
+
+        # Records verification
+        @last_offset_delta = last_offset_delta
+        @max_timestamp = max_timestamp
+
+        # Transaction information
+        @producer_id = producer_id
+        @producer_epoch = producer_epoch
+
+        @first_sequence = first_sequence
+        @partition_leader_epoch = partition_leader_epoch
+        @in_transaction = in_transaction
+        @is_control_batch = is_control_batch
+
+        mark_control_record
+      end
+
+      def size
+        @records.size
+      end
+
+      def attributes
+        0x0000 | @codec_id |
+          (@in_transaction ? IN_TRANSACTION_MASK : 0x0) |
+          (@is_control_batch ? IS_CONTROL_BATCH_MASK : 0x0)
+      end
+
+      def encode(encoder)
+        encoder.write_int64(@first_offset)
+
+        record_batch_buffer = StringIO.new
+        record_batch_encoder = Encoder.new(record_batch_buffer)
+
+        record_batch_encoder.write_int32(@partition_leader_epoch)
+        record_batch_encoder.write_int8(MAGIC_BYTE)
+
+        body = encode_record_batch_body
+        crc = Digest::CRC32c.checksum(body)
+
+        record_batch_encoder.write_int32(crc)
+        record_batch_encoder.write(body)
+
+        encoder.write_bytes(record_batch_buffer.string)
+      end
+
+      def encode_record_batch_body
+        buffer = StringIO.new
+        encoder = Encoder.new(buffer)
+
+        encoder.write_int16(attributes)
+        encoder.write_int32(@last_offset_delta)
+        encoder.write_int64((@first_timestamp.to_f * 1000).to_i)
+        encoder.write_int64((@max_timestamp.to_f * 1000).to_i)
+
+        encoder.write_int64(@producer_id)
+        encoder.write_int16(@producer_epoch)
+        encoder.write_int32(@first_sequence)
+
+        encoder.write_int32(@records.length)
+
+        records_array = encode_record_array
+        if compressed?
+          codec = Compression.find_codec_by_id(@codec_id)
+          records_array = codec.compress(records_array)
+        end
+        encoder.write(records_array)
+
+        buffer.string
+      end
+
+      def encode_record_array
+        buffer = StringIO.new
+        encoder = Encoder.new(buffer)
+        @records.each do |record|
+          record.encode(encoder)
+        end
+        buffer.string
+      end
+
+      def compressed?
+        @codec_id != 0
+      end
+
+      def fulfill_relative_data
+        first_record = records.min_by { |record| record.create_time }
+        @first_timestamp = first_record.nil? ? Time.now : first_record.create_time
+
+        last_record = records.max_by { |record| record.create_time }
+        @max_timestamp = last_record.nil? ? Time.now : last_record.create_time
+
+        records.each_with_index do |record, index|
+          record.offset_delta = index
+          record.timestamp_delta = (record.create_time - first_timestamp).to_i
+        end
+        @last_offset_delta = records.length - 1
+      end
+
+      def self.decode(decoder)
+        first_offset = decoder.int64
+
+        record_batch_raw = decoder.bytes
+        record_batch_decoder = Decoder.from_string(record_batch_raw)
+
+        partition_leader_epoch = record_batch_decoder.int32
+        # Currently, the magic byte is only used to distinguish the legacy MessageSet
+        # from a RecordBatch, so we don't need its value here yet.
+        _magic_byte = record_batch_decoder.int8
+        _crc = record_batch_decoder.int32
+
+        attributes = record_batch_decoder.int16
+        codec_id = attributes & CODEC_ID_MASK
+        in_transaction = (attributes & IN_TRANSACTION_MASK) > 0
+        is_control_batch = (attributes & IS_CONTROL_BATCH_MASK) > 0
+
+        last_offset_delta = record_batch_decoder.int32
+        first_timestamp = Time.at(record_batch_decoder.int64 / 1000)
+        max_timestamp = Time.at(record_batch_decoder.int64 / 1000)
+
+        producer_id = record_batch_decoder.int64
+        producer_epoch = record_batch_decoder.int16
+        first_sequence = record_batch_decoder.int32
+
+        records_array_length = record_batch_decoder.int32
+        records_array_raw = record_batch_decoder.read(
+          record_batch_raw.size - RECORD_BATCH_OVERHEAD
+        )
+        if codec_id != 0
+          codec = Compression.find_codec_by_id(codec_id)
+          records_array_raw = codec.decompress(records_array_raw)
+        end
+
+        records_array_decoder = Decoder.from_string(records_array_raw)
+        records_array = []
+        until records_array_decoder.eof?
+          record = Record.decode(records_array_decoder)
+          record.offset = first_offset + record.offset_delta
+          record.create_time = first_timestamp + record.timestamp_delta
+          records_array << record
+        end
+
+        raise InsufficientDataMessage if records_array.length != records_array_length
+
+        new(
+          records: records_array,
+          first_offset: first_offset,
+          first_timestamp: first_timestamp,
+          partition_leader_epoch: partition_leader_epoch,
+          in_transaction: in_transaction,
+          is_control_batch: is_control_batch,
+          last_offset_delta: last_offset_delta,
+          producer_id: producer_id,
+          producer_epoch: producer_epoch,
+          first_sequence: first_sequence,
+          max_timestamp: max_timestamp
+        )
+      end
+
+      def mark_control_record
+        if in_transaction && is_control_batch
+          record = @records.first
+          record.is_control_record = true unless record.nil?
+        end
+      end
+    end
+  end
+end
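The attributes field packs three pieces of information into one int16, mirroring the masks defined at the top of the class: the low three bits hold the compression codec id, the 0x10 bit marks a transactional batch, and the 0x20 bit marks a control batch. A quick illustration in plain Ruby (not part of the gem; codec id 1 is gzip in the Kafka protocol):

CODEC_ID_MASK         = 0b00000111
IN_TRANSACTION_MASK   = 0b00010000
IS_CONTROL_BATCH_MASK = 0b00100000

attributes = 1 | IN_TRANSACTION_MASK        # gzip-compressed, transactional batch

attributes & CODEC_ID_MASK                  # => 1 (codec id)
(attributes & IN_TRANSACTION_MASK) > 0      # => true
(attributes & IS_CONTROL_BATCH_MASK) > 0    # => false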
data/lib/kafka/version.rb CHANGED
@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 
 module Kafka
-  VERSION = "0.7.0.alpha1"
+  VERSION = "0.7.0.alpha2"
 end
data/ruby-kafka.gemspec CHANGED
@@ -27,6 +27,8 @@ Gem::Specification.new do |spec|
   spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
   spec.require_paths = ["lib"]
 
+  spec.add_dependency 'digest-crc'
+
   spec.add_development_dependency "bundler", ">= 1.9.5"
   spec.add_development_dependency "rake", "~> 10.0"
   spec.add_development_dependency "rspec"
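The new digest-crc runtime dependency is there because the v2 record batch header carries a CRC-32C (Castagnoli) checksum of the batch body, which RecordBatch#encode computes with Digest::CRC32c. A minimal sketch of that call; the require path is the digest-crc gem's documented entry point rather than something shown in this diff:

require "digest/crc32c"

body = "encoded record batch body"
crc  = Digest::CRC32c.checksum(body) # => Integer, written as an int32 into the batch header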
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: ruby-kafka
 version: !ruby/object:Gem::Version
-  version: 0.7.0.alpha1
+  version: 0.7.0.alpha2
 platform: ruby
 authors:
 - Daniel Schierbeck
@@ -10,6 +10,20 @@ bindir: exe
 cert_chain: []
 date: 2018-05-24 00:00:00.000000000 Z
 dependencies:
+- !ruby/object:Gem::Dependency
+  name: digest-crc
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
 - !ruby/object:Gem::Dependency
   name: bundler
   requirement: !ruby/object:Gem::Requirement
@@ -379,6 +393,8 @@ files:
 - lib/kafka/protocol/offset_fetch_response.rb
 - lib/kafka/protocol/produce_request.rb
 - lib/kafka/protocol/produce_response.rb
+- lib/kafka/protocol/record.rb
+- lib/kafka/protocol/record_batch.rb
 - lib/kafka/protocol/request_message.rb
 - lib/kafka/protocol/sasl_handshake_request.rb
 - lib/kafka/protocol/sasl_handshake_response.rb