ruby-kafka 0.7.0.alpha1 → 0.7.0.alpha2
- checksums.yaml +4 -4
- data/.circleci/config.yml +0 -33
- data/lib/kafka.rb +4 -0
- data/lib/kafka/compressor.rb +33 -14
- data/lib/kafka/consumer.rb +6 -2
- data/lib/kafka/fetched_batch.rb +1 -1
- data/lib/kafka/fetched_message.rb +5 -0
- data/lib/kafka/message_buffer.rb +1 -1
- data/lib/kafka/produce_operation.rb +4 -5
- data/lib/kafka/protocol/decoder.rb +63 -1
- data/lib/kafka/protocol/encoder.rb +62 -1
- data/lib/kafka/protocol/fetch_request.rb +4 -1
- data/lib/kafka/protocol/fetch_response.rb +32 -5
- data/lib/kafka/protocol/produce_request.rb +20 -10
- data/lib/kafka/protocol/record.rb +79 -0
- data/lib/kafka/protocol/record_batch.rb +202 -0
- data/lib/kafka/version.rb +1 -1
- data/ruby-kafka.gemspec +2 -0
- metadata +17 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 2ddcaacf16a7990d53463e5302a389cae33c7782d279ada36a7b32d229a302a5
+  data.tar.gz: 872a5d08ec43a2e4ad550b362bacbcf3c5a523de8bcea49ebc240086b7bba74d
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 4352915740c56ff947cf91e951a9298f39a78db7c51421456a8db2c037f642ab6f791bfe3eb1bbeff01700d573e342a020b0ed7896c481996cf59d6e21d3b4f2
+  data.tar.gz: 8dec7bfdb51ff8321c176b29c6a3cf378b837c807aaf050b8cefe8ce5224044f016f16234f9dfae5104946882fe7817d69650206219e6640f44738c60bff682d
data/.circleci/config.yml
CHANGED
@@ -11,38 +11,6 @@ jobs:
       - run: bundle exec rspec
       - run: bundle exec rubocop
 
-  kafka-0.10:
-    docker:
-      - image: circleci/ruby:2.4.1-node
-        environment:
-          LOG_LEVEL: DEBUG
-      - image: wurstmeister/zookeeper
-      - image: wurstmeister/kafka:0.10.2.1
-        environment:
-          KAFKA_ADVERTISED_HOST_NAME: localhost
-          KAFKA_ADVERTISED_PORT: 9092
-          KAFKA_PORT: 9092
-          KAFKA_ZOOKEEPER_CONNECT: localhost:2181
-          KAFKA_DELETE_TOPIC_ENABLE: true
-      - image: wurstmeister/kafka:0.10.2.1
-        environment:
-          KAFKA_ADVERTISED_HOST_NAME: localhost
-          KAFKA_ADVERTISED_PORT: 9093
-          KAFKA_PORT: 9093
-          KAFKA_ZOOKEEPER_CONNECT: localhost:2181
-          KAFKA_DELETE_TOPIC_ENABLE: true
-      - image: wurstmeister/kafka:0.10.2.1
-        environment:
-          KAFKA_ADVERTISED_HOST_NAME: localhost
-          KAFKA_ADVERTISED_PORT: 9094
-          KAFKA_PORT: 9094
-          KAFKA_ZOOKEEPER_CONNECT: localhost:2181
-          KAFKA_DELETE_TOPIC_ENABLE: true
-    steps:
-      - checkout
-      - run: bundle install --path vendor/bundle
-      - run: bundle exec rspec --profile --tag functional spec/functional
-
   kafka-0.11:
     docker:
       - image: circleci/ruby:2.4.1-node

@@ -110,6 +78,5 @@ workflows:
   test:
     jobs:
       - unit
-      - kafka-0.10
       - kafka-0.11
      - kafka-1.0
data/lib/kafka.rb
CHANGED
data/lib/kafka/compressor.rb
CHANGED
@@ -30,13 +30,31 @@ module Kafka
       @instrumenter = instrumenter
     end
 
-    # @param
+    # @param record_batch [Protocol::RecordBatch]
     # @param offset [Integer] used to simulate broker behaviour in tests
-    # @return [Protocol::
-    def compress(
+    # @return [Protocol::RecordBatch]
+    def compress(record_batch, offset: -1)
+      if record_batch.is_a?(Protocol::RecordBatch)
+        compress_record_batch(record_batch)
+      else
+        # Deprecated message set format
+        compress_message_set(record_batch, offset)
+      end
+    end
+
+    private
+
+    def compress_message_set(message_set, offset)
       return message_set if @codec.nil? || message_set.size < @threshold
 
-
+      data = Protocol::Encoder.encode_with(message_set)
+      compressed_data = @codec.compress(data)
+
+      @instrumenter.instrument("compress.compressor") do |notification|
+        notification[:message_count] = message_set.size
+        notification[:uncompressed_bytesize] = data.bytesize
+        notification[:compressed_bytesize] = compressed_data.bytesize
+      end
 
       wrapper_message = Protocol::Message.new(
         value: compressed_data,

@@ -47,20 +65,21 @@ module Kafka
       Protocol::MessageSet.new(messages: [wrapper_message])
     end
 
-
+    def compress_record_batch(record_batch)
+      if @codec.nil? || record_batch.size < @threshold
+        record_batch.codec_id = 0
+        return Protocol::Encoder.encode_with(record_batch)
+      end
 
-
-      data = Protocol::Encoder.encode_with(
+      record_batch.codec_id = @codec.codec_id
+      data = Protocol::Encoder.encode_with(record_batch)
 
       @instrumenter.instrument("compress.compressor") do |notification|
-
-
-        notification[:message_count] = message_set.size
-        notification[:uncompressed_bytesize] = data.bytesize
-        notification[:compressed_bytesize] = compressed_data.bytesize
-
-        compressed_data
+        notification[:message_count] = record_batch.size
+        notification[:compressed_bytesize] = data.bytesize
       end
+
+      data
     end
   end
 end
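For context, a minimal usage sketch of the reworked compressor. The constructor keywords (codec_name:, threshold:, instrumenter:) are assumptions about surrounding gem code that this diff does not show; only #compress and its dispatch come from the change above. Passing a Protocol::RecordBatch takes the new code path, anything else falls back to the deprecated message set path.

require "kafka"

# Assumed constructor keywords; not part of this diff.
compressor = Kafka::Compressor.new(
  codec_name: :gzip,   # nil would skip compression entirely
  threshold: 1,        # compress batches with at least one record
  instrumenter: Kafka::Instrumenter.new(client_id: "example")
)

batch = Kafka::Protocol::RecordBatch.new(
  records: [Kafka::Protocol::Record.new(value: "hello", key: "greeting")]
)

# A RecordBatch goes through compress_record_batch: the codec id is stamped on
# the batch and the already-encoded bytes come back as a String.
encoded = compressor.compress(batch)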
data/lib/kafka/consumer.rb
CHANGED
@@ -221,7 +221,7 @@ module Kafka
 
         @instrumenter.instrument("process_message.consumer", notification) do
           begin
-            yield message
+            yield message unless message.is_control_record
             @current_offsets[message.topic][message.partition] = message.offset
           rescue => e
             location = "#{message.topic}/#{message.partition} at offset #{message.offset}"

@@ -289,6 +289,9 @@ module Kafka
 
       batches.each do |batch|
         unless batch.empty?
+          raw_messages = batch.messages
+          batch.messages = raw_messages.reject(&:is_control_record)
+
           notification = {
             topic: batch.topic,
             partition: batch.partition,

@@ -314,9 +317,10 @@ module Kafka
            @logger.error "Exception raised when processing #{location} -- #{e.class}: #{e}\n#{backtrace}"
 
            raise ProcessingError.new(batch.topic, batch.partition, offset_range)
+          ensure
+            batch.messages = raw_messages
          end
        end
-
        mark_message_as_processed(batch.messages.last) if automatically_mark_as_processed
 
        # We've successfully processed a batch from the partition, so we can clear
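Both consumer changes share one idea: transactional control records (commit/abort markers) should never reach user code, but the unfiltered list must be restored in the ensure block so offset bookkeeping still sees every message. A standalone sketch of that filter-and-restore pattern (the Struct is a stand-in for the gem's fetched message objects, not its API):

FakeMessage = Struct.new(:value, :is_control_record)

messages = [
  FakeMessage.new("m1", false),
  FakeMessage.new("txn marker", true),   # control record: never yielded
  FakeMessage.new("m2", false),
]

raw_messages = messages
begin
  visible = raw_messages.reject(&:is_control_record)
  visible.each { |m| puts m.value }      # prints "m1" and "m2" only
ensure
  messages = raw_messages                # original list restored for offset tracking
end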
data/lib/kafka/fetched_batch.rb
CHANGED
data/lib/kafka/message_buffer.rb
CHANGED
@@ -17,7 +17,7 @@ module Kafka
     end
 
     def write(value:, key:, topic:, partition:, create_time: Time.now)
-      message = Protocol::
+      message = Protocol::Record.new(key: key, value: value, create_time: create_time)
 
       buffer_for(topic, partition) << message
 
data/lib/kafka/produce_operation.rb
CHANGED
@@ -1,6 +1,7 @@
 # frozen_string_literal: true
 
 require "kafka/protocol/message_set"
+require "kafka/protocol/record_batch"
 
 module Kafka
   # A produce operation attempts to send all messages in a buffer to the Kafka cluster.

@@ -86,12 +87,10 @@ module Kafka
 
       messages_for_topics = {}
 
-      message_buffer.each do |topic, partition,
-
-        message_set = @compressor.compress(message_set)
-
+      message_buffer.each do |topic, partition, records|
+        record_batch = Protocol::RecordBatch.new(records: records)
         messages_for_topics[topic] ||= {}
-        messages_for_topics[topic][partition] =
+        messages_for_topics[topic][partition] = record_batch
       end
 
       response = broker.produce(
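A short sketch of the new buffering flow, assuming the gem (with these changes) is installed and loaded: MessageBuffer#write now stores Protocol::Record objects, and the produce operation wraps each topic/partition's records in a single Protocol::RecordBatch instead of compressing a message set up front.

require "kafka"

buffer = Kafka::MessageBuffer.new
buffer.write(value: "hello", key: "greeting", topic: "events", partition: 0)
buffer.write(value: "world", key: "greeting", topic: "events", partition: 0)

messages_for_topics = {}
buffer.each do |topic, partition, records|
  # One record batch per topic/partition, mirroring the loop above.
  messages_for_topics[topic] ||= {}
  messages_for_topics[topic][partition] = Kafka::Protocol::RecordBatch.new(records: records)
end

messages_for_topics["events"][0].size  # => 2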
data/lib/kafka/protocol/decoder.rb
CHANGED
@@ -2,11 +2,12 @@
 
 module Kafka
   module Protocol
-
     # A decoder wraps an IO object, making it easy to read specific data types
     # from it. The Kafka protocol is not self-describing, so a client must call
     # these methods in just the right order for things to work.
     class Decoder
+      VARINT_MASK = 0b10000000
+
       def self.from_string(str)
         new(StringIO.new(str))
       end

@@ -22,6 +23,16 @@ module Kafka
       @io.eof?
     end
 
+    # Get some next bytes without touching the current io offset
+    #
+    # @return [Integer]
+    def peek(offset, length)
+      data = @io.read(offset + length)
+      return [] if data.nil?
+      @io.ungetc(data)
+      data.bytes[offset, offset + length] || []
+    end
+
     # Decodes an 8-bit boolean from the IO object.
     #
     # @return [Boolean]

@@ -70,6 +81,15 @@ module Kafka
       size.times.map(&block)
     end
 
+    # Decodes an array from the IO object.
+    # Just like #array except the size is in varint format
+    #
+    # @return [Array]
+    def varint_array(&block)
+      size = varint
+      size.times.map(&block)
+    end
+
     # Decodes a string from the IO object.
     #
     # @return [String]

@@ -83,6 +103,35 @@ module Kafka
       end
     end
 
+    # Decodes a string from the IO object, the size is in varint format
+    #
+    # @return [String]
+    def varint_string
+      size = varint
+
+      if size == -1
+        nil
+      else
+        read(size)
+      end
+    end
+
+    # Read an integer under varints serializing from the IO object.
+    # https://developers.google.com/protocol-buffers/docs/encoding#varints
+    #
+    # @return [Integer]
+    def varint
+      group = 0
+      data = 0
+      loop do
+        chunk = int8
+        data |= (chunk & (~VARINT_MASK)) << group
+        group += 7
+        break if (chunk & VARINT_MASK) == 0
+      end
+      data & 0b1 != 0 ? ~(data >> 1) : (data >> 1)
+    end
+
     # Decodes a list of bytes from the IO object.
     #
     # @return [String]

@@ -96,6 +145,19 @@ module Kafka
       end
     end
 
+    # Decodes a list of bytes from the IO object. The size is in varint format
+    #
+    # @return [String]
+    def varint_bytes
+      size = varint
+
+      if size == -1
+        nil
+      else
+        read(size)
+      end
+    end
+
     # Reads the specified number of bytes from the IO object, returning them
     # as a String.
     #
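The varint reader above follows the protobuf-style scheme: each byte contributes 7 payload bits, the high bit marks continuation, and the accumulated value is zigzag-decoded so small negative numbers stay short on the wire. A self-contained illustration of the same arithmetic:

VARINT_MASK = 0b10000000

def decode_varint(bytes)
  group = 0
  data = 0
  bytes.each do |chunk|
    data |= (chunk & ~VARINT_MASK) << group   # low 7 bits, shifted into place
    group += 7
    break if (chunk & VARINT_MASK) == 0       # high bit clear: last byte
  end
  data.odd? ? ~(data >> 1) : (data >> 1)      # zigzag decode
end

decode_varint([0x96, 0x01])  # => 75   (0x96 has the continuation bit set)
decode_varint([0x01])        # => -1   (odd raw values map to negatives)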
@@ -4,10 +4,10 @@ require "stringio"
|
|
4
4
|
|
5
5
|
module Kafka
|
6
6
|
module Protocol
|
7
|
-
|
8
7
|
# An encoder wraps an IO object, making it easy to write specific data types
|
9
8
|
# to it.
|
10
9
|
class Encoder
|
10
|
+
VARINT_MASK = 0b10000000
|
11
11
|
|
12
12
|
# Initializes a new encoder.
|
13
13
|
#
|
@@ -85,6 +85,20 @@ module Kafka
|
|
85
85
|
end
|
86
86
|
end
|
87
87
|
|
88
|
+
# Writes an array to the IO object.
|
89
|
+
# Just like #write_array, unless the size is under varint format
|
90
|
+
#
|
91
|
+
# @param array [Array]
|
92
|
+
# @return [nil]
|
93
|
+
def write_varint_array(array, &block)
|
94
|
+
if array.nil?
|
95
|
+
write_varint(-1)
|
96
|
+
else
|
97
|
+
write_varint(array.size)
|
98
|
+
array.each(&block)
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
88
102
|
# Writes a string to the IO object.
|
89
103
|
#
|
90
104
|
# @param string [String]
|
@@ -98,6 +112,40 @@ module Kafka
|
|
98
112
|
end
|
99
113
|
end
|
100
114
|
|
115
|
+
# Writes a string to the IO object, the size is under varint format
|
116
|
+
#
|
117
|
+
# @param string [String]
|
118
|
+
# @return [nil]
|
119
|
+
def write_varint_string(string)
|
120
|
+
if string.nil?
|
121
|
+
write_varint(-1)
|
122
|
+
else
|
123
|
+
write_varint(string.bytesize)
|
124
|
+
write(string)
|
125
|
+
end
|
126
|
+
end
|
127
|
+
|
128
|
+
# Writes an integer under varints serializing to the IO object.
|
129
|
+
# https://developers.google.com/protocol-buffers/docs/encoding#varints
|
130
|
+
#
|
131
|
+
# @param string [Integer]
|
132
|
+
# @return [nil]
|
133
|
+
def write_varint(int)
|
134
|
+
int = int << 1
|
135
|
+
int = ~int | 1 if int < 0
|
136
|
+
|
137
|
+
loop do
|
138
|
+
chunk = int & (~VARINT_MASK)
|
139
|
+
int = int >> 7
|
140
|
+
if int == 0
|
141
|
+
write_int8(chunk)
|
142
|
+
return
|
143
|
+
else
|
144
|
+
write_int8(chunk | VARINT_MASK)
|
145
|
+
end
|
146
|
+
end
|
147
|
+
end
|
148
|
+
|
101
149
|
# Writes a byte string to the IO object.
|
102
150
|
#
|
103
151
|
# @param bytes [String]
|
@@ -111,6 +159,19 @@ module Kafka
|
|
111
159
|
end
|
112
160
|
end
|
113
161
|
|
162
|
+
# Writes a byte string to the IO object, the size is under varint format
|
163
|
+
#
|
164
|
+
# @param bytes [String]
|
165
|
+
# @return [nil]
|
166
|
+
def write_varint_bytes(bytes)
|
167
|
+
if bytes.nil?
|
168
|
+
write_varint(-1)
|
169
|
+
else
|
170
|
+
write_varint(bytes.bytesize)
|
171
|
+
write(bytes)
|
172
|
+
end
|
173
|
+
end
|
174
|
+
|
114
175
|
# Encodes an object into a new buffer.
|
115
176
|
#
|
116
177
|
# @param object [#encode] the object that will encode itself.
|
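A round-trip sketch tying the new writer and reader halves together, assuming the gem (with these changes) is loaded; note that nil strings and byte blobs are written as a varint length of -1.

require "stringio"
require "kafka"

buffer = StringIO.new
encoder = Kafka::Protocol::Encoder.new(buffer)
encoder.write_varint(300)
encoder.write_varint_string("key")
encoder.write_varint_bytes(nil)        # encoded as varint -1

decoder = Kafka::Protocol::Decoder.from_string(buffer.string)
decoder.varint          # => 300
decoder.varint_string   # => "key"
decoder.varint_bytes    # => nil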
data/lib/kafka/protocol/fetch_request.rb
CHANGED
@@ -17,6 +17,8 @@ module Kafka
   #     MaxBytes => int32
   #
   class FetchRequest
+    ISOLATION_READ_UNCOMMITTED = 0
+    ISOLATION_READ_COMMITTED = 1
 
     # @param max_wait_time [Integer]
     # @param min_bytes [Integer]

@@ -34,7 +36,7 @@ module Kafka
     end
 
     def api_version
-
+      4
     end
 
     def response_class

@@ -46,6 +48,7 @@ module Kafka
       encoder.write_int32(@max_wait_time)
       encoder.write_int32(@min_bytes)
       encoder.write_int32(@max_bytes)
+      encoder.write_int8(ISOLATION_READ_COMMITTED)
 
       encoder.write_array(@topics) do |topic, partitions|
         encoder.write_string(topic)
data/lib/kafka/protocol/fetch_response.rb
CHANGED
@@ -1,6 +1,7 @@
 # frozen_string_literal: true
 
 require "kafka/protocol/message_set"
+require "kafka/protocol/record_batch"
 
 module Kafka
   module Protocol

@@ -17,15 +18,20 @@ module Kafka
   #     MessageSetSize => int32
   #
   class FetchResponse
+    MAGIC_BYTE_OFFSET = 16
+    MAGIC_BYTE_LENGTH = 1
+
     class FetchedPartition
       attr_reader :partition, :error_code
-      attr_reader :highwater_mark_offset, :messages
+      attr_reader :highwater_mark_offset, :last_stable_offset, :aborted_transactions, :messages
 
-      def initialize(partition:, error_code:, highwater_mark_offset:, messages:)
+      def initialize(partition:, error_code:, highwater_mark_offset:, last_stable_offset:, aborted_transactions:, messages:)
        @partition = partition
        @error_code = error_code
        @highwater_mark_offset = highwater_mark_offset
        @messages = messages
+       @last_stable_offset = last_stable_offset
+       @aborted_transactions = aborted_transactions
      end
    end
 

@@ -55,15 +61,36 @@ module Kafka
       partition = decoder.int32
       error_code = decoder.int16
       highwater_mark_offset = decoder.int64
+      last_stable_offset = decoder.int64
+
+      aborted_transactions = decoder.array do
+        producer_id = decoder.int64
+        first_offset = decoder.int64
+        {
+          producer_id: producer_id,
+          first_offset: first_offset
+        }
+      end
+
+      messages_decoder = Decoder.from_string(decoder.bytes)
+      messages = []
+      magic_byte = messages_decoder.peek(MAGIC_BYTE_OFFSET, MAGIC_BYTE_LENGTH)[0].to_i
 
-
-
+      if magic_byte == RecordBatch::MAGIC_BYTE
+        record_batch = RecordBatch.decode(messages_decoder)
+        messages = record_batch.records
+      else
+        message_set = MessageSet.decode(messages_decoder)
+        messages = message_set.messages
+      end
 
       FetchedPartition.new(
         partition: partition,
         error_code: error_code,
         highwater_mark_offset: highwater_mark_offset,
-
+        last_stable_offset: last_stable_offset,
+        aborted_transactions: aborted_transactions,
+        messages: messages
       )
     end
 
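The magic-byte sniffing works because byte 16 is the magic byte in both layouts: a legacy message set starts with offset (8 bytes) + message size (4) + crc (4), while a v2 record batch starts with base offset (8) + batch length (4) + partition leader epoch (4). A sketch of the same dispatch, assuming the gem (with these changes) is loaded; the raw payload is built here from an encoded batch, whereas in the gem it comes from decoder.bytes.

require "stringio"
require "kafka"

batch = Kafka::Protocol::RecordBatch.new(records: [Kafka::Protocol::Record.new(value: "hi")])
batch.fulfill_relative_data
buffer = StringIO.new
batch.encode(Kafka::Protocol::Encoder.new(buffer))

messages_decoder = Kafka::Protocol::Decoder.from_string(buffer.string)
magic = messages_decoder.peek(16, 1)[0].to_i   # peek does not consume the stream

messages =
  if magic == Kafka::Protocol::RecordBatch::MAGIC_BYTE   # 2
    Kafka::Protocol::RecordBatch.decode(messages_decoder).records
  else
    Kafka::Protocol::MessageSet.decode(messages_decoder).messages
  end
messages.map(&:value)  # => ["hi"]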
data/lib/kafka/protocol/produce_request.rb
CHANGED
@@ -27,15 +27,17 @@ module Kafka
   #     Value => bytes
   #
   class ProduceRequest
-    attr_reader :required_acks, :timeout, :messages_for_topics
+    attr_reader :transactional_id, :required_acks, :timeout, :messages_for_topics, :compressor
 
     # @param required_acks [Integer]
     # @param timeout [Integer]
     # @param messages_for_topics [Hash]
-    def initialize(required_acks:, timeout:, messages_for_topics:)
+    def initialize(transactional_id: nil, required_acks:, timeout:, messages_for_topics:, compressor: nil)
+      @transactional_id = transactional_id
       @required_acks = required_acks
       @timeout = timeout
       @messages_for_topics = messages_for_topics
+      @compressor = compressor
     end
 
     def api_key

@@ -43,7 +45,7 @@ module Kafka
     end
 
     def api_version
-
+      3
     end
 
     def response_class

@@ -59,24 +61,32 @@ module Kafka
     end
 
     def encode(encoder)
+      encoder.write_string(@transactional_id)
       encoder.write_int16(@required_acks)
       encoder.write_int32(@timeout)
 
       encoder.write_array(@messages_for_topics) do |topic, messages_for_partition|
         encoder.write_string(topic)
 
-        encoder.write_array(messages_for_partition) do |partition,
+        encoder.write_array(messages_for_partition) do |partition, record_batch|
           encoder.write_int32(partition)
 
-
-
-
-          encoded_message_set = Encoder.encode_with(message_set)
-
-          encoder.write_bytes(encoded_message_set)
+          record_batch.fulfill_relative_data
+          encoded_record_batch = compress(record_batch)
+          encoder.write_bytes(encoded_record_batch)
         end
       end
     end
+
+    private
+
+    def compress(record_batch)
+      if @compressor.nil?
+        Protocol::Encoder.encode_with(record_batch)
+      else
+        @compressor.compress(record_batch)
+      end
+    end
   end
  end
end
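A hedged sketch of building the v3 produce request directly (normally ProduceOperation assembles this), assuming the gem (with these changes) is loaded; with compressor: nil the batch is encoded uncompressed.

require "kafka"

batch = Kafka::Protocol::RecordBatch.new(
  records: [Kafka::Protocol::Record.new(value: "hello")]
)

request = Kafka::Protocol::ProduceRequest.new(
  transactional_id: nil,                               # not part of a transaction
  required_acks: -1,                                   # wait for all in-sync replicas
  timeout: 10_000,                                     # broker-side timeout in ms
  messages_for_topics: { "events" => { 0 => batch } }, # one batch per topic/partition
  compressor: nil
)

request.api_version  # => 3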
data/lib/kafka/protocol/record.rb
ADDED
@@ -0,0 +1,79 @@
+module Kafka
+  module Protocol
+    class Record
+      attr_reader :key, :value, :headers, :attributes, :bytesize
+      attr_accessor :offset_delta, :timestamp_delta, :offset, :create_time, :is_control_record
+
+      def initialize(
+          key: nil,
+          value:,
+          headers: {},
+          attributes: 0,
+          offset_delta: 0,
+          timestamp_delta: 0,
+          create_time: Time.now,
+          is_control_record: false
+      )
+        @key = key
+        @value = value
+        @headers = headers
+        @attributes = attributes
+
+        @offset_delta = offset_delta
+        @timestamp_delta = timestamp_delta
+        @create_time = create_time
+        @is_control_record = is_control_record
+
+        @bytesize = @key.to_s.bytesize + @value.to_s.bytesize
+      end
+
+      def encode(encoder)
+        record_buffer = StringIO.new
+
+        record_encoder = Encoder.new(record_buffer)
+
+        record_encoder.write_int8(@attributes)
+        record_encoder.write_varint(@timestamp_delta)
+        record_encoder.write_varint(@offset_delta)
+
+        record_encoder.write_varint_string(@key)
+        record_encoder.write_varint_bytes(@value)
+
+        record_encoder.write_varint_array(@headers.to_a) do |header_key, header_value|
+          record_encoder.write_varint_string(header_key)
+          record_encoder.write_varint_bytes(header_value)
+        end
+
+        encoder.write_varint_bytes(record_buffer.string)
+      end
+
+      def self.decode(decoder)
+        record_decoder = Decoder.from_string(decoder.varint_bytes)
+
+        attributes = record_decoder.int8
+        timestamp_delta = record_decoder.varint
+        offset_delta = record_decoder.varint
+
+        key = record_decoder.varint_string
+        value = record_decoder.varint_bytes
+
+        headers = {}
+        record_decoder.varint_array do
+          header_key = record_decoder.varint_string
+          header_value = record_decoder.varint_bytes
+
+          headers[header_key] = header_value
+        end
+
+        new(
+          key: key,
+          value: value,
+          headers: headers,
+          attributes: attributes,
+          offset_delta: offset_delta,
+          timestamp_delta: timestamp_delta
+        )
+      end
+    end
+  end
+end
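A round-trip sketch for the new Record class, assuming the gem (with these changes) is loaded: the record serializes itself as a varint-length-prefixed blob, so encoding into a buffer and decoding it back returns the same key, value and headers.

require "stringio"
require "kafka"

record = Kafka::Protocol::Record.new(
  key: "greeting",
  value: "hello",
  headers: { "source" => "example" }
)

buffer = StringIO.new
record.encode(Kafka::Protocol::Encoder.new(buffer))

decoded = Kafka::Protocol::Record.decode(
  Kafka::Protocol::Decoder.from_string(buffer.string)
)
decoded.key      # => "greeting"
decoded.value    # => "hello"
decoded.headers  # => { "source" => "example" }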
data/lib/kafka/protocol/record_batch.rb
ADDED
@@ -0,0 +1,202 @@
+require 'digest/crc32'
+require 'kafka/protocol/record'
+
+module Kafka
+  module Protocol
+    class RecordBatch
+      MAGIC_BYTE = 2
+      # The size of metadata before the real record data
+      RECORD_BATCH_OVERHEAD = 49
+      # Masks to extract information from attributes
+      CODEC_ID_MASK = 0b00000111
+      IN_TRANSACTION_MASK = 0b00010000
+      IS_CONTROL_BATCH_MASK = 0b00100000
+
+      attr_reader :records, :first_offset, :first_timestamp, :partition_leader_epoch, :in_transaction, :is_control_batch, :last_offset_delta, :max_timestamp, :producer_id, :producer_epoch, :first_sequence
+
+      attr_accessor :codec_id
+
+      def initialize(
+          records: [],
+          first_offset: 0,
+          first_timestamp: Time.now,
+          partition_leader_epoch: 0,
+          codec_id: 0,
+          in_transaction: false,
+          is_control_batch: false,
+          last_offset_delta: 0,
+          producer_id: -1,
+          producer_epoch: 0,
+          first_sequence: 0,
+          max_timestamp: Time.now
+      )
+        @records = records
+        @first_offset = first_offset
+        @first_timestamp = first_timestamp
+        @codec_id = codec_id
+
+        # Records verification
+        @last_offset_delta = last_offset_delta
+        @max_timestamp = max_timestamp
+
+        # Transaction information
+        @producer_id = producer_id
+        @producer_epoch = producer_epoch
+
+        @first_sequence = first_sequence
+        @partition_leader_epoch = partition_leader_epoch
+        @in_transaction = in_transaction
+        @is_control_batch = is_control_batch
+
+        mark_control_record
+      end
+
+      def size
+        @records.size
+      end
+
+      def attributes
+        0x0000 | @codec_id |
+          (@in_transaction ? IN_TRANSACTION_MASK : 0x0) |
+          (@is_control_batch ? IS_CONTROL_BATCH_MASK : 0x0)
+      end
+
+      def encode(encoder)
+        encoder.write_int64(@first_offset)
+
+        record_batch_buffer = StringIO.new
+        record_batch_encoder = Encoder.new(record_batch_buffer)
+
+        record_batch_encoder.write_int32(@partition_leader_epoch)
+        record_batch_encoder.write_int8(MAGIC_BYTE)
+
+        body = encode_record_batch_body
+        crc = Digest::CRC32c.checksum(body)
+
+        record_batch_encoder.write_int32(crc)
+        record_batch_encoder.write(body)
+
+        encoder.write_bytes(record_batch_buffer.string)
+      end
+
+      def encode_record_batch_body
+        buffer = StringIO.new
+        encoder = Encoder.new(buffer)
+
+        encoder.write_int16(attributes)
+        encoder.write_int32(@last_offset_delta)
+        encoder.write_int64((@first_timestamp.to_f * 1000).to_i)
+        encoder.write_int64((@max_timestamp.to_f * 1000).to_i)
+
+        encoder.write_int64(@producer_id)
+        encoder.write_int16(@producer_epoch)
+        encoder.write_int32(@first_sequence)
+
+        encoder.write_int32(@records.length)
+
+        records_array = encode_record_array
+        if compressed?
+          codec = Compression.find_codec_by_id(@codec_id)
+          records_array = codec.compress(records_array)
+        end
+        encoder.write(records_array)
+
+        buffer.string
+      end
+
+      def encode_record_array
+        buffer = StringIO.new
+        encoder = Encoder.new(buffer)
+        @records.each do |record|
+          record.encode(encoder)
+        end
+        buffer.string
+      end
+
+      def compressed?
+        @codec_id != 0
+      end
+
+      def fulfill_relative_data
+        first_record = records.min_by { |record| record.create_time }
+        @first_timestamp = first_record.nil? ? Time.now : first_record.create_time
+
+        last_record = records.max_by { |record| record.create_time }
+        @max_timestamp = last_record.nil? ? Time.now : last_record.create_time
+
+        records.each_with_index do |record, index|
+          record.offset_delta = index
+          record.timestamp_delta = (record.create_time - first_timestamp).to_i
+        end
+        @last_offset_delta = records.length - 1
+      end
+
+      def self.decode(decoder)
+        first_offset = decoder.int64
+
+        record_batch_raw = decoder.bytes
+        record_batch_decoder = Decoder.from_string(record_batch_raw)
+
+        partition_leader_epoch = record_batch_decoder.int32
+        # Currently, the magic byte is used to distingush legacy MessageSet and
+        # RecordBatch. Therefore, we don't care about magic byte here yet.
+        _magic_byte = record_batch_decoder.int8
+        _crc = record_batch_decoder.int32
+
+        attributes = record_batch_decoder.int16
+        codec_id = attributes & CODEC_ID_MASK
+        in_transaction = (attributes & IN_TRANSACTION_MASK) > 0
+        is_control_batch = (attributes & IS_CONTROL_BATCH_MASK) > 0
+
+        last_offset_delta = record_batch_decoder.int32
+        first_timestamp = Time.at(record_batch_decoder.int64 / 1000)
+        max_timestamp = Time.at(record_batch_decoder.int64 / 1000)
+
+        producer_id = record_batch_decoder.int64
+        producer_epoch = record_batch_decoder.int16
+        first_sequence = record_batch_decoder.int32
+
+        records_array_length = record_batch_decoder.int32
+        records_array_raw = record_batch_decoder.read(
+          record_batch_raw.size - RECORD_BATCH_OVERHEAD
+        )
+        if codec_id != 0
+          codec = Compression.find_codec_by_id(codec_id)
+          records_array_raw = codec.decompress(records_array_raw)
+        end
+
+        records_array_decoder = Decoder.from_string(records_array_raw)
+        records_array = []
+        until records_array_decoder.eof?
+          record = Record.decode(records_array_decoder)
+          record.offset = first_offset + record.offset_delta
+          record.create_time = first_timestamp + record.timestamp_delta
+          records_array << record
+        end
+
+        raise InsufficientDataMessage if records_array.length != records_array_length
+
+        new(
+          records: records_array,
+          first_offset: first_offset,
+          first_timestamp: first_timestamp,
+          partition_leader_epoch: partition_leader_epoch,
+          in_transaction: in_transaction,
+          is_control_batch: is_control_batch,
+          last_offset_delta: last_offset_delta,
+          producer_id: producer_id,
+          producer_epoch: producer_epoch,
+          first_sequence: first_sequence,
+          max_timestamp: max_timestamp
+        )
+      end
+
+      def mark_control_record
+        if in_transaction && is_control_batch
+          record = @records.first
+          record.is_control_record = true unless record.nil?
+        end
+      end
+    end
+  end
+end
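Putting the new pieces together, a sketch of serializing and re-reading a whole batch, assuming the gem (with these changes) is loaded; fulfill_relative_data is what ProduceRequest#encode calls to fill in offset and timestamp deltas before the batch is written.

require "stringio"
require "kafka"

records = [
  Kafka::Protocol::Record.new(value: "first"),
  Kafka::Protocol::Record.new(value: "second"),
]

batch = Kafka::Protocol::RecordBatch.new(records: records, first_offset: 0)
batch.fulfill_relative_data   # sets offset_delta, timestamp_delta, last_offset_delta

buffer = StringIO.new
batch.encode(Kafka::Protocol::Encoder.new(buffer))

decoded = Kafka::Protocol::RecordBatch.decode(
  Kafka::Protocol::Decoder.from_string(buffer.string)
)
decoded.records.map(&:value)   # => ["first", "second"]
decoded.records.map(&:offset)  # => [0, 1]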
data/lib/kafka/version.rb
CHANGED
data/ruby-kafka.gemspec
CHANGED
@@ -27,6 +27,8 @@ Gem::Specification.new do |spec|
   spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
   spec.require_paths = ["lib"]
 
+  spec.add_dependency 'digest-crc'
+
   spec.add_development_dependency "bundler", ">= 1.9.5"
   spec.add_development_dependency "rake", "~> 10.0"
   spec.add_development_dependency "rspec"
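The new runtime dependency appears to exist for the record batch checksum: the digest-crc gem provides the CRC-32C (Castagnoli) variant that Kafka's v2 record batch format uses, which the stdlib Zlib.crc32 (plain CRC-32) does not cover. A minimal usage sketch:

require "digest/crc32c"   # provided by the digest-crc gem

Digest::CRC32c.checksum("record batch body bytes")  # => Integer checksum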
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: ruby-kafka
 version: !ruby/object:Gem::Version
-  version: 0.7.0.alpha1
+  version: 0.7.0.alpha2
 platform: ruby
 authors:
 - Daniel Schierbeck

@@ -10,6 +10,20 @@ bindir: exe
 cert_chain: []
 date: 2018-05-24 00:00:00.000000000 Z
 dependencies:
+- !ruby/object:Gem::Dependency
+  name: digest-crc
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
 - !ruby/object:Gem::Dependency
   name: bundler
   requirement: !ruby/object:Gem::Requirement

@@ -379,6 +393,8 @@ files:
 - lib/kafka/protocol/offset_fetch_response.rb
 - lib/kafka/protocol/produce_request.rb
 - lib/kafka/protocol/produce_response.rb
+- lib/kafka/protocol/record.rb
+- lib/kafka/protocol/record_batch.rb
 - lib/kafka/protocol/request_message.rb
 - lib/kafka/protocol/sasl_handshake_request.rb
 - lib/kafka/protocol/sasl_handshake_response.rb