ruby-kafka 0.7.0.alpha1 → 0.7.0.alpha2
This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
- checksums.yaml +4 -4
- data/.circleci/config.yml +0 -33
- data/lib/kafka.rb +4 -0
- data/lib/kafka/compressor.rb +33 -14
- data/lib/kafka/consumer.rb +6 -2
- data/lib/kafka/fetched_batch.rb +1 -1
- data/lib/kafka/fetched_message.rb +5 -0
- data/lib/kafka/message_buffer.rb +1 -1
- data/lib/kafka/produce_operation.rb +4 -5
- data/lib/kafka/protocol/decoder.rb +63 -1
- data/lib/kafka/protocol/encoder.rb +62 -1
- data/lib/kafka/protocol/fetch_request.rb +4 -1
- data/lib/kafka/protocol/fetch_response.rb +32 -5
- data/lib/kafka/protocol/produce_request.rb +20 -10
- data/lib/kafka/protocol/record.rb +79 -0
- data/lib/kafka/protocol/record_batch.rb +202 -0
- data/lib/kafka/version.rb +1 -1
- data/ruby-kafka.gemspec +2 -0
- metadata +17 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 2ddcaacf16a7990d53463e5302a389cae33c7782d279ada36a7b32d229a302a5
+  data.tar.gz: 872a5d08ec43a2e4ad550b362bacbcf3c5a523de8bcea49ebc240086b7bba74d
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 4352915740c56ff947cf91e951a9298f39a78db7c51421456a8db2c037f642ab6f791bfe3eb1bbeff01700d573e342a020b0ed7896c481996cf59d6e21d3b4f2
+  data.tar.gz: 8dec7bfdb51ff8321c176b29c6a3cf378b837c807aaf050b8cefe8ce5224044f016f16234f9dfae5104946882fe7817d69650206219e6640f44738c60bff682d
data/.circleci/config.yml
CHANGED
@@ -11,38 +11,6 @@ jobs:
       - run: bundle exec rspec
       - run: bundle exec rubocop
 
-  kafka-0.10:
-    docker:
-      - image: circleci/ruby:2.4.1-node
-        environment:
-          LOG_LEVEL: DEBUG
-      - image: wurstmeister/zookeeper
-      - image: wurstmeister/kafka:0.10.2.1
-        environment:
-          KAFKA_ADVERTISED_HOST_NAME: localhost
-          KAFKA_ADVERTISED_PORT: 9092
-          KAFKA_PORT: 9092
-          KAFKA_ZOOKEEPER_CONNECT: localhost:2181
-          KAFKA_DELETE_TOPIC_ENABLE: true
-      - image: wurstmeister/kafka:0.10.2.1
-        environment:
-          KAFKA_ADVERTISED_HOST_NAME: localhost
-          KAFKA_ADVERTISED_PORT: 9093
-          KAFKA_PORT: 9093
-          KAFKA_ZOOKEEPER_CONNECT: localhost:2181
-          KAFKA_DELETE_TOPIC_ENABLE: true
-      - image: wurstmeister/kafka:0.10.2.1
-        environment:
-          KAFKA_ADVERTISED_HOST_NAME: localhost
-          KAFKA_ADVERTISED_PORT: 9094
-          KAFKA_PORT: 9094
-          KAFKA_ZOOKEEPER_CONNECT: localhost:2181
-          KAFKA_DELETE_TOPIC_ENABLE: true
-    steps:
-      - checkout
-      - run: bundle install --path vendor/bundle
-      - run: bundle exec rspec --profile --tag functional spec/functional
-
   kafka-0.11:
     docker:
       - image: circleci/ruby:2.4.1-node
@@ -110,6 +78,5 @@ workflows:
   test:
     jobs:
       - unit
-      - kafka-0.10
       - kafka-0.11
       - kafka-1.0
data/lib/kafka.rb
CHANGED
data/lib/kafka/compressor.rb
CHANGED
@@ -30,13 +30,31 @@ module Kafka
       @instrumenter = instrumenter
     end
 
-    # @param
+    # @param record_batch [Protocol::RecordBatch]
     # @param offset [Integer] used to simulate broker behaviour in tests
-    # @return [Protocol::
-    def compress(
+    # @return [Protocol::RecordBatch]
+    def compress(record_batch, offset: -1)
+      if record_batch.is_a?(Protocol::RecordBatch)
+        compress_record_batch(record_batch)
+      else
+        # Deprecated message set format
+        compress_message_set(record_batch, offset)
+      end
+    end
+
+    private
+
+    def compress_message_set(message_set, offset)
       return message_set if @codec.nil? || message_set.size < @threshold
 
-
+      data = Protocol::Encoder.encode_with(message_set)
+      compressed_data = @codec.compress(data)
+
+      @instrumenter.instrument("compress.compressor") do |notification|
+        notification[:message_count] = message_set.size
+        notification[:uncompressed_bytesize] = data.bytesize
+        notification[:compressed_bytesize] = compressed_data.bytesize
+      end
 
       wrapper_message = Protocol::Message.new(
         value: compressed_data,
@@ -47,20 +65,21 @@ module Kafka
       Protocol::MessageSet.new(messages: [wrapper_message])
     end
 
-
+    def compress_record_batch(record_batch)
+      if @codec.nil? || record_batch.size < @threshold
+        record_batch.codec_id = 0
+        return Protocol::Encoder.encode_with(record_batch)
+      end
 
-
-      data = Protocol::Encoder.encode_with(
+      record_batch.codec_id = @codec.codec_id
+      data = Protocol::Encoder.encode_with(record_batch)
 
       @instrumenter.instrument("compress.compressor") do |notification|
-
-
-        notification[:message_count] = message_set.size
-        notification[:uncompressed_bytesize] = data.bytesize
-        notification[:compressed_bytesize] = compressed_data.bytesize
-
-        compressed_data
+        notification[:message_count] = record_batch.size
+        notification[:compressed_bytesize] = data.bytesize
      end
+
+      data
    end
  end
end
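Seen from the caller's side, Compressor#compress now accepts either object: a Protocol::RecordBatch is encoded with its codec_id set in place, while a legacy message set still goes through the old wrapper-message path. A minimal usage sketch; the constructor keywords (codec_name:, threshold:, instrumenter:) are assumed to be unchanged from earlier ruby-kafka releases and are not part of this diff:

    require "kafka"
    require "kafka/compressor"

    # Assumed constructor keywords; only #compress is shown in the diff above.
    compressor = Kafka::Compressor.new(
      codec_name: :gzip,   # nil would skip compression entirely
      threshold: 1,        # minimum number of records before compressing
      instrumenter: Kafka::Instrumenter.new(client_id: "example")  # assumed wiring
    )

    batch = Kafka::Protocol::RecordBatch.new(
      records: [Kafka::Protocol::Record.new(value: "hello", key: "greeting")]
    )

    # Returns the encoded record batch bytes, compressed when above the threshold.
    data = compressor.compress(batch)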
data/lib/kafka/consumer.rb
CHANGED
@@ -221,7 +221,7 @@ module Kafka
 
           @instrumenter.instrument("process_message.consumer", notification) do
             begin
-              yield message
+              yield message unless message.is_control_record
               @current_offsets[message.topic][message.partition] = message.offset
             rescue => e
               location = "#{message.topic}/#{message.partition} at offset #{message.offset}"
@@ -289,6 +289,9 @@
 
         batches.each do |batch|
           unless batch.empty?
+            raw_messages = batch.messages
+            batch.messages = raw_messages.reject(&:is_control_record)
+
             notification = {
               topic: batch.topic,
               partition: batch.partition,
@@ -314,9 +317,10 @@
               @logger.error "Exception raised when processing #{location} -- #{e.class}: #{e}\n#{backtrace}"
 
               raise ProcessingError.new(batch.topic, batch.partition, offset_range)
+            ensure
+              batch.messages = raw_messages
             end
           end
-
           mark_message_as_processed(batch.messages.last) if automatically_mark_as_processed
 
           # We've successfully processed a batch from the partition, so we can clear
data/lib/kafka/fetched_batch.rb
CHANGED
data/lib/kafka/message_buffer.rb
CHANGED
@@ -17,7 +17,7 @@ module Kafka
     end
 
     def write(value:, key:, topic:, partition:, create_time: Time.now)
-      message = Protocol::
+      message = Protocol::Record.new(key: key, value: value, create_time: create_time)
 
       buffer_for(topic, partition) << message
 
data/lib/kafka/produce_operation.rb
CHANGED
@@ -1,6 +1,7 @@
 # frozen_string_literal: true
 
 require "kafka/protocol/message_set"
+require "kafka/protocol/record_batch"
 
 module Kafka
   # A produce operation attempts to send all messages in a buffer to the Kafka cluster.
@@ -86,12 +87,10 @@ module Kafka
 
       messages_for_topics = {}
 
-      message_buffer.each do |topic, partition,
-
-        message_set = @compressor.compress(message_set)
-
+      message_buffer.each do |topic, partition, records|
+        record_batch = Protocol::RecordBatch.new(records: records)
         messages_for_topics[topic] ||= {}
-        messages_for_topics[topic][partition] =
+        messages_for_topics[topic][partition] = record_batch
       end
 
       response = broker.produce(
data/lib/kafka/protocol/decoder.rb
CHANGED
@@ -2,11 +2,12 @@
 
 module Kafka
   module Protocol
-
     # A decoder wraps an IO object, making it easy to read specific data types
     # from it. The Kafka protocol is not self-describing, so a client must call
     # these methods in just the right order for things to work.
     class Decoder
+      VARINT_MASK = 0b10000000
+
       def self.from_string(str)
         new(StringIO.new(str))
       end
@@ -22,6 +23,16 @@ module Kafka
         @io.eof?
       end
 
+      # Get some next bytes without touching the current io offset
+      #
+      # @return [Integer]
+      def peek(offset, length)
+        data = @io.read(offset + length)
+        return [] if data.nil?
+        @io.ungetc(data)
+        data.bytes[offset, offset + length] || []
+      end
+
       # Decodes an 8-bit boolean from the IO object.
       #
       # @return [Boolean]
@@ -70,6 +81,15 @@ module Kafka
         size.times.map(&block)
       end
 
+      # Decodes an array from the IO object.
+      # Just like #array except the size is in varint format
+      #
+      # @return [Array]
+      def varint_array(&block)
+        size = varint
+        size.times.map(&block)
+      end
+
       # Decodes a string from the IO object.
      #
      # @return [String]
@@ -83,6 +103,35 @@ module Kafka
         end
       end
 
+      # Decodes a string from the IO object, the size is in varint format
+      #
+      # @return [String]
+      def varint_string
+        size = varint
+
+        if size == -1
+          nil
+        else
+          read(size)
+        end
+      end
+
+      # Read an integer under varints serializing from the IO object.
+      # https://developers.google.com/protocol-buffers/docs/encoding#varints
+      #
+      # @return [Integer]
+      def varint
+        group = 0
+        data = 0
+        loop do
+          chunk = int8
+          data |= (chunk & (~VARINT_MASK)) << group
+          group += 7
+          break if (chunk & VARINT_MASK) == 0
+        end
+        data & 0b1 != 0 ? ~(data >> 1) : (data >> 1)
+      end
+
       # Decodes a list of bytes from the IO object.
       #
       # @return [String]
@@ -96,6 +145,19 @@ module Kafka
         end
       end
 
+      # Decodes a list of bytes from the IO object. The size is in varint format
+      #
+      # @return [String]
+      def varint_bytes
+        size = varint
+
+        if size == -1
+          nil
+        else
+          read(size)
+        end
+      end
+
       # Reads the specified number of bytes from the IO object, returning them
       # as a String.
       #
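Two of the new primitives are easiest to see against a literal byte string: peek reads ahead without consuming anything, and varint applies the zig-zag scheme referenced above. A small illustrative sketch, assuming the top-level `require "kafka"` loads the protocol classes; the byte values follow from the algorithm shown in the diff:

    require "kafka"

    decoder = Kafka::Protocol::Decoder.from_string("\x00\x01\x02\x03")
    decoder.peek(1, 2)  # => [1, 2] -- bytes at offset 1, IO position left untouched
    decoder.int8        # => 0     -- the next real read still starts at byte 0

    # Zig-zag varints: a single 0x01 byte decodes to -1, 0xAC 0x02 decodes to 150.
    Kafka::Protocol::Decoder.from_string("\x01").varint      # => -1
    Kafka::Protocol::Decoder.from_string("\xAC\x02").varint  # => 150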
data/lib/kafka/protocol/encoder.rb
CHANGED
@@ -4,10 +4,10 @@ require "stringio"
 
 module Kafka
   module Protocol
-
     # An encoder wraps an IO object, making it easy to write specific data types
     # to it.
     class Encoder
+      VARINT_MASK = 0b10000000
 
       # Initializes a new encoder.
       #
@@ -85,6 +85,20 @@ module Kafka
         end
       end
 
+      # Writes an array to the IO object.
+      # Just like #write_array, unless the size is under varint format
+      #
+      # @param array [Array]
+      # @return [nil]
+      def write_varint_array(array, &block)
+        if array.nil?
+          write_varint(-1)
+        else
+          write_varint(array.size)
+          array.each(&block)
+        end
+      end
+
       # Writes a string to the IO object.
       #
       # @param string [String]
@@ -98,6 +112,40 @@ module Kafka
         end
       end
 
+      # Writes a string to the IO object, the size is under varint format
+      #
+      # @param string [String]
+      # @return [nil]
+      def write_varint_string(string)
+        if string.nil?
+          write_varint(-1)
+        else
+          write_varint(string.bytesize)
+          write(string)
+        end
+      end
+
+      # Writes an integer under varints serializing to the IO object.
+      # https://developers.google.com/protocol-buffers/docs/encoding#varints
+      #
+      # @param string [Integer]
+      # @return [nil]
+      def write_varint(int)
+        int = int << 1
+        int = ~int | 1 if int < 0
+
+        loop do
+          chunk = int & (~VARINT_MASK)
+          int = int >> 7
+          if int == 0
+            write_int8(chunk)
+            return
+          else
+            write_int8(chunk | VARINT_MASK)
+          end
+        end
+      end
+
       # Writes a byte string to the IO object.
       #
       # @param bytes [String]
@@ -111,6 +159,19 @@ module Kafka
         end
       end
 
+      # Writes a byte string to the IO object, the size is under varint format
+      #
+      # @param bytes [String]
+      # @return [nil]
+      def write_varint_bytes(bytes)
+        if bytes.nil?
+          write_varint(-1)
+        else
+          write_varint(bytes.bytesize)
+          write(bytes)
+        end
+      end
+
       # Encodes an object into a new buffer.
       #
       # @param object [#encode] the object that will encode itself.
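The writer side mirrors the decoder: write_varint zig-zag encodes the integer and then emits seven bits per byte, using the high bit as a continuation marker, and the varint string/bytes/array helpers simply prefix their payload with such a length. A round-trip sketch, again assuming `require "kafka"` loads the protocol classes:

    require "kafka"
    require "stringio"

    buffer  = StringIO.new
    encoder = Kafka::Protocol::Encoder.new(buffer)

    encoder.write_varint(150)          # zig-zag maps 150 to 300, emitted as 0xAC 0x02
    encoder.write_varint(-1)           # zig-zag maps -1 to 1, emitted as 0x01
    encoder.write_varint_string("hi")  # varint length prefix followed by the raw bytes

    decoder = Kafka::Protocol::Decoder.from_string(buffer.string)
    decoder.varint         # => 150
    decoder.varint         # => -1
    decoder.varint_string  # => "hi"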
data/lib/kafka/protocol/fetch_request.rb
CHANGED
@@ -17,6 +17,8 @@ module Kafka
     # MaxBytes => int32
     #
     class FetchRequest
+      ISOLATION_READ_UNCOMMITTED = 0
+      ISOLATION_READ_COMMITTED = 1
 
       # @param max_wait_time [Integer]
       # @param min_bytes [Integer]
@@ -34,7 +36,7 @@ module Kafka
       end
 
       def api_version
-
+        4
       end
 
       def response_class
@@ -46,6 +48,7 @@ module Kafka
         encoder.write_int32(@max_wait_time)
         encoder.write_int32(@min_bytes)
         encoder.write_int32(@max_bytes)
+        encoder.write_int8(ISOLATION_READ_COMMITTED)
 
         encoder.write_array(@topics) do |topic, partitions|
           encoder.write_string(topic)
data/lib/kafka/protocol/fetch_response.rb
CHANGED
@@ -1,6 +1,7 @@
 # frozen_string_literal: true
 
 require "kafka/protocol/message_set"
+require "kafka/protocol/record_batch"
 
 module Kafka
   module Protocol
@@ -17,15 +18,20 @@ module Kafka
     # MessageSetSize => int32
     #
     class FetchResponse
+      MAGIC_BYTE_OFFSET = 16
+      MAGIC_BYTE_LENGTH = 1
+
       class FetchedPartition
         attr_reader :partition, :error_code
-        attr_reader :highwater_mark_offset, :messages
+        attr_reader :highwater_mark_offset, :last_stable_offset, :aborted_transactions, :messages
 
-        def initialize(partition:, error_code:, highwater_mark_offset:, messages:)
+        def initialize(partition:, error_code:, highwater_mark_offset:, last_stable_offset:, aborted_transactions:, messages:)
           @partition = partition
           @error_code = error_code
           @highwater_mark_offset = highwater_mark_offset
           @messages = messages
+          @last_stable_offset = last_stable_offset
+          @aborted_transactions = aborted_transactions
         end
       end
 
@@ -55,15 +61,36 @@ module Kafka
           partition = decoder.int32
           error_code = decoder.int16
           highwater_mark_offset = decoder.int64
+          last_stable_offset = decoder.int64
+
+          aborted_transactions = decoder.array do
+            producer_id = decoder.int64
+            first_offset = decoder.int64
+            {
+              producer_id: producer_id,
+              first_offset: first_offset
+            }
+          end
+
+          messages_decoder = Decoder.from_string(decoder.bytes)
+          messages = []
+          magic_byte = messages_decoder.peek(MAGIC_BYTE_OFFSET, MAGIC_BYTE_LENGTH)[0].to_i
 
-
-
+          if magic_byte == RecordBatch::MAGIC_BYTE
+            record_batch = RecordBatch.decode(messages_decoder)
+            messages = record_batch.records
+          else
+            message_set = MessageSet.decode(messages_decoder)
+            messages = message_set.messages
+          end
 
           FetchedPartition.new(
             partition: partition,
             error_code: error_code,
             highwater_mark_offset: highwater_mark_offset,
-
+            last_stable_offset: last_stable_offset,
+            aborted_transactions: aborted_transactions,
+            messages: messages
           )
         end
 
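MAGIC_BYTE_OFFSET is 16 because of how a record batch is laid out on the wire (see record_batch.rb below): the int64 first offset (bytes 0-7) and the int32 length prefix written by write_bytes (bytes 8-11) come before the int32 partition leader epoch (bytes 12-15), so the int8 magic byte lands at byte 16. Peeking it lets the response decoder pick between RecordBatch and the legacy MessageSet without consuming the payload. A self-contained sketch of that check:

    require "kafka"
    require "kafka/protocol/record_batch"
    require "stringio"

    batch = Kafka::Protocol::RecordBatch.new(
      records: [Kafka::Protocol::Record.new(value: "x")]
    )
    batch.fulfill_relative_data

    buffer = StringIO.new
    batch.encode(Kafka::Protocol::Encoder.new(buffer))

    # first offset (0..7), length prefix (8..11), partition leader epoch (12..15), magic (16)
    decoder = Kafka::Protocol::Decoder.from_string(buffer.string)
    decoder.peek(16, 1)[0]  # => 2, i.e. Kafka::Protocol::RecordBatch::MAGIC_BYTE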
data/lib/kafka/protocol/produce_request.rb
CHANGED
@@ -27,15 +27,17 @@ module Kafka
     # Value => bytes
     #
     class ProduceRequest
-      attr_reader :required_acks, :timeout, :messages_for_topics
+      attr_reader :transactional_id, :required_acks, :timeout, :messages_for_topics, :compressor
 
       # @param required_acks [Integer]
       # @param timeout [Integer]
       # @param messages_for_topics [Hash]
-      def initialize(required_acks:, timeout:, messages_for_topics:)
+      def initialize(transactional_id: nil, required_acks:, timeout:, messages_for_topics:, compressor: nil)
+        @transactional_id = transactional_id
         @required_acks = required_acks
         @timeout = timeout
         @messages_for_topics = messages_for_topics
+        @compressor = compressor
       end
 
       def api_key
@@ -43,7 +45,7 @@ module Kafka
       end
 
       def api_version
-
+        3
       end
 
       def response_class
@@ -59,24 +61,32 @@ module Kafka
       end
 
       def encode(encoder)
+        encoder.write_string(@transactional_id)
         encoder.write_int16(@required_acks)
         encoder.write_int32(@timeout)
 
         encoder.write_array(@messages_for_topics) do |topic, messages_for_partition|
           encoder.write_string(topic)
 
-          encoder.write_array(messages_for_partition) do |partition,
+          encoder.write_array(messages_for_partition) do |partition, record_batch|
            encoder.write_int32(partition)
 
-
-
-
-            encoded_message_set = Encoder.encode_with(message_set)
-
-            encoder.write_bytes(encoded_message_set)
+            record_batch.fulfill_relative_data
+            encoded_record_batch = compress(record_batch)
+            encoder.write_bytes(encoded_record_batch)
           end
         end
       end
+
+      private
+
+      def compress(record_batch)
+        if @compressor.nil?
+          Protocol::Encoder.encode_with(record_batch)
+        else
+          @compressor.compress(record_batch)
+        end
+      end
     end
   end
 end
data/lib/kafka/protocol/record.rb
ADDED
@@ -0,0 +1,79 @@
+module Kafka
+  module Protocol
+    class Record
+      attr_reader :key, :value, :headers, :attributes, :bytesize
+      attr_accessor :offset_delta, :timestamp_delta, :offset, :create_time, :is_control_record
+
+      def initialize(
+        key: nil,
+        value:,
+        headers: {},
+        attributes: 0,
+        offset_delta: 0,
+        timestamp_delta: 0,
+        create_time: Time.now,
+        is_control_record: false
+      )
+        @key = key
+        @value = value
+        @headers = headers
+        @attributes = attributes
+
+        @offset_delta = offset_delta
+        @timestamp_delta = timestamp_delta
+        @create_time = create_time
+        @is_control_record = is_control_record
+
+        @bytesize = @key.to_s.bytesize + @value.to_s.bytesize
+      end
+
+      def encode(encoder)
+        record_buffer = StringIO.new
+
+        record_encoder = Encoder.new(record_buffer)
+
+        record_encoder.write_int8(@attributes)
+        record_encoder.write_varint(@timestamp_delta)
+        record_encoder.write_varint(@offset_delta)
+
+        record_encoder.write_varint_string(@key)
+        record_encoder.write_varint_bytes(@value)
+
+        record_encoder.write_varint_array(@headers.to_a) do |header_key, header_value|
+          record_encoder.write_varint_string(header_key)
+          record_encoder.write_varint_bytes(header_value)
+        end
+
+        encoder.write_varint_bytes(record_buffer.string)
+      end
+
+      def self.decode(decoder)
+        record_decoder = Decoder.from_string(decoder.varint_bytes)
+
+        attributes = record_decoder.int8
+        timestamp_delta = record_decoder.varint
+        offset_delta = record_decoder.varint
+
+        key = record_decoder.varint_string
+        value = record_decoder.varint_bytes
+
+        headers = {}
+        record_decoder.varint_array do
+          header_key = record_decoder.varint_string
+          header_value = record_decoder.varint_bytes
+
+          headers[header_key] = header_value
+        end
+
+        new(
+          key: key,
+          value: value,
+          headers: headers,
+          attributes: attributes,
+          offset_delta: offset_delta,
+          timestamp_delta: timestamp_delta
+        )
+      end
+    end
+  end
+end
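A Record stores only deltas relative to its batch, and its key, value and headers are written with the varint helpers added to the encoder earlier in this diff. A minimal sketch of encoding and decoding one record on its own (a real batch normally fills in the deltas):

    require "kafka"
    require "kafka/protocol/record_batch"
    require "stringio"

    record = Kafka::Protocol::Record.new(
      key: "greeting",
      value: "hello",
      headers: { "source" => "example" }  # optional headers, varint-length encoded
    )

    buffer = StringIO.new
    record.encode(Kafka::Protocol::Encoder.new(buffer))

    # Records are themselves varint-length prefixed, so the bytes decode straight back:
    decoded = Kafka::Protocol::Record.decode(
      Kafka::Protocol::Decoder.from_string(buffer.string)
    )
    decoded.value    # => "hello"
    decoded.headers  # => { "source" => "example" }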
data/lib/kafka/protocol/record_batch.rb
ADDED
@@ -0,0 +1,202 @@
+require 'digest/crc32'
+require 'kafka/protocol/record'
+
+module Kafka
+  module Protocol
+    class RecordBatch
+      MAGIC_BYTE = 2
+      # The size of metadata before the real record data
+      RECORD_BATCH_OVERHEAD = 49
+      # Masks to extract information from attributes
+      CODEC_ID_MASK = 0b00000111
+      IN_TRANSACTION_MASK = 0b00010000
+      IS_CONTROL_BATCH_MASK = 0b00100000
+
+      attr_reader :records, :first_offset, :first_timestamp, :partition_leader_epoch, :in_transaction, :is_control_batch, :last_offset_delta, :max_timestamp, :producer_id, :producer_epoch, :first_sequence
+
+      attr_accessor :codec_id
+
+      def initialize(
+        records: [],
+        first_offset: 0,
+        first_timestamp: Time.now,
+        partition_leader_epoch: 0,
+        codec_id: 0,
+        in_transaction: false,
+        is_control_batch: false,
+        last_offset_delta: 0,
+        producer_id: -1,
+        producer_epoch: 0,
+        first_sequence: 0,
+        max_timestamp: Time.now
+      )
+        @records = records
+        @first_offset = first_offset
+        @first_timestamp = first_timestamp
+        @codec_id = codec_id
+
+        # Records verification
+        @last_offset_delta = last_offset_delta
+        @max_timestamp = max_timestamp
+
+        # Transaction information
+        @producer_id = producer_id
+        @producer_epoch = producer_epoch
+
+        @first_sequence = first_sequence
+        @partition_leader_epoch = partition_leader_epoch
+        @in_transaction = in_transaction
+        @is_control_batch = is_control_batch
+
+        mark_control_record
+      end
+
+      def size
+        @records.size
+      end
+
+      def attributes
+        0x0000 | @codec_id |
+          (@in_transaction ? IN_TRANSACTION_MASK : 0x0) |
+          (@is_control_batch ? IS_CONTROL_BATCH_MASK : 0x0)
+      end
+
+      def encode(encoder)
+        encoder.write_int64(@first_offset)
+
+        record_batch_buffer = StringIO.new
+        record_batch_encoder = Encoder.new(record_batch_buffer)
+
+        record_batch_encoder.write_int32(@partition_leader_epoch)
+        record_batch_encoder.write_int8(MAGIC_BYTE)
+
+        body = encode_record_batch_body
+        crc = Digest::CRC32c.checksum(body)
+
+        record_batch_encoder.write_int32(crc)
+        record_batch_encoder.write(body)
+
+        encoder.write_bytes(record_batch_buffer.string)
+      end
+
+      def encode_record_batch_body
+        buffer = StringIO.new
+        encoder = Encoder.new(buffer)
+
+        encoder.write_int16(attributes)
+        encoder.write_int32(@last_offset_delta)
+        encoder.write_int64((@first_timestamp.to_f * 1000).to_i)
+        encoder.write_int64((@max_timestamp.to_f * 1000).to_i)
+
+        encoder.write_int64(@producer_id)
+        encoder.write_int16(@producer_epoch)
+        encoder.write_int32(@first_sequence)
+
+        encoder.write_int32(@records.length)
+
+        records_array = encode_record_array
+        if compressed?
+          codec = Compression.find_codec_by_id(@codec_id)
+          records_array = codec.compress(records_array)
+        end
+        encoder.write(records_array)
+
+        buffer.string
+      end
+
+      def encode_record_array
+        buffer = StringIO.new
+        encoder = Encoder.new(buffer)
+        @records.each do |record|
+          record.encode(encoder)
+        end
+        buffer.string
+      end
+
+      def compressed?
+        @codec_id != 0
+      end
+
+      def fulfill_relative_data
+        first_record = records.min_by { |record| record.create_time }
+        @first_timestamp = first_record.nil? ? Time.now : first_record.create_time
+
+        last_record = records.max_by { |record| record.create_time }
+        @max_timestamp = last_record.nil? ? Time.now : last_record.create_time
+
+        records.each_with_index do |record, index|
+          record.offset_delta = index
+          record.timestamp_delta = (record.create_time - first_timestamp).to_i
+        end
+        @last_offset_delta = records.length - 1
+      end
+
+      def self.decode(decoder)
+        first_offset = decoder.int64
+
+        record_batch_raw = decoder.bytes
+        record_batch_decoder = Decoder.from_string(record_batch_raw)
+
+        partition_leader_epoch = record_batch_decoder.int32
+        # Currently, the magic byte is used to distingush legacy MessageSet and
+        # RecordBatch. Therefore, we don't care about magic byte here yet.
+        _magic_byte = record_batch_decoder.int8
+        _crc = record_batch_decoder.int32
+
+        attributes = record_batch_decoder.int16
+        codec_id = attributes & CODEC_ID_MASK
+        in_transaction = (attributes & IN_TRANSACTION_MASK) > 0
+        is_control_batch = (attributes & IS_CONTROL_BATCH_MASK) > 0
+
+        last_offset_delta = record_batch_decoder.int32
+        first_timestamp = Time.at(record_batch_decoder.int64 / 1000)
+        max_timestamp = Time.at(record_batch_decoder.int64 / 1000)
+
+        producer_id = record_batch_decoder.int64
+        producer_epoch = record_batch_decoder.int16
+        first_sequence = record_batch_decoder.int32
+
+        records_array_length = record_batch_decoder.int32
+        records_array_raw = record_batch_decoder.read(
+          record_batch_raw.size - RECORD_BATCH_OVERHEAD
+        )
+        if codec_id != 0
+          codec = Compression.find_codec_by_id(codec_id)
+          records_array_raw = codec.decompress(records_array_raw)
+        end
+
+        records_array_decoder = Decoder.from_string(records_array_raw)
+        records_array = []
+        until records_array_decoder.eof?
+          record = Record.decode(records_array_decoder)
+          record.offset = first_offset + record.offset_delta
+          record.create_time = first_timestamp + record.timestamp_delta
+          records_array << record
+        end
+
+        raise InsufficientDataMessage if records_array.length != records_array_length
+
+        new(
+          records: records_array,
+          first_offset: first_offset,
+          first_timestamp: first_timestamp,
+          partition_leader_epoch: partition_leader_epoch,
+          in_transaction: in_transaction,
+          is_control_batch: is_control_batch,
+          last_offset_delta: last_offset_delta,
+          producer_id: producer_id,
+          producer_epoch: producer_epoch,
+          first_sequence: first_sequence,
+          max_timestamp: max_timestamp
+        )
+      end
+
+      def mark_control_record
+        if in_transaction && is_control_batch
+          record = @records.first
+          record.is_control_record = true unless record.nil?
+        end
+      end
+    end
+  end
+end
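The batch ties the pieces together: fulfill_relative_data derives each record's offset and timestamp delta, attributes packs the codec id and the transaction/control flags into a single int16, and encode writes a CRC32C-checksummed body, which is why the gem now depends on digest-crc (see the gemspec change below). A hedged end-to-end sketch:

    require "kafka"
    require "kafka/protocol/record_batch"
    require "stringio"

    records = [
      Kafka::Protocol::Record.new(value: "first"),
      Kafka::Protocol::Record.new(value: "second"),
    ]

    batch = Kafka::Protocol::RecordBatch.new(records: records)
    batch.fulfill_relative_data  # offset/timestamp deltas derived from the records
    batch.codec_id = 0           # 0 = uncompressed; a real codec id compresses the record array

    buffer = StringIO.new
    batch.encode(Kafka::Protocol::Encoder.new(buffer))

    # The wire bytes decode back into an equivalent batch:
    decoded = Kafka::Protocol::RecordBatch.decode(
      Kafka::Protocol::Decoder.from_string(buffer.string)
    )
    decoded.records.map(&:value)  # => ["first", "second"]
    decoded.last_offset_delta     # => 1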
data/lib/kafka/version.rb
CHANGED
data/ruby-kafka.gemspec
CHANGED
@@ -27,6 +27,8 @@ Gem::Specification.new do |spec|
   spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
   spec.require_paths = ["lib"]
 
+  spec.add_dependency 'digest-crc'
+
   spec.add_development_dependency "bundler", ">= 1.9.5"
   spec.add_development_dependency "rake", "~> 10.0"
   spec.add_development_dependency "rspec"
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: ruby-kafka
 version: !ruby/object:Gem::Version
-  version: 0.7.0.
+  version: 0.7.0.alpha2
 platform: ruby
 authors:
 - Daniel Schierbeck
@@ -10,6 +10,20 @@ bindir: exe
 cert_chain: []
 date: 2018-05-24 00:00:00.000000000 Z
 dependencies:
+- !ruby/object:Gem::Dependency
+  name: digest-crc
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
 - !ruby/object:Gem::Dependency
   name: bundler
   requirement: !ruby/object:Gem::Requirement
@@ -379,6 +393,8 @@ files:
 - lib/kafka/protocol/offset_fetch_response.rb
 - lib/kafka/protocol/produce_request.rb
 - lib/kafka/protocol/produce_response.rb
+- lib/kafka/protocol/record.rb
+- lib/kafka/protocol/record_batch.rb
 - lib/kafka/protocol/request_message.rb
 - lib/kafka/protocol/sasl_handshake_request.rb
 - lib/kafka/protocol/sasl_handshake_response.rb