leffen-kafka-rb 0.0.15
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +202 -0
- data/README.md +123 -0
- data/Rakefile +40 -0
- data/bin/leffen-kafka-consumer +6 -0
- data/bin/leffen-kafka-publish +6 -0
- data/lib/kafka.rb +40 -0
- data/lib/kafka/batch.rb +28 -0
- data/lib/kafka/cli.rb +170 -0
- data/lib/kafka/consumer.rb +104 -0
- data/lib/kafka/encoder.rb +59 -0
- data/lib/kafka/error_codes.rb +35 -0
- data/lib/kafka/io.rb +57 -0
- data/lib/kafka/message.rb +209 -0
- data/lib/kafka/multi_producer.rb +35 -0
- data/lib/kafka/producer.rb +42 -0
- data/lib/kafka/producer_request.rb +26 -0
- data/lib/kafka/request_type.rb +23 -0
- data/lib/leffen-kafka.rb +16 -0
- data/spec/batch_spec.rb +35 -0
- data/spec/cli_spec.rb +133 -0
- data/spec/consumer_spec.rb +146 -0
- data/spec/encoder_spec.rb +251 -0
- data/spec/io_spec.rb +88 -0
- data/spec/kafka_spec.rb +20 -0
- data/spec/message_spec.rb +227 -0
- data/spec/multi_producer_spec.rb +74 -0
- data/spec/producer_request_spec.rb +38 -0
- data/spec/producer_spec.rb +71 -0
- data/spec/spec_helper.rb +18 -0
- metadata +107 -0
data/lib/kafka/cli.rb
ADDED
@@ -0,0 +1,170 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one or more
|
2
|
+
# contributor license agreements. See the NOTICE file distributed with
|
3
|
+
# this work for additional information regarding copyright ownership.
|
4
|
+
# The ASF licenses this file to You under the Apache License, Version 2.0
|
5
|
+
# (the "License"); you may not use this file except in compliance with
|
6
|
+
# the License. You may obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
# See the License for the specific language governing permissions and
|
14
|
+
# limitations under the License.
|
15
|
+
|
16
|
+
require 'optparse'
|
17
|
+
|
18
|
+
module Kafka
  # Command-line driver shared by the gem's two executables
  # (bin/leffen-kafka-publish and bin/leffen-kafka-consumer).
  # Configuration is gathered from ENV first, then command-line flags.
  module CLI #:nodoc: all
    extend self

    # Entry point for the publish binary: with -m sends a single message
    # and returns; otherwise reads messages from stdin in a loop.
    def publish!
      read_env
      parse_args
      validate_config
      if config[:message]
        push(config, config.delete(:message))
      else
        publish(config)
      end
    end


    # Entry point for the consumer binary: polls and prints payloads forever.
    def subscribe!
      read_env
      parse_args
      validate_config
      subscribe(config)
    end

    # Fills in host/port defaults and requires a topic.
    # Prints help and exits the process on --help or a missing topic.
    def validate_config
      if config[:help]
        puts help
        exit
      end
      config[:host] ||= IO::HOST
      config[:port] ||= IO::PORT
      config[:topic].is_a?(String) or raise "Missing topic"

    rescue RuntimeError => e
      puts e.message
      puts help
      exit
    end

    def parse_args(args = ARGV)
      option_parser.parse(args)
    end

    # Reads KAFKA_* environment variables into config. Runs before
    # parse_args, so command-line flags override the environment.
    def read_env(env = ENV)
      config[:host] = env["KAFKA_HOST"] if env["KAFKA_HOST"]
      config[:port] = env["KAFKA_PORT"].to_i if env["KAFKA_PORT"]
      config[:topic] = env["KAFKA_TOPIC"] if env["KAFKA_TOPIC"]
      config[:compression] = string_to_compression(env["KAFKA_COMPRESSION"]) if env["KAFKA_COMPRESSION"]
    end

    # Memoized options hash; defaults to no compression.
    def config
      @config ||= {:compression => string_to_compression("no")}
    end

    def help
      option_parser.to_s
    end

    # Builds the OptionParser. Note: a fresh parser is built on every call,
    # and the publish-only flags (-c, -m) are included only when the running
    # executable is the publish binary.
    def option_parser
      OptionParser.new do |opts|
        opts.banner = "Usage: #{program_name} [options]"
        opts.separator ""

        opts.on("-h","--host HOST", "Set the kafka hostname") do |h|
          config[:host] = h
        end

        opts.on("-p", "--port PORT", "Set the kafka port") do |port|
          config[:port] = port.to_i
        end

        opts.on("-t", "--topic TOPIC", "Set the kafka topic") do |topic|
          config[:topic] = topic
        end

        opts.on("-c", "--compression no|gzip|snappy", "Set the compression method") do |meth|
          config[:compression] = string_to_compression(meth)
        end if publish?

        opts.on("-m","--message MESSAGE", "Message to send") do |msg|
          config[:message] = msg
        end if publish?

        opts.separator ""

        opts.on("--help", "show the help") do
          config[:help] = true
        end

        opts.separator ""
        opts.separator "You can set the host, port, topic and compression from the environment variables: KAFKA_HOST, KAFKA_PORT, KAFKA_TOPIC AND KAFKA_COMPRESSION"
      end
    end

    def publish?
      program_name == "leffen-kafka-publish"
    end

    # NOTE(review): the shipped binaries are named leffen-kafka-publish and
    # leffen-kafka-consumer, so this comparison never matches either of
    # them — confirm whether "kafka-subscribe" is intentional.
    def subscribe?
      program_name == "kafka-subscribe"
    end

    def program_name(pn = $0)
      File.basename(pn)
    end

    # Maps a compression name to the Message compression constant.
    # Raises for any unrecognized value (including nil).
    def string_to_compression(meth)
      case meth
      when "no" then Message::NO_COMPRESSION
      when "gzip" then Message::GZIP_COMPRESSION
      when "snappy" then Message::SNAPPY_COMPRESSION
      else raise "No supported compression"
      end
    end

    # Sends a single message and returns.
    def push(options, message)
      Producer.new(options).push(Message.new(message))
    end

    # Reads lines from stdin and publishes each one until EOF or Ctrl-C.
    def publish(options)
      trap(:INT){ exit }
      producer = Producer.new(options)
      loop do
        publish_loop(producer)
      end
    end

    def publish_loop(producer)
      message = read_input
      producer.push(Message.new(message))
    end

    # Returns the next stripped stdin line; exits the process on EOF.
    def read_input
      input = $stdin.gets
      if input
        input.strip
      else
        exit # gets return nil when eof
      end

    end

    # Consumes forever, printing each message payload to stdout.
    def subscribe(options)
      trap(:INT){ exit }
      consumer = Consumer.new(options)
      consumer.loop do |messages|
        messages.each do |message|
          puts message.payload
        end
      end
    end

  end
end
|
@@ -0,0 +1,104 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one or more
|
2
|
+
# contributor license agreements. See the NOTICE file distributed with
|
3
|
+
# this work for additional information regarding copyright ownership.
|
4
|
+
# The ASF licenses this file to You under the Apache License, Version 2.0
|
5
|
+
# (the "License"); you may not use this file except in compliance with
|
6
|
+
# the License. You may obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
# See the License for the specific language governing permissions and
|
14
|
+
# limitations under the License.
|
15
|
+
|
16
|
+
module Kafka
  # Polling consumer for the Kafka 0.7 wire protocol. Connects on
  # construction and fetches message batches from a single topic/partition.
  class Consumer

    include Kafka::IO

    MAX_SIZE = 1024 * 1024 # 1 megabyte
    DEFAULT_POLLING_INTERVAL = 2 # 2 seconds
    MAX_OFFSETS = 1
    LATEST_OFFSET = -1
    EARLIEST_OFFSET = -2

    attr_accessor :topic, :partition, :offset, :max_size, :request_type, :polling

    # Options: :topic (default "test"), :partition (0), :host, :port,
    # :offset (nil = resolve latest on first consume), :max_size, :polling.
    # Opens the TCP connection immediately.
    def initialize(options = {})
      self.topic = options[:topic] || "test"
      self.partition = options[:partition] || 0
      self.host = options[:host] || HOST
      self.port = options[:port] || PORT
      self.offset = options[:offset]
      self.max_size = options[:max_size] || MAX_SIZE
      self.polling = options[:polling] || DEFAULT_POLLING_INTERVAL
      connect(host, port)
    end

    # Polls forever, yielding each non-empty batch of messages to the block,
    # sleeping +polling+ seconds between fetches. Never returns.
    def loop(&block)
      messages = []
      while (true) do
        messages = consume
        block.call(messages) if messages && !messages.empty?
        sleep(polling)
      end
    end

    # Fetches one batch starting at the current offset and advances the
    # offset by the bytes consumed. Returns the messages, or nil when the
    # socket errors (IO#read/#write raise SocketError on failure).
    def consume
      self.offset ||= fetch_latest_offset
      send_consume_request
      message_set = Kafka::Message.parse_from(read_data_response)
      self.offset += message_set.size
      message_set.messages
    rescue SocketError
      nil
    end

    # Asks the broker for the newest available offset for this partition.
    def fetch_latest_offset
      send_offsets_request(LATEST_OFFSET)
      read_offsets_response
    end

    # Asks the broker for the oldest available offset for this partition.
    def fetch_earliest_offset
      send_offsets_request(EARLIEST_OFFSET)
      read_offsets_response
    end

    def send_offsets_request(offset)
      write(encoded_request_size)
      write(encode_request(Kafka::RequestType::OFFSETS, topic, partition, offset, MAX_OFFSETS))
    end

    # Skips the 4-byte count header and decodes one big-endian 64-bit offset
    # (reverse + native 'q' unpack stands in for big-endian on little-endian hosts).
    def read_offsets_response
      read_data_response[4,8].reverse.unpack('q')[0]
    end

    def send_consume_request
      write(encoded_request_size)
      write(encode_request(Kafka::RequestType::FETCH, topic, partition, offset, max_size))
    end

    # Reads a length-prefixed response and strips the leading 2-byte error code.
    def read_data_response
      data_length = read(4).unpack("N").shift
      data = read(data_length)
      # TODO: inspect error code instead of skipping it
      data[2, data.length]
    end

    # 4-byte big-endian size of the request body:
    # request_type(2) + topic_len(2) + topic + partition(4) + offset(8) + max_size(4).
    def encoded_request_size
      size = 2 + 2 + topic.length + 4 + 8 + 4
      [size].pack("N")
    end

    # Serializes a fetch/offsets request body in wire order.
    def encode_request(request_type, topic, partition, offset, max_size)
      request_type = [request_type].pack("n")
      topic = [topic.length].pack('n') + topic
      partition = [partition].pack("N")
      offset = [offset].pack("q").reverse # DIY 64bit big endian integer
      max_size = [max_size].pack("N")
      request_type + topic + partition + offset + max_size
    end
  end
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one or more
|
2
|
+
# contributor license agreements. See the NOTICE file distributed with
|
3
|
+
# this work for additional information regarding copyright ownership.
|
4
|
+
# The ASF licenses this file to You under the Apache License, Version 2.0
|
5
|
+
# (the "License"); you may not use this file except in compliance with
|
6
|
+
# the License. You may obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
# See the License for the specific language governing permissions and
|
14
|
+
# limitations under the License.
|
15
|
+
|
16
|
+
module Kafka
  # Serializes messages, produce requests and multi-produce requests into
  # the Kafka 0.7 wire format.
  module Encoder
    class << self
      # Encode a single Kafka::Message, optionally compressed.
      def message(message, compression = Message::NO_COMPRESSION)
        message.encode(compression)
      end

      # Build one <topic><partition><message set> block: a 2-byte
      # length-prefixed topic, a 4-byte partition id, then the 4-byte
      # length-prefixed encoded message set.
      def message_block(topic, partition, messages, compression)
        encoded_set = message_set(messages, compression)

        topic_part = [topic.length].pack("n") + topic
        partition_part = [partition].pack("N")
        set_part = [encoded_set.length].pack("N") + encoded_set

        topic_part + partition_part + set_part
      end

      # Concatenate the individually encoded messages. When compression is
      # requested, the whole concatenation is wrapped in a single
      # compressed message.
      def message_set(messages, compression)
        encoded = Array(messages).map { |m| message(m) }.join("")
        return encoded if compression == Message::NO_COMPRESSION
        message(Message.new(encoded), compression)
      end

      # Full PRODUCE request: 2-byte request type plus one message block,
      # all prefixed with the 4-byte total length.
      def produce(topic, partition, messages, compression = Message::NO_COMPRESSION)
        body = [RequestType::PRODUCE].pack("n") +
               message_block(topic, partition, messages, compression)
        [body.length].pack("N") + body
      end

      # Full MULTIPRODUCE request: 2-byte request type, 2-byte block count,
      # then one message block per producer request, length-prefixed.
      def multiproduce(producer_requests, compression = Message::NO_COMPRESSION)
        blocks = Array(producer_requests).map do |req|
          message_block(req.topic, req.partition, req.messages, compression)
        end

        body = [RequestType::MULTIPRODUCE].pack("n") +
               [blocks.length].pack("n") + blocks.join("")
        [body.length].pack("N") + body
      end
    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one or more
|
2
|
+
# contributor license agreements. See the NOTICE file distributed with
|
3
|
+
# this work for additional information regarding copyright ownership.
|
4
|
+
# The ASF licenses this file to You under the Apache License, Version 2.0
|
5
|
+
# (the "License"); you may not use this file except in compliance with
|
6
|
+
# the License. You may obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
# See the License for the specific language governing permissions and
|
14
|
+
# limitations under the License.
|
15
|
+
module Kafka
  # Maps Kafka wire-protocol error codes to human-readable descriptions.
  module ErrorCodes
    NO_ERROR                = 0
    OFFSET_OUT_OF_RANGE     = 1
    INVALID_MESSAGE_CODE    = 2
    WRONG_PARTITION_CODE    = 3
    INVALID_FETCH_SIZE_CODE = 4
    # Backward-compatible alias for the historical misspelling ("RETCH").
    INVALID_RETCH_SIZE_CODE = INVALID_FETCH_SIZE_CODE

    # Keyed by the constants above rather than bare magic numbers.
    STRINGS = {
      NO_ERROR                => 'No error',
      OFFSET_OUT_OF_RANGE     => 'Offset out of range',
      INVALID_MESSAGE_CODE    => 'Invalid message code',
      WRONG_PARTITION_CODE    => 'Wrong partition code',
      INVALID_FETCH_SIZE_CODE => 'Invalid fetch size code',
    }.freeze

    # Returns the description for +code+, or 'Unknown error' for any
    # code not listed above.
    def self.to_s(code)
      STRINGS[code] || 'Unknown error'
    end
  end
end
|
data/lib/kafka/io.rb
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one or more
|
2
|
+
# contributor license agreements. See the NOTICE file distributed with
|
3
|
+
# this work for additional information regarding copyright ownership.
|
4
|
+
# The ASF licenses this file to You under the Apache License, Version 2.0
|
5
|
+
# (the "License"); you may not use this file except in compliance with
|
6
|
+
# the License. You may obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
# See the License for the specific language governing permissions and
|
14
|
+
# limitations under the License.
|
15
|
+
module Kafka
  # Mixin providing the raw TCP transport shared by Producer and Consumer.
  # All read/write failures are normalized to SocketError after the socket
  # has been closed, so callers can rescue a single exception class.
  module IO
    attr_accessor :socket, :host, :port, :compression

    HOST = "localhost"
    PORT = 9092

    # Opens a TCP connection and remembers host/port for reconnects.
    # Raises ArgumentError when either is missing.
    def connect(host, port)
      raise ArgumentError, "No host or port specified" unless host && port
      self.host = host
      self.port = port
      self.socket = TCPSocket.new(host, port)
    end

    # Re-opens the socket using the remembered host/port. On failure the
    # stale socket is closed and the original exception is re-raised.
    def reconnect
      self.socket = TCPSocket.new(self.host, self.port)
    rescue
      self.disconnect
      raise
    end

    # Closes and clears the socket; close errors are deliberately ignored.
    def disconnect
      self.socket.close rescue nil
      self.socket = nil
    end

    # Reads exactly +length+ bytes. EOF (nil) or any IO error disconnects
    # and raises SocketError with the underlying message.
    def read(length)
      self.socket.read(length) || raise(SocketError, "no data")
    rescue
      self.disconnect
      raise SocketError, "cannot read: #{$!.message}"
    end

    # Writes +data+, lazily reconnecting if the socket was dropped. Any IO
    # error disconnects and raises SocketError with the underlying message.
    def write(data)
      self.reconnect unless self.socket
      self.socket.write(data)
    rescue
      self.disconnect
      raise SocketError, "cannot write: #{$!.message}"
    end

  end
end
|
@@ -0,0 +1,209 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one or more
|
2
|
+
# contributor license agreements. See the NOTICE file distributed with
|
3
|
+
# this work for additional information regarding copyright ownership.
|
4
|
+
# The ASF licenses this file to You under the Apache License, Version 2.0
|
5
|
+
# (the "License"); you may not use this file except in compliance with
|
6
|
+
# the License. You may obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
# See the License for the specific language governing permissions and
|
14
|
+
# limitations under the License.
|
15
|
+
module Kafka

  # A message. The format of a message is as follows:
  #
  # 4 byte big-endian int: length of message in bytes (including the rest of
  # the header, but excluding the length field itself)
  # 1 byte: "magic" identifier (format version number)
  #
  # If the magic byte == 0, there is one more header field:
  #
  # 4 byte big-endian int: CRC32 checksum of the payload
  #
  # If the magic byte == 1, there are two more header fields:
  #
  # 1 byte: "attributes" (flags for compression, codec etc)
  # 4 byte big-endian int: CRC32 checksum of the payload
  #
  # All following bytes are the payload.
  class Message

    MAGIC_IDENTIFIER_DEFAULT = 0
    MAGIC_IDENTIFIER_COMPRESSION = 1
    NO_COMPRESSION = 0
    GZIP_COMPRESSION = 1
    SNAPPY_COMPRESSION = 2
    BASIC_MESSAGE_HEADER = 'NC'.freeze  # 4-byte size + 1-byte magic
    VERSION_0_HEADER = 'N'.freeze       # 4-byte checksum
    VERSION_1_HEADER = 'CN'.freeze      # 1-byte attributes + 4-byte checksum
    COMPRESSION_CODEC_MASK = 0x03       # low two attribute bits select the codec

    attr_accessor :magic, :checksum, :payload

    # When no checksum is supplied, it is computed from the payload so a
    # freshly built message is always valid?.
    def initialize(payload = nil, magic = MAGIC_IDENTIFIER_DEFAULT, checksum = nil)
      self.magic = magic
      self.payload = payload || ""
      self.checksum = checksum || self.calculate_checksum
      @compression = NO_COMPRESSION
    end

    # CRC32 of the current payload.
    def calculate_checksum
      Zlib.crc32(self.payload)
    end

    # True when the stored checksum matches the payload.
    def valid?
      self.checksum == calculate_checksum
    end

    # Takes a byte string containing one or more messages; returns a MessageSet
    # with the messages parsed from the string, and the number of bytes
    # consumed from the string. Compressed messages are inflated and parsed
    # recursively. Trailing truncated messages are left unconsumed.
    def self.parse_from(data)
      messages = []
      bytes_processed = 0

      while bytes_processed <= data.length - 5 # 5 = size of BASIC_MESSAGE_HEADER
        message_size, magic = data[bytes_processed, 5].unpack(BASIC_MESSAGE_HEADER)
        break if bytes_processed + message_size + 4 > data.length # message is truncated

        case magic
        when MAGIC_IDENTIFIER_DEFAULT
          # byte layout (offsets relative to the message start):
          # | 0..3         | 4     | 5..8     | 9...
          # | message_size | magic | checksum | payload
          payload_size = message_size - 5 # 5 = sizeof(magic) + sizeof(checksum)
          checksum = data[bytes_processed + 5, 4].unpack(VERSION_0_HEADER).shift
          payload = data[bytes_processed + 9, payload_size]
          messages << Kafka::Message.new(payload, magic, checksum)

        when MAGIC_IDENTIFIER_COMPRESSION
          # byte layout (offsets relative to the message start):
          # | 0..3 | 4     | 5     | 6..9     | 10...
          # | size | magic | attrs | checksum | payload
          payload_size = message_size - 6 # 6 = sizeof(magic) + sizeof(attrs) + sizeof(checksum)
          attributes, checksum = data[bytes_processed + 5, 5].unpack(VERSION_1_HEADER)
          payload = data[bytes_processed + 10, payload_size]

          case attributes & COMPRESSION_CODEC_MASK
          when NO_COMPRESSION # a single uncompressed message
            messages << Kafka::Message.new(payload, magic, checksum)
          when GZIP_COMPRESSION # a gzip-compressed message set -- parse recursively
            uncompressed = Zlib::GzipReader.new(StringIO.new(payload)).read
            message_set = parse_from(uncompressed)
            # the inner parse must consume the entire inflated buffer
            raise 'malformed compressed message' if message_set.size != uncompressed.size
            messages.concat(message_set.messages)
          when SNAPPY_COMPRESSION # a snappy-compresses message set -- parse recursively
            ensure_snappy! do
              uncompressed = Snappy::Reader.new(StringIO.new(payload)).read
              message_set = parse_from(uncompressed)
              raise 'malformed compressed message' if message_set.size != uncompressed.size
              messages.concat(message_set.messages)
            end
          else
            # https://cwiki.apache.org/confluence/display/KAFKA/Compression
            raise "Unsupported Kafka compression codec: #{attributes & COMPRESSION_CODEC_MASK}"
          end

        else
          raise "Unsupported Kafka message version: magic number #{magic}"
        end

        bytes_processed += message_size + 4 # 4 = sizeof(message_size)
      end

      MessageSet.new(bytes_processed, messages)
    end

    # Serializes this message into its wire form (size-prefixed header +
    # payload). NOTE: mutates self.payload (re-encoded and, if requested,
    # compressed in place).
    def encode(compression = NO_COMPRESSION)
      @compression = compression

      self.payload = asciify_payload
      self.payload = compress_payload if compression?

      data = magic_and_compression + [calculate_checksum].pack("N") + payload
      [data.length].pack("N") + data
    end


    # Encapsulates a list of Kafka messages (as Kafka::Message objects in the
    # +messages+ attribute) and their total serialized size in bytes (the +size+
    # attribute).
    class MessageSet < Struct.new(:size, :messages); end

    # Yields only when the optional Snappy gem is loaded; fails otherwise.
    def self.ensure_snappy!
      if Object.const_defined? "Snappy"
        yield
      else
        fail "Snappy not available!"
      end
    end

    # Instance-level convenience forwarding to the class method.
    def ensure_snappy! &block
      self.class.ensure_snappy! &block
    end

    private

    attr_reader :compression

    def compression?
      compression != NO_COMPRESSION
    end

    # Header bytes after the size field: magic alone for version 0, or
    # magic + attributes byte when a compression codec is set.
    def magic_and_compression
      if compression?
        [MAGIC_IDENTIFIER_COMPRESSION, compression].pack("CC")
      else
        [MAGIC_IDENTIFIER_DEFAULT].pack("C")
      end
    end

    # Forces the payload to a binary (ASCII-8BIT) string on Ruby >= 1.9 so
    # byte lengths and CRCs are encoding-independent.
    def asciify_payload
      if RUBY_VERSION[0, 3] == "1.8"
        payload
      else
        payload.to_s.force_encoding(Encoding::ASCII_8BIT)
      end
    end

    # Returns the compressed payload for the selected codec; returns nil for
    # any other codec value (only reached when compression? is true).
    def compress_payload
      case compression
      when GZIP_COMPRESSION
        gzip
      when SNAPPY_COMPRESSION
        snappy
      end
    end

    def gzip
      with_buffer do |buffer|
        gz = Zlib::GzipWriter.new buffer, nil, nil
        gz.write payload
        gz.close
      end
    end

    def snappy
      ensure_snappy! do
        with_buffer do |buffer|
          Snappy::Writer.new buffer do |w|
            w << payload
          end
        end
      end
    end

    # Yields a binary StringIO, then returns its accumulated contents.
    def with_buffer
      buffer = StringIO.new
      buffer.set_encoding Encoding::ASCII_8BIT unless RUBY_VERSION =~ /^1\.8/
      yield buffer if block_given?
      buffer.rewind
      buffer.string
    end
  end
end
|
209
|
+
|