leffen-kafka-rb 0.0.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,167 @@
+ # Licensed to the Apache Software Foundation (ASF) under one or more
+ # contributor license agreements. See the NOTICE file distributed with
+ # this work for additional information regarding copyright ownership.
+ # The ASF licenses this file to You under the Apache License, Version 2.0
+ # (the "License"); you may not use this file except in compliance with
+ # the License. You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ require 'optparse'
+
+ module Kafka
+   module CLI #:nodoc: all
+     extend self
+
+     def publish!
+       read_env
+       parse_args
+       validate_config
+       if config[:message]
+         push(config, config.delete(:message))
+       else
+         publish(config)
+       end
+     end
+
+     def subscribe!
+       read_env
+       parse_args
+       validate_config
+       subscribe(config)
+     end
+
+     def validate_config
+       if config[:help]
+         puts help
+         exit
+       end
+       config[:host] ||= IO::HOST
+       config[:port] ||= IO::PORT
+       config[:topic].is_a?(String) or raise "Missing topic"
+     rescue RuntimeError => e
+       puts e.message
+       puts help
+       exit
+     end
+
+     def parse_args(args = ARGV)
+       option_parser.parse(args)
+     end
+
+     def read_env(env = ENV)
+       config[:host] = env["KAFKA_HOST"] if env["KAFKA_HOST"]
+       config[:port] = env["KAFKA_PORT"].to_i if env["KAFKA_PORT"]
+       config[:topic] = env["KAFKA_TOPIC"] if env["KAFKA_TOPIC"]
+       config[:compression] = string_to_compression(env["KAFKA_COMPRESSION"]) if env["KAFKA_COMPRESSION"]
+     end
+
+     def config
+       @config ||= { :compression => string_to_compression("no") }
+     end
+
+     def help
+       option_parser.to_s
+     end
+
+     def option_parser
+       OptionParser.new do |opts|
+         opts.banner = "Usage: #{program_name} [options]"
+         opts.separator ""
+
+         opts.on("-h", "--host HOST", "Set the kafka hostname") do |h|
+           config[:host] = h
+         end
+
+         opts.on("-p", "--port PORT", "Set the kafka port") do |port|
+           config[:port] = port.to_i
+         end
+
+         opts.on("-t", "--topic TOPIC", "Set the kafka topic") do |topic|
+           config[:topic] = topic
+         end
+
+         opts.on("-c", "--compression no|gzip|snappy", "Set the compression method") do |meth|
+           config[:compression] = string_to_compression(meth)
+         end if publish?
+
+         opts.on("-m", "--message MESSAGE", "Message to send") do |msg|
+           config[:message] = msg
+         end if publish?
+
+         opts.separator ""
+
+         opts.on("--help", "Show this help") do
+           config[:help] = true
+         end
+
+         opts.separator ""
+         opts.separator "You can set the host, port, topic and compression from the environment variables: KAFKA_HOST, KAFKA_PORT, KAFKA_TOPIC and KAFKA_COMPRESSION"
+       end
+     end
+
+     def publish?
+       program_name == "leffen-kafka-publish"
+     end
+
+     def subscribe?
+       program_name == "kafka-subscribe"
+     end
+
+     def program_name(pn = $0)
+       File.basename(pn)
+     end
+
+     def string_to_compression(meth)
+       case meth
+       when "no" then Message::NO_COMPRESSION
+       when "gzip" then Message::GZIP_COMPRESSION
+       when "snappy" then Message::SNAPPY_COMPRESSION
+       else raise "Unsupported compression method"
+       end
+     end
+
+     def push(options, message)
+       Producer.new(options).push(Message.new(message))
+     end
+
+     def publish(options)
+       trap(:INT) { exit }
+       producer = Producer.new(options)
+       loop do
+         publish_loop(producer)
+       end
+     end
+
+     def publish_loop(producer)
+       message = read_input
+       producer.push(Message.new(message))
+     end
+
+     def read_input
+       input = $stdin.gets
+       if input
+         input.strip
+       else
+         exit # gets returns nil at EOF
+       end
+     end
+
+     def subscribe(options)
+       trap(:INT) { exit }
+       consumer = Consumer.new(options)
+       consumer.loop do |messages|
+         messages.each do |message|
+           puts message.payload
+         end
+       end
+     end
+
+   end
+ end
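
The CLI module layers its configuration from three sources, with later ones winning: compiled-in defaults, environment variables (read_env), and command-line flags (parse_args). A minimal sketch of that flow, assuming the gem is loaded; the broker host is hypothetical:

    ENV["KAFKA_HOST"] = "broker.example.com"   # hypothetical broker
    Kafka::CLI.read_env
    Kafka::CLI.parse_args(%w[--topic events --port 9093])
    Kafka::CLI.config
    # => {:compression=>0, :host=>"broker.example.com", :topic=>"events", :port=>9093}

A flag beats the corresponding environment variable simply because parse_args runs after read_env and overwrites the same config keys.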
@@ -0,0 +1,106 @@
+ # Licensed to the Apache Software Foundation (ASF) under one or more
+ # contributor license agreements. See the NOTICE file distributed with
+ # this work for additional information regarding copyright ownership.
+ # The ASF licenses this file to You under the Apache License, Version 2.0
+ # (the "License"); you may not use this file except in compliance with
+ # the License. You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ module Kafka
+   class Consumer
+
+     include Kafka::IO
+
+     MAX_SIZE = 1024 * 1024 # 1 megabyte
+     DEFAULT_POLLING_INTERVAL = 2 # 2 seconds
+     MAX_OFFSETS = 1
+     LATEST_OFFSET = -1
+     EARLIEST_OFFSET = -2
+
+     attr_accessor :topic, :partition, :offset, :max_size, :request_type, :polling
+
+     def initialize(options = {})
+       self.topic = options[:topic] || "test"
+       self.partition = options[:partition] || 0
+       self.host = options[:host] || HOST
+       self.port = options[:port] || PORT
+       self.offset = options[:offset]
+       self.max_size = options[:max_size] || MAX_SIZE
+       self.polling = options[:polling] || DEFAULT_POLLING_INTERVAL
+       connect(host, port)
+     end
+
+     # Kernel#loop is shadowed by this method, hence the plain while loop.
+     def loop(&block)
+       while true
+         messages = consume
+         block.call(messages) if messages && !messages.empty?
+         sleep(polling)
+       end
+     end
+
+     def consume
+       self.offset ||= fetch_latest_offset
+       send_consume_request
+       message_set = Kafka::Message.parse_from(read_data_response)
+       self.offset += message_set.size
+       message_set.messages
+     rescue SocketError
+       nil
+     end
+
+     def fetch_latest_offset
+       send_offsets_request(LATEST_OFFSET)
+       read_offsets_response
+     end
+
+     def fetch_earliest_offset
+       send_offsets_request(EARLIEST_OFFSET)
+       read_offsets_response
+     end
+
+     def send_offsets_request(offset)
+       write(encoded_request_size)
+       write(encode_request(Kafka::RequestType::OFFSETS, topic, partition, offset, MAX_OFFSETS))
+     end
+
+     def read_offsets_response
+       # skip the 4-byte offset count, then byte-swap the first 64-bit big-endian offset ('q' is native-endian)
+       read_data_response[4, 8].reverse.unpack('q')[0]
+     end
+
+     def send_consume_request
+       write(encoded_request_size)
+       write(encode_request(Kafka::RequestType::FETCH, topic, partition, offset, max_size))
+     end
+
+     def read_data_response
+       data_length = read(4).unpack("N").shift
+       data = read(data_length)
+       # TODO: inspect error code instead of skipping it
+       data[2, data.length]
+     end
+
+     def encoded_request_size
+       # 2 (request type) + 2 (topic length) + topic + 4 (partition) + 8 (offset) + 4 (max size)
+       size = 2 + 2 + topic.length + 4 + 8 + 4
+       [size].pack("N")
+     end
+
+     def encode_request(request_type, topic, partition, offset, max_size)
+       request_type = [request_type].pack("n")
+       topic = [topic.length].pack('n') + topic
+       partition = [partition].pack("N")
+       offset = [offset].pack("q").reverse # DIY 64-bit big-endian integer
+       max_size = [max_size].pack("N")
+       request_type + topic + partition + offset + max_size
+     end
+   end
+ end
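
Consumer#loop drives the fetch cycle: consume resolves the starting offset lazily via fetch_latest_offset, then each pass fetches a batch, advances self.offset by the bytes consumed, and sleeps for the polling interval. A minimal sketch, assuming a broker on localhost:9092 with a "test" topic:

    consumer = Kafka::Consumer.new(:host => "localhost", :port => 9092, :topic => "test")
    consumer.loop do |messages|
      messages.each { |message| puts message.payload }
    end

Passing an explicit :offset in the options skips the fetch_latest_offset call, so older data can be replayed from an offset obtained via fetch_earliest_offset.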
@@ -0,0 +1,59 @@
+ # Licensed to the Apache Software Foundation (ASF) under one or more
+ # contributor license agreements. See the NOTICE file distributed with
+ # this work for additional information regarding copyright ownership.
+ # The ASF licenses this file to You under the Apache License, Version 2.0
+ # (the "License"); you may not use this file except in compliance with
+ # the License. You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ module Kafka
+   module Encoder
+     def self.message(message, compression = Message::NO_COMPRESSION)
+       message.encode(compression)
+     end
+
+     def self.message_block(topic, partition, messages, compression)
+       message_set = message_set(messages, compression)
+
+       topic = [topic.length].pack("n") + topic
+       partition = [partition].pack("N")
+       messages = [message_set.length].pack("N") + message_set
+
+       return topic + partition + messages
+     end
+
+     def self.message_set(messages, compression)
+       message_set = Array(messages).collect { |message|
+         self.message(message)
+       }.join("")
+       message_set = self.message(Message.new(message_set), compression) unless compression == Message::NO_COMPRESSION
+       message_set
+     end
+
+     def self.produce(topic, partition, messages, compression = Message::NO_COMPRESSION)
+       request = [RequestType::PRODUCE].pack("n")
+       data = request + self.message_block(topic, partition, messages, compression)
+
+       return [data.length].pack("N") + data
+     end
+
+     def self.multiproduce(producer_requests, compression = Message::NO_COMPRESSION)
+       part_set = Array(producer_requests).map { |req|
+         self.message_block(req.topic, req.partition, req.messages, compression)
+       }
+
+       request = [RequestType::MULTIPRODUCE].pack("n")
+       parts = [part_set.length].pack("n") + part_set.join("")
+       data = request + parts
+
+       return [data.length].pack("N") + data
+     end
+   end
+ end
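
Every request shares the same envelope: a 4-byte big-endian total length, a 2-byte request type, then the body built by message_block (length-prefixed topic, partition, length-prefixed message set). A sketch of what produce emits for one uncompressed message, with slice offsets annotated for orientation:

    bytes = Kafka::Encoder.produce("test", 0, Kafka::Message.new("hi"))
    # bytes[0, 4]   total request length, big-endian ("N")
    # bytes[4, 2]   RequestType::PRODUCE ("n")
    # bytes[6, 2]   topic length ("n"); bytes[8, 4] == "test"
    # bytes[12, 4]  partition ("N")
    # bytes[16, 4]  message set length ("N"), followed by the encoded message(s)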
@@ -0,0 +1,35 @@
+ # Licensed to the Apache Software Foundation (ASF) under one or more
+ # contributor license agreements. See the NOTICE file distributed with
+ # this work for additional information regarding copyright ownership.
+ # The ASF licenses this file to You under the Apache License, Version 2.0
+ # (the "License"); you may not use this file except in compliance with
+ # the License. You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ module Kafka
+   module ErrorCodes
+     NO_ERROR = 0
+     OFFSET_OUT_OF_RANGE = 1
+     INVALID_MESSAGE_CODE = 2
+     WRONG_PARTITION_CODE = 3
+     INVALID_FETCH_SIZE_CODE = 4
+
+     STRINGS = {
+       0 => 'No error',
+       1 => 'Offset out of range',
+       2 => 'Invalid message code',
+       3 => 'Wrong partition code',
+       4 => 'Invalid fetch size code',
+     }
+
+     def self.to_s(code)
+       STRINGS[code] || 'Unknown error'
+     end
+   end
+ end
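
ErrorCodes.to_s maps the 2-byte error code that prefixes every broker response (the field read_data_response currently skips) to a readable string, falling back to 'Unknown error':

    Kafka::ErrorCodes.to_s(Kafka::ErrorCodes::OFFSET_OUT_OF_RANGE) # => "Offset out of range"
    Kafka::ErrorCodes.to_s(42)                                     # => "Unknown error"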
@@ -0,0 +1,57 @@
+ # Licensed to the Apache Software Foundation (ASF) under one or more
+ # contributor license agreements. See the NOTICE file distributed with
+ # this work for additional information regarding copyright ownership.
+ # The ASF licenses this file to You under the Apache License, Version 2.0
+ # (the "License"); you may not use this file except in compliance with
+ # the License. You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ module Kafka
+   module IO
+     attr_accessor :socket, :host, :port, :compression
+
+     HOST = "localhost"
+     PORT = 9092
+
+     def connect(host, port)
+       raise ArgumentError, "No host or port specified" unless host && port
+       self.host = host
+       self.port = port
+       self.socket = TCPSocket.new(host, port)
+     end
+
+     def reconnect
+       self.socket = TCPSocket.new(self.host, self.port)
+     rescue
+       self.disconnect
+       raise
+     end
+
+     def disconnect
+       self.socket.close rescue nil
+       self.socket = nil
+     end
+
+     def read(length)
+       self.socket.read(length) || raise(SocketError, "no data")
+     rescue
+       self.disconnect
+       raise SocketError, "cannot read: #{$!.message}"
+     end
+
+     def write(data)
+       self.reconnect unless self.socket
+       self.socket.write(data)
+     rescue
+       self.disconnect
+       raise SocketError, "cannot write: #{$!.message}"
+     end
+
+   end
+ end
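
Kafka::IO is a mixin rather than a base class: an includer gains host/port/socket accessors, a write that lazily reconnects, and a read that raises SocketError and drops the connection on failure, which is exactly what Consumer relies on above. A minimal sketch of an includer; the Pinger class is hypothetical:

    require 'socket'

    class Pinger
      include Kafka::IO

      def initialize(host = HOST, port = PORT)
        connect(host, port)  # mixin defaults: localhost:9092
      end
    end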
@@ -0,0 +1,208 @@
+ # Licensed to the Apache Software Foundation (ASF) under one or more
+ # contributor license agreements. See the NOTICE file distributed with
+ # this work for additional information regarding copyright ownership.
+ # The ASF licenses this file to You under the Apache License, Version 2.0
+ # (the "License"); you may not use this file except in compliance with
+ # the License. You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ module Kafka
+
+   # A message. The format of a message is as follows:
+   #
+   # 4 byte big-endian int: length of message in bytes (including the rest of
+   # the header, but excluding the length field itself)
+   # 1 byte: "magic" identifier (format version number)
+   #
+   # If the magic byte == 0, there is one more header field:
+   #
+   # 4 byte big-endian int: CRC32 checksum of the payload
+   #
+   # If the magic byte == 1, there are two more header fields:
+   #
+   # 1 byte: "attributes" (flags for compression, codec etc)
+   # 4 byte big-endian int: CRC32 checksum of the payload
+   #
+   # All following bytes are the payload.
+   class Message
+
+     MAGIC_IDENTIFIER_DEFAULT = 0
+     MAGIC_IDENTIFIER_COMPRESSION = 1
+     NO_COMPRESSION = 0
+     GZIP_COMPRESSION = 1
+     SNAPPY_COMPRESSION = 2
+     BASIC_MESSAGE_HEADER = 'NC'.freeze
+     VERSION_0_HEADER = 'N'.freeze
+     VERSION_1_HEADER = 'CN'.freeze
+     COMPRESSION_CODEC_MASK = 0x03
+
+     attr_accessor :magic, :checksum, :payload
+
+     def initialize(payload = nil, magic = MAGIC_IDENTIFIER_DEFAULT, checksum = nil)
+       self.magic = magic
+       self.payload = payload || ""
+       self.checksum = checksum || self.calculate_checksum
+       @compression = NO_COMPRESSION
+     end
+
+     def calculate_checksum
+       Zlib.crc32(self.payload)
+     end
+
+     def valid?
+       self.checksum == calculate_checksum
+     end
+
+     # Takes a byte string containing one or more messages; returns a MessageSet
+     # with the messages parsed from the string, and the number of bytes
+     # consumed from the string.
+     def self.parse_from(data)
+       messages = []
+       bytes_processed = 0
+
+       while bytes_processed <= data.length - 5 # 5 = size of BASIC_MESSAGE_HEADER
+         message_size, magic = data[bytes_processed, 5].unpack(BASIC_MESSAGE_HEADER)
+         break if bytes_processed + message_size + 4 > data.length # message is truncated
+
+         case magic
+         when MAGIC_IDENTIFIER_DEFAULT
+           # | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 ...
+           # |                     |     |                     |
+           # |    message_size     |magic|       checksum      | payload ...
+           payload_size = message_size - 5 # 5 = sizeof(magic) + sizeof(checksum)
+           checksum = data[bytes_processed + 5, 4].unpack(VERSION_0_HEADER).shift
+           payload = data[bytes_processed + 9, payload_size]
+           messages << Kafka::Message.new(payload, magic, checksum)
+
+         when MAGIC_IDENTIFIER_COMPRESSION
+           # | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 ...
+           # |                     |     |     |                     |
+           # |        size         |magic|attrs|       checksum      | payload ...
+           payload_size = message_size - 6 # 6 = sizeof(magic) + sizeof(attrs) + sizeof(checksum)
+           attributes, checksum = data[bytes_processed + 5, 5].unpack(VERSION_1_HEADER)
+           payload = data[bytes_processed + 10, payload_size]
+
+           case attributes & COMPRESSION_CODEC_MASK
+           when NO_COMPRESSION # a single uncompressed message
+             messages << Kafka::Message.new(payload, magic, checksum)
+           when GZIP_COMPRESSION # a gzip-compressed message set -- parse recursively
+             uncompressed = Zlib::GzipReader.new(StringIO.new(payload)).read
+             message_set = parse_from(uncompressed)
+             raise 'malformed compressed message' if message_set.size != uncompressed.size
+             messages.concat(message_set.messages)
+           when SNAPPY_COMPRESSION # a snappy-compressed message set -- parse recursively
+             ensure_snappy! do
+               uncompressed = Snappy::Reader.new(StringIO.new(payload)).read
+               message_set = parse_from(uncompressed)
+               raise 'malformed compressed message' if message_set.size != uncompressed.size
+               messages.concat(message_set.messages)
+             end
+           else
+             # https://cwiki.apache.org/confluence/display/KAFKA/Compression
+             raise "Unsupported Kafka compression codec: #{attributes & COMPRESSION_CODEC_MASK}"
+           end
+
+         else
+           raise "Unsupported Kafka message version: magic number #{magic}"
+         end
+
+         bytes_processed += message_size + 4 # 4 = sizeof(message_size)
+       end
+
+       MessageSet.new(bytes_processed, messages)
+     end
+
+     def encode(compression = NO_COMPRESSION)
+       @compression = compression
+
+       self.payload = asciify_payload
+       self.payload = compress_payload if compression?
+
+       data = magic_and_compression + [calculate_checksum].pack("N") + payload
+       [data.length].pack("N") + data
+     end
+
+     # Encapsulates a list of Kafka messages (as Kafka::Message objects in the
+     # +messages+ attribute) and their total serialized size in bytes (the +size+
+     # attribute).
+     class MessageSet < Struct.new(:size, :messages); end
+
+     def self.ensure_snappy!
+       if Object.const_defined? "Snappy"
+         yield
+       else
+         fail "Snappy not available!"
+       end
+     end
+
+     def ensure_snappy!(&block)
+       self.class.ensure_snappy!(&block)
+     end
+
+     private
+
+     attr_reader :compression
+
+     def compression?
+       compression != NO_COMPRESSION
+     end
+
+     def magic_and_compression
+       if compression?
+         [MAGIC_IDENTIFIER_COMPRESSION, compression].pack("CC")
+       else
+         [MAGIC_IDENTIFIER_DEFAULT].pack("C")
+       end
+     end
+
+     def asciify_payload
+       if RUBY_VERSION[0, 3] == "1.8"
+         payload
+       else
+         payload.to_s.force_encoding(Encoding::ASCII_8BIT)
+       end
+     end
+
+     def compress_payload
+       case compression
+       when GZIP_COMPRESSION
+         gzip
+       when SNAPPY_COMPRESSION
+         snappy
+       end
+     end
+
+     def gzip
+       with_buffer do |buffer|
+         gz = Zlib::GzipWriter.new(buffer, nil, nil)
+         gz.write(payload)
+         gz.close
+       end
+     end
+
+     def snappy
+       ensure_snappy! do
+         with_buffer do |buffer|
+           Snappy::Writer.new(buffer) do |w|
+             w << payload
+           end
+         end
+       end
+     end
+
+     def with_buffer
+       buffer = StringIO.new
+       buffer.set_encoding(Encoding::ASCII_8BIT) unless RUBY_VERSION =~ /^1\.8/
+       yield buffer if block_given?
+       buffer.rewind
+       buffer.string
+     end
+   end
+ end
+
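
A round trip through encode and parse_from exercises the framing documented at the top of the class. A minimal sketch, assuming the gem's dependencies (zlib, stringio) are loaded:

    require 'zlib'
    require 'stringio'

    wire = Kafka::Message.new("hello").encode  # length + magic + CRC32 + payload
    set  = Kafka::Message.parse_from(wire)
    set.messages.first.payload  # => "hello"
    set.messages.first.valid?   # => true, CRC32 matches
    set.size                    # => 14, every byte of the frame consumed

Compressed framing is built one level up: Encoder.message_set concatenates encoded messages and, when compression is requested, wraps the whole set in a single magic-1 Message, which parse_from unpacks recursively via the GZIP_COMPRESSION and SNAPPY_COMPRESSION branches.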