leffen-kafka-rb 0.0.15

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,170 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one or more
2
+ # contributor license agreements. See the NOTICE file distributed with
3
+ # this work for additional information regarding copyright ownership.
4
+ # The ASF licenses this file to You under the Apache License, Version 2.0
5
+ # (the "License"); you may not use this file except in compliance with
6
+ # the License. You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ require 'optparse'
17
+
18
module Kafka
  # Command-line front end for the kafka-publish / kafka-subscribe binaries.
  # Configuration is gathered from environment variables first, then ARGV.
  module CLI #:nodoc: all
    extend self

    # Entry point for the publisher binary. With -m a single message is
    # pushed; otherwise lines read from stdin are streamed to the topic.
    def publish!
      read_env
      parse_args
      validate_config
      message = config.delete(:message)
      message ? push(config, message) : publish(config)
    end

    # Entry point for the subscriber binary: prints every consumed payload.
    def subscribe!
      read_env
      parse_args
      validate_config
      subscribe(config)
    end

    # Fills in host/port defaults and requires a topic; prints the usage
    # text and exits on --help or on a missing topic.
    def validate_config
      if config[:help]
        puts help
        exit
      end
      config[:host] ||= IO::HOST
      config[:port] ||= IO::PORT
      raise "Missing topic" unless config[:topic].is_a?(String)
    rescue RuntimeError => e
      puts e.message
      puts help
      exit
    end

    # Parses command-line switches into #config.
    def parse_args(args = ARGV)
      option_parser.parse(args)
    end

    # Seeds #config from KAFKA_* environment variables (when present).
    def read_env(env = ENV)
      host = env["KAFKA_HOST"]
      port = env["KAFKA_PORT"]
      topic = env["KAFKA_TOPIC"]
      codec = env["KAFKA_COMPRESSION"]
      config[:host] = host if host
      config[:port] = port.to_i if port
      config[:topic] = topic if topic
      config[:compression] = string_to_compression(codec) if codec
    end

    # Memoized configuration hash; compression defaults to none.
    def config
      @config ||= { :compression => string_to_compression("no") }
    end

    # Usage text rendered from the option parser.
    def help
      option_parser.to_s
    end

    # Builds the OptionParser; the -c/-m switches only exist for the
    # publisher binary (see #publish?).
    def option_parser
      OptionParser.new do |parser|
        parser.banner = "Usage: #{program_name} [options]"
        parser.separator ""

        parser.on("-h", "--host HOST", "Set the kafka hostname") do |host|
          config[:host] = host
        end

        parser.on("-p", "--port PORT", "Set the kafka port") do |port|
          config[:port] = port.to_i
        end

        parser.on("-t", "--topic TOPIC", "Set the kafka topic") do |name|
          config[:topic] = name
        end

        if publish?
          parser.on("-c", "--compression no|gzip|snappy", "Set the compression method") do |codec|
            config[:compression] = string_to_compression(codec)
          end

          parser.on("-m", "--message MESSAGE", "Message to send") do |text|
            config[:message] = text
          end
        end

        parser.separator ""

        parser.on("--help", "show the help") do
          config[:help] = true
        end

        parser.separator ""
        parser.separator "You can set the host, port, topic and compression from the environment variables: KAFKA_HOST, KAFKA_PORT, KAFKA_TOPIC AND KAFKA_COMPRESSION"
      end
    end

    # True when invoked through the publisher executable name.
    def publish?
      program_name == "leffen-kafka-publish"
    end

    # True when invoked through the subscriber executable name.
    # NOTE(review): checks "kafka-subscribe" while #publish? checks the
    # "leffen-" prefixed name — confirm against the gem's shipped binstubs.
    def subscribe?
      program_name == "kafka-subscribe"
    end

    # Basename of the running executable (overridable for tests).
    def program_name(pn = $0)
      File.basename(pn)
    end

    # Maps a codec name to the Message compression constant; raises a
    # RuntimeError for anything unrecognized.
    def string_to_compression(meth)
      case meth
      when "no"     then Message::NO_COMPRESSION
      when "gzip"   then Message::GZIP_COMPRESSION
      when "snappy" then Message::SNAPPY_COMPRESSION
      else raise "No supported compression"
      end
    end

    # Sends a single message and returns.
    def push(options, message)
      Producer.new(options).push(Message.new(message))
    end

    # Streams stdin lines to the broker until EOF or Ctrl-C.
    def publish(options)
      trap(:INT) { exit }
      producer = Producer.new(options)
      loop { publish_loop(producer) }
    end

    # One iteration of the publish loop: read a line, push it.
    def publish_loop(producer)
      producer.push(Message.new(read_input))
    end

    # Reads one stripped line from stdin; exits the process at EOF
    # (gets returns nil when eof).
    def read_input
      line = $stdin.gets
      exit unless line
      line.strip
    end

    # Consumes forever, printing each message payload, until Ctrl-C.
    def subscribe(options)
      trap(:INT) { exit }
      consumer = Consumer.new(options)
      consumer.loop do |messages|
        messages.each do |message|
          puts message.payload
        end
      end
    end

  end
end
@@ -0,0 +1,104 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one or more
2
+ # contributor license agreements. See the NOTICE file distributed with
3
+ # this work for additional information regarding copyright ownership.
4
+ # The ASF licenses this file to You under the Apache License, Version 2.0
5
+ # (the "License"); you may not use this file except in compliance with
6
+ # the License. You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
module Kafka
  # Polling consumer for a single topic/partition against a Kafka 0.7-era
  # broker, speaking the legacy binary protocol directly over the TCP
  # socket provided by the Kafka::IO mixin.
  class Consumer

    include Kafka::IO

    # Maximum number of bytes requested in one FETCH.
    MAX_SIZE = 1024 * 1024 # 1 megabyte
    # Seconds slept between fetches in #loop.
    DEFAULT_POLLING_INTERVAL = 2 # 2 seconds
    # Number of offsets asked for in an OFFSETS request.
    MAX_OFFSETS = 1
    # Special "time" values the broker's OFFSETS request understands.
    LATEST_OFFSET = -1
    EARLIEST_OFFSET = -2

    attr_accessor :topic, :partition, :offset, :max_size, :request_type, :polling

    # Connects immediately. Options:
    #   :topic     - topic name (default "test")
    #   :partition - partition number (default 0)
    #   :host      - broker host (default Kafka::IO::HOST)
    #   :port      - broker port (default Kafka::IO::PORT)
    #   :offset    - starting byte offset; nil means "start at latest"
    #   :max_size  - fetch size cap in bytes (default MAX_SIZE)
    #   :polling   - poll interval in seconds (default 2)
    def initialize(options = {})
      self.topic = options[:topic] || "test"
      self.partition = options[:partition] || 0
      self.host = options[:host] || HOST
      self.port = options[:port] || PORT
      self.offset = options[:offset]
      self.max_size = options[:max_size] || MAX_SIZE
      self.polling = options[:polling] || DEFAULT_POLLING_INTERVAL
      connect(host, port)
    end

    # Polls forever, yielding each non-empty batch of messages to the
    # block and sleeping +polling+ seconds between fetches.
    def loop(&block)
      messages = []
      while (true) do
        messages = consume
        block.call(messages) if messages && !messages.empty?
        sleep(polling)
      end
    end

    # Fetches one batch starting at the current offset and advances the
    # offset by the number of bytes consumed. Returns the parsed
    # messages, or nil when the socket fails (callers retry later).
    def consume
      self.offset ||= fetch_latest_offset
      send_consume_request
      message_set = Kafka::Message.parse_from(read_data_response)
      self.offset += message_set.size
      message_set.messages
    rescue SocketError
      nil
    end

    # Asks the broker for the newest available offset.
    def fetch_latest_offset
      send_offsets_request(LATEST_OFFSET)
      read_offsets_response
    end

    # Asks the broker for the oldest available offset.
    def fetch_earliest_offset
      send_offsets_request(EARLIEST_OFFSET)
      read_offsets_response
    end

    # Writes a length-prefixed OFFSETS request; +offset+ is one of
    # LATEST_OFFSET / EARLIEST_OFFSET.
    def send_offsets_request(offset)
      write(encoded_request_size)
      write(encode_request(Kafka::RequestType::OFFSETS, topic, partition, offset, MAX_OFFSETS))
    end

    # Skips the 4-byte offsets count and decodes the first offset as a
    # 64-bit big-endian integer (reverse + native 'q' unpack — assumes a
    # little-endian host, same DIY trick as encode_request).
    def read_offsets_response
      read_data_response[4,8].reverse.unpack('q')[0]
    end

    # Writes a length-prefixed FETCH request for the current offset.
    def send_consume_request
      write(encoded_request_size)
      write(encode_request(Kafka::RequestType::FETCH, topic, partition, offset, max_size))
    end

    # Reads one length-prefixed response and strips the leading 2-byte
    # error code, returning only the payload bytes.
    def read_data_response
      data_length = read(4).unpack("N").shift
      data = read(data_length)
      # TODO: inspect error code instead of skipping it
      data[2, data.length]
    end

    # Size of an encoded request — type(2) + topic length(2) + topic +
    # partition(4) + offset(8) + max_size(4) — packed as big-endian int32.
    def encoded_request_size
      size = 2 + 2 + topic.length + 4 + 8 + 4
      [size].pack("N")
    end

    # Serializes a request body in the 0.7 wire format; all integers are
    # big-endian.
    def encode_request(request_type, topic, partition, offset, max_size)
      request_type = [request_type].pack("n")
      topic = [topic.length].pack('n') + topic
      partition = [partition].pack("N")
      offset = [offset].pack("q").reverse # DIY 64bit big endian integer
      max_size = [max_size].pack("N")
      request_type + topic + partition + offset + max_size
    end
  end
end
@@ -0,0 +1,59 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one or more
2
+ # contributor license agreements. See the NOTICE file distributed with
3
+ # this work for additional information regarding copyright ownership.
4
+ # The ASF licenses this file to You under the Apache License, Version 2.0
5
+ # (the "License"); you may not use this file except in compliance with
6
+ # the License. You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
module Kafka
  # Builds the binary wire format for PRODUCE / MULTIPRODUCE requests.
  # All integers are big-endian; strings are length-prefixed.
  module Encoder
    class << self
      # Serializes one message, optionally compressed.
      def message(message, compression = Message::NO_COMPRESSION)
        message.encode(compression)
      end

      # One topic/partition block:
      #   topic length (int16) + topic bytes |
      #   partition (int32) |
      #   message-set length (int32) + message-set bytes
      def message_block(topic, partition, messages, compression)
        body = message_set(messages, compression)

        framed_topic = [topic.length].pack("n") + topic
        framed_partition = [partition].pack("N")
        framed_body = [body.length].pack("N") + body

        framed_topic + framed_partition + framed_body
      end

      # Concatenates the individually-encoded (uncompressed) messages;
      # when compression is requested the whole set is wrapped into a
      # single compressed envelope message.
      def message_set(messages, compression)
        set = Array(messages).map { |msg| message(msg) }.join("")
        return set if compression == Message::NO_COMPRESSION
        message(Message.new(set), compression)
      end

      # Full length-prefixed PRODUCE request for one topic/partition.
      def produce(topic, partition, messages, compression = Message::NO_COMPRESSION)
        payload = [RequestType::PRODUCE].pack("n") +
                  message_block(topic, partition, messages, compression)

        [payload.length].pack("N") + payload
      end

      # Full length-prefixed MULTIPRODUCE request; each element of
      # +producer_requests+ must respond to #topic, #partition, #messages.
      def multiproduce(producer_requests, compression = Message::NO_COMPRESSION)
        blocks = Array(producer_requests).map do |req|
          message_block(req.topic, req.partition, req.messages, compression)
        end

        payload = [RequestType::MULTIPRODUCE].pack("n") +
                  [blocks.length].pack("n") + blocks.join("")

        [payload.length].pack("N") + payload
      end
    end
  end
end
@@ -0,0 +1,35 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one or more
2
+ # contributor license agreements. See the NOTICE file distributed with
3
+ # this work for additional information regarding copyright ownership.
4
+ # The ASF licenses this file to You under the Apache License, Version 2.0
5
+ # (the "License"); you may not use this file except in compliance with
6
+ # the License. You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
module Kafka
  # Numeric error codes returned by the broker in response headers, with
  # human-readable descriptions for logging.
  module ErrorCodes
    NO_ERROR = 0
    OFFSET_OUT_OF_RANGE = 1
    INVALID_MESSAGE_CODE = 2
    WRONG_PARTITION_CODE = 3
    INVALID_FETCH_SIZE_CODE = 4
    # Deprecated misspelling ("retch") kept as an alias so existing
    # callers keep working.
    INVALID_RETCH_SIZE_CODE = INVALID_FETCH_SIZE_CODE

    # Descriptions keyed by the constants above (frozen: shared constant).
    STRINGS = {
      NO_ERROR => 'No error',
      OFFSET_OUT_OF_RANGE => 'Offset out of range',
      INVALID_MESSAGE_CODE => 'Invalid message code',
      WRONG_PARTITION_CODE => 'Wrong partition code',
      INVALID_FETCH_SIZE_CODE => 'Invalid fetch size code',
    }.freeze

    # Returns the description for +code+, or 'Unknown error' for any code
    # not listed above.
    def self.to_s(code)
      STRINGS[code] || 'Unknown error'
    end
  end
end
@@ -0,0 +1,57 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one or more
2
+ # contributor license agreements. See the NOTICE file distributed with
3
+ # this work for additional information regarding copyright ownership.
4
+ # The ASF licenses this file to You under the Apache License, Version 2.0
5
+ # (the "License"); you may not use this file except in compliance with
6
+ # the License. You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
module Kafka
  # Blocking TCP transport mixin shared by the producer and consumer.
  # Holds the peer address so a dropped connection can be re-opened, and
  # normalizes every read/write failure to SocketError.
  module IO
    attr_accessor :socket, :host, :port, :compression

    HOST = "localhost"
    PORT = 9092

    # Opens a TCP connection and remembers host/port for reconnects.
    # Raises ArgumentError when either is missing.
    def connect(host, port)
      raise ArgumentError, "No host or port specified" unless host && port
      @host = host
      @port = port
      @socket = TCPSocket.new(host, port)
    end

    # Re-opens the socket to the last known peer; on failure the stale
    # socket is discarded and the original error re-raised.
    def reconnect
      @socket = TCPSocket.new(@host, @port)
    rescue
      disconnect
      raise
    end

    # Closes and forgets the socket; errors from #close are ignored.
    def disconnect
      @socket.close rescue nil
      @socket = nil
    end

    # Reads exactly +length+ bytes. EOF or any socket failure tears the
    # connection down and surfaces as SocketError.
    def read(length)
      @socket.read(length) || raise(SocketError, "no data")
    rescue
      disconnect
      raise SocketError, "cannot read: #{$!.message}"
    end

    # Writes +data+, lazily reconnecting when the socket was lost; any
    # failure tears the connection down and surfaces as SocketError.
    def write(data)
      reconnect unless @socket
      @socket.write(data)
    rescue
      disconnect
      raise SocketError, "cannot write: #{$!.message}"
    end

  end
end
@@ -0,0 +1,209 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one or more
2
+ # contributor license agreements. See the NOTICE file distributed with
3
+ # this work for additional information regarding copyright ownership.
4
+ # The ASF licenses this file to You under the Apache License, Version 2.0
5
+ # (the "License"); you may not use this file except in compliance with
6
+ # the License. You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
module Kafka

  # A message. The format of a message is as follows:
  #
  # 4 byte big-endian int: length of message in bytes (including the rest of
  # the header, but excluding the length field itself)
  # 1 byte: "magic" identifier (format version number)
  #
  # If the magic byte == 0, there is one more header field:
  #
  # 4 byte big-endian int: CRC32 checksum of the payload
  #
  # If the magic byte == 1, there are two more header fields:
  #
  # 1 byte: "attributes" (flags for compression, codec etc)
  # 4 byte big-endian int: CRC32 checksum of the payload
  #
  # All following bytes are the payload.
  class Message

    MAGIC_IDENTIFIER_DEFAULT = 0
    MAGIC_IDENTIFIER_COMPRESSION = 1
    NO_COMPRESSION = 0
    GZIP_COMPRESSION = 1
    SNAPPY_COMPRESSION = 2
    # Unpack templates: int32 size + int8 magic; v0 checksum (int32);
    # v1 attributes (int8) + checksum (int32).
    BASIC_MESSAGE_HEADER = 'NC'.freeze
    VERSION_0_HEADER = 'N'.freeze
    VERSION_1_HEADER = 'CN'.freeze
    # Low two bits of the attributes byte select the compression codec.
    COMPRESSION_CODEC_MASK = 0x03

    attr_accessor :magic, :checksum, :payload

    # payload defaults to ""; checksum defaults to the CRC32 of the
    # payload. Compression state starts as NO_COMPRESSION and is only
    # changed by #encode.
    def initialize(payload = nil, magic = MAGIC_IDENTIFIER_DEFAULT, checksum = nil)
      self.magic = magic
      self.payload = payload || ""
      self.checksum = checksum || self.calculate_checksum
      @compression = NO_COMPRESSION
    end

    # CRC32 of the current payload.
    def calculate_checksum
      Zlib.crc32(self.payload)
    end

    # True when the stored checksum matches the current payload.
    def valid?
      self.checksum == calculate_checksum
    end

    # Takes a byte string containing one or more messages; returns a MessageSet
    # with the messages parsed from the string, and the number of bytes
    # consumed from the string.
    def self.parse_from(data)
      messages = []
      bytes_processed = 0

      while bytes_processed <= data.length - 5 # 5 = size of BASIC_MESSAGE_HEADER
        message_size, magic = data[bytes_processed, 5].unpack(BASIC_MESSAGE_HEADER)
        break if bytes_processed + message_size + 4 > data.length # message is truncated

        case magic
        when MAGIC_IDENTIFIER_DEFAULT
          # | 0 ... 3      | 4   | 5 ... 8  | 9 ...
          # | message_size |magic| checksum | payload ...
          payload_size = message_size - 5 # 5 = sizeof(magic) + sizeof(checksum)
          checksum = data[bytes_processed + 5, 4].unpack(VERSION_0_HEADER).shift
          payload = data[bytes_processed + 9, payload_size]
          messages << Kafka::Message.new(payload, magic, checksum)

        when MAGIC_IDENTIFIER_COMPRESSION
          # | 0 ... 3 | 4   | 5   | 6 ... 9  | 10 ...
          # | size    |magic|attrs| checksum | payload ...
          payload_size = message_size - 6 # 6 = sizeof(magic) + sizeof(attrs) + sizeof(checksum)
          attributes, checksum = data[bytes_processed + 5, 5].unpack(VERSION_1_HEADER)
          payload = data[bytes_processed + 10, payload_size]

          case attributes & COMPRESSION_CODEC_MASK
          when NO_COMPRESSION # a single uncompressed message
            messages << Kafka::Message.new(payload, magic, checksum)
          when GZIP_COMPRESSION # a gzip-compressed message set -- parse recursively
            uncompressed = Zlib::GzipReader.new(StringIO.new(payload)).read
            message_set = parse_from(uncompressed)
            raise 'malformed compressed message' if message_set.size != uncompressed.size
            messages.concat(message_set.messages)
          when SNAPPY_COMPRESSION # a snappy-compresses message set -- parse recursively
            ensure_snappy! do
              uncompressed = Snappy::Reader.new(StringIO.new(payload)).read
              message_set = parse_from(uncompressed)
              raise 'malformed compressed message' if message_set.size != uncompressed.size
              messages.concat(message_set.messages)
            end
          else
            # https://cwiki.apache.org/confluence/display/KAFKA/Compression
            raise "Unsupported Kafka compression codec: #{attributes & COMPRESSION_CODEC_MASK}"
          end

        else
          raise "Unsupported Kafka message version: magic number #{magic}"
        end

        bytes_processed += message_size + 4 # 4 = sizeof(message_size)
      end

      MessageSet.new(bytes_processed, messages)
    end

    # Serializes this message in the wire format described above.
    # NOTE: mutates self.payload in place — it is force-encoded to binary
    # and, when +compression+ is requested, replaced by its compressed form.
    def encode(compression = NO_COMPRESSION)
      @compression = compression

      self.payload = asciify_payload
      self.payload = compress_payload if compression?

      data = magic_and_compression + [calculate_checksum].pack("N") + payload
      [data.length].pack("N") + data
    end


    # Encapsulates a list of Kafka messages (as Kafka::Message objects in the
    # +messages+ attribute) and their total serialized size in bytes (the +size+
    # attribute).
    class MessageSet < Struct.new(:size, :messages); end

    # Yields only when the Snappy library is loaded; fails otherwise.
    def self.ensure_snappy!
      if Object.const_defined? "Snappy"
        yield
      else
        fail "Snappy not available!"
      end
    end

    # Instance-level convenience wrapper around the class-level check.
    def ensure_snappy! &block
      self.class.ensure_snappy! &block
    end

    private

    attr_reader :compression

    # True when #encode was asked for a compression codec.
    def compression?
      compression != NO_COMPRESSION
    end

    # Header bytes after the size field: magic alone for v0, or
    # magic + attributes byte for v1 (compressed) messages.
    def magic_and_compression
      if compression?
        [MAGIC_IDENTIFIER_COMPRESSION, compression].pack("CC")
      else
        [MAGIC_IDENTIFIER_DEFAULT].pack("C")
      end
    end

    # Forces the payload to a binary (ASCII-8BIT) string so byte lengths
    # and CRCs are exact; a no-op on Ruby 1.8, which has no encodings.
    def asciify_payload
      if RUBY_VERSION[0, 3] == "1.8"
        payload
      else
        payload.to_s.force_encoding(Encoding::ASCII_8BIT)
      end
    end

    # Compresses the payload with the codec chosen in #encode; returns nil
    # for any other codec value (only gzip and snappy are handled here).
    def compress_payload
      case compression
      when GZIP_COMPRESSION
        gzip
      when SNAPPY_COMPRESSION
        snappy
      end
    end

    # Gzips the payload into an in-memory buffer.
    def gzip
      with_buffer do |buffer|
        gz = Zlib::GzipWriter.new buffer, nil, nil
        gz.write payload
        gz.close
      end
    end

    # Snappy-compresses the payload; fails when Snappy is not loaded.
    def snappy
      ensure_snappy! do
        with_buffer do |buffer|
          Snappy::Writer.new buffer do |w|
            w << payload
          end
        end
      end
    end

    # Yields a binary StringIO buffer and returns its accumulated bytes.
    def with_buffer
      buffer = StringIO.new
      buffer.set_encoding Encoding::ASCII_8BIT unless RUBY_VERSION =~ /^1\.8/
      yield buffer if block_given?
      buffer.rewind
      buffer.string
    end
  end
end