kafka-rb 0.0.2 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -13,40 +13,49 @@ sudo gem install kafka-rb
 
 ### Sending a simple message
 
- require 'kafka-rb'
+ require 'kafka'
+ producer = Kafka::Producer.new
+ message = Kafka::Message.new("some random message content")
+ producer.send(message)
 
- producer = Kafka::Producer.new
+ ### Sending a sequence of messages
 
- message = Kafka::Message.new("some random message content")
+ require 'kafka'
+ producer = Kafka::Producer.new
+ message1 = Kafka::Message.new("some random message content")
+ message2 = Kafka::Message.new("some more content")
+ producer.send([message1, message2])
 
- producer.send(message)
+ ### Batching a bunch of messages using the block syntax
 
- ### sending a sequence of messages
+ require 'kafka'
+ producer = Kafka::Producer.new
+ producer.batch do |messages|
+   puts "Batching a send of multiple messages.."
+   messages << Kafka::Message.new("first message to send")
+   messages << Kafka::Message.new("second message to send")
+ end
 
- require 'kafka-rb'
-
- producer = Kafka::Producer.new
-
- message1 = Kafka::Message.new("some random message content")
-
- message2 = Kafka::Message.new("some more content")
-
- producer.send([message1, message2])
-
- ### batching a bunch of messages using the block syntax
+ * they will be sent all at once, after the block execution
 
- require 'kafka-rb'
+ ### Consuming messages one by one
 
- producer = Kafka::Producer.new
+ require 'kafka'
+ consumer = Kafka::Consumer.new
+ messages = consumer.consume
 
- producer.batch do |messages|
+ ### Consuming messages using a block loop
 
- puts "Batching a send of multiple messages.."
+ require 'kafka'
+ consumer = Kafka::Consumer.new
+ consumer.loop do |messages|
+   puts "Received"
+   puts messages
+ end
 
- messages << Kafka::Message.new("first message to send")
 
- messages << Kafka::Message.new("second message to send")
+ Contact for questions
 
- end
+ alejandrocrosa at(@) gmail.com
 
- * they will be sent all at once, after the block execution
+ http://twitter.com/alejandrocrosa
data/Rakefile CHANGED
@@ -5,13 +5,13 @@ require 'date'
 require 'spec/rake/spectask'
 
 GEM = 'kafka-rb'
- GEM_NAME = 'Kafka Client Producer'
- GEM_VERSION = '0.0.2'
+ GEM_NAME = 'Kafka Client'
+ GEM_VERSION = '0.0.5'
 AUTHORS = ['Alejandro Crosa']
 EMAIL = "alejandrocrosa@gmail.com"
 HOMEPAGE = "http://github.com/acrosa/kafka-rb"
 SUMMARY = "A Ruby client for the Kafka distributed publish/subscribe messaging service"
- DESCRIPTION = "kafka-rb allows you to produce messages to the Kafka distributed publish/subscribe messaging service."
+ DESCRIPTION = "kafka-rb allows you to produce and consume messages using the Kafka distributed publish/subscribe messaging service."
 
 spec = Gem::Specification.new do |s|
   s.name = GEM
data/lib/kafka/consumer.rb ADDED
@@ -0,0 +1,82 @@
+ module Kafka
+   class Consumer
+
+     include Kafka::IO
+
+     CONSUME_REQUEST_TYPE = Kafka::RequestType::FETCH
+     MAX_SIZE = 1048576 # 1 MB
+     DEFAULT_POLLING_INTERVAL = 2 # 2 seconds
+
+     attr_accessor :topic, :partition, :offset, :max_size, :request_type, :polling
+
+     def initialize(options = {})
+       self.topic        = options[:topic]        || "test"
+       self.partition    = options[:partition]    || 0
+       self.host         = options[:host]         || "localhost"
+       self.port         = options[:port]         || 9092
+       self.offset       = options[:offset]       || 0
+       self.max_size     = options[:max_size]     || MAX_SIZE
+       self.request_type = options[:request_type] || CONSUME_REQUEST_TYPE
+       self.polling      = options[:polling]      || DEFAULT_POLLING_INTERVAL
+       self.connect(self.host, self.port)
+     end
+
+     # REQUEST TYPE ID + TOPIC LENGTH + TOPIC + PARTITION + OFFSET + MAX SIZE
+     def request_size
+       2 + 2 + topic.length + 4 + 8 + 4
+     end
+
+     def encode_request_size
+       [self.request_size].pack("N")
+     end
+
+     def encode_request(request_type, topic, partition, offset, max_size)
+       request_type = [request_type].pack("n")
+       topic        = [topic.length].pack('n') + topic
+       partition    = [partition].pack("N")
+       offset       = [offset].pack("Q").reverse # DIY 64bit big endian integer
+       max_size     = [max_size].pack("N")
+
+       request_type + topic + partition + offset + max_size
+     end
+
+     def consume
+       self.send_consume_request         # request data
+       data = self.read_data_response    # read data response
+       self.parse_message_set_from(data) # parse message set
+     end
+
+     def loop(&block)
+       messages = []
+       while(true) do
+         messages = self.consume
+         block.call(messages) if messages && !messages.empty?
+         sleep(self.polling)
+       end
+     end
+
+     def read_data_response
+       data_length = self.socket.read(4).unpack("N").shift # read length
+       data = self.socket.read(data_length)                # read message set
+       data[2, data.length]                                # we start with a 2 byte offset
+     end
+
+     def send_consume_request
+       self.write(self.encode_request_size) # write request_size
+       self.write(self.encode_request(self.request_type, self.topic, self.partition, self.offset, self.max_size)) # write request
+     end
+
+     def parse_message_set_from(data)
+       messages = []
+       processed = 0
+       length = data.length - 4
+       while(processed <= length) do
+         message_size = data[processed, 4].unpack("N").shift
+         messages << Kafka::Message.parse_from(data[processed, message_size + 4])
+         processed += 4 + message_size
+       end
+       self.offset += processed
+       messages
+     end
+   end
+ end
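For readers following the wire protocol, here is a minimal standalone sketch (not part of the package) of the fetch request that `encode_request` assembles. The field widths follow the comment above `request_size`, and the 24-byte total for the default `"test"` topic matches the assertion in `spec/consumer_spec.rb`:

```ruby
# Hypothetical illustration of the Consumer fetch-request layout:
# REQUEST TYPE (2) + TOPIC LENGTH (2) + TOPIC + PARTITION (4) + OFFSET (8) + MAX SIZE (4)
request_type = 1         # Kafka::RequestType::FETCH
topic        = "test"
partition    = 0
offset       = 0
max_size     = 1048576   # Consumer::MAX_SIZE

request  = [request_type].pack("n")           # 2 bytes: request type id
request << [topic.length].pack("n") << topic  # 2 bytes: topic length, then topic
request << [partition].pack("N")              # 4 bytes: partition
request << [offset].pack("Q").reverse         # 8 bytes: the diff's DIY big-endian trick;
                                              # assumes a little-endian host ("Q>" is the
                                              # explicit directive on modern Rubies)
request << [max_size].pack("N")               # 4 bytes: max fetch size

puts request.bytesize  # => 24 for topic "test", as the specs expect
```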
data/lib/kafka/io.rb CHANGED
@@ -26,5 +26,14 @@ module Kafka
       self.reconnect
       self.socket.write(data) # retry
     end
+
+    def read(length)
+      begin
+        self.socket.read(length)
+      rescue Errno::EAGAIN
+        self.disconnect
+        raise Errno::EAGAIN, "Timeout reading from the socket"
+      end
+    end
   end
 end
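One way a caller might react to this behavior (a sketch under the assumption that `connect`, `host`, and `port` are exposed by `Kafka::IO`, as the initializers in this diff suggest):

```ruby
# Hypothetical caller-side retry: IO#read disconnects on Errno::EAGAIN so that a
# half-read response can't leave the connection mid-frame, where the next read
# would misparse leftover bytes as a new length prefix.
begin
  messages = consumer.consume
rescue Errno::EAGAIN
  consumer.connect(consumer.host, consumer.port) # re-establish the socket
  retry
end
```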
data/lib/kafka/message.rb CHANGED
@@ -5,13 +5,15 @@ module Kafka
   # 4 byte CRC32 of the payload
   # N - 5 byte payload
   class Message
+
     MAGIC_IDENTIFIER_DEFAULT = 0
+
     attr_accessor :magic, :checksum, :payload
 
-    def initialize(payload = nil, magic = MAGIC_IDENTIFIER_DEFAULT)
-      self.magic = magic
-      self.payload = payload
-      self.checksum = self.calculate_checksum
+    def initialize(payload = nil, magic = MAGIC_IDENTIFIER_DEFAULT, checksum = nil)
+      self.magic    = magic
+      self.payload  = payload
+      self.checksum = checksum || self.calculate_checksum
     end
 
     def calculate_checksum
@@ -21,5 +23,13 @@ module Kafka
     def valid?
       self.checksum == Zlib.crc32(self.payload)
     end
+
+    def self.parse_from(binary)
+      size     = binary[0, 4].unpack("N").shift.to_i
+      magic    = binary[4, 1].unpack("C").shift
+      checksum = binary[5, 4].unpack("N").shift
+      payload  = binary[9, size] # 5 = 1 + 4 is Magic + Checksum
+      return Kafka::Message.new(payload, magic, checksum)
+    end
   end
- end
+ end
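As a reference, a small sketch (not from the package) that builds the exact byte string the package's own specs feed to `parse_from`, annotated with the layout the class comment describes:

```ruby
require 'zlib'

# Hypothetical construction of the wire bytes used in spec/message_spec.rb:
# 4-byte size field, 1-byte magic, 4-byte CRC32 of the payload, then the payload.
payload = "ale"
bytes  = [12].pack("N")                   # size field, exactly as written in the spec
bytes += [0].pack("C")                    # magic byte (MAGIC_IDENTIFIER_DEFAULT)
bytes += [Zlib.crc32(payload)].pack("N")  # => 1120192889 per the spec
bytes += payload

# Kafka::Message.parse_from(bytes) then yields magic == 0,
# checksum == 1120192889, payload == "ale", and valid? == true.
```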
data/lib/kafka/producer.rb CHANGED
@@ -3,15 +3,15 @@ module Kafka
 
     include Kafka::IO
 
-    PRODUCE_REQUEST_ID = 0
+    PRODUCE_REQUEST_ID = Kafka::RequestType::PRODUCE
 
     attr_accessor :topic, :partition
 
     def initialize(options = {})
-      self.topic = options[:topic] || "test"
-      self.partition = options[:partition] || 0
-      self.host = options[:host] || "localhost"
-      self.port = options[:port] || 9092
+      self.topic     = options[:topic]     || "test"
+      self.partition = options[:partition] || 0
+      self.host      = options[:host]      || "localhost"
+      self.port      = options[:port]      || 9092
       self.connect(self.host, self.port)
     end
 
data/lib/kafka/request_type.rb ADDED
@@ -0,0 +1,9 @@
+ module Kafka
+   module RequestType
+     PRODUCE      = 0
+     FETCH        = 1
+     MULTIFETCH   = 2
+     MULTIPRODUCE = 3
+     OFFSETS      = 4
+   end
+ end
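These constants replace the request-type magic numbers previously hardcoded in the clients; an illustrative check, mirroring the constants introduced elsewhere in this diff:

```ruby
require 'kafka'

puts Kafka::RequestType::PRODUCE # => 0, now backing Producer::PRODUCE_REQUEST_ID
puts Kafka::RequestType::FETCH   # => 1, now backing Consumer::CONSUME_REQUEST_TYPE
```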
data/lib/kafka.rb CHANGED
@@ -1,9 +1,12 @@
 require 'socket'
 require 'zlib'
+
 require File.join(File.dirname(__FILE__), "kafka", "io")
+ require File.join(File.dirname(__FILE__), "kafka", "request_type")
 require File.join(File.dirname(__FILE__), "kafka", "batch")
 require File.join(File.dirname(__FILE__), "kafka", "message")
 require File.join(File.dirname(__FILE__), "kafka", "producer")
+ require File.join(File.dirname(__FILE__), "kafka", "consumer")
 
 module Kafka
 end
data/spec/consumer_spec.rb ADDED
@@ -0,0 +1,120 @@
+ require File.dirname(__FILE__) + '/spec_helper'
+
+ describe Consumer do
+
+   before(:each) do
+     @mocked_socket = mock(TCPSocket)
+     TCPSocket.stub!(:new).and_return(@mocked_socket) # don't use a real socket
+     @consumer = Consumer.new
+   end
+
+   describe "Kafka Consumer" do
+
+     it "should have a CONSUME_REQUEST_TYPE" do
+       Consumer::CONSUME_REQUEST_TYPE.should eql(1)
+       @consumer.should respond_to(:request_type)
+     end
+
+     it "should have a topic and a partition" do
+       @consumer.should respond_to(:topic)
+       @consumer.should respond_to(:partition)
+     end
+
+     it "should have a polling option, and a default value" do
+       Consumer::DEFAULT_POLLING_INTERVAL.should eql(2)
+       @consumer.should respond_to(:polling)
+       @consumer.polling.should eql(2)
+     end
+
+     it "should set a topic and partition on initialize" do
+       @consumer = Consumer.new({ :host => "localhost", :port => 9092, :topic => "testing" })
+       @consumer.topic.should eql("testing")
+       @consumer.partition.should eql(0)
+       @consumer = Consumer.new({ :topic => "testing", :partition => 3 })
+       @consumer.partition.should eql(3)
+     end
+
+     it "should set default host and port if none is specified" do
+       @consumer = Consumer.new
+       @consumer.host.should eql("localhost")
+       @consumer.port.should eql(9092)
+     end
+
+     it "should have a default offset, and be able to set it" do
+       @consumer.offset.should eql(0)
+       @consumer = Consumer.new({ :offset => 1111 })
+       @consumer.offset.should eql(1111)
+     end
+
+     it "should have a max size" do
+       Consumer::MAX_SIZE.should eql(1048576)
+       @consumer.max_size.should eql(1048576)
+     end
+
+     it "should return the size of the request" do
+       @consumer.request_size.should eql(24)
+       @consumer.topic = "someothertopicname"
+       @consumer.request_size.should eql(38)
+       @consumer.encode_request_size.should eql([@consumer.request_size].pack("N"))
+     end
+
+     it "should encode a request to consume" do
+       bytes = [Kafka::Consumer::CONSUME_REQUEST_TYPE].pack("n") + ["test".length].pack("n") + "test" + [0].pack("N") + [0].pack("L_") + [Kafka::Consumer::MAX_SIZE].pack("N")
+       @consumer.encode_request(Kafka::Consumer::CONSUME_REQUEST_TYPE, "test", 0, 0, Kafka::Consumer::MAX_SIZE).should eql(bytes)
+     end
+
+     it "should read the response data" do
+       bytes = [12].pack("N") + [0].pack("C") + [1120192889].pack("N") + "ale"
+       @mocked_socket.should_receive(:read).exactly(:twice).and_return(bytes)
+       @consumer.read_data_response.should eql(bytes[2, bytes.length])
+     end
+
+     it "should send a consumer request" do
+       @consumer.stub!(:encode_request_size).and_return(666)
+       @consumer.stub!(:encode_request).and_return("someencodedrequest")
+       @consumer.should_receive(:write).with("someencodedrequest").exactly(:once).and_return(true)
+       @consumer.should_receive(:write).with(666).exactly(:once).and_return(true)
+       @consumer.send_consume_request.should eql(true)
+     end
+
+     it "should parse a message set from bytes" do
+       bytes = [12].pack("N") + [0].pack("C") + [1120192889].pack("N") + "ale"
+       message = @consumer.parse_message_set_from(bytes).first
+       message.payload.should eql("ale")
+       message.checksum.should eql(1120192889)
+       message.magic.should eql(0)
+       message.valid?.should eql(true)
+     end
+
+     it "should consume messages" do
+       @consumer.should_receive(:send_consume_request).and_return(true)
+       @consumer.should_receive(:read_data_response).and_return("")
+       @consumer.consume.should eql([])
+     end
+
+     it "should loop and execute a block with the consumed messages" do
+       @consumer.stub!(:consume).and_return([mock(Kafka::Message)])
+       messages = []
+       messages.should_receive(:<<).exactly(:once).and_return([])
+       @consumer.loop do |message|
+         messages << message
+         break # we don't wanna loop forever on the test
+       end
+     end
+
+     it "should loop (every N seconds, configurable on polling attribute), and execute a block with the consumed messages" do
+       @consumer = Consumer.new({ :polling => 1 })
+       @consumer.stub!(:consume).and_return([mock(Kafka::Message)])
+       messages = []
+       messages.should_receive(:<<).exactly(:twice).and_return([])
+       executed_times = 0
+       @consumer.loop do |message|
+         messages << message
+         executed_times += 1
+         break if executed_times >= 2 # we don't wanna loop forever on the test, only 2 seconds
+       end
+
+       executed_times.should eql(2)
+     end
+   end
+ end
data/spec/io_spec.rb CHANGED
@@ -39,6 +39,19 @@ describe IO do
     @io.write(data).should eql(9)
   end
 
+   it "should read from a socket" do
+     length = 200
+     @mocked_socket.should_receive(:read).with(length).and_return(nil)
+     @io.read(length)
+   end
+
+   it "should disconnect on a timeout when reading from a socket (to avoid protocol desync state)" do
+     length = 200
+     @mocked_socket.should_receive(:read).with(length).and_raise(Errno::EAGAIN)
+     @io.should_receive(:disconnect)
+     lambda { @io.read(length) }.should raise_error(Errno::EAGAIN)
+   end
+
   it "should disconnect" do
     @io.should respond_to(:disconnect)
     @mocked_socket.should_receive(:close).and_return(nil)
data/spec/message_spec.rb CHANGED
@@ -39,6 +39,17 @@ describe Message do
     @message.valid?.should eql(true)
     @message.checksum = 0
     @message.valid?.should eql(false)
+     @message = Message.new("alejandro", 0, 66666666) # 66666666 is a funny checksum
+     @message.valid?.should eql(false)
+   end
+
+   it "should parse a message from bytes" do
+     bytes = [12].pack("N") + [0].pack("C") + [1120192889].pack("N") + "ale"
+     message = Kafka::Message.parse_from(bytes)
+     message.valid?.should eql(true)
+     message.magic.should eql(0)
+     message.checksum.should eql(1120192889)
+     message.payload.should eql("ale")
   end
  end
 end
metadata CHANGED
@@ -1,13 +1,13 @@
 --- !ruby/object:Gem::Specification
 name: kafka-rb
 version: !ruby/object:Gem::Version
-  hash: 27
+  hash: 21
   prerelease: false
   segments:
   - 0
   - 0
-  - 2
-  version: 0.0.2
+  - 5
+  version: 0.0.5
 platform: ruby
 authors:
 - Alejandro Crosa
@@ -15,7 +15,7 @@ autorequire: kafka-rb
 bindir: bin
 cert_chain: []
 
- date: 2011-01-11 00:00:00 -08:00
+ date: 2011-01-14 00:00:00 -08:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency
@@ -32,7 +32,7 @@ dependencies:
     version: "0"
   type: :development
   version_requirements: *id001
- description: kafka-rb allows you to produce messages to the Kafka distributed publish/subscribe messaging service.
+ description: kafka-rb allows you to produce and consume messages using the Kafka distributed publish/subscribe messaging service.
 email: alejandrocrosa@gmail.com
 executables: []
 
@@ -45,12 +45,14 @@ files:
 - README.md
 - Rakefile
 - lib/kafka/batch.rb
+ - lib/kafka/consumer.rb
 - lib/kafka/io.rb
 - lib/kafka/message.rb
 - lib/kafka/producer.rb
+ - lib/kafka/request_type.rb
 - lib/kafka.rb
- - lib/test.rb
 - spec/batch_spec.rb
+ - spec/consumer_spec.rb
 - spec/io_spec.rb
 - spec/kafka_spec.rb
 - spec/message_spec.rb
data/lib/test.rb DELETED
@@ -1,32 +0,0 @@
- $KCODE = 'UTF-8'
-
- require 'zlib'
-
- PRODUCE_REQUEST_ID = 0
-
- def encode_message(message)
-   # <MAGIC_BYTE: char> <CRC32: int> <PAYLOAD: bytes>
-   data = [0].pack("C").to_s + [Zlib.crc32(message)].pack('N').to_s + message
-   # print ("CHECKSUM " + Zlib.crc32(message).to_s)
-   # print ("MES " + data.length.to_s)
-   return data
- end
- # encode_message("ale")
-
- def encode_produce_request(topic, partition, messages)
-   encoded = messages.collect { |m| encode_message(m) }
-   message_set = encoded.collect { |e| puts "Message size #{e.length}"; [e.length].pack("N") + e }.join("")
-
-   puts "MESSAGE"
-   puts message_set.inspect
-
-   data = [PRODUCE_REQUEST_ID].pack("n") + \
-     [topic.length].pack("n") + topic.to_s + \
-     [partition].pack("N") + \
-     [message_set.length].pack("N") + message_set
-   puts "DATA " + message_set.length.to_s
-   return [data.length].pack("N") + data
- end
-
- socket = TCPSocket.open("localhost", 9092)
- socket.write encode_produce_request("test", 0, ["ale"])