kafka-rb 0.0.2 → 0.0.5

data/README.md CHANGED
@@ -13,40 +13,49 @@ sudo gem install kafka-rb
 
 ### Sending a simple message
 
-    require 'kafka-rb'
+    require 'kafka'
+    producer = Kafka::Producer.new
+    message = Kafka::Message.new("some random message content")
+    producer.send(message)
 
-    producer = Kafka::Producer.new
+### Sending a sequence of messages
 
-    message = Kafka::Message.new("some random message content")
+    require 'kafka'
+    producer = Kafka::Producer.new
+    message1 = Kafka::Message.new("some random message content")
+    message2 = Kafka::Message.new("some more content")
+    producer.send([message1, message2])
 
-    producer.send(message)
+### Batching a bunch of messages using the block syntax
 
-### sending a sequence of messages
+    require 'kafka'
+    producer = Kafka::Producer.new
+    producer.batch do |messages|
+      puts "Batching a send of multiple messages.."
+      messages << Kafka::Message.new("first message to send")
+      messages << Kafka::Message.new("second message to send")
+    end
 
-    require 'kafka-rb'
-
-    producer = Kafka::Producer.new
-
-    message1 = Kafka::Message.new("some random message content")
-
-    message2 = Kafka::Message.new("some more content")
-
-    producer.send([message1, message2])
-
-### batching a bunch of messages using the block syntax
+* they will be sent all at once, after the block execution
 
-    require 'kafka-rb'
+### Consuming messages one by one
 
-    producer = Kafka::Producer.new
+    require 'kafka'
+    consumer = Kafka::Consumer.new
+    messages = consumer.consume
 
-    producer.batch do |messages|
+### Consuming messages using a block loop
 
-      puts "Batching a send of multiple messages.."
+    require 'kafka'
+    consumer = Kafka::Consumer.new
+    consumer.loop do |messages|
+      puts "Received"
+      puts messages
+    end
 
-      messages << Kafka::Message.new("first message to send")
 
-      messages << Kafka::Message.new("second message to send")
+Contact for questions
 
-      end
+alejandrocrosa at(@) gmail.com
 
-    * they will be sent all at once, after the block execution
+http://twitter.com/alejandrocrosa
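
The README only shows the zero-argument constructors. As a reference, here is a minimal sketch (not from the README) of the consumer options that Kafka::Consumer#initialize reads, with the defaults from lib/kafka/consumer.rb below; every key is optional:

    require 'kafka'

    # All keys are optional; omitted keys fall back to the defaults shown
    # (topic "test", partition 0, broker on localhost:9092).
    consumer = Kafka::Consumer.new(
      :host      => "localhost",
      :port      => 9092,
      :topic     => "test",
      :partition => 0,
      :offset    => 0,   # byte offset to start fetching from
      :polling   => 2    # seconds between fetches in Consumer#loop
    )
    messages = consumer.consume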
data/Rakefile CHANGED
@@ -5,13 +5,13 @@ require 'date'
 require 'spec/rake/spectask'
 
 GEM = 'kafka-rb'
-GEM_NAME = 'Kafka Client Producer'
-GEM_VERSION = '0.0.2'
+GEM_NAME = 'Kafka Client'
+GEM_VERSION = '0.0.5'
 AUTHORS = ['Alejandro Crosa']
 EMAIL = "alejandrocrosa@gmail.com"
 HOMEPAGE = "http://github.com/acrosa/kafka-rb"
 SUMMARY = "A Ruby client for the Kafka distributed publish/subscribe messaging service"
-DESCRIPTION = "kafka-rb allows you to produce messages to the Kafka distributed publish/subscribe messaging service."
+DESCRIPTION = "kafka-rb allows you to produce and consume messages using the Kafka distributed publish/subscribe messaging service."
 
 spec = Gem::Specification.new do |s|
   s.name = GEM
data/lib/kafka/consumer.rb ADDED
@@ -0,0 +1,82 @@
+module Kafka
+  class Consumer
+
+    include Kafka::IO
+
+    CONSUME_REQUEST_TYPE = Kafka::RequestType::FETCH
+    MAX_SIZE = 1048576 # 1 MB
+    DEFAULT_POLLING_INTERVAL = 2 # 2 seconds
+
+    attr_accessor :topic, :partition, :offset, :max_size, :request_type, :polling
+
+    def initialize(options = {})
+      self.topic        = options[:topic]        || "test"
+      self.partition    = options[:partition]    || 0
+      self.host         = options[:host]         || "localhost"
+      self.port         = options[:port]         || 9092
+      self.offset       = options[:offset]       || 0
+      self.max_size     = options[:max_size]     || MAX_SIZE
+      self.request_type = options[:request_type] || CONSUME_REQUEST_TYPE
+      self.polling      = options[:polling]      || DEFAULT_POLLING_INTERVAL
+      self.connect(self.host, self.port)
+    end
+
+    # REQUEST TYPE ID + TOPIC LENGTH + TOPIC + PARTITION + OFFSET + MAX SIZE
+    def request_size
+      2 + 2 + topic.length + 4 + 8 + 4
+    end
+
+    def encode_request_size
+      [self.request_size].pack("N")
+    end
+
+    def encode_request(request_type, topic, partition, offset, max_size)
+      request_type = [request_type].pack("n")
+      topic        = [topic.length].pack('n') + topic
+      partition    = [partition].pack("N")
+      offset       = [offset].pack("Q").reverse # DIY 64bit big endian integer
+      max_size     = [max_size].pack("N")
+
+      request_type + topic + partition + offset + max_size
+    end
+
+    def consume
+      self.send_consume_request         # request data
+      data = self.read_data_response    # read data response
+      self.parse_message_set_from(data) # parse message set
+    end
+
+    def loop(&block)
+      messages = []
+      while(true) do
+        messages = self.consume
+        block.call(messages) if messages && !messages.empty?
+        sleep(self.polling)
+      end
+    end
+
+    def read_data_response
+      data_length = self.socket.read(4).unpack("N").shift # read length
+      data = self.socket.read(data_length)                # read message set
+      data[2, data.length]                                # we start with a 2 byte offset
+    end
+
+    def send_consume_request
+      self.write(self.encode_request_size) # write request_size
+      self.write(self.encode_request(self.request_type, self.topic, self.partition, self.offset, self.max_size)) # write request
+    end
+
+    def parse_message_set_from(data)
+      messages = []
+      processed = 0
+      length = data.length - 4
+      while(processed <= length) do
+        message_size = data[processed, 4].unpack("N").shift
+        messages << Kafka::Message.parse_from(data[processed, message_size + 4])
+        processed += 4 + message_size
+      end
+      self.offset += processed
+      messages
+    end
+  end
+end
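
For reference, a worked sketch (not part of the gem) of the frame that encode_request_size and encode_request produce together, assuming topic "test", partition 0, offset 0, and the default MAX_SIZE; the same values appear in the consumer_spec expectations further down:

    # Layout: <size:4> <type:2> <topic_len:2> <topic> <partition:4> <offset:8> <max_size:4>
    topic = "test"
    size  = 2 + 2 + topic.length + 4 + 8 + 4   # => 24, same as Consumer#request_size
    frame = [size].pack("N") +                 # request size header
            [1].pack("n") +                    # Kafka::RequestType::FETCH
            [topic.length].pack("n") + topic + # topic length + topic
            [0].pack("N") +                    # partition
            [0].pack("Q").reverse +            # offset; the reverse trick assumes a little-endian host
            [1048576].pack("N")                # MAX_SIZE, 1 MB
    frame.bytesize                             # => 28: 4 byte size header + 24 byte request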
data/lib/kafka/io.rb CHANGED
@@ -26,5 +26,14 @@ module Kafka
       self.reconnect
       self.socket.write(data) # retry
     end
+
+    def read(length)
+      begin
+        self.socket.read(length)
+      rescue Errno::EAGAIN
+        self.disconnect
+        raise Errno::EAGAIN, "Timeout reading from the socket"
+      end
+    end
   end
 end
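
The new read helper disconnects before re-raising so that a timed-out, half-read response cannot leave the socket positioned mid-frame. A hypothetical caller sketch (the consumer variable stands in for any Kafka::Consumer wired to this IO):

    begin
      messages = consumer.consume   # reads the response via Kafka::IO#read
    rescue Errno::EAGAIN
      # The socket was already closed by #read, so retrying starts from a
      # fresh connection instead of resuming mid-frame (protocol desync).
      retry
    end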
data/lib/kafka/message.rb CHANGED
@@ -5,13 +5,15 @@ module Kafka
   # 4 byte CRC32 of the payload
   # N - 5 byte payload
   class Message
+
     MAGIC_IDENTIFIER_DEFAULT = 0
+
     attr_accessor :magic, :checksum, :payload
 
-    def initialize(payload = nil, magic = MAGIC_IDENTIFIER_DEFAULT)
-      self.magic = magic
-      self.payload = payload
-      self.checksum = self.calculate_checksum
+    def initialize(payload = nil, magic = MAGIC_IDENTIFIER_DEFAULT, checksum = nil)
+      self.magic    = magic
+      self.payload  = payload
+      self.checksum = checksum || self.calculate_checksum
     end
 
     def calculate_checksum
@@ -21,5 +23,13 @@ module Kafka
     def valid?
       self.checksum == Zlib.crc32(self.payload)
     end
+
+    def self.parse_from(binary)
+      size = binary[0, 4].unpack("N").shift.to_i
+      magic = binary[4, 1].unpack("C").shift
+      checksum = binary[5, 4].unpack("N").shift
+      payload = binary[9, size] # 5 = 1 + 4 is Magic + Checksum
+      return Kafka::Message.new(payload, magic, checksum)
+    end
   end
-end
+end
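
A short sketch of the on-the-wire layout parse_from expects, reusing the exact byte fixture from message_spec.rb below: a 4-byte size, a 1-byte magic identifier, a 4-byte CRC32, then the payload:

    bytes = [12].pack("N") +          # size field
            [0].pack("C") +           # magic identifier
            [1120192889].pack("N") +  # CRC32; 1120192889 == Zlib.crc32("ale")
            "ale"                     # payload
    message = Kafka::Message.parse_from(bytes)
    message.magic     # => 0
    message.checksum  # => 1120192889
    message.payload   # => "ale"
    message.valid?    # => true, stored checksum matches the payload's CRC32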
data/lib/kafka/producer.rb CHANGED
@@ -3,15 +3,15 @@ module Kafka
 
     include Kafka::IO
 
-    PRODUCE_REQUEST_ID = 0
+    PRODUCE_REQUEST_ID = Kafka::RequestType::PRODUCE
 
     attr_accessor :topic, :partition
 
     def initialize(options = {})
-      self.topic = options[:topic] || "test"
-      self.partition = options[:partition] || 0
-      self.host = options[:host] || "localhost"
-      self.port = options[:port] || 9092
+      self.topic     = options[:topic]     || "test"
+      self.partition = options[:partition] || 0
+      self.host      = options[:host]      || "localhost"
+      self.port      = options[:port]      || 9092
       self.connect(self.host, self.port)
     end
 
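The producer reads the same connection options as the consumer; a minimal sketch (the topic name "events" is just an example, and the defaults shown come from Producer#initialize above):

    require 'kafka'

    # Omitted keys fall back to topic "test", partition 0, localhost:9092.
    producer = Kafka::Producer.new(:topic => "events", :partition => 0)
    producer.send(Kafka::Message.new("hello from kafka-rb 0.0.5"))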
data/lib/kafka/request_type.rb ADDED
@@ -0,0 +1,9 @@
+module Kafka
+  module RequestType
+    PRODUCE      = 0
+    FETCH        = 1
+    MULTIFETCH   = 2
+    MULTIPRODUCE = 3
+    OFFSETS      = 4
+  end
+end
data/lib/kafka.rb CHANGED
@@ -1,9 +1,12 @@
 require 'socket'
 require 'zlib'
+
 require File.join(File.dirname(__FILE__), "kafka", "io")
+require File.join(File.dirname(__FILE__), "kafka", "request_type")
 require File.join(File.dirname(__FILE__), "kafka", "batch")
 require File.join(File.dirname(__FILE__), "kafka", "message")
 require File.join(File.dirname(__FILE__), "kafka", "producer")
+require File.join(File.dirname(__FILE__), "kafka", "consumer")
 
 module Kafka
 end
data/spec/consumer_spec.rb ADDED
@@ -0,0 +1,120 @@
+require File.dirname(__FILE__) + '/spec_helper'
+
+describe Consumer do
+
+  before(:each) do
+    @mocked_socket = mock(TCPSocket)
+    TCPSocket.stub!(:new).and_return(@mocked_socket) # don't use a real socket
+    @consumer = Consumer.new
+  end
+
+  describe "Kafka Consumer" do
+
+    it "should have a CONSUME_REQUEST_TYPE" do
+      Consumer::CONSUME_REQUEST_TYPE.should eql(1)
+      @consumer.should respond_to(:request_type)
+    end
+
+    it "should have a topic and a partition" do
+      @consumer.should respond_to(:topic)
+      @consumer.should respond_to(:partition)
+    end
+
+    it "should have a polling option, and a default value" do
+      Consumer::DEFAULT_POLLING_INTERVAL.should eql(2)
+      @consumer.should respond_to(:polling)
+      @consumer.polling.should eql(2)
+    end
+
+    it "should set a topic and partition on initialize" do
+      @consumer = Consumer.new({ :host => "localhost", :port => 9092, :topic => "testing" })
+      @consumer.topic.should eql("testing")
+      @consumer.partition.should eql(0)
+      @consumer = Consumer.new({ :topic => "testing", :partition => 3 })
+      @consumer.partition.should eql(3)
+    end
+
+    it "should set default host and port if none is specified" do
+      @consumer = Consumer.new
+      @consumer.host.should eql("localhost")
+      @consumer.port.should eql(9092)
+    end
+
+    it "should have a default offset, and be able to set it" do
+      @consumer.offset.should eql(0)
+      @consumer = Consumer.new({ :offset => 1111 })
+      @consumer.offset.should eql(1111)
+    end
+
+    it "should have a max size" do
+      Consumer::MAX_SIZE.should eql(1048576)
+      @consumer.max_size.should eql(1048576)
+    end
+
+    it "should return the size of the request" do
+      @consumer.request_size.should eql(24)
+      @consumer.topic = "someothertopicname"
+      @consumer.request_size.should eql(38)
+      @consumer.encode_request_size.should eql([@consumer.request_size].pack("N"))
+    end
+
+    it "should encode a request to consume" do
+      bytes = [Kafka::Consumer::CONSUME_REQUEST_TYPE].pack("n") + ["test".length].pack("n") + "test" + [0].pack("N") + [0].pack("L_") + [Kafka::Consumer::MAX_SIZE].pack("N")
+      @consumer.encode_request(Kafka::Consumer::CONSUME_REQUEST_TYPE, "test", 0, 0, Kafka::Consumer::MAX_SIZE).should eql(bytes)
+    end
+
+    it "should read the response data" do
+      bytes = [12].pack("N") + [0].pack("C") + [1120192889].pack("N") + "ale"
+      @mocked_socket.should_receive(:read).exactly(:twice).and_return(bytes)
+      @consumer.read_data_response.should eql(bytes[2, bytes.length])
+    end
+
+    it "should send a consumer request" do
+      @consumer.stub!(:encode_request_size).and_return(666)
+      @consumer.stub!(:encode_request).and_return("someencodedrequest")
+      @consumer.should_receive(:write).with("someencodedrequest").exactly(:once).and_return(true)
+      @consumer.should_receive(:write).with(666).exactly(:once).and_return(true)
+      @consumer.send_consume_request.should eql(true)
+    end
+
+    it "should parse a message set from bytes" do
+      bytes = [12].pack("N") + [0].pack("C") + [1120192889].pack("N") + "ale"
+      message = @consumer.parse_message_set_from(bytes).first
+      message.payload.should eql("ale")
+      message.checksum.should eql(1120192889)
+      message.magic.should eql(0)
+      message.valid?.should eql(true)
+    end
+
+    it "should consume messages" do
+      @consumer.should_receive(:send_consume_request).and_return(true)
+      @consumer.should_receive(:read_data_response).and_return("")
+      @consumer.consume.should eql([])
+    end
+
+    it "should loop and execute a block with the consumed messages" do
+      @consumer.stub!(:consume).and_return([mock(Kafka::Message)])
+      messages = []
+      messages.should_receive(:<<).exactly(:once).and_return([])
+      @consumer.loop do |message|
+        messages << message
+        break # we don't wanna loop forever on the test
+      end
+    end
+
+    it "should loop (every N seconds, configurable on polling attribute), and execute a block with the consumed messages" do
+      @consumer = Consumer.new({ :polling => 1 })
+      @consumer.stub!(:consume).and_return([mock(Kafka::Message)])
+      messages = []
+      messages.should_receive(:<<).exactly(:twice).and_return([])
+      executed_times = 0
+      @consumer.loop do |message|
+        messages << message
+        executed_times += 1
+        break if executed_times >= 2 # we don't wanna loop forever on the test, only 2 seconds
+      end
+
+      executed_times.should eql(2)
+    end
+  end
+end
data/spec/io_spec.rb CHANGED
@@ -39,6 +39,19 @@ describe IO do
     @io.write(data).should eql(9)
   end
 
+  it "should read from a socket" do
+    length = 200
+    @mocked_socket.should_receive(:read).with(length).and_return(nil)
+    @io.read(length)
+  end
+
+  it "should disconnect on a timeout when reading from a socket (to aviod protocol desync state)" do
+    length = 200
+    @mocked_socket.should_receive(:read).with(length).and_raise(Errno::EAGAIN)
+    @io.should_receive(:disconnect)
+    lambda { @io.read(length) }.should raise_error(Errno::EAGAIN)
+  end
+
   it "should disconnect" do
     @io.should respond_to(:disconnect)
     @mocked_socket.should_receive(:close).and_return(nil)
data/spec/message_spec.rb CHANGED
@@ -39,6 +39,17 @@ describe Message do
     @message.valid?.should eql(true)
     @message.checksum = 0
     @message.valid?.should eql(false)
+    @message = Message.new("alejandro", 0, 66666666) # 66666666 is a funny checksum
+    @message.valid?.should eql(false)
+  end
+
+  it "should parse a message from bytes" do
+    bytes = [12].pack("N") + [0].pack("C") + [1120192889].pack("N") + "ale"
+    message = Kafka::Message.parse_from(bytes)
+    message.valid?.should eql(true)
+    message.magic.should eql(0)
+    message.checksum.should eql(1120192889)
+    message.payload.should eql("ale")
   end
 end
 
metadata CHANGED
@@ -1,13 +1,13 @@
 --- !ruby/object:Gem::Specification
 name: kafka-rb
 version: !ruby/object:Gem::Version
-  hash: 27
+  hash: 21
   prerelease: false
   segments:
   - 0
   - 0
-  - 2
-  version: 0.0.2
+  - 5
+  version: 0.0.5
 platform: ruby
 authors:
 - Alejandro Crosa
@@ -15,7 +15,7 @@ autorequire: kafka-rb
 bindir: bin
 cert_chain: []
 
-date: 2011-01-11 00:00:00 -08:00
+date: 2011-01-14 00:00:00 -08:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency
@@ -32,7 +32,7 @@ dependencies:
     version: "0"
   type: :development
   version_requirements: *id001
-description: kafka-rb allows you to produce messages to the Kafka distributed publish/subscribe messaging service.
+description: kafka-rb allows you to produce and consume messages using the Kafka distributed publish/subscribe messaging service.
 email: alejandrocrosa@gmail.com
 executables: []
 
@@ -45,12 +45,14 @@ files:
 - README.md
 - Rakefile
 - lib/kafka/batch.rb
+- lib/kafka/consumer.rb
 - lib/kafka/io.rb
 - lib/kafka/message.rb
 - lib/kafka/producer.rb
+- lib/kafka/request_type.rb
 - lib/kafka.rb
-- lib/test.rb
 - spec/batch_spec.rb
+- spec/consumer_spec.rb
 - spec/io_spec.rb
 - spec/kafka_spec.rb
 - spec/message_spec.rb
data/lib/test.rb DELETED
@@ -1,32 +0,0 @@
-$KCODE = 'UTF-8'
-
-require 'zlib'
-
-PRODUCE_REQUEST_ID = 0
-
-def encode_message(message)
-  # <MAGIC_BYTE: char> <CRC32: int> <PAYLOAD: bytes>
-  data = [0].pack("C").to_s + [Zlib.crc32(message)].pack('N').to_s + message
-  # print ("CHECKSUM " + Zlib.crc32(message).to_s)
-  # print ("MES " + data.length.to_s)
-  return data
-end
-# encode_message("ale")
-
-def encode_produce_request(topic, partition, messages)
-  encoded = messages.collect { |m| encode_message(m) }
-  message_set = encoded.collect { |e| puts "Message size #{e.length}"; [e.length].pack("N") + e }.join("")
-
-  puts "MESSAGE"
-  puts message_set.inspect
-
-  data = [PRODUCE_REQUEST_ID].pack("n") + \
-         [topic.length].pack("n") + topic.to_s + \
-         [partition].pack("N") + \
-         [message_set.length].pack("N") + message_set
-  puts "DATA " + message_set.length.to_s
-  return [data.length].pack("N") + data
-end
-
-socket = TCPSocket.open("localhost", 9092)
-socket.write encode_produce_request("test", 0, ["ale"])