kafka_syrup 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +11 -0
- data/.travis.yml +3 -0
- data/Gemfile +4 -0
- data/LICENSE +22 -0
- data/README.md +79 -0
- data/Rakefile +8 -0
- data/kafka_syrup.gemspec +32 -0
- data/lib/kafka_syrup.rb +68 -0
- data/lib/kafka_syrup/broker.rb +42 -0
- data/lib/kafka_syrup/encoding.rb +59 -0
- data/lib/kafka_syrup/errors.rb +15 -0
- data/lib/kafka_syrup/partition_consumer.rb +111 -0
- data/lib/kafka_syrup/protocol.rb +22 -0
- data/lib/kafka_syrup/protocol/base.rb +41 -0
- data/lib/kafka_syrup/protocol/errors.rb +32 -0
- data/lib/kafka_syrup/protocol/fetch_request.rb +60 -0
- data/lib/kafka_syrup/protocol/fetch_response.rb +84 -0
- data/lib/kafka_syrup/protocol/message.rb +58 -0
- data/lib/kafka_syrup/protocol/message_set.rb +53 -0
- data/lib/kafka_syrup/protocol/metadata_request.rb +23 -0
- data/lib/kafka_syrup/protocol/metadata_response.rb +105 -0
- data/lib/kafka_syrup/protocol/offset_request.rb +53 -0
- data/lib/kafka_syrup/protocol/offset_response.rb +69 -0
- data/lib/kafka_syrup/protocol/produce_request.rb +69 -0
- data/lib/kafka_syrup/protocol/produce_response.rb +69 -0
- data/lib/kafka_syrup/protocol/request.rb +37 -0
- data/lib/kafka_syrup/protocol/response.rb +22 -0
- data/lib/kafka_syrup/topic_consumer.rb +301 -0
- data/lib/kafka_syrup/topic_producer.rb +59 -0
- data/lib/kafka_syrup/utils.rb +15 -0
- data/lib/kafka_syrup/version.rb +3 -0
- metadata +188 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: 68b32dfdf15ed7d504a6c86ae20f166d9bc946a4
+  data.tar.gz: 1df748f47f3aff2b675bcd4971f1aeed6656e429
+SHA512:
+  metadata.gz: 8857d9dfdcb3df0c5364d0ce0cb1c9f1b230525576cd5959d3020d557d9396e9ce7e61dec30f55589f0535f9f6536695be61f5843857eeb2d759f6f0080bef8b
+  data.tar.gz: bcaf2bde3c43304d0531c388e86a6a92713e03a63fe0dacd8f543573ec534e2ef7baae48cc1154156e1d49bf5d7f2af36579741197c4d94e75546f6cbaac43d8
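These are the standard RubyGems package digests. As a minimal verification sketch (the file paths and the stdlib `digest`/`yaml` usage here are illustrative, not part of the gem), the recorded SHA512 can be rechecked like so:

```ruby
# Sketch: recompute the SHA512 digest of the packaged tarball and compare it
# with the value recorded in checksums.yaml. File paths are assumed.
require 'digest'
require 'yaml'

sums   = YAML.safe_load(File.read('checksums.yaml'))
actual = Digest::SHA512.file('data.tar.gz').hexdigest

puts(actual == sums['SHA512']['data.tar.gz'] ? 'checksum ok' : 'checksum mismatch')
```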
data/.gitignore
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
+The MIT License (MIT)
+
+Copyright (c) 2015 Delbert Mitten
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
data/README.md
ADDED
@@ -0,0 +1,79 @@
+# KafkaSyrup
+
+KafkaSyrup is a Kafka client compatible with the Kafka 0.8 API and
+above.
+
+## Installation
+
+Add this line to your application's Gemfile:
+
+```ruby
+gem 'kafka_syrup'
+```
+
+And then execute:
+
+    $ bundle
+
+Or install it yourself as:
+
+    $ gem install kafka_syrup
+
+## Usage
+
+### Configuration
+
+```ruby
+require 'kafka_syrup'
+
+KafkaSyrup.configure do |config|
+  config.brokers = 'localhost:9092,localhost:9093,localhost:9094'
+
+  # For consuming in a group
+  config.zookeeper_hosts = 'localhost:2181'
+end
+```
+
+*Additional configuration options can be found in the main
+kafka_syrup.rb file.*
+
+### Sending messages to Kafka
+
+```ruby
+producer = KafkaSyrup::Producer.new(topic: :foo)
+
+producer.send_message('hello world')
+```
+
+### Consuming messages from a single Kafka partition
+
+```ruby
+consumer = KafkaSyrup::PartitionConsumer.new(topic: :foo, partition: 1)
+
+consumer.fetch
+
+# It is possible to limit the number of messages returned:
+consumer.fetch(50)
+```
+
+*Note that regardless of the limit, fetch() will block until it has received at least one
+message.*
+
+### Consuming messages in a group
+
+```ruby
+consumer = KafkaSyrup::TopicConsumer.new(topic: :foo, group: :bar)
+
+consumer.fetch
+
+# It is possible to limit the number of messages returned:
+consumer.fetch(50)
+```
+
+*Note that regardless of the limit, fetch() will block until it has received at least one
+message.*
+
+The topic consumer uses zookeeper for coordination with other
+members of the group and is fully compatible with the normal Kafka high
+level consumer (i.e., kafka_syrup clients and Java Kafka clients can
+coexist in the same group with no problem).
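To illustrate the group coordination the README describes, here is a minimal sketch (hypothetical topic and group names; it assumes `fetch` returns the message hashes built by the partition consumer shown later in this diff):

```ruby
# Sketch: two consumers in the same group. Zookeeper-based coordination
# balances the topic's partitions between them, so each message is delivered
# to only one of the two consumers.
consumers = [
  KafkaSyrup::TopicConsumer.new(topic: :foo, group: :bar),
  KafkaSyrup::TopicConsumer.new(topic: :foo, group: :bar)
]

threads = consumers.each_with_index.map do |consumer, i|
  Thread.new do
    loop do
      consumer.fetch.each { |msg| puts "consumer #{i}: #{msg[:message]}" }
    end
  end
end

threads.each(&:join)
```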
data/Rakefile
ADDED
data/kafka_syrup.gemspec
ADDED
@@ -0,0 +1,32 @@
+# coding: utf-8
+lib = File.expand_path('../lib', __FILE__)
+$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
+require 'kafka_syrup/version'
+
+Gem::Specification.new do |spec|
+  spec.name          = "kafka_syrup"
+  spec.version       = KafkaSyrup::VERSION
+  spec.authors       = ['Delbert Mitten']
+  spec.email         = ['drmitten@gmail.com']
+
+  spec.summary       = %q{A high level Kafka client.}
+  spec.description   = %q{A high level Kafka client that supports producers, low level consumers, and high level consumers.}
+  spec.homepage      = 'https://github.com/drmitten/kafka_syrup'
+  spec.license       = 'MIT'
+
+  spec.files         = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
+  spec.bindir        = "exe"
+  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
+  spec.require_paths = ["lib"]
+
+  spec.add_dependency 'zk', '~> 1.9'
+  spec.add_dependency 'multi_json', '~> 1.8'
+
+  spec.add_development_dependency 'bundler', '~> 1.9'
+  spec.add_development_dependency 'rake', '~> 10.0'
+  spec.add_development_dependency 'minitest', '~> 5.6'
+  spec.add_development_dependency 'minitest-reporters', '~> 1.0'
+  spec.add_development_dependency 'mocha', '~> 1.1'
+
+  spec.add_development_dependency 'pry', '~> 0.10'
+end
data/lib/kafka_syrup.rb
ADDED
@@ -0,0 +1,68 @@
+%w[
+  errors
+  utils
+  encoding
+  protocol
+  version
+  broker
+  topic_producer
+  partition_consumer
+  topic_consumer
+].each{ |file| require "kafka_syrup/#{file}" }
+
+module KafkaSyrup
+  E = Encoding # Just to abbreviate the typing
+
+  Configuration = Struct.new :produce_required_acks, :produce_timeout, :consume_max_wait_time, :consume_min_bytes, :consume_max_bytes, :so_sndbuf,
+                             :brokers, :zookeeper_hosts, :zookeeper_path, :retry_backoff, :logger do
+
+    def with_defaults
+      self.produce_required_acks = -1
+      self.produce_timeout = 1500
+      self.consume_max_wait_time = 100
+      self.consume_min_bytes = 1
+      self.consume_max_bytes = 1024 * 1024
+      self.so_sndbuf = 100 * 1024
+      self.brokers = ''
+      self.zookeeper_path = '/'
+      self.retry_backoff = 10 * 1000
+      self
+    end
+
+    def logger
+      return @logger if @logger
+      @logger = Logger.new(STDOUT)
+      @logger.formatter = ->(severity, datetime, progname, msg){ "[#{datetime}] #{severity} : #{msg}\n" }
+      @logger.level = Logger::WARN
+      @logger
+    end
+  end
+
+  class << self
+    def configure
+      yield config
+    end
+
+    def config
+      @config ||= Configuration.new.with_defaults
+    end
+
+    def brokers
+      @brokers ||= config.brokers.split(',').map(&:strip).map{ |info| Broker.new(*info.split(':')) }
+    end
+
+    def get_metadata(*topics)
+      request = KafkaSyrup::Protocol::MetadataRequest.new(*topics)
+
+      brokers.each do |broker|
+        begin
+          response = broker.send_request(request, close: true)
+          return response
+        rescue StandardError
+        end
+      end
+
+      raise NoBrokers
+    end
+  end
+end
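A short usage sketch of these module-level helpers (broker addresses are illustrative):

```ruby
# Sketch: the brokers config string is parsed into Broker objects, and
# get_metadata tries each broker in turn.
KafkaSyrup.configure do |config|
  config.brokers = 'localhost:9092,localhost:9093'
end

KafkaSyrup.brokers   # => array of KafkaSyrup::Broker built from the config string

# Returns the first successful MetadataResponse; raises KafkaSyrup::NoBrokers
# if every configured broker fails.
meta = KafkaSyrup.get_metadata(:foo)
```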
data/lib/kafka_syrup/broker.rb
ADDED
@@ -0,0 +1,42 @@
+module KafkaSyrup
+  class Broker
+    attr_accessor :host, :port
+
+    def initialize(host, port)
+      self.host = host
+      self.port = port
+      self.extend Communications
+    end
+
+    module Communications
+      def socket
+        unless @socket.respond_to?(:closed?) && !@socket.closed?
+          @socket = Socket.new(:INET, :SOCK_STREAM)
+          @socket.setsockopt(:SOCKET, :SO_SNDBUF, KafkaSyrup.config.so_sndbuf)
+          @socket.connect(Socket.pack_sockaddr_in(port, host))
+        end
+        @socket
+      rescue => e
+        @socket.close rescue nil
+        @socket = nil
+        raise e
+      end
+
+      def send_request(req, opts = {}, &block)
+        begin
+          socket.write(req.encode)
+
+          response = self.class.const_get(req.class.to_s.sub(/Request$/, 'Response')).new(socket, &block)
+        rescue KafkaSyrup::KafkaResponseError => e
+          raise e
+        rescue StandardError => e
+          raise KafkaSyrup::SocketReadError.new(e)
+        end
+
+        socket.close if opts[:close]
+
+        response
+      end
+    end
+  end
+end
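The request/response pairing in `send_request` is purely name-based: a `FooRequest` is answered by reading a `FooResponse` off the socket. A minimal sketch (topic name illustrative):

```ruby
# Sketch: send_request derives the response class from the request class name,
# so a MetadataRequest yields a Protocol::MetadataResponse.
broker  = KafkaSyrup::Broker.new('localhost', 9092)
request = KafkaSyrup::Protocol::MetadataRequest.new(:foo)

response = broker.send_request(request, close: true)
# => a KafkaSyrup::Protocol::MetadataResponse decoded from the socket
```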
data/lib/kafka_syrup/encoding.rb
ADDED
@@ -0,0 +1,59 @@
+module KafkaSyrup
+  module Encoding
+    class << self
+      # Integer encoding methods
+      { 8  => 'c',
+        16 => 's>',
+        32 => 'l>',
+        64 => 'q>'
+      }.each do |size, pattern|
+        define_method "write_int#{size}" do |num|
+          [num].pack(pattern)
+        end
+
+        define_method "read_int#{size}" do |io|
+          io.read(size/8).unpack(pattern).first
+        end
+      end
+
+      # String and Byte encoding methods
+      { string: 16,
+        bytes: 32
+      }.each do |type, size|
+        define_method "write_#{type}" do |val|
+          len = val.to_s.length
+          if len > 0
+            send("write_int#{size}", len) + val.to_s
+          else
+            send("write_int#{size}", -1)
+          end
+        end
+
+        define_method "read_#{type}" do |io|
+          len = send("read_int#{size}", io)
+          if len > 0
+            io.read(len)
+          end
+        end
+      end
+
+      def write_array(items, &block)
+        result = write_int32(items.length)
+        if block_given?
+          result += items.map(&block).join
+        else
+          items.each do |item|
+            result += item.respond_to?(:encode) ? item.encode : item
+          end
+        end
+        result
+      end
+
+      def read_array(io, &block)
+        [].tap do |result|
+          read_int32(io).times{ result << yield(io) }
+        end
+      end
+    end
+  end
+end
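A round-trip sketch of these helpers, using the stdlib StringIO as the reader:

```ruby
# Sketch: Kafka's wire format is big-endian integers plus length-prefixed
# strings (int16 length) and bytes (int32 length), which is exactly what the
# generated write_*/read_* pairs above implement.
require 'stringio'

payload = KafkaSyrup::Encoding.write_int32(42) +
          KafkaSyrup::Encoding.write_string('hello')

io = StringIO.new(payload)
KafkaSyrup::Encoding.read_int32(io)   # => 42
KafkaSyrup::Encoding.read_string(io)  # => "hello"
```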
data/lib/kafka_syrup/errors.rb
ADDED
@@ -0,0 +1,15 @@
+module KafkaSyrup
+  class Error < StandardError
+    def initialize(e = nil)
+      super
+      set_backtrace e.backtrace if e.is_a?(StandardError)
+    end
+  end
+
+  class SocketReadError < Error; end
+  class NoBrokers < Error; end
+  class TopicNotFound < Error; end
+  class PartitionNotFound < Error; end
+  class BrokerNotFound < Error; end
+  class NotRegistered < Error; end
+end
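The base `Error` class can wrap another exception while keeping its backtrace, which is how `SocketReadError` is raised in broker.rb. A small sketch:

```ruby
# Sketch: wrapping a low-level exception in a KafkaSyrup error preserves the
# original message and backtrace.
begin
  raise IOError, 'connection reset by peer'
rescue => e
  wrapped = KafkaSyrup::SocketReadError.new(e)
  wrapped.message                   # => "connection reset by peer"
  wrapped.backtrace == e.backtrace  # => true
end
```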
data/lib/kafka_syrup/partition_consumer.rb
ADDED
@@ -0,0 +1,111 @@
+module KafkaSyrup
+  class PartitionConsumer
+    include Utils
+
+    attr_accessor :topic, :partition, :broker, :offset, :max_bytes, :messages, :thread, :control_queue, :lock
+
+    def initialize(*args)
+      load_args(*args)
+
+      refresh_metadata
+
+      self.messages = Queue.new
+      self.control_queue = Queue.new
+      self.lock = Mutex.new
+    end
+
+    def refresh_metadata
+      broker && broker.socket && broker.socket.close
+
+      meta = KafkaSyrup.get_metadata(topic)
+
+      self.broker = meta.brokers.detect{ |b| b.node == partition_from_response(meta).leader }
+      raise BrokerNotFound unless self.broker
+      self.broker.extend KafkaSyrup::Broker::Communications
+    end
+
+    def get_available_offset(time = :latest)
+      request = KafkaSyrup::Protocol::OffsetRequest.new
+      request.add_topic(topic).add_partition(partition, time == :earliest ? -2 : -1)
+
+      response = broker.send_request(request)
+      partition_from_response(response).offsets.last
+    end
+
+    def fetch_from_broker(&block)
+      lock.synchronize{ self.offset = get_available_offset(offset) } unless offset.is_a?(Integer)
+
+      opts = { max_bytes: max_bytes } if max_bytes
+      request = KafkaSyrup::Protocol::FetchRequest.new(opts)
+      request.add_topic(topic).add_partition(partition, offset)
+
+      response = partition_from_response(broker.send_request(request, &block))
+
+      lock.synchronize{ self.offset = response.messages.last.offset + 1 } unless response.messages.empty?
+    rescue KafkaSyrup::KafkaResponseErrors::OffsetOutOfRange
+      low = get_available_offset(:earliest)
+      high = get_available_offset
+
+      lock.synchronize{ self.offset = offset < low ? low : high }
+    end
+
+    def fetch(limit = nil)
+      start_fetcher_thread unless thread
+
+      control_queue.push(:fetch) if messages.empty? && control_queue.num_waiting > 0
+
+      result = []
+
+      loop do
+        result << messages.pop
+        break if messages.empty? || (limit && result.count == limit)
+      end
+
+      self.offset = result.last[:offset] + 1
+
+      result
+    end
+
+    def retry_backoff
+      @retry_backoff ||= KafkaSyrup.config.retry_backoff / 1000.0
+    end
+
+    private
+
+    def partition_from_response(response)
+      topic_meta = response.topics.detect{ |t| t.name == topic.to_s }
+      raise TopicNotFound unless topic_meta
+      partition_meta = topic_meta.partitions.detect{ |p| p.id == partition }
+      raise PartitionNotFound unless partition_meta
+      partition_meta
+    end
+
+    def start_fetcher_thread
+      self.thread = Thread.new do
+        log.debug "Starting Fetcher Thread for partition #{partition}"
+        loop do
+          begin
+            control_queue.pop # wait for start message
+            log.debug "Fetching from partition #{partition}"
+
+            num_received = 0
+            begin
+              fetch_from_broker do |msg|
+                messages.push partition: partition, offset: msg.offset, message: msg.value
+                num_received += 1
+              end
+
+              # No messages received so backoff a bit before retrying
+              sleep retry_backoff if num_received == 0
+            rescue
+              sleep retry_backoff
+            end
+          rescue
+            sleep retry_backoff
+          end
+        end
+      end
+      sleep 0.1 # Slight sleep to let the thread start waiting on the control queue (avoids deadlock)
+    end
+  end
+end
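A usage sketch for the consumer above (it assumes `load_args`, from Utils, maps the options hash onto the accessors, as the README examples suggest):

```ruby
# Sketch: a symbolic offset (:earliest or :latest) is resolved on the first
# fetch via get_available_offset; a numeric offset is used as-is.
consumer = KafkaSyrup::PartitionConsumer.new(topic: :foo, partition: 0, offset: :earliest)

batch = consumer.fetch(10)  # blocks until at least one message is available
batch.first                 # => { partition: 0, offset: ..., message: "..." }
consumer.offset             # advanced past the last message returned
```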