promiscuous-poseidon_cluster 0.3.0
- checksums.yaml +7 -0
- data/.coveralls.yml +1 -0
- data/.gitignore +8 -0
- data/.travis.yml +7 -0
- data/Gemfile +8 -0
- data/Gemfile.lock +77 -0
- data/README.md +95 -0
- data/Rakefile +21 -0
- data/examples/consumer_group.rb +33 -0
- data/lib/poseidon/cluster.rb +28 -0
- data/lib/poseidon/consumer_group.rb +435 -0
- data/lib/poseidon_cluster.rb +1 -0
- data/poseidon_cluster.gemspec +28 -0
- data/scenario/.gitignore +1 -0
- data/scenario/consumer.rb +17 -0
- data/scenario/producer.rb +23 -0
- data/scenario/run.rb +35 -0
- data/scenario/scenario.rb +134 -0
- data/spec/lib/poseidon/cluster_spec.rb +19 -0
- data/spec/lib/poseidon/consumer_group_spec.rb +286 -0
- data/spec/spec_helper.rb +14 -0
- metadata +183 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
---
SHA1:
  metadata.gz: f7653a5f9ee3e08c3ccda22787d2a9da5b752b86
  data.tar.gz: 6447aadb67ad485dc77e79ea91af4d3b99bd49e1
SHA512:
  metadata.gz: 203099eb7d08fa1f80dd197c5e4094352291dc197a4e8137efde19450a4c4224e2cdc23bfdc9dd6789607c0544c326ce329c3c81bbd1d7a30f557f30ae0deddb
  data.tar.gz: d78680cc99b1dd11fb11183f00fc4186835e8515ade5ae933b10d0a39a55f7bb0f5a99afb2d68e371198659402a20b421b6e9b80f3700006b941e7462a094889
data/.coveralls.yml
ADDED
@@ -0,0 +1 @@
service_name: travis-ci
data/.gitignore
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,77 @@
PATH
  remote: .
  specs:
    poseidon_cluster (0.3.0)
      poseidon (>= 0.0.5.pre1)
      zk

GEM
  remote: https://rubygems.org/
  specs:
    coveralls (0.7.0)
      multi_json (~> 1.3)
      rest-client
      simplecov (>= 0.7)
      term-ansicolor
      thor
    diff-lcs (1.2.5)
    docile (1.1.3)
    little-plugger (1.1.3)
    logging (1.8.2)
      little-plugger (>= 1.1.3)
      multi_json (>= 1.8.4)
    mime-types (2.3)
    multi_json (1.10.1)
    poseidon (0.0.5.pre1)
    rake (10.3.2)
    rest-client (1.6.7)
      mime-types (>= 1.16)
    rspec (3.0.0)
      rspec-core (~> 3.0.0)
      rspec-expectations (~> 3.0.0)
      rspec-mocks (~> 3.0.0)
    rspec-core (3.0.0)
      rspec-support (~> 3.0.0)
    rspec-expectations (3.0.0)
      diff-lcs (>= 1.2.0, < 2.0)
      rspec-support (~> 3.0.0)
    rspec-its (1.0.1)
      rspec-core (>= 2.99.0.beta1)
      rspec-expectations (>= 2.99.0.beta1)
    rspec-mocks (3.0.0)
      rspec-support (~> 3.0.0)
    rspec-support (3.0.0)
    simplecov (0.8.2)
      docile (~> 1.1.0)
      multi_json
      simplecov-html (~> 0.8.0)
    simplecov-html (0.8.0)
    slyphon-log4j (1.2.15)
    slyphon-zookeeper_jar (3.3.5-java)
    term-ansicolor (1.3.0)
      tins (~> 1.0)
    thor (0.19.1)
    tins (1.3.0)
    yard (0.8.7.4)
    zk (1.9.4)
      logging (~> 1.8.2)
      zookeeper (~> 1.4.0)
    zookeeper (1.4.9)
    zookeeper (1.4.9-java)
      slyphon-log4j (= 1.2.15)
      slyphon-zookeeper_jar (= 3.3.5)

PLATFORMS
  java
  ruby

DEPENDENCIES
  bundler
  coveralls
  poseidon_cluster!
  rake
  rspec
  rspec-its
  slyphon-log4j (= 1.2.15)
  slyphon-zookeeper_jar (= 3.3.5)
  yard
data/README.md
ADDED
@@ -0,0 +1,95 @@
# Poseidon Cluster [![Build Status](https://travis-ci.org/bsm/poseidon_cluster.png?branch=master)](https://travis-ci.org/bsm/poseidon_cluster) [![Coverage Status](https://coveralls.io/repos/bsm/poseidon_cluster/badge.png?branch=master)](https://coveralls.io/r/bsm/poseidon_cluster?branch=master)

Poseidon Cluster is a cluster extension of the excellent [Poseidon](http://github.com/bpot/poseidon) Ruby client for Kafka 0.8+. It implements the distribution concept of self-rebalancing *Consumer Groups* and supports the consumption of a single topic from multiple instances.

Consumer group instances share a common group name, and each message published to a topic is delivered to one instance within each subscribing consumer group. Consumer instances can be in separate processes or on separate machines.

## Usage

Launch a consumer group:

```ruby
require 'poseidon_cluster'

consumer = Poseidon::ConsumerGroup.new(
  "my-group",                               # Group name
  ["kafka1.host:9092", "kafka2.host:9092"], # Kafka brokers
  ["kafka1.host:2181", "kafka2.host:2181"], # ZooKeeper hosts
  "my-topic")                               # Topic name

consumer.partitions # => [0, 1, 2, 3] - all partitions of 'my-topic'
consumer.claimed    # => [0, 1] - partitions this instance has claimed
```

Fetch a bulk of messages and auto-commit the offset:

```ruby
consumer.fetch do |partition, bulk|
  bulk.each do |m|
    puts "Fetched '#{m.value}' at #{m.offset} from #{partition}"
  end
end
```

Get the offset for a partition:

```ruby
consumer.offset(0) # => 320 - current offset of partition 0
```

Fetch more messages and commit manually:

```ruby
consumer.fetch commit: false do |partition, bulk|
  bulk.each do |m|
    puts "Fetched '#{m.value}' at #{m.offset} from #{partition}"
  end

  consumer.commit partition, bulk.last.offset + 1 unless bulk.empty?
end
```

Initiate a fetch loop and consume indefinitely:

```ruby
consumer.fetch_loop do |partition, bulk|
  bulk.each do |m|
    puts "Fetched '#{m.value}' at #{m.offset} from #{partition}"
  end
end
```
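
You can also check out a single claimed partition at a time. The sketch below mirrors the `Poseidon::ConsumerGroup#checkout` example from the inline API documentation and reuses the `consumer` instance created above:

```ruby
ok = consumer.checkout do |partition_consumer|
  puts "Checked out consumer for partition #{partition_consumer.partition}"
end
ok # => true if the block ran, false if no partition could be claimed
```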

For more details, please see the [Poseidon::ConsumerGroup](http://rubydoc.info/github/bsm/poseidon_cluster/Poseidon/ConsumerGroup) documentation and the [Examples](https://github.com/bsm/poseidon_cluster/tree/master/examples).

## Running Tests

The test suite will automatically download, configure and run Kafka locally; you only need a JRE. Run the suite via:

```bash
bundle exec rake spec
```

## Licence

```
Copyright (c) 2014 Black Square Media

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
```
data/Rakefile
ADDED
@@ -0,0 +1,21 @@
require 'bundler/gem_tasks'

require 'rspec/core/rake_task'
RSpec::Core::RakeTask.new(:spec)

require 'yard'
require 'yard/rake/yardoc_task'
YARD::Rake::YardocTask.new

require 'coveralls/rake/task'
Coveralls::RakeTask.new
namespace :spec do
  task coveralls: [:spec, 'coveralls:push']
end

desc "Run full integration test scenario"
task :scenario do
  load File.expand_path("../scenario/run.rb", __FILE__)
end

task default: :spec
data/examples/consumer_group.rb
ADDED
@@ -0,0 +1,33 @@
=begin

PLEASE NOTE

This example uses threads, but you could equally use fork or run your
consumer groups from completely separate processes and from multiple machines.

=end
require 'poseidon_cluster'

# Create a consumer group
group1 = Poseidon::ConsumerGroup.new "my-group", ["host1:9092", "host2:9092"], ["host1:2181", "host2:2181"], "my-topic"

# Start consuming "my-topic" in a background thread
thread1 = Thread.new do
  group1.fetch_loop do |partition, messages|
    puts "Consumer #1 fetched #{messages.size} from #{partition}"
  end
end

# Create a second consumer group
group2 = Poseidon::ConsumerGroup.new "my-group", ["host1:9092", "host2:9092"], ["host1:2181", "host2:2181"], "my-topic"

# Now consuming all partitions of "my-topic" in parallel
thread2 = Thread.new do
  group2.fetch_loop do |partition, messages|
    puts "Consumer #2 fetched #{messages.size} from #{partition}"
  end
end

# Join threads, loop forever
[thread1, thread2].each(&:join)
data/lib/poseidon/cluster.rb
ADDED
@@ -0,0 +1,28 @@
require 'socket'
require 'timeout'
require 'zk'
require 'poseidon'
require 'thread'

module Poseidon::Cluster
  MAX_INT32 = 0x7fffffff
  @@sem = Mutex.new
  @@inc = 0

  # @return [Integer] an incremented number
  # @api private
  def self.inc!
    @@sem.synchronize { @@inc += 1; @@inc = 1 if @@inc > MAX_INT32; @@inc }
  end

  # @return [String] a globally unique identifier
  # @api private
  def self.guid
    [::Socket.gethostname, ::Process.pid, ::Time.now.to_i, inc!].join("-")
  end

end

%w|consumer_group|.each do |name|
  require "poseidon/#{name}"
end
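
As a rough illustration of the module above (the exact values depend on the host, process and time, and are not output taken from the gem), `Poseidon::Cluster.guid` joins the hostname, process ID, UNIX timestamp and the `inc!` counter:

```ruby
require 'poseidon_cluster'

Poseidon::Cluster.guid # => e.g. "worker-1-4031-1402412400-1"
Poseidon::Cluster.guid # => e.g. "worker-1-4031-1402412400-2"
```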
data/lib/poseidon/consumer_group.rb
ADDED
@@ -0,0 +1,435 @@
# A ConsumerGroup operates on all partitions of a single topic. The goal is to ensure
# each topic message is consumed only once, regardless of the number of consumer instances within
# a cluster, as described in: http://kafka.apache.org/documentation.html#distributionimpl.
#
# The ConsumerGroup internally creates multiple PartitionConsumer instances. It uses ZooKeeper
# and follows a simple consumer rebalancing algorithm which allows all the consumers
# in a group to come to a consensus on which consumer is consuming which partitions. Each
# ConsumerGroup can 'claim' 0-n partitions and will consume their messages until another
# ConsumerGroup instance joins or leaves the cluster.
#
# Please note: ConsumerGroups themselves don't implement any threading or concurrency.
# When consuming messages, they simply round-robin across the claimed partitions. If you wish
# to parallelize consumption, simply create multiple ConsumerGroup instances. The built-in
# consensus algorithm will automatically rebalance the available partitions between them and you
# can then decide for yourself if you want to run them in multiple threads or processes, ideally
# on multiple boxes.
#
# Contrary to what is stated in the Kafka documentation, consumer rebalancing is *only* triggered
# on each addition or removal of consumers within the same group, while the addition of broker
# nodes and/or partitions *does not currently trigger* a rebalancing cycle.
#
# @api public
class Poseidon::ConsumerGroup
  DEFAULT_CLAIM_TIMEOUT = 30
  DEFAULT_LOOP_DELAY = 1

  # Poseidon::ConsumerGroup::Consumer is internally used by Poseidon::ConsumerGroup.
  # Don't invoke it directly.
  #
  # @api private
  class Consumer < ::Poseidon::PartitionConsumer

    # @attr_reader [Integer] partition consumer partition
    attr_reader :partition

    # @api private
    def initialize(group, partition, options = {})
      broker = group.leader(partition)
      offset = group.offset(partition)
      offset = (options[:trail] ? :latest_offset : :earliest_offset) if offset == 0
      options.delete(:trail)
      super group.id, broker.host, broker.port, group.topic, partition, offset, options
    end

  end

  # @param [Integer] pnum number of partitions
  # @param [Array<String>] cids consumer IDs
  # @param [String] id consumer ID
  # @return [Range, NilClass] selectable range, if any
  def self.pick(pnum, cids, id)
    cids = cids.sort
    pos = cids.index(id)
    return unless pos && pos < cids.size

    step = pnum.fdiv(cids.size).ceil
    frst = pos*step
    last = (pos+1)*step-1
    last = pnum-1 if last > pnum-1
    return if last < 0 || last < frst

    (frst..last)
  end
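
  # For illustration: with 5 partitions and the sorted consumer IDs
  # ["consumer-a", "consumer-b"], step = (5 / 2.0).ceil = 3, so position 0
  # is assigned partitions 0..2 and position 1 gets 3..4 once the range is
  # capped at the last partition:
  #
  #   Poseidon::ConsumerGroup.pick(5, %w[consumer-a consumer-b], "consumer-a") # => 0..2
  #   Poseidon::ConsumerGroup.pick(5, %w[consumer-a consumer-b], "consumer-b") # => 3..4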

  # @attr_reader [String] name Group name
  attr_reader :name

  # @attr_reader [String] topic Topic name
  attr_reader :topic

  # @attr_reader [Poseidon::BrokerPool] pool Broker pool
  attr_reader :pool

  # @attr_reader [ZK::Client] zk ZooKeeper client
  attr_reader :zk

  # @attr_reader [Hash] options Consumer options
  attr_reader :options

  # Create a new consumer group, which processes all partitions of the specified topic.
  #
  # @param [String] name Group name
  # @param [Array<String>] brokers A list of known brokers, e.g. ["localhost:9092"]
  # @param [Array<String>] zookeepers A list of known zookeepers, e.g. ["localhost:2181"]
  # @param [String] topic Topic to operate on
  # @param [Hash] options Consumer options
  # @option options [Integer] :max_bytes Maximum number of bytes to fetch. Default: 1048576 (1MB)
  # @option options [Integer] :max_wait_ms How long to block until the server sends us data. Default: 100 (100ms)
  # @option options [Integer] :min_bytes Smallest amount of data the server should send us. Default: 0 (send us data as soon as it is ready)
  # @option options [Integer] :claim_timeout Maximum number of seconds to wait for a partition claim. Default: 30
  # @option options [Integer] :loop_delay Number of seconds to delay the next fetch (in #fetch_loop) if nothing was returned. Default: 1
  # @option options [Integer] :socket_timeout_ms Broker connection wait timeout in ms. Default: 10000
  # @option options [Boolean] :register Automatically register the instance and start consuming. Default: true
  # @option options [Boolean] :trail Start reading from the latest partition offsets, skipping 'old' messages. Default: false
  #
  # @api public
  def initialize(name, brokers, zookeepers, topic, options = {})
    @name = name
    @topic = topic
    @zk = ::ZK.new(zookeepers.join(","))
    # Poseidon::BrokerPool doesn't provide a default value for this option;
    # configuring the default here isn't pretty, but it works.
    options[:socket_timeout_ms] ||= 10000
    @options = options
    @consumers = []
    @pool = ::Poseidon::BrokerPool.new(id, brokers, options[:socket_timeout_ms])
    @mutex = Mutex.new
    @registered = false

    register! unless options[:register] == false
  end

  # @return [String] a globally unique identifier
  def id
    @id ||= [name, Poseidon::Cluster.guid].join("-")
  end

  # @return [Hash<Symbol,String>] registry paths
  def registries
    @registries ||= {
      consumer: "/consumers/#{name}/ids",
      owner: "/consumers/#{name}/owners/#{topic}",
      offset: "/consumers/#{name}/offsets/#{topic}",
    }
  end
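
  # For illustration, with name = "my-group" and topic = "my-topic" the
  # registry paths above resolve to:
  #
  #   consumer: "/consumers/my-group/ids"
  #   owner:    "/consumers/my-group/owners/my-topic"
  #   offset:   "/consumers/my-group/offsets/my-topic"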

  # @return [Poseidon::ClusterMetadata] cluster metadata
  def metadata
    @metadata ||= Poseidon::ClusterMetadata.new.tap {|m| m.update pool.fetch_metadata([topic]) }
  end

  # @return [Poseidon::TopicMetadata] topic metadata
  def topic_metadata
    @topic_metadata ||= metadata.metadata_for_topics([topic])[topic]
  end

  # @return [Boolean] true if registered
  def registered?
    @registered
  end

  # @return [Boolean] true if registration was successful, false if already registered
  def register!
    return false if registered?

    # Register instance
    registries.each do |_, path|
      zk.mkdir_p(path)
    end
    zk.create(consumer_path, "{}", ephemeral: true)
    zk.register(registries[:consumer]) {|_| rebalance! }

    # Rebalance
    rebalance!
    @registered = true
  end

  # Reloads metadata/broker/partition information
  def reload
    @metadata = @topic_metadata = nil
    metadata
    self
  end

  # Closes the consumer group gracefully, only really useful in tests
  # @api private
  def close
    @mutex.synchronize { release_all! }
    zk.close
  end

  # @param [Integer] partition
  # @return [Poseidon::Protocol::Broker] the leader for the given partition
  def leader(partition)
    metadata.lead_broker_for_partition(topic, partition)
  end

  # @param [Integer] partition
  # @return [Integer] the latest stored offset for the given partition
  def offset(partition)
    data, _ = zk.get offset_path(partition), ignore: :no_node
    data.to_i
  end

  # Commits the latest offset for a partition
  # @param [Integer] partition
  # @param [Integer] offset
  def commit(partition, offset)
    zk.set offset_path(partition), offset.to_s
  rescue ZK::Exceptions::NoNode
    zk.create offset_path(partition), offset.to_s, ignore: :node_exists
  end

  # Sorts partitions by broker address (so partitions on the same broker are clustered together)
  # @return [Array<Poseidon::Protocol::PartitionMetadata>] sorted partitions
  def partitions
    return [] unless topic_metadata

    topic_metadata.available_partitions.sort_by do |part|
      broker = metadata.brokers[part.leader]
      [broker.host, broker.port].join(":")
    end
  end

  # Partitions currently claimed and consumed by this group instance
  # @return [Array<Integer>] partition IDs
  def claimed
    @consumers.map(&:partition).sort
  end

  # Checks out a single partition consumer. Round-robins between claimed partitions.
  #
  # @yield [consumer] The processing block
  # @yieldparam [Consumer] consumer The consumer instance
  # @yieldreturn [Boolean] return false to stop auto-commit
  #
  # @param [Hash] opts
  # @option opts [Boolean] :commit Automatically commit consumer offset (default: true)
  # @return [Boolean] true if a consumer was checked out, false if none could be claimed
  #
  # @example
  #
  #   ok = group.checkout do |consumer|
  #     puts "Checked out consumer for partition #{consumer.partition}"
  #   end
  #   ok # => true if the block was run, false otherwise
  #
  # @api public
  def checkout(opts = {})
    consumer = nil
    commit = @mutex.synchronize do
      consumer = @consumers.shift
      return false unless consumer

      @consumers.push consumer
      yield consumer
    end

    unless opts[:commit] == false || commit == false
      commit consumer.partition, consumer.offset
    end
    true
  end

  # Convenience method to fetch messages from the broker.
  # Round-robins between claimed partitions.
  #
  # @yield [partition, messages] The processing block
  # @yieldparam [Integer] partition The source partition
  # @yieldparam [Array<Message>] messages The fetched messages
  # @yieldreturn [Boolean] return false to prevent auto-commit
  #
  # @param [Hash] opts
  # @option opts [Boolean] :commit Automatically commit consumed offset (default: true)
  # @return [Boolean] true if messages were fetched, false if none could be claimed
  #
  # @example
  #
  #   ok = group.fetch do |n, messages|
  #     puts "Fetched #{messages.size} messages for partition #{n}"
  #   end
  #   ok # => true if the block was run, false otherwise
  #
  # @api public
  def fetch(opts = {})
    checkout(opts) do |consumer|
      yield consumer.partition, consumer.fetch
    end
  end

  # Initializes an infinite fetch loop. This method blocks!
  #
  # Will wait for `loop_delay` seconds after each failed fetch. This may happen when there is
  # no new data or when the consumer hasn't claimed any partitions.
  #
  # SPECIAL ATTENTION:
  # When 'breaking out' of the loop, you must do it before processing the messages, as the
  # last offset will not be committed. Please see the examples below.
  #
  # @yield [partition, messages] The processing block
  # @yieldparam [Integer] partition The source partition, may be -1 if no partitions are claimed
  # @yieldparam [Array<Message>] messages The fetched messages
  # @yieldreturn [Boolean] return false to prevent auto-commit
  #
  # @param [Hash] opts
  # @option opts [Boolean] :commit Automatically commit consumed offset (default: true)
  # @option opts [Boolean] :loop_delay Delay override in seconds after an unsuccessful fetch.
  #
  # @example
  #
  #   group.fetch_loop do |n, messages|
  #     puts "Fetched #{messages.size} messages for partition #{n}"
  #   end
  #   puts "Done" # => this code is never reached
  #
  # @example Stopping the loop (wrong)
  #
  #   counts = Hash.new(0)
  #   group.fetch_loop do |n, messages|
  #     counts[n] += messages.size
  #     puts "Status: #{counts.inspect}"
  #     break if counts[0] > 100
  #   end
  #   puts "Result: #{counts.inspect}"
  #   puts "Offset: #{group.offset(0)}"
  #
  #   # Output:
  #   # Status: {0=>30}
  #   # Status: {0=>60}
  #   # Status: {0=>90}
  #   # Status: {0=>120}
  #   # Result: {0=>120}
  #   # Offset: 90 # => Last offset was not committed!
  #
  # @example Stopping the loop (correct)
  #
  #   counts = Hash.new(0)
  #   group.fetch_loop do |n, messages|
  #     break if counts[0] > 100
  #     counts[n] += messages.size
  #     puts "Status: #{counts.inspect}"
  #   end
  #   puts "Result: #{counts.inspect}"
  #   puts "Offset: #{group.offset(0)}"
  #
  #   # Output:
  #   # Status: {0=>30}
  #   # Status: {0=>60}
  #   # Status: {0=>90}
  #   # Status: {0=>120}
  #   # Result: {0=>120}
  #   # Offset: 120
  #
  # @api public
  def fetch_loop(opts = {})
    delay = opts[:loop_delay] || options[:loop_delay] || DEFAULT_LOOP_DELAY

    loop do
      mp = false
      ok = fetch(opts) do |n, messages|
        mp = !messages.empty?
        yield n, messages
      end

      # Yield over an empty array if nothing was claimed,
      # to allow the user to e.g. break out of the loop
      unless ok
        yield -1, []
      end

      # Sleep if nothing was claimed or nothing was returned
      unless ok && mp
        sleep delay
      end
    end
  end

  protected

  # Rebalance algorithm:
  #
  # * let CG be all consumers in the same group that consume topic T
  # * let PT be all partitions of topic T
  # * sort CG
  # * sort PT (so partitions on the same broker are clustered together)
  # * let POS be our index position in CG and let N = size(PT)/size(CG)
  # * assign partitions from POS*N to (POS+1)*N-1
  def rebalance!
    return if @pending

    @pending = true
    @mutex.synchronize do
      @pending = nil

      release_all!
      reload

      ids = zk.children(registries[:consumer], watch: true)
      pms = partitions
      rng = self.class.pick(pms.size, ids, id)

      pms[rng].each do |pm|
        if @pending
          release_all!
          break
        end

        consumer = claim!(pm.id)
        @consumers.push(consumer) if consumer
      end if rng
    end
  end

  # Release all consumer claims
  def release_all!
    @consumers.each {|c| release!(c.partition) }
    @consumers.clear
  end

  private

  # Claim the ownership of the partition for this consumer
  # @raise [Timeout::Error]
  def claim!(partition)
    path = claim_path(partition)
    Timeout.timeout options[:claim_timeout] || DEFAULT_CLAIM_TIMEOUT do
      while zk.create(path, id, ephemeral: true, ignore: :node_exists).nil?
        return if @pending
        sleep(0.1)
      end
    end
    Consumer.new self, partition, options.dup
  end

  # Release ownership of the partition
  def release!(partition)
    zk.delete claim_path(partition), ignore: :no_node
  end

  # @return [String] zookeeper ownership claim path
  def claim_path(partition)
    "#{registries[:owner]}/#{partition}"
  end

  # @return [String] zookeeper offset storage path
  def offset_path(partition)
    "#{registries[:offset]}/#{partition}"
  end

  # @return [String] zookeeper consumer registration path
  def consumer_path
    "#{registries[:consumer]}/#{id}"
  end

end
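
The constructor options documented in `#initialize` above can be combined as needed. A minimal sketch, assuming Kafka and ZooKeeper are reachable on localhost (hosts and option values are placeholders, not taken from the gem's own examples):

```ruby
require 'poseidon_cluster'

consumer = Poseidon::ConsumerGroup.new(
  "my-group",
  ["localhost:9092"],       # Kafka brokers
  ["localhost:2181"],       # ZooKeeper hosts
  "my-topic",
  max_bytes:     1_048_576, # fetch at most ~1MB per request
  claim_timeout: 30,        # wait up to 30s when claiming a partition
  trail:         true)      # start from the latest offsets, skipping old messages

consumer.claimed # => partition IDs claimed by this instance
```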