promiscuous-poseidon_cluster 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.coveralls.yml +1 -0
- data/.gitignore +8 -0
- data/.travis.yml +7 -0
- data/Gemfile +8 -0
- data/Gemfile.lock +77 -0
- data/README.md +95 -0
- data/Rakefile +21 -0
- data/examples/consumer_group.rb +33 -0
- data/lib/poseidon/cluster.rb +28 -0
- data/lib/poseidon/consumer_group.rb +435 -0
- data/lib/poseidon_cluster.rb +1 -0
- data/poseidon_cluster.gemspec +28 -0
- data/scenario/.gitignore +1 -0
- data/scenario/consumer.rb +17 -0
- data/scenario/producer.rb +23 -0
- data/scenario/run.rb +35 -0
- data/scenario/scenario.rb +134 -0
- data/spec/lib/poseidon/cluster_spec.rb +19 -0
- data/spec/lib/poseidon/consumer_group_spec.rb +286 -0
- data/spec/spec_helper.rb +14 -0
- metadata +183 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
---
SHA1:
  metadata.gz: f7653a5f9ee3e08c3ccda22787d2a9da5b752b86
  data.tar.gz: 6447aadb67ad485dc77e79ea91af4d3b99bd49e1
SHA512:
  metadata.gz: 203099eb7d08fa1f80dd197c5e4094352291dc197a4e8137efde19450a4c4224e2cdc23bfdc9dd6789607c0544c326ce329c3c81bbd1d7a30f557f30ae0deddb
  data.tar.gz: d78680cc99b1dd11fb11183f00fc4186835e8515ade5ae933b10d0a39a55f7bb0f5a99afb2d68e371198659402a20b421b6e9b80f3700006b941e7462a094889
data/.coveralls.yml
ADDED
@@ -0,0 +1 @@
service_name: travis-ci
data/.gitignore
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,77 @@
PATH
  remote: .
  specs:
    poseidon_cluster (0.3.0)
      poseidon (>= 0.0.5.pre1)
      zk

GEM
  remote: https://rubygems.org/
  specs:
    coveralls (0.7.0)
      multi_json (~> 1.3)
      rest-client
      simplecov (>= 0.7)
      term-ansicolor
      thor
    diff-lcs (1.2.5)
    docile (1.1.3)
    little-plugger (1.1.3)
    logging (1.8.2)
      little-plugger (>= 1.1.3)
      multi_json (>= 1.8.4)
    mime-types (2.3)
    multi_json (1.10.1)
    poseidon (0.0.5.pre1)
    rake (10.3.2)
    rest-client (1.6.7)
      mime-types (>= 1.16)
    rspec (3.0.0)
      rspec-core (~> 3.0.0)
      rspec-expectations (~> 3.0.0)
      rspec-mocks (~> 3.0.0)
    rspec-core (3.0.0)
      rspec-support (~> 3.0.0)
    rspec-expectations (3.0.0)
      diff-lcs (>= 1.2.0, < 2.0)
      rspec-support (~> 3.0.0)
    rspec-its (1.0.1)
      rspec-core (>= 2.99.0.beta1)
      rspec-expectations (>= 2.99.0.beta1)
    rspec-mocks (3.0.0)
      rspec-support (~> 3.0.0)
    rspec-support (3.0.0)
    simplecov (0.8.2)
      docile (~> 1.1.0)
      multi_json
      simplecov-html (~> 0.8.0)
    simplecov-html (0.8.0)
    slyphon-log4j (1.2.15)
    slyphon-zookeeper_jar (3.3.5-java)
    term-ansicolor (1.3.0)
      tins (~> 1.0)
    thor (0.19.1)
    tins (1.3.0)
    yard (0.8.7.4)
    zk (1.9.4)
      logging (~> 1.8.2)
      zookeeper (~> 1.4.0)
    zookeeper (1.4.9)
    zookeeper (1.4.9-java)
      slyphon-log4j (= 1.2.15)
      slyphon-zookeeper_jar (= 3.3.5)

PLATFORMS
  java
  ruby

DEPENDENCIES
  bundler
  coveralls
  poseidon_cluster!
  rake
  rspec
  rspec-its
  slyphon-log4j (= 1.2.15)
  slyphon-zookeeper_jar (= 3.3.5)
  yard
data/README.md
ADDED
@@ -0,0 +1,95 @@
# Poseidon Cluster [Build Status](https://travis-ci.org/bsm/poseidon_cluster) [Coverage Status](https://coveralls.io/r/bsm/poseidon_cluster?branch=master)

Poseidon Cluster is a cluster extension of the excellent [Poseidon](http://github.com/bpot/poseidon) Ruby client for Kafka 0.8+. It implements the distribution concept of self-rebalancing *Consumer Groups* and supports the consumption of a single topic from multiple instances.

Consumer group instances share a common group name, and each message published to a topic is delivered to one instance within each subscribing consumer group. Consumer instances can be in separate processes or on separate machines.

## Usage

Launch a consumer group:

```ruby
require 'poseidon_cluster'

consumer = Poseidon::ConsumerGroup.new(
  "my-group",                               # Group name
  ["kafka1.host:9092", "kafka2.host:9092"], # Kafka brokers
  ["kafka1.host:2181", "kafka2.host:2181"], # Zookeeper hosts
  "my-topic")                               # Topic name

consumer.partitions  # => [0, 1, 2, 3] - all partitions of 'my-topic'
consumer.claimed     # => [0, 1] - partitions this instance has claimed
```

Fetch a bulk of messages, auto-commit the offset:

```ruby
consumer.fetch do |partition, bulk|
  bulk.each do |m|
    puts "Fetched '#{m.value}' at #{m.offset} from #{partition}"
  end
end
```

Get the offset for a partition:

```ruby
consumer.offset(0) # => 320 - current offset from partition 0
```

Fetch more messages, commit manually:

```ruby
consumer.fetch commit: false do |partition, bulk|
  bulk.each do |m|
    puts "Fetched '#{m.value}' at #{m.offset} from #{partition}"
  end

  consumer.commit partition, bulk.last.offset+1 unless bulk.empty?
end
```

Initiate a fetch-loop, consume indefinitely:

```ruby
consumer.fetch_loop do |partition, bulk|
  bulk.each do |m|
    puts "Fetched '#{m.value}' at #{m.offset} from #{partition}"
  end
end
```

For more details and information, please see the [Poseidon::ConsumerGroup](http://rubydoc.info/github/bsm/poseidon_cluster/Poseidon/ConsumerGroup) documentation and the [Examples](https://github.com/bsm/poseidon_cluster/tree/master/examples).

## Running Tests

The test suite will automatically download, configure and run Kafka locally; you only need a JRE. Run the suite via:

```bash
bundle exec rake spec
```

## Licence

```
Copyright (c) 2014 Black Square Media

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
```
data/Rakefile
ADDED
@@ -0,0 +1,21 @@
require 'bundler/gem_tasks'

require 'rspec/core/rake_task'
RSpec::Core::RakeTask.new(:spec)

require 'yard'
require 'yard/rake/yardoc_task'
YARD::Rake::YardocTask.new

require 'coveralls/rake/task'
Coveralls::RakeTask.new
namespace :spec do
  task coveralls: [:spec, 'coveralls:push']
end

desc "Run full integration test scenario"
task :scenario do
  load File.expand_path("../scenario/run.rb", __FILE__)
end

task default: :spec
data/examples/consumer_group.rb
ADDED
@@ -0,0 +1,33 @@
=begin

PLEASE NOTE

This example uses threads, but you could equally use fork or run your
consumer groups from completely separate processes and from multiple machines.

=end
require 'poseidon_cluster'

# Create a consumer group
group1 = Poseidon::ConsumerGroup.new "my-group", ["host1:9092", "host2:9092"], ["host1:2181", "host2:2181"], "my-topic"

# Start consuming "my-topic" in a background thread
thread1 = Thread.new do
  group1.fetch_loop do |partition, messages|
    puts "Consumer #1 fetched #{messages.size} from #{partition}"
  end
end

# Create a second consumer group
group2 = Poseidon::ConsumerGroup.new "my-group", ["host1:9092", "host2:9092"], ["host1:2181", "host2:2181"], "my-topic"

# Now consuming all partitions of "my-topic" in parallel
thread2 = Thread.new do
  group2.fetch_loop do |partition, messages|
    puts "Consumer #2 fetched #{messages.size} from #{partition}"
  end
end

# Join threads, loop forever
[thread1, thread2].each(&:join)
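The banner note in the example above mentions forking as an alternative to threads. Below is a minimal sketch of that variant (not part of the gem; hosts are placeholders and `fork` requires a platform that supports it), using the same API as the threaded example:

```ruby
# Fork-based variant of the threaded example above (illustration only).
# Each child process runs its own ConsumerGroup instance under the same group
# name, so the built-in rebalancing spreads the partitions across the processes.
require 'poseidon_cluster'

pids = 2.times.map do |i|
  fork do
    group = Poseidon::ConsumerGroup.new "my-group",
      ["host1:9092", "host2:9092"],  # Kafka brokers (placeholders)
      ["host1:2181", "host2:2181"],  # Zookeeper hosts (placeholders)
      "my-topic"
    group.fetch_loop do |partition, messages|
      puts "Consumer ##{i + 1} (pid #{Process.pid}) fetched #{messages.size} from #{partition}"
    end
  end
end

# Wait for the children (they loop forever, like the threaded example)
pids.each { |pid| Process.wait(pid) }
```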
data/lib/poseidon/cluster.rb
ADDED
@@ -0,0 +1,28 @@
require 'socket'
require 'timeout'
require 'zk'
require 'poseidon'
require 'thread'

module Poseidon::Cluster
  MAX_INT32 = 0x7fffffff
  @@sem = Mutex.new
  @@inc = 0

  # @return [Integer] an incremented number
  # @api private
  def self.inc!
    @@sem.synchronize { @@inc += 1; @@inc = 1 if @@inc > MAX_INT32; @@inc }
  end

  # @return [String] a globally unique identifier
  # @api private
  def self.guid
    [::Socket.gethostname, ::Process.pid, ::Time.now.to_i, inc!].join("-")
  end

end

%w|consumer_group|.each do |name|
  require "poseidon/#{name}"
end
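As shown in consumer_group.rb below, `ConsumerGroup#id` appends `Cluster.guid` to the group name to register each instance uniquely in Zookeeper. A minimal sketch of what the helpers above return (hostname, pid and timestamp values are illustrative):

```ruby
# Illustration only (not part of the gem): Cluster.guid joins hostname, pid,
# unix time and a process-wide counter, so repeated calls within one process
# differ only in the trailing counter.
require 'poseidon_cluster'

puts Poseidon::Cluster.guid  # => e.g. "worker-1-4021-1402000000-1"
puts Poseidon::Cluster.guid  # => e.g. "worker-1-4021-1402000000-2"
```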
data/lib/poseidon/consumer_group.rb
ADDED
@@ -0,0 +1,435 @@
# A ConsumerGroup operates on all partitions of a single topic. The goal is to ensure
# each topic message is consumed only once, regardless of the number of consumer instances
# within a cluster, as described in: http://kafka.apache.org/documentation.html#distributionimpl.
#
# The ConsumerGroup internally creates multiple PartitionConsumer instances. It uses Zookeeper
# and follows a simple consumer rebalancing algorithm which allows all the consumers
# in a group to come to a consensus on which consumer is consuming which partitions. Each
# ConsumerGroup can 'claim' 0-n partitions and will consume their messages until another
# ConsumerGroup instance joins or leaves the cluster.
#
# Please note: ConsumerGroups themselves don't implement any threading or concurrency.
# When consuming messages, they simply round-robin across the claimed partitions. If you wish
# to parallelize consumption, simply create multiple ConsumerGroup instances. The built-in
# consensus algorithm will automatically rebalance the available partitions between them and you
# can then decide for yourself if you want to run them in multiple threads or processes, ideally
# on multiple boxes.
#
# Unlike what is stated in the Kafka documentation, consumer rebalancing is *only* triggered on
# each addition or removal of consumers within the same group, while the addition of broker nodes
# and/or partitions *does not currently trigger* a rebalancing cycle.
#
# @api public
class Poseidon::ConsumerGroup
  DEFAULT_CLAIM_TIMEOUT = 30
  DEFAULT_LOOP_DELAY = 1

  # Poseidon::ConsumerGroup::Consumer is internally used by Poseidon::ConsumerGroup.
  # Don't invoke it directly.
  #
  # @api private
  class Consumer < ::Poseidon::PartitionConsumer

    # @attr_reader [Integer] partition consumer partition
    attr_reader :partition

    # @api private
    def initialize(group, partition, options = {})
      broker = group.leader(partition)
      offset = group.offset(partition)
      offset = (options[:trail] ? :latest_offset : :earliest_offset) if offset == 0
      options.delete(:trail)
      super group.id, broker.host, broker.port, group.topic, partition, offset, options
    end

  end

  # @param [Integer] pnum number of partitions
  # @param [Array<String>] cids consumer IDs
  # @param [String] id consumer ID
  # @return [Range, NilClass] selectable range, if any
  def self.pick(pnum, cids, id)
    cids = cids.sort
    pos = cids.index(id)
    return unless pos && pos < cids.size

    step = pnum.fdiv(cids.size).ceil
    frst = pos*step
    last = (pos+1)*step-1
    last = pnum-1 if last > pnum-1
    return if last < 0 || last < frst

    (frst..last)
  end

  # @attr_reader [String] name Group name
  attr_reader :name

  # @attr_reader [String] topic Topic name
  attr_reader :topic

  # @attr_reader [Poseidon::BrokerPool] pool Broker pool
  attr_reader :pool

  # @attr_reader [ZK::Client] zk Zookeeper client
  attr_reader :zk

  # @attr_reader [Hash] options Consumer options
  attr_reader :options

  # Create a new consumer group, which processes all partitions of the specified topic.
  #
  # @param [String] name Group name
  # @param [Array<String>] brokers A list of known brokers, e.g. ["localhost:9092"]
  # @param [Array<String>] zookeepers A list of known zookeepers, e.g. ["localhost:2181"]
  # @param [String] topic Topic to operate on
  # @param [Hash] options Consumer options
  # @option options [Integer] :max_bytes Maximum number of bytes to fetch. Default: 1048576 (1MB)
  # @option options [Integer] :max_wait_ms How long to block until the server sends us data. Default: 100 (100ms)
  # @option options [Integer] :min_bytes Smallest amount of data the server should send us. Default: 0 (Send us data as soon as it is ready)
  # @option options [Integer] :claim_timeout Maximum number of seconds to wait for a partition claim. Default: 30
  # @option options [Integer] :loop_delay Number of seconds to delay the next fetch (in #fetch_loop) if nothing was returned. Default: 1
  # @option options [Integer] :socket_timeout_ms Broker connection wait timeout in ms. Default: 10000
  # @option options [Boolean] :register Automatically register instance and start consuming. Default: true
  # @option options [Boolean] :trail Starts reading messages from the latest partition offsets and skips 'old' messages. Default: false
  #
  # @api public
  def initialize(name, brokers, zookeepers, topic, options = {})
    @name  = name
    @topic = topic
    @zk    = ::ZK.new(zookeepers.join(","))
    # Poseidon::BrokerPool doesn't provide a default value for this option.
    # Configuring the default value like this isn't beautiful, though.. by kssminus
    options[:socket_timeout_ms] ||= 10000
    @options = options
    @consumers = []
    @pool = ::Poseidon::BrokerPool.new(id, brokers, options[:socket_timeout_ms])
    @mutex = Mutex.new
    @registered = false

    register! unless options[:register] == false
  end

  # @return [String] a globally unique identifier
  def id
    @id ||= [name, Poseidon::Cluster.guid].join("-")
  end

  # @return [Hash<Symbol,String>] registry paths
  def registries
    @registries ||= {
      consumer: "/consumers/#{name}/ids",
      owner: "/consumers/#{name}/owners/#{topic}",
      offset: "/consumers/#{name}/offsets/#{topic}",
    }
  end

  # @return [Poseidon::ClusterMetadata] cluster metadata
  def metadata
    @metadata ||= Poseidon::ClusterMetadata.new.tap {|m| m.update pool.fetch_metadata([topic]) }
  end

  # @return [Poseidon::TopicMetadata] topic metadata
  def topic_metadata
    @topic_metadata ||= metadata.metadata_for_topics([topic])[topic]
  end

  # @return [Boolean] true if registered
  def registered?
    @registered
  end

  # @return [Boolean] true if registration was successful, false if already registered
  def register!
    return false if registered?

    # Register instance
    registries.each do |_, path|
      zk.mkdir_p(path)
    end
    zk.create(consumer_path, "{}", ephemeral: true)
    zk.register(registries[:consumer]) {|_| rebalance! }

    # Rebalance
    rebalance!
    @registered = true
  end

  # Reloads metadata/broker/partition information
  def reload
    @metadata = @topic_metadata = nil
    metadata
    self
  end

  # Closes the consumer group gracefully, only really useful in tests
  # @api private
  def close
    @mutex.synchronize { release_all! }
    zk.close
  end

  # @param [Integer] partition
  # @return [Poseidon::Protocol::Broker] the leader for the given partition
  def leader(partition)
    metadata.lead_broker_for_partition(topic, partition)
  end

  # @param [Integer] partition
  # @return [Integer] the latest stored offset for the given partition
  def offset(partition)
    data, _ = zk.get offset_path(partition), ignore: :no_node
    data.to_i
  end

  # Commits the latest offset for a partition
  # @param [Integer] partition
  # @param [Integer] offset
  def commit(partition, offset)
    zk.set offset_path(partition), offset.to_s
  rescue ZK::Exceptions::NoNode
    zk.create offset_path(partition), offset.to_s, ignore: :node_exists
  end

  # Sorted partitions by broker address (so partitions on the same broker are clustered together)
  # @return [Array<Poseidon::Protocol::PartitionMetadata>] sorted partitions
  def partitions
    return [] unless topic_metadata

    topic_metadata.available_partitions.sort_by do |part|
      broker = metadata.brokers[part.leader]
      [broker.host, broker.port].join(":")
    end
  end

  # Partitions currently claimed and consumed by this group instance
  # @return [Array<Integer>] partition IDs
  def claimed
    @consumers.map(&:partition).sort
  end

  # Checks out a single partition consumer. Round-robins between claimed partitions.
  #
  # @yield [consumer] The processing block
  # @yieldparam [Consumer] consumer The consumer instance
  # @yieldreturn [Boolean] return false to stop auto-commit
  #
  # @param [Hash] opts
  # @option opts [Boolean] :commit Automatically commit consumer offset (default: true)
  # @return [Boolean] true if a consumer was checked out, false if none could be claimed
  #
  # @example
  #
  #   ok = group.checkout do |consumer|
  #     puts "Checked out consumer for partition #{consumer.partition}"
  #   end
  #   ok # => true if the block was run, false otherwise
  #
  # @api public
  def checkout(opts = {})
    consumer = nil
    commit = @mutex.synchronize do
      consumer = @consumers.shift
      return false unless consumer

      @consumers.push consumer
      yield consumer
    end

    unless opts[:commit] == false || commit == false
      commit consumer.partition, consumer.offset
    end
    true
  end

  # Convenience method to fetch messages from the broker.
  # Round-robins between claimed partitions.
  #
  # @yield [partition, messages] The processing block
  # @yieldparam [Integer] partition The source partition
  # @yieldparam [Array<Message>] messages The fetched messages
  # @yieldreturn [Boolean] return false to prevent auto-commit
  #
  # @param [Hash] opts
  # @option opts [Boolean] :commit Automatically commit consumed offset (default: true)
  # @return [Boolean] true if messages were fetched, false if none could be claimed
  #
  # @example
  #
  #   ok = group.fetch do |n, messages|
  #     puts "Fetched #{messages.size} messages for partition #{n}"
  #   end
  #   ok # => true if the block was run, false otherwise
  #
  # @api public
  def fetch(opts = {})
    checkout(opts) do |consumer|
      yield consumer.partition, consumer.fetch
    end
  end

  # Initializes an infinite fetch loop. This method blocks!
  #
  # Will wait for `loop_delay` seconds after each failed fetch. This may happen when there is
  # no new data or when the consumer hasn't claimed any partitions.
  #
  # SPECIAL ATTENTION:
  # When 'breaking out' of the loop, you must do it before processing the messages, as the
  # last offset will not be committed. Please see the examples below.
  #
  # @yield [partition, messages] The processing block
  # @yieldparam [Integer] partition The source partition, may be -1 if no partitions are claimed
  # @yieldparam [Array<Message>] messages The fetched messages
  # @yieldreturn [Boolean] return false to prevent auto-commit
  #
  # @param [Hash] opts
  # @option opts [Boolean] :commit Automatically commit consumed offset (default: true)
  # @option opts [Integer] :loop_delay Delay override in seconds after unsuccessful fetch.
  #
  # @example
  #
  #   group.fetch_loop do |n, messages|
  #     puts "Fetched #{messages.size} messages for partition #{n}"
  #   end
  #   puts "Done" # => this code is never reached
  #
  # @example Stopping the loop (wrong)
  #
  #   counts = Hash.new(0)
  #   group.fetch_loop do |n, messages|
  #     counts[n] += messages.size
  #     puts "Status: #{counts.inspect}"
  #     break if counts[0] > 100
  #   end
  #   puts "Result: #{counts.inspect}"
  #   puts "Offset: #{group.offset(0)}"
  #
  #   # Output:
  #   # Status: {0=>30}
  #   # Status: {0=>60}
  #   # Status: {0=>90}
  #   # Status: {0=>120}
  #   # Result: {0=>120}
  #   # Offset: 90 # => Last offset was not committed!
  #
  # @example Stopping the loop (correct)
  #
  #   counts = Hash.new(0)
  #   group.fetch_loop do |n, messages|
  #     break if counts[0] > 100
  #     counts[n] += messages.size
  #     puts "Status: #{counts.inspect}"
  #   end
  #   puts "Result: #{counts.inspect}"
  #   puts "Offset: #{group.offset(0)}"
  #
  #   # Output:
  #   # Status: {0=>30}
  #   # Status: {0=>60}
  #   # Status: {0=>90}
  #   # Status: {0=>120}
  #   # Result: {0=>120}
  #   # Offset: 120
  #
  # @api public
  def fetch_loop(opts = {})
    delay = opts[:loop_delay] || options[:loop_delay] || DEFAULT_LOOP_DELAY

    loop do
      mp = false
      ok = fetch(opts) do |n, messages|
        mp = !messages.empty?
        yield n, messages
      end

      # Yield over an empty array if nothing claimed,
      # to allow user to e.g. break out of the loop
      unless ok
        yield -1, []
      end

      # Sleep if either nothing was claimed or nothing was returned
      unless ok && mp
        sleep delay
      end
    end
  end

  protected

  # Rebalance algorithm:
  #
  # * let CG be all consumers in the same group that consume topic T
  # * let PT be all partitions producing topic T
  # * sort CG
  # * sort PT (so partitions on the same broker are clustered together)
  # * let POS be our index position in CG and let N = size(PT)/size(CG)
  # * assign partitions from POS*N to (POS+1)*N-1
  def rebalance!
    return if @pending

    @pending = true
    @mutex.synchronize do
      @pending = nil

      release_all!
      reload

      ids = zk.children(registries[:consumer], watch: true)
      pms = partitions
      rng = self.class.pick(pms.size, ids, id)

      pms[rng].each do |pm|
        if @pending
          release_all!
          break
        end

        consumer = claim!(pm.id)
        @consumers.push(consumer) if consumer
      end if rng
    end
  end

  # Release all consumer claims
  def release_all!
    @consumers.each {|c| release!(c.partition) }
    @consumers.clear
  end

  private

  # Claim the ownership of the partition for this consumer
  # @raise [Timeout::Error]
  def claim!(partition)
    path = claim_path(partition)
    Timeout.timeout options[:claim_timeout] || DEFAULT_CLAIM_TIMEOUT do
      while zk.create(path, id, ephemeral: true, ignore: :node_exists).nil?
        return if @pending
        sleep(0.1)
      end
    end
    Consumer.new self, partition, options.dup
  end

  # Release ownership of the partition
  def release!(partition)
    zk.delete claim_path(partition), ignore: :no_node
  end

  # @return [String] zookeeper ownership claim path
  def claim_path(partition)
    "#{registries[:owner]}/#{partition}"
  end

  # @return [String] zookeeper offset storage path
  def offset_path(partition)
    "#{registries[:offset]}/#{partition}"
  end

  # @return [String] zookeeper consumer registration path
  def consumer_path
    "#{registries[:consumer]}/#{id}"
  end

end
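The rebalance algorithm documented above boils down to `ConsumerGroup.pick`: given the partition count and the sorted consumer IDs, it computes a contiguous partition range for each consumer. A small worked example of that arithmetic (illustrative IDs, not from the gem):

```ruby
# Worked example of the partition assignment (illustration only).
# 8 partitions across 3 consumers: step = (8 / 3.0).ceil = 3, so the sorted
# consumers receive the ranges 0..2, 3..5 and 6..7 respectively.
require 'poseidon_cluster'

ids = %w[my-group-host-a my-group-host-b my-group-host-c]
ids.each do |id|
  p Poseidon::ConsumerGroup.pick(8, ids, id)
end
# => 0..2
# => 3..5
# => 6..7
```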