poseidon_cluster 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: bd4a2f60912c1cf42f542630df36bff3fda31f36
4
+ data.tar.gz: 892927b055dca81c8805ef204cfadab0be1b4d88
5
+ SHA512:
6
+ metadata.gz: 72d72af7e3c642c93cad4d98c67ae628895a8344497466d7299a5a763c97d95754bb2c7750bd5666d39d79584ba989ec07d57c5767a1a61ff46775778bde8fd0
7
+ data.tar.gz: 5637f76ec6318cb52b2a353f02324f2c7f1c7e2fd57213ad1c39d6e87f724471a6932a2fc1e9253a21ced77fb01f1273e3fe61ce50ac5807d0727ab5b886cd17
@@ -0,0 +1,5 @@
1
+ logs/
2
+ kafka*/
3
+ doc/
4
+ .yardoc/
5
+ .bundle/
@@ -0,0 +1,9 @@
1
+ language: ruby
2
+
3
+ rvm:
4
+ - 2.1.0
5
+ - 2.0.0
6
+ - 1.9.3
7
+ env:
8
+ - SLOW=1
9
+ - SLOW=0
data/Gemfile ADDED
@@ -0,0 +1,5 @@
1
+ source "https://rubygems.org"
2
+
3
+ gemspec
4
+ gem "poseidon", git: "https://github.com/dim/poseidon.git"
5
+ gem "snappy"
@@ -0,0 +1,47 @@
1
+ GIT
2
+ remote: https://github.com/dim/poseidon.git
3
+ revision: f853e8f07a6f2ff4a520d3f7e1dac030453a18ed
4
+ specs:
5
+ poseidon (0.0.4)
6
+
7
+ PATH
8
+ remote: .
9
+ specs:
10
+ poseidon_cluster (0.0.2)
11
+ poseidon
12
+ zk
13
+
14
+ GEM
15
+ remote: https://rubygems.org/
16
+ specs:
17
+ diff-lcs (1.2.5)
18
+ little-plugger (1.1.3)
19
+ logging (1.7.2)
20
+ little-plugger (>= 1.1.3)
21
+ rake (10.1.1)
22
+ rspec (2.14.1)
23
+ rspec-core (~> 2.14.0)
24
+ rspec-expectations (~> 2.14.0)
25
+ rspec-mocks (~> 2.14.0)
26
+ rspec-core (2.14.7)
27
+ rspec-expectations (2.14.4)
28
+ diff-lcs (>= 1.1.3, < 2.0)
29
+ rspec-mocks (2.14.4)
30
+ snappy (0.0.10)
31
+ yard (0.8.7.3)
32
+ zk (1.9.3)
33
+ logging (~> 1.7.2)
34
+ zookeeper (~> 1.4.0)
35
+ zookeeper (1.4.8)
36
+
37
+ PLATFORMS
38
+ ruby
39
+
40
+ DEPENDENCIES
41
+ bundler
42
+ poseidon!
43
+ poseidon_cluster!
44
+ rake
45
+ rspec
46
+ snappy
47
+ yard
@@ -0,0 +1,76 @@
1
+ # Poseidon Cluster [![Build Status](https://travis-ci.org/bsm/poseidon_cluster.png?branch=master)](https://travis-ci.org/bsm/poseidon_cluster)
2
+
3
+ Poseidon Cluster is a cluster extension to the excellent [Poseidon](http://github.com/bpot/poseidon) Ruby client for Kafka 0.8+. It implements the distribution concept of self-rebalancing *Consumer Groups* and supports the consumption of a single topic from multiple instances.
4
+
5
+ Consumer group instances share a common group name, and each message published to a topic is delivered to one instance within each subscribing consumer group. Consumer instances can be in separate processes or on separate machines.
6
+
7
+ ## Usage
8
+
9
+ ### Consuming messages
10
+
11
+ ```ruby
12
+ require 'poseidon_cluster'
13
+
14
+ consumer = Poseidon::ConsumerGroup.new(
15
+ "my-group", # Group name
16
+ ["kafka1.host:9092", "kafka2.host:9092"], # Kafka brokers
17
+ ["kafka1.host:2181", "kafka2.host:2181"], # Zookeepers hosts
18
+ "my-topic") # Topic name
19
+
20
+ consumer.partitions # => [0, 1, 2, 3] - all partitions of 'my-topic'
21
+ consumer.claimed # => [0, 1] - partitions this instance is consuming
22
+
23
+ # Fetch a bulk of messages, auto-commit the offset
24
+ consumer.fetch do |partition, bulk|
25
+ bulk.each do |m|
26
+ puts "Fetched '#{m.value}' at #{m.offset} from #{partition}"
27
+ end
28
+ end
29
+
30
+ # Get the offset for a partition
31
+ consumer.offset(0) # => 320 - current offset from partition 0
32
+
33
+ # Fetch more, commit manually
34
+ consumer.fetch commit: false do |partition, bulk|
35
+ bulk.each do |m|
36
+ puts "Fetched '#{m.value}' at #{m.offset} from #{partition}"
37
+ end
38
+
39
+ consumer.commit partition, bulk.last.offset+1 unless bulk.empty?
40
+ end
41
+ ```
42
+
43
+ For more details and information, please see the [Poseidon::ConsumerGroup](http://rubydoc.info/github/bsm/poseidon_cluster/Poseidon/ConsumerGroup) documentation.
44
+
45
+ ## Running Tests
46
+
47
+ The test suite will automatically download, configure and run Kafka locally, you only need a JRE. Run the suite via:
48
+
49
+ ```bash
50
+ bundle exec rake spec
51
+ ```
52
+
53
+ ## Licence
54
+
55
+ ```
56
+ Copyright (c) 2014 Black Square Media
57
+
58
+ Permission is hereby granted, free of charge, to any person obtaining
59
+ a copy of this software and associated documentation files (the
60
+ "Software"), to deal in the Software without restriction, including
61
+ without limitation the rights to use, copy, modify, merge, publish,
62
+ distribute, sublicense, and/or sell copies of the Software, and to
63
+ permit persons to whom the Software is furnished to do so, subject to
64
+ the following conditions:
65
+
66
+ The above copyright notice and this permission notice shall be
67
+ included in all copies or substantial portions of the Software.
68
+
69
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
70
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
71
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
72
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
73
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
74
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
75
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
76
+ ```
@@ -0,0 +1,10 @@
1
require 'bundler/gem_tasks'

# `rake spec` — run the RSpec suite.
require 'rspec/core/rake_task'
RSpec::Core::RakeTask.new(:spec)

# `rake yard` — build the API documentation.
require 'yard'
require 'yard/rake/yardoc_task'
YARD::Rake::YardocTask.new

# Running `rake` with no arguments executes the specs.
task default: :spec
@@ -0,0 +1,22 @@
1
require 'socket'
require 'timeout'
require 'zk'
require 'poseidon'
require 'thread'

# Shared helpers for the Poseidon cluster extension.
module Poseidon::Cluster
  # Upper bound for the process-wide counter; it wraps back to 1 beyond this.
  MAX_INT32 = 0x7fffffff

  @@sem = Mutex.new
  @@inc = 0

  # Atomically increments and returns a process-wide counter, wrapping
  # back to 1 once it exceeds MAX_INT32. Used to build unique consumer IDs.
  #
  # @return [Integer] an incremented number
  # @api private
  def self.inc!
    @@sem.synchronize do
      @@inc += 1
      @@inc = 1 if @@inc > MAX_INT32
      @@inc
    end
  end
end

require 'poseidon/consumer_group'
@@ -0,0 +1,276 @@
1
# A ConsumerGroup operates on all partitions of a single topic. The goal is to ensure
# each topic message is consumed only once, no matter of the number of consumer instances within
# a cluster, as described in: http://kafka.apache.org/documentation.html#distributionimpl.
#
# The ConsumerGroup internally creates multiple PartitionConsumer instances. It uses Zookeeper
# and follows a simple consumer rebalancing algorithm which allows all the consumers
# in a group to come into consensus on which consumer is consuming which partitions. Each
# ConsumerGroup can 'claim' 0-n partitions and will consume their messages until another
# ConsumerGroup instance joins or leaves the cluster.
#
# Please note: ConsumerGroups themselves don't implement any threading or concurrency.
# When consuming messages, they simply round-robin across the claimed partitions. If you wish
# to parallelize consumption simply create multiple ConsumerGroup instances. The built-in
# consensus algorithm will automatically rebalance the available partitions between them and you
# can then decide for yourself if you want to run them in multiple threads or processes, ideally
# on multiple boxes.
#
# Unlike stated in the Kafka documentation, consumer rebalancing is *only* triggered on each
# addition or removal of consumers within the same group, while the addition of broker nodes
# and/or partitions *does currently not trigger* a rebalancing cycle.
#
# @api public
class Poseidon::ConsumerGroup
  # Seconds to keep retrying a partition claim before raising Timeout::Error.
  DEFAULT_CLAIM_TIMEOUT = 10

  # Poseidon::ConsumerGroup::Consumer is internally used by Poseidon::ConsumerGroup.
  # Don't invoke it directly.
  #
  # @api private
  class Consumer < ::Poseidon::PartitionConsumer

    # @attr_reader [Integer] partition Consumer partition
    attr_reader :partition

    # Resolves the partition leader and stored offset from the group,
    # then delegates to PartitionConsumer.
    # @api private
    def initialize(group, partition, options = {})
      broker = group.leader(partition)
      offset = group.offset(partition)
      super group.id, broker.host, broker.port, group.topic, partition, offset, options
    end

  end

  # @attr_reader [String] name Group name
  attr_reader :name

  # @attr_reader [String] topic Topic name
  attr_reader :topic

  # @attr_reader [Poseidon::BrokerPool] pool Broker pool
  attr_reader :pool

  # @attr_reader [ZK::Client] zk Zookeeper client
  attr_reader :zk

  # @attr_reader [Hash] options Consumer options
  attr_reader :options

  # Create a new consumer group, which processes all partitions of the specified topic.
  #
  # @param [String] name Group name
  # @param [Array<String>] brokers A list of known brokers, e.g. ["localhost:9092"]
  # @param [Array<String>] zookeepers A list of known zookeepers, e.g. ["localhost:2181"]
  # @param [String] topic Topic to operate on
  # @param [Hash] options Partition consumer options, see Poseidon::PartitionConsumer#initialize
  # @option options [Integer] :claim_timeout Seconds to wait when claiming a partition
  #
  # @api public
  def initialize(name, brokers, zookeepers, topic, options = {})
    @name      = name
    @topic     = topic
    @zk        = ::ZK.new(zookeepers.join(","))
    @options   = options
    @consumers = []
    @pool      = ::Poseidon::BrokerPool.new(id, brokers)
    @mutex     = Mutex.new

    # Register instance: ensure all registry paths exist, then announce
    # ourselves via an ephemeral node so peers see us join/leave.
    registries.each do |_, path|
      zk.mkdir_p(path)
    end
    zk.create(consumer_path, "{}", ephemeral: true)
    # Watch the consumer registry; any membership change triggers a rebalance.
    zk.register(registries[:consumer]) {|_| rebalance! }

    # Rebalance
    rebalance!
  end

  # @return [String] a globally unique identifier
  def id
    @id ||= [name, ::Socket.gethostname, ::Process.pid, ::Time.now.to_i, ::Poseidon::Cluster.inc!].join("-")
  end

  # @return [Hash<Symbol,String>] registry paths
  def registries
    @registries ||= {
      consumer: "/consumers/#{name}/ids",
      owner:    "/consumers/#{name}/owners/#{topic}",
      offset:   "/consumers/#{name}/offsets/#{topic}",
    }
  end

  # @return [Poseidon::ClusterMetadata] cluster metadata
  def metadata
    @metadata ||= Poseidon::ClusterMetadata.new.tap {|m| m.update pool.fetch_metadata([topic]) }
  end

  # @return [Poseidon::TopicMetadata] topic metadata
  def topic_metadata
    @topic_metadata ||= metadata.metadata_for_topics([topic])[topic]
  end

  # Reloads metadata/broker/partition information
  # @return [Poseidon::ConsumerGroup] self
  def reload
    @metadata = @topic_metadata = nil
    metadata
    self
  end

  # Closes the consumer group gracefully, only really useful in tests
  # @api private
  def close
    @mutex.synchronize do
      release_all!
      zk.close
    end
  end

  # @param [Integer] partition
  # @return [Poseidon::Protocol::Broker] the leader for the given partition
  def leader(partition)
    metadata.lead_broker_for_partition(topic, partition)
  end

  # @param [Integer] partition
  # @return [Integer] the latest stored offset for the given partition,
  #   0 when no offset has been committed yet (missing node -> nil.to_i)
  def offset(partition)
    data, _ = zk.get offset_path(partition), ignore: :no_node
    data.to_i
  end

  # Commits the latest offset for a partition
  # @param [Integer] partition
  # @param [Integer] offset
  def commit(partition, offset)
    zk.set offset_path(partition), offset.to_s
  rescue ZK::Exceptions::NoNode
    # First commit for this partition: create the node instead.
    zk.create offset_path(partition), offset.to_s, ignore: :node_exists
  end

  # Sorted partitions by broker address (so partitions on the same broker are clustered together)
  # @return [Array<Poseidon::Protocol::PartitionMetadata>] sorted partitions
  def partitions
    topic_metadata.partitions.sort_by do |part|
      broker = metadata.brokers[part.leader]
      [broker.host, broker.port].join(":")
    end
  end

  # Partitions currently claimed and consumed by this group instance
  # @return [Array<Integer>] partition IDs
  def claimed
    @consumers.map(&:partition).sort
  end

  # Checks out a single partition consumer. Round-robins between claimed partitions.
  #
  # @yield [consumer] The processing block
  # @yieldparam [Consumer] consumer The consumer instance
  # @yieldreturn [Boolean] return false to stop auto-commit
  #
  # @param [Hash] opts
  # @option opts [Boolean] :commit Automatically commit consumer offset (default: true)
  #
  # @api public
  def checkout(opts = {})
    @mutex.synchronize do
      # Rotate: take the first claimed consumer and move it to the back.
      consumer = @consumers.shift
      break unless consumer

      @consumers.push(consumer)
      result = yield(consumer)

      # Auto-commit unless explicitly disabled or the block opted out.
      unless opts[:commit] == false || result == false
        commit consumer.partition, consumer.offset
      end
    end
    nil
  end

  # Convenience method to fetch messages from the broker.
  # Round-robins between claimed partitions.
  #
  # @yield [partition, messages] The processing block
  # @yieldparam [Integer] partition The source partition
  # @yieldparam [Array<Message>] messages The fetched messages
  # @yieldreturn [Boolean] return false to stop commit
  #
  # @param [Hash] opts
  # @option opts [Boolean] :commit Automatically commit consumed offset (default: true)
  #
  # @api public
  def fetch(opts = {})
    checkout(opts) do |consumer|
      yield consumer.partition, consumer.fetch
    end
  end

  protected

    # Rebalance algorithm:
    #
    # * let CG be all consumers in the same group that consume topic T
    # * let PT be all partitions producing topic T
    # * sort CG
    # * sort PT (so partitions on the same broker are clustered together)
    # * let POS be our index position in CG and let N = size(PT)/size(CG)
    # * assign partitions from POS*N to (POS+1)*N-1
    def rebalance!
      @mutex.synchronize do
        reload
        cg  = zk.children(registries[:consumer], watch: true).sort
        pt  = partitions
        # NOTE(review): our own ephemeral node is created before the first
        # rebalance, so `pos` is expected to be non-nil here — verify if the
        # registration/watch order ever changes.
        pos = cg.index(id)
        n   = pt.size / cg.size
        n   = 1 if n < 1

        first = pos*n
        last  = (pos+1)*n-1

        release_all!
        (pt[first..last] || []).each do |part|
          consumer = claim!(part.id)
          @consumers.push(consumer)
        end
      end
    end

    # Release all consumer claims
    def release_all!
      @consumers.each {|c| release!(c.partition) }
      @consumers.clear
    end

    # Claim the ownership of the partition for this consumer, retrying until
    # the previous owner's ephemeral claim node disappears.
    # @return [Consumer] a consumer bound to the claimed partition
    # @raise [Timeout::Error]
    def claim!(partition)
      path = claim_path(partition)
      # BUGFIX: the :claim_timeout option used to be read as :claim_timout and
      # was therefore silently ignored. The misspelled key is still honoured
      # for backwards compatibility.
      wait = options[:claim_timeout] || options[:claim_timout] || DEFAULT_CLAIM_TIMEOUT
      Timeout.timeout(wait) do
        sleep(0.01) while zk.create(path, id, ephemeral: true, ignore: :node_exists).nil?
      end
      Consumer.new(self, partition, options.dup)
    end

    # Release ownership of the partition
    def release!(partition)
      zk.delete claim_path(partition), ignore: :no_node
    end

  private

    # @return [String] zookeeper ownership claim path
    def claim_path(partition)
      "#{registries[:owner]}/#{partition}"
    end

    # @return [String] zookeeper offset storage path
    def offset_path(partition)
      "#{registries[:offset]}/#{partition}"
    end

    # @return [String] zookeeper consumer registration path
    def consumer_path
      "#{registries[:consumer]}/#{id}"
    end

end
@@ -0,0 +1 @@
1
+ require 'poseidon/cluster'
@@ -0,0 +1,25 @@
1
# Gem specification for poseidon_cluster.
Gem::Specification.new do |spec|
  spec.name    = File.basename(__FILE__, '.gemspec')
  spec.version = "0.0.2"

  spec.summary     = "Poseidon cluster extensions"
  spec.description = "Cluster extensions for Poseidon, a producer and consumer implementation for Kafka >= 0.8"

  spec.authors  = ["Black Square Media"]
  spec.email    = "info@blacksquaremedia.com"
  spec.homepage = "https://github.com/bsm/poseidon_cluster"

  spec.required_ruby_version     = '>= 1.9.1'
  spec.required_rubygems_version = ">= 1.8.0"

  spec.require_path = 'lib'
  # Package everything tracked by git; specs ship as test files.
  spec.files      = `git ls-files`.split("\n")
  spec.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")

  # Runtime dependencies
  spec.add_dependency "poseidon"
  spec.add_dependency "zk"

  # Development-only dependencies
  spec.add_development_dependency "rake"
  spec.add_development_dependency "bundler"
  spec.add_development_dependency "rspec"
  spec.add_development_dependency "yard"
end
@@ -0,0 +1,144 @@
1
require 'spec_helper'

# Integration tests that exercise a real, locally started Kafka broker and
# Zookeeper (bootstrapped in spec_helper's before(:suite) hook). The fixture
# topic contains 676 messages ("aa".."zz"), which the totals below rely on.
describe Poseidon::ConsumerGroup, integration: true do

  # Builds a fresh consumer group against the local test broker/zookeeper.
  # max_bytes controls the fetch batch size; name selects the topic.
  def new_group(max_bytes = 1024*8, name = TOPIC_NAME)
    described_class.new "my-group", ["localhost:29092"], ["localhost:22181"], name, max_bytes: max_bytes
  end

  subject { new_group }
  # Remove all group state from Zookeeper after each example.
  after { zookeeper.rm_rf "/consumers/#{subject.name}" }

  let(:consumed) { Hash.new(0) }
  let(:zookeeper) { ::ZK.new("localhost:22181") }

  # Offsets currently persisted in Zookeeper for partitions 0 and 1.
  def stored_offsets
    { 0 => subject.offset(0), 1 => subject.offset(1) }
  end

  describe "small batches" do

    it "should consume messages from all partitions" do
      # With small batches, 5 fetches are not enough to drain the topic ...
      5.times do
        subject.fetch {|n, msgs| consumed[n] += msgs.size }
      end
      consumed.values.inject(0, :+).should < 676

      # ... but 5 more complete it, and counts must match committed offsets.
      5.times do
        subject.fetch {|n, msgs| consumed[n] += msgs.size }
      end
      consumed.keys.should =~ [0, 1]
      consumed.values.inject(0, :+).should == 676
      consumed.should == stored_offsets
    end

  end

  describe "large batches" do
    subject { new_group 1024 * 1024 * 10 }

    it "should consume messages from all partitions" do
      # Large batches drain the whole topic within 5 fetches.
      5.times do
        subject.fetch {|n, msgs| consumed[n] += msgs.size }
      end
      consumed.keys.should =~ [0, 1]
      consumed.values.inject(0, :+).should == 676
      consumed.should == stored_offsets
    end
  end

  describe "fuzzing" do

    # Spawns a thread running its own consumer group instance; it fetches
    # until it has read `target` messages or the shared queue shows the topic
    # is exhausted, then returns its message count.
    def in_thread(batch_size, target, qu)
      Thread.new do
        group = new_group(batch_size)
        sum = 0
        while sum < target && qu.size < 676
          group.fetch {|_, m| sum += m.size; m.size.times { qu << true } }
        end
        group.close
        sum
      end
    end

    it "should consume from multiple sources" do
      # Five concurrent consumers with varying batch sizes/targets must
      # together consume each message exactly once.
      q = Queue.new
      a = in_thread(4001, 200, q)
      b = in_thread(4002, 50, q)
      c = in_thread(4003, 120, q)
      d = in_thread(4004, 40, q)
      e = in_thread(4005, 400, q)
      vals = [a, b, c, d, e].map &:value
      vals.inject(0, :+).should == 676
    end

  end

  describe "multi-process fuzzing", slow: true do
    before do
      # Seed a dedicated topic with 100k messages for the forked consumers.
      producer = Poseidon::Producer.new(["localhost:29092"], "my-producer")
      payload = "data" * 10
      100.times do
        messages = (0...1000).map do |i|
          Poseidon::MessageToSend.new("slow-topic", payload, i.to_s)
        end
        producer.send_messages(messages)
      end
    end

    it 'should consume correctly' do
      # Each forked child reports "child_id:batch_size" lines over a pipe;
      # children join and leave at different times to force rebalances.
      read, write = IO.pipe
      pid1 = fork do
        group = new_group(64*1024, "slow-topic")
        10.times do
          5.times { group.fetch {|_, m| write.write "1:#{m.size}\n" }}
          sleep(1)
        end
      end
      pid2 = fork do
        group = new_group(32*1024, "slow-topic")
        5.times do
          10.times { group.fetch {|_, m| write.write "2:#{m.size}\n" }}
          sleep(1)
        end
      end
      pid3 = fork do
        group = new_group(8*1024, "slow-topic")
        5.times do
          50.times { group.fetch {|_, m| write.write "3:#{m.size}\n" }}
        end
      end
      # Wait for one consumer to exit before adding two more (rebalance churn).
      Process.wait(pid2)

      pid4 = fork do
        group = new_group(8*1024, "slow-topic")
        5.times do
          50.times { group.fetch {|_, m| write.write "4:#{m.size}\n" }}
        end
      end
      pid5 = fork do
        group = new_group(32*1024, "slow-topic")
        8.times do
          50.times { group.fetch {|_, m| write.write "5:#{m.size}\n" }}
          sleep(2)
        end
      end
      Process.wait(pid1)
      Process.wait(pid3)
      Process.wait(pid4)
      Process.wait(pid5)
      write.close
      raw = read.read
      read.close

      # Tally per-child message counts from the pipe output.
      stats = raw.lines.inject(Hash.new(0)) do |res, line|
        pid, count = line.chomp.split(":")
        res[pid.to_i] += count.to_i
        res
      end
      stats.keys.size.should be_within(1).of(4)
      stats.values.inject(0, :+).should == 100_000
    end

  end
end
@@ -0,0 +1,15 @@
1
require 'spec_helper'

describe Poseidon::Cluster do

  it 'should generate incremented numbers (atomically)' do
    # A single increment bumps the counter by exactly one.
    base = described_class.inc!
    (described_class.inc! - base).should == 1

    # 5 threads x 100 increments, plus the probe above and the one below,
    # must yield exactly 502 — no lost updates under contention.
    workers = Array.new(5) do
      Thread.new { 100.times { described_class.inc! } }
    end
    workers.each(&:join)
    (described_class.inc! - base).should == 502
  end

end
@@ -0,0 +1,190 @@
1
require 'spec_helper'

# Unit tests for Poseidon::ConsumerGroup. Broker metadata and fetches are
# stubbed; only the (local test) Zookeeper is real.
describe Poseidon::ConsumerGroup do

  # Builds a group against the stubbed brokers and tracks it for cleanup.
  def new_group
    group = described_class.new "my-group", ["localhost:29092", "localhost:29091"], ["localhost:22181"], TOPIC_NAME
    groups.push(group)
    group
  end

  # Builds a canned fetch response containing `n` messages for partition 0.
  def fetch_response(n)
    set = Poseidon::MessageSet.new
    n.times {|i| set << Poseidon::Message.new(value: "value", key: "key", offset: i) }
    pfr = Poseidon::Protocol::PartitionFetchResponse.new(0, 0, 100, set)
    tfr = Poseidon::Protocol::TopicFetchResponse.new(TOPIC_NAME, [pfr])
    Poseidon::Protocol::FetchResponse.new(nil, [tfr])
  end

  let :groups do
    []
  end

  let :brokers do
    [ Poseidon::Protocol::Broker.new(1, "localhost", 29092), # id,host,port
      Poseidon::Protocol::Broker.new(2, "localhost", 29091), ]
  end

  let :partitions do
    [ Poseidon::Protocol::PartitionMetadata.new(0, 0, 1, [1,2], []), # err,id,leader,replicas,isr
      Poseidon::Protocol::PartitionMetadata.new(0, 1, 2, [1,2], []), ]
  end

  let :topics do
    [ Poseidon::TopicMetadata.new(Poseidon::Protocol::TopicMetadataStruct.new(0, TOPIC_NAME, partitions)) ]
  end

  let :metadata do
    Poseidon::Protocol::MetadataResponse.new nil, brokers.dup, topics.dup
  end

  subject { new_group }
  before do
    # Partition 0 serves 10 messages per fetch, partition 1 serves 5.
    Poseidon::BrokerPool.any_instance.stub(:fetch_metadata_from_broker).and_return(metadata)
    Poseidon::Connection.any_instance.stub(:fetch).with{|_, _, req| req[0].partition_fetches[0].partition == 0 }.and_return(fetch_response(10))
    Poseidon::Connection.any_instance.stub(:fetch).with{|_, _, req| req[0].partition_fetches[0].partition == 1 }.and_return(fetch_response(5))
  end
  after do
    subject.zk.rm_rf "/consumers/#{subject.name}"
    groups.each(&:close)
  end

  its(:name) { should == "my-group" }
  its(:topic) { should == TOPIC_NAME }
  its(:pool) { should be_instance_of(Poseidon::BrokerPool) }
  its(:id) { should match(/\Amy-group\-[\w\-\.]+?\-\d{1,5}\-\d{10}\-\d{1,3}\z/) }
  its(:zk) { should be_instance_of(ZK::Client::Threaded) }

  its(:claimed) { should == [0, 1] }
  its(:metadata) { should be_instance_of(Poseidon::ClusterMetadata) }
  its(:topic_metadata) { should be_instance_of(Poseidon::TopicMetadata) }
  its(:registries) { should == {
    consumer: "/consumers/my-group/ids",
    owner: "/consumers/my-group/owners/my-topic",
    offset: "/consumers/my-group/offsets/my-topic",
  }}

  its("metadata.brokers.keys") { should =~ [1,2] }
  its("topic_metadata.partition_count") { should == 2 }

  it "should register with zookeeper" do
    subject.zk.children("/consumers/my-group/ids").should include(subject.id)
    stat = subject.zk.stat("/consumers/my-group/ids")
    stat.ephemeral_owner.should be(0)

    # The instance's own node is ephemeral so it disappears on disconnect.
    data, stat = subject.zk.get("/consumers/my-group/ids/#{subject.id}")
    data.should == "{}"
    stat.num_children.should == 0
    stat.ephemeral_owner.should > 0
  end

  it "should sort partitions by leader address" do
    subject.partitions.map(&:id).should == [1, 0]
  end

  it "should return the offset for each partition" do
    subject.offset(0).should == 0
    subject.offset(1).should == 0
    subject.offset(2).should == 0
    subject.fetch {|*| true }
    subject.offset(0).should == 0
    subject.offset(1).should == 5
    subject.offset(2).should == 0
  end

  it "should return the leader for a partition" do
    subject.leader(0).should == brokers[0]
    subject.leader(1).should == brokers[1]
    subject.leader(2).should be_nil
  end

  it "should checkout individual partition consumers (atomically)" do
    subject.checkout {|c| c.partition.should == 1 }
    subject.checkout {|c| c.partition.should == 0 }

    n = 0
    a = Thread.new do
      100.times { subject.checkout {|_| n+=1 } }
      Thread.pass
      100.times { subject.checkout {|_| n+=1 } }
    end
    b = Thread.new do
      100.times { subject.checkout {|_| n+=1 } }
      Thread.pass
      100.times { subject.checkout {|_| n+=1 } }
    end
    [a, b].each(&:join)
    n.should == 400
  end

  describe "rebalance" do

    it "should watch out for new consumers joining/leaving" do
      subject.should_receive(:rebalance!).twice.and_call_original
      new_group.should_receive(:rebalance!).once.and_call_original
      new_group
    end

    it "should distribute available partitions between consumers" do
      subject.claimed.should == [0, 1]

      b = new_group
      wait_for { subject.claimed.size > 0 }
      wait_for { b.claimed.size > 0 }
      subject.claimed.should == [1]
      b.claimed.should == [0]

      c = new_group
      subject.claimed.should == [1]
      b.claimed.should == [0]
      c.claimed.should == []

      b.close
      # BUGFIX: this used to wait for `b.claimed.size < 0`, which can never be
      # true, so the wait always burned its full timeout without checking
      # anything. Waiting for the claim count to drop to zero is the intent.
      wait_for { b.claimed.size == 0 }
      wait_for { c.claimed.size > 0 }

      subject.claimed.should == [1]
      b.claimed.should == []
      c.claimed.should == [0]
    end

  end

  describe "fetch" do

    it "should return messages from owned partitions" do
      # Round-robin order: partition 1 (5 msgs), then 0 (10 msgs), then 1 again.
      subject.fetch do |n, msg|
        n.should == 1
        msg.size.should == 5
      end
      subject.fetch do |n, msg|
        n.should == 0
        msg.size.should == 10
      end
      subject.fetch do |n, msg|
        n.should == 1
        msg.size.should == 5
      end
    end

    it "should auto-commit fetched offset" do
      -> {
        subject.fetch {|n, _| n.should == 1 }
      }.should change { subject.offset(1) }.from(0).to(5)
    end

    it "should skip auto-commits if requested" do
      -> {
        subject.fetch(commit: false) {|n, _| n.should == 1 }
      }.should_not change { subject.offset(1) }
    end

    it "should skip auto-commits if block results in false" do
      -> {
        subject.fetch {|n, _| n.should == 1; false }
      }.should_not change { subject.offset(1) }
    end

  end

end
@@ -0,0 +1,75 @@
1
require 'poseidon_cluster'
require 'rspec'
require 'fileutils'
require 'pathname'

# Fixture topic used throughout the suite.
TOPIC_NAME = "my-topic"
# Local Kafka distribution directory; downloaded on demand in before(:suite).
KAFKA_LOCAL = File.expand_path("../kafka_2.8.0-0.8.0", __FILE__)
# Allow pointing at an existing Kafka install via ENV["KAFKA_ROOT"].
KAFKA_ROOT = Pathname.new(ENV["KAFKA_ROOT"] || KAFKA_LOCAL)

module Poseidon::SpecHelper

  # Polls the block roughly every 10ms until it returns truthy.
  # NOTE(review): gives up silently after ~1s (100 iterations) without
  # raising — callers must re-assert the condition afterwards.
  def wait_for(&truth)
    100.times do
      break if truth.call
      sleep(0.01)
    end
  end

end

RSpec.configure do |c|
  c.include Poseidon::SpecHelper
  # Examples tagged slow: true only run when SLOW=1 is set.
  c.filter_run_excluding slow: true unless ENV["SLOW"] == "1"

  c.before :suite do
    kafka_bin = KAFKA_ROOT.join("bin", "kafka-server-start.sh")
    kafka_cfg = KAFKA_ROOT.join("config", "server-poseidon.properties")
    zookp_bin = KAFKA_ROOT.join("bin", "zookeeper-server-start.sh")
    zookp_cfg = KAFKA_ROOT.join("config", "zookeeper-poseidon.properties")

    # Download and configure a local Kafka, unless KAFKA_ROOT points at an
    # existing installation.
    if KAFKA_ROOT.to_s == KAFKA_LOCAL && !kafka_bin.file?
      puts "---> Downloading Kafka"
      target = Pathname.new(File.expand_path("../", __FILE__))
      system("cd #{target} && curl http://www.us.apache.org/dist/kafka/0.8.0/kafka_2.8.0-0.8.0.tar.gz | tar xz") ||
        raise("Unable to download Kafka")

      # Rewrite ports and data dirs so the test instances (29092/22181)
      # never clash with a locally running Kafka/Zookeeper.
      kafka_cfg.open("w") do |f|
        f.write KAFKA_ROOT.join("config", "server.properties").read.sub("=9092", "=29092").sub(":2181", ":22181").sub("/tmp/kafka-logs", "/tmp/kafka-logs-poseidon")
      end
      zookp_cfg.open("w") do |f|
        f.write KAFKA_ROOT.join("config", "zookeeper.properties").read.sub("=2181", "=22181")
      end
    end

    # Ensure all required files are present
    [kafka_bin, zookp_bin, kafka_cfg, zookp_cfg].each do |path|
      raise "Unable to locate #{path}. File does not exist!" unless path.file?
    end

    # Start Zookeeper & Kafka
    $ZOOKP_PID = spawn zookp_bin.to_s, zookp_cfg.to_s, out: '/dev/null' # , err: '/dev/null'
    $KAFKA_PID = spawn kafka_bin.to_s, kafka_cfg.to_s, out: '/dev/null' #, err: '/dev/null'

    # Produce some fixtures: 676 messages keyed "aa".."zz".
    producer = Poseidon::Producer.new(["localhost:29092"], "my-producer")
    payload = "data" * 10
    messages = ("aa".."zz").map do |key|
      Poseidon::MessageToSend.new(TOPIC_NAME, [key, payload].join(":"), key)
    end

    # Retry for up to ~10s until the broker is up and accepts the batch.
    ok = false
    100.times do
      break if (ok = producer.send_messages(messages))
      sleep(0.1)
    end
    raise "Unable to start Kafka instance." unless ok
  end

  c.after :suite do
    # Tear down the spawned servers and their data directory.
    Process.kill :TERM, $KAFKA_PID if $KAFKA_PID
    Process.kill :TERM, $ZOOKP_PID if $ZOOKP_PID
    FileUtils.rm_rf "/tmp/kafka-logs-poseidon"
  end

end
metadata ADDED
@@ -0,0 +1,142 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: poseidon_cluster
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Black Square Media
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-01-18 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: poseidon
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: zk
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: bundler
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rspec
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: yard
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ description: Cluster extensions for Poseidon, a producer and consumer implementation
98
+ for Kafka >= 0.8
99
+ email: info@blacksquaremedia.com
100
+ executables: []
101
+ extensions: []
102
+ extra_rdoc_files: []
103
+ files:
104
+ - ".gitignore"
105
+ - ".travis.yml"
106
+ - Gemfile
107
+ - Gemfile.lock
108
+ - README.md
109
+ - Rakefile
110
+ - lib/poseidon/cluster.rb
111
+ - lib/poseidon/consumer_group.rb
112
+ - lib/poseidon_cluster.rb
113
+ - poseidon_cluster.gemspec
114
+ - spec/integration/poseidon/consumer_group_spec.rb
115
+ - spec/lib/poseidon/cluster_spec.rb
116
+ - spec/lib/poseidon/consumer_group_spec.rb
117
+ - spec/spec_helper.rb
118
+ homepage: https://github.com/bsm/poseidon_cluster
119
+ licenses: []
120
+ metadata: {}
121
+ post_install_message:
122
+ rdoc_options: []
123
+ require_paths:
124
+ - lib
125
+ required_ruby_version: !ruby/object:Gem::Requirement
126
+ requirements:
127
+ - - ">="
128
+ - !ruby/object:Gem::Version
129
+ version: 1.9.1
130
+ required_rubygems_version: !ruby/object:Gem::Requirement
131
+ requirements:
132
+ - - ">="
133
+ - !ruby/object:Gem::Version
134
+ version: 1.8.0
135
+ requirements: []
136
+ rubyforge_project:
137
+ rubygems_version: 2.2.0.rc.1
138
+ signing_key:
139
+ specification_version: 4
140
+ summary: Poseidon cluster extensions
141
+ test_files: []
142
+ has_rdoc: