poseidon_cluster 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: bd4a2f60912c1cf42f542630df36bff3fda31f36
4
+ data.tar.gz: 892927b055dca81c8805ef204cfadab0be1b4d88
5
+ SHA512:
6
+ metadata.gz: 72d72af7e3c642c93cad4d98c67ae628895a8344497466d7299a5a763c97d95754bb2c7750bd5666d39d79584ba989ec07d57c5767a1a61ff46775778bde8fd0
7
+ data.tar.gz: 5637f76ec6318cb52b2a353f02324f2c7f1c7e2fd57213ad1c39d6e87f724471a6932a2fc1e9253a21ced77fb01f1273e3fe61ce50ac5807d0727ab5b886cd17
@@ -0,0 +1,5 @@
1
+ logs/
2
+ kafka*/
3
+ doc/
4
+ .yardoc/
5
+ .bundle/
@@ -0,0 +1,9 @@
1
+ language: ruby
2
+
3
+ rvm:
4
+ - 2.1.0
5
+ - 2.0.0
6
+ - 1.9.3
7
+ env:
8
+ - SLOW=1
9
+ - SLOW=0
data/Gemfile ADDED
@@ -0,0 +1,5 @@
1
+ source "https://rubygems.org"
2
+
3
+ gemspec
4
+ gem "poseidon", git: "https://github.com/dim/poseidon.git"
5
+ gem "snappy"
@@ -0,0 +1,47 @@
1
+ GIT
2
+ remote: https://github.com/dim/poseidon.git
3
+ revision: f853e8f07a6f2ff4a520d3f7e1dac030453a18ed
4
+ specs:
5
+ poseidon (0.0.4)
6
+
7
+ PATH
8
+ remote: .
9
+ specs:
10
+ poseidon_cluster (0.0.2)
11
+ poseidon
12
+ zk
13
+
14
+ GEM
15
+ remote: https://rubygems.org/
16
+ specs:
17
+ diff-lcs (1.2.5)
18
+ little-plugger (1.1.3)
19
+ logging (1.7.2)
20
+ little-plugger (>= 1.1.3)
21
+ rake (10.1.1)
22
+ rspec (2.14.1)
23
+ rspec-core (~> 2.14.0)
24
+ rspec-expectations (~> 2.14.0)
25
+ rspec-mocks (~> 2.14.0)
26
+ rspec-core (2.14.7)
27
+ rspec-expectations (2.14.4)
28
+ diff-lcs (>= 1.1.3, < 2.0)
29
+ rspec-mocks (2.14.4)
30
+ snappy (0.0.10)
31
+ yard (0.8.7.3)
32
+ zk (1.9.3)
33
+ logging (~> 1.7.2)
34
+ zookeeper (~> 1.4.0)
35
+ zookeeper (1.4.8)
36
+
37
+ PLATFORMS
38
+ ruby
39
+
40
+ DEPENDENCIES
41
+ bundler
42
+ poseidon!
43
+ poseidon_cluster!
44
+ rake
45
+ rspec
46
+ snappy
47
+ yard
@@ -0,0 +1,76 @@
1
+ # Poseidon Cluster [![Build Status](https://travis-ci.org/bsm/poseidon_cluster.png?branch=master)](https://travis-ci.org/bsm/poseidon_cluster)
2
+
3
+ Poseidon Cluster is a cluster extension of the excellent [Poseidon](http://github.com/bpot/poseidon) Ruby client for Kafka 0.8+. It implements the distribution concept of self-rebalancing *Consumer Groups* and supports the consumption of a single topic from multiple instances.
4
+
5
+ Consumer group instances share a common group name, and each message published to a topic is delivered to one instance within each subscribing consumer group. Consumer instances can be in separate processes or on separate machines.
6
+
7
+ ## Usage
8
+
9
+ ### Consuming messages
10
+
11
+ ```ruby
12
+ require 'poseidon_cluster'
13
+
14
+ consumer = Poseidon::ConsumerGroup.new(
15
+ "my-group", # Group name
16
+ ["kafka1.host:9092", "kafka2.host:9092"], # Kafka brokers
17
+ ["kafka1.host:2181", "kafka2.host:2181"], # Zookeepers hosts
18
+ "my-topic") # Topic name
19
+
20
+ consumer.partitions # => [0, 1, 2, 3] - all partitions of 'my-topic'
21
+ consumer.claimed # => [0, 1] - partitions this instance is consuming
22
+
23
+ # Fetch a bulk of messages, auto-commit the offset
24
+ consumer.fetch do |partition, bulk|
25
+ bulk.each do |m|
26
+ puts "Fetched '#{m.value}' at #{m.offset} from #{partition}"
27
+ end
28
+ end
29
+
30
+ # Get the offset for a partition
31
+ consumer.offset(0) # => 320 - current offset from partition 0
32
+
33
+ # Fetch more, commit manually
34
+ consumer.fetch commit: false do |partition, bulk|
35
+ bulk.each do |m|
36
+ puts "Fetched '#{m.value}' at #{m.offset} from #{partition}"
37
+ end
38
+
39
+ consumer.commit partition, bulk.last.offset+1 unless bulk.empty?
40
+ end
41
+ ```
42
+
43
+ For more details and information, please see the [Poseidon::ConsumerGroup](http://rubydoc.info/github/bsm/poseidon_cluster/Poseidon/ConsumerGroup) documentation.
44
+
45
+ ## Running Tests
46
+
47
+ The test suite will automatically download, configure and run Kafka locally, you only need a JRE. Run the suite via:
48
+
49
+ ```bash
50
+ bundle exec rake spec
51
+ ```
52
+
53
+ ## Licence
54
+
55
+ ```
56
+ Copyright (c) 2014 Black Square Media
57
+
58
+ Permission is hereby granted, free of charge, to any person obtaining
59
+ a copy of this software and associated documentation files (the
60
+ "Software"), to deal in the Software without restriction, including
61
+ without limitation the rights to use, copy, modify, merge, publish,
62
+ distribute, sublicense, and/or sell copies of the Software, and to
63
+ permit persons to whom the Software is furnished to do so, subject to
64
+ the following conditions:
65
+
66
+ The above copyright notice and this permission notice shall be
67
+ included in all copies or substantial portions of the Software.
68
+
69
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
70
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
71
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
72
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
73
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
74
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
75
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
76
+ ```
@@ -0,0 +1,10 @@
1
+ require 'bundler/gem_tasks'
2
+
3
+ require 'rspec/core/rake_task'
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ require 'yard'
7
+ require 'yard/rake/yardoc_task'
8
+ YARD::Rake::YardocTask.new
9
+
10
+ task default: :spec
@@ -0,0 +1,22 @@
1
+ require 'socket'
2
+ require 'timeout'
3
+ require 'zk'
4
+ require 'poseidon'
5
+ require 'thread'
6
+
7
+ module Poseidon::Cluster
8
+ MAX_INT32 = 0x7fffffff
9
+ @@sem = Mutex.new
10
+ @@inc = 0
11
+
12
+ # @return [Integer] an incremented number
13
+ # @api private
14
+ def self.inc!
15
+ @@sem.synchronize { @@inc += 1; @@inc = 1 if @@inc > MAX_INT32; @@inc }
16
+ end
17
+
18
+ end
19
+
20
+ %w|consumer_group|.each do |name|
21
+ require "poseidon/#{name}"
22
+ end
@@ -0,0 +1,276 @@
1
+ # A ConsumerGroup operates on all partitions of a single topic. The goal is to ensure
2
+ # each topic message is consumed only once, no matter of the number of consumer instances within
3
+ # a cluster, as described in: http://kafka.apache.org/documentation.html#distributionimpl.
4
+ #
5
+ # The ConsumerGroup internally creates multiple PartitionConsumer instances. It uses Zookkeper
6
+ # and follows a simple consumer rebalancing algorithms which allows all the consumers
7
+ # in a group to come into consensus on which consumer is consuming which partitions. Each
8
+ # ConsumerGroup can 'claim' 0-n partitions and will consume their messages until another
9
+ # ConsumerGroup instance joins or leaves the cluster.
10
+ #
11
+ # Please note: ConsumerGroups themselves don't implement any threading or concurrency.
12
+ # When consuming messages, they simply round-robin across the claimed partitions. If you wish
13
+ # to parallelize consumption simply create multiple ConsumerGroups instances. The built-in
14
+ # concensus algorithm will automatically rebalance the available partitions between them and you
15
+ # can then decide for yourself if you want to run them in multiple thread or processes, ideally
16
+ # on multiple boxes.
17
+ #
18
+ # Unlike stated in the Kafka documentation, consumer rebalancing is *only* triggered on each
19
+ # addition or removal of consumers within the same group, while the addition of broker nodes
20
+ # and/or partition *does currently not trigger* a rebalancing cycle.
21
+ #
22
+ # @api public
23
+ class Poseidon::ConsumerGroup
24
+ DEFAULT_CLAIM_TIMEOUT = 10
25
+
26
+ # Poseidon::ConsumerGroup::Consumer is internally used by Poseidon::ConsumerGroup.
27
+ # Don't invoke it directly.
28
+ #
29
+ # @api private
30
+ class Consumer < ::Poseidon::PartitionConsumer
31
+
32
+ # @attr_reader [Integer] partition Consumer partition
33
+ attr_reader :partition
34
+
35
+ # @api private
36
+ def initialize(group, partition, options = {})
37
+ broker = group.leader(partition)
38
+ offset = group.offset(partition)
39
+ super group.id, broker.host, broker.port, group.topic, partition, offset, options
40
+ end
41
+
42
+ end
43
+
44
+ # @attr_reader [String] name Group name
45
+ attr_reader :name
46
+
47
+ # @attr_reader [String] topic Topic name
48
+ attr_reader :topic
49
+
50
+ # @attr_reader [Poseidon::BrokerPool] pool Broker pool
51
+ attr_reader :pool
52
+
53
+ # @attr_reader [ZK::Client] zk Zookeeper client
54
+ attr_reader :zk
55
+
56
+ # @attr_reader [Hash] options Consumer options
57
+ attr_reader :options
58
+
59
+ # Create a new consumer group, which processes all partition of the specified topic.
60
+ #
61
+ # @param [String] name Group name
62
+ # @param [Array<String>] brokers A list of known brokers, e.g. ["localhost:9092"]
63
+ # @param [Array<String>] zookeepers A list of known zookeepers, e.g. ["localhost:2181"]
64
+ # @param [String] topic Topic to operate on
65
+ # @param [Hash] options Partition consumer options, see Poseidon::PartitionConsumer#initialize
66
+ #
67
+ # @api public
68
+ def initialize(name, brokers, zookeepers, topic, options = {})
69
+ @name = name
70
+ @topic = topic
71
+ @zk = ::ZK.new(zookeepers.join(","))
72
+ @options = options
73
+ @consumers = []
74
+ @pool = ::Poseidon::BrokerPool.new(id, brokers)
75
+ @mutex = Mutex.new
76
+
77
+ # Register instance
78
+ registries.each do |_, path|
79
+ zk.mkdir_p(path)
80
+ end
81
+ zk.create(consumer_path, "{}", ephemeral: true)
82
+ zk.register(registries[:consumer]) {|_| rebalance! }
83
+
84
+ # Rebalance
85
+ rebalance!
86
+ end
87
+
88
+ # @return [String] a globally unique identifier
89
+ def id
90
+ @id ||= [name, ::Socket.gethostname, ::Process.pid, ::Time.now.to_i, ::Poseidon::Cluster.inc!].join("-")
91
+ end
92
+
93
+ # @return [Hash<Symbol,String>] registry paths
94
+ def registries
95
+ @registries ||= {
96
+ consumer: "/consumers/#{name}/ids",
97
+ owner: "/consumers/#{name}/owners/#{topic}",
98
+ offset: "/consumers/#{name}/offsets/#{topic}",
99
+ }
100
+ end
101
+
102
+ # @return [Poseidon::ClusterMetadata] cluster metadata
103
+ def metadata
104
+ @metadata ||= Poseidon::ClusterMetadata.new.tap {|m| m.update pool.fetch_metadata([topic]) }
105
+ end
106
+
107
+ # @return [Poseidon::TopicMetadata] topic metadata
108
+ def topic_metadata
109
+ @topic_metadata ||= metadata.metadata_for_topics([topic])[topic]
110
+ end
111
+
112
+ # Reloads metadata/broker/partition information
113
+ def reload
114
+ @metadata = @topic_metadata = nil
115
+ metadata
116
+ self
117
+ end
118
+
119
+ # Closes the consumer group gracefully, only really useful in tests
120
+ # @api private
121
+ def close
122
+ @mutex.synchronize do
123
+ release_all!
124
+ zk.close
125
+ end
126
+ end
127
+
128
+ # @param [Integer] partition
129
+ # @return [Poseidon::Protocol::Broker] the leader for the given partition
130
+ def leader(partition)
131
+ metadata.lead_broker_for_partition(topic, partition)
132
+ end
133
+
134
+ # @param [Integer] partition
135
+ # @return [Integer] the latest stored offset for the given partition
136
+ def offset(partition)
137
+ data, _ = zk.get offset_path(partition), ignore: :no_node
138
+ data.to_i
139
+ end
140
+
141
+ # Commits the latest offset for a partition
142
+ # @param [Integer] partition
143
+ # @param [Integer] offset
144
+ def commit(partition, offset)
145
+ zk.set offset_path(partition), offset.to_s
146
+ rescue ZK::Exceptions::NoNode
147
+ zk.create offset_path(partition), offset.to_s, ignore: :node_exists
148
+ end
149
+
150
+ # Sorted partitions by broker address (so partitions on the same broker are clustered together)
151
+ # @return [Array<Poseidon::Protocol::PartitionMetadata>] sorted partitions
152
+ def partitions
153
+ topic_metadata.partitions.sort_by do |part|
154
+ broker = metadata.brokers[part.leader]
155
+ [broker.host, broker.port].join(":")
156
+ end
157
+ end
158
+
159
+ # Partitions currently claimed and consumed by this group instance
160
+ # @return [Array<Integer>] partition IDs
161
+ def claimed
162
+ @consumers.map(&:partition).sort
163
+ end
164
+
165
+ # Checks out a single partition consumer. Round-robins between claimed partitions.
166
+ #
167
+ # @yield [consumer] The processing block
168
+ # @yieldparam [Consumer] consumer The consumer instance
169
+ # @yieldreturn [Boolean] return false to stop auto-commit
170
+ #
171
+ # @param [Hash] opts
172
+ # @option opts [Boolean] :commit Automatically commit consumer offset (default: true)
173
+ #
174
+ # @api public
175
+ def checkout(opts = {})
176
+ @mutex.synchronize do
177
+ consumer = @consumers.shift
178
+ break unless consumer
179
+
180
+ @consumers.push(consumer)
181
+ result = yield(consumer)
182
+
183
+ unless opts[:commit] == false || result == false
184
+ commit consumer.partition, consumer.offset
185
+ end
186
+ end
187
+ nil
188
+ end
189
+
190
+ # Convenience method to fetch messages from the broker.
191
+ # Round-robins between claimed partitions.
192
+ #
193
+ # @yield [partition, messages] The processing block
194
+ # @yieldparam [Integer] partition The source partition
195
+ # @yieldparam [Array<Message>] messages The fetched messages
196
+ # @yieldreturn [Boolean] return false to stop commit
197
+ #
198
+ # @param [Hash] opts
199
+ # @option opts [Boolean] :commit Automatically commit consumed offset (default: true)
200
+ #
201
+ # @api public
202
+ def fetch(opts = {})
203
+ checkout(opts) do |consumer|
204
+ yield consumer.partition, consumer.fetch
205
+ end
206
+ end
207
+
208
+ protected
209
+
210
+ # Rebalance algorithm:
211
+ #
212
+ # * let CG be all consumers in the same group that consume topic T
213
+ # * let PT be all partitions producing topic T
214
+ # * sort CG
215
+ # * sort PT (so partitions on the same broker are clustered together)
216
+ # * let POS be our index position in CG and let N = size(PT)/size(CG)
217
+ # * assign partitions from POS*N to (POS+1)*N-1
218
+ def rebalance!
219
+ @mutex.synchronize do
220
+ reload
221
+ cg = zk.children(registries[:consumer], watch: true).sort
222
+ pt = partitions
223
+ pos = cg.index(id)
224
+ n = pt.size / cg.size
225
+ n = 1 if n < 1
226
+
227
+ first = pos*n
228
+ last = (pos+1)*n-1
229
+
230
+ release_all!
231
+ (pt[first..last] || []).each do |part|
232
+ consumer = claim!(part.id)
233
+ @consumers.push(consumer)
234
+ end
235
+ end
236
+ end
237
+
238
+ # Release all consumer claims
239
+ def release_all!
240
+ @consumers.each {|c| release!(c.partition) }
241
+ @consumers.clear
242
+ end
243
+
244
+ # Claim the ownership of the partition for this consumer
245
+ # @raise [Timeout::Error]
246
+ def claim!(partition)
247
+ path = claim_path(partition)
248
+ Timeout.timeout(options[:claim_timout] || DEFAULT_CLAIM_TIMEOUT) do
249
+ sleep(0.01) while zk.create(path, id, ephemeral: true, ignore: :node_exists).nil?
250
+ end
251
+ Consumer.new(self, partition, options.dup)
252
+ end
253
+
254
+ # Release ownership of the partition
255
+ def release!(partition)
256
+ zk.delete claim_path(partition), ignore: :no_node
257
+ end
258
+
259
+ private
260
+
261
+ # @return [String] zookeeper ownership claim path
262
+ def claim_path(partition)
263
+ "#{registries[:owner]}/#{partition}"
264
+ end
265
+
266
+ # @return [String] zookeeper offset storage path
267
+ def offset_path(partition)
268
+ "#{registries[:offset]}/#{partition}"
269
+ end
270
+
271
+ # @return [String] zookeeper consumer registration path
272
+ def consumer_path
273
+ "#{registries[:consumer]}/#{id}"
274
+ end
275
+
276
+ end
@@ -0,0 +1 @@
1
+ require 'poseidon/cluster'
@@ -0,0 +1,25 @@
1
+ Gem::Specification.new do |s|
2
+ s.required_ruby_version = '>= 1.9.1'
3
+ s.required_rubygems_version = ">= 1.8.0"
4
+
5
+ s.name = File.basename(__FILE__, '.gemspec')
6
+ s.summary = "Poseidon cluster extensions"
7
+ s.description = "Cluster extensions for Poseidon, a producer and consumer implementation for Kafka >= 0.8"
8
+ s.version = "0.0.2"
9
+
10
+ s.authors = ["Black Square Media"]
11
+ s.email = "info@blacksquaremedia.com"
12
+ s.homepage = "https://github.com/bsm/poseidon_cluster"
13
+
14
+ s.require_path = 'lib'
15
+ s.files = `git ls-files`.split("\n")
16
+ s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
17
+
18
+ s.add_dependency "poseidon"
19
+ s.add_dependency "zk"
20
+
21
+ s.add_development_dependency "rake"
22
+ s.add_development_dependency "bundler"
23
+ s.add_development_dependency "rspec"
24
+ s.add_development_dependency "yard"
25
+ end
@@ -0,0 +1,144 @@
1
+ require 'spec_helper'
2
+
3
+ describe Poseidon::ConsumerGroup, integration: true do
4
+
5
+ def new_group(max_bytes = 1024*8, name = TOPIC_NAME)
6
+ described_class.new "my-group", ["localhost:29092"], ["localhost:22181"], name, max_bytes: max_bytes
7
+ end
8
+
9
+ subject { new_group }
10
+ after { zookeeper.rm_rf "/consumers/#{subject.name}" }
11
+
12
+ let(:consumed) { Hash.new(0) }
13
+ let(:zookeeper) { ::ZK.new("localhost:22181") }
14
+
15
+ def stored_offsets
16
+ { 0 => subject.offset(0), 1 => subject.offset(1) }
17
+ end
18
+
19
+ describe "small batches" do
20
+
21
+ it "should consume messages from all partitions" do
22
+ 5.times do
23
+ subject.fetch {|n, msgs| consumed[n] += msgs.size }
24
+ end
25
+ consumed.values.inject(0, :+).should < 676
26
+
27
+ 5.times do
28
+ subject.fetch {|n, msgs| consumed[n] += msgs.size }
29
+ end
30
+ consumed.keys.should =~ [0, 1]
31
+ consumed.values.inject(0, :+).should == 676
32
+ consumed.should == stored_offsets
33
+ end
34
+
35
+ end
36
+
37
+ describe "large batches" do
38
+ subject { new_group 1024 * 1024 * 10 }
39
+
40
+ it "should consume messages from all partitions" do
41
+ 5.times do
42
+ subject.fetch {|n, msgs| consumed[n] += msgs.size }
43
+ end
44
+ consumed.keys.should =~ [0, 1]
45
+ consumed.values.inject(0, :+).should == 676
46
+ consumed.should == stored_offsets
47
+ end
48
+ end
49
+
50
+ describe "fuzzing" do
51
+
52
+ def in_thread(batch_size, target, qu)
53
+ Thread.new do
54
+ group = new_group(batch_size)
55
+ sum = 0
56
+ while sum < target && qu.size < 676
57
+ group.fetch {|_, m| sum += m.size; m.size.times { qu << true } }
58
+ end
59
+ group.close
60
+ sum
61
+ end
62
+ end
63
+
64
+ it "should consume from multiple sources" do
65
+ q = Queue.new
66
+ a = in_thread(4001, 200, q)
67
+ b = in_thread(4002, 50, q)
68
+ c = in_thread(4003, 120, q)
69
+ d = in_thread(4004, 40, q)
70
+ e = in_thread(4005, 400, q)
71
+ vals = [a, b, c, d, e].map &:value
72
+ vals.inject(0, :+).should == 676
73
+ end
74
+
75
+ end
76
+
77
+ describe "multi-process fuzzing", slow: true do
78
+ before do
79
+ producer = Poseidon::Producer.new(["localhost:29092"], "my-producer")
80
+ payload = "data" * 10
81
+ 100.times do
82
+ messages = (0...1000).map do |i|
83
+ Poseidon::MessageToSend.new("slow-topic", payload, i.to_s)
84
+ end
85
+ producer.send_messages(messages)
86
+ end
87
+ end
88
+
89
+ it 'should consume correctly' do
90
+ read, write = IO.pipe
91
+ pid1 = fork do
92
+ group = new_group(64*1024, "slow-topic")
93
+ 10.times do
94
+ 5.times { group.fetch {|_, m| write.write "1:#{m.size}\n" }}
95
+ sleep(1)
96
+ end
97
+ end
98
+ pid2 = fork do
99
+ group = new_group(32*1024, "slow-topic")
100
+ 5.times do
101
+ 10.times { group.fetch {|_, m| write.write "2:#{m.size}\n" }}
102
+ sleep(1)
103
+ end
104
+ end
105
+ pid3 = fork do
106
+ group = new_group(8*1024, "slow-topic")
107
+ 5.times do
108
+ 50.times { group.fetch {|_, m| write.write "3:#{m.size}\n" }}
109
+ end
110
+ end
111
+ Process.wait(pid2)
112
+
113
+ pid4 = fork do
114
+ group = new_group(8*1024, "slow-topic")
115
+ 5.times do
116
+ 50.times { group.fetch {|_, m| write.write "4:#{m.size}\n" }}
117
+ end
118
+ end
119
+ pid5 = fork do
120
+ group = new_group(32*1024, "slow-topic")
121
+ 8.times do
122
+ 50.times { group.fetch {|_, m| write.write "5:#{m.size}\n" }}
123
+ sleep(2)
124
+ end
125
+ end
126
+ Process.wait(pid1)
127
+ Process.wait(pid3)
128
+ Process.wait(pid4)
129
+ Process.wait(pid5)
130
+ write.close
131
+ raw = read.read
132
+ read.close
133
+
134
+ stats = raw.lines.inject(Hash.new(0)) do |res, line|
135
+ pid, count = line.chomp.split(":")
136
+ res[pid.to_i] += count.to_i
137
+ res
138
+ end
139
+ stats.keys.size.should be_within(1).of(4)
140
+ stats.values.inject(0, :+).should == 100_000
141
+ end
142
+
143
+ end
144
+ end
@@ -0,0 +1,15 @@
1
+ require 'spec_helper'
2
+
3
+ describe Poseidon::Cluster do
4
+
5
+ it 'should generate incremented numbers (atomically)' do
6
+ num = described_class.inc!
7
+ (described_class.inc! - num).should == 1
8
+
9
+ (0...5).map do
10
+ Thread.new { 100.times { described_class.inc! }}
11
+ end.each &:join
12
+ (described_class.inc! - num).should == 502
13
+ end
14
+
15
+ end
@@ -0,0 +1,190 @@
1
+ require 'spec_helper'
2
+
3
+ describe Poseidon::ConsumerGroup do
4
+
5
+ def new_group
6
+ group = described_class.new "my-group", ["localhost:29092", "localhost:29091"], ["localhost:22181"], TOPIC_NAME
7
+ groups.push(group)
8
+ group
9
+ end
10
+
11
+ def fetch_response(n)
12
+ set = Poseidon::MessageSet.new
13
+ n.times {|i| set << Poseidon::Message.new(value: "value", key: "key", offset: i) }
14
+ pfr = Poseidon::Protocol::PartitionFetchResponse.new(0, 0, 100, set)
15
+ tfr = Poseidon::Protocol::TopicFetchResponse.new(TOPIC_NAME, [pfr])
16
+ Poseidon::Protocol::FetchResponse.new(nil, [tfr])
17
+ end
18
+
19
+ let :groups do
20
+ []
21
+ end
22
+
23
+ let :brokers do
24
+ [ Poseidon::Protocol::Broker.new(1, "localhost", 29092), # id,host,port
25
+ Poseidon::Protocol::Broker.new(2, "localhost", 29091), ]
26
+ end
27
+
28
+ let :partitions do
29
+ [ Poseidon::Protocol::PartitionMetadata.new(0, 0, 1, [1,2], []), # err,id,leader,replicas,isr
30
+ Poseidon::Protocol::PartitionMetadata.new(0, 1, 2, [1,2], []), ]
31
+ end
32
+
33
+ let :topics do
34
+ [ Poseidon::TopicMetadata.new(Poseidon::Protocol::TopicMetadataStruct.new(0, TOPIC_NAME, partitions)) ]
35
+ end
36
+
37
+ let :metadata do
38
+ Poseidon::Protocol::MetadataResponse.new nil, brokers.dup, topics.dup
39
+ end
40
+
41
+ subject { new_group }
42
+ before do
43
+ Poseidon::BrokerPool.any_instance.stub(:fetch_metadata_from_broker).and_return(metadata)
44
+ Poseidon::Connection.any_instance.stub(:fetch).with{|_, _, req| req[0].partition_fetches[0].partition == 0 }.and_return(fetch_response(10))
45
+ Poseidon::Connection.any_instance.stub(:fetch).with{|_, _, req| req[0].partition_fetches[0].partition == 1 }.and_return(fetch_response(5))
46
+ end
47
+ after do
48
+ subject.zk.rm_rf "/consumers/#{subject.name}"
49
+ groups.each(&:close)
50
+ end
51
+
52
+ its(:name) { should == "my-group" }
53
+ its(:topic) { should == TOPIC_NAME }
54
+ its(:pool) { should be_instance_of(Poseidon::BrokerPool) }
55
+ its(:id) { should match(/\Amy-group\-[\w\-\.]+?\-\d{1,5}\-\d{10}\-\d{1,3}\z/) }
56
+ its(:zk) { should be_instance_of(ZK::Client::Threaded) }
57
+
58
+ its(:claimed) { should == [0, 1] }
59
+ its(:metadata) { should be_instance_of(Poseidon::ClusterMetadata) }
60
+ its(:topic_metadata) { should be_instance_of(Poseidon::TopicMetadata) }
61
+ its(:registries) { should == {
62
+ consumer: "/consumers/my-group/ids",
63
+ owner: "/consumers/my-group/owners/my-topic",
64
+ offset: "/consumers/my-group/offsets/my-topic",
65
+ }}
66
+
67
+ its("metadata.brokers.keys") { should =~ [1,2] }
68
+ its("topic_metadata.partition_count") { should == 2 }
69
+
70
+ it "should register with zookeeper" do
71
+ subject.zk.children("/consumers/my-group/ids").should include(subject.id)
72
+ stat = subject.zk.stat("/consumers/my-group/ids")
73
+ stat.ephemeral_owner.should be(0)
74
+
75
+ data, stat = subject.zk.get("/consumers/my-group/ids/#{subject.id}")
76
+ data.should == "{}"
77
+ stat.num_children.should == 0
78
+ stat.ephemeral_owner.should > 0
79
+ end
80
+
81
+ it "should sort partitions by leader address" do
82
+ subject.partitions.map(&:id).should == [1, 0]
83
+ end
84
+
85
+ it "should return the offset for each partition" do
86
+ subject.offset(0).should == 0
87
+ subject.offset(1).should == 0
88
+ subject.offset(2).should == 0
89
+ subject.fetch {|*| true }
90
+ subject.offset(0).should == 0
91
+ subject.offset(1).should == 5
92
+ subject.offset(2).should == 0
93
+ end
94
+
95
+ it "should return the leader for a partition" do
96
+ subject.leader(0).should == brokers[0]
97
+ subject.leader(1).should == brokers[1]
98
+ subject.leader(2).should be_nil
99
+ end
100
+
101
+ it "should checkout individual partition consumers (atomically)" do
102
+ subject.checkout {|c| c.partition.should == 1 }
103
+ subject.checkout {|c| c.partition.should == 0 }
104
+
105
+ n = 0
106
+ a = Thread.new do
107
+ 100.times { subject.checkout {|_| n+=1 } }
108
+ Thread.pass
109
+ 100.times { subject.checkout {|_| n+=1 } }
110
+ end
111
+ b = Thread.new do
112
+ 100.times { subject.checkout {|_| n+=1 } }
113
+ Thread.pass
114
+ 100.times { subject.checkout {|_| n+=1 } }
115
+ end
116
+ [a, b].each &:join
117
+ n.should == 400
118
+ end
119
+
120
+ describe "rebalance" do
121
+
122
+ it "should watch out for new consumers joining/leaving" do
123
+ subject.should_receive(:rebalance!).twice.and_call_original
124
+ new_group.should_receive(:rebalance!).once.and_call_original
125
+ new_group
126
+ end
127
+
128
+ it "should distribute available partitions between consumers" do
129
+ subject.claimed.should == [0, 1]
130
+
131
+ b = new_group
132
+ wait_for { subject.claimed.size > 0 }
133
+ wait_for { b.claimed.size > 0 }
134
+ subject.claimed.should == [1]
135
+ b.claimed.should == [0]
136
+
137
+ c = new_group
138
+ subject.claimed.should == [1]
139
+ b.claimed.should == [0]
140
+ c.claimed.should == []
141
+
142
+ b.close
143
+ wait_for { b.claimed.size < 0 }
144
+ wait_for { c.claimed.size > 0 }
145
+
146
+ subject.claimed.should == [1]
147
+ b.claimed.should == []
148
+ c.claimed.should == [0]
149
+ end
150
+
151
+ end
152
+
153
+ describe "fetch" do
154
+
155
+ it "should return messages from owned partitions" do
156
+ subject.fetch do |n, msg|
157
+ n.should == 1
158
+ msg.size.should == 5
159
+ end
160
+ subject.fetch do |n, msg|
161
+ n.should == 0
162
+ msg.size.should == 10
163
+ end
164
+ subject.fetch do |n, msg|
165
+ n.should == 1
166
+ msg.size.should == 5
167
+ end
168
+ end
169
+
170
+ it "should auto-commit fetched offset" do
171
+ -> {
172
+ subject.fetch {|n, _| n.should == 1 }
173
+ }.should change { subject.offset(1) }.from(0).to(5)
174
+ end
175
+
176
+ it "should skip auto-commits if requested" do
177
+ -> {
178
+ subject.fetch(commit: false) {|n, _| n.should == 1 }
179
+ }.should_not change { subject.offset(1) }
180
+ end
181
+
182
+ it "should skip auto-commits if block results in false" do
183
+ -> {
184
+ subject.fetch {|n, _| n.should == 1; false }
185
+ }.should_not change { subject.offset(1) }
186
+ end
187
+
188
+ end
189
+
190
+ end
@@ -0,0 +1,75 @@
1
+ require 'poseidon_cluster'
2
+ require 'rspec'
3
+ require 'fileutils'
4
+ require 'pathname'
5
+
6
+ TOPIC_NAME = "my-topic"
7
+ KAFKA_LOCAL = File.expand_path("../kafka_2.8.0-0.8.0", __FILE__)
8
+ KAFKA_ROOT = Pathname.new(ENV["KAFKA_ROOT"] || KAFKA_LOCAL)
9
+
10
+ module Poseidon::SpecHelper
11
+
12
+ def wait_for(&truth)
13
+ 100.times do
14
+ break if truth.call
15
+ sleep(0.01)
16
+ end
17
+ end
18
+
19
+ end
20
+
21
+ RSpec.configure do |c|
22
+ c.include Poseidon::SpecHelper
23
+ c.filter_run_excluding slow: true unless ENV["SLOW"] == "1"
24
+
25
+ c.before :suite do
26
+ kafka_bin = KAFKA_ROOT.join("bin", "kafka-server-start.sh")
27
+ kafka_cfg = KAFKA_ROOT.join("config", "server-poseidon.properties")
28
+ zookp_bin = KAFKA_ROOT.join("bin", "zookeeper-server-start.sh")
29
+ zookp_cfg = KAFKA_ROOT.join("config", "zookeeper-poseidon.properties")
30
+
31
+ if KAFKA_ROOT.to_s == KAFKA_LOCAL && !kafka_bin.file?
32
+ puts "---> Downloading Kafka"
33
+ target = Pathname.new(File.expand_path("../", __FILE__))
34
+ system("cd #{target} && curl http://www.us.apache.org/dist/kafka/0.8.0/kafka_2.8.0-0.8.0.tar.gz | tar xz") ||
35
+ raise("Unable to download Kafka")
36
+
37
+ kafka_cfg.open("w") do |f|
38
+ f.write KAFKA_ROOT.join("config", "server.properties").read.sub("=9092", "=29092").sub(":2181", ":22181").sub("/tmp/kafka-logs", "/tmp/kafka-logs-poseidon")
39
+ end
40
+ zookp_cfg.open("w") do |f|
41
+ f.write KAFKA_ROOT.join("config", "zookeeper.properties").read.sub("=2181", "=22181")
42
+ end
43
+ end
44
+
45
+ # Ensure all required files are present
46
+ [kafka_bin, zookp_bin, kafka_cfg, zookp_cfg].each do |path|
47
+ raise "Unable to locate #{path}. File does not exist!" unless path.file?
48
+ end
49
+
50
+ # Start Zookeeper & Kafka
51
+ $ZOOKP_PID = spawn zookp_bin.to_s, zookp_cfg.to_s, out: '/dev/null' # , err: '/dev/null'
52
+ $KAFKA_PID = spawn kafka_bin.to_s, kafka_cfg.to_s, out: '/dev/null' #, err: '/dev/null'
53
+
54
+ # Produce some fixtures
55
+ producer = Poseidon::Producer.new(["localhost:29092"], "my-producer")
56
+ payload = "data" * 10
57
+ messages = ("aa".."zz").map do |key|
58
+ Poseidon::MessageToSend.new(TOPIC_NAME, [key, payload].join(":"), key)
59
+ end
60
+
61
+ ok = false
62
+ 100.times do
63
+ break if (ok = producer.send_messages(messages))
64
+ sleep(0.1)
65
+ end
66
+ raise "Unable to start Kafka instance." unless ok
67
+ end
68
+
69
+ c.after :suite do
70
+ Process.kill :TERM, $KAFKA_PID if $KAFKA_PID
71
+ Process.kill :TERM, $ZOOKP_PID if $ZOOKP_PID
72
+ FileUtils.rm_rf "/tmp/kafka-logs-poseidon"
73
+ end
74
+
75
+ end
metadata ADDED
@@ -0,0 +1,142 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: poseidon_cluster
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Black Square Media
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-01-18 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: poseidon
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: zk
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: bundler
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rspec
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: yard
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ description: Cluster extensions for Poseidon, a producer and consumer implementation
98
+ for Kafka >= 0.8
99
+ email: info@blacksquaremedia.com
100
+ executables: []
101
+ extensions: []
102
+ extra_rdoc_files: []
103
+ files:
104
+ - ".gitignore"
105
+ - ".travis.yml"
106
+ - Gemfile
107
+ - Gemfile.lock
108
+ - README.md
109
+ - Rakefile
110
+ - lib/poseidon/cluster.rb
111
+ - lib/poseidon/consumer_group.rb
112
+ - lib/poseidon_cluster.rb
113
+ - poseidon_cluster.gemspec
114
+ - spec/integration/poseidon/consumer_group_spec.rb
115
+ - spec/lib/poseidon/cluster_spec.rb
116
+ - spec/lib/poseidon/consumer_group_spec.rb
117
+ - spec/spec_helper.rb
118
+ homepage: https://github.com/bsm/poseidon_cluster
119
+ licenses: []
120
+ metadata: {}
121
+ post_install_message:
122
+ rdoc_options: []
123
+ require_paths:
124
+ - lib
125
+ required_ruby_version: !ruby/object:Gem::Requirement
126
+ requirements:
127
+ - - ">="
128
+ - !ruby/object:Gem::Version
129
+ version: 1.9.1
130
+ required_rubygems_version: !ruby/object:Gem::Requirement
131
+ requirements:
132
+ - - ">="
133
+ - !ruby/object:Gem::Version
134
+ version: 1.8.0
135
+ requirements: []
136
+ rubyforge_project:
137
+ rubygems_version: 2.2.0.rc.1
138
+ signing_key:
139
+ specification_version: 4
140
+ summary: Poseidon cluster extensions
141
+ test_files: []
142
+ has_rdoc: