promiscuous-poseidon_cluster 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1 @@
1
+ require 'poseidon/cluster'
@@ -0,0 +1,28 @@
1
# Gem specification for the Poseidon cluster extensions.
Gem::Specification.new do |s|
  s.required_ruby_version     = '>= 2.0.0'
  s.required_rubygems_version = ">= 1.8.0"

  # The gem name is taken from this file's own name (minus ".gemspec").
  s.name        = File.basename(__FILE__, '.gemspec')
  s.summary     = "Poseidon cluster extensions"
  s.description = "Cluster extensions for Poseidon, a producer and consumer implementation for Kafka >= 0.8"
  s.version     = "0.3.0"

  s.authors     = ["Black Square Media"]
  s.email       = "info@blacksquaremedia.com"
  s.homepage    = "https://github.com/promiscuous-io/poseidon_cluster"

  s.require_path = 'lib'
  s.files        = `git ls-files`.split("\n")
  # Pick the test files out of the list git already gave us. The previous
  # `git ls-files -- {test,spec,features,scenario}/*` depended on the shell
  # performing brace expansion, which not every /bin/sh does (e.g. dash);
  # without it the pattern matches nothing and test_files ends up empty.
  s.test_files   = s.files.grep(%r{\A(?:test|spec|features|scenario)/})

  s.add_dependency "poseidon", ">= 0.0.5.pre1"
  s.add_dependency "zk"

  s.add_development_dependency "rake"
  s.add_development_dependency "bundler"
  s.add_development_dependency "rspec"
  s.add_development_dependency "rspec-its"
  s.add_development_dependency "yard"
  s.add_development_dependency "coveralls"
end
@@ -0,0 +1 @@
1
+ output.txt
@@ -0,0 +1,17 @@
1
#!/usr/bin/env ruby
# Scenario consumer: joins consumer group "my-group" on topic "my-topic" and
# appends one "<name>,<n>,<value>" line to the output file for every message
# received, where <n> is the first value yielded by fetch_loop.
#
# Usage: consumer.rb NAME OUTPUT_PATH
require 'bundler/setup'
require 'poseidon_cluster'

name = ARGV[0].to_s
path = ARGV[1]
# Fail with a usage message instead of a NoMethodError (nil > 'Q') or a
# TypeError (File.open(nil)) when an argument is missing.
abort "Usage: #{$0} NAME OUTPUT_PATH" if name.empty? || path.nil?

# Consumers whose name starts with a character after "Q" stop as soon as they
# have processed at least one non-empty batch.
short_lived = name[0] > 'Q'

# Block form guarantees the file handle is closed when the loop ends.
File.open(path, "a") do |output|
  # Unbuffered, so written lines are visible to other processes immediately.
  output.sync = true

  total = 0
  consumer = Poseidon::ConsumerGroup.new "my-group", ["localhost:29092"], ["localhost:22181"], "my-topic", max_bytes: 256*1024
  consumer.fetch_loop do |n, messages|
    break if short_lived && total > 0
    messages.each do |m|
      output.write "#{name},#{n},#{m.value}\n"
    end
    total += messages.size
  end
end
@@ -0,0 +1,23 @@
1
#!/usr/bin/env ruby
# Scenario producer: publishes LIMIT messages to "my-topic". The first
# MessageToSend argument after the topic is a sequence number, zero-padded to
# 8 digits, starting at OFFSET.
#
# Usage: producer.rb LIMIT OFFSET
require 'bundler/setup'
require 'poseidon'

max_batch    = 10000 # messages per send_messages call
max_attempts = 10    # tries per batch before giving up

limit, offset = ARGV[0].to_i, ARGV[1].to_i
producer = Poseidon::Producer.new ["localhost:29092"], "poseidon-producer"

while limit > 0
  batch  = [limit, max_batch].min
  limit -= batch

  messages = Array.new(batch) do
    num = offset.to_s.rjust(8, "0")
    offset += 1
    Poseidon::MessageToSend.new "my-topic", num, Time.now.to_s+num
  end

  delivered = false
  max_attempts.times do
    delivered = producer.send_messages messages
    break if delivered
    sleep(1)
  end

  # Previously a batch that failed every attempt was dropped silently and the
  # script still exited 0; surface the failure to the caller instead.
  abort "Unable to deliver a batch of #{messages.size} messages after #{max_attempts} attempts" unless delivered
end
data/scenario/run.rb ADDED
@@ -0,0 +1,35 @@
1
#!/usr/bin/env ruby

require 'bundler/setup'
require 'timeout'
require File.expand_path("../scenario", __FILE__)

# Runs the scenario inside Scenario.run: messages are produced in bursts while
# consumers are launched in between, with a checkpoint! after each phase.
Scenario.run do
  # Phase 1: initial backlog, then the first three consumers.
  5.times { produce 1000 }
  %w[A B C].each { |name| consume name }
  checkpoint!

  # Phase 2: more consumers are launched while production continues.
  15.times { produce 1000 }
  consume "D"
  10.times { produce 1000 }
  consume "X"
  10.times { produce 1000 }
  checkpoint!

  # Phase 3: further consumers and a mix of large and small bursts.
  20.times { produce 1000 }
  %w[E F].each { |name| consume name }
  15.times { produce 1000 }
  consume "Y"
  50.times { produce 100 }
  20.times { produce 1000 }

  checkpoint!
end
34
+
35
+
@@ -0,0 +1,134 @@
1
+ require 'fileutils'
2
+ require 'pathname'
3
+
4
# Harness for the end-to-end scenario: downloads and configures a local
# Kafka/ZooKeeper pair, launches the producer/consumer helper scripts and
# verifies that OUTPUT holds as many lines as messages were produced.
module Scenario
  extend self

  ROOT      = Pathname.new(File.expand_path("../", __FILE__))
  VERSION   = "0.8.1.1"
  SERVER    = ROOT.join "kafka_2.10-#{VERSION}"

  TOPIC_NAME = "my-topic"
  KAFKA_BIN  = SERVER.join("bin", "kafka-server-start.sh")
  KAFKA_CFG  = SERVER.join("config", "server-poseidon.properties")
  KAFKA_TMP  = "/tmp/kafka-logs-poseidon"
  ZOOKP_BIN  = SERVER.join("bin", "zookeeper-server-start.sh")
  ZOOKP_CFG  = SERVER.join("config", "zookeeper-poseidon.properties")
  ZOOKP_TMP  = "/tmp/zookeeper-poseidon"
  LOG4J_CFG  = SERVER.join("config", "log4j.properties")
  OUTPUT     = Scenario::ROOT.join("output.txt")

  @@pids  = {} # command line => pid of every process started via #spawn
  @@total = 0  # number of messages handed to the producer so far

  # Sets up the environment, evaluates the block with this module as +self+
  # (so #produce, #consume and #checkpoint! can be called bare) and always
  # tears the environment down afterwards.
  def run(&block)
    setup
    instance_eval(&block)
  rescue => e
    abort [e, *e.backtrace[0,20]].join("\n")
  ensure
    teardown
  end

  # Removes stale output, writes the configuration and starts the servers.
  def setup
    FileUtils.rm_rf OUTPUT.to_s
    configure

    # Ensure all required files are present
    [KAFKA_BIN, ZOOKP_BIN, KAFKA_CFG, ZOOKP_CFG].each do |path|
      abort "Unable to locate #{path}. File does not exist!" unless path.file?
    end

    # Exit on Ctrl-C; the resulting SystemExit unwinds through #run, whose
    # ensure clause performs the teardown. Calling teardown from the handler
    # killed the servers but then let the scenario carry on without them.
    Signal.trap("INT") { exit 130 }

    spawn KAFKA_BIN, KAFKA_CFG
    spawn ZOOKP_BIN, ZOOKP_CFG
    sleep(2)
  end

  # Terminates every spawned process, removes the temporary data directories
  # and aborts if the number of output lines differs from the number of
  # produced messages.
  def teardown
    @@pids.each_value do |pid|
      begin
        Process.kill :TERM, pid
      rescue Errno::ESRCH
        # Process is already gone; nothing left to signal.
      end
    end
    # Forget the pids so a repeated teardown does not signal them again.
    @@pids.clear
    sleep(1)
    FileUtils.rm_rf KAFKA_TMP.to_s
    FileUtils.rm_rf ZOOKP_TMP.to_s

    fail! unless numlines == @@total
  end

  # Derives the scenario-specific Kafka and ZooKeeper property files from the
  # stock ones (different ports, data directories, partition count and flush
  # interval) and rewrites the log4j configuration from INFO to FATAL.
  def configure
    download

    KAFKA_CFG.open("w") do |f|
      f.write SERVER.join("config", "server.properties").read.
        sub("=9092", "=29092").
        sub(":2181", ":22181").
        sub("num.partitions=2", "num.partitions=12").
        sub("log.flush.interval.ms=1000", "log.flush.interval.ms=10").
        sub("/tmp/kafka-logs", KAFKA_TMP)
    end
    ZOOKP_CFG.open("w") do |f|
      f.write SERVER.join("config", "zookeeper.properties").read.
        sub("/tmp/zookeeper", ZOOKP_TMP).
        sub("=2181", "=22181")
    end
    content = LOG4J_CFG.read
    LOG4J_CFG.open("w") do |f|
      f.write content.gsub("INFO", "FATAL")
    end if content.include?("INFO")
  end

  # Fetches and unpacks the Kafka distribution unless SERVER already exists.
  def download
    return if SERVER.directory?
    sh "cd #{ROOT} && curl http://www.mirrorservice.org/sites/ftp.apache.org/kafka/#{VERSION}/kafka_2.10-#{VERSION}.tgz | tar xz"
  end

  # Polls until OUTPUT holds exactly as many lines as messages were produced.
  #
  # @param timeout [Integer] maximum number of polls, one second apart
  # @return [nil] once the counts match; otherwise aborts via #fail!, either
  #   immediately when there are too many lines or after the last poll
  def checkpoint!(timeout = 10)
    puts "--> Verifying #{@@total}"
    timeout.times do
      # Read the count once per poll so both comparisons see the same value.
      lines = numlines
      return if lines == @@total
      break if lines > @@total
      sleep(1)
    end
    fail!
  end

  # Starts consumer.rb in the background, passing +name+ and the OUTPUT path.
  def consume(name)
    puts "--> Launching consumer #{name}"
    spawn ROOT.join("consumer.rb"), name, OUTPUT
  end

  # Runs producer.rb synchronously for +count+ messages, passing the running
  # total as its second argument, then adds +count+ to that total.
  def produce(count)
    puts "--> Producing messages #{@@total}-#{@@total+count-1}"
    sh ROOT.join("producer.rb"), count, @@total
    @@total += count
  end

  # @return [Integer] number of newline-terminated lines in OUTPUT (the same
  #   figure `wc -l` reports), or 0 when the file does not exist
  def numlines
    File.foreach(OUTPUT.to_s).count { |line| line.end_with?("\n") }
  rescue Errno::ENOENT
    0
  end

  # Prints "ERROR: <message>" and terminates the process.
  def abort(message)
    Kernel.abort "ERROR: #{message}"
  end

  # Terminates the process, reporting expected versus actual line counts.
  def fail!
    Kernel.abort "FAILED: expected #{@@total} but was #{numlines}"
  end

  # Runs a command synchronously and aborts when it fails. A single string is
  # still interpreted by the shell; several arguments are passed to the
  # program as-is, so values containing spaces are no longer split.
  def sh(*bits)
    argv = bits.map(&:to_s)
    system(*argv) || abort(argv.join(" "))
  end

  # Starts a command in the background and records its pid for #teardown.
  # Arguments are passed individually rather than joined into one string.
  #
  # @return [Integer] pid of the started process
  def spawn(*args)
    argv = args.map(&:to_s)
    @@pids[argv.join(" ")] = Process.spawn(*argv)
  end

end
@@ -0,0 +1,19 @@
1
+ require 'spec_helper'
2
+
3
# Specs for the counter and GUID helpers exposed by Poseidon::Cluster.
# Uses expect syntax instead of the deprecated `should` monkey patch.
describe Poseidon::Cluster do

  it 'should generate incremented numbers (atomically)' do
    num = described_class.inc!
    expect(described_class.inc! - num).to eq(1)

    workers = Array.new(5) do
      Thread.new { 100.times { described_class.inc! } }
    end
    workers.each(&:join)
    # 1 (call above) + 5 threads * 100 + 1 (this call) = 502
    expect(described_class.inc! - num).to eq(502)
  end

  it 'should generate GUIDs' do
    # Four dash-separated parts: a word/dot/dash prefix followed by three
    # numeric groups of 1-5, exactly 10 and 1-3 digits.
    expect(described_class.guid).to match(/\A[\w\-\.]+?\-\d{1,5}\-\d{10}\-\d{1,3}\z/)
  end

end
@@ -0,0 +1,286 @@
1
+ require 'spec_helper'
2
+
3
# Specs for Poseidon::ConsumerGroup. ZooKeeper and all broker traffic are
# replaced with doubles/stubs in the top-level `before` block. Expectations
# and stubs consistently use the expect/allow syntax already used by that
# `before` block, replacing the deprecated should/stub/should_receive forms.
describe Poseidon::ConsumerGroup do

  # Builds a fetch response for topic "mytopic" carrying +n+ messages with
  # offsets 0...n.
  def fetch_response(n)
    set = Poseidon::MessageSet.new
    n.times {|i| set << Poseidon::Message.new(value: "value", key: "key", offset: i) }
    pfr = Poseidon::Protocol::PartitionFetchResponse.new(0, 0, 100, set)
    tfr = Poseidon::Protocol::TopicFetchResponse.new("mytopic", [pfr])
    Poseidon::Protocol::FetchResponse.new(nil, [tfr])
  end

  let :brokers do
    [ Poseidon::Protocol::Broker.new(1, "localhost", 29092),   # id,host,port
      Poseidon::Protocol::Broker.new(2, "localhost", 29091), ]
  end

  let :partitions do
    [ Poseidon::Protocol::PartitionMetadata.new(0, 0, 1, [1,2], []), # err,id,leader,replicas,isr
      Poseidon::Protocol::PartitionMetadata.new(0, 1, 2, [1,2], []), ]
  end

  let :topics do
    [ Poseidon::TopicMetadata.new(Poseidon::Protocol::TopicMetadataStruct.new(0, "mytopic", partitions)) ]
  end

  let :metadata do
    Poseidon::Protocol::MetadataResponse.new nil, brokers.dup, topics.dup
  end

  let :zk_client do
    double "ZK", mkdir_p: nil, get: nil, set: nil, delete: nil, create: "/path", register: nil, children: ["my-group-UNIQUEID"], close: nil
  end

  let(:group) { described_class.new "my-group", ["localhost:29092", "localhost:29091"], ["localhost:22181"], "mytopic" }
  subject     { group }

  before do
    allow(ZK).to receive_messages(new: zk_client)
    allow(Poseidon::Cluster).to receive_messages(guid: "UNIQUEID")
    allow_any_instance_of(Poseidon::ConsumerGroup).to receive(:sleep)
    allow_any_instance_of(Poseidon::PartitionConsumer).to receive_messages(resolve_offset_if_necessary: 0)
    allow_any_instance_of(Poseidon::BrokerPool).to receive_messages(fetch_metadata_from_broker: metadata)

    # Fetches against partition 0 return 10 messages, against partition 1 five.
    allow_any_instance_of(Poseidon::Connection).to receive(:fetch).with(10000, 1, ->req { req[0].partition_fetches[0].partition == 0 }).and_return(fetch_response(10))
    allow_any_instance_of(Poseidon::Connection).to receive(:fetch).with(10000, 1, ->req { req[0].partition_fetches[0].partition == 1 }).and_return(fetch_response(5))
  end

  it              { is_expected.to be_registered }
  its(:name)      { is_expected.to eq("my-group") }
  its(:topic)     { is_expected.to eq("mytopic") }
  its(:pool)      { is_expected.to be_instance_of(Poseidon::BrokerPool) }
  its(:id)        { is_expected.to eq("my-group-UNIQUEID") }
  its(:zk)        { is_expected.to be(zk_client) }

  its(:claimed)        { is_expected.to eq([0, 1]) }
  its(:metadata)       { is_expected.to be_instance_of(Poseidon::ClusterMetadata) }
  its(:topic_metadata) { is_expected.to be_instance_of(Poseidon::TopicMetadata) }
  its(:registries)     { is_expected.to eq({
    consumer: "/consumers/my-group/ids",
    owner:    "/consumers/my-group/owners/mytopic",
    offset:   "/consumers/my-group/offsets/mytopic",
  })}

  its("metadata.brokers.keys")          { is_expected.to match_array([1,2]) }
  its("topic_metadata.partition_count") { is_expected.to eq(2) }

  it "should register with zookeeper and rebalance" do
    expect(zk_client).to receive(:mkdir_p).with("/consumers/my-group/ids")
    expect(zk_client).to receive(:mkdir_p).with("/consumers/my-group/owners/mytopic")
    expect(zk_client).to receive(:mkdir_p).with("/consumers/my-group/offsets/mytopic")
    expect(zk_client).to receive(:create).with("/consumers/my-group/ids/my-group-UNIQUEID", "{}", ephemeral: true)
    expect(zk_client).to receive(:register).with("/consumers/my-group/ids")
    expect_any_instance_of(described_class).to receive(:rebalance!)

    # Instantiating the (lazy) subject triggers the calls expected above.
    subject
  end

  it "should sort partitions by leader address" do
    expect(subject.partitions.map(&:id)).to eq([1, 0])
  end

  it "should not fail if topic doesn't exist" do
    no_topics = Poseidon::Protocol::MetadataResponse.new nil, brokers.dup, []
    allow_any_instance_of(Poseidon::BrokerPool).to receive(:fetch_metadata_from_broker).and_return(no_topics)

    expect(subject.partitions).to eq([])
    expect(subject.claimed).to eq([])
  end

  it "should return the offset for each partition" do
    expect(zk_client).to receive(:get).with("/consumers/my-group/offsets/mytopic/0", ignore: :no_node).and_return([nil])
    expect(subject.offset(0)).to eq(0)

    expect(zk_client).to receive(:get).with("/consumers/my-group/offsets/mytopic/1", ignore: :no_node).and_return(["21", nil])
    expect(subject.offset(1)).to eq(21)

    expect(zk_client).to receive(:get).with("/consumers/my-group/offsets/mytopic/2", ignore: :no_node).and_return(["0", nil])
    expect(subject.offset(2)).to eq(0)
  end

  it "should return the leader for a partition" do
    expect(subject.leader(0)).to eq(brokers[0])
    expect(subject.leader(1)).to eq(brokers[1])
    expect(subject.leader(2)).to be_nil
  end

  it "should checkout individual partition consumers (atomically)" do
    expect(subject.checkout {|c| expect(c.partition).to eq(1) }).to be_truthy
    expect(subject.checkout {|c| expect(c.partition).to eq(0) }).to be_truthy

    n = 0
    a = Thread.new do
      100.times { subject.checkout {|_| n+=1 } }
      Thread.pass
      100.times { subject.checkout {|_| n+=1 } }
    end
    b = Thread.new do
      100.times { subject.checkout {|_| n+=1 } }
      Thread.pass
      100.times { subject.checkout {|_| n+=1 } }
    end
    [a, b].each(&:join)
    expect(n).to eq(400)
  end

  describe "consumer" do
    subject { described_class::Consumer.new group, 1 }
    before  { allow(group).to receive(:offset).with(1).and_return(432) }

    it { is_expected.to be_a(Poseidon::PartitionConsumer) }
    its(:offset) { is_expected.to eq(432) }

    it 'should start with the earliest offset if none stored' do
      # Undo the stub from `before` by delegating to the real implementation.
      allow(group).to receive(:offset).and_call_original
      expect(subject.offset).to eq(:earliest_offset)
    end

    it 'should start with the latest offset if none stored and in trailing mode' do
      # Undo the stub from `before` by delegating to the real implementation.
      allow(group).to receive(:offset).and_call_original
      trailing_consumer = described_class::Consumer.new group, 1, {trail: true}
      expect(trailing_consumer.offset).to eq(:latest_offset)
    end

  end

  describe "rebalance" do

    it "should watch out for new consumers joining/leaving" do
      expect_any_instance_of(described_class).to receive(:rebalance!)
      subject
    end

    it "should distribute available partitions between consumers" do
      expect(subject.claimed).to eq([0, 1])
      allow(zk_client).to receive_messages(children: ["my-group-UNIQUEID", "my-group-OTHERID"])
      expect { subject.send :rebalance! }.to change { subject.claimed }.to([0])
      allow(zk_client).to receive_messages(children: ["my-group-UNIQUEID", "my-group-OTHERID", "my-group-THIRDID"])
      expect { subject.send :rebalance! }.to change { subject.claimed }.to([])
    end

    it "should allocate partitions correctly" do
      expect(subject.claimed).to eq([0, 1])

      allow(zk_client).to receive_messages(children: ["my-group-UNIQUEID", "my-group-ZID"])
      expect(zk_client).to receive(:delete).with("/consumers/my-group/owners/mytopic/1", ignore: :no_node)
      expect { subject.send :rebalance! }.to change { subject.claimed }.to([1])

      allow(zk_client).to receive_messages(children: ["my-group-UNIQUEID", "my-group-ZID", "my-group-AID"])
      expect { subject.send :rebalance! }.to change { subject.claimed }.to([0])
    end

  end

  describe "fetch" do

    it "should return messages from claimed partitions" do
      first = subject.fetch do |n, msg|
        expect(n).to eq(1)
        expect(msg.size).to eq(5)
      end
      expect(first).to be_truthy

      second = subject.fetch do |n, msg|
        expect(n).to eq(0)
        expect(msg.size).to eq(10)
      end
      expect(second).to be_truthy

      third = subject.fetch do |n, msg|
        expect(n).to eq(1)
        expect(msg.size).to eq(5)
      end
      expect(third).to be_truthy
    end

    it "should auto-commit fetched offset" do
      expect(zk_client).to receive(:set).with("/consumers/my-group/offsets/mytopic/1", "5")
      # A passing expectation is truthy, so it is also the block's result.
      subject.fetch {|n, _| expect(n).to eq(1) }
    end

    it "should skip auto-commits if requested" do
      expect(zk_client).not_to receive(:set)
      subject.fetch(commit: false) {|n, _| expect(n).to eq(1) }
    end

    it "should skip auto-commits if block results in false" do
      expect(zk_client).not_to receive(:set)
      subject.fetch {|n, _| expect(n).to eq(1); false }
    end

    it "should return false when trying to fetch messages without a claim" do
      no_topics = Poseidon::Protocol::MetadataResponse.new nil, brokers.dup, []
      allow_any_instance_of(Poseidon::BrokerPool).to receive_messages(fetch_metadata_from_broker: no_topics)

      expect(subject.claimed).to eq([])
      expect(subject.fetch {|*| }).to be_falsey
    end

    it "should return true even when no messages were fetched" do
      allow_any_instance_of(Poseidon::Connection).to receive_messages(fetch: fetch_response(0))
      expect(subject.fetch {|*| }).to be_truthy
    end

  end

  describe "fetch_loop" do

    it "should fetch indefinitely" do
      total, cycles = 0, 0
      subject.fetch_loop do |_, m|
        total += m.size
        break if (cycles+=1) > 2
      end
      # Three cycles with the stubbed responses: 5 + 10 + 5 messages.
      expect(total).to eq(20)
      expect(cycles).to eq(3)
    end

    it "should delay fetch was unsuccessful" do
      allow(subject).to receive_messages(fetch: false)

      cycles = 0
      expect(subject).to receive(:sleep).with(1)
      subject.fetch_loop do |n, m|
        expect(n).to eq(-1)
        expect(m).to eq([])
        break if (cycles+=1) > 1
      end
    end

    it "should delay fetch didn't yield any results" do
      allow(subject).to receive(:fetch).and_yield(3, []).and_return(true)

      cycles = 0
      expect(subject).to receive(:sleep).with(1)
      subject.fetch_loop do |n, m|
        expect(n).to eq(3)
        expect(m).to eq([])
        break if (cycles+=1) > 1
      end
    end

  end

  describe "pick" do

    # Arguments passed to .pick => expected result.
    { [3, ["N1", "N2", "N3"], "N1"]       => (0..0),
      [3, ["N1", "N2", "N3"], "N2"]       => (1..1),
      [3, ["N1", "N2", "N3"], "N3"]       => (2..2),
      [4, ["N2", "N4", "N3", "N1"], "N3"] => (2..2),
      [3, ["N1", "N2", "N3"], "N4"]       => nil,
      [5, ["N1", "N2", "N3"], "N1"]       => (0..1),
      [5, ["N1", "N2", "N3"], "N2"]       => (2..3),
      [5, ["N1", "N2", "N3"], "N3"]       => (4..4),
      [5, ["N1", "N2", "N3"], "N4"]       => nil,
      [2, ["N1", "N2"], "N9"]             => nil,
      [1, ["N1", "N2", "N3"], "N1"]       => (0..0),
      [1, ["N1", "N2", "N3"], "N2"]       => nil,
      [1, ["N1", "N2", "N3"], "N3"]       => nil,
      [5, ["N1", "N2"], "N1"]             => (0..2),
      [5, ["N1", "N2"], "N2"]             => (3..4),
    }.each do |args, expected|
      it "should pick #{expected.inspect} from #{args.inspect}" do
        expect(described_class.pick(*args)).to eq(expected)
      end
    end

  end
end