promiscuous-poseidon_cluster 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1 @@
+ require 'poseidon/cluster'
@@ -0,0 +1,28 @@
+ Gem::Specification.new do |s|
+   s.required_ruby_version = '>= 2.0.0'
+   s.required_rubygems_version = ">= 1.8.0"
+
+   s.name = File.basename(__FILE__, '.gemspec')
+   s.summary = "Poseidon cluster extensions"
+   s.description = "Cluster extensions for Poseidon, a producer and consumer implementation for Kafka >= 0.8"
+   s.version = "0.3.0"
+
+   s.authors = ["Black Square Media"]
+   s.email = "info@blacksquaremedia.com"
+   s.homepage = "https://github.com/promiscuous-io/poseidon_cluster"
+
+   s.require_path = 'lib'
+   s.files = `git ls-files`.split("\n")
+   s.test_files = `git ls-files -- {test,spec,features,scenario}/*`.split("\n")
+
+   s.add_dependency "poseidon", ">= 0.0.5.pre1"
+   s.add_dependency "zk"
+
+   s.add_development_dependency "rake"
+   s.add_development_dependency "bundler"
+   s.add_development_dependency "rspec"
+   s.add_development_dependency "rspec-its"
+   s.add_development_dependency "yard"
+   s.add_development_dependency "coveralls"
+
+ end
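Note that the gem's name is derived from the gemspec's own filename. A minimal sketch for sanity-checking the spec locally, assuming the file is saved as poseidon_cluster.gemspec (this diff does not show file paths for most hunks):

    # Hypothetical check: load the spec and inspect its metadata.
    spec = Gem::Specification.load("poseidon_cluster.gemspec")
    puts spec.full_name                      # => "poseidon_cluster-0.3.0"
    spec.dependencies.each { |d| puts "#{d.name} #{d.requirement}" }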
@@ -0,0 +1 @@
+ output.txt
@@ -0,0 +1,17 @@
+ #!/usr/bin/env ruby
+ require 'bundler/setup'
+ require 'poseidon_cluster'
+
+ name = ARGV[0].to_s
+ output = File.open(ARGV[1], "a")
+ output.sync = true
+
+ total = 0
+ consumer = Poseidon::ConsumerGroup.new "my-group", ["localhost:29092"], ["localhost:22181"], "my-topic", max_bytes: 256*1024
+ consumer.fetch_loop do |n, messages|
+   break if name[0] > 'Q' && total > 0
+   messages.each do |m|
+     output.write "#{name},#{n},#{m.value}\n"
+   end
+   total += messages.size
+ end
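The script takes a consumer name and an output path as arguments; names sorting after 'Q' exit once they have consumed at least one batch, which the scenario below uses to simulate consumers leaving the group. For reference, a minimal sketch of the same ConsumerGroup#fetch_loop API in isolation, assuming the scenario's local broker and ZooKeeper ports:

    require 'poseidon_cluster'

    # The block receives the claimed partition number and a batch of messages.
    group = Poseidon::ConsumerGroup.new "my-group", ["localhost:29092"], ["localhost:22181"], "my-topic"
    group.fetch_loop do |partition, messages|
      messages.each { |m| puts "#{partition}: #{m.value}" }
    end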
@@ -0,0 +1,23 @@
+ #!/usr/bin/env ruby
+ require 'bundler/setup'
+ require 'poseidon'
+
+ limit, offset = ARGV[0].to_i, ARGV[1].to_i
+ producer = Poseidon::Producer.new ["localhost:29092"], "poseidon-producer"
+
+ while limit > 0 do
+   batch = limit > 10000 ? 10000 : limit
+   limit -= batch
+
+   messages = (0...batch).map do
+     num = offset.to_s.rjust(8, "0")
+     offset += 1
+     Poseidon::MessageToSend.new "my-topic", num, Time.now.to_s+num
+   end
+
+   10.times do
+     ok = producer.send_messages messages
+     break if ok
+     sleep(1)
+   end
+ end
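The producer script takes a message count and a starting offset, sends batches of up to 10,000 messages, and retries each failed batch up to ten times at one-second intervals. A stripped-down sketch of the same Producer API:

    require 'poseidon'

    # Send one small batch; the retry loop from the script above is omitted.
    producer = Poseidon::Producer.new ["localhost:29092"], "example-producer"
    messages = (0...10).map { |i| Poseidon::MessageToSend.new "my-topic", i.to_s.rjust(8, "0") }
    producer.send_messages messages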
data/scenario/run.rb ADDED
@@ -0,0 +1,35 @@
+ #!/usr/bin/env ruby
+
+ require 'bundler/setup'
+ require 'timeout'
+ require File.expand_path("../scenario", __FILE__)
+
+ # Start Zookeeper & Kafka
+ Scenario.run do
+   5.times do
+     produce 1000
+   end
+   consume "A"
+   consume "B"
+   consume "C"
+   checkpoint!
+
+   15.times { produce 1000 }
+   consume "D"
+   10.times { produce 1000 }
+   consume "X"
+   10.times { produce 1000 }
+   checkpoint!
+
+   20.times { produce 1000 }
+   consume "E"
+   consume "F"
+   15.times { produce 1000 }
+   consume "Y"
+   50.times { produce 100 }
+   20.times { produce 1000 }
+
+   checkpoint!
+ end
+
+
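run.rb drives the integration scenario through the Scenario DSL defined in scenario.rb below. For reference, a minimal scenario using the same helpers:

    Scenario.run do
      produce 1000   # write messages 0-999 to the topic
      consume "A"    # launch one consumer process
      checkpoint!    # block until output.txt holds all 1000 lines
    end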
@@ -0,0 +1,134 @@
+ require 'fileutils'
+ require 'pathname'
+
+ module Scenario
+   extend self
+
+   ROOT = Pathname.new(File.expand_path("../", __FILE__))
+   VERSION = "0.8.1.1"
+   SERVER = ROOT.join "kafka_2.10-#{VERSION}"
+
+   TOPIC_NAME = "my-topic"
+   KAFKA_BIN = SERVER.join("bin", "kafka-server-start.sh")
+   KAFKA_CFG = SERVER.join("config", "server-poseidon.properties")
+   KAFKA_TMP = "/tmp/kafka-logs-poseidon"
+   ZOOKP_BIN = SERVER.join("bin", "zookeeper-server-start.sh")
+   ZOOKP_CFG = SERVER.join("config", "zookeeper-poseidon.properties")
+   ZOOKP_TMP = "/tmp/zookeeper-poseidon"
+   LOG4J_CFG = SERVER.join("config", "log4j.properties")
+   OUTPUT = Scenario::ROOT.join("output.txt")
+
+   @@pids = {}
+   @@total = 0
+
+   def run(&block)
+     setup
+     instance_eval(&block)
+   rescue => e
+     abort [e, *e.backtrace[0,20]].join("\n")
+   ensure
+     teardown
+   end
+
+   def setup
+     FileUtils.rm_rf OUTPUT.to_s
+     configure
+
+     # Ensure all required files are present
+     [KAFKA_BIN, ZOOKP_BIN, KAFKA_CFG, ZOOKP_CFG].each do |path|
+       abort "Unable to locate #{path}. File does not exist!" unless path.file?
+     end
+
+     Signal.trap("INT") { teardown }
+
+     spawn KAFKA_BIN, KAFKA_CFG
+     spawn ZOOKP_BIN, ZOOKP_CFG
+     sleep(2)
+   end
+
+   def teardown
+     @@pids.each do |_, pid|
+       Process.kill :TERM, pid
+     end
+     sleep(1)
+     FileUtils.rm_rf KAFKA_TMP.to_s
+     FileUtils.rm_rf ZOOKP_TMP.to_s
+
+     fail! unless numlines == @@total
+   end
+
+   def configure
+     download
+
+     KAFKA_CFG.open("w") do |f|
+       f.write SERVER.join("config", "server.properties").read.
+         sub("=9092", "=29092").
+         sub(":2181", ":22181").
+         sub("num.partitions=2", "num.partitions=12").
+         sub("log.flush.interval.ms=1000", "log.flush.interval.ms=10").
+         sub("/tmp/kafka-logs", KAFKA_TMP)
+     end
+     ZOOKP_CFG.open("w") do |f|
+       f.write SERVER.join("config", "zookeeper.properties").read.
+         sub("/tmp/zookeeper", ZOOKP_TMP).
+         sub("=2181", "=22181")
+     end
+     content = LOG4J_CFG.read
+     LOG4J_CFG.open("w") do |f|
+       f.write content.gsub("INFO", "FATAL")
+     end if content.include?("INFO")
+   end
+
+   def download
+     return if SERVER.directory?
+     sh "cd #{ROOT} && curl http://www.mirrorservice.org/sites/ftp.apache.org/kafka/#{VERSION}/kafka_2.10-#{VERSION}.tgz | tar xz"
+   end
+
+   def checkpoint!(timeout = 10)
+     puts "--> Verifying #{@@total}"
+     timeout.times do
+       if numlines > @@total
+         break
+       elsif numlines < @@total
+         sleep(1)
+       else
+         return
+       end
+     end
+     fail!
+   end
+
+   def consume(name)
+     puts "--> Launching consumer #{name}"
+     spawn ROOT.join("consumer.rb"), name, OUTPUT
+   end
+
+   def produce(count)
+     puts "--> Producing messages #{@@total}-#{@@total+count-1}"
+     sh ROOT.join("producer.rb"), count, @@total
+     @@total += count
+   end
+
+   def numlines
+     `wc -l #{OUTPUT} 2> /dev/null`.to_i
+   end
+
+   def abort(message)
+     Kernel.abort "ERROR: #{message}"
+   end
+
+   def fail!
+     Kernel.abort "FAILED: expected #{@@total} but was #{numlines}"
+   end
+
+   def sh(*bits)
+     cmd = bits.join(" ")
+     system(cmd) || abort(cmd)
+   end
+
+   def spawn(*args)
+     cmd = args.join(" ")
+     @@pids[cmd] = Process.spawn(cmd)
+   end
+
+ end
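A note on the numlines helper above: `wc -l FILE` prints the line count followed by the file name, and String#to_i parses the leading integer, so a missing output file simply yields 0:

    `wc -l output.txt 2> /dev/null`.to_i   # => 42 for a 42-line file, 0 if absent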
@@ -0,0 +1,19 @@
+ require 'spec_helper'
+
+ describe Poseidon::Cluster do
+
+   it 'should generate incremented numbers (atomically)' do
+     num = described_class.inc!
+     (described_class.inc! - num).should == 1
+
+     (0...5).map do
+       Thread.new { 100.times { described_class.inc! }}
+     end.each &:join
+     (described_class.inc! - num).should == 502
+   end
+
+   it 'should generate GUIDs' do
+     described_class.guid.should match(/\A[\w\-\.]+?\-\d{1,5}\-\d{10}\-\d{1,3}\z/)
+   end
+
+ end
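This spec pins down the two helpers the consumer group relies on: a process-wide atomic counter and a GUID built from host, process id, timestamp, and counter (per the regex above). Illustrative values, not actual output:

    require 'poseidon_cluster'

    Poseidon::Cluster.inc!   # => 1, then 2, ... (thread-safe)
    Poseidon::Cluster.guid   # => e.g. "myhost-12345-1415278522-3"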
@@ -0,0 +1,286 @@
+ require 'spec_helper'
+
+ describe Poseidon::ConsumerGroup do
+
+   def fetch_response(n)
+     set = Poseidon::MessageSet.new
+     n.times {|i| set << Poseidon::Message.new(value: "value", key: "key", offset: i) }
+     pfr = Poseidon::Protocol::PartitionFetchResponse.new(0, 0, 100, set)
+     tfr = Poseidon::Protocol::TopicFetchResponse.new("mytopic", [pfr])
+     Poseidon::Protocol::FetchResponse.new(nil, [tfr])
+   end
+
+   let :brokers do
+     [ Poseidon::Protocol::Broker.new(1, "localhost", 29092), # id,host,port
+       Poseidon::Protocol::Broker.new(2, "localhost", 29091), ]
+   end
+
+   let :partitions do
+     [ Poseidon::Protocol::PartitionMetadata.new(0, 0, 1, [1,2], []), # err,id,leader,replicas,isr
+       Poseidon::Protocol::PartitionMetadata.new(0, 1, 2, [1,2], []), ]
+   end
+
+   let :topics do
+     [ Poseidon::TopicMetadata.new(Poseidon::Protocol::TopicMetadataStruct.new(0, "mytopic", partitions)) ]
+   end
+
+   let :metadata do
+     Poseidon::Protocol::MetadataResponse.new nil, brokers.dup, topics.dup
+   end
+
+   let :zk_client do
+     double "ZK", mkdir_p: nil, get: nil, set: nil, delete: nil, create: "/path", register: nil, children: ["my-group-UNIQUEID"], close: nil
+   end
+
+   let(:group) { described_class.new "my-group", ["localhost:29092", "localhost:29091"], ["localhost:22181"], "mytopic" }
+   subject { group }
+
+   before do
+     allow(ZK).to receive_messages(new: zk_client)
+     allow(Poseidon::Cluster).to receive_messages(guid: "UNIQUEID")
+     allow_any_instance_of(Poseidon::ConsumerGroup).to receive(:sleep)
+     allow_any_instance_of(Poseidon::PartitionConsumer).to receive_messages(resolve_offset_if_necessary: 0)
+     allow_any_instance_of(Poseidon::BrokerPool).to receive_messages(fetch_metadata_from_broker: metadata)
+
+     allow_any_instance_of(Poseidon::Connection).to receive(:fetch).with(10000, 1, ->req { req[0].partition_fetches[0].partition == 0 }).and_return(fetch_response(10))
+     allow_any_instance_of(Poseidon::Connection).to receive(:fetch).with(10000, 1, ->req { req[0].partition_fetches[0].partition == 1 }).and_return(fetch_response(5))
+   end
+
+   it { should be_registered }
+   its(:name) { should == "my-group" }
+   its(:topic) { should == "mytopic" }
+   its(:pool) { should be_instance_of(Poseidon::BrokerPool) }
+   its(:id) { should == "my-group-UNIQUEID" }
+   its(:zk) { should be(zk_client) }
+
+   its(:claimed) { should == [0, 1] }
+   its(:metadata) { should be_instance_of(Poseidon::ClusterMetadata) }
+   its(:topic_metadata) { should be_instance_of(Poseidon::TopicMetadata) }
+   its(:registries) { should == {
+     consumer: "/consumers/my-group/ids",
+     owner: "/consumers/my-group/owners/mytopic",
+     offset: "/consumers/my-group/offsets/mytopic",
+   }}
+
+   its("metadata.brokers.keys") { should =~ [1,2] }
+   its("topic_metadata.partition_count") { should == 2 }
+
+   it "should register with zookeeper and rebalance" do
+     zk_client.should_receive(:mkdir_p).with("/consumers/my-group/ids")
+     zk_client.should_receive(:mkdir_p).with("/consumers/my-group/owners/mytopic")
+     zk_client.should_receive(:mkdir_p).with("/consumers/my-group/offsets/mytopic")
+     zk_client.should_receive(:create).with("/consumers/my-group/ids/my-group-UNIQUEID", "{}", ephemeral: true)
+     zk_client.should_receive(:register).with("/consumers/my-group/ids")
+     described_class.any_instance.should_receive :rebalance!
+
+     subject
+   end
+
+   it "should sort partitions by leader address" do
+     subject.partitions.map(&:id).should == [1, 0]
+   end
+
+   it "should not fail if topic doesn't exist" do
+     no_topics = Poseidon::Protocol::MetadataResponse.new nil, brokers.dup, []
+     Poseidon::BrokerPool.any_instance.stub(:fetch_metadata_from_broker).and_return(no_topics)
+
+     subject.partitions.should == []
+     subject.claimed.should == []
+   end
+
+   it "should return the offset for each partition" do
+     zk_client.should_receive(:get).with("/consumers/my-group/offsets/mytopic/0", ignore: :no_node).and_return([nil])
+     subject.offset(0).should == 0
+
+     zk_client.should_receive(:get).with("/consumers/my-group/offsets/mytopic/1", ignore: :no_node).and_return(["21", nil])
+     subject.offset(1).should == 21
+
+     zk_client.should_receive(:get).with("/consumers/my-group/offsets/mytopic/2", ignore: :no_node).and_return(["0", nil])
+     subject.offset(2).should == 0
+   end
+
+   it "should return the leader for a partition" do
+     subject.leader(0).should == brokers[0]
+     subject.leader(1).should == brokers[1]
+     subject.leader(2).should be_nil
+   end
+
+   it "should checkout individual partition consumers (atomically)" do
+     subject.checkout {|c| c.partition.should == 1 }.should be_truthy
+     subject.checkout {|c| c.partition.should == 0 }.should be_truthy
+
+     n = 0
+     a = Thread.new do
+       100.times { subject.checkout {|_| n+=1 } }
+       Thread.pass
+       100.times { subject.checkout {|_| n+=1 } }
+     end
+     b = Thread.new do
+       100.times { subject.checkout {|_| n+=1 } }
+       Thread.pass
+       100.times { subject.checkout {|_| n+=1 } }
+     end
+     [a, b].each &:join
+     n.should == 400
+   end
+
+   describe "consumer" do
+     subject { described_class::Consumer.new group, 1 }
+     before { group.stub(:offset).with(1).and_return(432) }
+
+     it { should be_a(Poseidon::PartitionConsumer) }
+     its(:offset) { should == 432 }
+
+     it 'should start with the earliest offset if none stored' do
+       group.unstub(:offset)
+       subject.offset.should == :earliest_offset
+     end
+
+     it 'should start with the latest offset if none stored and in trailing mode' do
+       group.unstub(:offset)
+       trailing_consumer = described_class::Consumer.new group, 1, {trail: true}
+       trailing_consumer.offset.should == :latest_offset
+     end
+
+   end
+
+   describe "rebalance" do
+
+     it "should watch out for new consumers joining/leaving" do
+       described_class.any_instance.should_receive(:rebalance!)
+       subject
+     end
+
+     it "should distribute available partitions between consumers" do
+       subject.claimed.should == [0, 1]
+       zk_client.stub children: ["my-group-UNIQUEID", "my-group-OTHERID"]
+       -> { subject.send :rebalance! }.should change { subject.claimed }.to([0])
+       zk_client.stub children: ["my-group-UNIQUEID", "my-group-OTHERID", "my-group-THIRDID"]
+       -> { subject.send :rebalance! }.should change { subject.claimed }.to([])
+     end
+
+     it "should allocate partitions correctly" do
+       subject.claimed.should == [0, 1]
+
+       zk_client.stub children: ["my-group-UNIQUEID", "my-group-ZID"]
+       zk_client.should_receive(:delete).with("/consumers/my-group/owners/mytopic/1", ignore: :no_node)
+       -> { subject.send :rebalance! }.should change { subject.claimed }.to([1])
+
+       zk_client.stub children: ["my-group-UNIQUEID", "my-group-ZID", "my-group-AID"]
+       -> { subject.send :rebalance! }.should change { subject.claimed }.to([0])
+     end
+
+   end
+
+   describe "fetch" do
+
+     it "should return messages from claimed partitions" do
+       subject.fetch do |n, msg|
+         n.should == 1
+         msg.size.should == 5
+       end.should be_truthy
+
+       subject.fetch do |n, msg|
+         n.should == 0
+         msg.size.should == 10
+       end.should be_truthy
+
+       subject.fetch do |n, msg|
+         n.should == 1
+         msg.size.should == 5
+       end.should be_truthy
+     end
+
+     it "should auto-commit fetched offset" do
+       zk_client.should_receive(:set).with("/consumers/my-group/offsets/mytopic/1", "5")
+       subject.fetch {|n, _| n.should == 1 }
+     end
+
+     it "should skip auto-commits if requested" do
+       zk_client.should_not_receive(:set)
+       subject.fetch(commit: false) {|n, _| n.should == 1 }
+     end
+
+     it "should skip auto-commits if block results in false" do
+       zk_client.should_not_receive(:set)
+       subject.fetch {|n, _| n.should == 1; false }
+     end
+
+     it "should return false when trying to fetch messages without a claim" do
+       no_topics = Poseidon::Protocol::MetadataResponse.new nil, brokers.dup, []
+       Poseidon::BrokerPool.any_instance.stub fetch_metadata_from_broker: no_topics
+
+       subject.claimed.should == []
+       subject.fetch {|*| }.should be_falsey
+     end
+
+     it "should return true even when no messages were fetched" do
+       Poseidon::Connection.any_instance.stub fetch: fetch_response(0)
+       subject.fetch {|*| }.should be_truthy
+     end
+
+   end
+
224
+   describe "fetch_loop" do
+
+     it "should fetch indefinitely" do
+       total, cycles = 0, 0
+       subject.fetch_loop do |_, m|
+         total += m.size
+         break if (cycles+=1) > 2
+       end
+       total.should == 20
+       cycles.should == 3
+     end
+
+     it "should delay if fetch was unsuccessful" do
+       subject.stub fetch: false
+
+       cycles = 0
+       subject.should_receive(:sleep).with(1)
+       subject.fetch_loop do |n, m|
+         n.should == -1
+         m.should == []
+         break if (cycles+=1) > 1
+       end
+     end
+
+     it "should delay if fetch didn't yield any results" do
+       subject.stub(:fetch).and_yield(3, []).and_return(true)
+
+       cycles = 0
+       subject.should_receive(:sleep).with(1)
+       subject.fetch_loop do |n, m|
+         n.should == 3
+         m.should == []
+         break if (cycles+=1) > 1
+       end
+     end
+
+   end
261
+
+   describe "pick" do
+
+     { [3, ["N1", "N2", "N3"], "N1"] => (0..0),
+       [3, ["N1", "N2", "N3"], "N2"] => (1..1),
+       [3, ["N1", "N2", "N3"], "N3"] => (2..2),
+       [4, ["N2", "N4", "N3", "N1"], "N3"] => (2..2),
+       [3, ["N1", "N2", "N3"], "N4"] => nil,
+       [5, ["N1", "N2", "N3"], "N1"] => (0..1),
+       [5, ["N1", "N2", "N3"], "N2"] => (2..3),
+       [5, ["N1", "N2", "N3"], "N3"] => (4..4),
+       [5, ["N1", "N2", "N3"], "N4"] => nil,
+       [2, ["N1", "N2"], "N9"] => nil,
+       [1, ["N1", "N2", "N3"], "N1"] => (0..0),
+       [1, ["N1", "N2", "N3"], "N2"] => nil,
+       [1, ["N1", "N2", "N3"], "N3"] => nil,
+       [5, ["N1", "N2"], "N1"] => (0..2),
+       [5, ["N1", "N2"], "N2"] => (3..4),
+     }.each do |args, expected|
+       it "should pick #{expected.inspect} from #{args.inspect}" do
+         described_class.pick(*args).should == expected
+       end
+     end
+
+   end
+ end
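The pick table above encodes the allocation rule: partitions are divided into near-equal contiguous ranges across the sorted member list, and non-members get nil. Restating three of its rows as direct calls:

    Poseidon::ConsumerGroup.pick(5, ["N1", "N2", "N3"], "N1")   # => 0..1
    Poseidon::ConsumerGroup.pick(5, ["N1", "N2", "N3"], "N3")   # => 4..4
    Poseidon::ConsumerGroup.pick(5, ["N1", "N2", "N3"], "N4")   # => nil (not a member)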