elaine 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +19 -0
- data/.ruby-version +1 -0
- data/Gemfile +3 -0
- data/README.md +90 -0
- data/Rakefile +3 -0
- data/autotest/discover.rb +1 -0
- data/elaine.gemspec +29 -0
- data/examples/.gitignore +1 -0
- data/examples/erdos-renyi-N_1000-E_0.2.egonets +1000 -0
- data/lib/elaine.rb +30 -0
- data/lib/elaine/coordinator.rb +33 -0
- data/lib/elaine/distributed.rb +11 -0
- data/lib/elaine/distributed/coordinator.rb +134 -0
- data/lib/elaine/distributed/post_office.rb +80 -0
- data/lib/elaine/distributed/vertex.rb +52 -0
- data/lib/elaine/distributed/worker.rb +85 -0
- data/lib/elaine/version.rb +3 -0
- data/lib/elaine/vertex.rb +41 -0
- data/lib/elaine/worker.rb +25 -0
- data/spec/coordinator_spec.rb +95 -0
- data/spec/distributed_coordinator_spec.rb +143 -0
- data/spec/distributed_helper.rb +196 -0
- data/spec/distributed_page_rank_vertex.rb +23 -0
- data/spec/distributed_triad_census_vertex.rb +41 -0
- data/spec/helper.rb +10 -0
- data/spec/test_add_vertex.rb +7 -0
- data/spec/test_coordinator_node.rb +12 -0
- data/spec/test_worker_node1.rb +12 -0
- data/spec/test_worker_node2.rb +12 -0
- data/spec/vertex_spec.rb +56 -0
- data/spec/worker_spec.rb +61 -0
- metadata +173 -0
@@ -0,0 +1,25 @@
|
|
1
|
+
module Elaine
  # Runs vertex computations for one partition of the graph (the
  # single-process, thread-based variant of the worker).
  class Worker
    attr_reader :vertices, :active

    # graph: the non-empty list of vertices assigned to this worker.
    # Raises RuntimeError when given an empty partition.
    def initialize(graph = [])
      raise 'empty worker graph' if graph.empty?
      @vertices = graph
      @active = graph.size
    end

    # Executes one superstep asynchronously and returns the Thread so the
    # caller can join on it. For each vertex: deliver pending messages from
    # the PostOffice, re-activate it if it received any, then step every
    # active vertex. Afterwards @active holds the count of vertices that
    # are still active.
    def superstep
      Thread.new do
        @vertices.each do |vertex|
          vertex.messages = PostOffice.instance.read(vertex.id)
          vertex.active! unless vertex.messages.empty?
        end

        runnable = @vertices.select(&:active?)
        runnable.each(&:step)

        @active = runnable.count(&:active?)
      end
    end
  end
end
|
@@ -0,0 +1,95 @@
|
|
1
|
+
require 'helper'

describe Coordinator do
  it 'should not allow empty graphs' do
    lambda { Coordinator.new([]) }.should raise_error
  end

  # Small cyclic graph of AddVertex nodes (defined in the spec helpers);
  # each vertex increments its value per superstep until it reaches 5.
  let(:graph) do
    [
      AddVertex.new(:igvita, 1, :wikipedia),
      AddVertex.new(:wikipedia, 2, :google),
      AddVertex.new(:google, 1, :wikipedia)
    ]
  end

  it 'should partition graphs with variable worker sizes' do
    c = Coordinator.new(graph)
    c.workers.size.should == 1

    c = Coordinator.new(graph, partitions: 2)
    c.workers.size.should == 2
  end

  it 'should schedule workers to run until there are no active vertices' do
    c = Coordinator.new(graph)
    c.run

    c.workers.each do |w|
      w.vertices.each do |v|
        v.value.should == 5
      end
    end
  end

  context 'PageRank' do
    # PageRank with damping factor 0.85 over a 3-page graph; each vertex
    # runs 30 supersteps, distributing its value evenly across its
    # out-edges, then halts.
    class PageRankVertex < Vertex
      def compute
        if superstep >= 1
          sum = messages.inject(0) { |total, msg| total + msg }
          @value = (0.15 / 3) + 0.85 * sum
        end

        if superstep < 30
          deliver_to_all_neighbors(@value / neighbors.size)
        else
          halt
        end
      end
    end

    it 'should calculate PageRank of a circular graph' do
      graph = [
        # name value out-edges
        PageRankVertex.new(:igvita, 1, :wikipedia),
        PageRankVertex.new(:wikipedia, 1, :google),
        PageRankVertex.new(:google, 1, :igvita)
      ]

      c = Coordinator.new(graph)
      c.run

      # A symmetric ring converges to equal rank: ~1/3 per page.
      c.workers.each do |w|
        w.vertices.each do |v|
          (v.value * 100).to_i.should == 33
        end
      end
    end

    it 'should calculate PageRank of arbitrary graph' do
      graph = [
        # page 1 -> page 1, page 2 (0.18)
        # page 2 -> page 1, page 3 (0.13)
        # page 3 -> page 3 (0.69)

        # name value out-edges
        PageRankVertex.new(:igvita, 1, :igvita, :wikipedia),
        PageRankVertex.new(:wikipedia, 1, :igvita, :google),
        PageRankVertex.new(:google, 1, :google)
      ]

      c = Coordinator.new(graph)
      c.run

      c.workers.each do |w|
        (w.vertices.find { |v| v.id == :igvita }.value * 100).ceil.to_i.should == 19
        (w.vertices.find { |v| v.id == :wikipedia }.value * 100).ceil.to_i.should == 13
        (w.vertices.find { |v| v.id == :google }.value * 100).to_i.should == 68
      end
    end
  end

  it 'should partition nodes by hashing the node id'
  it 'should allow scheduling multiple partitions to a single worker'
end
|
@@ -0,0 +1,143 @@
|
|
1
|
+
require 'distributed_helper'

describe Elaine::Distributed::Coordinator do
  # it 'should not allow empty graphs' do
  #   lambda { Coordinator.new([]) }.should raise_error
  # end

  before(:each) do
    TestCoordinator.start
    TestCoordinator.wait_until_ready
    TestWorker1.start
    TestWorker1.wait_until_ready
    TestWorker2.start
    TestWorker2.wait_until_ready
  end

  after(:each) do
    TestWorker1.stop
    TestWorker2.stop
    TestCoordinator.stop
  end

  # Looks up the coordinator actor on the spawned coordinator node.
  def coordinator
    DCell::Node["test.elaine.coordinator"][:coordinator]
  end

  # AddVertex graph: each vertex adds 1 per superstep and halts at 5.
  let(:graph) do
    [
      { klazz: DistributedAddVertex, id: :igvita,    value: 1, outedges: [:wikipedia] },
      { klazz: DistributedAddVertex, id: :wikipedia, value: 2, outedges: [:google] },
      { klazz: DistributedAddVertex, id: :google,    value: 1, outedges: [:wikipedia] }
    ]
  end

  it "should schedule workers to run until there are no active vertices" do
    coordinator.graph = graph
    coordinator.partition
    coordinator.run_job

    coordinator.vertex_values.each do |v|
      v[:value].should == 5
    end
  end

  it "should calculate PageRank of a circular graph" do
    g = [
      { klazz: DistributedPageRankVertex, id: :igvita,    value: 1, outedges: [:wikipedia] },
      { klazz: DistributedPageRankVertex, id: :wikipedia, value: 1, outedges: [:google] },
      { klazz: DistributedPageRankVertex, id: :google,    value: 1, outedges: [:igvita] }
    ]

    coordinator.graph = g
    coordinator.partition
    coordinator.run_job

    # A symmetric ring converges to equal rank: ~1/3 per page.
    coordinator.vertex_values.each do |v|
      (v[:value] * 100).to_i.should == 33
    end
  end

  it "should calculate PageRank of an arbitrary graph" do
    g = [
      # page 1 -> page 1, page 2 (0.18)
      # page 2 -> page 1, page 3 (0.13)
      # page 3 -> page 3 (0.69)

      # name value out-edges
      { klazz: DistributedPageRankVertex, id: :igvita,    value: 1, outedges: [:igvita, :wikipedia] },
      { klazz: DistributedPageRankVertex, id: :wikipedia, value: 1, outedges: [:igvita, :google] },
      { klazz: DistributedPageRankVertex, id: :google,    value: 1, outedges: [:google] }
    ]

    coordinator.graph = g
    coordinator.partition
    coordinator.run_job

    coordinator.vertex_values.each do |v|
      case v[:id]
      when :igvita
        (v[:value] * 100).ceil.to_i.should == 19
      when :wikipedia
        (v[:value] * 100).ceil.to_i.should == 13
      when :google
        (v[:value] * 100).to_i.should == 68
      else
        fail "Unexpected node id: #{v[:id]}"
      end
    end
  end

  it 'should partition nodes by hashing the node id'
  it 'should allow scheduling multiple partitions to a single worker'
end
|
@@ -0,0 +1,196 @@
|
|
1
|
+
require 'elaine'
|
2
|
+
require 'elaine/distributed'
|
3
|
+
require 'dcell'
|
4
|
+
|
5
|
+
# DCell.start
|
6
|
+
|
7
|
+
# Test vertex: bumps its value by one each superstep and halts once
# the value reaches 5.
class DistributedAddVertex < Elaine::Distributed::Vertex
  def compute
    @value += 1
    halt if @value >= 5
  end
end
|
13
|
+
|
14
|
+
# Test vertex implementing PageRank (damping factor 0.85, 3 pages):
# from superstep 1 onward it recomputes its rank from incoming
# messages, and for 30 supersteps it spreads its value evenly across
# its out-edges before halting.
class DistributedPageRankVertex < Elaine::Distributed::Vertex
  def compute
    if superstep >= 1
      sum = messages.inject(0) { |total, msg| total + msg }
      @value = (0.15 / 3) + 0.85 * sum
    end

    if superstep < 30
      deliver_to_all_neighbors(@value / neighbors.size)
    else
      halt
    end
  end
end
|
28
|
+
|
29
|
+
|
30
|
+
# Spawns and manages the external coordinator node process used by the
# distributed specs. Structured like TestWorker1/TestWorker2.
module TestCoordinator
  PORT = 8090

  # Spawns the coordinator node script in a child process and remembers
  # its pid. (Uses a module-level instance variable, not a class
  # variable, matching the TestWorker modules.)
  def self.start
    @pid = Process.spawn Gem.ruby, File.expand_path("../test_coordinator_node.rb", __FILE__)

    unless @pid
      STDERR.print "ERROR: Couldn't start test coordinator node"
      exit 1
    end
  end

  # Polls the node's TCP port (up to ~30 attempts, 1s apart) until it
  # accepts a connection; raises if the node never comes up.
  def self.wait_until_ready
    STDERR.print "Waiting for test coordinator node to start up..."

    socket = nil
    30.times do
      begin
        socket = TCPSocket.open("127.0.0.1", PORT)
        break if socket
      rescue Errno::ECONNREFUSED
        STDERR.print "."
        sleep 1
      end
    end

    if socket
      STDERR.puts " done!"
      socket.close
    else
      STDERR.puts " FAILED!"
      raise "couldn't connect to test node!"
    end
  end

  # Force-kills the spawned node (SIGKILL) and reaps the child process.
  # Errno::ESRCH (already gone) is ignored.
  def self.stop
    unless @pid
      STDERR.print "ERROR: Test coordinator node was never started!"
      exit 1
    end
    Process.kill 9, @pid
  rescue Errno::ESRCH
  ensure
    Process.wait @pid rescue nil
  end
end
|
76
|
+
|
77
|
+
# Spawns and manages external worker node 1 for the distributed specs.
module TestWorker1
  PORT = 8091

  # Launches the worker node script in a child process.
  def self.start
    @pid = Process.spawn(Gem.ruby, File.expand_path("../test_worker_node1.rb", __FILE__))

    return if @pid

    STDERR.print "ERROR: Couldn't start test worker node 1"
    exit 1
  end

  # Polls the worker's TCP port (up to ~30 attempts, 1s apart) until it
  # accepts a connection; raises if it never comes up.
  def self.wait_until_ready
    STDERR.print "Waiting for test worker node 1 to start up..."

    socket = nil
    30.times do
      begin
        socket = TCPSocket.open("127.0.0.1", PORT)
        break
      rescue Errno::ECONNREFUSED
        STDERR.print "."
        sleep 1
      end
    end

    unless socket
      STDERR.puts " FAILED!"
      raise "couldn't connect to test node!"
    end

    STDERR.puts " done!"
    socket.close
  end

  # Force-kills the spawned worker (SIGKILL) and reaps the child;
  # ignores Errno::ESRCH if the process is already gone.
  def self.stop
    unless @pid
      STDERR.print "ERROR: Test worker node 1 was never started!"
      exit 1
    end
    Process.kill 9, @pid
  rescue Errno::ESRCH
  ensure
    Process.wait @pid rescue nil
  end
end
|
124
|
+
|
125
|
+
|
126
|
+
# Spawns and manages external worker node 2 for the distributed specs.
module TestWorker2
  PORT = 8092

  # Launches the worker node script in a child process.
  def self.start
    @pid = Process.spawn(Gem.ruby, File.expand_path("../test_worker_node2.rb", __FILE__))

    return if @pid

    STDERR.print "ERROR: Couldn't start test worker node 2"
    exit 1
  end

  # Polls the worker's TCP port (up to ~30 attempts, 1s apart) until it
  # accepts a connection; raises if it never comes up.
  def self.wait_until_ready
    STDERR.print "Waiting for test worker node 2 to start up..."

    socket = nil
    30.times do
      begin
        socket = TCPSocket.open("127.0.0.1", PORT)
        break
      rescue Errno::ECONNREFUSED
        STDERR.print "."
        sleep 1
      end
    end

    unless socket
      STDERR.puts " FAILED!"
      raise "couldn't connect to test node!"
    end

    STDERR.puts " done!"
    socket.close
  end

  # Force-kills the spawned worker (SIGKILL) and reaps the child;
  # ignores Errno::ESRCH if the process is already gone.
  def self.stop
    unless @pid
      STDERR.print "ERROR: Test worker node 2 was never started!"
      exit 1
    end
    Process.kill 9, @pid
  rescue Errno::ESRCH
  ensure
    Process.wait @pid rescue nil
  end
end
|
173
|
+
|
174
|
+
|
175
|
+
RSpec.configure do |config|
  # Boot a DCell node inside the spec-runner process itself before any
  # examples run, so the specs can talk to the spawned test nodes.
  config.before(:suite) do
    DCell.setup
    DCell.run!
  end

  # Retained scaffolding: per-suite node management was moved into the
  # specs' before/after(:each) hooks.
  # config.before(:all) do
  #   TestCoordinator.start
  #   TestCoordinator.wait_until_ready
  #   TestWorker1.start
  #   TestWorker1.wait_until_ready
  #   TestWorker2.start
  #   TestWorker2.wait_until_ready
  # end

  # config.after(:all) do
  #   TestWorker1.stop
  #   TestWorker2.stop
  #   TestCoordinator.stop
  # end
end
|