elaine 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +19 -0
- data/.ruby-version +1 -0
- data/Gemfile +3 -0
- data/README.md +90 -0
- data/Rakefile +3 -0
- data/autotest/discover.rb +1 -0
- data/elaine.gemspec +29 -0
- data/examples/.gitignore +1 -0
- data/examples/erdos-renyi-N_1000-E_0.2.egonets +1000 -0
- data/lib/elaine.rb +30 -0
- data/lib/elaine/coordinator.rb +33 -0
- data/lib/elaine/distributed.rb +11 -0
- data/lib/elaine/distributed/coordinator.rb +134 -0
- data/lib/elaine/distributed/post_office.rb +80 -0
- data/lib/elaine/distributed/vertex.rb +52 -0
- data/lib/elaine/distributed/worker.rb +85 -0
- data/lib/elaine/version.rb +3 -0
- data/lib/elaine/vertex.rb +41 -0
- data/lib/elaine/worker.rb +25 -0
- data/spec/coordinator_spec.rb +95 -0
- data/spec/distributed_coordinator_spec.rb +143 -0
- data/spec/distributed_helper.rb +196 -0
- data/spec/distributed_page_rank_vertex.rb +23 -0
- data/spec/distributed_triad_census_vertex.rb +41 -0
- data/spec/helper.rb +10 -0
- data/spec/test_add_vertex.rb +7 -0
- data/spec/test_coordinator_node.rb +12 -0
- data/spec/test_worker_node1.rb +12 -0
- data/spec/test_worker_node2.rb +12 -0
- data/spec/vertex_spec.rb +56 -0
- data/spec/worker_spec.rb +61 -0
- metadata +173 -0
@@ -0,0 +1,25 @@
|
|
1
|
+
module Elaine
  # Runs one partition of the graph: holds a subset of vertices and
  # executes Pregel-style supersteps over them.
  class Worker
    attr_reader :vertices, :active

    # graph: the non-empty list of vertices assigned to this worker.
    def initialize(graph = [])
      raise 'empty worker graph' if graph.empty?

      @vertices = graph
      @active = graph.size
    end

    # Executes a single superstep asynchronously and returns the Thread,
    # so the caller (coordinator) can join it. Afterwards @active holds
    # the number of vertices that remain active.
    def superstep
      Thread.new do
        # Deliver pending mail; any vertex with messages is re-activated.
        @vertices.each do |vertex|
          vertex.messages = PostOffice.instance.read(vertex.id)
          vertex.active! unless vertex.messages.empty?
        end

        runnable = @vertices.select(&:active?)
        runnable.each(&:step)

        # Count only the vertices that did not halt during this step.
        @active = runnable.count(&:active?)
      end
    end
  end
end
|
@@ -0,0 +1,95 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
# Specs for the single-process Coordinator: partitioning, scheduling,
# and two PageRank end-to-end runs.
describe Coordinator do
  it 'should not allow empty graphs' do
    lambda { Coordinator.new([]) }.should raise_error
  end

  let(:graph) do
    [
      AddVertex.new(:igvita, 1, :wikipedia),
      AddVertex.new(:wikipedia, 2, :google),
      AddVertex.new(:google, 1, :wikipedia)
    ]
  end

  it 'should partition graphs with variable worker sizes' do
    c = Coordinator.new(graph)
    c.workers.size.should == 1

    c = Coordinator.new(graph, partitions: 2)
    c.workers.size.should == 2
  end

  it 'should schedule workers to run until there are no active vertices' do
    c = Coordinator.new(graph)
    c.run

    # AddVertex increments until it reaches 5, then halts.
    c.workers.each do |w|
      w.vertices.each do |v|
        v.value.should == 5
      end
    end
  end

  context 'PageRank' do
    class PageRankVertex < Vertex
      def compute
        if superstep >= 1
          sum = messages.inject(0) {|total,msg| total += msg; total }
          # NOTE: the 0.15 / 3 term hard-codes a 3-vertex graph.
          @value = (0.15 / 3) + 0.85 * sum
        end

        # Run for 30 supersteps, then halt.
        if superstep < 30
          deliver_to_all_neighbors(@value / neighbors.size)
        else
          halt
        end
      end
    end

    it 'should calculate PageRank of a circular graph' do
      graph = [
        # name value out-edges
        PageRankVertex.new(:igvita, 1, :wikipedia),
        PageRankVertex.new(:wikipedia, 1, :google),
        PageRankVertex.new(:google, 1, :igvita)
      ]

      c = Coordinator.new(graph)
      c.run

      # A 3-cycle converges to rank 1/3 for every vertex.
      c.workers.each do |w|
        w.vertices.each do |v|
          (v.value * 100).to_i.should == 33
        end
      end
    end

    it 'should calculate PageRank of arbitrary graph' do
      graph = [
        # page 1 -> page 1, page 2 (0.18)
        # page 2 -> page 1, page 3 (0.13)
        # page 3 -> page 3 (0.69)

        # name value out-edges
        PageRankVertex.new(:igvita, 1, :igvita, :wikipedia),
        PageRankVertex.new(:wikipedia, 1, :igvita, :google),
        PageRankVertex.new(:google, 1, :google)
      ]

      c = Coordinator.new(graph)
      c.run

      c.workers.each do |w|
        (w.vertices.find {|v| v.id == :igvita }.value * 100).ceil.to_i.should == 19
        (w.vertices.find {|v| v.id == :wikipedia }.value * 100).ceil.to_i.should == 13
        (w.vertices.find {|v| v.id == :google }.value * 100).to_i.should == 68
      end
    end
  end

  # Fixed typo: "parition" -> "partition".
  it 'should partition nodes by hashing the node id'
  it 'should allow scheduling multiple partitions to a single worker'
end
|
@@ -0,0 +1,143 @@
|
|
1
|
+
require 'distributed_helper'
|
2
|
+
|
3
|
+
# Specs for the DCell-backed distributed Coordinator. Each example boots
# a coordinator process and two worker processes, then drives the job
# through the coordinator actor.
describe Elaine::Distributed::Coordinator do
  # it 'should not allow empty graphs' do
  #   lambda { Coordinator.new([]) }.should raise_error
  # end

  before(:each) do
    TestCoordinator.start
    TestCoordinator.wait_until_ready
    TestWorker1.start
    TestWorker1.wait_until_ready
    TestWorker2.start
    TestWorker2.wait_until_ready
  end

  after(:each) do
    TestWorker1.stop
    TestWorker2.stop
    TestCoordinator.stop
  end

  # Convenience accessor for the remote coordinator actor; hoisted so we
  # don't repeat the DCell::Node lookup on every call.
  def coordinator
    DCell::Node["test.elaine.coordinator"][:coordinator]
  end

  let(:graph) do
    [
      { klazz: DistributedAddVertex, id: :igvita,    value: 1, outedges: [:wikipedia] },
      { klazz: DistributedAddVertex, id: :wikipedia, value: 2, outedges: [:google] },
      { klazz: DistributedAddVertex, id: :google,    value: 1, outedges: [:wikipedia] }
    ]
  end

  # Fixed typo: "intil" -> "until".
  it "should schedule workers to run until there are no active vertices" do
    coordinator.graph = graph
    coordinator.partition
    coordinator.run_job

    coordinator.vertex_values.each do |v|
      v[:value].should == 5
    end
  end

  it "should calculate PageRank of a circular graph" do
    g = [
      { klazz: DistributedPageRankVertex, id: :igvita,    value: 1, outedges: [:wikipedia] },
      { klazz: DistributedPageRankVertex, id: :wikipedia, value: 1, outedges: [:google] },
      { klazz: DistributedPageRankVertex, id: :google,    value: 1, outedges: [:igvita] }
    ]

    coordinator.graph = g
    coordinator.partition
    coordinator.run_job

    # A 3-cycle converges to rank 1/3 for every vertex.
    coordinator.vertex_values.each do |v|
      (v[:value] * 100).to_i.should == 33
    end
  end

  it "should calculate PageRank of an arbitrary graph" do
    g = [
      # page 1 -> page 1, page 2 (0.18)
      # page 2 -> page 1, page 3 (0.13)
      # page 3 -> page 3 (0.69)

      # name value out-edges
      { klazz: DistributedPageRankVertex, id: :igvita,    value: 1, outedges: [:igvita, :wikipedia] },
      { klazz: DistributedPageRankVertex, id: :wikipedia, value: 1, outedges: [:igvita, :google] },
      { klazz: DistributedPageRankVertex, id: :google,    value: 1, outedges: [:google] }
    ]
    coordinator.graph = g
    coordinator.partition
    coordinator.run_job

    coordinator.vertex_values.each do |v|
      if v[:id] == :igvita
        (v[:value] * 100).ceil.to_i.should == 19
      elsif v[:id] == :wikipedia
        (v[:value] * 100).ceil.to_i.should == 13
      elsif v[:id] == :google
        (v[:value] * 100).to_i.should == 68
      else
        fail "Unexpected node id: #{v[:id]}"
      end
    end
  end

  # Fixed typo: "parition" -> "partition".
  it 'should partition nodes by hashing the node id'
  it 'should allow scheduling multiple partitions to a single worker'
end
|
@@ -0,0 +1,196 @@
|
|
1
|
+
require 'elaine'
|
2
|
+
require 'elaine/distributed'
|
3
|
+
require 'dcell'
|
4
|
+
|
5
|
+
# DCell.start
|
6
|
+
|
7
|
+
# Test vertex: bumps its value once per superstep and halts as soon as
# the value reaches 5.
class DistributedAddVertex < Elaine::Distributed::Vertex
  def compute
    @value = @value + 1
    halt unless @value < 5
  end
end
|
13
|
+
|
14
|
+
# PageRank test vertex with damping factor 0.85. Runs for 30 supersteps
# and then halts.
# NOTE(review): the 0.15 / 3 term hard-codes a 3-vertex graph.
class DistributedPageRankVertex < Elaine::Distributed::Vertex
  def compute
    if superstep >= 1
      incoming = messages.inject(0, :+)
      @value = (0.15 / 3) + 0.85 * incoming
    end

    if superstep >= 30
      halt
    else
      # Split this vertex's rank evenly across its out-edges.
      deliver_to_all_neighbors(@value / neighbors.size)
    end
  end
end
|
28
|
+
|
29
|
+
|
30
|
+
# Spawns the test coordinator node in a child process and waits for its
# TCP endpoint to accept connections.
module TestCoordinator
  PORT = 8090

  def self.start
    # Use a class-instance variable (@pid) instead of a class variable
    # (@@pid), consistent with TestWorker1/TestWorker2.
    @pid = Process.spawn Gem.ruby, File.expand_path("../test_coordinator_node.rb", __FILE__)
    puts "Coordinator pid: #{@pid}"
    # NOTE(review): Process.spawn raises on failure rather than
    # returning nil, so this guard is likely unreachable.
    unless @pid
      STDERR.print "ERROR: Couldn't start test coordinator node"
      exit 1
    end
  end

  # Polls the node's port for up to ~30 seconds; raises if it never
  # comes up.
  def self.wait_until_ready
    STDERR.print "Waiting for test coordinator node to start up..."

    socket = nil
    30.times do
      begin
        socket = TCPSocket.open("127.0.0.1", PORT)
        break if socket
      rescue Errno::ECONNREFUSED
        STDERR.print "."
        sleep 1
      end
    end

    if socket
      STDERR.puts " done!"
      socket.close
    else
      STDERR.puts " FAILED!"
      raise "couldn't connect to test node!"
    end
  end

  # Kills the child process (SIGKILL) and reaps it; tolerates an
  # already-dead process.
  def self.stop
    puts "pid: #{@pid}"
    unless @pid
      STDERR.print "ERROR: Test coordinator node was never started!"
      exit 1
    end
    Process.kill 9, @pid
  rescue Errno::ESRCH
  ensure
    Process.wait @pid rescue nil
  end
end
|
76
|
+
|
77
|
+
# Spawns test worker node 1 in a child process and waits for its TCP
# endpoint to accept connections.
module TestWorker1
  PORT = 8091

  def self.start
    script = File.expand_path("../test_worker_node1.rb", __FILE__)
    @pid = Process.spawn(Gem.ruby, script)

    unless @pid
      STDERR.print "ERROR: Couldn't start test worker node 1"
      exit 1
    end
  end

  # Polls the worker's port for up to ~30 seconds; raises if it never
  # comes up.
  def self.wait_until_ready
    STDERR.print "Waiting for test worker node 1 to start up..."

    sock = nil
    30.times do
      sock = TCPSocket.open("127.0.0.1", PORT)
      break
    rescue Errno::ECONNREFUSED
      STDERR.print "."
      sleep 1
    end

    unless sock
      STDERR.puts " FAILED!"
      raise "couldn't connect to test node!"
    end

    STDERR.puts " done!"
    sock.close
  end

  # Kills the child process (SIGKILL) and reaps it; tolerates an
  # already-dead process.
  def self.stop
    unless @pid
      STDERR.print "ERROR: Test worker node 1 was never started!"
      exit 1
    end
    Process.kill(9, @pid)
  rescue Errno::ESRCH
  ensure
    Process.wait @pid rescue nil
  end
end
|
124
|
+
|
125
|
+
|
126
|
+
# Spawns test worker node 2 in a child process and waits for its TCP
# endpoint to accept connections.
module TestWorker2
  PORT = 8092

  def self.start
    script = File.expand_path("../test_worker_node2.rb", __FILE__)
    @pid = Process.spawn(Gem.ruby, script)

    unless @pid
      STDERR.print "ERROR: Couldn't start test worker node 2"
      exit 1
    end
  end

  # Polls the worker's port for up to ~30 seconds; raises if it never
  # comes up.
  def self.wait_until_ready
    STDERR.print "Waiting for test worker node 2 to start up..."

    sock = nil
    30.times do
      sock = TCPSocket.open("127.0.0.1", PORT)
      break
    rescue Errno::ECONNREFUSED
      STDERR.print "."
      sleep 1
    end

    unless sock
      STDERR.puts " FAILED!"
      raise "couldn't connect to test node!"
    end

    STDERR.puts " done!"
    sock.close
  end

  # Kills the child process (SIGKILL) and reaps it; tolerates an
  # already-dead process.
  def self.stop
    unless @pid
      STDERR.print "ERROR: Test worker node 2 was never started!"
      exit 1
    end
    Process.kill(9, @pid)
  rescue Errno::ESRCH
  ensure
    Process.wait @pid rescue nil
  end
end
|
173
|
+
|
174
|
+
|
175
|
+
RSpec.configure do |config|
  # Boot an in-process DCell node once for the whole suite; the separate
  # coordinator/worker child processes are managed per-example by the
  # specs themselves (see TestCoordinator / TestWorker1 / TestWorker2).
  config.before(:suite) do
    DCell.setup
    DCell.run!
  end

  # Earlier suite-wide process management, kept for reference; specs now
  # start/stop nodes in before(:each)/after(:each) instead.
  # config.before(:all) do

  #   TestCoordinator.start
  #   TestCoordinator.wait_until_ready
  #   TestWorker1.start
  #   TestWorker1.wait_until_ready
  #   TestWorker2.start
  #   TestWorker2.wait_until_ready
  # end

  # config.after(:all) do
  #   TestWorker1.stop
  #   TestWorker2.stop
  #   TestCoordinator.stop
  # end
end
|