elaine 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,25 @@
module Elaine
  # Owns one partition of the graph and advances its vertices one
  # superstep at a time.
  class Worker
    # vertices: the vertex objects handed to this worker.
    # active:   number of vertices considered active; starts at the partition
    #           size and is refreshed at the end of every superstep.
    attr_reader :vertices, :active

    # @param graph [Array] vertices this worker is responsible for
    # @raise [RuntimeError] if graph is nil or empty
    def initialize(graph = [])
      # nil is rejected with the same error as an empty partition instead of
      # surfacing as a NoMethodError on nil.
      raise 'empty worker graph' if graph.nil? || graph.empty?

      @vertices = graph
      @active = graph.size
    end

    # Runs a single superstep on a new thread:
    #   1. hands each vertex the messages PostOffice holds for its id and
    #      wakes the vertex when there is at least one message,
    #   2. calls #step on every vertex that is active at that point,
    #   3. stores in @active how many of the stepped vertices are still active.
    #
    # @return [Thread] the thread running the superstep; join it before
    #   reading #active
    def superstep
      Thread.new do
        @vertices.each do |v|
          v.messages = PostOffice.instance.read(v.id)
          v.active! if v.messages.size > 0
        end

        stepped = @vertices.select(&:active?)
        stepped.each(&:step)

        # Only vertices that were stepped are counted, as before.
        @active = stepped.count(&:active?)
      end
    end
  end
end
@@ -0,0 +1,95 @@
1
+ require 'helper'
2
+
# Specs for the in-process Coordinator: graph validation, partitioning into
# workers, and running supersteps until every vertex has halted.
describe Coordinator do
  it 'should not allow empty graphs' do
    lambda { Coordinator.new([]) }.should raise_error
  end

  # Three-vertex graph of AddVertex instances (id, initial value, out-edge).
  let(:graph) do
    [
      AddVertex.new(:igvita,     1,  :wikipedia),
      AddVertex.new(:wikipedia,  2,  :google),
      AddVertex.new(:google,     1,  :wikipedia)
    ]
  end

  it 'should partition graphs with variable worker sizes' do
    c = Coordinator.new(graph)
    c.workers.size.should == 1

    c = Coordinator.new(graph, partitions: 2)
    c.workers.size.should == 2
  end

  it 'should schedule workers to run until there are no active vertices' do
    c = Coordinator.new(graph)
    c.run

    c.workers.each do |w|
      w.vertices.each do |v|
        v.value.should == 5
      end
    end
  end

  context 'PageRank' do
    # PageRank vertex used by the examples below: from superstep 1 on it
    # recomputes its value from the incoming messages, and it keeps sending
    # its value split across its neighbors until superstep 30, then halts.
    class PageRankVertex < Vertex
      def compute
        if superstep >= 1
          # Plain left-to-right addition; kept as inject (not Array#sum) so
          # the floating-point result is exactly what the expectations below
          # were written against.
          sum = messages.inject(0) { |total, msg| total + msg }
          @value = (0.15 / 3) + 0.85 * sum
        end

        if superstep < 30
          deliver_to_all_neighbors(@value / neighbors.size)
        else
          halt
        end
      end
    end

    it 'should calculate PageRank of a circular graph' do
      graph = [
        #                   name     value  out-edges
        PageRankVertex.new(:igvita,     1,  :wikipedia),
        PageRankVertex.new(:wikipedia,  1,  :google),
        PageRankVertex.new(:google,     1,  :igvita)
      ]

      c = Coordinator.new(graph)
      c.run

      c.workers.each do |w|
        w.vertices.each do |v|
          (v.value * 100).to_i.should == 33
        end
      end
    end

    it 'should calculate PageRank of arbitrary graph' do
      graph = [
        # page 1 -> page 1, page 2  (0.18)
        # page 2 -> page 1, page 3  (0.13)
        # page 3 -> page 3          (0.69)

        #                   name     value  out-edges
        PageRankVertex.new(:igvita,     1,  :igvita, :wikipedia),
        PageRankVertex.new(:wikipedia,  1,  :igvita, :google),
        PageRankVertex.new(:google,     1,  :google)
      ]

      c = Coordinator.new(graph)
      c.run

      c.workers.each do |w|
        (w.vertices.find { |v| v.id == :igvita }.value * 100).ceil.to_i.should == 19
        (w.vertices.find { |v| v.id == :wikipedia }.value * 100).ceil.to_i.should == 13
        (w.vertices.find { |v| v.id == :google }.value * 100).to_i.should == 68
      end
    end
  end

  # Pending examples (no body yet).
  it 'should partition nodes by hashing the node id'
  it 'should allow scheduling multiple partitions to a single worker'
end
@@ -0,0 +1,143 @@
1
+ require 'distributed_helper'
2
+
# Specs for the distributed coordinator. Every example starts a fresh
# coordinator node and two worker nodes as separate processes, drives a job
# through the coordinator actor, and checks the resulting vertex values.
describe Elaine::Distributed::Coordinator do
  # it 'should not allow empty graphs' do
  #   lambda { Coordinator.new([]) }.should raise_error
  # end

  before(:each) do
    TestCoordinator.start
    TestCoordinator.wait_until_ready
    TestWorker1.start
    TestWorker1.wait_until_ready
    TestWorker2.start
    TestWorker2.wait_until_ready
  end

  after(:each) do
    TestWorker1.stop
    TestWorker2.stop
    TestCoordinator.stop
  end

  # Looks the coordinator actor up on the test coordinator node. This is a
  # plain method (not memoized), so every call performs a fresh lookup.
  def coordinator
    DCell::Node["test.elaine.coordinator"][:coordinator]
  end

  # Builds the vertex description hashes handed to the coordinator.
  # Each row is [id, value, *outedges].
  def vertex_descriptions(klazz, *rows)
    rows.map do |id, value, *outedges|
      { klazz: klazz, id: id, value: value, outedges: outedges }
    end
  end

  # Loads the graph into the coordinator, partitions it, runs the job and
  # returns the coordinator's vertex values.
  def values_after_running(graph)
    coordinator.graph = graph
    coordinator.partition
    coordinator.run_job
    coordinator.vertex_values
  end

  let(:graph) do
    vertex_descriptions(
      DistributedAddVertex,
      [:igvita,    1, :wikipedia],
      [:wikipedia, 2, :google],
      [:google,    1, :wikipedia]
    )
  end

  it "should schedule workers to run until there are no active vertices" do
    values_after_running(graph).each do |v|
      v[:value].should == 5
    end
  end

  it "should calculate PageRank of a circular graph" do
    g = vertex_descriptions(
      DistributedPageRankVertex,
      [:igvita,    1, :wikipedia],
      [:wikipedia, 1, :google],
      [:google,    1, :igvita]
    )

    values_after_running(g).each do |v|
      (v[:value] * 100).to_i.should == 33
    end
  end

  it "should calculate PageRank of an arbitrary graph" do
    # page 1 -> page 1, page 2  (0.18)
    # page 2 -> page 1, page 3  (0.13)
    # page 3 -> page 3          (0.69)
    g = vertex_descriptions(
      DistributedPageRankVertex,
      # name     value  out-edges
      [:igvita,    1, :igvita, :wikipedia],
      [:wikipedia, 1, :igvita, :google],
      [:google,    1, :google]
    )

    values_after_running(g).each do |v|
      case v[:id]
      when :igvita
        (v[:value] * 100).ceil.to_i.should == 19
      when :wikipedia
        (v[:value] * 100).ceil.to_i.should == 13
      when :google
        (v[:value] * 100).to_i.should == 68
      else
        fail "Unexpected node id: #{v[:id]}"
      end
    end
  end

  # Pending examples (no body yet).
  it 'should partition nodes by hashing the node id'
  it 'should allow scheduling multiple partitions to a single worker'
end
@@ -0,0 +1,196 @@
1
+ require 'elaine'
2
+ require 'elaine/distributed'
3
+ require 'dcell'
4
+
5
+ # DCell.start
6
+
# Test vertex that adds one to its value on every compute call and halts
# once the value has reached 5.
class DistributedAddVertex < Elaine::Distributed::Vertex
  def compute
    @value += 1
    # Keep running until the value has reached 5.
    return unless @value >= 5

    halt
  end
end
13
+
# Test vertex implementing a fixed-length PageRank iteration.
#
# From superstep 1 onwards the value is recomputed from the incoming
# messages; until LAST_SUPERSTEP the value is split evenly across the
# vertex's neighbors, after which the vertex halts.
class DistributedPageRankVertex < Elaine::Distributed::Vertex
  # Weight applied to the sum of incoming messages.
  DAMPING = 0.85
  # Constant term added on every update (the original literal 0.15 / 3;
  # presumably 3 is the vertex count of the spec graphs - confirm before
  # reusing this class with a different graph size).
  RANDOM_JUMP = 0.15 / 3
  # Superstep at which the vertex stops sending and halts.
  LAST_SUPERSTEP = 30

  def compute
    if superstep >= 1
      # Plain left-to-right addition; inject is kept instead of Array#sum so
      # the floating-point result stays exactly what the specs expect.
      incoming = messages.inject(0) { |total, msg| total + msg }
      @value = RANDOM_JUMP + DAMPING * incoming
    end

    if superstep < LAST_SUPERSTEP
      # NOTE(review): if @value and neighbors.size are both Integers this is
      # integer division; left untouched because the spec expectations may
      # depend on it.
      deliver_to_all_neighbors(@value / neighbors.size)
    else
      halt
    end
  end
end
28
+
29
+
# Process lifecycle shared by the external test nodes. A module that extends
# TestNode must define three constants:
#   PORT   - TCP port the node listens on once it is up
#   SCRIPT - file name of the node script, located next to this helper file
#   LABEL  - human-readable name used in progress and error messages
module TestNode
  HOST = "127.0.0.1"
  # Number of connection attempts (one second apart) before giving up.
  CONNECT_ATTEMPTS = 30

  # Spawns the node script in a child Ruby process and remembers its pid.
  # Process.spawn either returns a pid or raises, so no nil check is needed.
  def start
    @pid = Process.spawn(Gem.ruby, File.expand_path("../#{const_get(:SCRIPT)}", __FILE__))
  end

  # Blocks until a TCP connection to the node's port succeeds.
  #
  # @raise [RuntimeError] if every connection attempt was refused
  def wait_until_ready
    STDERR.print "Waiting for #{const_get(:LABEL)} to start up..."

    socket = connect_with_retries
    if socket
      STDERR.puts " done!"
      socket.close
    else
      STDERR.puts " FAILED!"
      raise "couldn't connect to test node!"
    end
  end

  # Kills the child process (SIGKILL) and reaps it. A process that has
  # already gone away is not an error. If the node was never started, an
  # error is printed and the whole Ruby process exits with status 1.
  def stop
    unless @pid
      STDERR.print "ERROR: #{const_get(:LABEL).capitalize} was never started!"
      exit 1
    end
    Process.kill "KILL", @pid
  rescue Errno::ESRCH
    # The child has already exited; nothing left to kill.
  ensure
    reap_child
  end

  private

  # Tries to connect up to CONNECT_ATTEMPTS times, sleeping one second after
  # each refused attempt. Returns the open socket, or nil if all were refused.
  def connect_with_retries
    CONNECT_ATTEMPTS.times do
      begin
        return TCPSocket.open(HOST, const_get(:PORT))
      rescue Errno::ECONNREFUSED
        STDERR.print "."
        sleep 1
      end
    end
    nil
  end

  # Waits for the child so it does not linger as a zombie; a child that was
  # already reaped (ECHILD) is ignored.
  def reap_child
    Process.wait(@pid) if @pid
  rescue Errno::ECHILD
    nil
  end
end

# External coordinator node used by the distributed specs.
module TestCoordinator
  extend TestNode

  PORT = 8090
  SCRIPT = "test_coordinator_node.rb"
  LABEL = "test coordinator node"
end

# First external worker node used by the distributed specs.
module TestWorker1
  extend TestNode

  PORT = 8091
  SCRIPT = "test_worker_node1.rb"
  LABEL = "test worker node 1"
end

# Second external worker node used by the distributed specs.
module TestWorker2
  extend TestNode

  PORT = 8092
  SCRIPT = "test_worker_node2.rb"
  LABEL = "test worker node 2"
end
173
+
174
+
# Suite-wide RSpec setup: DCell is set up and started once, before any
# example runs.
RSpec.configure do |rspec|
  rspec.before(:suite) do
    DCell.setup
    DCell.run!
  end

  # Disabled alternative: start the external nodes once per example group
  # rather than from hooks inside the spec itself.
  #
  # rspec.before(:all) do
  #   TestCoordinator.start
  #   TestCoordinator.wait_until_ready
  #   TestWorker1.start
  #   TestWorker1.wait_until_ready
  #   TestWorker2.start
  #   TestWorker2.wait_until_ready
  # end
  #
  # rspec.after(:all) do
  #   TestWorker1.stop
  #   TestWorker2.stop
  #   TestCoordinator.stop
  # end
end