elaine 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,25 @@
1
module Elaine
  # Drives the vertices of one graph partition through supersteps.
  #
  # +vertices+ is the vertex list handed to the constructor. +active+ is the
  # number of vertices that are still active: it starts at the partition size
  # and is refreshed at the end of every superstep.
  class Worker
    attr_reader :vertices, :active

    # @param graph [Array] vertices owned by this worker; must not be empty
    # @raise [RuntimeError] when +graph+ is empty (which includes the default)
    def initialize(graph = [])
      raise 'empty worker graph' if graph.empty?

      @vertices = graph
      @active = graph.size
    end

    # Runs a single superstep on a freshly spawned thread.
    #
    # Each vertex first receives the messages PostOffice holds for its id and
    # is re-activated when it got any. Every active vertex is then stepped,
    # and +active+ is set to the number of stepped vertices still active.
    #
    # @return [Thread] the thread doing the work; +active+ is only updated
    #   inside it, so join the thread before reading the counter
    def superstep
      Thread.new do
        @vertices.each do |v|
          v.messages = PostOffice.instance.read(v.id)
          v.active! if v.messages.size > 0
        end

        runnable = @vertices.select(&:active?)
        runnable.each(&:step)

        # Only vertices that were stepped can still be active here.
        @active = runnable.count(&:active?)
      end
    end
  end
end
@@ -0,0 +1,95 @@
1
+ require 'helper'
2
+
3
# Specs for the in-process Coordinator: graph validation, partitioning into
# workers, scheduling until no vertex is active, and two PageRank runs.
describe Coordinator do
  it 'should not allow empty graphs' do
    lambda { Coordinator.new([]) }.should raise_error
  end

  # Three AddVertex nodes (id, initial value, out-edge) shared by the
  # partitioning and scheduling examples.
  let(:graph) do
    [
      AddVertex.new(:igvita,    1, :wikipedia),
      AddVertex.new(:wikipedia, 2, :google),
      AddVertex.new(:google,    1, :wikipedia)
    ]
  end

  it 'should partition graphs with variable worker sizes' do
    c = Coordinator.new(graph)
    c.workers.size.should == 1

    c = Coordinator.new(graph, partitions: 2)
    c.workers.size.should == 2
  end

  it 'should schedule workers to run until there are no active vertices' do
    c = Coordinator.new(graph)
    c.run

    c.workers.each do |w|
      w.vertices.each do |v|
        v.value.should == 5
      end
    end
  end

  context 'PageRank' do
    # Vertex used by the PageRank examples. From superstep 1 onward the value
    # becomes 0.15 / 3 + 0.85 * (sum of received messages). While the
    # superstep is below 30 the value is split evenly across the neighbors;
    # after that the vertex halts.
    # NOTE(review): the divisor 3 presumably matches the three-vertex graphs
    # used below - confirm before reusing with other graph sizes.
    class PageRankVertex < Vertex
      def compute
        if superstep >= 1
          sum = messages.inject(0) { |total, msg| total + msg }
          @value = (0.15 / 3) + 0.85 * sum
        end

        if superstep < 30
          deliver_to_all_neighbors(@value / neighbors.size)
        else
          halt
        end
      end
    end

    it 'should calculate PageRank of a circular graph' do
      graph = [
        #                   name        value  out-edges
        PageRankVertex.new(:igvita,     1,     :wikipedia),
        PageRankVertex.new(:wikipedia,  1,     :google),
        PageRankVertex.new(:google,     1,     :igvita)
      ]

      c = Coordinator.new(graph)
      c.run

      c.workers.each do |w|
        w.vertices.each do |v|
          (v.value * 100).to_i.should == 33
        end
      end
    end

    it 'should calculate PageRank of arbitrary graph' do
      graph = [
        # page 1 -> page 1, page 2  (0.18)
        # page 2 -> page 1, page 3  (0.13)
        # page 3 -> page 3          (0.69)

        #                   name        value  out-edges
        PageRankVertex.new(:igvita,     1,     :igvita, :wikipedia),
        PageRankVertex.new(:wikipedia,  1,     :igvita, :google),
        PageRankVertex.new(:google,     1,     :google)
      ]

      c = Coordinator.new(graph)
      c.run

      c.workers.each do |w|
        (w.vertices.find { |v| v.id == :igvita }.value * 100).ceil.should == 19
        (w.vertices.find { |v| v.id == :wikipedia }.value * 100).ceil.should == 13
        (w.vertices.find { |v| v.id == :google }.value * 100).to_i.should == 68
      end
    end
  end

  # Pending examples (no body yet).
  it 'should partition nodes by hashing the node id'
  it 'should allow scheduling multiple partitions to a single worker'
end
@@ -0,0 +1,143 @@
1
+ require 'distributed_helper'
2
+
3
# Specs for the distributed coordinator. Every example starts a coordinator
# node and two worker nodes as separate processes and stops them afterwards.
describe Elaine::Distributed::Coordinator do
  # it 'should not allow empty graphs' do
  #   lambda { Coordinator.new([]) }.should raise_error
  # end

  before(:each) do
    TestCoordinator.start
    TestCoordinator.wait_until_ready
    TestWorker1.start
    TestWorker1.wait_until_ready
    TestWorker2.start
    TestWorker2.wait_until_ready
  end

  after(:each) do
    TestWorker1.stop
    TestWorker2.stop
    TestCoordinator.stop
  end

  # Fetches the :coordinator entry of the "test.elaine.coordinator" DCell node.
  # Deliberately not memoized: the lookup is repeated on every call, exactly
  # as the examples did when they spelled it out inline.
  def coordinator
    DCell::Node["test.elaine.coordinator"][:coordinator]
  end

  let(:graph) do
    [
      {
        klazz: DistributedAddVertex,
        id: :igvita,
        value: 1,
        outedges: [:wikipedia]
      },
      {
        klazz: DistributedAddVertex,
        id: :wikipedia,
        value: 2,
        outedges: [:google]
      },
      {
        klazz: DistributedAddVertex,
        id: :google,
        value: 1,
        outedges: [:wikipedia]
      }
    ]
  end

  it "should schedule workers to run until there are no active vertices" do
    coordinator.graph = graph
    coordinator.partition
    coordinator.run_job

    values = coordinator.vertex_values
    values.each do |v|
      v[:value].should == 5
    end
  end

  it "should calculate PageRank of a circular graph" do
    g = [
      {
        klazz: DistributedPageRankVertex,
        id: :igvita,
        value: 1,
        outedges: [:wikipedia]
      },
      {
        klazz: DistributedPageRankVertex,
        id: :wikipedia,
        value: 1,
        outedges: [:google]
      },
      {
        klazz: DistributedPageRankVertex,
        id: :google,
        value: 1,
        outedges: [:igvita]
      }
    ]

    coordinator.graph = g
    coordinator.partition
    coordinator.run_job

    values = coordinator.vertex_values
    values.each do |v|
      (v[:value] * 100).to_i.should == 33
    end
  end

  it "should calculate PageRank of an arbitrary graph" do
    g = [
      # page 1 -> page 1, page 2  (0.18)
      # page 2 -> page 1, page 3  (0.13)
      # page 3 -> page 3          (0.69)
      {
        klazz: DistributedPageRankVertex,
        id: :igvita,
        value: 1,
        outedges: [:igvita, :wikipedia]
      },
      {
        klazz: DistributedPageRankVertex,
        id: :wikipedia,
        value: 1,
        outedges: [:igvita, :google]
      },
      {
        klazz: DistributedPageRankVertex,
        id: :google,
        value: 1,
        outedges: [:google]
      }
    ]

    coordinator.graph = g
    coordinator.partition
    coordinator.run_job

    vertex_values = coordinator.vertex_values
    vertex_values.each do |v|
      case v[:id]
      when :igvita
        (v[:value] * 100).ceil.should == 19
      when :wikipedia
        (v[:value] * 100).ceil.should == 13
      when :google
        (v[:value] * 100).to_i.should == 68
      else
        fail "Unexpected node id: #{v[:id]}"
      end
    end
  end

  # Pending examples (no body yet).
  it 'should partition nodes by hashing the node id'
  it 'should allow scheduling multiple partitions to a single worker'
end
@@ -0,0 +1,196 @@
1
+ require 'elaine'
2
+ require 'elaine/distributed'
3
+ require 'dcell'
4
+
5
+ # DCell.start
6
+
7
# Test vertex that bumps its value by one on every compute call and halts
# once the value has reached 5.
class DistributedAddVertex < Elaine::Distributed::Vertex
  def compute
    @value = @value + 1
    return unless @value >= 5

    halt
  end
end
13
+
14
# Test vertex implementing the PageRank update used by the distributed specs.
class DistributedPageRankVertex < Elaine::Distributed::Vertex
  # From superstep 1 onward, recompute the value as
  # 0.15 / 3 + 0.85 * (sum of received messages). While the superstep is
  # below 30, send value / neighbor-count to every neighbor; otherwise halt.
  def compute
    if superstep >= 1
      incoming = messages.inject(0) { |acc, contribution| acc + contribution }
      @value = (0.15 / 3) + 0.85 * incoming
    end

    return halt unless superstep < 30

    deliver_to_all_neighbors(@value / neighbors.size)
  end
end
28
+
29
+
30
# Spawns, probes and kills the coordinator node process used by the
# distributed specs.
#
# The child pid is kept in a module-level instance variable (@pid). A class
# variable would raise NameError when read before assignment, which would make
# the "never started" guard in +stop+ unreachable.
module TestCoordinator
  PORT = 8090

  # Spawns test_coordinator_node.rb (resolved relative to this file) with the
  # current Ruby interpreter and remembers its pid.
  # Prints the pid to stdout; exits the process with status 1 if no pid came back.
  def self.start
    @pid = Process.spawn Gem.ruby, File.expand_path("../test_coordinator_node.rb", __FILE__)
    puts "Coordinator pid: #{@pid}"
    unless @pid
      STDERR.print "ERROR: Couldn't start test coordinator node"
      exit 1
    end
  end

  # Tries up to 30 times to open a TCP connection to 127.0.0.1:PORT, sleeping
  # one second after each refused attempt, and reports progress on stderr.
  #
  # @raise [RuntimeError] when no connection could be opened
  def self.wait_until_ready
    STDERR.print "Waiting for test coordinator node to start up..."

    socket = nil
    30.times do
      begin
        socket = TCPSocket.open("127.0.0.1", PORT)
        break if socket
      rescue Errno::ECONNREFUSED
        STDERR.print "."
        sleep 1
      end
    end

    if socket
      STDERR.puts " done!"
      socket.close
    else
      STDERR.puts " FAILED!"
      raise "couldn't connect to test node!"
    end
  end

  # Sends signal 9 to the spawned process and reaps it.
  # Exits the process with status 1 when +start+ was never called.
  # A process that is already gone (Errno::ESRCH) is not an error.
  def self.stop
    unless @pid
      STDERR.print "ERROR: Test coordinator node was never started!"
      exit 1
    end
    puts "Coordinator pid: #{@pid}"
    Process.kill 9, @pid
  rescue Errno::ESRCH
    # Already dead; nothing to kill.
  ensure
    # Best-effort reap so no zombie is left behind; skipped when nothing was spawned.
    if @pid
      begin
        Process.wait @pid
      rescue StandardError
        nil
      end
    end
  end
end
76
+
77
# Spawns, probes and kills the first worker node process used by the
# distributed specs. The child pid lives in the module-level @pid.
module TestWorker1
  PORT = 8091

  # Launches test_worker_node1.rb (resolved relative to this file) with the
  # current Ruby interpreter; exits with status 1 if no pid came back.
  def self.start
    script = File.expand_path("../test_worker_node1.rb", __FILE__)
    @pid = Process.spawn(Gem.ruby, script)
    return if @pid

    STDERR.print "ERROR: Couldn't start test worker node 1"
    exit 1
  end

  # Makes up to 30 connection attempts to 127.0.0.1:PORT, pausing one second
  # after each refusal; raises when none of them succeeds.
  def self.wait_until_ready
    STDERR.print "Waiting for test worker node 1 to start up..."

    connection = nil
    attempt = 0
    until connection || attempt == 30
      attempt += 1
      begin
        connection = TCPSocket.open("127.0.0.1", PORT)
      rescue Errno::ECONNREFUSED
        STDERR.print "."
        sleep 1
      end
    end

    unless connection
      STDERR.puts " FAILED!"
      raise "couldn't connect to test node!"
    end

    STDERR.puts " done!"
    connection.close
  end

  # Kills the worker with signal 9 and reaps it; exits with status 1 when the
  # worker was never started. A missing process is ignored.
  def self.stop
    begin
      if @pid
        Process.kill(9, @pid)
      else
        STDERR.print "ERROR: Test worker node 1 was never started!"
        exit 1
      end
    rescue Errno::ESRCH
      nil
    ensure
      begin
        Process.wait(@pid)
      rescue StandardError
        nil
      end
    end
  end
end
124
+
125
+
126
# Spawns, probes and kills the second worker node process used by the
# distributed specs. The child pid lives in the module-level @pid.
module TestWorker2
  PORT = 8092

  # Launches test_worker_node2.rb (resolved relative to this file) with the
  # current Ruby interpreter; exits with status 1 if no pid came back.
  def self.start
    script = File.expand_path("../test_worker_node2.rb", __FILE__)
    @pid = Process.spawn(Gem.ruby, script)
    return if @pid

    STDERR.print "ERROR: Couldn't start test worker node 2"
    exit 1
  end

  # Makes up to 30 connection attempts to 127.0.0.1:PORT, pausing one second
  # after each refusal; raises when none of them succeeds.
  def self.wait_until_ready
    STDERR.print "Waiting for test worker node 2 to start up..."

    connection = nil
    attempt = 0
    until connection || attempt == 30
      attempt += 1
      begin
        connection = TCPSocket.open("127.0.0.1", PORT)
      rescue Errno::ECONNREFUSED
        STDERR.print "."
        sleep 1
      end
    end

    unless connection
      STDERR.puts " FAILED!"
      raise "couldn't connect to test node!"
    end

    STDERR.puts " done!"
    connection.close
  end

  # Kills the worker with signal 9 and reaps it; exits with status 1 when the
  # worker was never started. A missing process is ignored.
  def self.stop
    begin
      if @pid
        Process.kill(9, @pid)
      else
        STDERR.print "ERROR: Test worker node 2 was never started!"
        exit 1
      end
    rescue Errno::ESRCH
      nil
    ensure
      begin
        Process.wait(@pid)
      rescue StandardError
        nil
      end
    end
  end
end
173
+
174
+
175
# Suite-wide setup: DCell is set up and started once before any example runs.
RSpec.configure do |rspec|
  rspec.before(:suite) do
    DCell.setup
    DCell.run!
  end

  # Disabled group-level hooks, kept for reference:
  #
  # rspec.before(:all) do
  #   TestCoordinator.start
  #   TestCoordinator.wait_until_ready
  #   TestWorker1.start
  #   TestWorker1.wait_until_ready
  #   TestWorker2.start
  #   TestWorker2.wait_until_ready
  # end
  #
  # rspec.after(:all) do
  #   TestWorker1.stop
  #   TestWorker2.stop
  #   TestCoordinator.stop
  # end
end