elaine 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +19 -0
- data/.ruby-version +1 -0
- data/Gemfile +3 -0
- data/README.md +90 -0
- data/Rakefile +3 -0
- data/autotest/discover.rb +1 -0
- data/elaine.gemspec +29 -0
- data/examples/.gitignore +1 -0
- data/examples/erdos-renyi-N_1000-E_0.2.egonets +1000 -0
- data/lib/elaine.rb +30 -0
- data/lib/elaine/coordinator.rb +33 -0
- data/lib/elaine/distributed.rb +11 -0
- data/lib/elaine/distributed/coordinator.rb +134 -0
- data/lib/elaine/distributed/post_office.rb +80 -0
- data/lib/elaine/distributed/vertex.rb +52 -0
- data/lib/elaine/distributed/worker.rb +85 -0
- data/lib/elaine/version.rb +3 -0
- data/lib/elaine/vertex.rb +41 -0
- data/lib/elaine/worker.rb +25 -0
- data/spec/coordinator_spec.rb +95 -0
- data/spec/distributed_coordinator_spec.rb +143 -0
- data/spec/distributed_helper.rb +196 -0
- data/spec/distributed_page_rank_vertex.rb +23 -0
- data/spec/distributed_triad_census_vertex.rb +41 -0
- data/spec/helper.rb +10 -0
- data/spec/test_add_vertex.rb +7 -0
- data/spec/test_coordinator_node.rb +12 -0
- data/spec/test_worker_node1.rb +12 -0
- data/spec/test_worker_node2.rb +12 -0
- data/spec/vertex_spec.rb +56 -0
- data/spec/worker_spec.rb +61 -0
- metadata +173 -0
data/lib/elaine.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'elaine/vertex'
|
2
|
+
require 'elaine/worker'
|
3
|
+
require 'elaine/coordinator'
|
4
|
+
|
5
|
+
require 'singleton'
|
6
|
+
|
7
|
+
# Process-wide message store used by the in-process vertices.
#
# Messages are kept in one array per recipient. Every access goes through
# a single mutex so deliveries and reads from different threads do not
# interleave.
class PostOffice
  include Singleton

  def initialize
    @mailboxes = {}
    @mutex = Mutex.new
  end

  # Appends +msg+ to the mailbox of +to+, creating the mailbox on first use.
  #
  # Returns the recipient's mailbox array.
  def deliver(to, msg)
    @mutex.synchronize do
      (@mailboxes[to] ||= []).push(msg)
    end
  end

  # Removes and returns every message waiting in +box+.
  #
  # Returns an empty array when nothing has been delivered to +box+.
  def read(box)
    @mutex.synchronize do
      waiting = @mailboxes.delete(box)
      waiting || []
    end
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
module Elaine
  # Splits a graph across in-process workers and drives them through
  # supersteps until no worker reports any active vertices.
  class Coordinator
    attr_reader :workers

    # graph   - collection of vertices; must respond to empty?, size and
    #           each_slice.
    # options - Hash of settings. :partitions (default 1) is the target
    #           number of partitions used to size the slices; one Worker
    #           is created per slice.
    #
    # Raises RuntimeError ("empty graph") when the graph is empty.
    def initialize(graph, options = {})
      raise "empty graph" if graph.empty?

      @workers = []
      @options = { :partitions => 1 }.merge(options)

      partition(graph) { |subgraph| @workers << Worker.new(subgraph) }
    end

    # Yields consecutive slices of +graph+, each holding at most
    # ceil(graph.size / partitions) vertices.
    def partition(graph)
      per_worker = (graph.size.to_f / @options[:partitions]).ceil
      graph.each_slice(per_worker) { |chunk| yield chunk }
    end

    # Runs supersteps until every worker reports zero active vertices.
    # Each round starts a superstep on the workers that are still active
    # and joins whatever those calls return before checking again.
    def run
      loop do
        busy = @workers.select { |worker| worker.active > 0 }
        busy.map { |worker| worker.superstep }.each { |thread| thread.join }

        break if @workers.none? { |worker| worker.active > 0 }
      end
    end
  end
end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
require 'dcell'
|
2
|
+
require 'elaine/distributed/coordinator'
|
3
|
+
require 'elaine/distributed/worker'
|
4
|
+
require 'elaine/distributed/post_office'
|
5
|
+
require 'elaine/distributed/vertex'
|
6
|
+
|
7
|
+
# Namespace for the DCell-based distributed implementation. It is
# intentionally empty here: the coordinator, worker, post office and vertex
# classes are defined in the files required above.
module Elaine
  module Distributed
  end
end
|
@@ -0,0 +1,134 @@
|
|
1
|
+
# require 'dnssd'
|
2
|
+
# require 'celluloid/io'
|
3
|
+
require 'dcell'
|
4
|
+
|
5
|
+
module Elaine
  module Distributed
    # Celluloid actor that drives a distributed job.
    #
    # It keeps a list of registered worker node ids, slices the graph across
    # them, sends every worker's post office the vertex-id => node-id routing
    # table ("zipcodes"), hands each worker its slice and then runs
    # supersteps until no worker reports active vertices.
    class Coordinator
      include Celluloid
      include Celluloid::Logger
      # finalizer :shutdown

      attr_reader :workers
      attr_reader :partitions
      attr_reader :num_partitions

      # graph          - collection of vertex descriptions (hashes with at
      #                  least an :id key, see #zipcodes); may be set later
      #                  through #graph=.
      # num_partitions - stored and exposed, but not consulted by #partition,
      #                  which slices by the number of registered workers.
      # stop_condition - condition signalled by #stop and #run_and_stop.
      def initialize(graph: nil, num_partitions: 1, stop_condition: Celluloid::Condition.new)
        @workers = []
        @num_partitions = num_partitions
        @graph = graph
        info "GOT GRAPH: #{graph}"
        @partitions = {}
        @stop_condition = stop_condition
      end

      def graph=(g)
        debug "Setting graph"
        @graph = g
        debug "done setting graph"
      end

      # Builds the routing table from the current partitions.
      #
      # Returns a Hash mapping each vertex id to the worker node id that
      # holds it.
      def zipcodes
        zips = {}
        @partitions.each_pair do |zip, vertices|
          vertices.each { |vertex| zips[vertex[:id]] = zip }
        end
        zips
      end

      # Slices the graph into consecutive chunks, one per registered worker.
      # When the graph produces fewer chunks than there are workers, the
      # surplus workers receive no partition.
      #
      # Returns the Hash of worker node id => slice.
      # Raises RuntimeError when no worker is registered or the graph is
      # nil/empty. Previously these cases surfaced as FloatDomainError,
      # NoMethodError or "invalid slice size".
      def partition
        # not sure if we should re-initialize or not
        @partitions = {}

        raise "no workers registered" if @workers.empty?
        raise "empty graph" if @graph.nil? || @graph.size.zero?

        size = (@graph.size.to_f / @workers.size).ceil

        @graph.each_slice(size).with_index do |slice, index|
          @partitions[@workers[index]] = slice
        end

        @partitions
      end

      # Adds +worker_node+ (a node id) to the worker list unless it is
      # already present.
      def register_worker(worker_node)
        # we could, in theory, have multiple workers in the same node, however
        # i think it makes more sense to just have multiple nodes running on the
        # same machine instead of multiple workers in a single node
        # This should be re-evaluated at some point in the future.
        info "Registering worker: #{worker_node}"
        @workers << worker_node unless @workers.include? worker_node
      end

      # Partitions the graph, distributes routing table and vertices, then
      # runs supersteps until no worker reports active vertices.
      def run_until_finished
        debug "partitioning"
        partition

        # distribute the zipcodes
        debug "building zipcodes"
        zips = zipcodes
        debug "distributing zipcodes"
        @workers.each do |worker_node|
          DCell::Node[worker_node][:postoffice].zipcodes = zips
        end

        # now send the graph
        debug "distributing graph"
        @partitions.each_pair do |worker_node, vertices|
          worker_actor(worker_node).init_graph vertices
        end

        debug "Running job"
        step_num = 0
        loop do
          step_num += 1

          # Each phase is started on all active workers at once and then
          # awaited, so the phase acts as a barrier.
          debug "Initializing superstep #{step_num}"
          active_workers.map { |w| worker_actor(w).future(:init_superstep) }.each(&:value)

          debug "Running superstep #{step_num}"
          active_workers.map { |w| worker_actor(w).future(:superstep) }.each(&:value)

          break if active_workers.empty?
        end
        debug "Job finished!"
      end

      def run_job
        run_until_finished
      end

      # Asynchronously asks every registered worker to stop, then signals
      # the coordinator's own stop condition.
      def stop
        @workers.each do |w|
          worker_actor(w).async.stop
        end
        @stop_condition.signal(true)
      end

      # Runs the job and signals the stop condition; workers are not asked
      # to stop.
      def run_and_stop
        run_until_finished
        @stop_condition.signal(true)
      end

      # Collects the vertex values reported by the workers into one flat
      # array. The block parameter is accepted for interface compatibility
      # but is not used.
      def vertex_values(&block)
        assigned_workers.map { |w| worker_actor(w).vertex_values }.flatten
      end

      private

      # Remote handle of the :worker actor running on node +node_id+.
      def worker_actor(node_id)
        DCell::Node[node_id][:worker]
      end

      # Workers that actually hold a slice of the graph. Workers left
      # without a slice never had init_graph called, so they must not be
      # asked for their state. Before any partitioning has happened this
      # falls back to every registered worker.
      def assigned_workers
        @partitions.empty? ? @workers : @partitions.keys
      end

      # Assigned workers that still report at least one active vertex.
      def active_workers
        assigned_workers.select { |w| worker_actor(w).active > 0 }
      end
    end # class Coordinator
  end # module Distributed
end # module Elaine
|
@@ -0,0 +1,80 @@
|
|
1
|
+
require 'dcell'
|
2
|
+
|
3
|
+
module Elaine
  module Distributed
    # Celluloid actor that stores the mailboxes hosted on this DCell node
    # and forwards messages addressed to mailboxes on other nodes.
    #
    # +zipcodes+ maps a mailbox id to the id of the node hosting it;
    # +mailboxes+ holds the pending messages of the local mailboxes only.
    class PostOffice
      include Celluloid
      include Celluloid::Logger

      attr_reader :mailboxes
      attr_reader :zipcodes

      def initialize
        @mailboxes = {}
        @zipcodes = {}
      end

      # Installs a new routing table and recreates an empty mailbox for
      # every entry that points at this node. Messages in the previous
      # mailboxes are discarded.
      def zipcodes=(zipcodes)
        @zipcodes = zipcodes

        # do we need to initialize all the mailboxes here?
        # might be smart?
        @mailboxes = {}
        my_id = DCell.me.id
        @zipcodes.each_pair do |k, v|
          if v == my_id
            debug "Creating mailbox for: #{k}"
            @mailboxes[k] = []
          end
        end
      end

      # Returns the DCell node that hosts mailbox +to+ according to the
      # routing table.
      def address(to)
        DCell::Node[@zipcodes[to]]
      end

      # Delivers +msg+ to mailbox +to+: pushed directly when the mailbox is
      # local, otherwise handed asynchronously to the hosting node's post
      # office.
      #
      # Always returns nil.
      def deliver(to, msg)
        node = address(to)

        if local?(node)
          @mailboxes[to].push msg
        else
          node[:postoffice].async.deliver(to, msg)
        end

        nil
      end

      # Returns the messages in +mailbox+ without removing them, asking the
      # hosting node's post office when the mailbox is not local.
      def read(mailbox)
        node = address(mailbox)
        # The original compared against `Dcell.me.id`, a misspelt constant
        # that raised NameError on every call.
        if local?(node)
          @mailboxes[mailbox]
        else
          node[:postoffice].read mailbox
        end
      end

      # Returns a copy of the messages in the local +mailbox+ and empties it.
      #
      # Raises RuntimeError when the mailbox lives on another node.
      def read_all(mailbox)
        unless local?(address(mailbox))
          raise "Can't destructively read a non-local mailbox!"
        end

        msgs = @mailboxes[mailbox].dup
        @mailboxes[mailbox].clear
        msgs
      end

      private

      # True when +node+ is the DCell node this post office runs on.
      def local?(node)
        node.id.eql?(DCell.me.id)
      end
    end # class PostOffice
  end # module Distributed
end # module Elaine
|
@@ -0,0 +1,52 @@
|
|
1
|
+
require 'celluloid'
|
2
|
+
module Elaine
  module Distributed
    # Base class for vertices of a distributed job. It is a plain object
    # (not an actor) that sends its messages through the post office passed
    # to the constructor. Subclasses override #compute.
    class Vertex
      attr_reader :id, :superstep
      attr_accessor :value, :messages

      # id         - identifier of this vertex.
      # value      - initial vertex value.
      # postoffice - object whose #async.deliver(to, msg) sends messages.
      # outedges   - collection of ids this vertex has edges to.
      def initialize(id, value, postoffice, outedges)
        # Might be better to grab post_office dynamically with Celluloid::Actor ?
        @id         = id
        @value      = value
        @postoffice = postoffice
        @outedges   = outedges
        @messages   = []
        @superstep  = 0
        @active     = true
      end

      # Returns the out-edges; when a block is given, yields each one first.
      def edges
        return @outedges unless block_given?

        @outedges.each { |edge| yield edge }
      end

      # Same collection as #edges, without the block form.
      def neighbors
        @outedges
      end

      # Sends +msg+ to every out-edge.
      def deliver_to_all_neighbors(msg)
        edges.each do |target|
          deliver(target, msg)
        end
      end

      # Hands +msg+ for +to+ to the post office asynchronously.
      def deliver(to, msg)
        @postoffice.async.deliver(to, msg)
      end

      # Advances the superstep counter, then runs #compute.
      def step
        @superstep += 1
        # debug "Running super step ##{@superstep}"
        compute
      end

      # Marks the vertex inactive.
      def halt
        @active = false
      end

      # Marks the vertex inactive (same effect as #halt).
      def vote_to_stop
        @active = false
      end

      # Marks the vertex active again.
      def active!
        @active = true
      end

      def active?
        @active
      end

      # Per-superstep work; a no-op here, meant to be overridden.
      def compute
      end
    end # class Vertex
  end # module Distributed
end # module Elaine
|
@@ -0,0 +1,85 @@
|
|
1
|
+
require 'dcell'
|
2
|
+
|
3
|
+
module Elaine
  module Distributed
    # Celluloid actor that hosts one slice of the graph on a DCell node.
    #
    # On creation it registers this node's id with the coordinator. The
    # coordinator then sends it vertex descriptions through #init_graph and
    # drives it with #init_superstep / #superstep.
    class Worker
      include Celluloid
      include Celluloid::Logger

      # vertices  - ids of the hosted vertices.
      # vertices2 - the vertex objects themselves.
      # active    - number of vertices still active after the last
      #             superstep (0 until a graph is loaded).
      attr_reader :vertices, :active, :vertices2

      # coordinator_node - id of the DCell node running the :coordinator
      #                    actor.
      # g, zipcodes      - accepted for interface compatibility but not
      #                    used; the graph arrives through #init_graph.
      # stop_condition   - condition signalled by #stop.
      def initialize(coordinator_node: "elaine.coordinator", g: [], zipcodes: {}, stop_condition: Celluloid::Condition.new)
        @coordinator_node = coordinator_node

        # Start from a well-defined empty state. The coordinator queries
        # `active` on every registered worker, including workers that never
        # receive a slice; a nil here made `active > 0` raise NoMethodError.
        @vertices = []
        @vertices2 = []
        @active = 0
        @superstep_num = 0
        @stop_condition = stop_condition

        DCell::Node[@coordinator_node][:coordinator].register_worker DCell.me.id
      end

      # Replaces the hosted graph with the vertices described by +g+.
      #
      # g - array of hashes with :id, :klazz (vertex class), :value and
      #     :outedges.
      #
      # Raises RuntimeError ('empty worker graph') when +g+ is empty.
      def init_graph(g=[])
        raise 'empty worker graph' if g.empty?

        postoffice = Celluloid::Actor[:postoffice]

        # HACK the local vertices should be dealt with differently than
        # the @vertices2 member
        @vertices = g.map { |n| n[:id] }
        @vertices2 = g.map do |n|
          n[:klazz].new n[:id], n[:value], postoffice, n[:outedges]
        end
        @active = @vertices.size

        debug "There are #{@vertices.size} vertices in this worker."
      end

      # Moves each vertex's pending messages out of the local post office
      # and into the vertex.
      # HACK this should be handled better...
      def init_superstep
        @vertices2.each do |v|
          v.messages = Celluloid::Actor[:postoffice].read_all(v.id)
        end
        debug "#{DCell.me.id} finished init_superstep"
      end

      # Applies +block+ to every element of +enum+ through one
      # Celluloid::Future per element and returns the results in order.
      def pmap(enum, &block)
        futures = enum.map { |elem| Celluloid::Future.new(elem, &block) }
        futures.map { |future| future.value }
      end

      def stop
        @stop_condition.signal(true)
      end

      # Steps every active vertex and records how many are still active.
      #
      # Returns the number of vertices that remain active.
      def superstep
        # Named `running` so the local does not shadow the `active` reader.
        running = @vertices2.select { |v| v.active? }

        pmap(running) { |v| v.step }

        @active = running.count { |v| v.active? }
      end

      # Returns an array of {id:, value:} hashes, one per hosted vertex.
      def vertex_values
        @vertices2.map { |v| {id: v.id, value: v.value} }
      end
    end # class Worker
  end # module Distributed
end # module Elaine
|
@@ -0,0 +1,41 @@
|
|
1
|
+
module Elaine
  # Base class for vertices of an in-process job. Messages are sent through
  # the PostOffice singleton. Subclasses override #compute.
  class Vertex
    attr_reader :id, :superstep
    attr_accessor :value, :messages

    # id       - identifier of this vertex.
    # value    - initial vertex value.
    # outedges - remaining arguments: the ids this vertex has edges to.
    def initialize(id, value, *outedges)
      @id        = id
      @value     = value
      @outedges  = outedges
      @messages  = []
      @superstep = 0
      @active    = true
    end

    # Returns the out-edges; when a block is given, yields each one first.
    def edges
      return @outedges unless block_given?

      @outedges.each { |edge| yield edge }
    end

    # Same collection as #edges, without the block form.
    def neighbors
      @outedges
    end

    # Sends +msg+ to every out-edge.
    def deliver_to_all_neighbors(msg)
      edges.each do |target|
        deliver(target, msg)
      end
    end

    # Hands +msg+ for +to+ to the PostOffice singleton.
    def deliver(to, msg)
      PostOffice.instance.deliver(to, msg)
    end

    # Advances the superstep counter, then runs #compute.
    def step
      @superstep += 1
      compute
    end

    # Marks the vertex inactive.
    def halt
      @active = false
    end

    # Marks the vertex active again.
    def active!
      @active = true
    end

    def active?
      @active
    end

    # Per-superstep work; a no-op here, meant to be overridden.
    def compute
    end
  end
end
|