elaine 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
data/lib/elaine.rb ADDED
@@ -0,0 +1,30 @@
1
+ require 'elaine/vertex'
2
+ require 'elaine/worker'
3
+ require 'elaine/coordinator'
4
+
5
+ require 'singleton'
6
+
7
# Process-wide message store shared by every in-process vertex.
#
# Each mailbox is an Array keyed by recipient. Mailboxes are created on
# first delivery and removed when read. All access goes through one Mutex.
class PostOffice
  include Singleton

  def initialize
    @mailboxes = {}
    @mutex = Mutex.new
  end

  # Appends +msg+ to the mailbox of +to+, creating the mailbox if needed.
  #
  # @param to  [Object] mailbox key (the recipient)
  # @param msg [Object] message to queue
  # @return [Array] the recipient's mailbox after the append
  def deliver(to, msg)
    @mutex.synchronize do
      (@mailboxes[to] ||= []) << msg
    end
  end

  # Removes the mailbox for +box+ and returns its messages.
  #
  # @param box [Object] mailbox key
  # @return [Array] queued messages in delivery order, or [] if there were none
  def read(box)
    @mutex.synchronize { @mailboxes.delete(box) || [] }
  end
end
@@ -0,0 +1,33 @@
1
module Elaine
  # Runs a job inside a single process: the graph is cut into contiguous
  # slices, one Worker is created per slice, and supersteps are repeated
  # until no worker reports any active vertices.
  class Coordinator
    attr_reader :workers

    # @param graph [#empty?, #size, #each_slice] the vertices to process
    # @param options [Hash] +:partitions+ is the requested number of slices
    #   (default 1)
    # @raise [RuntimeError] if +graph+ is empty
    # @raise [ArgumentError] if +:partitions+ is not a positive number
    def initialize(graph, options = {})
      raise "empty graph" if graph.empty?

      @options = { :partitions => 1 }.merge(options)

      # A zero partition count would otherwise surface as a FloatDomainError
      # from Infinity#ceil inside #partition; fail early with a clear message.
      count = @options[:partitions]
      unless count.is_a?(Numeric) && count > 0
        raise ArgumentError, "partitions must be a positive number, got #{count.inspect}"
      end

      @workers = []
      partition(graph) { |subgraph| @workers << Worker.new(subgraph) }
    end

    # Yields consecutive slices of +graph+, each holding at most
    # ceil(graph.size / partitions) elements.
    def partition(graph)
      size = (graph.size.to_f / @options[:partitions]).ceil
      graph.each_slice(size) { |slice| yield slice }
    end

    # Repeats supersteps until every worker reports zero active vertices.
    #
    # Each round calls #superstep on the workers whose +active+ count is
    # positive and joins whatever those calls return.
    # NOTE(review): Worker is defined outside this file; #superstep is assumed
    # to return a joinable object such as a Thread -- confirm.
    def run
      loop do
        # execute a superstep and wait for workers to complete
        pending = @workers.select { |w| w.active > 0 }.map { |w| w.superstep }
        pending.each { |t| t.join }

        break if @workers.none? { |w| w.active > 0 }
      end
    end
  end
end
@@ -0,0 +1,11 @@
1
+ require 'dcell'
2
+ require 'elaine/distributed/coordinator'
3
+ require 'elaine/distributed/worker'
4
+ require 'elaine/distributed/post_office'
5
+ require 'elaine/distributed/vertex'
6
+
7
module Elaine
  # Namespace for the DCell-based implementation. This file defines nothing
  # itself; the classes come from the elaine/distributed/* files it requires.
  module Distributed
    # ... code goes here ...
  end # module Distributed
end # module Elaine
@@ -0,0 +1,134 @@
1
+ # require 'dnssd'
2
+ # require 'celluloid/io'
3
+ require 'dcell'
4
+
5
module Elaine
  module Distributed
    # Celluloid actor that drives a distributed job.
    #
    # Workers register themselves by DCell node id. A run slices the graph
    # across the registered workers, tells every worker's post office which
    # node owns which vertex, ships each slice to its worker, and then repeats
    # supersteps until no worker reports active vertices.
    #
    # Remote actors are looked up as DCell::Node[id][:worker] and
    # DCell::Node[id][:postoffice].
    # NOTE(review): those registry names are assumed to be set up by the
    # process that boots each node -- confirm.
    class Coordinator
      include Celluloid
      include Celluloid::Logger
      # finalizer :shutdown

      attr_reader :workers
      attr_reader :partitions
      attr_reader :num_partitions

      # @param graph [#size, #each_slice, nil] vertex descriptions; may be
      #   supplied later through #graph=
      # @param num_partitions [Integer] stored and exposed through the reader;
      #   #partition itself divides by the number of registered workers
      # @param stop_condition [Celluloid::Condition] signalled with +true+ by
      #   #stop and #run_and_stop
      def initialize(graph: nil, num_partitions: 1, stop_condition: Celluloid::Condition.new)
        @workers = []
        @num_partitions = num_partitions
        @graph = graph
        info "GOT GRAPH: #{graph}"
        @partitions = Hash.new
        @stop_condition = stop_condition
      end

      # Replaces the graph used by the next #partition.
      def graph=(g)
        debug "Setting graph"
        @graph = g
        debug "done setting graph"
      end

      # Builds the vertex-id => worker-node-id table from the current
      # partitions.
      #
      # @return [Hash]
      def zipcodes
        @partitions.each_with_object({}) do |(zip, vertices), zips|
          vertices.each { |vertex| zips[vertex[:id]] = zip }
        end
      end

      # Splits the graph into one contiguous slice per registered worker.
      #
      # @return [Hash] worker node id => slice of vertex descriptions
      # @raise [RuntimeError] if no worker has registered, or if the graph is
      #   missing or empty (these cases used to fail with FloatDomainError,
      #   NoMethodError or "invalid slice size")
      def partition
        raise "no workers registered" if @workers.empty?
        raise "empty graph" if @graph.nil? || @graph.size.zero?

        # not sure if we should re-initialize or not
        @partitions = Hash.new

        size = (@graph.size.to_f / @workers.size).ceil

        @graph.each_slice(size).with_index do |slice, index|
          @partitions[@workers[index]] = slice
        end

        @partitions
      end

      # Records +worker_node+ (a DCell node id) unless it is already known.
      def register_worker(worker_node)
        # we could, in theory, have multiple workers in the same node, however
        # i think it makes more sense to just have multiple nodes running on the
        # same machine instead of multiple workers in a single node
        # This should be re-evaluated at some point in the future.
        info "Registering worker: #{worker_node}"
        @workers << worker_node unless @workers.include?(worker_node)
      end

      # Partitions the graph, distributes it, and runs supersteps until every
      # worker reports zero active vertices.
      def run_until_finished
        debug "partitioning"
        partition

        # distribute the zipcodes
        debug "building zipcodes"
        zips = zipcodes
        debug "distributing zipcodes"
        @workers.each do |node_id|
          DCell::Node[node_id][:postoffice].zipcodes = zips
        end

        # now send the graph
        debug "distributing graph"
        @partitions.each_pair do |node_id, vertices|
          worker_on(node_id).init_graph vertices
        end

        debug "Running job"
        step_num = 0
        loop do
          step_num += 1
          # Each phase is started on every active worker and awaited before
          # the next begins; the active set is re-queried between phases.
          debug "Initializing superstep #{step_num}"
          run_phase(:init_superstep)

          debug "Running superstep #{step_num}"
          run_phase(:superstep)

          break if active_workers.empty?
        end
        debug "Job finished!"
      end

      # Alias-style entry point; identical to #run_until_finished.
      def run_job
        run_until_finished
      end

      # Asks every worker to stop (asynchronously) and signals the stop
      # condition.
      def stop
        @workers.each do |node_id|
          worker_on(node_id).async.stop
        end
        @stop_condition.signal(true)
      end

      # Runs the job to completion, then signals the stop condition.
      def run_and_stop
        run_until_finished
        @stop_condition.signal(true)
      end

      # Collects the vertex values reported by every worker into one flat
      # Array. The block parameter is accepted for compatibility but unused.
      def vertex_values(&block)
        @workers.map { |node_id| worker_on(node_id).vertex_values }.flatten
      end

      private

      # Remote worker actor living on the given DCell node.
      def worker_on(node_id)
        DCell::Node[node_id][:worker]
      end

      # Node ids of the workers that currently report active vertices.
      def active_workers
        @workers.select { |node_id| worker_on(node_id).active > 0 }
      end

      # Starts +phase+ on every active worker and blocks until all finish.
      def run_phase(phase)
        futures = active_workers.map { |node_id| worker_on(node_id).future(phase) }
        futures.each { |f| f.value }
      end

    end # class Coordinator
  end # module Distributed
end # module Elaine
@@ -0,0 +1,80 @@
1
+ require 'dcell'
2
+
3
module Elaine
  module Distributed
    # Celluloid actor holding the mailboxes of the vertices that live on this
    # DCell node, and forwarding messages for vertices that live elsewhere.
    #
    # +zipcodes+ maps a vertex id to the id of the node that owns it.
    class PostOffice
      include Celluloid
      include Celluloid::Logger

      attr_reader :mailboxes
      attr_reader :zipcodes

      def initialize
        @mailboxes = Hash.new
        @zipcodes = Hash.new
      end

      # Installs a new routing table and resets the local mailboxes: every
      # vertex routed to this node gets a fresh, empty mailbox and any
      # previously queued messages are discarded.
      #
      # @param zipcodes [Hash] vertex id => owning node id
      def zipcodes=(zipcodes)
        @zipcodes = zipcodes

        # do we need to initialize all the mailboxes here?
        # might be smart?
        @mailboxes = Hash.new
        my_id = DCell.me.id
        @zipcodes.each_pair do |vertex_id, node_id|
          next unless node_id == my_id

          debug "Creating mailbox for: #{vertex_id}"
          @mailboxes[vertex_id] = []
        end
      end

      # Looks up the DCell node that owns the mailbox of +to+.
      def address(to)
        DCell::Node[@zipcodes[to]]
      end

      # Queues +msg+ for +to+: appended directly when the mailbox is local,
      # otherwise handed asynchronously to the owning node's post office.
      #
      # @return [nil]
      def deliver(to, msg)
        node = address(to)

        if local?(node)
          @mailboxes[to].push msg
        else
          node[:postoffice].async.deliver(to, msg)
        end
        nil
      end

      # Returns the messages queued for +mailbox+ without removing them,
      # asking the owning node's post office when the mailbox is not local.
      def read(mailbox)
        node = address(mailbox)
        # The original compared against `Dcell.me.id` (lowercase c), an
        # undefined constant, so this method raised NameError on every call.
        return @mailboxes[mailbox] if local?(node)

        node[:postoffice].read mailbox
      end

      # Returns a copy of the messages queued for a local +mailbox+ and
      # empties it.
      #
      # @raise [RuntimeError] if the mailbox belongs to another node
      def read_all(mailbox)
        unless local?(address(mailbox))
          raise "Can't destructively read a non-local mailbox!"
        end

        box = @mailboxes[mailbox]
        msgs = box.dup
        box.clear
        msgs
      end

      private

      # True when +node+ is the DCell node this actor runs on.
      def local?(node)
        node.id.eql?(DCell.me.id)
      end

    end # class PostOffice
  end # module Distributed
end # module Elaine
@@ -0,0 +1,52 @@
1
+ require 'celluloid'
2
module Elaine
  module Distributed
    # A graph vertex for the distributed runtime. It is a plain object (the
    # Celluloid includes are commented out) that sends messages through the
    # post office handed to it at construction.
    class Vertex
      # include Celluloid
      # include Celluloid::Logger

      attr_reader :id, :superstep
      attr_accessor :value, :messages

      # @param id         [Object] vertex identifier
      # @param value      [Object] initial vertex value
      # @param postoffice [#async] object whose +async.deliver(to, msg)+
      #   queues a message
      # @param outedges   [Enumerable] ids of the vertices this one points to
      def initialize(id, value, postoffice, outedges)
        # Might be better to grab post_office dynamically with Celluloid::Actor ?
        @id = id
        @value = value
        @outedges = outedges
        @messages = []
        @active = true
        @superstep = 0
        @postoffice = postoffice
      end

      # Returns the out-edges, or iterates them when a block is given.
      def edges
        block_given? ? @outedges.each { |e| yield e } : @outedges
      end

      # Sends +msg+ to every out-edge.
      def deliver_to_all_neighbors(msg)
        edges.each { |e| deliver(e, msg) }
      end

      # Queues +msg+ for vertex +to+ via an asynchronous post-office call.
      def deliver(to, msg)
        @postoffice.async.deliver(to, msg)
      end

      # Advances the superstep counter, then runs #compute.
      def step
        @superstep += 1
        # debug "Running super step ##{@superstep}"
        compute
      end

      def halt; @active = false; end
      def active!; @active = true; end
      def active?; @active; end

      def neighbors; @outedges; end

      # Same effect as #halt.
      def vote_to_stop; @active = false; end

      # Per-superstep work. Empty here; presumably overridden by the vertex
      # classes a job supplies -- confirm against callers.
      def compute; end
    end # class Vertex
  end # module Distributed
end # module Elaine
@@ -0,0 +1,85 @@
1
+ require 'dcell'
2
+
3
module Elaine
  module Distributed
    # Celluloid actor that owns the vertices assigned to one DCell node and
    # executes supersteps on them when asked.
    #
    # On creation it registers this node's id with the coordinator actor found
    # at DCell::Node[coordinator_node][:coordinator].
    class Worker
      include Celluloid
      include Celluloid::Logger

      # vertices  - ids of the local vertices
      # active    - number of vertices still active after the last superstep
      # vertices2 - the local vertex objects themselves
      attr_reader :vertices, :active, :vertices2

      # @param coordinator_node [String] DCell node id of the coordinator
      # @param g [Array] accepted but currently ignored; the graph arrives
      #   through #init_graph
      # @param zipcodes [Hash] accepted but currently ignored
      # @param stop_condition [Celluloid::Condition] signalled with +true+ by
      #   #stop
      def initialize(coordinator_node: "elaine.coordinator", g: [], zipcodes: {}, stop_condition: Celluloid::Condition.new)

        # @coordinator_node = DCell::Node["elaine.coordinator"]
        @coordinator_node = coordinator_node
        DCell::Node[@coordinator_node][:coordinator].register_worker DCell.me.id

        @vertices = []
        # Start with an empty vertex list and a zero active count so that
        # #active, #superstep and #vertex_values are usable before
        # #init_graph; they were nil before, which broke `active > 0`.
        @vertices2 = []
        @active = 0
        @superstep_num = 0
        @stop_condition = stop_condition
      end

      # Replaces the local vertices with ones built from +g+.
      #
      # Each entry of +g+ is read through the keys :id, :klazz, :value and
      # :outedges. :klazz is instantiated with the id, the value, the local
      # :postoffice actor, and the out-edges.
      #
      # @raise [RuntimeError] if +g+ is empty
      def init_graph(g=[])
        raise 'empty worker graph' if g.empty?

        # raise "Graph already initialized!" if @vertices.size > 0

        # we are going to assume that graphs come in as json documents
        # *describing* the graph.

        # HACK the local vertices should be dealt with differently than
        # the @vertices2 member
        @vertices = []
        @vertices2 = []
        g.each do |n|
          # n[:klazz].supervise_as n[:id], n[:id], n[:value], Celluloid::Actor[:postoffice], n[:outedges]
          @vertices << n[:id]
          @vertices2 << n[:klazz].new(n[:id], n[:value], Celluloid::Actor[:postoffice], n[:outedges])
        end
        @active = @vertices.size

        debug "There are #{@vertices.size} vertices in this worker."
      end

      # HACK this should be handled better...
      # Moves each vertex's queued messages out of the local post office and
      # into the vertex.
      def init_superstep
        @vertices2.each do |v|
          v.messages = Celluloid::Actor[:postoffice].read_all(v.id)
        end
        debug "#{DCell.me.id} finished init_superstep"
      end

      # Runs +block+ for each element inside its own Celluloid::Future and
      # returns the results in input order.
      def pmap(enum, &block)
        futures = enum.map { |elem| Celluloid::Future.new(elem, &block) }
        futures.map { |future| future.value }
      end

      # Signals the stop condition.
      def stop
        @stop_condition.signal(true)
      end

      # Steps every currently active vertex, then records how many of those
      # vertices are still active.
      #
      # @return [Integer] the new active count
      def superstep
        running = @vertices2.select { |v| v.active? }

        pmap(running) do |v|
          v.step
        end
        @active = running.count { |v| v.active? }
      end

      # @return [Array<Hash>] one {id:, value:} hash per local vertex
      def vertex_values
        @vertices2.map { |v| {id: v.id, value: v.value} }
      end
    end # class Worker
  end # module Distributed
end # module Elaine
@@ -0,0 +1,3 @@
1
module Elaine
  # Release number of the gem; frozen so it cannot be mutated at runtime.
  VERSION = "0.0.3".freeze
end
@@ -0,0 +1,41 @@
1
module Elaine
  # A graph vertex for the single-process runtime. Messages are sent through
  # the global PostOffice singleton.
  class Vertex
    attr_reader :id, :superstep
    attr_accessor :value, :messages

    # @param id       [Object] vertex identifier
    # @param value    [Object] initial vertex value
    # @param outedges [Array] remaining arguments: ids of the vertices this
    #   one points to
    def initialize(id, value, *outedges)
      @id = id
      @value = value
      @outedges = outedges
      @messages = []
      @active = true
      @superstep = 0
    end

    # Returns the out-edges, or iterates them when a block is given.
    def edges
      block_given? ? @outedges.each { |e| yield e } : @outedges
    end

    # Sends +msg+ to every out-edge.
    def deliver_to_all_neighbors(msg)
      edges.each { |e| deliver(e, msg) }
    end

    # Queues +msg+ for vertex +to+ in the PostOffice singleton.
    def deliver(to, msg)
      PostOffice.instance.deliver(to, msg)
    end

    # Advances the superstep counter, then runs #compute.
    def step
      @superstep += 1
      compute
    end

    def halt; @active = false; end
    def active!; @active = true; end
    def active?; @active; end

    def neighbors; @outedges; end

    # Per-superstep work. Empty here; presumably overridden by subclasses --
    # confirm against callers.
    def compute; end
  end
end