elaine 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/elaine.rb ADDED
@@ -0,0 +1,30 @@
1
+ require 'elaine/vertex'
2
+ require 'elaine/worker'
3
+ require 'elaine/coordinator'
4
+
5
+ require 'singleton'
6
+
7
# Process-wide mailbox registry. Messages are queued per recipient and are
# handed out (and removed) in one piece when the recipient's box is read.
# Every access to the underlying hash happens while holding a single mutex.
class PostOffice
  include Singleton

  def initialize
    @mutex = Mutex.new
    @mailboxes = {}
  end

  # Queues +msg+ for recipient +to+, opening a mailbox on first delivery.
  # Returns the recipient's array of pending messages.
  def deliver(to, msg)
    @mutex.synchronize { (@mailboxes[to] ||= []).push(msg) }
  end

  # Removes and returns everything queued for +box+. A box that does not
  # exist (never written, or already drained) yields an empty array.
  def read(box)
    @mutex.synchronize do
      pending = @mailboxes.delete(box)
      pending ? pending : []
    end
  end
end
@@ -0,0 +1,33 @@
1
module Elaine
  # Splits a graph across in-process workers and steps them in lock-step
  # until no worker reports active vertices any more.
  class Coordinator
    attr_reader :workers

    # graph::   collection of vertices; must not be empty.
    # options:: settings hash; +:partitions+ (default 1) is the divisor used
    #           to size the slices handed to the workers.
    #
    # Raises RuntimeError ("empty graph") when +graph+ is empty.
    def initialize(graph, options = {})
      raise "empty graph" if graph.empty?

      defaults = { :partitions => 1 }
      @options = defaults.merge(options)
      @workers = []

      partition(graph) { |subgraph| @workers.push(Worker.new(subgraph)) }
    end

    # Yields consecutive slices of +graph+, each holding at most
    # ceil(graph.size / partitions) vertices.
    def partition(graph)
      per_slice = (graph.size.to_f / @options[:partitions]).ceil
      graph.each_slice(per_slice) { |slice| yield slice }
    end

    # Runs supersteps until every worker reports zero active vertices.
    # Each round starts a superstep on the workers that still have active
    # vertices and joins whatever those calls return before re-checking.
    def run
      loop do
        busy = @workers.select { |worker| worker.active > 0 }
        pending = busy.map { |worker| worker.superstep }
        pending.each { |handle| handle.join }

        break if @workers.none? { |worker| worker.active > 0 }
      end
    end
  end
end
@@ -0,0 +1,11 @@
1
+ require 'dcell'
2
+ require 'elaine/distributed/coordinator'
3
+ require 'elaine/distributed/worker'
4
+ require 'elaine/distributed/post_office'
5
+ require 'elaine/distributed/vertex'
6
+
7
+ module Elaine
8
+ module Distributed
9
+ # Intentionally empty: this file only opens the namespace. NOTE(review): the classes presumably come from the elaine/distributed/* files required above - confirm.
10
+ end # module Distributed
11
+ end # module Elaine
@@ -0,0 +1,134 @@
1
+ # require 'dnssd'
2
+ # require 'celluloid/io'
3
+ require 'dcell'
4
+
5
module Elaine
  module Distributed
    # Celluloid actor that drives a job across registered DCell worker nodes:
    # it slices the graph (one slice per worker), pushes the routing table and
    # the vertices to the nodes, then runs supersteps until no participating
    # worker reports active vertices.
    class Coordinator
      include Celluloid
      include Celluloid::Logger
      # finalizer :shutdown

      attr_reader :workers
      attr_reader :partitions
      attr_reader :num_partitions

      # graph::          collection of vertex descriptions; each entry must
      #                  answer +[:id]+ (see #zipcodes). May be set later
      #                  through #graph=.
      # num_partitions:: stored and exposed, but not consulted by #partition,
      #                  which always sizes slices by the number of workers.
      # stop_condition:: condition signalled by #stop and #run_and_stop.
      def initialize(graph: nil, num_partitions: 1, stop_condition: Celluloid::Condition.new)
        @workers = []
        @num_partitions = num_partitions
        @graph = graph
        info "GOT GRAPH: #{graph}"
        @partitions = {}
        @stop_condition = stop_condition
      end

      # Replaces the graph used by the next call to #partition.
      def graph=(g)
        debug "Setting graph"
        @graph = g
        debug "done setting graph"
      end

      # Builds the routing table { vertex id => worker node id } from the
      # current partitions.
      def zipcodes
        @partitions.each_with_object({}) do |(zip, vertices), zips|
          vertices.each { |vertex| zips[vertex[:id]] = zip }
        end
      end

      # Rebuilds @partitions by cutting the graph into consecutive slices of
      # ceil(graph.size / workers.size) vertices, one slice per worker. When
      # the slices run out before the workers do, the remaining workers get
      # no entry.
      #
      # Raises RuntimeError when there is no graph, the graph is empty, or no
      # worker has registered. Without these guards the same situations
      # surfaced as NoMethodError, ArgumentError (slice size 0) or
      # FloatDomainError (division by zero workers).
      def partition
        raise "empty graph" if @graph.nil? || @graph.size.zero?
        raise "no workers registered" if @workers.empty?

        @partitions = {}
        size = (@graph.size.to_f / @workers.size).ceil

        @graph.each_slice(size).with_index do |slice, index|
          @partitions[@workers[index]] = slice
        end

        @partitions
      end

      # Adds +worker_node+ (a DCell node id) to the worker list; registering
      # the same id twice is a no-op.
      #
      # we could, in theory, have multiple workers in the same node, however
      # i think it makes more sense to just have multiple nodes running on the
      # same machine instead of multiple workers in a single node
      # This should be re-evaluated at some point in the future.
      def register_worker(worker_node)
        info "Registering worker: #{worker_node}"
        @workers << worker_node unless @workers.include? worker_node
      end

      # Partitions the graph, distributes zipcodes and vertices, then runs
      # supersteps until no participating worker has active vertices.
      def run_until_finished
        debug "partitioning"
        partition

        # distribute the zipcodes
        debug "building zipcodes"
        zips = zipcodes
        debug "distributing zipcodes"
        @workers.each do |worker_node|
          DCell::Node[worker_node][:postoffice].zipcodes = zips
        end

        # now send the graph
        debug "distributing graph"
        @partitions.each_pair do |worker_node, vertices|
          DCell::Node[worker_node][:worker].init_graph vertices
        end

        # Only workers that were handed a slice take part in the job. A
        # worker that never received a graph has no meaningful active count,
        # so polling it would abort the whole run.
        participants = @partitions.keys

        debug "Running job"
        step_num = 0
        loop do
          step_num += 1
          # execute a superstep and wait for workers to complete
          debug "Initializing superstep #{step_num}"
          run_phase(participants, :init_superstep)

          debug "Running superstep #{step_num}"
          run_phase(participants, :superstep)

          break if active_workers(participants).empty?
        end
        debug "Job finished!"
      end

      # Alias-style entry point; simply delegates to #run_until_finished.
      def run_job
        run_until_finished
      end

      # Asks every registered worker to stop (asynchronously) and then
      # signals the stop condition.
      def stop
        @workers.each do |w|
          DCell::Node[w][:worker].async.stop
        end
        @stop_condition.signal(true)
      end

      # Runs the job to completion and then signals the stop condition.
      def run_and_stop
        run_until_finished
        @stop_condition.signal(true)
      end

      # Collects the vertex values reported by every worker that holds a
      # partition and returns them as one flat array. The block parameter is
      # accepted for interface compatibility but is not used.
      def vertex_values(&block)
        @partitions.keys.map do |w|
          DCell::Node[w][:worker].vertex_values
        end.flatten
      end

      private

      # Node ids among +nodes+ whose worker still reports active vertices.
      def active_workers(nodes)
        nodes.select { |w| DCell::Node[w][:worker].active > 0 }
      end

      # Starts +phase+ on every active worker in +nodes+ via a future and
      # blocks until all of them have returned.
      def run_phase(nodes, phase)
        pending = active_workers(nodes).map { |w| DCell::Node[w][:worker].future(phase) }
        pending.map { |f| f.value }
      end

    end # class Coordinator
  end # module Distributed
end # module Elaine
@@ -0,0 +1,80 @@
1
+ require 'dcell'
2
+
3
module Elaine
  module Distributed
    # Celluloid actor holding the mailboxes of the vertices that live on this
    # DCell node, plus the routing table (vertex id => node id) used to
    # forward messages addressed to vertices on other nodes.
    class PostOffice
      include Celluloid
      include Celluloid::Logger

      attr_reader :mailboxes
      attr_reader :zipcodes

      def initialize
        @mailboxes = {}
        @zipcodes = {}
      end

      # Installs a new routing table and resets the local mailboxes: every
      # vertex id routed to this node gets a fresh, empty mailbox.
      def zipcodes=(zipcodes)
        @zipcodes = zipcodes

        @mailboxes = {}
        my_id = DCell.me.id
        @zipcodes.each_pair do |k, v|
          if v == my_id
            debug "Creating mailbox for: #{k}"
            @mailboxes[k] = []
          end
        end
      end

      # Returns the DCell node that hosts the mailbox of vertex +to+.
      def address(to)
        DCell::Node[@zipcodes[to]]
      end

      # Delivers +msg+ to the mailbox of +to+: appended directly when the
      # mailbox is local, otherwise forwarded asynchronously to the post
      # office on the owning node. Always returns nil.
      def deliver(to, msg)
        node = address(to)

        if local?(node)
          @mailboxes[to].push msg
        else
          node[:postoffice].async.deliver(to, msg)
        end
        nil
      end

      # Returns the messages in +mailbox+ without removing them, asking the
      # owning node's post office when the mailbox is not local.
      def read(mailbox)
        node = address(mailbox)
        # Was `Dcell.me.id`: a misspelt constant that raised NameError on
        # every call.
        if local?(node)
          @mailboxes[mailbox]
        else
          node[:postoffice].read mailbox
        end
      end

      # Returns a copy of the messages in the local +mailbox+ and empties it.
      #
      # Raises RuntimeError when the mailbox lives on another node.
      def read_all(mailbox)
        node = address(mailbox)
        raise "Can't destructively read a non-local mailbox!" unless local?(node)

        msgs = @mailboxes[mailbox].dup
        @mailboxes[mailbox].clear
        msgs
      end

      private

      # True when +node+ is the DCell node this post office runs on.
      def local?(node)
        node.id.eql?(DCell.me.id)
      end

    end # class PostOffice
  end # module Distributed
end # module Elaine
@@ -0,0 +1,52 @@
1
+ require 'celluloid'
2
module Elaine
  module Distributed
    # Base class for vertices in the distributed runtime. Subclasses override
    # #compute; outgoing messages are handed to the post office object given
    # at construction time.
    class Vertex
      # include Celluloid
      # include Celluloid::Logger

      attr_reader :id, :superstep
      attr_accessor :value, :messages

      # id::         identifier of this vertex.
      # value::      initial vertex value.
      # postoffice:: object whose +async+ proxy receives +deliver(to, msg)+.
      # outedges::   collection of neighbour ids.
      def initialize(id, value, postoffice, outedges)
        # Might be better to grab post_office dynamically with Celluloid::Actor ?
        @postoffice = postoffice
        @id = id
        @value = value
        @outedges = outedges
        @messages = []
        @superstep = 0
        @active = true
      end

      # Returns the out-edges; when a block is given, each edge is yielded
      # first.
      def edges
        return @outedges unless block_given?

        @outedges.each { |edge| yield edge }
      end

      # Sends +msg+ to every out-edge.
      def deliver_to_all_neighbors(msg)
        edges.each do |neighbor|
          deliver(neighbor, msg)
        end
      end

      # Sends +msg+ to vertex +to+ through the post office's async proxy.
      def deliver(to, msg)
        @postoffice.async.deliver(to, msg)
      end

      # Advances the superstep counter and runs #compute, returning its
      # result.
      def step
        @superstep += 1
        compute
      end

      # Marks this vertex inactive.
      def halt
        @active = false
      end

      # Marks this vertex active.
      def active!
        @active = true
      end

      # Current activity flag.
      def active?
        @active
      end

      # Same collection as #edges without a block.
      def neighbors
        @outedges
      end

      # Marks this vertex inactive (same effect as #halt).
      def vote_to_stop
        @active = false
      end

      # Hook for subclasses; does nothing here.
      def compute
      end
    end # class Vertex
  end # module Distributed
end # module Elaine
@@ -0,0 +1,85 @@
1
+ require 'dcell'
2
+
3
module Elaine
  module Distributed
    # Celluloid actor that owns one slice of the graph on a DCell node. It
    # registers itself with the coordinator on start-up and afterwards runs
    # supersteps over its local vertices on request.
    class Worker
      include Celluloid
      include Celluloid::Logger

      attr_reader :vertices, :active, :vertices2

      # coordinator_node:: DCell node id hosting the +:coordinator+ actor.
      # g, zipcodes::      accepted for interface compatibility; not used
      #                    here (the graph arrives through #init_graph).
      # stop_condition::   condition signalled by #stop.
      def initialize(coordinator_node: "elaine.coordinator", g: [], zipcodes: {}, stop_condition: Celluloid::Condition.new)
        @coordinator_node = coordinator_node

        # Start from a well-defined "no graph yet" state, so that #active,
        # #init_superstep, #superstep and #vertex_values work on a worker
        # that has not (or not yet) been handed a partition instead of
        # failing on nil.
        @vertices = []
        @vertices2 = []
        @active = 0
        @superstep_num = 0
        @stop_condition = stop_condition

        DCell::Node[@coordinator_node][:coordinator].register_worker DCell.me.id
      end

      # Replaces the local graph. Each entry of +g+ must answer +[:id]+,
      # +[:value]+, +[:outedges]+ and +[:klazz]+ (the vertex class to
      # instantiate). All vertices start out counted as active.
      #
      # Raises RuntimeError when +g+ is empty.
      def init_graph(g=[])
        raise 'empty worker graph' if g.empty?

        postoffice = Celluloid::Actor[:postoffice]

        # HACK the local vertices should be dealt with differently than
        # the @vertices2 member
        ids = []
        built = []
        g.each do |n|
          ids << n[:id]
          built << n[:klazz].new(n[:id], n[:value], postoffice, n[:outedges])
        end
        @vertices = ids
        @vertices2 = built
        @active = @vertices.size

        debug "There are #{@vertices.size} vertices in this worker."
      end

      # Moves the messages queued in the local post office into each vertex.
      # HACK this should be handled better...
      def init_superstep
        postoffice = Celluloid::Actor[:postoffice]
        @vertices2.each do |v|
          v.messages = postoffice.read_all(v.id)
        end
        debug "#{DCell.me.id} finished init_superstep"
      end

      # Maps +block+ over +enum+ with one Celluloid future per element and
      # returns the results in order.
      def pmap(enum, &block)
        futures = enum.map { |elem| Celluloid::Future.new(elem, &block) }
        futures.map { |future| future.value }
      end

      # Signals the stop condition.
      def stop
        @stop_condition.signal(true)
      end

      # Steps every currently active vertex (through #pmap) and records how
      # many of those are still active afterwards.
      def superstep
        stepping = @vertices2.select { |v| v.active? }

        pmap(stepping) { |v| v.step }

        @active = stepping.count { |v| v.active? }
      end

      # Returns one { id:, value: } hash per local vertex.
      def vertex_values
        @vertices2.map { |v| {id: v.id, value: v.value} }
      end
    end # class Worker
  end # module Distributed
end # module Elaine
@@ -0,0 +1,3 @@
1
module Elaine
  # Gem release version. Frozen so the shared string cannot be mutated in
  # place by code that reads it.
  VERSION = "0.0.3".freeze
end
@@ -0,0 +1,41 @@
1
module Elaine
  # Base class for vertices in the in-process runtime. Subclasses override
  # #compute; outgoing messages go through the PostOffice singleton.
  class Vertex
    attr_reader :id, :superstep
    attr_accessor :value, :messages

    # id::       identifier of this vertex.
    # value::    initial vertex value.
    # outedges:: remaining arguments, collected as the list of neighbour ids.
    def initialize(id, value, *outedges)
      @outedges = outedges
      @id = id
      @value = value
      @messages = []
      @superstep = 0
      @active = true
    end

    # Returns the out-edges; when a block is given, each edge is yielded
    # first.
    def edges
      return @outedges unless block_given?

      @outedges.each { |edge| yield edge }
    end

    # Sends +msg+ to every out-edge.
    def deliver_to_all_neighbors(msg)
      edges.each do |neighbor|
        deliver(neighbor, msg)
      end
    end

    # Sends +msg+ to vertex +to+ via the PostOffice singleton.
    def deliver(to, msg)
      PostOffice.instance.deliver(to, msg)
    end

    # Advances the superstep counter and runs #compute, returning its result.
    def step
      @superstep += 1
      compute
    end

    # Marks this vertex inactive.
    def halt
      @active = false
    end

    # Marks this vertex active.
    def active!
      @active = true
    end

    # Current activity flag.
    def active?
      @active
    end

    # Same collection as #edges without a block.
    def neighbors
      @outedges
    end

    # Hook for subclasses; does nothing here.
    def compute
    end
  end
end