cloud-crowd 0.3.3 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- data/cloud-crowd.gemspec +6 -6
- data/config/config.example.yml +23 -10
- data/lib/cloud-crowd.rb +4 -4
- data/lib/cloud_crowd/action.rb +24 -23
- data/lib/cloud_crowd/asset_store.rb +3 -1
- data/lib/cloud_crowd/asset_store/cloudfiles_store.rb +41 -0
- data/lib/cloud_crowd/asset_store/s3_store.rb +9 -7
- data/lib/cloud_crowd/models/node_record.rb +27 -26
- data/lib/cloud_crowd/models/work_unit.rb +35 -28
- data/lib/cloud_crowd/node.rb +43 -43
- data/lib/cloud_crowd/schema.rb +7 -7
- data/lib/cloud_crowd/server.rb +35 -30
- data/public/css/admin_console.css +25 -62
- data/public/js/admin_console.js +53 -70
- data/test/acceptance/test_server.rb +14 -16
- data/test/unit/test_action.rb +17 -15
- data/views/operations_center.erb +26 -13
- metadata +94 -59
@@ -6,36 +6,36 @@ module CloudCrowd
|
|
6
6
|
# are each run as a single WorkUnit.
|
7
7
|
class WorkUnit < ActiveRecord::Base
|
8
8
|
include ModelStatus
|
9
|
-
|
9
|
+
|
10
10
|
# We use a random number in (0...MAX_RESERVATION) to reserve work units.
|
11
11
|
# The size of the maximum signed integer in MySQL -- SQLite has no limit.
|
12
12
|
MAX_RESERVATION = 2147483647
|
13
|
-
|
13
|
+
|
14
14
|
# We only reserve a certain number of WorkUnits in a single go, to avoid
|
15
15
|
# reserving the entire table.
|
16
16
|
RESERVATION_LIMIT = 25
|
17
|
-
|
17
|
+
|
18
18
|
belongs_to :job
|
19
19
|
belongs_to :node_record
|
20
|
-
|
20
|
+
|
21
21
|
validates_presence_of :job_id, :status, :input, :action
|
22
|
-
|
22
|
+
|
23
23
|
# Available WorkUnits are waiting to be distributed to Nodes for processing.
|
24
24
|
named_scope :available, {:conditions => {:reservation => nil, :worker_pid => nil, :status => INCOMPLETE}}
|
25
25
|
# Reserved WorkUnits have been marked for distribution by a central server process.
|
26
|
-
named_scope :reserved, lambda {|reservation|
|
26
|
+
named_scope :reserved, lambda {|reservation|
|
27
27
|
{:conditions => {:reservation => reservation}, :order => 'updated_at asc'}
|
28
28
|
}
|
29
|
-
|
29
|
+
|
30
30
|
# Attempt to send a list of WorkUnits to nodes with available capacity.
|
31
31
|
# A single central server process stops the same WorkUnit from being
|
32
32
|
# distributed to multiple nodes by reserving it first. The algorithm used
|
33
33
|
# should be lock-free.
|
34
34
|
#
|
35
35
|
# We reserve WorkUnits for this process in chunks of RESERVATION_LIMIT size,
|
36
|
-
# and try to match them to Nodes that are capable of handling the Action.
|
37
|
-
# WorkUnits get removed from the availability list when they are
|
38
|
-
# successfully sent, and Nodes get removed when they are busy or have the
|
36
|
+
# and try to match them to Nodes that are capable of handling the Action.
|
37
|
+
# WorkUnits get removed from the availability list when they are
|
38
|
+
# successfully sent, and Nodes get removed when they are busy or have the
|
39
39
|
# action in question disabled.
|
40
40
|
def self.distribute_to_nodes
|
41
41
|
reservation = nil
|
@@ -44,11 +44,13 @@ module CloudCrowd
|
|
44
44
|
work_units = WorkUnit.reserved(reservation)
|
45
45
|
available_nodes = NodeRecord.available
|
46
46
|
while node = available_nodes.shift and unit = work_units.shift do
|
47
|
-
if node.actions.include?
|
47
|
+
if node.actions.include?(unit.action)
|
48
48
|
if node.send_work_unit(unit)
|
49
49
|
available_nodes.push(node) unless node.busy?
|
50
50
|
next
|
51
51
|
end
|
52
|
+
else
|
53
|
+
unit.cancel_reservation
|
52
54
|
end
|
53
55
|
work_units.push(unit)
|
54
56
|
end
|
@@ -57,26 +59,26 @@ module CloudCrowd
|
|
57
59
|
ensure
|
58
60
|
WorkUnit.cancel_reservations(reservation) if reservation
|
59
61
|
end
|
60
|
-
|
61
|
-
# Reserves all available WorkUnits for this process. Returns false if there
|
62
|
+
|
63
|
+
# Reserves all available WorkUnits for this process. Returns false if there
|
62
64
|
# were none available.
|
63
65
|
def self.reserve_available(options={})
|
64
66
|
reservation = ActiveSupport::SecureRandom.random_number(MAX_RESERVATION)
|
65
67
|
any = WorkUnit.available.update_all("reservation = #{reservation}", nil, options) > 0
|
66
68
|
any && reservation
|
67
69
|
end
|
68
|
-
|
70
|
+
|
69
71
|
# Cancels all outstanding WorkUnit reservations for this process.
|
70
72
|
def self.cancel_reservations(reservation)
|
71
73
|
WorkUnit.reserved(reservation).update_all('reservation = null')
|
72
74
|
end
|
73
|
-
|
75
|
+
|
74
76
|
# Cancels all outstanding WorkUnit reservations for all processes. (Useful
|
75
77
|
# in the console for debugging.)
|
76
78
|
def self.cancel_all_reservations
|
77
79
|
WorkUnit.update_all('reservation = null')
|
78
80
|
end
|
79
|
-
|
81
|
+
|
80
82
|
# Look up a WorkUnit by the worker that's currently processing it. Specified
|
81
83
|
# by <tt>pid@host</tt>.
|
82
84
|
def self.find_by_worker_name(name)
|
@@ -84,16 +86,16 @@ module CloudCrowd
|
|
84
86
|
node = NodeRecord.find_by_host(host)
|
85
87
|
node && node.work_units.find_by_worker_pid(pid)
|
86
88
|
end
|
87
|
-
|
89
|
+
|
88
90
|
# Convenience method for starting a new WorkUnit.
|
89
91
|
def self.start(job, action, input, status)
|
90
92
|
input = input.to_json unless input.is_a? String
|
91
93
|
self.create(:job => job, :action => action, :input => input, :status => status)
|
92
94
|
end
|
93
|
-
|
95
|
+
|
94
96
|
# Mark this unit as having finished successfully.
|
95
|
-
# Splitting work units are handled differently (an optimization) -- they
|
96
|
-
# immediately fire off all of their resulting WorkUnits for processing,
|
97
|
+
# Splitting work units are handled differently (an optimization) -- they
|
98
|
+
# immediately fire off all of their resulting WorkUnits for processing,
|
97
99
|
# without waiting for the rest of their splitting cousins to complete.
|
98
100
|
def finish(result, time_taken)
|
99
101
|
if splitting?
|
@@ -114,7 +116,7 @@ module CloudCrowd
|
|
114
116
|
job && job.check_for_completion
|
115
117
|
end
|
116
118
|
end
|
117
|
-
|
119
|
+
|
118
120
|
# Mark this unit as having failed. May attempt a retry.
|
119
121
|
def fail(output, time_taken)
|
120
122
|
tries = self.attempts + 1
|
@@ -129,7 +131,7 @@ module CloudCrowd
|
|
129
131
|
})
|
130
132
|
job && job.check_for_completion
|
131
133
|
end
|
132
|
-
|
134
|
+
|
133
135
|
# Ever tried. Ever failed. No matter. Try again. Fail again. Fail better.
|
134
136
|
def try_again
|
135
137
|
update_attributes({
|
@@ -138,20 +140,25 @@ module CloudCrowd
|
|
138
140
|
:attempts => self.attempts + 1
|
139
141
|
})
|
140
142
|
end
|
141
|
-
|
143
|
+
|
144
|
+
# If the node can't process the unit, cancel its reservation.
|
145
|
+
def cancel_reservation
|
146
|
+
update_attributes!(:reservation => nil)
|
147
|
+
end
|
148
|
+
|
142
149
|
# When a Node checks out a WorkUnit, establish the connection between
|
143
150
|
# WorkUnit and NodeRecord and record the worker_pid.
|
144
151
|
def assign_to(node_record, worker_pid)
|
145
152
|
update_attributes!(:node_record => node_record, :worker_pid => worker_pid)
|
146
153
|
end
|
147
|
-
|
148
|
-
# All output needs to be wrapped in a JSON object for consistency
|
149
|
-
# (unfortunately, JSON.parse needs the top-level to be an object or array).
|
154
|
+
|
155
|
+
# All output needs to be wrapped in a JSON object for consistency
|
156
|
+
# (unfortunately, JSON.parse needs the top-level to be an object or array).
|
150
157
|
# Convenience method to provide the parsed version.
|
151
158
|
def parsed_output(out = self.output)
|
152
159
|
JSON.parse(out)['output']
|
153
160
|
end
|
154
|
-
|
161
|
+
|
155
162
|
# The JSON representation of a WorkUnit shares the Job's options with all
|
156
163
|
# its cousin WorkUnits.
|
157
164
|
def to_json
|
@@ -165,6 +172,6 @@ module CloudCrowd
|
|
165
172
|
'status' => self.status
|
166
173
|
}.to_json
|
167
174
|
end
|
168
|
-
|
175
|
+
|
169
176
|
end
|
170
177
|
end
|
data/lib/cloud_crowd/node.rb
CHANGED
@@ -1,57 +1,57 @@
|
|
1
1
|
module CloudCrowd
|
2
|
-
|
2
|
+
|
3
3
|
# A Node is a Sinatra/Thin application that runs a single instance per-machine
|
4
|
-
# It registers with the central server, receives WorkUnits, and forks off
|
4
|
+
# It registers with the central server, receives WorkUnits, and forks off
|
5
5
|
# Workers to process them. The actions are:
|
6
6
|
#
|
7
7
|
# [get /heartbeat] Returns 200 OK to let monitoring tools know the server's up.
|
8
8
|
# [post /work] The central server hits <tt>/work</tt> to dispatch a WorkUnit to this Node.
|
9
|
-
class Node < Sinatra::
|
10
|
-
|
9
|
+
class Node < Sinatra::Base
|
10
|
+
|
11
11
|
# A Node's default port. You only run a single node per machine, so they
|
12
12
|
# can all use the same port without any problems.
|
13
13
|
DEFAULT_PORT = 9063
|
14
|
-
|
15
|
-
# A list of regex scrapers, which let us extract the one-minute load
|
14
|
+
|
15
|
+
# A list of regex scrapers, which let us extract the one-minute load
|
16
16
|
# average and the amount of free memory on different flavors of UNIX.
|
17
|
-
|
17
|
+
|
18
18
|
SCRAPE_UPTIME = /\d+\.\d+/
|
19
19
|
SCRAPE_LINUX_MEMORY = /MemFree:\s+(\d+) kB/
|
20
|
-
SCRAPE_MAC_MEMORY = /Pages free:\s+(\d+)./
|
20
|
+
SCRAPE_MAC_MEMORY = /Pages free:\s+(\d+)./
|
21
21
|
SCRAPE_MAC_PAGE = /page size of (\d+) bytes/
|
22
|
-
|
22
|
+
|
23
23
|
# The interval at which the node monitors the machine's load and memory use
|
24
24
|
# (if configured to do so in config.yml).
|
25
25
|
MONITOR_INTERVAL = 3
|
26
|
-
|
26
|
+
|
27
27
|
# The interval at which the node regularly checks in with central (5 min).
|
28
28
|
CHECK_IN_INTERVAL = 300
|
29
|
-
|
29
|
+
|
30
30
|
# The response sent back when this node is overloaded.
|
31
31
|
OVERLOADED_MESSAGE = 'Node Overloaded'
|
32
|
-
|
32
|
+
|
33
33
|
attr_reader :enabled_actions, :host, :port, :central
|
34
|
-
|
34
|
+
|
35
35
|
set :root, ROOT
|
36
36
|
set :authorization_realm, "CloudCrowd"
|
37
|
-
|
37
|
+
|
38
38
|
helpers Helpers
|
39
|
-
|
39
|
+
|
40
40
|
# methodoverride allows the _method param.
|
41
41
|
enable :methodoverride
|
42
|
-
|
42
|
+
|
43
43
|
# Enabling HTTP Authentication turns it on for all requests.
|
44
44
|
# This works the same way as in the central CloudCrowd::Server.
|
45
45
|
before do
|
46
46
|
login_required if CloudCrowd.config[:http_authentication]
|
47
47
|
end
|
48
|
-
|
49
|
-
# To monitor a Node with Monit, God, Nagios, or another tool, you can hit
|
48
|
+
|
49
|
+
# To monitor a Node with Monit, God, Nagios, or another tool, you can hit
|
50
50
|
# /heartbeat to make sure it's still online.
|
51
51
|
get '/heartbeat' do
|
52
52
|
"buh-bump"
|
53
53
|
end
|
54
|
-
|
54
|
+
|
55
55
|
# Posts a WorkUnit to this Node. Forks a Worker and returns the process id.
|
56
56
|
# Returns a 503 if this Node is overloaded.
|
57
57
|
post '/work' do
|
@@ -61,14 +61,14 @@ module CloudCrowd
|
|
61
61
|
Process.detach(pid)
|
62
62
|
json :pid => pid
|
63
63
|
end
|
64
|
-
|
64
|
+
|
65
65
|
# When creating a node, specify the port it should run on.
|
66
66
|
def initialize(port=nil, daemon=false)
|
67
67
|
require 'json'
|
68
68
|
CloudCrowd.identity = :node
|
69
69
|
@central = CloudCrowd.central_server
|
70
70
|
@host = Socket.gethostname
|
71
|
-
@enabled_actions = CloudCrowd.actions.keys
|
71
|
+
@enabled_actions = CloudCrowd.actions.keys - (CloudCrowd.config[:disabled_actions] || [])
|
72
72
|
@port = port || DEFAULT_PORT
|
73
73
|
@daemon = daemon
|
74
74
|
@overloaded = false
|
@@ -76,7 +76,7 @@ module CloudCrowd
|
|
76
76
|
@min_memory = CloudCrowd.config[:min_free_memory]
|
77
77
|
start unless test?
|
78
78
|
end
|
79
|
-
|
79
|
+
|
80
80
|
# Starting up a Node registers with the central server and begins to listen
|
81
81
|
# for incoming WorkUnits.
|
82
82
|
def start
|
@@ -94,9 +94,9 @@ module CloudCrowd
|
|
94
94
|
monitor_system if @max_load || @min_memory
|
95
95
|
@server_thread.join
|
96
96
|
end
|
97
|
-
|
98
|
-
# Checking in with the central server informs it of the location and
|
99
|
-
# configuration of this Node. If it can't check-in, there's no point in
|
97
|
+
|
98
|
+
# Checking in with the central server informs it of the location and
|
99
|
+
# configuration of this Node. If it can't check-in, there's no point in
|
100
100
|
# starting.
|
101
101
|
def check_in(critical=false)
|
102
102
|
@central["/node/#{@host}"].put(
|
@@ -109,31 +109,31 @@ module CloudCrowd
|
|
109
109
|
puts "Failed to connect to the central server (#{@central.to_s})."
|
110
110
|
raise SystemExit if critical
|
111
111
|
end
|
112
|
-
|
112
|
+
|
113
113
|
# Before exiting, the Node checks out with the central server, releasing all
|
114
114
|
# of its WorkUnits for other Nodes to handle
|
115
115
|
def check_out
|
116
116
|
@central["/node/#{@host}"].delete
|
117
117
|
end
|
118
|
-
|
118
|
+
|
119
119
|
# Lazy-initialize the asset_store, preferably after the Node has launched.
|
120
120
|
def asset_store
|
121
121
|
@asset_store ||= AssetStore.new
|
122
122
|
end
|
123
|
-
|
124
|
-
# Is the node overloaded? If configured, checks if the load average is
|
123
|
+
|
124
|
+
# Is the node overloaded? If configured, checks if the load average is
|
125
125
|
# greater than 'max_load', or if the available RAM is less than
|
126
126
|
# 'min_free_memory'.
|
127
127
|
def overloaded?
|
128
128
|
(@max_load && load_average > @max_load) ||
|
129
129
|
(@min_memory && free_memory < @min_memory)
|
130
130
|
end
|
131
|
-
|
131
|
+
|
132
132
|
# The current one-minute load average.
|
133
133
|
def load_average
|
134
134
|
`uptime`.match(SCRAPE_UPTIME).to_s.to_f
|
135
135
|
end
|
136
|
-
|
136
|
+
|
137
137
|
# The current amount of free memory in megabytes.
|
138
138
|
def free_memory
|
139
139
|
case RUBY_PLATFORM
|
@@ -147,12 +147,12 @@ module CloudCrowd
|
|
147
147
|
raise NotImplementedError, "'min_free_memory' is not yet implemented on your platform"
|
148
148
|
end
|
149
149
|
end
|
150
|
-
|
151
|
-
|
150
|
+
|
151
|
+
|
152
152
|
private
|
153
|
-
|
154
|
-
# Launch a monitoring thread that periodically checks the node's load
|
155
|
-
# average and the amount of free memory remaining. If we transition out of
|
153
|
+
|
154
|
+
# Launch a monitoring thread that periodically checks the node's load
|
155
|
+
# average and the amount of free memory remaining. If we transition out of
|
156
156
|
# the overloaded state, let central know.
|
157
157
|
def monitor_system
|
158
158
|
@monitor_thread = Thread.new do
|
@@ -164,9 +164,9 @@ module CloudCrowd
|
|
164
164
|
end
|
165
165
|
end
|
166
166
|
end
|
167
|
-
|
168
|
-
# If communication is interrupted for external reasons, the central server
|
169
|
-
# will assume that the node has gone down. Checking in will let central know
|
167
|
+
|
168
|
+
# If communication is interrupted for external reasons, the central server
|
169
|
+
# will assume that the node has gone down. Checking in will let central know
|
170
170
|
# it's still online.
|
171
171
|
def check_in_periodically
|
172
172
|
@check_in_thread = Thread.new do
|
@@ -176,7 +176,7 @@ module CloudCrowd
|
|
176
176
|
end
|
177
177
|
end
|
178
178
|
end
|
179
|
-
|
179
|
+
|
180
180
|
# Trap exit signals in order to shut down cleanly.
|
181
181
|
def trap_signals
|
182
182
|
Signal.trap('QUIT') { shut_down }
|
@@ -184,7 +184,7 @@ module CloudCrowd
|
|
184
184
|
Signal.trap('KILL') { shut_down }
|
185
185
|
Signal.trap('TERM') { shut_down }
|
186
186
|
end
|
187
|
-
|
187
|
+
|
188
188
|
# At shut down, de-register with the central server before exiting.
|
189
189
|
def shut_down
|
190
190
|
@check_in_thread.kill if @check_in_thread
|
@@ -193,7 +193,7 @@ module CloudCrowd
|
|
193
193
|
@server_thread.kill if @server_thread
|
194
194
|
Process.exit
|
195
195
|
end
|
196
|
-
|
196
|
+
|
197
197
|
end
|
198
|
-
|
198
|
+
|
199
199
|
end
|
data/lib/cloud_crowd/schema.rb
CHANGED
@@ -13,7 +13,7 @@ ActiveRecord::Schema.define(:version => CloudCrowd::SCHEMA_VERSION) do
|
|
13
13
|
t.datetime "created_at"
|
14
14
|
t.datetime "updated_at"
|
15
15
|
end
|
16
|
-
|
16
|
+
|
17
17
|
create_table "node_records", :force => true do |t|
|
18
18
|
t.string "host", :null => false
|
19
19
|
t.string "ip_address", :null => false
|
@@ -41,10 +41,10 @@ ActiveRecord::Schema.define(:version => CloudCrowd::SCHEMA_VERSION) do
|
|
41
41
|
end
|
42
42
|
|
43
43
|
# Here be indices. After looking, it seems faster not to have them at all.
|
44
|
-
#
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
44
|
+
#
|
45
|
+
add_index "jobs", ["status"], :name => "index_jobs_on_status"
|
46
|
+
add_index "work_units", ["job_id"], :name => "index_work_units_on_job_id"
|
47
|
+
add_index "work_units", ["worker_pid"], :name => "index_work_units_on_worker_pid"
|
48
|
+
add_index "work_units", ["worker_pid", "status"], :name => "index_work_units_on_worker_pid_and_status"
|
49
|
+
add_index "work_units", ["worker_pid", "node_record_id"], :name => "index_work_units_on_worker_pid_and_node_record_id"
|
50
50
|
end
|
data/lib/cloud_crowd/server.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
module CloudCrowd
|
2
|
-
|
2
|
+
|
3
3
|
# The main CloudCrowd (Sinatra) application. The actions are:
|
4
4
|
#
|
5
5
|
# == Admin
|
@@ -7,60 +7,65 @@ module CloudCrowd
|
|
7
7
|
# [get /status] Get the combined JSON of every active job and worker.
|
8
8
|
# [get /worker/:name] Look up the details of a WorkUnit that a Worker is busy processing.
|
9
9
|
# [get /heartbeat] Returns 200 OK to let monitoring tools know the server's up.
|
10
|
-
#
|
10
|
+
#
|
11
11
|
# == Public API
|
12
12
|
# [post /jobs] Begin a new Job. Post with a JSON representation of the job-to-be. (see examples).
|
13
13
|
# [get /jobs/:job_id] Check the status of a Job. Response includes output, if the Job has finished.
|
14
14
|
# [delete /jobs/:job_id] Clean up a Job when you're done downloading the results. Removes all intermediate files.
|
15
15
|
#
|
16
16
|
# == Internal Workers API
|
17
|
-
# [
|
17
|
+
# [put /node/:host] Registers a new Node, making it available for processing.
|
18
18
|
# [delete /node/:host] Removes a Node from the registry, freeing up any WorkUnits that it had checked out.
|
19
19
|
# [put /work/:unit_id] Mark a finished WorkUnit as completed or failed, with results.
|
20
|
-
class Server < Sinatra::
|
21
|
-
|
20
|
+
class Server < Sinatra::Base
|
21
|
+
|
22
22
|
set :root, ROOT
|
23
23
|
set :authorization_realm, "CloudCrowd"
|
24
|
-
|
24
|
+
|
25
25
|
helpers Helpers
|
26
|
-
|
26
|
+
|
27
27
|
# static serves files from /public, methodoverride allows the _method param.
|
28
28
|
enable :static, :methodoverride
|
29
|
-
|
29
|
+
|
30
30
|
# Enabling HTTP Authentication turns it on for all requests.
|
31
31
|
before do
|
32
32
|
login_required if CloudCrowd.config[:http_authentication]
|
33
33
|
end
|
34
|
-
|
34
|
+
|
35
35
|
# Render the admin console.
|
36
36
|
get '/' do
|
37
37
|
erb :operations_center
|
38
38
|
end
|
39
|
-
|
39
|
+
|
40
40
|
# Get the JSON for every active job in the queue and every active worker
|
41
41
|
# in the system. This action may get a little worrisome as the system grows
|
42
42
|
# larger -- keep it in mind.
|
43
43
|
get '/status' do
|
44
44
|
json(
|
45
|
-
'jobs' => Job.incomplete,
|
46
45
|
'nodes' => NodeRecord.all(:order => 'host desc'),
|
46
|
+
'job_count' => Job.incomplete.count,
|
47
47
|
'work_unit_count' => WorkUnit.incomplete.count
|
48
48
|
)
|
49
49
|
end
|
50
|
-
|
50
|
+
|
51
|
+
# Get the last 100 lines of log messages.
|
52
|
+
get '/log' do
|
53
|
+
`tail -n 100 #{CloudCrowd.log_path('server.log')}`
|
54
|
+
end
|
55
|
+
|
51
56
|
# Get the JSON for what a worker is up to.
|
52
57
|
get '/worker/:name' do
|
53
58
|
json WorkUnit.find_by_worker_name(params[:name]) || {}
|
54
59
|
end
|
55
|
-
|
56
|
-
# To monitor the central server with Monit, God, Nagios, or another
|
60
|
+
|
61
|
+
# To monitor the central server with Monit, God, Nagios, or another
|
57
62
|
# monitoring tool, you can hit /heartbeat to make sure.
|
58
63
|
get '/heartbeat' do
|
59
64
|
"buh-bump"
|
60
65
|
end
|
61
|
-
|
66
|
+
|
62
67
|
# PUBLIC API:
|
63
|
-
|
68
|
+
|
64
69
|
# Start a new job. Accepts a JSON representation of the job-to-be.
|
65
70
|
# Distributes all work units to available nodes.
|
66
71
|
post '/jobs' do
|
@@ -68,37 +73,37 @@ module CloudCrowd
|
|
68
73
|
WorkUnit.distribute_to_nodes
|
69
74
|
json job
|
70
75
|
end
|
71
|
-
|
76
|
+
|
72
77
|
# Check the status of a job, returning the output if finished, and the
|
73
|
-
# number of work units remaining otherwise.
|
78
|
+
# number of work units remaining otherwise.
|
74
79
|
get '/jobs/:job_id' do
|
75
80
|
json current_job
|
76
81
|
end
|
77
|
-
|
78
|
-
# Cleans up a Job's saved S3 files. Delete a Job after you're done
|
82
|
+
|
83
|
+
# Cleans up a Job's saved S3 files. Delete a Job after you're done
|
79
84
|
# downloading the results.
|
80
85
|
delete '/jobs/:job_id' do
|
81
86
|
current_job.destroy
|
82
87
|
json nil
|
83
88
|
end
|
84
|
-
|
89
|
+
|
85
90
|
# INTERNAL NODE API:
|
86
|
-
|
87
|
-
# A new Node will hit this action to register its location and
|
88
|
-
# configuration with the central server. Triggers distribution of WorkUnits.
|
91
|
+
|
92
|
+
# A new Node will hit this action to register its location and
|
93
|
+
# configuration with the central server. Triggers distribution of WorkUnits.
|
89
94
|
put '/node/:host' do
|
90
95
|
NodeRecord.check_in(params, request)
|
91
96
|
WorkUnit.distribute_to_nodes
|
92
97
|
json nil
|
93
98
|
end
|
94
|
-
|
95
|
-
# Deregisters a Node from the central server. Releases and redistributes any
|
99
|
+
|
100
|
+
# Deregisters a Node from the central server. Releases and redistributes any
|
96
101
|
# WorkUnits it may have had checked out.
|
97
102
|
delete '/node/:host' do
|
98
103
|
NodeRecord.destroy_all(:host => params[:host])
|
99
104
|
json nil
|
100
105
|
end
|
101
|
-
|
106
|
+
|
102
107
|
# When workers are done with their unit, either successfully or in failure,
|
103
108
|
# they mark it back on the central server and exit. Triggers distribution
|
104
109
|
# of pending work units.
|
@@ -111,13 +116,13 @@ module CloudCrowd
|
|
111
116
|
WorkUnit.distribute_to_nodes
|
112
117
|
json nil
|
113
118
|
end
|
114
|
-
|
119
|
+
|
115
120
|
# At initialization record the identity of this Ruby instance as a server.
|
116
121
|
def initialize(*args)
|
117
122
|
super(*args)
|
118
123
|
CloudCrowd.identity = :server
|
119
124
|
end
|
120
|
-
|
125
|
+
|
121
126
|
end
|
122
|
-
|
127
|
+
|
123
128
|
end
|