documentcloud-cloud-crowd 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +16 -16
- data/cloud-crowd.gemspec +4 -3
- data/config/config.example.yml +17 -12
- data/lib/cloud-crowd.rb +42 -24
- data/lib/cloud_crowd/action.rb +6 -4
- data/lib/cloud_crowd/asset_store.rb +7 -7
- data/lib/cloud_crowd/asset_store/filesystem_store.rb +15 -9
- data/lib/cloud_crowd/asset_store/s3_store.rb +10 -11
- data/lib/cloud_crowd/command_line.rb +12 -7
- data/lib/cloud_crowd/exceptions.rb +7 -4
- data/lib/cloud_crowd/helpers/authorization.rb +3 -1
- data/lib/cloud_crowd/models/job.rb +19 -21
- data/lib/cloud_crowd/models/node_record.rb +24 -10
- data/lib/cloud_crowd/models/work_unit.rb +39 -25
- data/lib/cloud_crowd/node.rb +24 -6
- data/lib/cloud_crowd/schema.rb +3 -2
- data/lib/cloud_crowd/server.rb +9 -4
- data/lib/cloud_crowd/worker.rb +33 -48
- data/public/css/admin_console.css +17 -7
- data/public/images/server_busy.png +0 -0
- data/public/js/admin_console.js +3 -1
- data/test/config/config.yml +1 -1
- data/test/unit/test_action.rb +1 -1
- data/test/unit/test_job.rb +2 -0
- data/views/{index.erb → operations_center.erb} +5 -5
- metadata +4 -3
data/lib/cloud_crowd/worker.rb
CHANGED
|
@@ -1,13 +1,14 @@
|
|
|
1
1
|
module CloudCrowd
|
|
2
2
|
|
|
3
|
-
# The Worker,
|
|
4
|
-
#
|
|
5
|
-
#
|
|
6
|
-
# actions directory). If communication with the central server is
|
|
7
|
-
# the
|
|
8
|
-
# Worker::RETRY_WAIT seconds. Any exceptions that take place during
|
|
3
|
+
# The Worker, forked off from the Node when a new WorkUnit is received,
|
|
4
|
+
# launches an Action for processing. Workers will only ever receive WorkUnits
|
|
5
|
+
# that they are able to handle (for which they have a corresponding action in
|
|
6
|
+
# their actions directory). If communication with the central server is
|
|
7
|
+
# interrupted, the Worker will repeatedly attempt to complete its unit --
|
|
8
|
+
# every Worker::RETRY_WAIT seconds. Any exceptions that take place during
|
|
9
9
|
# the course of the Action will cause the Worker to mark the WorkUnit as
|
|
10
|
-
# having failed.
|
|
10
|
+
# having failed. When finished, the Worker's process exits, minimizing the
|
|
11
|
+
# potential for memory leaks.
|
|
11
12
|
class Worker
|
|
12
13
|
|
|
13
14
|
# Wait five seconds to retry, after internal communication errors.
|
|
@@ -15,40 +16,28 @@ module CloudCrowd
|
|
|
15
16
|
|
|
16
17
|
attr_reader :action
|
|
17
18
|
|
|
18
|
-
#
|
|
19
|
-
# connection to S3. This AssetStore gets passed into each action, for use
|
|
20
|
-
# as it is run.
|
|
19
|
+
# A new Worker begins processing its WorkUnit straight off.
|
|
21
20
|
def initialize(node, work_unit)
|
|
22
|
-
Signal.trap('INT') { shut_down }
|
|
23
|
-
Signal.trap('KILL') { shut_down }
|
|
24
|
-
Signal.trap('TERM') { shut_down }
|
|
25
21
|
@pid = $$
|
|
26
22
|
@node = node
|
|
23
|
+
trap_signals
|
|
27
24
|
setup_work_unit(work_unit)
|
|
28
25
|
run
|
|
29
26
|
end
|
|
30
27
|
|
|
31
|
-
#
|
|
32
|
-
# def fetch_work_unit
|
|
33
|
-
# keep_trying_to "fetch a new work unit" do
|
|
34
|
-
# unit_json = @server['/work'].post(base_params)
|
|
35
|
-
# setup_work_unit(unit_json)
|
|
36
|
-
# end
|
|
37
|
-
# end
|
|
38
|
-
|
|
39
|
-
# Return output to the central server, marking the current work unit as done.
|
|
28
|
+
# Return output to the central server, marking the WorkUnit done.
|
|
40
29
|
def complete_work_unit(result)
|
|
41
30
|
keep_trying_to "complete work unit" do
|
|
42
|
-
data =
|
|
31
|
+
data = base_params.merge({:status => 'succeeded', :output => result})
|
|
43
32
|
@node.server["/work/#{data[:id]}"].put(data)
|
|
44
33
|
log "finished #{display_work_unit} in #{data[:time]} seconds"
|
|
45
34
|
end
|
|
46
35
|
end
|
|
47
36
|
|
|
48
|
-
# Mark the
|
|
37
|
+
# Mark the WorkUnit failed, returning the exception to central.
|
|
49
38
|
def fail_work_unit(exception)
|
|
50
39
|
keep_trying_to "mark work unit as failed" do
|
|
51
|
-
data =
|
|
40
|
+
data = base_params.merge({:status => 'failed', :output => {'output' => exception.message}.to_json})
|
|
52
41
|
@node.server["/work/#{data[:id]}"].put(data)
|
|
53
42
|
log "failed #{display_work_unit} in #{data[:time]} seconds\n#{exception.message}\n#{exception.backtrace}"
|
|
54
43
|
end
|
|
@@ -69,12 +58,14 @@ module CloudCrowd
|
|
|
69
58
|
end
|
|
70
59
|
end
|
|
71
60
|
|
|
72
|
-
# Loggable
|
|
61
|
+
# Loggable details describing what the Worker is up to.
|
|
73
62
|
def display_work_unit
|
|
74
63
|
"unit ##{@options['work_unit_id']} (#{@action_name}/#{CloudCrowd.display_status(@status)})"
|
|
75
64
|
end
|
|
76
65
|
|
|
77
|
-
# Executes the
|
|
66
|
+
# Executes the WorkUnit by running the Action, catching all exceptions as
|
|
67
|
+
# failures. We capture the thread so that we can kill it from the outside,
|
|
68
|
+
# when exiting.
|
|
78
69
|
def run_work_unit
|
|
79
70
|
@worker_thread = Thread.new do
|
|
80
71
|
begin
|
|
@@ -91,12 +82,14 @@ module CloudCrowd
|
|
|
91
82
|
complete_work_unit({'output' => result}.to_json)
|
|
92
83
|
rescue Exception => e
|
|
93
84
|
fail_work_unit(e)
|
|
85
|
+
ensure
|
|
86
|
+
@action.cleanup_work_directory
|
|
94
87
|
end
|
|
95
88
|
end
|
|
96
89
|
@worker_thread.join
|
|
97
90
|
end
|
|
98
91
|
|
|
99
|
-
# Wraps
|
|
92
|
+
# Wraps run_work_unit to benchmark the execution time, if requested.
|
|
100
93
|
def run
|
|
101
94
|
return run_work_unit unless @options['benchmark']
|
|
102
95
|
status = CloudCrowd.display_status(@status)
|
|
@@ -106,23 +99,15 @@ module CloudCrowd
|
|
|
106
99
|
|
|
107
100
|
private
|
|
108
101
|
|
|
109
|
-
# Common parameters to send back to central.
|
|
110
|
-
def base_params
|
|
111
|
-
@base_params ||= {
|
|
112
|
-
:pid => @pid
|
|
113
|
-
}
|
|
114
|
-
end
|
|
115
|
-
|
|
116
102
|
# Common parameters to send back to central upon unit completion,
|
|
117
103
|
# regardless of success or failure.
|
|
118
|
-
def
|
|
119
|
-
|
|
120
|
-
:id
|
|
121
|
-
:time
|
|
122
|
-
})
|
|
104
|
+
def base_params
|
|
105
|
+
{ :pid => @pid,
|
|
106
|
+
:id => @options['work_unit_id'],
|
|
107
|
+
:time => Time.now - @start_time }
|
|
123
108
|
end
|
|
124
109
|
|
|
125
|
-
# Extract
|
|
110
|
+
# Extract the Worker's instance variables from a WorkUnit's JSON.
|
|
126
111
|
def setup_work_unit(unit)
|
|
127
112
|
return false unless unit
|
|
128
113
|
@start_time = Time.now
|
|
@@ -139,15 +124,15 @@ module CloudCrowd
|
|
|
139
124
|
puts "Worker ##{@pid}: #{message}" unless ENV['RACK_ENV'] == 'test'
|
|
140
125
|
end
|
|
141
126
|
|
|
142
|
-
# When
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
127
|
+
# When signaled to exit, make sure that the Worker shuts down cleanly.
|
|
128
|
+
def trap_signals
|
|
129
|
+
Signal.trap('INT') { shut_down }
|
|
130
|
+
Signal.trap('KILL') { shut_down }
|
|
131
|
+
Signal.trap('TERM') { shut_down }
|
|
147
132
|
end
|
|
148
133
|
|
|
149
|
-
# Force the
|
|
150
|
-
# If it had checked
|
|
134
|
+
# Force the Worker to quit, even if it's in the middle of processing.
|
|
135
|
+
# If it had a checked-out WorkUnit, the Node should have released it on
|
|
151
136
|
# the central server already.
|
|
152
137
|
def shut_down
|
|
153
138
|
if @worker_thread
|
|
@@ -158,23 +158,33 @@ body {
|
|
|
158
158
|
border: 1px solid transparent;
|
|
159
159
|
margin: 1px 7px;
|
|
160
160
|
padding-left: 18px;
|
|
161
|
+
background-position: left center;
|
|
162
|
+
background-repeat: no-repeat;
|
|
161
163
|
}
|
|
162
164
|
#nodes .node {
|
|
163
165
|
font-size: 11px;
|
|
164
166
|
line-height: 22px;
|
|
165
|
-
background: url(/images/server.png)
|
|
167
|
+
background-image: url(/images/server.png);
|
|
166
168
|
}
|
|
169
|
+
#nodes .node.busy {
|
|
170
|
+
background-image: url(/images/server_busy.png);
|
|
171
|
+
}
|
|
172
|
+
#nodes .node.busy span.busy {
|
|
173
|
+
font-size: 9px;
|
|
174
|
+
color: #7f7f7f;
|
|
175
|
+
text-transform: uppercase;
|
|
176
|
+
}
|
|
167
177
|
#nodes .worker {
|
|
168
178
|
font-size: 10px;
|
|
169
179
|
line-height: 18px;
|
|
170
180
|
cursor: pointer;
|
|
171
|
-
background: url(/images/bullet_green.png)
|
|
172
|
-
}
|
|
173
|
-
#nodes .worker:hover {
|
|
174
|
-
border: 1px solid #aaa;
|
|
175
|
-
border-radius: 4px; -moz-border-radius: 4px; -webkit-border-radius: 4px;
|
|
176
|
-
background-color: #ccc;
|
|
181
|
+
background-image: url(/images/bullet_green.png);
|
|
177
182
|
}
|
|
183
|
+
#nodes .worker:hover {
|
|
184
|
+
border: 1px solid #aaa;
|
|
185
|
+
border-radius: 4px; -moz-border-radius: 4px; -webkit-border-radius: 4px;
|
|
186
|
+
background-color: #ccc;
|
|
187
|
+
}
|
|
178
188
|
|
|
179
189
|
#worker_info {
|
|
180
190
|
position: absolute;
|
|
Binary file
|
data/public/js/admin_console.js
CHANGED
|
@@ -117,6 +117,7 @@ window.Console = {
|
|
|
117
117
|
},
|
|
118
118
|
|
|
119
119
|
// Re-render all workers from scratch each time.
|
|
120
|
+
// This method is desperately in need of Javascript templates...
|
|
120
121
|
renderNodes : function() {
|
|
121
122
|
var header = $('#sidebar_header');
|
|
122
123
|
var nc = this._nodes.length, wc = this._workerCount;
|
|
@@ -124,7 +125,8 @@ window.Console = {
|
|
|
124
125
|
header.toggleClass('no_nodes', this._nodes.length <= 0);
|
|
125
126
|
$('#nodes').html($.map(this._nodes, function(node) {
|
|
126
127
|
var html = "";
|
|
127
|
-
|
|
128
|
+
var extra = node.status == 'busy' ? ' <span class="busy">[busy]</span>' : '';
|
|
129
|
+
html += '<div class="node ' + node.status + '">' + node.host + extra + '</div>';
|
|
128
130
|
html += $.map(node.workers, function(pid) {
|
|
129
131
|
var name = pid + '@' + node.host;
|
|
130
132
|
return '<div class="worker" rel="' + name + '">' + name + '</div>';
|
data/test/config/config.yml
CHANGED
data/test/unit/test_action.rb
CHANGED
|
@@ -27,7 +27,7 @@ class ActionTest < Test::Unit::TestCase
|
|
|
27
27
|
end
|
|
28
28
|
|
|
29
29
|
should "be able to save (to the filesystem while testing)" do
|
|
30
|
-
assert @action.save(@action.input_path) == "file://#{
|
|
30
|
+
assert @action.save(@action.input_path) == "file://#{@store.local_storage_path}/word_count/job_1/unit_1/test_action.rb"
|
|
31
31
|
end
|
|
32
32
|
|
|
33
33
|
should "be able to clean up after itself" do
|
data/test/unit/test_job.rb
CHANGED
|
@@ -61,6 +61,8 @@ class JobTest < Test::Unit::TestCase
|
|
|
61
61
|
end
|
|
62
62
|
|
|
63
63
|
should "fire a callback when a job has finished, successfully or not" do
|
|
64
|
+
@job.update_attribute(:callback_url, 'http://example.com/callback')
|
|
65
|
+
CloudCrowd::Job.any_instance.stubs(:fire_callback).returns(true)
|
|
64
66
|
CloudCrowd::Job.any_instance.expects(:fire_callback)
|
|
65
67
|
@job.work_units.first.finish('{"output":"output"}', 10)
|
|
66
68
|
assert @job.all_work_units_complete?
|
|
@@ -55,11 +55,11 @@
|
|
|
55
55
|
</div>
|
|
56
56
|
<div class="graph_container">
|
|
57
57
|
<div class="graph_title">
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
58
|
+
Active Nodes
|
|
59
|
+
(<span id="nodes_legend" class="legend_box"></span>)
|
|
60
|
+
and Workers
|
|
61
|
+
(<span id="workers_legend" class="legend_box"></span>)
|
|
62
|
+
</div>
|
|
63
63
|
<div id="workers_graph" class="graph"></div>
|
|
64
64
|
</div>
|
|
65
65
|
</div>
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: documentcloud-cloud-crowd
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.2.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Jeremy Ashkenas
|
|
@@ -9,7 +9,7 @@ autorequire:
|
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
11
|
|
|
12
|
-
date: 2009-09-
|
|
12
|
+
date: 2009-09-17 00:00:00 -07:00
|
|
13
13
|
default_executable:
|
|
14
14
|
dependencies:
|
|
15
15
|
- !ruby/object:Gem::Dependency
|
|
@@ -171,6 +171,7 @@ files:
|
|
|
171
171
|
- public/images/logo.png
|
|
172
172
|
- public/images/queue_fill.png
|
|
173
173
|
- public/images/server.png
|
|
174
|
+
- public/images/server_busy.png
|
|
174
175
|
- public/images/server_error.png
|
|
175
176
|
- public/images/sidebar_bottom.png
|
|
176
177
|
- public/images/sidebar_top.png
|
|
@@ -194,7 +195,7 @@ files:
|
|
|
194
195
|
- test/unit/test_configuration.rb
|
|
195
196
|
- test/unit/test_job.rb
|
|
196
197
|
- test/unit/test_work_unit.rb
|
|
197
|
-
- views/
|
|
198
|
+
- views/operations_center.erb
|
|
198
199
|
has_rdoc: true
|
|
199
200
|
homepage: http://wiki.github.com/documentcloud/cloud-crowd
|
|
200
201
|
licenses:
|