cloud-crowd 0.2.6 → 0.2.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/cloud-crowd.gemspec +2 -2
- data/config/database.example.yml +1 -1
- data/lib/cloud-crowd.rb +1 -1
- data/lib/cloud_crowd/command_line.rb +18 -4
- data/lib/cloud_crowd/models/job.rb +13 -0
- data/lib/cloud_crowd/models/node_record.rb +1 -1
- data/test/acceptance/test_failing_work_units.rb +1 -1
- data/test/acceptance/test_server.rb +6 -6
- data/test/unit/test_job.rb +20 -8
- data/test/unit/test_work_unit.rb +3 -3
- metadata +2 -2
data/cloud-crowd.gemspec
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Gem::Specification.new do |s|
|
|
2
2
|
s.name = 'cloud-crowd'
|
|
3
|
-
s.version = '0.2.6'
|
|
4
|
-
s.date = '2009-10-
|
|
3
|
+
s.version = '0.2.7' # Keep version in sync with cloud-cloud.rb
|
|
4
|
+
s.date = '2009-10-19'
|
|
5
5
|
|
|
6
6
|
s.homepage = "http://wiki.github.com/documentcloud/cloud-crowd"
|
|
7
7
|
s.summary = "Parallel Processing for the Rest of Us"
|
data/config/database.example.yml
CHANGED
data/lib/cloud-crowd.rb
CHANGED
|
@@ -44,7 +44,7 @@ module CloudCrowd
|
|
|
44
44
|
autoload :WorkUnit, 'cloud_crowd/models'
|
|
45
45
|
|
|
46
46
|
# Keep this version in sync with the gemspec.
|
|
47
|
-
VERSION = '0.2.6'
|
|
47
|
+
VERSION = '0.2.7'
|
|
48
48
|
|
|
49
49
|
# Increment the schema version when there's a backwards incompatible change.
|
|
50
50
|
SCHEMA_VERSION = 3
|
data/lib/cloud_crowd/command_line.rb
CHANGED
|
@@ -24,6 +24,7 @@ Commands:
|
|
|
24
24
|
node Start up a worker node (only one node per machine, please)
|
|
25
25
|
console Launch a CloudCrowd console, connected to the central database
|
|
26
26
|
load_schema Load the schema into the database specified by database.yml
|
|
27
|
+
cleanup Removes jobs that were finished over --days (7 by default) ago
|
|
27
28
|
|
|
28
29
|
server -d [start | stop | restart] Servers and nodes can be launched as
|
|
29
30
|
node -d [start | stop | restart] daemons, then stopped or restarted.
|
|
@@ -42,6 +43,7 @@ Options:
|
|
|
42
43
|
when 'node' then run_node(subcommand)
|
|
43
44
|
when 'load_schema' then run_load_schema
|
|
44
45
|
when 'install' then run_install(subcommand)
|
|
46
|
+
when 'cleanup' then run_cleanup
|
|
45
47
|
else usage
|
|
46
48
|
end
|
|
47
49
|
end
|
|
@@ -53,13 +55,14 @@ Options:
|
|
|
53
55
|
require 'irb/completion'
|
|
54
56
|
require 'pp'
|
|
55
57
|
load_code
|
|
56
|
-
connect_to_database
|
|
58
|
+
connect_to_database true
|
|
59
|
+
CloudCrowd::Server # Preload server to autoload classes.
|
|
60
|
+
Object.send(:include, CloudCrowd)
|
|
57
61
|
IRB.start
|
|
58
62
|
end
|
|
59
63
|
|
|
60
64
|
# `crowd server` can either 'start', 'stop', or 'restart'.
|
|
61
65
|
def run_server(subcommand)
|
|
62
|
-
ensure_config
|
|
63
66
|
load_code
|
|
64
67
|
subcommand ||= 'start'
|
|
65
68
|
case subcommand
|
|
@@ -99,7 +102,7 @@ Options:
|
|
|
99
102
|
# `crowd node` can either 'start', 'stop', or 'restart'.
|
|
100
103
|
def run_node(subcommand)
|
|
101
104
|
load_code
|
|
102
|
-
ENV['RACK_ENV'] = @options[
|
|
105
|
+
ENV['RACK_ENV'] = @options[:environment]
|
|
103
106
|
case (subcommand || 'start')
|
|
104
107
|
when 'start' then start_node
|
|
105
108
|
when 'stop' then stop_node
|
|
@@ -146,6 +149,13 @@ Options:
|
|
|
146
149
|
install_file "#{CC_ROOT}/actions", "#{install_path}/actions", true
|
|
147
150
|
end
|
|
148
151
|
|
|
152
|
+
# Clean up all Jobs in the CloudCrowd database older than --days old.
|
|
153
|
+
def run_cleanup
|
|
154
|
+
load_code
|
|
155
|
+
connect_to_database(true)
|
|
156
|
+
Job.cleanup_all(:days => @options[:days])
|
|
157
|
+
end
|
|
158
|
+
|
|
149
159
|
# Print `crowd` usage.
|
|
150
160
|
def usage
|
|
151
161
|
puts "\n#{@option_parser}\n"
|
|
@@ -163,6 +173,7 @@ Options:
|
|
|
163
173
|
end
|
|
164
174
|
|
|
165
175
|
# Parse all options for all commands.
|
|
176
|
+
# Valid options are: --config --port --environment --daemonize --days.
|
|
166
177
|
def parse_options
|
|
167
178
|
@options = {
|
|
168
179
|
:environment => 'production',
|
|
@@ -176,12 +187,15 @@ Options:
|
|
|
176
187
|
opts.on('-p', '--port PORT', 'port number for server (central or node)') do |port_num|
|
|
177
188
|
@options[:port] = port_num
|
|
178
189
|
end
|
|
179
|
-
opts.on('-e', '--environment ENV', 'server environment (
|
|
190
|
+
opts.on('-e', '--environment ENV', 'server environment (defaults to production)') do |env|
|
|
180
191
|
@options[:environment] = env
|
|
181
192
|
end
|
|
182
193
|
opts.on('-d', '--daemonize', 'run as a background daemon') do |daemonize|
|
|
183
194
|
@options[:daemonize] = daemonize
|
|
184
195
|
end
|
|
196
|
+
opts.on('--days NUM_DAYS', 'grace period before cleanup (7 by default)') do |days|
|
|
197
|
+
@options[:days] = days.to_i if days.match(/\A\d+\Z/)
|
|
198
|
+
end
|
|
185
199
|
opts.on_tail('-v', '--version', 'show version') do
|
|
186
200
|
require "#{CC_ROOT}/lib/cloud-crowd"
|
|
187
201
|
puts "CloudCrowd version #{VERSION}"
|
data/lib/cloud_crowd/models/job.rb
CHANGED
|
@@ -7,6 +7,8 @@ module CloudCrowd
|
|
|
7
7
|
class Job < ActiveRecord::Base
|
|
8
8
|
include ModelStatus
|
|
9
9
|
|
|
10
|
+
CLEANUP_GRACE_PERIOD = 7 # That's a week.
|
|
11
|
+
|
|
10
12
|
has_many :work_units, :dependent => :destroy
|
|
11
13
|
|
|
12
14
|
validates_presence_of :status, :inputs, :action, :options
|
|
@@ -14,6 +16,9 @@ module CloudCrowd
|
|
|
14
16
|
before_validation_on_create :set_initial_status
|
|
15
17
|
after_create :queue_for_workers
|
|
16
18
|
before_destroy :cleanup_assets
|
|
19
|
+
|
|
20
|
+
# Jobs that were last updated more than N days ago.
|
|
21
|
+
named_scope :older_than, lambda {|num| {:conditions => ['updated_at < ?', num.days.ago]} }
|
|
17
22
|
|
|
18
23
|
# Create a Job from an incoming JSON request, and add it to the queue.
|
|
19
24
|
def self.create_from_request(h)
|
|
@@ -26,6 +31,14 @@ module CloudCrowd
|
|
|
26
31
|
)
|
|
27
32
|
end
|
|
28
33
|
|
|
34
|
+
# Clean up all jobs beyond a certain age.
|
|
35
|
+
def self.cleanup_all(opts = {})
|
|
36
|
+
days = opts[:days] || CLEANUP_GRACE_PERIOD
|
|
37
|
+
self.complete.older_than(days).find_in_batches(:batch_size => 100) do |jobs|
|
|
38
|
+
jobs.each {|job| job.destroy }
|
|
39
|
+
end
|
|
40
|
+
end
|
|
41
|
+
|
|
29
42
|
# After work units are marked successful, we check to see if all of them have
|
|
30
43
|
# finished, if so, continue on to the next phase of the job.
|
|
31
44
|
def check_for_completion
|
data/lib/cloud_crowd/models/node_record.rb
CHANGED
|
@@ -42,7 +42,7 @@ module CloudCrowd
|
|
|
42
42
|
rescue RestClient::RequestFailed => e
|
|
43
43
|
raise e unless e.http_code == 503 && e.http_body == Node::OVERLOADED_MESSAGE
|
|
44
44
|
update_attribute(:busy, true) && false
|
|
45
|
-
rescue RestClient::Exception, Errno::ECONNREFUSED
|
|
45
|
+
rescue RestClient::Exception, Errno::ECONNREFUSED, Timeout::Error
|
|
46
46
|
# Couldn't post to node, assume it's gone away.
|
|
47
47
|
destroy && false
|
|
48
48
|
end
|
data/test/acceptance/test_failing_work_units.rb
CHANGED
|
@@ -14,7 +14,7 @@ class FailingWorkUnitsTest < Test::Unit::TestCase
|
|
|
14
14
|
}.to_json
|
|
15
15
|
assert browser.last_response.ok?
|
|
16
16
|
|
|
17
|
-
job =
|
|
17
|
+
job = Job.last
|
|
18
18
|
(CloudCrowd.config[:work_unit_retries] - 1).times do
|
|
19
19
|
job.work_units.each {|unit| unit.fail('failed', 10) }
|
|
20
20
|
end
|
data/test/acceptance/test_server.rb
CHANGED
|
@@ -11,8 +11,8 @@ class ServerTest < Test::Unit::TestCase
|
|
|
11
11
|
context "The CloudCrowd::Server (Sinatra)" do
|
|
12
12
|
|
|
13
13
|
setup do
|
|
14
|
-
|
|
15
|
-
2.times {
|
|
14
|
+
Job.destroy_all
|
|
15
|
+
2.times { Job.make }
|
|
16
16
|
end
|
|
17
17
|
|
|
18
18
|
should "be able to render the Operations Center (GET /)" do
|
|
@@ -52,20 +52,20 @@ class ServerTest < Test::Unit::TestCase
|
|
|
52
52
|
job_info = JSON.parse(last_response.body)
|
|
53
53
|
assert job_info['percent_complete'] == 0
|
|
54
54
|
assert job_info['work_units'] == 1
|
|
55
|
-
assert
|
|
55
|
+
assert Job.last.id == job_info['id']
|
|
56
56
|
end
|
|
57
57
|
|
|
58
58
|
should "be able to check in on the status of a job" do
|
|
59
|
-
get("/jobs/#{
|
|
59
|
+
get("/jobs/#{Job.last.id}")
|
|
60
60
|
assert last_response.ok?
|
|
61
61
|
assert JSON.parse(last_response.body)['percent_complete'] == 0
|
|
62
62
|
end
|
|
63
63
|
|
|
64
64
|
should "be able to clean up a job when we're done with it" do
|
|
65
|
-
id =
|
|
65
|
+
id = Job.last.id
|
|
66
66
|
delete("/jobs/#{id}")
|
|
67
67
|
assert last_response.successful? && last_response.empty?
|
|
68
|
-
assert !
|
|
68
|
+
assert !Job.find_by_id(id)
|
|
69
69
|
end
|
|
70
70
|
|
|
71
71
|
end
|
data/test/unit/test_job.rb
CHANGED
|
@@ -5,7 +5,7 @@ class JobTest < Test::Unit::TestCase
|
|
|
5
5
|
context "A CloudCrowd Job" do
|
|
6
6
|
|
|
7
7
|
setup do
|
|
8
|
-
@job =
|
|
8
|
+
@job = Job.make
|
|
9
9
|
@unit = @job.work_units.first
|
|
10
10
|
end
|
|
11
11
|
|
|
@@ -25,7 +25,7 @@ class JobTest < Test::Unit::TestCase
|
|
|
25
25
|
|
|
26
26
|
should "know its completion status" do
|
|
27
27
|
assert !@job.all_work_units_complete?
|
|
28
|
-
@unit.update_attributes(:status =>
|
|
28
|
+
@unit.update_attributes(:status => SUCCEEDED, :output => '{"output":"hello"}')
|
|
29
29
|
@job.check_for_completion
|
|
30
30
|
assert @job.reload.all_work_units_complete?
|
|
31
31
|
assert @job.percent_complete == 100
|
|
@@ -38,7 +38,7 @@ class JobTest < Test::Unit::TestCase
|
|
|
38
38
|
end
|
|
39
39
|
|
|
40
40
|
should "be able to create a job from a JSON request" do
|
|
41
|
-
job =
|
|
41
|
+
job = Job.create_from_request(JSON.parse(<<-EOS
|
|
42
42
|
{ "inputs" : ["one", "two", "three"],
|
|
43
43
|
"action" : "graphics_magick",
|
|
44
44
|
"email" : "bob@example.com",
|
|
@@ -60,26 +60,38 @@ class JobTest < Test::Unit::TestCase
|
|
|
60
60
|
end
|
|
61
61
|
|
|
62
62
|
should "create jobs with a SPLITTING status for actions that have a split method defined" do
|
|
63
|
-
job =
|
|
63
|
+
job = Job.create_from_request({'inputs' => ['1'], 'action' => 'process_pdfs'})
|
|
64
64
|
assert job.splittable?
|
|
65
65
|
assert job.splitting?
|
|
66
66
|
end
|
|
67
67
|
|
|
68
68
|
should "fire a callback when a job has finished, successfully or not" do
|
|
69
69
|
@job.update_attribute(:callback_url, 'http://example.com/callback')
|
|
70
|
-
|
|
71
|
-
|
|
70
|
+
Job.any_instance.stubs(:fire_callback).returns(true)
|
|
71
|
+
Job.any_instance.expects(:fire_callback)
|
|
72
72
|
@job.work_units.first.finish('{"output":"output"}', 10)
|
|
73
73
|
assert @job.all_work_units_complete?
|
|
74
74
|
end
|
|
75
75
|
|
|
76
76
|
should "have a 'pretty' display of the Job's status" do
|
|
77
77
|
assert @job.display_status == 'processing'
|
|
78
|
-
@job.update_attribute(:status,
|
|
78
|
+
@job.update_attribute(:status, FAILED)
|
|
79
79
|
assert @job.display_status == 'failed'
|
|
80
|
-
@job.update_attribute(:status,
|
|
80
|
+
@job.update_attribute(:status, MERGING)
|
|
81
81
|
assert @job.display_status == 'merging'
|
|
82
82
|
end
|
|
83
|
+
|
|
84
|
+
should "be able to clean up jobs that have aged beyond their use" do
|
|
85
|
+
count = Job.count
|
|
86
|
+
Job.cleanup_all
|
|
87
|
+
assert count == Job.count
|
|
88
|
+
Job.record_timestamps = false
|
|
89
|
+
@job.update_attributes :status => SUCCEEDED, :updated_at => 10.days.ago
|
|
90
|
+
Job.record_timestamps = true
|
|
91
|
+
Job.cleanup_all
|
|
92
|
+
assert count > Job.count
|
|
93
|
+
assert !Job.find_by_id(@job.id)
|
|
94
|
+
end
|
|
83
95
|
|
|
84
96
|
end
|
|
85
97
|
|
data/test/unit/test_work_unit.rb
CHANGED
|
@@ -17,14 +17,14 @@ class WorkUnitTest < Test::Unit::TestCase
|
|
|
17
17
|
|
|
18
18
|
should "know if its done" do
|
|
19
19
|
assert !@unit.complete?
|
|
20
|
-
@unit.status =
|
|
20
|
+
@unit.status = SUCCEEDED
|
|
21
21
|
assert @unit.complete?
|
|
22
|
-
@unit.status =
|
|
22
|
+
@unit.status = FAILED
|
|
23
23
|
assert @unit.complete?
|
|
24
24
|
end
|
|
25
25
|
|
|
26
26
|
should "have JSON that includes job attributes" do
|
|
27
|
-
job =
|
|
27
|
+
job = Job.make
|
|
28
28
|
unit_data = JSON.parse(job.work_units.first.to_json)
|
|
29
29
|
assert unit_data['job_id'] == job.id
|
|
30
30
|
assert unit_data['action'] == job.action
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: cloud-crowd
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.6
|
|
4
|
+
version: 0.2.7
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Jeremy Ashkenas
|
|
@@ -9,7 +9,7 @@ autorequire:
|
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
11
|
|
|
12
|
-
date: 2009-10-
|
|
12
|
+
date: 2009-10-19 00:00:00 -04:00
|
|
13
13
|
default_executable:
|
|
14
14
|
dependencies:
|
|
15
15
|
- !ruby/object:Gem::Dependency
|