cloud-crowd 0.2.6 → 0.2.7
Sign up to get free protection for your applications and to get access to all the features.
- data/cloud-crowd.gemspec +2 -2
- data/config/database.example.yml +1 -1
- data/lib/cloud-crowd.rb +1 -1
- data/lib/cloud_crowd/command_line.rb +18 -4
- data/lib/cloud_crowd/models/job.rb +13 -0
- data/lib/cloud_crowd/models/node_record.rb +1 -1
- data/test/acceptance/test_failing_work_units.rb +1 -1
- data/test/acceptance/test_server.rb +6 -6
- data/test/unit/test_job.rb +20 -8
- data/test/unit/test_work_unit.rb +3 -3
- metadata +2 -2
data/cloud-crowd.gemspec
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = 'cloud-crowd'
|
3
|
-
s.version = '0.2.6'
|
4
|
-
s.date = '2009-10-
|
3
|
+
s.version = '0.2.7' # Keep version in sync with cloud-crowd.rb
|
4
|
+
s.date = '2009-10-19'
|
5
5
|
|
6
6
|
s.homepage = "http://wiki.github.com/documentcloud/cloud-crowd"
|
7
7
|
s.summary = "Parallel Processing for the Rest of Us"
|
data/config/database.example.yml
CHANGED
data/lib/cloud-crowd.rb
CHANGED
@@ -44,7 +44,7 @@ module CloudCrowd
|
|
44
44
|
autoload :WorkUnit, 'cloud_crowd/models'
|
45
45
|
|
46
46
|
# Keep this version in sync with the gemspec.
|
47
|
-
VERSION = '0.2.6'
|
47
|
+
VERSION = '0.2.7'
|
48
48
|
|
49
49
|
# Increment the schema version when there's a backwards incompatible change.
|
50
50
|
SCHEMA_VERSION = 3
|
@@ -24,6 +24,7 @@ Commands:
|
|
24
24
|
node Start up a worker node (only one node per machine, please)
|
25
25
|
console Launch a CloudCrowd console, connected to the central database
|
26
26
|
load_schema Load the schema into the database specified by database.yml
|
27
|
+
cleanup Removes jobs that were finished over --days (7 by default) ago
|
27
28
|
|
28
29
|
server -d [start | stop | restart] Servers and nodes can be launched as
|
29
30
|
node -d [start | stop | restart] daemons, then stopped or restarted.
|
@@ -42,6 +43,7 @@ Options:
|
|
42
43
|
when 'node' then run_node(subcommand)
|
43
44
|
when 'load_schema' then run_load_schema
|
44
45
|
when 'install' then run_install(subcommand)
|
46
|
+
when 'cleanup' then run_cleanup
|
45
47
|
else usage
|
46
48
|
end
|
47
49
|
end
|
@@ -53,13 +55,14 @@ Options:
|
|
53
55
|
require 'irb/completion'
|
54
56
|
require 'pp'
|
55
57
|
load_code
|
56
|
-
connect_to_database
|
58
|
+
connect_to_database true
|
59
|
+
CloudCrowd::Server # Preload server to autoload classes.
|
60
|
+
Object.send(:include, CloudCrowd)
|
57
61
|
IRB.start
|
58
62
|
end
|
59
63
|
|
60
64
|
# `crowd server` can either 'start', 'stop', or 'restart'.
|
61
65
|
def run_server(subcommand)
|
62
|
-
ensure_config
|
63
66
|
load_code
|
64
67
|
subcommand ||= 'start'
|
65
68
|
case subcommand
|
@@ -99,7 +102,7 @@ Options:
|
|
99
102
|
# `crowd node` can either 'start', 'stop', or 'restart'.
|
100
103
|
def run_node(subcommand)
|
101
104
|
load_code
|
102
|
-
ENV['RACK_ENV'] = @options[
|
105
|
+
ENV['RACK_ENV'] = @options[:environment]
|
103
106
|
case (subcommand || 'start')
|
104
107
|
when 'start' then start_node
|
105
108
|
when 'stop' then stop_node
|
@@ -146,6 +149,13 @@ Options:
|
|
146
149
|
install_file "#{CC_ROOT}/actions", "#{install_path}/actions", true
|
147
150
|
end
|
148
151
|
|
152
|
+
# Clean up all Jobs in the CloudCrowd database older than --days old.
|
153
|
+
def run_cleanup
|
154
|
+
load_code
|
155
|
+
connect_to_database(true)
|
156
|
+
Job.cleanup_all(:days => @options[:days])
|
157
|
+
end
|
158
|
+
|
149
159
|
# Print `crowd` usage.
|
150
160
|
def usage
|
151
161
|
puts "\n#{@option_parser}\n"
|
@@ -163,6 +173,7 @@ Options:
|
|
163
173
|
end
|
164
174
|
|
165
175
|
# Parse all options for all commands.
|
176
|
+
# Valid options are: --config --port --environment --daemonize --days.
|
166
177
|
def parse_options
|
167
178
|
@options = {
|
168
179
|
:environment => 'production',
|
@@ -176,12 +187,15 @@ Options:
|
|
176
187
|
opts.on('-p', '--port PORT', 'port number for server (central or node)') do |port_num|
|
177
188
|
@options[:port] = port_num
|
178
189
|
end
|
179
|
-
opts.on('-e', '--environment ENV', 'server environment (
|
190
|
+
opts.on('-e', '--environment ENV', 'server environment (defaults to production)') do |env|
|
180
191
|
@options[:environment] = env
|
181
192
|
end
|
182
193
|
opts.on('-d', '--daemonize', 'run as a background daemon') do |daemonize|
|
183
194
|
@options[:daemonize] = daemonize
|
184
195
|
end
|
196
|
+
opts.on('--days NUM_DAYS', 'grace period before cleanup (7 by default)') do |days|
|
197
|
+
@options[:days] = days.to_i if days.match(/\A\d+\Z/)
|
198
|
+
end
|
185
199
|
opts.on_tail('-v', '--version', 'show version') do
|
186
200
|
require "#{CC_ROOT}/lib/cloud-crowd"
|
187
201
|
puts "CloudCrowd version #{VERSION}"
|
@@ -7,6 +7,8 @@ module CloudCrowd
|
|
7
7
|
class Job < ActiveRecord::Base
|
8
8
|
include ModelStatus
|
9
9
|
|
10
|
+
CLEANUP_GRACE_PERIOD = 7 # That's a week.
|
11
|
+
|
10
12
|
has_many :work_units, :dependent => :destroy
|
11
13
|
|
12
14
|
validates_presence_of :status, :inputs, :action, :options
|
@@ -14,6 +16,9 @@ module CloudCrowd
|
|
14
16
|
before_validation_on_create :set_initial_status
|
15
17
|
after_create :queue_for_workers
|
16
18
|
before_destroy :cleanup_assets
|
19
|
+
|
20
|
+
# Jobs that were last updated more than N days ago.
|
21
|
+
named_scope :older_than, lambda {|num| {:conditions => ['updated_at < ?', num.days.ago]} }
|
17
22
|
|
18
23
|
# Create a Job from an incoming JSON request, and add it to the queue.
|
19
24
|
def self.create_from_request(h)
|
@@ -26,6 +31,14 @@ module CloudCrowd
|
|
26
31
|
)
|
27
32
|
end
|
28
33
|
|
34
|
+
# Clean up all jobs beyond a certain age.
|
35
|
+
def self.cleanup_all(opts = {})
|
36
|
+
days = opts[:days] || CLEANUP_GRACE_PERIOD
|
37
|
+
self.complete.older_than(days).find_in_batches(:batch_size => 100) do |jobs|
|
38
|
+
jobs.each {|job| job.destroy }
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
29
42
|
# After work units are marked successful, we check to see if all of them have
|
30
43
|
# finished, if so, continue on to the next phase of the job.
|
31
44
|
def check_for_completion
|
@@ -42,7 +42,7 @@ module CloudCrowd
|
|
42
42
|
rescue RestClient::RequestFailed => e
|
43
43
|
raise e unless e.http_code == 503 && e.http_body == Node::OVERLOADED_MESSAGE
|
44
44
|
update_attribute(:busy, true) && false
|
45
|
-
rescue RestClient::Exception, Errno::ECONNREFUSED
|
45
|
+
rescue RestClient::Exception, Errno::ECONNREFUSED, Timeout::Error
|
46
46
|
# Couldn't post to node, assume it's gone away.
|
47
47
|
destroy && false
|
48
48
|
end
|
@@ -14,7 +14,7 @@ class FailingWorkUnitsTest < Test::Unit::TestCase
|
|
14
14
|
}.to_json
|
15
15
|
assert browser.last_response.ok?
|
16
16
|
|
17
|
-
job =
|
17
|
+
job = Job.last
|
18
18
|
(CloudCrowd.config[:work_unit_retries] - 1).times do
|
19
19
|
job.work_units.each {|unit| unit.fail('failed', 10) }
|
20
20
|
end
|
@@ -11,8 +11,8 @@ class ServerTest < Test::Unit::TestCase
|
|
11
11
|
context "The CloudCrowd::Server (Sinatra)" do
|
12
12
|
|
13
13
|
setup do
|
14
|
-
|
15
|
-
2.times {
|
14
|
+
Job.destroy_all
|
15
|
+
2.times { Job.make }
|
16
16
|
end
|
17
17
|
|
18
18
|
should "be able to render the Operations Center (GET /)" do
|
@@ -52,20 +52,20 @@ class ServerTest < Test::Unit::TestCase
|
|
52
52
|
job_info = JSON.parse(last_response.body)
|
53
53
|
assert job_info['percent_complete'] == 0
|
54
54
|
assert job_info['work_units'] == 1
|
55
|
-
assert
|
55
|
+
assert Job.last.id == job_info['id']
|
56
56
|
end
|
57
57
|
|
58
58
|
should "be able to check in on the status of a job" do
|
59
|
-
get("/jobs/#{
|
59
|
+
get("/jobs/#{Job.last.id}")
|
60
60
|
assert last_response.ok?
|
61
61
|
assert JSON.parse(last_response.body)['percent_complete'] == 0
|
62
62
|
end
|
63
63
|
|
64
64
|
should "be able to clean up a job when we're done with it" do
|
65
|
-
id =
|
65
|
+
id = Job.last.id
|
66
66
|
delete("/jobs/#{id}")
|
67
67
|
assert last_response.successful? && last_response.empty?
|
68
|
-
assert !
|
68
|
+
assert !Job.find_by_id(id)
|
69
69
|
end
|
70
70
|
|
71
71
|
end
|
data/test/unit/test_job.rb
CHANGED
@@ -5,7 +5,7 @@ class JobTest < Test::Unit::TestCase
|
|
5
5
|
context "A CloudCrowd Job" do
|
6
6
|
|
7
7
|
setup do
|
8
|
-
@job =
|
8
|
+
@job = Job.make
|
9
9
|
@unit = @job.work_units.first
|
10
10
|
end
|
11
11
|
|
@@ -25,7 +25,7 @@ class JobTest < Test::Unit::TestCase
|
|
25
25
|
|
26
26
|
should "know its completion status" do
|
27
27
|
assert !@job.all_work_units_complete?
|
28
|
-
@unit.update_attributes(:status =>
|
28
|
+
@unit.update_attributes(:status => SUCCEEDED, :output => '{"output":"hello"}')
|
29
29
|
@job.check_for_completion
|
30
30
|
assert @job.reload.all_work_units_complete?
|
31
31
|
assert @job.percent_complete == 100
|
@@ -38,7 +38,7 @@ class JobTest < Test::Unit::TestCase
|
|
38
38
|
end
|
39
39
|
|
40
40
|
should "be able to create a job from a JSON request" do
|
41
|
-
job =
|
41
|
+
job = Job.create_from_request(JSON.parse(<<-EOS
|
42
42
|
{ "inputs" : ["one", "two", "three"],
|
43
43
|
"action" : "graphics_magick",
|
44
44
|
"email" : "bob@example.com",
|
@@ -60,26 +60,38 @@ class JobTest < Test::Unit::TestCase
|
|
60
60
|
end
|
61
61
|
|
62
62
|
should "create jobs with a SPLITTING status for actions that have a split method defined" do
|
63
|
-
job =
|
63
|
+
job = Job.create_from_request({'inputs' => ['1'], 'action' => 'process_pdfs'})
|
64
64
|
assert job.splittable?
|
65
65
|
assert job.splitting?
|
66
66
|
end
|
67
67
|
|
68
68
|
should "fire a callback when a job has finished, successfully or not" do
|
69
69
|
@job.update_attribute(:callback_url, 'http://example.com/callback')
|
70
|
-
|
71
|
-
|
70
|
+
Job.any_instance.stubs(:fire_callback).returns(true)
|
71
|
+
Job.any_instance.expects(:fire_callback)
|
72
72
|
@job.work_units.first.finish('{"output":"output"}', 10)
|
73
73
|
assert @job.all_work_units_complete?
|
74
74
|
end
|
75
75
|
|
76
76
|
should "have a 'pretty' display of the Job's status" do
|
77
77
|
assert @job.display_status == 'processing'
|
78
|
-
@job.update_attribute(:status,
|
78
|
+
@job.update_attribute(:status, FAILED)
|
79
79
|
assert @job.display_status == 'failed'
|
80
|
-
@job.update_attribute(:status,
|
80
|
+
@job.update_attribute(:status, MERGING)
|
81
81
|
assert @job.display_status == 'merging'
|
82
82
|
end
|
83
|
+
|
84
|
+
should "be able to clean up jobs that have aged beyond their use" do
|
85
|
+
count = Job.count
|
86
|
+
Job.cleanup_all
|
87
|
+
assert count == Job.count
|
88
|
+
Job.record_timestamps = false
|
89
|
+
@job.update_attributes :status => SUCCEEDED, :updated_at => 10.days.ago
|
90
|
+
Job.record_timestamps = true
|
91
|
+
Job.cleanup_all
|
92
|
+
assert count > Job.count
|
93
|
+
assert !Job.find_by_id(@job.id)
|
94
|
+
end
|
83
95
|
|
84
96
|
end
|
85
97
|
|
data/test/unit/test_work_unit.rb
CHANGED
@@ -17,14 +17,14 @@ class WorkUnitTest < Test::Unit::TestCase
|
|
17
17
|
|
18
18
|
should "know if its done" do
|
19
19
|
assert !@unit.complete?
|
20
|
-
@unit.status =
|
20
|
+
@unit.status = SUCCEEDED
|
21
21
|
assert @unit.complete?
|
22
|
-
@unit.status =
|
22
|
+
@unit.status = FAILED
|
23
23
|
assert @unit.complete?
|
24
24
|
end
|
25
25
|
|
26
26
|
should "have JSON that includes job attributes" do
|
27
|
-
job =
|
27
|
+
job = Job.make
|
28
28
|
unit_data = JSON.parse(job.work_units.first.to_json)
|
29
29
|
assert unit_data['job_id'] == job.id
|
30
30
|
assert unit_data['action'] == job.action
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cloud-crowd
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.6
|
4
|
+
version: 0.2.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jeremy Ashkenas
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-10-
|
12
|
+
date: 2009-10-19 00:00:00 -04:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|