documentcloud-cloud-crowd 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/cloud-crowd.gemspec +2 -3
- data/lib/cloud-crowd.rb +13 -6
- data/lib/cloud_crowd/action.rb +7 -15
- data/lib/cloud_crowd/app.rb +7 -6
- data/lib/cloud_crowd/command_line.rb +10 -14
- data/lib/cloud_crowd/daemon.rb +1 -1
- data/lib/cloud_crowd/helpers.rb +1 -2
- data/lib/cloud_crowd/helpers/authorization.rb +5 -5
- data/lib/cloud_crowd/inflector.rb +19 -0
- data/lib/cloud_crowd/models.rb +17 -14
- data/lib/cloud_crowd/models/job.rb +29 -18
- data/lib/cloud_crowd/models/work_unit.rb +4 -4
- data/lib/cloud_crowd/runner.rb +0 -1
- data/lib/cloud_crowd/worker.rb +12 -19
- data/test/acceptance/test_failing_work_units.rb +1 -1
- data/test/unit/test_job.rb +2 -2
- metadata +4 -4
- data/lib/cloud_crowd/core_ext.rb +0 -10
- data/lib/cloud_crowd/helpers/urls.rb +0 -7
data/cloud-crowd.gemspec
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = 'cloud-crowd'
|
3
|
-
s.version = '0.0.3'
|
3
|
+
s.version = '0.0.4' # Keep version in sync with cloud-crowd.rb
|
4
4
|
s.date = '2009-08-23'
|
5
5
|
|
6
6
|
s.homepage = "http://documentcloud.org" # wiki page on github?
|
@@ -49,12 +49,11 @@ lib/cloud_crowd/action.rb
|
|
49
49
|
lib/cloud_crowd/app.rb
|
50
50
|
lib/cloud_crowd/asset_store.rb
|
51
51
|
lib/cloud_crowd/command_line.rb
|
52
|
-
lib/cloud_crowd/core_ext.rb
|
53
52
|
lib/cloud_crowd/daemon.rb
|
54
53
|
lib/cloud_crowd/helpers/authorization.rb
|
55
54
|
lib/cloud_crowd/helpers/resources.rb
|
56
|
-
lib/cloud_crowd/helpers/urls.rb
|
57
55
|
lib/cloud_crowd/helpers.rb
|
56
|
+
lib/cloud_crowd/inflector.rb
|
58
57
|
lib/cloud_crowd/models/job.rb
|
59
58
|
lib/cloud_crowd/models/work_unit.rb
|
60
59
|
lib/cloud_crowd/models.rb
|
data/lib/cloud-crowd.rb
CHANGED
@@ -11,9 +11,6 @@ gem 'rest-client'
|
|
11
11
|
gem 'right_aws'
|
12
12
|
gem 'sinatra'
|
13
13
|
|
14
|
-
# Common CloudCrowd libs:
|
15
|
-
require 'cloud_crowd/core_ext'
|
16
|
-
|
17
14
|
# Autoloading for all the pieces which may or may not be needed:
|
18
15
|
autoload :ActiveRecord, 'activerecord'
|
19
16
|
autoload :Benchmark, 'benchmark'
|
@@ -34,6 +31,7 @@ module CloudCrowd
|
|
34
31
|
autoload :Action, 'cloud_crowd/action'
|
35
32
|
autoload :AssetStore, 'cloud_crowd/asset_store'
|
36
33
|
autoload :Helpers, 'cloud_crowd/helpers'
|
34
|
+
autoload :Inflector, 'cloud_crowd/inflector'
|
37
35
|
autoload :Job, 'cloud_crowd/models'
|
38
36
|
autoload :WorkUnit, 'cloud_crowd/models'
|
39
37
|
|
@@ -41,7 +39,7 @@ module CloudCrowd
|
|
41
39
|
ROOT = File.expand_path(File.dirname(__FILE__) + '/..')
|
42
40
|
|
43
41
|
# Keep the version in sync with the gemspec.
|
44
|
-
VERSION = '0.0.3'
|
42
|
+
VERSION = '0.0.4'
|
45
43
|
|
46
44
|
# A Job is processing if its WorkUnits are in the queue to be handled by workers.
|
47
45
|
PROCESSING = 1
|
@@ -88,6 +86,15 @@ module CloudCrowd
|
|
88
86
|
configuration = YAML.load_file(config_path)
|
89
87
|
ActiveRecord::Base.establish_connection(configuration)
|
90
88
|
end
|
89
|
+
|
90
|
+
# Keep an authenticated (if configured to enable authentication) resource
|
91
|
+
# for the central server.
|
92
|
+
def central_server
|
93
|
+
return @central_server if @central_server
|
94
|
+
params = [CloudCrowd.config[:central_server]]
|
95
|
+
params += [CloudCrowd.config[:login], CloudCrowd.config[:password]] if CloudCrowd.config[:use_http_authentication]
|
96
|
+
@central_server = RestClient::Resource.new(*params)
|
97
|
+
end
|
91
98
|
|
92
99
|
# Return the readable status name of an internal CloudCrowd status number.
|
93
100
|
def display_status(status)
|
@@ -98,13 +105,13 @@ module CloudCrowd
|
|
98
105
|
# so we lazy-load them. Think about a variant of this for installing and
|
99
106
|
# loading actions into a running CloudCrowd cluster on the fly.
|
100
107
|
def actions(name)
|
101
|
-
action_class =
|
108
|
+
action_class = Inflector.camelize(name)
|
102
109
|
begin
|
103
110
|
raise NameError, "can't find the #{action_class} Action" unless Module.constants.include?(action_class)
|
104
111
|
Module.const_get(action_class)
|
105
112
|
rescue NameError => e
|
106
113
|
user_action = "#{@config_path}/actions/#{name}"
|
107
|
-
default_action = "#{
|
114
|
+
default_action = "#{ROOT}/actions/#{name}"
|
108
115
|
require user_action and retry if File.exists? "#{user_action}.rb"
|
109
116
|
require default_action and retry if File.exists? "#{default_action}.rb"
|
110
117
|
raise e
|
data/lib/cloud_crowd/action.rb
CHANGED
@@ -18,14 +18,14 @@ module CloudCrowd
|
|
18
18
|
|
19
19
|
# Configuring a new Action sets up all of the read-only variables that
|
20
20
|
# form the bulk of the API for action subclasses. (Paths to read from and
|
21
|
-
# write to).
|
21
|
+
# write to). It creates the work_directory and moves into it.
|
22
22
|
def configure(status, input, options, store)
|
23
23
|
@input, @options, @store = input, options, store
|
24
24
|
@job_id, @work_unit_id = options['job_id'], options['work_unit_id']
|
25
25
|
@work_directory = File.expand_path(File.join(@store.temp_storage_path, storage_prefix))
|
26
26
|
FileUtils.mkdir_p(@work_directory) unless File.exists?(@work_directory)
|
27
27
|
Dir.chdir @work_directory
|
28
|
-
unless status ==
|
28
|
+
unless status == MERGING
|
29
29
|
@input_path = File.join(@work_directory, File.basename(@input))
|
30
30
|
@file_name = File.basename(@input_path, File.extname(@input_path))
|
31
31
|
download(@input, @input_path)
|
@@ -43,15 +43,16 @@ module CloudCrowd
|
|
43
43
|
path
|
44
44
|
end
|
45
45
|
|
46
|
-
# Takes a local filesystem path, and returns the public
|
47
|
-
# file was saved.
|
46
|
+
# Takes a local filesystem path, and returns the public (or authenticated)
|
47
|
+
# url on S3 where the file was saved.
|
48
48
|
def save(file_path)
|
49
49
|
save_path = File.join(s3_storage_path, File.basename(file_path))
|
50
50
|
@store.save(file_path, save_path)
|
51
51
|
return @store.url(save_path)
|
52
52
|
end
|
53
53
|
|
54
|
-
# After the Action has finished, we remove the work directory
|
54
|
+
# After the Action has finished, we remove the work directory and return
|
55
|
+
# to the root directory (where daemons run by default).
|
55
56
|
def cleanup_work_directory
|
56
57
|
Dir.chdir '/'
|
57
58
|
FileUtils.rm_r(@work_directory)
|
@@ -64,7 +65,7 @@ module CloudCrowd
|
|
64
65
|
# [action_name]/job_[job_id]/unit_[work_unit_id]
|
65
66
|
def storage_prefix
|
66
67
|
path_parts = []
|
67
|
-
path_parts << underscore(self.class
|
68
|
+
path_parts << Inflector.underscore(self.class)
|
68
69
|
path_parts << "job_#{@job_id}"
|
69
70
|
path_parts << "unit_#{@work_unit_id}" if @work_unit_id
|
70
71
|
@storage_prefix ||= File.join(path_parts)
|
@@ -74,15 +75,6 @@ module CloudCrowd
|
|
74
75
|
@s3_storage_path ||= storage_prefix
|
75
76
|
end
|
76
77
|
|
77
|
-
# Pilfered from the ActiveSupport::Inflector.
|
78
|
-
def underscore(word)
|
79
|
-
word.to_s.gsub(/::/, '/').
|
80
|
-
gsub(/([A-Z]+)([A-Z][a-z])/,'\1_\2').
|
81
|
-
gsub(/([a-z\d])([A-Z])/,'\1_\2').
|
82
|
-
tr("-", "_").
|
83
|
-
downcase
|
84
|
-
end
|
85
|
-
|
86
78
|
end
|
87
79
|
|
88
80
|
end
|
data/lib/cloud_crowd/app.rb
CHANGED
@@ -1,15 +1,16 @@
|
|
1
1
|
module CloudCrowd
|
2
2
|
|
3
3
|
class App < Sinatra::Default
|
4
|
-
|
5
|
-
# static serves files from /public, methodoverride allows the _method param.
|
6
|
-
enable :static, :methodoverride
|
7
4
|
|
8
|
-
set :root,
|
5
|
+
set :root, ROOT
|
9
6
|
set :authorization_realm, "CloudCrowd"
|
10
7
|
|
11
|
-
helpers
|
8
|
+
helpers Helpers
|
9
|
+
|
10
|
+
# static serves files from /public, methodoverride allows the _method param.
|
11
|
+
enable :static, :methodoverride
|
12
12
|
|
13
|
+
# Enabling HTTP Authentication turns it on for all requests.
|
13
14
|
before do
|
14
15
|
login_required if CloudCrowd.config[:use_http_authentication]
|
15
16
|
end
|
@@ -51,7 +52,7 @@ module CloudCrowd
|
|
51
52
|
current_work_unit.fail(params[:output], params[:time])
|
52
53
|
dequeue_work_unit(1)
|
53
54
|
else
|
54
|
-
|
55
|
+
error(500, "Completing a work unit must specify status.")
|
55
56
|
end
|
56
57
|
end
|
57
58
|
end
|
@@ -6,7 +6,7 @@ module CloudCrowd
|
|
6
6
|
# Configuration files required for the `crowd` command to function.
|
7
7
|
CONFIG_FILES = ['config.yml', 'config.ru', 'database.yml']
|
8
8
|
|
9
|
-
# Reference the absolute path to the root
|
9
|
+
# Reference the absolute path to the root.
|
10
10
|
CC_ROOT = File.expand_path(File.dirname(__FILE__) + '/../..')
|
11
11
|
|
12
12
|
# Path to the Daemons gem script which launches workers.
|
@@ -58,7 +58,7 @@ OPTIONS:
|
|
58
58
|
def run_server
|
59
59
|
ensure_config
|
60
60
|
require 'rubygems'
|
61
|
-
rackup_path = File.expand_path(
|
61
|
+
rackup_path = File.expand_path("#{@options[:config_path]}/config.ru")
|
62
62
|
if Gem.available? 'thin'
|
63
63
|
exec "thin -e production -p #{@options[:port]} -R #{rackup_path} start"
|
64
64
|
else
|
@@ -106,13 +106,13 @@ OPTIONS:
|
|
106
106
|
load_code
|
107
107
|
num_workers = @options[:num_workers] || CloudCrowd.config[:num_workers]
|
108
108
|
num_workers.times do
|
109
|
-
`CLOUD_CROWD_CONFIG='#{File.expand_path(
|
109
|
+
`CLOUD_CROWD_CONFIG='#{File.expand_path(@options[:config_path] + "/config.yml")}' ruby #{WORKER_RUNNER} start`
|
110
110
|
end
|
111
111
|
end
|
112
112
|
|
113
113
|
# For debugging, run a single worker in the current process, showing output.
|
114
114
|
def run_worker
|
115
|
-
exec "CLOUD_CROWD_CONFIG='#{File.expand_path(
|
115
|
+
exec "CLOUD_CROWD_CONFIG='#{File.expand_path(@options[:config_path] + "/config.yml")}' ruby #{WORKER_RUNNER} run"
|
116
116
|
end
|
117
117
|
|
118
118
|
# Stop all active workers.
|
@@ -137,25 +137,21 @@ OPTIONS:
|
|
137
137
|
# the CLOUD_CROWD_CONFIG environment variable. Exit if they're not found.
|
138
138
|
def ensure_config
|
139
139
|
return if @config_found
|
140
|
-
|
141
|
-
|
140
|
+
found = CONFIG_FILES.all? {|f| File.exists? "#{@options[:config_path]}/#{f}" }
|
141
|
+
found ? @config_dir = true : config_not_found
|
142
142
|
end
|
143
143
|
|
144
144
|
# Parse all options for all actions.
|
145
145
|
# TODO: Think about parsing options per sub-command separately.
|
146
146
|
def parse_options
|
147
147
|
@options = {
|
148
|
-
:db_config => 'database.yml',
|
149
148
|
:port => 9173,
|
150
|
-
:config_path => ENV['CLOUD_CROWD_CONFIG'] || '.'
|
149
|
+
:config_path => ENV['CLOUD_CROWD_CONFIG'] || '.'
|
151
150
|
}
|
152
151
|
@option_parser = OptionParser.new do |opts|
|
153
152
|
opts.on('-c', '--config PATH', 'path to configuration directory') do |conf_path|
|
154
153
|
@options[:config_path] = conf_path
|
155
154
|
end
|
156
|
-
opts.on('-d', '--database-config PATH', 'path to database.yml') do |conf_path|
|
157
|
-
@options[:db_config] = conf_path
|
158
|
-
end
|
159
155
|
opts.on('-n', '--num-workers NUM', OptionParser::DecimalInteger, 'number of worker processes') do |num|
|
160
156
|
@options[:num_workers] = num
|
161
157
|
end
|
@@ -164,7 +160,7 @@ OPTIONS:
|
|
164
160
|
end
|
165
161
|
opts.on_tail('-v', '--version', 'show version') do
|
166
162
|
load_code
|
167
|
-
puts "CloudCrowd version #{
|
163
|
+
puts "CloudCrowd version #{VERSION}"
|
168
164
|
exit
|
169
165
|
end
|
170
166
|
end
|
@@ -178,14 +174,14 @@ OPTIONS:
|
|
178
174
|
ensure_config
|
179
175
|
require 'rubygems'
|
180
176
|
require "#{CC_ROOT}/lib/cloud-crowd"
|
181
|
-
CloudCrowd.configure(
|
177
|
+
CloudCrowd.configure("#{@options[:config_path]}/config.yml")
|
182
178
|
end
|
183
179
|
|
184
180
|
# Establish a connection to the central server's database. Not all commands
|
185
181
|
# require this.
|
186
182
|
def connect_to_database
|
187
183
|
require 'cloud_crowd/models'
|
188
|
-
CloudCrowd.configure_database(@options[:
|
184
|
+
CloudCrowd.configure_database("#{@options[:config_path]}/database.yml")
|
189
185
|
end
|
190
186
|
|
191
187
|
# Exit with an explanation if the configuration files couldn't be found.
|
data/lib/cloud_crowd/daemon.rb
CHANGED
data/lib/cloud_crowd/helpers.rb
CHANGED
@@ -1,9 +1,8 @@
|
|
1
1
|
require 'cloud_crowd/helpers/authorization'
|
2
2
|
require 'cloud_crowd/helpers/resources'
|
3
|
-
require 'cloud_crowd/helpers/urls'
|
4
3
|
|
5
4
|
module CloudCrowd
|
6
5
|
module Helpers
|
7
|
-
include Authorization, Resources
|
6
|
+
include Authorization, Resources #, Rack::Utils
|
8
7
|
end
|
9
8
|
end
|
@@ -4,6 +4,7 @@ module CloudCrowd
|
|
4
4
|
module Helpers
|
5
5
|
module Authorization
|
6
6
|
|
7
|
+
# Ensure that the request includes the correct credentials.
|
7
8
|
def login_required
|
8
9
|
return if authorized?
|
9
10
|
unauthorized! unless auth.provided?
|
@@ -12,14 +13,13 @@ module CloudCrowd
|
|
12
13
|
request.env['REMOTE_USER'] = auth.username
|
13
14
|
end
|
14
15
|
|
16
|
+
# Has the request been authenticated?
|
15
17
|
def authorized?
|
16
18
|
!!request.env['REMOTE_USER']
|
17
19
|
end
|
18
20
|
|
19
|
-
|
20
|
-
|
21
|
-
end
|
22
|
-
|
21
|
+
# A request is authorized if its login and password match those stored
|
22
|
+
# in config.yml, or if authentication is disabled.
|
23
23
|
def authorize(login, password)
|
24
24
|
return true unless CloudCrowd.config[:use_http_authentication]
|
25
25
|
return CloudCrowd.config[:login] == login &&
|
@@ -33,7 +33,7 @@ module CloudCrowd
|
|
33
33
|
@auth ||= Rack::Auth::Basic::Request.new(request.env)
|
34
34
|
end
|
35
35
|
|
36
|
-
def unauthorized!(realm =
|
36
|
+
def unauthorized!(realm = App.authorization_realm)
|
37
37
|
response['WWW-Authenticate'] = "Basic realm=\"#{realm}\""
|
38
38
|
halt 401, 'Authorization Required'
|
39
39
|
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module CloudCrowd
|
2
|
+
|
3
|
+
# Pilfered in parts from the ActiveSupport::Inflector.
|
4
|
+
module Inflector
|
5
|
+
|
6
|
+
def self.camelize(word)
|
7
|
+
word.to_s.gsub(/\/(.?)/) { "::#{$1.upcase}" }.gsub(/(?:^|_)(.)/) { $1.upcase }
|
8
|
+
end
|
9
|
+
|
10
|
+
def self.underscore(word)
|
11
|
+
word.to_s.gsub(/::/, '/').
|
12
|
+
gsub(/([A-Z]+)([A-Z][a-z])/,'\1_\2').
|
13
|
+
gsub(/([a-z\d])([A-Z])/,'\1_\2').
|
14
|
+
tr("-", "_").
|
15
|
+
downcase
|
16
|
+
end
|
17
|
+
|
18
|
+
end
|
19
|
+
end
|
data/lib/cloud_crowd/models.rb
CHANGED
@@ -1,28 +1,31 @@
|
|
1
1
|
module CloudCrowd
|
2
|
+
|
3
|
+
# Adds named scopes and query methods for every CloudCrowd status to
|
4
|
+
# both Jobs and WorkUnits.
|
2
5
|
module ModelStatus
|
3
6
|
|
4
7
|
def self.included(klass)
|
5
8
|
|
6
9
|
klass.class_eval do
|
7
10
|
# Note that COMPLETE and INCOMPLETE are unions of other states.
|
8
|
-
named_scope 'processing', :conditions => {:status =>
|
9
|
-
named_scope 'succeeded', :conditions => {:status =>
|
10
|
-
named_scope 'failed', :conditions => {:status =>
|
11
|
-
named_scope 'splitting', :conditions => {:status =>
|
12
|
-
named_scope 'merging', :conditions => {:status =>
|
13
|
-
named_scope 'complete', :conditions => {:status =>
|
14
|
-
named_scope 'incomplete', :conditions => {:status =>
|
11
|
+
named_scope 'processing', :conditions => {:status => PROCESSING}
|
12
|
+
named_scope 'succeeded', :conditions => {:status => SUCCEEDED}
|
13
|
+
named_scope 'failed', :conditions => {:status => FAILED}
|
14
|
+
named_scope 'splitting', :conditions => {:status => SPLITTING}
|
15
|
+
named_scope 'merging', :conditions => {:status => MERGING}
|
16
|
+
named_scope 'complete', :conditions => {:status => COMPLETE}
|
17
|
+
named_scope 'incomplete', :conditions => {:status => INCOMPLETE}
|
15
18
|
end
|
16
19
|
|
17
20
|
end
|
18
21
|
|
19
|
-
def processing?; self.status ==
|
20
|
-
def succeeded?; self.status ==
|
21
|
-
def failed?; self.status ==
|
22
|
-
def splitting?; self.status ==
|
23
|
-
def merging?; self.status ==
|
24
|
-
def complete?;
|
25
|
-
def incomplete?;
|
22
|
+
def processing?; self.status == PROCESSING; end
|
23
|
+
def succeeded?; self.status == SUCCEEDED; end
|
24
|
+
def failed?; self.status == FAILED; end
|
25
|
+
def splitting?; self.status == SPLITTING; end
|
26
|
+
def merging?; self.status == MERGING; end
|
27
|
+
def complete?; COMPLETE.include?(self.status); end
|
28
|
+
def incomplete?; INCOMPLETE.include?(self.status); end
|
26
29
|
|
27
30
|
end
|
28
31
|
end
|
@@ -5,7 +5,7 @@ module CloudCrowd
|
|
5
5
|
# of inputs (usually public urls to files), an action (the name of a script that
|
6
6
|
# CloudCrowd knows how to run), and, eventually a corresponding list of output.
|
7
7
|
class Job < ActiveRecord::Base
|
8
|
-
include
|
8
|
+
include ModelStatus
|
9
9
|
|
10
10
|
has_many :work_units, :dependent => :destroy
|
11
11
|
|
@@ -23,16 +23,18 @@ module CloudCrowd
|
|
23
23
|
)
|
24
24
|
end
|
25
25
|
|
26
|
+
# Creating a job creates its corresponding work units, adding them
|
27
|
+
# to the queue.
|
26
28
|
def after_create
|
27
29
|
self.queue_for_workers(JSON.parse(self.inputs))
|
28
30
|
end
|
29
31
|
|
30
32
|
def before_validation_on_create
|
31
|
-
self.status = self.splittable? ?
|
33
|
+
self.status = self.splittable? ? SPLITTING : PROCESSING
|
32
34
|
end
|
33
35
|
|
34
36
|
# After work units are marked successful, we check to see if all of them have
|
35
|
-
# finished, if so,
|
37
|
+
# finished; if so, continue on to the next phase of the job.
|
36
38
|
def check_for_completion
|
37
39
|
return unless all_work_units_complete?
|
38
40
|
transition_to_next_phase
|
@@ -45,19 +47,19 @@ module CloudCrowd
|
|
45
47
|
self.save
|
46
48
|
|
47
49
|
case self.status
|
48
|
-
when
|
49
|
-
when
|
50
|
-
else
|
50
|
+
when PROCESSING then queue_for_workers(output_list.map {|o| JSON.parse(o) }.flatten)
|
51
|
+
when MERGING then queue_for_workers(output_list.to_json)
|
52
|
+
else fire_callback
|
51
53
|
end
|
52
54
|
self
|
53
55
|
end
|
54
56
|
|
55
|
-
# Transition
|
57
|
+
# Transition this Job's status to the following one.
|
56
58
|
def transition_to_next_phase
|
57
|
-
self.status = any_work_units_failed? ?
|
58
|
-
self.splitting? ?
|
59
|
-
self.
|
60
|
-
|
59
|
+
self.status = any_work_units_failed? ? FAILED :
|
60
|
+
self.splitting? ? PROCESSING :
|
61
|
+
self.mergeable? ? MERGING :
|
62
|
+
SUCCEEDED
|
61
63
|
end
|
62
64
|
|
63
65
|
# If a callback_url is defined, post the Job's JSON to it upon completion.
|
@@ -71,7 +73,7 @@ module CloudCrowd
|
|
71
73
|
|
72
74
|
# Cleaning up after a job will remove all of its files from S3.
|
73
75
|
def cleanup
|
74
|
-
|
76
|
+
AssetStore.new.cleanup_job(self)
|
75
77
|
end
|
76
78
|
|
77
79
|
# Have all of the WorkUnits finished? We could trade reads for writes here
|
@@ -85,18 +87,23 @@ module CloudCrowd
|
|
85
87
|
self.work_units.failed.count > 0
|
86
88
|
end
|
87
89
|
|
90
|
+
# This job is splittable if its Action has a +split+ method.
|
88
91
|
def splittable?
|
89
|
-
self.action_class.
|
92
|
+
self.action_class.public_instance_methods.include? 'split'
|
90
93
|
end
|
91
94
|
|
92
|
-
|
93
|
-
|
95
|
+
# This job is mergeable if its Action has a +merge+ method.
|
96
|
+
def mergeable?
|
97
|
+
self.processing? && self.action_class.public_instance_methods.include?('merge')
|
94
98
|
end
|
95
99
|
|
100
|
+
# Retrieve the class for this Job's Action, loading it if necessary.
|
96
101
|
def action_class
|
97
102
|
CloudCrowd.actions(self.action)
|
98
103
|
end
|
99
104
|
|
105
|
+
# When the WorkUnits are all finished, gather all their outputs together
|
106
|
+
# before removing them from the database entirely.
|
100
107
|
def gather_outputs_from_work_units
|
101
108
|
outs = self.work_units.complete.map {|wu| wu.output }
|
102
109
|
self.work_units.complete.destroy_all
|
@@ -107,14 +114,18 @@ module CloudCrowd
|
|
107
114
|
CloudCrowd.display_status(self.status)
|
108
115
|
end
|
109
116
|
|
110
|
-
|
111
|
-
|
117
|
+
# How complete is this Job?
|
118
|
+
def percent_complete
|
119
|
+
return 0 if splitting?
|
120
|
+
return 100 if complete?
|
121
|
+
return 99 if merging?
|
122
|
+
(work_units.complete.count / work_units.count.to_f * 100).round
|
112
123
|
end
|
113
124
|
|
114
125
|
# A JSON representation of this job includes the statuses of its component
|
115
126
|
# WorkUnits, as well as any completed outputs.
|
116
127
|
def to_json(opts={})
|
117
|
-
atts = {'id' => self.id, 'status' => self.display_status, '
|
128
|
+
atts = {'id' => self.id, 'status' => self.display_status, 'percent_complete' => self.percent_complete}
|
118
129
|
atts.merge!({'outputs' => JSON.parse(self.outputs)}) if self.outputs
|
119
130
|
atts.merge!({'time' => self.time}) if self.time
|
120
131
|
atts.to_json
|
@@ -3,7 +3,7 @@ module CloudCrowd
|
|
3
3
|
# A WorkUnit is an atomic chunk of work from a job, processing a single input
|
4
4
|
# through a single action. All WorkUnits receive the same options.
|
5
5
|
class WorkUnit < ActiveRecord::Base
|
6
|
-
include
|
6
|
+
include ModelStatus
|
7
7
|
|
8
8
|
belongs_to :job
|
9
9
|
|
@@ -14,7 +14,7 @@ module CloudCrowd
|
|
14
14
|
# Find the Nth available WorkUnit in the queue, and take it out.
|
15
15
|
def self.dequeue(offset=0)
|
16
16
|
unit = self.first(
|
17
|
-
:conditions => {:status =>
|
17
|
+
:conditions => {:status => INCOMPLETE, :taken => false},
|
18
18
|
:order => "created_at asc",
|
19
19
|
:offset => offset
|
20
20
|
)
|
@@ -29,7 +29,7 @@ module CloudCrowd
|
|
29
29
|
# Mark this unit as having finished successfully.
|
30
30
|
def finish(output, time_taken)
|
31
31
|
update_attributes({
|
32
|
-
:status =>
|
32
|
+
:status => SUCCEEDED,
|
33
33
|
:taken => false,
|
34
34
|
:attempts => self.attempts + 1,
|
35
35
|
:output => output,
|
@@ -42,7 +42,7 @@ module CloudCrowd
|
|
42
42
|
tries = self.attempts + 1
|
43
43
|
return try_again if tries < CloudCrowd.config[:work_unit_retries]
|
44
44
|
update_attributes({
|
45
|
-
:status =>
|
45
|
+
:status => FAILED,
|
46
46
|
:taken => false,
|
47
47
|
:attempts => tries,
|
48
48
|
:output => output,
|
data/lib/cloud_crowd/runner.rb
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
# This is the script that kicks off a single CloudCrowd::Daemon. Rely on
|
2
2
|
# cloud-crowd.rb for autoloading of all the code we need.
|
3
3
|
|
4
|
-
# Daemon/Worker Dependencies.
|
5
4
|
require "#{File.dirname(__FILE__)}/../cloud-crowd"
|
6
5
|
|
7
6
|
FileUtils.mkdir('log') unless File.exists?('log')
|
data/lib/cloud_crowd/worker.rb
CHANGED
@@ -1,10 +1,7 @@
|
|
1
1
|
module CloudCrowd
|
2
2
|
|
3
3
|
class Worker
|
4
|
-
|
5
|
-
CENTRAL_URL = CloudCrowd.config[:central_server]
|
6
|
-
RETRY_WAIT = CloudCrowd.config[:worker_retry_wait]
|
7
|
-
|
4
|
+
|
8
5
|
attr_reader :action
|
9
6
|
|
10
7
|
# Spinning up a worker will create a new AssetStore with a persistent
|
@@ -13,8 +10,8 @@ module CloudCrowd
|
|
13
10
|
def initialize
|
14
11
|
@id = $$
|
15
12
|
@hostname = Socket.gethostname
|
16
|
-
@store =
|
17
|
-
@server =
|
13
|
+
@store = AssetStore.new
|
14
|
+
@server = CloudCrowd.central_server
|
18
15
|
log 'started'
|
19
16
|
end
|
20
17
|
|
@@ -48,14 +45,18 @@ module CloudCrowd
|
|
48
45
|
end
|
49
46
|
end
|
50
47
|
|
48
|
+
# We expect and require internal communication between the central server
|
49
|
+
# and the workers to succeed. If it fails for any reason, log it, and then
|
50
|
+
# keep trying the same request.
|
51
51
|
def keep_trying_to(title)
|
52
52
|
begin
|
53
53
|
yield
|
54
54
|
rescue Exception => e
|
55
|
-
|
55
|
+
wait_time = CloudCrowd.config[:worker_retry_wait]
|
56
|
+
log "failed to #{title} -- retry in #{wait_time} seconds"
|
56
57
|
log e.message
|
57
58
|
log e.backtrace
|
58
|
-
sleep
|
59
|
+
sleep wait_time
|
59
60
|
retry
|
60
61
|
end
|
61
62
|
end
|
@@ -71,9 +72,9 @@ module CloudCrowd
|
|
71
72
|
@action = CloudCrowd.actions(@action_name).new
|
72
73
|
@action.configure(@status, @input, @options, @store)
|
73
74
|
result = case @status
|
74
|
-
when
|
75
|
-
when
|
76
|
-
when
|
75
|
+
when PROCESSING then @action.process
|
76
|
+
when SPLITTING then @action.split
|
77
|
+
when MERGING then @action.merge
|
77
78
|
else raise "Work units must specify their status."
|
78
79
|
end
|
79
80
|
complete_work_unit(result)
|
@@ -92,14 +93,6 @@ module CloudCrowd
|
|
92
93
|
|
93
94
|
private
|
94
95
|
|
95
|
-
# Keep an authenticated (if configured to enable authentication) resource
|
96
|
-
# for the central server.
|
97
|
-
def central_server_resource
|
98
|
-
params = [CENTRAL_URL]
|
99
|
-
params += [CloudCrowd.config[:login], CloudCrowd.config[:password]] if CloudCrowd.config[:use_http_authentication]
|
100
|
-
RestClient::Resource.new(*params)
|
101
|
-
end
|
102
|
-
|
103
96
|
# Common parameters to send back to central, regardless of success or failure.
|
104
97
|
def completion_params
|
105
98
|
{:id => @options['work_unit_id'], :time => Time.now - @start_time}
|
@@ -17,7 +17,7 @@ class FailingWorkUnitsTest < Test::Unit::TestCase
|
|
17
17
|
(CloudCrowd.config[:work_unit_retries] - 1).times do
|
18
18
|
job.work_units.each {|unit| unit.fail('failed', 10) }
|
19
19
|
end
|
20
|
-
assert job.reload.
|
20
|
+
assert job.reload.percent_complete == 0
|
21
21
|
job.work_units.reload.each_with_index do |unit, i|
|
22
22
|
assert unit.processing?
|
23
23
|
assert unit.attempts == CloudCrowd.config[:work_unit_retries] - 1
|
data/test/unit/test_job.rb
CHANGED
@@ -17,7 +17,7 @@ class JobTest < Test::Unit::TestCase
|
|
17
17
|
|
18
18
|
should "create all of its work units as soon as the job is created" do
|
19
19
|
assert @job.work_units.count >= 1
|
20
|
-
assert @job.
|
20
|
+
assert @job.percent_complete == 0
|
21
21
|
assert @job.processing?
|
22
22
|
assert @unit.processing?
|
23
23
|
assert !@job.all_work_units_complete?
|
@@ -27,7 +27,7 @@ class JobTest < Test::Unit::TestCase
|
|
27
27
|
assert !@job.all_work_units_complete?
|
28
28
|
@unit.update_attributes(:status => CloudCrowd::SUCCEEDED, :output => 'hello')
|
29
29
|
assert @job.reload.all_work_units_complete?
|
30
|
-
assert @job.
|
30
|
+
assert @job.percent_complete == 100
|
31
31
|
assert @job.outputs == "[\"hello\"]"
|
32
32
|
end
|
33
33
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: documentcloud-cloud-crowd
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jeremy Ashkenas
|
@@ -141,12 +141,11 @@ files:
|
|
141
141
|
- lib/cloud_crowd/app.rb
|
142
142
|
- lib/cloud_crowd/asset_store.rb
|
143
143
|
- lib/cloud_crowd/command_line.rb
|
144
|
-
- lib/cloud_crowd/core_ext.rb
|
145
144
|
- lib/cloud_crowd/daemon.rb
|
146
145
|
- lib/cloud_crowd/helpers/authorization.rb
|
147
146
|
- lib/cloud_crowd/helpers/resources.rb
|
148
|
-
- lib/cloud_crowd/helpers/urls.rb
|
149
147
|
- lib/cloud_crowd/helpers.rb
|
148
|
+
- lib/cloud_crowd/inflector.rb
|
150
149
|
- lib/cloud_crowd/models/job.rb
|
151
150
|
- lib/cloud_crowd/models/work_unit.rb
|
152
151
|
- lib/cloud_crowd/models.rb
|
@@ -163,6 +162,7 @@ files:
|
|
163
162
|
- test/unit/test_work_unit.rb
|
164
163
|
has_rdoc: true
|
165
164
|
homepage: http://documentcloud.org
|
165
|
+
licenses:
|
166
166
|
post_install_message:
|
167
167
|
rdoc_options: []
|
168
168
|
|
@@ -183,7 +183,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
183
183
|
requirements: []
|
184
184
|
|
185
185
|
rubyforge_project: cloud-crowd
|
186
|
-
rubygems_version: 1.
|
186
|
+
rubygems_version: 1.3.5
|
187
187
|
signing_key:
|
188
188
|
specification_version: 2
|
189
189
|
summary: Better living through Map --> Ruby --> Reduce
|
data/lib/cloud_crowd/core_ext.rb
DELETED