documentcloud-cloud-crowd 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- data/cloud-crowd.gemspec +2 -3
- data/lib/cloud-crowd.rb +13 -6
- data/lib/cloud_crowd/action.rb +7 -15
- data/lib/cloud_crowd/app.rb +7 -6
- data/lib/cloud_crowd/command_line.rb +10 -14
- data/lib/cloud_crowd/daemon.rb +1 -1
- data/lib/cloud_crowd/helpers.rb +1 -2
- data/lib/cloud_crowd/helpers/authorization.rb +5 -5
- data/lib/cloud_crowd/inflector.rb +19 -0
- data/lib/cloud_crowd/models.rb +17 -14
- data/lib/cloud_crowd/models/job.rb +29 -18
- data/lib/cloud_crowd/models/work_unit.rb +4 -4
- data/lib/cloud_crowd/runner.rb +0 -1
- data/lib/cloud_crowd/worker.rb +12 -19
- data/test/acceptance/test_failing_work_units.rb +1 -1
- data/test/unit/test_job.rb +2 -2
- metadata +4 -4
- data/lib/cloud_crowd/core_ext.rb +0 -10
- data/lib/cloud_crowd/helpers/urls.rb +0 -7
data/cloud-crowd.gemspec
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = 'cloud-crowd'
|
3
|
-
s.version = '0.0.3'
|
3
|
+
s.version = '0.0.4' # Keep version in sync with cloud-crowd.rb
|
4
4
|
s.date = '2009-08-23'
|
5
5
|
|
6
6
|
s.homepage = "http://documentcloud.org" # wiki page on github?
|
@@ -49,12 +49,11 @@ lib/cloud_crowd/action.rb
|
|
49
49
|
lib/cloud_crowd/app.rb
|
50
50
|
lib/cloud_crowd/asset_store.rb
|
51
51
|
lib/cloud_crowd/command_line.rb
|
52
|
-
lib/cloud_crowd/core_ext.rb
|
53
52
|
lib/cloud_crowd/daemon.rb
|
54
53
|
lib/cloud_crowd/helpers/authorization.rb
|
55
54
|
lib/cloud_crowd/helpers/resources.rb
|
56
|
-
lib/cloud_crowd/helpers/urls.rb
|
57
55
|
lib/cloud_crowd/helpers.rb
|
56
|
+
lib/cloud_crowd/inflector.rb
|
58
57
|
lib/cloud_crowd/models/job.rb
|
59
58
|
lib/cloud_crowd/models/work_unit.rb
|
60
59
|
lib/cloud_crowd/models.rb
|
data/lib/cloud-crowd.rb
CHANGED
@@ -11,9 +11,6 @@ gem 'rest-client'
|
|
11
11
|
gem 'right_aws'
|
12
12
|
gem 'sinatra'
|
13
13
|
|
14
|
-
# Common CloudCrowd libs:
|
15
|
-
require 'cloud_crowd/core_ext'
|
16
|
-
|
17
14
|
# Autoloading for all the pieces which may or may not be needed:
|
18
15
|
autoload :ActiveRecord, 'activerecord'
|
19
16
|
autoload :Benchmark, 'benchmark'
|
@@ -34,6 +31,7 @@ module CloudCrowd
|
|
34
31
|
autoload :Action, 'cloud_crowd/action'
|
35
32
|
autoload :AssetStore, 'cloud_crowd/asset_store'
|
36
33
|
autoload :Helpers, 'cloud_crowd/helpers'
|
34
|
+
autoload :Inflector, 'cloud_crowd/inflector'
|
37
35
|
autoload :Job, 'cloud_crowd/models'
|
38
36
|
autoload :WorkUnit, 'cloud_crowd/models'
|
39
37
|
|
@@ -41,7 +39,7 @@ module CloudCrowd
|
|
41
39
|
ROOT = File.expand_path(File.dirname(__FILE__) + '/..')
|
42
40
|
|
43
41
|
# Keep the version in sync with the gemspec.
|
44
|
-
VERSION = '0.0.3'
|
42
|
+
VERSION = '0.0.4'
|
45
43
|
|
46
44
|
# A Job is processing if its WorkUnits are in the queue to be handled by workers.
|
47
45
|
PROCESSING = 1
|
@@ -88,6 +86,15 @@ module CloudCrowd
|
|
88
86
|
configuration = YAML.load_file(config_path)
|
89
87
|
ActiveRecord::Base.establish_connection(configuration)
|
90
88
|
end
|
89
|
+
|
90
|
+
# Keep an authenticated (if configured to enable authentication) resource
|
91
|
+
# for the central server.
|
92
|
+
def central_server
|
93
|
+
return @central_server if @central_server
|
94
|
+
params = [CloudCrowd.config[:central_server]]
|
95
|
+
params += [CloudCrowd.config[:login], CloudCrowd.config[:password]] if CloudCrowd.config[:use_http_authentication]
|
96
|
+
@central_server = RestClient::Resource.new(*params)
|
97
|
+
end
|
91
98
|
|
92
99
|
# Return the readable status name of an internal CloudCrowd status number.
|
93
100
|
def display_status(status)
|
@@ -98,13 +105,13 @@ module CloudCrowd
|
|
98
105
|
# so we lazy-load them. Think about a variant of this for installing and
|
99
106
|
# loading actions into a running CloudCrowd cluster on the fly.
|
100
107
|
def actions(name)
|
101
|
-
action_class =
|
108
|
+
action_class = Inflector.camelize(name)
|
102
109
|
begin
|
103
110
|
raise NameError, "can't find the #{action_class} Action" unless Module.constants.include?(action_class)
|
104
111
|
Module.const_get(action_class)
|
105
112
|
rescue NameError => e
|
106
113
|
user_action = "#{@config_path}/actions/#{name}"
|
107
|
-
default_action = "#{
|
114
|
+
default_action = "#{ROOT}/actions/#{name}"
|
108
115
|
require user_action and retry if File.exists? "#{user_action}.rb"
|
109
116
|
require default_action and retry if File.exists? "#{default_action}.rb"
|
110
117
|
raise e
|
data/lib/cloud_crowd/action.rb
CHANGED
@@ -18,14 +18,14 @@ module CloudCrowd
|
|
18
18
|
|
19
19
|
# Configuring a new Action sets up all of the read-only variables that
|
20
20
|
# form the bulk of the API for action subclasses. (Paths to read from and
|
21
|
-
# write to).
|
21
|
+
# write to). It creates the work_directory and moves into it.
|
22
22
|
def configure(status, input, options, store)
|
23
23
|
@input, @options, @store = input, options, store
|
24
24
|
@job_id, @work_unit_id = options['job_id'], options['work_unit_id']
|
25
25
|
@work_directory = File.expand_path(File.join(@store.temp_storage_path, storage_prefix))
|
26
26
|
FileUtils.mkdir_p(@work_directory) unless File.exists?(@work_directory)
|
27
27
|
Dir.chdir @work_directory
|
28
|
-
unless status ==
|
28
|
+
unless status == MERGING
|
29
29
|
@input_path = File.join(@work_directory, File.basename(@input))
|
30
30
|
@file_name = File.basename(@input_path, File.extname(@input_path))
|
31
31
|
download(@input, @input_path)
|
@@ -43,15 +43,16 @@ module CloudCrowd
|
|
43
43
|
path
|
44
44
|
end
|
45
45
|
|
46
|
-
# Takes a local filesystem path, and returns the public
|
47
|
-
# file was saved.
|
46
|
+
# Takes a local filesystem path, and returns the public (or authenticated)
|
47
|
+
# url on S3 where the file was saved.
|
48
48
|
def save(file_path)
|
49
49
|
save_path = File.join(s3_storage_path, File.basename(file_path))
|
50
50
|
@store.save(file_path, save_path)
|
51
51
|
return @store.url(save_path)
|
52
52
|
end
|
53
53
|
|
54
|
-
# After the Action has finished, we remove the work directory
|
54
|
+
# After the Action has finished, we remove the work directory and return
|
55
|
+
# to the root directory (where daemons run by default).
|
55
56
|
def cleanup_work_directory
|
56
57
|
Dir.chdir '/'
|
57
58
|
FileUtils.rm_r(@work_directory)
|
@@ -64,7 +65,7 @@ module CloudCrowd
|
|
64
65
|
# [action_name]/job_[job_id]/unit_[work_unit_id]
|
65
66
|
def storage_prefix
|
66
67
|
path_parts = []
|
67
|
-
path_parts << underscore(self.class
|
68
|
+
path_parts << Inflector.underscore(self.class)
|
68
69
|
path_parts << "job_#{@job_id}"
|
69
70
|
path_parts << "unit_#{@work_unit_id}" if @work_unit_id
|
70
71
|
@storage_prefix ||= File.join(path_parts)
|
@@ -74,15 +75,6 @@ module CloudCrowd
|
|
74
75
|
@s3_storage_path ||= storage_prefix
|
75
76
|
end
|
76
77
|
|
77
|
-
# Pilfered from the ActiveSupport::Inflector.
|
78
|
-
def underscore(word)
|
79
|
-
word.to_s.gsub(/::/, '/').
|
80
|
-
gsub(/([A-Z]+)([A-Z][a-z])/,'\1_\2').
|
81
|
-
gsub(/([a-z\d])([A-Z])/,'\1_\2').
|
82
|
-
tr("-", "_").
|
83
|
-
downcase
|
84
|
-
end
|
85
|
-
|
86
78
|
end
|
87
79
|
|
88
80
|
end
|
data/lib/cloud_crowd/app.rb
CHANGED
@@ -1,15 +1,16 @@
|
|
1
1
|
module CloudCrowd
|
2
2
|
|
3
3
|
class App < Sinatra::Default
|
4
|
-
|
5
|
-
# static serves files from /public, methodoverride allows the _method param.
|
6
|
-
enable :static, :methodoverride
|
7
4
|
|
8
|
-
set :root,
|
5
|
+
set :root, ROOT
|
9
6
|
set :authorization_realm, "CloudCrowd"
|
10
7
|
|
11
|
-
helpers
|
8
|
+
helpers Helpers
|
9
|
+
|
10
|
+
# static serves files from /public, methodoverride allows the _method param.
|
11
|
+
enable :static, :methodoverride
|
12
12
|
|
13
|
+
# Enabling HTTP Authentication turns it on for all requests.
|
13
14
|
before do
|
14
15
|
login_required if CloudCrowd.config[:use_http_authentication]
|
15
16
|
end
|
@@ -51,7 +52,7 @@ module CloudCrowd
|
|
51
52
|
current_work_unit.fail(params[:output], params[:time])
|
52
53
|
dequeue_work_unit(1)
|
53
54
|
else
|
54
|
-
|
55
|
+
error(500, "Completing a work unit must specify status.")
|
55
56
|
end
|
56
57
|
end
|
57
58
|
end
|
@@ -6,7 +6,7 @@ module CloudCrowd
|
|
6
6
|
# Configuration files required for the `crowd` command to function.
|
7
7
|
CONFIG_FILES = ['config.yml', 'config.ru', 'database.yml']
|
8
8
|
|
9
|
-
# Reference the absolute path to the root
|
9
|
+
# Reference the absolute path to the root.
|
10
10
|
CC_ROOT = File.expand_path(File.dirname(__FILE__) + '/../..')
|
11
11
|
|
12
12
|
# Path to the Daemons gem script which launches workers.
|
@@ -58,7 +58,7 @@ OPTIONS:
|
|
58
58
|
def run_server
|
59
59
|
ensure_config
|
60
60
|
require 'rubygems'
|
61
|
-
rackup_path = File.expand_path(
|
61
|
+
rackup_path = File.expand_path("#{@options[:config_path]}/config.ru")
|
62
62
|
if Gem.available? 'thin'
|
63
63
|
exec "thin -e production -p #{@options[:port]} -R #{rackup_path} start"
|
64
64
|
else
|
@@ -106,13 +106,13 @@ OPTIONS:
|
|
106
106
|
load_code
|
107
107
|
num_workers = @options[:num_workers] || CloudCrowd.config[:num_workers]
|
108
108
|
num_workers.times do
|
109
|
-
`CLOUD_CROWD_CONFIG='#{File.expand_path(
|
109
|
+
`CLOUD_CROWD_CONFIG='#{File.expand_path(@options[:config_path] + "/config.yml")}' ruby #{WORKER_RUNNER} start`
|
110
110
|
end
|
111
111
|
end
|
112
112
|
|
113
113
|
# For debugging, run a single worker in the current process, showing output.
|
114
114
|
def run_worker
|
115
|
-
exec "CLOUD_CROWD_CONFIG='#{File.expand_path(
|
115
|
+
exec "CLOUD_CROWD_CONFIG='#{File.expand_path(@options[:config_path] + "/config.yml")}' ruby #{WORKER_RUNNER} run"
|
116
116
|
end
|
117
117
|
|
118
118
|
# Stop all active workers.
|
@@ -137,25 +137,21 @@ OPTIONS:
|
|
137
137
|
# the CLOUD_CROWD_CONFIG environment variable. Exit if they're not found.
|
138
138
|
def ensure_config
|
139
139
|
return if @config_found
|
140
|
-
|
141
|
-
|
140
|
+
found = CONFIG_FILES.all? {|f| File.exists? "#{@options[:config_path]}/#{f}" }
|
141
|
+
found ? @config_dir = true : config_not_found
|
142
142
|
end
|
143
143
|
|
144
144
|
# Parse all options for all actions.
|
145
145
|
# TODO: Think about parsing options per sub-command separately.
|
146
146
|
def parse_options
|
147
147
|
@options = {
|
148
|
-
:db_config => 'database.yml',
|
149
148
|
:port => 9173,
|
150
|
-
:config_path => ENV['CLOUD_CROWD_CONFIG'] || '.'
|
149
|
+
:config_path => ENV['CLOUD_CROWD_CONFIG'] || '.'
|
151
150
|
}
|
152
151
|
@option_parser = OptionParser.new do |opts|
|
153
152
|
opts.on('-c', '--config PATH', 'path to configuration directory') do |conf_path|
|
154
153
|
@options[:config_path] = conf_path
|
155
154
|
end
|
156
|
-
opts.on('-d', '--database-config PATH', 'path to database.yml') do |conf_path|
|
157
|
-
@options[:db_config] = conf_path
|
158
|
-
end
|
159
155
|
opts.on('-n', '--num-workers NUM', OptionParser::DecimalInteger, 'number of worker processes') do |num|
|
160
156
|
@options[:num_workers] = num
|
161
157
|
end
|
@@ -164,7 +160,7 @@ OPTIONS:
|
|
164
160
|
end
|
165
161
|
opts.on_tail('-v', '--version', 'show version') do
|
166
162
|
load_code
|
167
|
-
puts "CloudCrowd version #{
|
163
|
+
puts "CloudCrowd version #{VERSION}"
|
168
164
|
exit
|
169
165
|
end
|
170
166
|
end
|
@@ -178,14 +174,14 @@ OPTIONS:
|
|
178
174
|
ensure_config
|
179
175
|
require 'rubygems'
|
180
176
|
require "#{CC_ROOT}/lib/cloud-crowd"
|
181
|
-
CloudCrowd.configure(
|
177
|
+
CloudCrowd.configure("#{@options[:config_path]}/config.yml")
|
182
178
|
end
|
183
179
|
|
184
180
|
# Establish a connection to the central server's database. Not all commands
|
185
181
|
# require this.
|
186
182
|
def connect_to_database
|
187
183
|
require 'cloud_crowd/models'
|
188
|
-
CloudCrowd.configure_database(@options[:
|
184
|
+
CloudCrowd.configure_database("#{@options[:config_path]}/database.yml")
|
189
185
|
end
|
190
186
|
|
191
187
|
# Exit with an explanation if the configuration files couldn't be found.
|
data/lib/cloud_crowd/daemon.rb
CHANGED
data/lib/cloud_crowd/helpers.rb
CHANGED
@@ -1,9 +1,8 @@
|
|
1
1
|
require 'cloud_crowd/helpers/authorization'
|
2
2
|
require 'cloud_crowd/helpers/resources'
|
3
|
-
require 'cloud_crowd/helpers/urls'
|
4
3
|
|
5
4
|
module CloudCrowd
|
6
5
|
module Helpers
|
7
|
-
include Authorization, Resources
|
6
|
+
include Authorization, Resources #, Rack::Utils
|
8
7
|
end
|
9
8
|
end
|
@@ -4,6 +4,7 @@ module CloudCrowd
|
|
4
4
|
module Helpers
|
5
5
|
module Authorization
|
6
6
|
|
7
|
+
# Ensure that the request includes the correct credentials.
|
7
8
|
def login_required
|
8
9
|
return if authorized?
|
9
10
|
unauthorized! unless auth.provided?
|
@@ -12,14 +13,13 @@ module CloudCrowd
|
|
12
13
|
request.env['REMOTE_USER'] = auth.username
|
13
14
|
end
|
14
15
|
|
16
|
+
# Has the request been authenticated?
|
15
17
|
def authorized?
|
16
18
|
!!request.env['REMOTE_USER']
|
17
19
|
end
|
18
20
|
|
19
|
-
|
20
|
-
|
21
|
-
end
|
22
|
-
|
21
|
+
# A request is authorized if its login and password match those stored
|
22
|
+
# in config.yml, or if authentication is disabled.
|
23
23
|
def authorize(login, password)
|
24
24
|
return true unless CloudCrowd.config[:use_http_authentication]
|
25
25
|
return CloudCrowd.config[:login] == login &&
|
@@ -33,7 +33,7 @@ module CloudCrowd
|
|
33
33
|
@auth ||= Rack::Auth::Basic::Request.new(request.env)
|
34
34
|
end
|
35
35
|
|
36
|
-
def unauthorized!(realm =
|
36
|
+
def unauthorized!(realm = App.authorization_realm)
|
37
37
|
response['WWW-Authenticate'] = "Basic realm=\"#{realm}\""
|
38
38
|
halt 401, 'Authorization Required'
|
39
39
|
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module CloudCrowd
|
2
|
+
|
3
|
+
# Pilfered in parts from the ActiveSupport::Inflector.
|
4
|
+
module Inflector
|
5
|
+
|
6
|
+
def self.camelize(word)
|
7
|
+
word.to_s.gsub(/\/(.?)/) { "::#{$1.upcase}" }.gsub(/(?:^|_)(.)/) { $1.upcase }
|
8
|
+
end
|
9
|
+
|
10
|
+
def self.underscore(word)
|
11
|
+
word.to_s.gsub(/::/, '/').
|
12
|
+
gsub(/([A-Z]+)([A-Z][a-z])/,'\1_\2').
|
13
|
+
gsub(/([a-z\d])([A-Z])/,'\1_\2').
|
14
|
+
tr("-", "_").
|
15
|
+
downcase
|
16
|
+
end
|
17
|
+
|
18
|
+
end
|
19
|
+
end
|
data/lib/cloud_crowd/models.rb
CHANGED
@@ -1,28 +1,31 @@
|
|
1
1
|
module CloudCrowd
|
2
|
+
|
3
|
+
# Adds named scopes and query methods for every CloudCrowd status to
|
4
|
+
# both Jobs and WorkUnits.
|
2
5
|
module ModelStatus
|
3
6
|
|
4
7
|
def self.included(klass)
|
5
8
|
|
6
9
|
klass.class_eval do
|
7
10
|
# Note that COMPLETE and INCOMPLETE are unions of other states.
|
8
|
-
named_scope 'processing', :conditions => {:status =>
|
9
|
-
named_scope 'succeeded', :conditions => {:status =>
|
10
|
-
named_scope 'failed', :conditions => {:status =>
|
11
|
-
named_scope 'splitting', :conditions => {:status =>
|
12
|
-
named_scope 'merging', :conditions => {:status =>
|
13
|
-
named_scope 'complete', :conditions => {:status =>
|
14
|
-
named_scope 'incomplete', :conditions => {:status =>
|
11
|
+
named_scope 'processing', :conditions => {:status => PROCESSING}
|
12
|
+
named_scope 'succeeded', :conditions => {:status => SUCCEEDED}
|
13
|
+
named_scope 'failed', :conditions => {:status => FAILED}
|
14
|
+
named_scope 'splitting', :conditions => {:status => SPLITTING}
|
15
|
+
named_scope 'merging', :conditions => {:status => MERGING}
|
16
|
+
named_scope 'complete', :conditions => {:status => COMPLETE}
|
17
|
+
named_scope 'incomplete', :conditions => {:status => INCOMPLETE}
|
15
18
|
end
|
16
19
|
|
17
20
|
end
|
18
21
|
|
19
|
-
def processing?; self.status ==
|
20
|
-
def succeeded?; self.status ==
|
21
|
-
def failed?; self.status ==
|
22
|
-
def splitting?; self.status ==
|
23
|
-
def merging?; self.status ==
|
24
|
-
def complete?;
|
25
|
-
def incomplete?;
|
22
|
+
def processing?; self.status == PROCESSING; end
|
23
|
+
def succeeded?; self.status == SUCCEEDED; end
|
24
|
+
def failed?; self.status == FAILED; end
|
25
|
+
def splitting?; self.status == SPLITTING; end
|
26
|
+
def merging?; self.status == MERGING; end
|
27
|
+
def complete?; COMPLETE.include?(self.status); end
|
28
|
+
def incomplete?; INCOMPLETE.include?(self.status); end
|
26
29
|
|
27
30
|
end
|
28
31
|
end
|
@@ -5,7 +5,7 @@ module CloudCrowd
|
|
5
5
|
# of inputs (usually public urls to files), an action (the name of a script that
|
6
6
|
# CloudCrowd knows how to run), and, eventually a corresponding list of output.
|
7
7
|
class Job < ActiveRecord::Base
|
8
|
-
include
|
8
|
+
include ModelStatus
|
9
9
|
|
10
10
|
has_many :work_units, :dependent => :destroy
|
11
11
|
|
@@ -23,16 +23,18 @@ module CloudCrowd
|
|
23
23
|
)
|
24
24
|
end
|
25
25
|
|
26
|
+
# Creating a job creates its corresponding work units, adding them
|
27
|
+
# to the queue.
|
26
28
|
def after_create
|
27
29
|
self.queue_for_workers(JSON.parse(self.inputs))
|
28
30
|
end
|
29
31
|
|
30
32
|
def before_validation_on_create
|
31
|
-
self.status = self.splittable? ?
|
33
|
+
self.status = self.splittable? ? SPLITTING : PROCESSING
|
32
34
|
end
|
33
35
|
|
34
36
|
# After work units are marked successful, we check to see if all of them have
|
35
|
-
# finished, if so,
|
37
|
+
# finished, if so, continue on to the next phase of the job.
|
36
38
|
def check_for_completion
|
37
39
|
return unless all_work_units_complete?
|
38
40
|
transition_to_next_phase
|
@@ -45,19 +47,19 @@ module CloudCrowd
|
|
45
47
|
self.save
|
46
48
|
|
47
49
|
case self.status
|
48
|
-
when
|
49
|
-
when
|
50
|
-
else
|
50
|
+
when PROCESSING then queue_for_workers(output_list.map {|o| JSON.parse(o) }.flatten)
|
51
|
+
when MERGING then queue_for_workers(output_list.to_json)
|
52
|
+
else fire_callback
|
51
53
|
end
|
52
54
|
self
|
53
55
|
end
|
54
56
|
|
55
|
-
# Transition
|
57
|
+
# Transition this Job's status to the following one.
|
56
58
|
def transition_to_next_phase
|
57
|
-
self.status = any_work_units_failed? ?
|
58
|
-
self.splitting? ?
|
59
|
-
self.
|
60
|
-
|
59
|
+
self.status = any_work_units_failed? ? FAILED :
|
60
|
+
self.splitting? ? PROCESSING :
|
61
|
+
self.mergeable? ? MERGING :
|
62
|
+
SUCCEEDED
|
61
63
|
end
|
62
64
|
|
63
65
|
# If a callback_url is defined, post the Job's JSON to it upon completion.
|
@@ -71,7 +73,7 @@ module CloudCrowd
|
|
71
73
|
|
72
74
|
# Cleaning up after a job will remove all of its files from S3.
|
73
75
|
def cleanup
|
74
|
-
|
76
|
+
AssetStore.new.cleanup_job(self)
|
75
77
|
end
|
76
78
|
|
77
79
|
# Have all of the WorkUnits finished? We could trade reads for writes here
|
@@ -85,18 +87,23 @@ module CloudCrowd
|
|
85
87
|
self.work_units.failed.count > 0
|
86
88
|
end
|
87
89
|
|
90
|
+
# This job is splittable if its Action has a +split+ method.
|
88
91
|
def splittable?
|
89
|
-
self.action_class.
|
92
|
+
self.action_class.public_instance_methods.include? 'split'
|
90
93
|
end
|
91
94
|
|
92
|
-
|
93
|
-
|
95
|
+
# This job is mergeable if its Action has a +merge+ method.
|
96
|
+
def mergeable?
|
97
|
+
self.processing? && self.action_class.public_instance_methods.include?('merge')
|
94
98
|
end
|
95
99
|
|
100
|
+
# Retrieve the class for this Job's Action, loading it if necessary.
|
96
101
|
def action_class
|
97
102
|
CloudCrowd.actions(self.action)
|
98
103
|
end
|
99
104
|
|
105
|
+
# When the WorkUnits are all finished, gather all their outputs together
|
106
|
+
# before removing them from the database entirely.
|
100
107
|
def gather_outputs_from_work_units
|
101
108
|
outs = self.work_units.complete.map {|wu| wu.output }
|
102
109
|
self.work_units.complete.destroy_all
|
@@ -107,14 +114,18 @@ module CloudCrowd
|
|
107
114
|
CloudCrowd.display_status(self.status)
|
108
115
|
end
|
109
116
|
|
110
|
-
|
111
|
-
|
117
|
+
# How complete is this Job?
|
118
|
+
def percent_complete
|
119
|
+
return 0 if splitting?
|
120
|
+
return 100 if complete?
|
121
|
+
return 99 if merging?
|
122
|
+
(work_units.complete.count / work_units.count.to_f * 100).round
|
112
123
|
end
|
113
124
|
|
114
125
|
# A JSON representation of this job includes the statuses of its component
|
115
126
|
# WorkUnits, as well as any completed outputs.
|
116
127
|
def to_json(opts={})
|
117
|
-
atts = {'id' => self.id, 'status' => self.display_status, '
|
128
|
+
atts = {'id' => self.id, 'status' => self.display_status, 'percent_complete' => self.percent_complete}
|
118
129
|
atts.merge!({'outputs' => JSON.parse(self.outputs)}) if self.outputs
|
119
130
|
atts.merge!({'time' => self.time}) if self.time
|
120
131
|
atts.to_json
|
@@ -3,7 +3,7 @@ module CloudCrowd
|
|
3
3
|
# A WorkUnit is an atomic chunk of work from a job, processing a single input
|
4
4
|
# through a single action. All WorkUnits receive the same options.
|
5
5
|
class WorkUnit < ActiveRecord::Base
|
6
|
-
include
|
6
|
+
include ModelStatus
|
7
7
|
|
8
8
|
belongs_to :job
|
9
9
|
|
@@ -14,7 +14,7 @@ module CloudCrowd
|
|
14
14
|
# Find the Nth available WorkUnit in the queue, and take it out.
|
15
15
|
def self.dequeue(offset=0)
|
16
16
|
unit = self.first(
|
17
|
-
:conditions => {:status =>
|
17
|
+
:conditions => {:status => INCOMPLETE, :taken => false},
|
18
18
|
:order => "created_at asc",
|
19
19
|
:offset => offset
|
20
20
|
)
|
@@ -29,7 +29,7 @@ module CloudCrowd
|
|
29
29
|
# Mark this unit as having finished successfully.
|
30
30
|
def finish(output, time_taken)
|
31
31
|
update_attributes({
|
32
|
-
:status =>
|
32
|
+
:status => SUCCEEDED,
|
33
33
|
:taken => false,
|
34
34
|
:attempts => self.attempts + 1,
|
35
35
|
:output => output,
|
@@ -42,7 +42,7 @@ module CloudCrowd
|
|
42
42
|
tries = self.attempts + 1
|
43
43
|
return try_again if tries < CloudCrowd.config[:work_unit_retries]
|
44
44
|
update_attributes({
|
45
|
-
:status =>
|
45
|
+
:status => FAILED,
|
46
46
|
:taken => false,
|
47
47
|
:attempts => tries,
|
48
48
|
:output => output,
|
data/lib/cloud_crowd/runner.rb
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
# This is the script that kicks off a single CloudCrowd::Daemon. Rely on
|
2
2
|
# cloud-crowd.rb for autoloading of all the code we need.
|
3
3
|
|
4
|
-
# Daemon/Worker Dependencies.
|
5
4
|
require "#{File.dirname(__FILE__)}/../cloud-crowd"
|
6
5
|
|
7
6
|
FileUtils.mkdir('log') unless File.exists?('log')
|
data/lib/cloud_crowd/worker.rb
CHANGED
@@ -1,10 +1,7 @@
|
|
1
1
|
module CloudCrowd
|
2
2
|
|
3
3
|
class Worker
|
4
|
-
|
5
|
-
CENTRAL_URL = CloudCrowd.config[:central_server]
|
6
|
-
RETRY_WAIT = CloudCrowd.config[:worker_retry_wait]
|
7
|
-
|
4
|
+
|
8
5
|
attr_reader :action
|
9
6
|
|
10
7
|
# Spinning up a worker will create a new AssetStore with a persistent
|
@@ -13,8 +10,8 @@ module CloudCrowd
|
|
13
10
|
def initialize
|
14
11
|
@id = $$
|
15
12
|
@hostname = Socket.gethostname
|
16
|
-
@store =
|
17
|
-
@server =
|
13
|
+
@store = AssetStore.new
|
14
|
+
@server = CloudCrowd.central_server
|
18
15
|
log 'started'
|
19
16
|
end
|
20
17
|
|
@@ -48,14 +45,18 @@ module CloudCrowd
|
|
48
45
|
end
|
49
46
|
end
|
50
47
|
|
48
|
+
# We expect and require internal communication between the central server
|
49
|
+
# and the workers to succeed. If it fails for any reason, log it, and then
|
50
|
+
# keep trying the same request.
|
51
51
|
def keep_trying_to(title)
|
52
52
|
begin
|
53
53
|
yield
|
54
54
|
rescue Exception => e
|
55
|
-
|
55
|
+
wait_time = CloudCrowd.config[:worker_retry_wait]
|
56
|
+
log "failed to #{title} -- retry in #{wait_time} seconds"
|
56
57
|
log e.message
|
57
58
|
log e.backtrace
|
58
|
-
sleep
|
59
|
+
sleep wait_time
|
59
60
|
retry
|
60
61
|
end
|
61
62
|
end
|
@@ -71,9 +72,9 @@ module CloudCrowd
|
|
71
72
|
@action = CloudCrowd.actions(@action_name).new
|
72
73
|
@action.configure(@status, @input, @options, @store)
|
73
74
|
result = case @status
|
74
|
-
when
|
75
|
-
when
|
76
|
-
when
|
75
|
+
when PROCESSING then @action.process
|
76
|
+
when SPLITTING then @action.split
|
77
|
+
when MERGING then @action.merge
|
77
78
|
else raise "Work units must specify their status."
|
78
79
|
end
|
79
80
|
complete_work_unit(result)
|
@@ -92,14 +93,6 @@ module CloudCrowd
|
|
92
93
|
|
93
94
|
private
|
94
95
|
|
95
|
-
# Keep an authenticated (if configured to enable authentication) resource
|
96
|
-
# for the central server.
|
97
|
-
def central_server_resource
|
98
|
-
params = [CENTRAL_URL]
|
99
|
-
params += [CloudCrowd.config[:login], CloudCrowd.config[:password]] if CloudCrowd.config[:use_http_authentication]
|
100
|
-
RestClient::Resource.new(*params)
|
101
|
-
end
|
102
|
-
|
103
96
|
# Common parameters to send back to central, regardless of success or failure.
|
104
97
|
def completion_params
|
105
98
|
{:id => @options['work_unit_id'], :time => Time.now - @start_time}
|
@@ -17,7 +17,7 @@ class FailingWorkUnitsTest < Test::Unit::TestCase
|
|
17
17
|
(CloudCrowd.config[:work_unit_retries] - 1).times do
|
18
18
|
job.work_units.each {|unit| unit.fail('failed', 10) }
|
19
19
|
end
|
20
|
-
assert job.reload.
|
20
|
+
assert job.reload.percent_complete == 0
|
21
21
|
job.work_units.reload.each_with_index do |unit, i|
|
22
22
|
assert unit.processing?
|
23
23
|
assert unit.attempts == CloudCrowd.config[:work_unit_retries] - 1
|
data/test/unit/test_job.rb
CHANGED
@@ -17,7 +17,7 @@ class JobTest < Test::Unit::TestCase
|
|
17
17
|
|
18
18
|
should "create all of its work units as soon as the job is created" do
|
19
19
|
assert @job.work_units.count >= 1
|
20
|
-
assert @job.
|
20
|
+
assert @job.percent_complete == 0
|
21
21
|
assert @job.processing?
|
22
22
|
assert @unit.processing?
|
23
23
|
assert !@job.all_work_units_complete?
|
@@ -27,7 +27,7 @@ class JobTest < Test::Unit::TestCase
|
|
27
27
|
assert !@job.all_work_units_complete?
|
28
28
|
@unit.update_attributes(:status => CloudCrowd::SUCCEEDED, :output => 'hello')
|
29
29
|
assert @job.reload.all_work_units_complete?
|
30
|
-
assert @job.
|
30
|
+
assert @job.percent_complete == 100
|
31
31
|
assert @job.outputs == "[\"hello\"]"
|
32
32
|
end
|
33
33
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: documentcloud-cloud-crowd
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jeremy Ashkenas
|
@@ -141,12 +141,11 @@ files:
|
|
141
141
|
- lib/cloud_crowd/app.rb
|
142
142
|
- lib/cloud_crowd/asset_store.rb
|
143
143
|
- lib/cloud_crowd/command_line.rb
|
144
|
-
- lib/cloud_crowd/core_ext.rb
|
145
144
|
- lib/cloud_crowd/daemon.rb
|
146
145
|
- lib/cloud_crowd/helpers/authorization.rb
|
147
146
|
- lib/cloud_crowd/helpers/resources.rb
|
148
|
-
- lib/cloud_crowd/helpers/urls.rb
|
149
147
|
- lib/cloud_crowd/helpers.rb
|
148
|
+
- lib/cloud_crowd/inflector.rb
|
150
149
|
- lib/cloud_crowd/models/job.rb
|
151
150
|
- lib/cloud_crowd/models/work_unit.rb
|
152
151
|
- lib/cloud_crowd/models.rb
|
@@ -163,6 +162,7 @@ files:
|
|
163
162
|
- test/unit/test_work_unit.rb
|
164
163
|
has_rdoc: true
|
165
164
|
homepage: http://documentcloud.org
|
165
|
+
licenses:
|
166
166
|
post_install_message:
|
167
167
|
rdoc_options: []
|
168
168
|
|
@@ -183,7 +183,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
183
183
|
requirements: []
|
184
184
|
|
185
185
|
rubyforge_project: cloud-crowd
|
186
|
-
rubygems_version: 1.
|
186
|
+
rubygems_version: 1.3.5
|
187
187
|
signing_key:
|
188
188
|
specification_version: 2
|
189
189
|
summary: Better living through Map --> Ruby --> Reduce
|
data/lib/cloud_crowd/core_ext.rb
DELETED