cloud-crowd 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +16 -16
- data/cloud-crowd.gemspec +10 -9
- data/config/config.example.ru +8 -2
- data/config/config.example.yml +21 -25
- data/examples/process_pdfs_example.rb +1 -1
- data/examples/word_count_example.rb +1 -0
- data/lib/cloud-crowd.rb +47 -28
- data/lib/cloud_crowd/action.rb +14 -8
- data/lib/cloud_crowd/asset_store.rb +8 -8
- data/lib/cloud_crowd/asset_store/filesystem_store.rb +18 -7
- data/lib/cloud_crowd/asset_store/s3_store.rb +14 -11
- data/lib/cloud_crowd/command_line.rb +24 -58
- data/lib/cloud_crowd/exceptions.rb +7 -0
- data/lib/cloud_crowd/helpers/authorization.rb +5 -3
- data/lib/cloud_crowd/helpers/resources.rb +0 -20
- data/lib/cloud_crowd/models.rb +1 -1
- data/lib/cloud_crowd/models/job.rb +37 -40
- data/lib/cloud_crowd/models/node_record.rb +95 -0
- data/lib/cloud_crowd/models/work_unit.rb +87 -33
- data/lib/cloud_crowd/node.rb +105 -0
- data/lib/cloud_crowd/schema.rb +22 -18
- data/lib/cloud_crowd/{app.rb → server.rb} +34 -34
- data/lib/cloud_crowd/worker.rb +68 -107
- data/public/css/admin_console.css +40 -18
- data/public/images/server.png +0 -0
- data/public/images/server_busy.png +0 -0
- data/public/js/admin_console.js +47 -18
- data/test/acceptance/test_failing_work_units.rb +1 -1
- data/test/acceptance/{test_app.rb → test_server.rb} +15 -15
- data/test/acceptance/test_word_count.rb +3 -9
- data/test/blueprints.rb +0 -1
- data/test/config/config.ru +1 -1
- data/test/config/config.yml +2 -4
- data/test/unit/test_action.rb +1 -1
- data/test/unit/test_configuration.rb +1 -1
- data/test/unit/test_job.rb +3 -0
- data/test/unit/test_work_unit.rb +2 -4
- data/views/{index.erb → operations_center.erb} +13 -8
- metadata +11 -10
- data/lib/cloud_crowd/daemon.rb +0 -95
- data/lib/cloud_crowd/models/worker_record.rb +0 -61
- data/lib/cloud_crowd/runner.rb +0 -15
@@ -2,15 +2,26 @@ module CloudCrowd
|
|
2
2
|
class AssetStore
|
3
3
|
|
4
4
|
# The FilesystemStore is an implementation of the AssetStore, good only for
|
5
|
-
# use in development, testing,
|
6
|
-
# installation.
|
5
|
+
# use in development, testing, if you're only running a single-machine
|
6
|
+
# installation, or are using a networked drive.
|
7
7
|
module FilesystemStore
|
8
8
|
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
DEFAULT_STORAGE_PATH = '/tmp/cloud_crowd_storage'
|
10
|
+
|
11
|
+
attr_reader :local_storage_path
|
12
|
+
|
13
|
+
# Make sure that local storage exists and is writeable before starting.
|
14
|
+
def setup
|
15
|
+
lsp = @local_storage_path = CloudCrowd.config[:local_storage_path] || DEFAULT_STORAGE_PATH
|
16
|
+
FileUtils.mkdir_p(lsp) unless File.exists?(lsp)
|
17
|
+
raise Error::StorageNotWritable, "#{lsp} is not writable" unless File.writable?(lsp)
|
18
|
+
end
|
19
|
+
|
20
|
+
# Save a file to somewhere semi-persistent on the filesystem. To use,
|
21
|
+
# configure <tt>:storage: 'filesystem'</tt> in *config.yml*, as well as
|
22
|
+
# <tt>:local_storage_path:</tt>.
|
12
23
|
def save(local_path, save_path)
|
13
|
-
save_path = File.join(
|
24
|
+
save_path = File.join(@local_storage_path, save_path)
|
14
25
|
save_dir = File.dirname(save_path)
|
15
26
|
FileUtils.mkdir_p save_dir unless File.exists? save_dir
|
16
27
|
FileUtils.cp(local_path, save_path)
|
@@ -19,7 +30,7 @@ module CloudCrowd
|
|
19
30
|
|
20
31
|
# Remove all of a Job's result files from the filesystem.
|
21
32
|
def cleanup(job)
|
22
|
-
path = "#{
|
33
|
+
path = "#{@local_storage_path}/#{job.action}/job_#{job.id}"
|
23
34
|
FileUtils.rm_r(path) if File.exists?(path)
|
24
35
|
end
|
25
36
|
end
|
@@ -5,11 +5,24 @@ module CloudCrowd
|
|
5
5
|
# on S3 for all resulting files.
|
6
6
|
module S3Store
|
7
7
|
|
8
|
+
# Configure authentication and establish a connection to S3, first thing.
|
9
|
+
def setup
|
10
|
+
@use_auth = CloudCrowd.config[:s3_authentication]
|
11
|
+
bucket_name = CloudCrowd.config[:s3_bucket]
|
12
|
+
key, secret = CloudCrowd.config[:aws_access_key], CloudCrowd.config[:aws_secret_key]
|
13
|
+
valid_conf = [bucket_name, key, secret].all? {|s| s.is_a? String }
|
14
|
+
raise Error::MissingConfiguration, "An S3 account must be configured in 'config.yml' before 's3' storage can be used" unless valid_conf
|
15
|
+
protocol = @use_auth ? 'https' : 'http'
|
16
|
+
port = @use_auth ? 443 : 80
|
17
|
+
@s3 = RightAws::S3.new(key, secret, :protocol => protocol, :port => port)
|
18
|
+
@bucket = @s3.bucket(bucket_name)
|
19
|
+
@bucket = @s3.bucket(bucket_name, true) unless @bucket
|
20
|
+
end
|
21
|
+
|
8
22
|
# Save a finished file from local storage to S3. Save it publicly unless
|
9
23
|
# we're configured to use S3 authentication. Authenticated links expire
|
10
24
|
# after one day by default.
|
11
25
|
def save(local_path, save_path)
|
12
|
-
ensure_s3_connection
|
13
26
|
if @use_auth
|
14
27
|
@bucket.put(save_path, File.open(local_path), {}, 'private')
|
15
28
|
@s3.interface.get_link(@bucket, save_path)
|
@@ -21,19 +34,9 @@ module CloudCrowd
|
|
21
34
|
|
22
35
|
# Remove all of a Job's resulting files from S3, both intermediate and finished.
|
23
36
|
def cleanup(job)
|
24
|
-
ensure_s3_connection
|
25
37
|
@bucket.delete_folder("#{job.action}/job_#{job.id}")
|
26
38
|
end
|
27
39
|
|
28
|
-
# Workers, through the course of many WorkUnits, keep around an AssetStore.
|
29
|
-
# Ensure we have a persistent S3 connection after first use.
|
30
|
-
def ensure_s3_connection
|
31
|
-
unless @s3 && @bucket
|
32
|
-
params = {:port => 80, :protocol => 'http'}
|
33
|
-
@s3 = RightAws::S3.new(CloudCrowd.config[:aws_access_key], CloudCrowd.config[:aws_secret_key], params)
|
34
|
-
@bucket = @s3.bucket(CloudCrowd.config[:s3_bucket], true)
|
35
|
-
end
|
36
|
-
end
|
37
40
|
end
|
38
41
|
|
39
42
|
end
|
@@ -9,9 +9,6 @@ module CloudCrowd
|
|
9
9
|
# Reference the absolute path to the root.
|
10
10
|
CC_ROOT = File.expand_path(File.dirname(__FILE__) + '/../..')
|
11
11
|
|
12
|
-
# Path to the Daemons gem script which launches workers.
|
13
|
-
WORKER_RUNNER = File.expand_path("#{CC_ROOT}/lib/cloud_crowd/runner.rb")
|
14
|
-
|
15
12
|
# Command-line banner for the usage message.
|
16
13
|
BANNER = <<-EOS
|
17
14
|
CloudCrowd is a MapReduce-inspired Parallel Processing System for Ruby.
|
@@ -24,7 +21,7 @@ Usage: crowd COMMAND OPTIONS
|
|
24
21
|
Commands:
|
25
22
|
install Install the CloudCrowd configuration files to the specified directory
|
26
23
|
server Start up the central server (requires a database)
|
27
|
-
|
24
|
+
node Start up a worker node (only one node per machine, please)
|
28
25
|
console Launch a CloudCrowd console, connected to the central database
|
29
26
|
load_schema Load the schema into the database specified by database.yml
|
30
27
|
|
@@ -38,7 +35,7 @@ Options:
|
|
38
35
|
case command
|
39
36
|
when 'console' then run_console
|
40
37
|
when 'server' then run_server
|
41
|
-
when '
|
38
|
+
when 'node' then run_node
|
42
39
|
when 'load_schema' then run_load_schema
|
43
40
|
when 'install' then run_install
|
44
41
|
else usage
|
@@ -52,7 +49,7 @@ Options:
|
|
52
49
|
require 'irb/completion'
|
53
50
|
require 'pp'
|
54
51
|
load_code
|
55
|
-
connect_to_database
|
52
|
+
connect_to_database(true)
|
56
53
|
IRB.start
|
57
54
|
end
|
58
55
|
|
@@ -63,6 +60,7 @@ Options:
|
|
63
60
|
# (Mongrel, falling back to WEBrick). The equivalent of Rails' script/server.
|
64
61
|
def run_server
|
65
62
|
ensure_config
|
63
|
+
@options[:port] ||= 9173
|
66
64
|
require 'rubygems'
|
67
65
|
rackup_path = File.expand_path("#{@options[:config_path]}/config.ru")
|
68
66
|
if Gem.available? 'thin'
|
@@ -72,10 +70,18 @@ Options:
|
|
72
70
|
end
|
73
71
|
end
|
74
72
|
|
73
|
+
# Launch a Node. Please only run a single node per machine. The Node process
|
74
|
+
# will be long-lived, although its workers will come and go.
|
75
|
+
def run_node
|
76
|
+
ENV['RACK_ENV'] = @options['environment']
|
77
|
+
load_code
|
78
|
+
Node.new(@options[:port])
|
79
|
+
end
|
80
|
+
|
75
81
|
# Load in the database schema to the database specified in 'database.yml'.
|
76
82
|
def run_load_schema
|
77
83
|
load_code
|
78
|
-
connect_to_database
|
84
|
+
connect_to_database(false)
|
79
85
|
require 'cloud_crowd/schema.rb'
|
80
86
|
end
|
81
87
|
|
@@ -91,46 +97,6 @@ Options:
|
|
91
97
|
install_file "#{CC_ROOT}/actions", "#{install_path}/actions", true
|
92
98
|
end
|
93
99
|
|
94
|
-
# Manipulate worker daemons -- handles all commands that the Daemons gem
|
95
|
-
# provides: start, stop, restart, run, and status.
|
96
|
-
def run_workers_command
|
97
|
-
ensure_config
|
98
|
-
command = ARGV.shift
|
99
|
-
case command
|
100
|
-
when 'start' then start_workers
|
101
|
-
when 'stop' then stop_workers
|
102
|
-
when 'restart' then stop_workers && start_workers
|
103
|
-
when 'run' then run_worker
|
104
|
-
when 'status' then show_worker_status
|
105
|
-
else usage
|
106
|
-
end
|
107
|
-
end
|
108
|
-
|
109
|
-
# Start up N workers, specified by argument or the number of workers in
|
110
|
-
# config.yml.
|
111
|
-
def start_workers
|
112
|
-
load_code
|
113
|
-
num_workers = @options[:num_workers] || CloudCrowd.config[:num_workers]
|
114
|
-
num_workers.times do
|
115
|
-
`CLOUD_CROWD_CONFIG='#{File.expand_path(@options[:config_path] + "/config.yml")}' ruby #{WORKER_RUNNER} start`
|
116
|
-
end
|
117
|
-
end
|
118
|
-
|
119
|
-
# For debugging, run a single worker in the current process, showing output.
|
120
|
-
def run_worker
|
121
|
-
exec "CLOUD_CROWD_CONFIG='#{File.expand_path(@options[:config_path] + "/config.yml")}' ruby #{WORKER_RUNNER} run"
|
122
|
-
end
|
123
|
-
|
124
|
-
# Stop all active workers.
|
125
|
-
def stop_workers
|
126
|
-
`ruby #{WORKER_RUNNER} stop`
|
127
|
-
end
|
128
|
-
|
129
|
-
# Display the status of all active workers.
|
130
|
-
def show_worker_status
|
131
|
-
puts `ruby #{WORKER_RUNNER} status`
|
132
|
-
end
|
133
|
-
|
134
100
|
# Print `crowd` usage.
|
135
101
|
def usage
|
136
102
|
puts "\n#{@option_parser}\n"
|
@@ -150,7 +116,6 @@ Options:
|
|
150
116
|
# Parse all options for all commands.
|
151
117
|
def parse_options
|
152
118
|
@options = {
|
153
|
-
:port => 9173,
|
154
119
|
:environment => 'production',
|
155
120
|
:config_path => ENV['CLOUD_CROWD_CONFIG'] || '.'
|
156
121
|
}
|
@@ -158,17 +123,14 @@ Options:
|
|
158
123
|
opts.on('-c', '--config PATH', 'path to configuration directory') do |conf_path|
|
159
124
|
@options[:config_path] = conf_path
|
160
125
|
end
|
161
|
-
opts.on('-
|
162
|
-
@options[:num_workers] = num
|
163
|
-
end
|
164
|
-
opts.on('-p', '--port PORT', 'central server port number') do |port_num|
|
126
|
+
opts.on('-p', '--port PORT', 'port number for server (central or node)') do |port_num|
|
165
127
|
@options[:port] = port_num
|
166
128
|
end
|
167
129
|
opts.on('-e', '--environment ENV', 'server environment (sinatra)') do |env|
|
168
130
|
@options[:environment] = env
|
169
131
|
end
|
170
132
|
opts.on_tail('-v', '--version', 'show version') do
|
171
|
-
|
133
|
+
require "#{CC_ROOT}/lib/cloud-crowd"
|
172
134
|
puts "CloudCrowd version #{VERSION}"
|
173
135
|
exit
|
174
136
|
end
|
@@ -181,26 +143,30 @@ Options:
|
|
181
143
|
# Not all commands require this.
|
182
144
|
def load_code
|
183
145
|
ensure_config
|
184
|
-
require 'rubygems'
|
185
146
|
require "#{CC_ROOT}/lib/cloud-crowd"
|
186
147
|
CloudCrowd.configure("#{@options[:config_path]}/config.yml")
|
187
148
|
end
|
188
149
|
|
189
150
|
# Establish a connection to the central server's database. Not all commands
|
190
151
|
# require this.
|
191
|
-
def connect_to_database
|
152
|
+
def connect_to_database(validate_schema)
|
192
153
|
require 'cloud_crowd/models'
|
193
|
-
CloudCrowd.configure_database("#{@options[:config_path]}/database.yml")
|
154
|
+
CloudCrowd.configure_database("#{@options[:config_path]}/database.yml", validate_schema)
|
194
155
|
end
|
195
156
|
|
196
157
|
# Exit with an explanation if the configuration files couldn't be found.
|
197
158
|
def config_not_found
|
198
|
-
puts "`crowd` can't find the CloudCrowd configuration directory. Please
|
159
|
+
puts "`crowd` can't find the CloudCrowd configuration directory. Please use `crowd -c path/to/config`, or run `crowd` from inside of the configuration directory itself."
|
199
160
|
exit(1)
|
200
161
|
end
|
201
162
|
|
202
|
-
# Install a file and log the installation.
|
163
|
+
# Install a file and log the installation. If we're overwriting a file,
|
164
|
+
# offer a chance to back out.
|
203
165
|
def install_file(source, dest, is_dir=false)
|
166
|
+
if File.exists?(dest)
|
167
|
+
print "#{dest} already exists. Overwrite it? (yes/no) "
|
168
|
+
return unless ['y', 'yes', 'ok'].include? gets.chomp.downcase
|
169
|
+
end
|
204
170
|
is_dir ? FileUtils.cp_r(source, dest) : FileUtils.cp(source, dest)
|
205
171
|
puts "installed #{dest}"
|
206
172
|
end
|
@@ -2,6 +2,8 @@ module CloudCrowd
|
|
2
2
|
|
3
3
|
# Base Error class which all custom CloudCrowd exceptions inherit from.
|
4
4
|
# Rescuing CloudCrowd::Error (or RuntimeError) will get all custom exceptions.
|
5
|
+
# If your cluster is correctly configured, you should never expect to see any
|
6
|
+
# of these.
|
5
7
|
class Error < RuntimeError
|
6
8
|
|
7
9
|
# ActionNotFound is raised when a job is created for an action that doesn't
|
@@ -23,6 +25,11 @@ module CloudCrowd
|
|
23
25
|
class StatusUnspecified < Error
|
24
26
|
end
|
25
27
|
|
28
|
+
# MissingConfiguration is raised when we're trying to run a method that
|
29
|
+
# needs configuration not present in config.yml.
|
30
|
+
class MissingConfiguration < Error
|
31
|
+
end
|
32
|
+
|
26
33
|
end
|
27
34
|
|
28
35
|
end
|
@@ -23,9 +23,9 @@ module CloudCrowd
|
|
23
23
|
# A request is authorized if its login and password match those stored
|
24
24
|
# in config.yml, or if authentication is disabled. If authentication is
|
25
25
|
# turned on, then every request is authenticated, including between
|
26
|
-
# the
|
26
|
+
# the nodes and the central server.
|
27
27
|
def authorize(login, password)
|
28
|
-
return true unless CloudCrowd.config[:
|
28
|
+
return true unless CloudCrowd.config[:http_authentication]
|
29
29
|
return CloudCrowd.config[:login] == login &&
|
30
30
|
CloudCrowd.config[:password] == password
|
31
31
|
end
|
@@ -33,11 +33,13 @@ module CloudCrowd
|
|
33
33
|
|
34
34
|
private
|
35
35
|
|
36
|
+
# Provide a Rack Authorization object.
|
36
37
|
def auth
|
37
38
|
@auth ||= Rack::Auth::Basic::Request.new(request.env)
|
38
39
|
end
|
39
40
|
|
40
|
-
|
41
|
+
# Unauthorized requests will prompt the browser to provide credentials.
|
42
|
+
def unauthorized!(realm = Server.authorization_realm)
|
41
43
|
response['WWW-Authenticate'] = "Basic realm=\"#{realm}\""
|
42
44
|
halt 401, 'Authorization Required'
|
43
45
|
end
|
@@ -20,26 +20,6 @@ module CloudCrowd
|
|
20
20
|
@work_unit ||= WorkUnit.find_by_id(params[:work_unit_id]) or raise Sinatra::NotFound
|
21
21
|
end
|
22
22
|
|
23
|
-
# Try to fetch a work unit from the queue. If none are pending, respond
|
24
|
-
# with no content.
|
25
|
-
def dequeue_work_unit(offset=0)
|
26
|
-
handle_conflicts do
|
27
|
-
worker, actions = params[:worker_name], params[:worker_actions].split(',')
|
28
|
-
WorkUnit.dequeue(worker, actions, offset)
|
29
|
-
end
|
30
|
-
end
|
31
|
-
|
32
|
-
# We're using ActiveRecords optimistic locking, so stale work units
|
33
|
-
# may sometimes arise. handle_conflicts responds with a the HTTP status
|
34
|
-
# code of your choosing if the update failed to be applied.
|
35
|
-
def handle_conflicts(code=204)
|
36
|
-
begin
|
37
|
-
yield
|
38
|
-
rescue ActiveRecord::StaleObjectError => e
|
39
|
-
return status(code) && ''
|
40
|
-
end
|
41
|
-
end
|
42
|
-
|
43
23
|
end
|
44
24
|
end
|
45
25
|
end
|
data/lib/cloud_crowd/models.rb
CHANGED
@@ -31,30 +31,36 @@ module CloudCrowd
|
|
31
31
|
# finished, if so, continue on to the next phase of the job.
|
32
32
|
def check_for_completion
|
33
33
|
return unless all_work_units_complete?
|
34
|
-
|
35
|
-
|
36
|
-
|
34
|
+
set_next_status
|
35
|
+
outs = gather_outputs_from_work_units
|
36
|
+
return queue_for_workers(outs) if merging?
|
37
37
|
if complete?
|
38
|
-
|
39
|
-
|
40
|
-
end
|
41
|
-
self.save
|
42
|
-
|
43
|
-
case self.status
|
44
|
-
when PROCESSING then queue_for_workers(output_list.map {|o| JSON.parse(o) }.flatten)
|
45
|
-
when MERGING then queue_for_workers(output_list.to_json)
|
46
|
-
else fire_callback
|
38
|
+
update_attributes(:outputs => outs, :time => time_taken)
|
39
|
+
fire_callback if callback_url
|
47
40
|
end
|
48
41
|
self
|
49
42
|
end
|
50
43
|
|
44
|
+
# Transition this Job's status to the appropriate next status.
|
45
|
+
def set_next_status
|
46
|
+
update_attribute(:status,
|
47
|
+
any_work_units_failed? ? FAILED :
|
48
|
+
self.splitting? ? PROCESSING :
|
49
|
+
self.mergeable? ? MERGING :
|
50
|
+
SUCCEEDED
|
51
|
+
)
|
52
|
+
end
|
53
|
+
|
51
54
|
# If a <tt>callback_url</tt> is defined, post the Job's JSON to it upon
|
52
55
|
# completion. The <tt>callback_url</tt> may include HTTP basic authentication,
|
53
56
|
# if you like:
|
54
57
|
# http://user:password@example.com/job_complete
|
58
|
+
# If the callback_url is successfully pinged, we proceed to cleanup the job.
|
59
|
+
# TODO: This should be moved into a Work Unit...
|
55
60
|
def fire_callback
|
56
61
|
begin
|
57
|
-
RestClient.post(callback_url, {:job => self.to_json})
|
62
|
+
RestClient.post(callback_url, {:job => self.to_json})
|
63
|
+
self.destroy
|
58
64
|
rescue RestClient::Exception => e
|
59
65
|
puts "Failed to fire job callback. Hmmm, what should happen here?"
|
60
66
|
end
|
@@ -62,15 +68,12 @@ module CloudCrowd
|
|
62
68
|
|
63
69
|
# Cleaning up after a job will remove all of its files from S3. Destroying
|
64
70
|
# a Job calls cleanup_assets first.
|
71
|
+
# TODO: Convert this into a 'cleanup' work unit that gets run by a worker.
|
65
72
|
def cleanup_assets
|
66
73
|
AssetStore.new.cleanup(self)
|
67
74
|
end
|
68
75
|
|
69
76
|
# Have all of the WorkUnits finished?
|
70
|
-
#--
|
71
|
-
# We could trade reads for writes here
|
72
|
-
# by keeping a completed_count on the Job itself.
|
73
|
-
#++
|
74
77
|
def all_work_units_complete?
|
75
78
|
self.work_units.incomplete.count <= 0
|
76
79
|
end
|
@@ -85,6 +88,11 @@ module CloudCrowd
|
|
85
88
|
self.action_class.public_instance_methods.include? 'split'
|
86
89
|
end
|
87
90
|
|
91
|
+
# This job is done splitting if it's finished with its splitting work units.
|
92
|
+
def done_splitting?
|
93
|
+
splittable? && work_units.splitting.count <= 0
|
94
|
+
end
|
95
|
+
|
88
96
|
# This job is mergeable if its Action has a +merge+ method.
|
89
97
|
def mergeable?
|
90
98
|
self.processing? && self.action_class.public_instance_methods.include?('merge')
|
@@ -92,16 +100,19 @@ module CloudCrowd
|
|
92
100
|
|
93
101
|
# Retrieve the class for this Job's Action.
|
94
102
|
def action_class
|
95
|
-
|
96
|
-
return
|
103
|
+
@action_class ||= CloudCrowd.actions[self.action]
|
104
|
+
return @action_class if @action_class
|
97
105
|
raise Error::ActionNotFound, "no action named: '#{self.action}' could be found"
|
98
106
|
end
|
99
107
|
|
100
108
|
# How complete is this Job?
|
109
|
+
# Unfortunately, with the current processing sequence, the percent_complete
|
110
|
+
# can pull a fast one and go backwards. This happens when there's a single
|
111
|
+
# large input that takes a long time to split, and when it finally does it
|
112
|
+
# creates a whole swarm of work units. This seems unavoidable.
|
101
113
|
def percent_complete
|
102
|
-
return 0 if splitting?
|
103
|
-
return 100 if complete?
|
104
114
|
return 99 if merging?
|
115
|
+
return 100 if complete?
|
105
116
|
(work_units.complete.count / work_units.count.to_f * 100).round
|
106
117
|
end
|
107
118
|
|
@@ -136,20 +147,12 @@ module CloudCrowd
|
|
136
147
|
private
|
137
148
|
|
138
149
|
# When the WorkUnits are all finished, gather all their outputs together
|
139
|
-
# before removing them from the database entirely.
|
150
|
+
# before removing them from the database entirely. Returns their merged JSON.
|
140
151
|
def gather_outputs_from_work_units
|
141
152
|
units = self.work_units.complete
|
142
|
-
outs = self.work_units.complete.map {|u|
|
153
|
+
outs = self.work_units.complete.map {|u| u.parsed_output }
|
143
154
|
self.work_units.complete.destroy_all
|
144
|
-
outs
|
145
|
-
end
|
146
|
-
|
147
|
-
# Transition this Job's status to the appropriate next status.
|
148
|
-
def transition_to_next_phase
|
149
|
-
self.status = any_work_units_failed? ? FAILED :
|
150
|
-
self.splitting? ? PROCESSING :
|
151
|
-
self.mergeable? ? MERGING :
|
152
|
-
SUCCEEDED
|
155
|
+
outs.to_json
|
153
156
|
end
|
154
157
|
|
155
158
|
# When starting a new job, or moving to a new stage, split up the inputs
|
@@ -157,14 +160,8 @@ module CloudCrowd
|
|
157
160
|
# away.
|
158
161
|
def queue_for_workers(input=nil)
|
159
162
|
input ||= JSON.parse(self.inputs)
|
160
|
-
[input].flatten.each
|
161
|
-
|
162
|
-
:job => self,
|
163
|
-
:action => self.action,
|
164
|
-
:input => wu_input,
|
165
|
-
:status => self.status
|
166
|
-
)
|
167
|
-
end
|
163
|
+
[input].flatten.each {|i| WorkUnit.start(self, action, i, status) }
|
164
|
+
self
|
168
165
|
end
|
169
166
|
|
170
167
|
# A Job starts out either splitting or processing, depending on its action.
|