cloud-crowd 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README +16 -16
- data/cloud-crowd.gemspec +10 -9
- data/config/config.example.ru +8 -2
- data/config/config.example.yml +21 -25
- data/examples/process_pdfs_example.rb +1 -1
- data/examples/word_count_example.rb +1 -0
- data/lib/cloud-crowd.rb +47 -28
- data/lib/cloud_crowd/action.rb +14 -8
- data/lib/cloud_crowd/asset_store.rb +8 -8
- data/lib/cloud_crowd/asset_store/filesystem_store.rb +18 -7
- data/lib/cloud_crowd/asset_store/s3_store.rb +14 -11
- data/lib/cloud_crowd/command_line.rb +24 -58
- data/lib/cloud_crowd/exceptions.rb +7 -0
- data/lib/cloud_crowd/helpers/authorization.rb +5 -3
- data/lib/cloud_crowd/helpers/resources.rb +0 -20
- data/lib/cloud_crowd/models.rb +1 -1
- data/lib/cloud_crowd/models/job.rb +37 -40
- data/lib/cloud_crowd/models/node_record.rb +95 -0
- data/lib/cloud_crowd/models/work_unit.rb +87 -33
- data/lib/cloud_crowd/node.rb +105 -0
- data/lib/cloud_crowd/schema.rb +22 -18
- data/lib/cloud_crowd/{app.rb → server.rb} +34 -34
- data/lib/cloud_crowd/worker.rb +68 -107
- data/public/css/admin_console.css +40 -18
- data/public/images/server.png +0 -0
- data/public/images/server_busy.png +0 -0
- data/public/js/admin_console.js +47 -18
- data/test/acceptance/test_failing_work_units.rb +1 -1
- data/test/acceptance/{test_app.rb → test_server.rb} +15 -15
- data/test/acceptance/test_word_count.rb +3 -9
- data/test/blueprints.rb +0 -1
- data/test/config/config.ru +1 -1
- data/test/config/config.yml +2 -4
- data/test/unit/test_action.rb +1 -1
- data/test/unit/test_configuration.rb +1 -1
- data/test/unit/test_job.rb +3 -0
- data/test/unit/test_work_unit.rb +2 -4
- data/views/{index.erb → operations_center.erb} +13 -8
- metadata +11 -10
- data/lib/cloud_crowd/daemon.rb +0 -95
- data/lib/cloud_crowd/models/worker_record.rb +0 -61
- data/lib/cloud_crowd/runner.rb +0 -15
@@ -2,15 +2,26 @@ module CloudCrowd
|
|
2
2
|
class AssetStore
|
3
3
|
|
4
4
|
# The FilesystemStore is an implementation of the AssetStore, good only for
|
5
|
-
# use in development, testing,
|
6
|
-
# installation.
|
5
|
+
# use in development, testing, if you're only running a single-machine
|
6
|
+
# installation, or are using a networked drive.
|
7
7
|
module FilesystemStore
|
8
8
|
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
DEFAULT_STORAGE_PATH = '/tmp/cloud_crowd_storage'
|
10
|
+
|
11
|
+
attr_reader :local_storage_path
|
12
|
+
|
13
|
+
# Make sure that local storage exists and is writeable before starting.
|
14
|
+
def setup
  # Resolve the storage directory: the configured :local_storage_path wins,
  # otherwise fall back to DEFAULT_STORAGE_PATH. The result is remembered in
  # @local_storage_path.
  lsp = @local_storage_path = CloudCrowd.config[:local_storage_path] || DEFAULT_STORAGE_PATH
  # File.exists? was removed in Ruby 3.2; File.exist? is the supported spelling.
  FileUtils.mkdir_p(lsp) unless File.exist?(lsp)
  # Fail early, at startup, if the storage path cannot be written to.
  raise Error::StorageNotWritable, "#{lsp} is not writable" unless File.writable?(lsp)
end
|
19
|
+
|
20
|
+
# Save a file to somewhere semi-persistent on the filesystem. To use,
|
21
|
+
# configure <tt>:storage: 'filesystem'</tt> in *config.yml*, as well as
|
22
|
+
# <tt>:local_storage_path:</tt>.
|
12
23
|
def save(local_path, save_path)
|
13
|
-
save_path = File.join(
|
24
|
+
save_path = File.join(@local_storage_path, save_path)
|
14
25
|
save_dir = File.dirname(save_path)
|
15
26
|
FileUtils.mkdir_p save_dir unless File.exists? save_dir
|
16
27
|
FileUtils.cp(local_path, save_path)
|
@@ -19,7 +30,7 @@ module CloudCrowd
|
|
19
30
|
|
20
31
|
# Remove all of a Job's result files from the filesystem.
|
21
32
|
def cleanup(job)
|
22
|
-
path = "#{
|
33
|
+
path = "#{@local_storage_path}/#{job.action}/job_#{job.id}"
|
23
34
|
FileUtils.rm_r(path) if File.exists?(path)
|
24
35
|
end
|
25
36
|
end
|
@@ -5,11 +5,24 @@ module CloudCrowd
|
|
5
5
|
# on S3 for all resulting files.
|
6
6
|
module S3Store
|
7
7
|
|
8
|
+
# Configure authentication and establish a connection to S3, first thing.
|
9
|
+
def setup
|
10
|
+
@use_auth = CloudCrowd.config[:s3_authentication]
|
11
|
+
bucket_name = CloudCrowd.config[:s3_bucket]
|
12
|
+
key, secret = CloudCrowd.config[:aws_access_key], CloudCrowd.config[:aws_secret_key]
|
13
|
+
valid_conf = [bucket_name, key, secret].all? {|s| s.is_a? String }
|
14
|
+
raise Error::MissingConfiguration, "An S3 account must be configured in 'config.yml' before 's3' storage can be used" unless valid_conf
|
15
|
+
protocol = @use_auth ? 'https' : 'http'
|
16
|
+
port = @use_auth ? 443 : 80
|
17
|
+
@s3 = RightAws::S3.new(key, secret, :protocol => protocol, :port => port)
|
18
|
+
@bucket = @s3.bucket(bucket_name)
|
19
|
+
@bucket = @s3.bucket(bucket_name, true) unless @bucket
|
20
|
+
end
|
21
|
+
|
8
22
|
# Save a finished file from local storage to S3. Save it publicly unless
|
9
23
|
# we're configured to use S3 authentication. Authenticated links expire
|
10
24
|
# after one day by default.
|
11
25
|
def save(local_path, save_path)
|
12
|
-
ensure_s3_connection
|
13
26
|
if @use_auth
|
14
27
|
@bucket.put(save_path, File.open(local_path), {}, 'private')
|
15
28
|
@s3.interface.get_link(@bucket, save_path)
|
@@ -21,19 +34,9 @@ module CloudCrowd
|
|
21
34
|
|
22
35
|
# Remove all of a Job's resulting files from S3, both intermediate and finished.
|
23
36
|
def cleanup(job)
|
24
|
-
ensure_s3_connection
|
25
37
|
@bucket.delete_folder("#{job.action}/job_#{job.id}")
|
26
38
|
end
|
27
39
|
|
28
|
-
# Workers, through the course of many WorkUnits, keep around an AssetStore.
|
29
|
-
# Ensure we have a persistent S3 connection after first use.
|
30
|
-
def ensure_s3_connection
|
31
|
-
unless @s3 && @bucket
|
32
|
-
params = {:port => 80, :protocol => 'http'}
|
33
|
-
@s3 = RightAws::S3.new(CloudCrowd.config[:aws_access_key], CloudCrowd.config[:aws_secret_key], params)
|
34
|
-
@bucket = @s3.bucket(CloudCrowd.config[:s3_bucket], true)
|
35
|
-
end
|
36
|
-
end
|
37
40
|
end
|
38
41
|
|
39
42
|
end
|
@@ -9,9 +9,6 @@ module CloudCrowd
|
|
9
9
|
# Reference the absolute path to the root.
|
10
10
|
CC_ROOT = File.expand_path(File.dirname(__FILE__) + '/../..')
|
11
11
|
|
12
|
-
# Path to the Daemons gem script which launches workers.
|
13
|
-
WORKER_RUNNER = File.expand_path("#{CC_ROOT}/lib/cloud_crowd/runner.rb")
|
14
|
-
|
15
12
|
# Command-line banner for the usage message.
|
16
13
|
BANNER = <<-EOS
|
17
14
|
CloudCrowd is a MapReduce-inspired Parallel Processing System for Ruby.
|
@@ -24,7 +21,7 @@ Usage: crowd COMMAND OPTIONS
|
|
24
21
|
Commands:
|
25
22
|
install Install the CloudCrowd configuration files to the specified directory
|
26
23
|
server Start up the central server (requires a database)
|
27
|
-
|
24
|
+
node Start up a worker node (only one node per machine, please)
|
28
25
|
console Launch a CloudCrowd console, connected to the central database
|
29
26
|
load_schema Load the schema into the database specified by database.yml
|
30
27
|
|
@@ -38,7 +35,7 @@ Options:
|
|
38
35
|
case command
|
39
36
|
when 'console' then run_console
|
40
37
|
when 'server' then run_server
|
41
|
-
when '
|
38
|
+
when 'node' then run_node
|
42
39
|
when 'load_schema' then run_load_schema
|
43
40
|
when 'install' then run_install
|
44
41
|
else usage
|
@@ -52,7 +49,7 @@ Options:
|
|
52
49
|
require 'irb/completion'
|
53
50
|
require 'pp'
|
54
51
|
load_code
|
55
|
-
connect_to_database
|
52
|
+
connect_to_database(true)
|
56
53
|
IRB.start
|
57
54
|
end
|
58
55
|
|
@@ -63,6 +60,7 @@ Options:
|
|
63
60
|
# (Mongrel, falling back to WEBrick). The equivalent of Rails' script/server.
|
64
61
|
def run_server
|
65
62
|
ensure_config
|
63
|
+
@options[:port] ||= 9173
|
66
64
|
require 'rubygems'
|
67
65
|
rackup_path = File.expand_path("#{@options[:config_path]}/config.ru")
|
68
66
|
if Gem.available? 'thin'
|
@@ -72,10 +70,18 @@ Options:
|
|
72
70
|
end
|
73
71
|
end
|
74
72
|
|
73
|
+
# Launch a Node. Please only run a single node per machine. The Node process
|
74
|
+
# will be long-lived, although its workers will come and go.
|
75
|
+
def run_node
  # @options is keyed by symbols (see parse_options), so the environment has
  # to be read with :environment. The former string key always returned nil,
  # which left RACK_ENV unset instead of applying the -e/--environment value.
  ENV['RACK_ENV'] = @options[:environment]
  load_code
  Node.new(@options[:port])
end
|
80
|
+
|
75
81
|
# Load in the database schema to the database specified in 'database.yml'.
|
76
82
|
def run_load_schema
|
77
83
|
load_code
|
78
|
-
connect_to_database
|
84
|
+
connect_to_database(false)
|
79
85
|
require 'cloud_crowd/schema.rb'
|
80
86
|
end
|
81
87
|
|
@@ -91,46 +97,6 @@ Options:
|
|
91
97
|
install_file "#{CC_ROOT}/actions", "#{install_path}/actions", true
|
92
98
|
end
|
93
99
|
|
94
|
-
# Manipulate worker daemons -- handles all commands that the Daemons gem
|
95
|
-
# provides: start, stop, restart, run, and status.
|
96
|
-
def run_workers_command
|
97
|
-
ensure_config
|
98
|
-
command = ARGV.shift
|
99
|
-
case command
|
100
|
-
when 'start' then start_workers
|
101
|
-
when 'stop' then stop_workers
|
102
|
-
when 'restart' then stop_workers && start_workers
|
103
|
-
when 'run' then run_worker
|
104
|
-
when 'status' then show_worker_status
|
105
|
-
else usage
|
106
|
-
end
|
107
|
-
end
|
108
|
-
|
109
|
-
# Start up N workers, specified by argument or the number of workers in
|
110
|
-
# config.yml.
|
111
|
-
def start_workers
|
112
|
-
load_code
|
113
|
-
num_workers = @options[:num_workers] || CloudCrowd.config[:num_workers]
|
114
|
-
num_workers.times do
|
115
|
-
`CLOUD_CROWD_CONFIG='#{File.expand_path(@options[:config_path] + "/config.yml")}' ruby #{WORKER_RUNNER} start`
|
116
|
-
end
|
117
|
-
end
|
118
|
-
|
119
|
-
# For debugging, run a single worker in the current process, showing output.
|
120
|
-
def run_worker
|
121
|
-
exec "CLOUD_CROWD_CONFIG='#{File.expand_path(@options[:config_path] + "/config.yml")}' ruby #{WORKER_RUNNER} run"
|
122
|
-
end
|
123
|
-
|
124
|
-
# Stop all active workers.
|
125
|
-
def stop_workers
|
126
|
-
`ruby #{WORKER_RUNNER} stop`
|
127
|
-
end
|
128
|
-
|
129
|
-
# Display the status of all active workers.
|
130
|
-
def show_worker_status
|
131
|
-
puts `ruby #{WORKER_RUNNER} status`
|
132
|
-
end
|
133
|
-
|
134
100
|
# Print `crowd` usage.
|
135
101
|
def usage
|
136
102
|
puts "\n#{@option_parser}\n"
|
@@ -150,7 +116,6 @@ Options:
|
|
150
116
|
# Parse all options for all commands.
|
151
117
|
def parse_options
|
152
118
|
@options = {
|
153
|
-
:port => 9173,
|
154
119
|
:environment => 'production',
|
155
120
|
:config_path => ENV['CLOUD_CROWD_CONFIG'] || '.'
|
156
121
|
}
|
@@ -158,17 +123,14 @@ Options:
|
|
158
123
|
opts.on('-c', '--config PATH', 'path to configuration directory') do |conf_path|
|
159
124
|
@options[:config_path] = conf_path
|
160
125
|
end
|
161
|
-
opts.on('-
|
162
|
-
@options[:num_workers] = num
|
163
|
-
end
|
164
|
-
opts.on('-p', '--port PORT', 'central server port number') do |port_num|
|
126
|
+
opts.on('-p', '--port PORT', 'port number for server (central or node)') do |port_num|
|
165
127
|
@options[:port] = port_num
|
166
128
|
end
|
167
129
|
opts.on('-e', '--environment ENV', 'server environment (sinatra)') do |env|
|
168
130
|
@options[:environment] = env
|
169
131
|
end
|
170
132
|
opts.on_tail('-v', '--version', 'show version') do
|
171
|
-
|
133
|
+
require "#{CC_ROOT}/lib/cloud-crowd"
|
172
134
|
puts "CloudCrowd version #{VERSION}"
|
173
135
|
exit
|
174
136
|
end
|
@@ -181,26 +143,30 @@ Options:
|
|
181
143
|
# Not all commands require this.
|
182
144
|
def load_code
|
183
145
|
ensure_config
|
184
|
-
require 'rubygems'
|
185
146
|
require "#{CC_ROOT}/lib/cloud-crowd"
|
186
147
|
CloudCrowd.configure("#{@options[:config_path]}/config.yml")
|
187
148
|
end
|
188
149
|
|
189
150
|
# Establish a connection to the central server's database. Not all commands
|
190
151
|
# require this.
|
191
|
-
def connect_to_database
|
152
|
+
def connect_to_database(validate_schema)
|
192
153
|
require 'cloud_crowd/models'
|
193
|
-
CloudCrowd.configure_database("#{@options[:config_path]}/database.yml")
|
154
|
+
CloudCrowd.configure_database("#{@options[:config_path]}/database.yml", validate_schema)
|
194
155
|
end
|
195
156
|
|
196
157
|
# Exit with an explanation if the configuration files couldn't be found.
|
197
158
|
def config_not_found
|
198
|
-
puts "`crowd` can't find the CloudCrowd configuration directory. Please
|
159
|
+
puts "`crowd` can't find the CloudCrowd configuration directory. Please use `crowd -c path/to/config`, or run `crowd` from inside of the configuration directory itself."
|
199
160
|
exit(1)
|
200
161
|
end
|
201
162
|
|
202
|
-
# Install a file and log the installation.
|
163
|
+
# Install a file and log the installation. If we're overwriting a file,
|
164
|
+
# offer a chance to back out.
|
203
165
|
def install_file(source, dest, is_dir=false)
  # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
  if File.exist?(dest)
    print "#{dest} already exists. Overwrite it? (yes/no) "
    # Read the answer from $stdin explicitly: bare Kernel#gets reads from any
    # files named in ARGV before falling back to standard input. A nil answer
    # (EOF) is treated as "no" instead of raising on nil.
    answer = $stdin.gets.to_s.strip.downcase
    return unless ['y', 'yes', 'ok'].include?(answer)
  end
  # Directories are copied recursively, plain files with a simple copy.
  is_dir ? FileUtils.cp_r(source, dest) : FileUtils.cp(source, dest)
  puts "installed #{dest}"
end
|
@@ -2,6 +2,8 @@ module CloudCrowd
|
|
2
2
|
|
3
3
|
# Base Error class which all custom CloudCrowd exceptions inherit from.
|
4
4
|
# Rescuing CloudCrowd::Error (or RuntimeError) will get all custom exceptions.
|
5
|
+
# If your cluster is correctly configured, you should never expect to see any
|
6
|
+
# of these.
|
5
7
|
class Error < RuntimeError
|
6
8
|
|
7
9
|
# ActionNotFound is raised when a job is created for an action that doesn't
|
@@ -23,6 +25,11 @@ module CloudCrowd
|
|
23
25
|
class StatusUnspecified < Error
|
24
26
|
end
|
25
27
|
|
28
|
+
# MissingConfiguration is raised when we're trying to run a method that
|
29
|
+
# needs configuration not present in config.yml.
|
30
|
+
class MissingConfiguration < Error
|
31
|
+
end
|
32
|
+
|
26
33
|
end
|
27
34
|
|
28
35
|
end
|
@@ -23,9 +23,9 @@ module CloudCrowd
|
|
23
23
|
# A request is authorized if its login and password match those stored
|
24
24
|
# in config.yml, or if authentication is disabled. If authentication is
|
25
25
|
# turned on, then every request is authenticated, including between
|
26
|
-
# the
|
26
|
+
# the nodes and the central server.
|
27
27
|
def authorize(login, password)
|
28
|
-
return true unless CloudCrowd.config[:
|
28
|
+
return true unless CloudCrowd.config[:http_authentication]
|
29
29
|
return CloudCrowd.config[:login] == login &&
|
30
30
|
CloudCrowd.config[:password] == password
|
31
31
|
end
|
@@ -33,11 +33,13 @@ module CloudCrowd
|
|
33
33
|
|
34
34
|
private
|
35
35
|
|
36
|
+
# Provide a Rack Authorization object.
|
36
37
|
def auth
|
37
38
|
@auth ||= Rack::Auth::Basic::Request.new(request.env)
|
38
39
|
end
|
39
40
|
|
40
|
-
|
41
|
+
# Unauthorized requests will prompt the browser to provide credentials.
|
42
|
+
def unauthorized!(realm = Server.authorization_realm)
|
41
43
|
response['WWW-Authenticate'] = "Basic realm=\"#{realm}\""
|
42
44
|
halt 401, 'Authorization Required'
|
43
45
|
end
|
@@ -20,26 +20,6 @@ module CloudCrowd
|
|
20
20
|
@work_unit ||= WorkUnit.find_by_id(params[:work_unit_id]) or raise Sinatra::NotFound
|
21
21
|
end
|
22
22
|
|
23
|
-
# Try to fetch a work unit from the queue. If none are pending, respond
|
24
|
-
# with no content.
|
25
|
-
def dequeue_work_unit(offset=0)
|
26
|
-
handle_conflicts do
|
27
|
-
worker, actions = params[:worker_name], params[:worker_actions].split(',')
|
28
|
-
WorkUnit.dequeue(worker, actions, offset)
|
29
|
-
end
|
30
|
-
end
|
31
|
-
|
32
|
-
# We're using ActiveRecord's optimistic locking, so stale work units
|
33
|
-
# may sometimes arise. handle_conflicts responds with the HTTP status
|
34
|
-
# code of your choosing if the update failed to be applied.
|
35
|
-
def handle_conflicts(code=204)
|
36
|
-
begin
|
37
|
-
yield
|
38
|
-
rescue ActiveRecord::StaleObjectError => e
|
39
|
-
return status(code) && ''
|
40
|
-
end
|
41
|
-
end
|
42
|
-
|
43
23
|
end
|
44
24
|
end
|
45
25
|
end
|
data/lib/cloud_crowd/models.rb
CHANGED
@@ -31,30 +31,36 @@ module CloudCrowd
|
|
31
31
|
# finished, if so, continue on to the next phase of the job.
|
32
32
|
def check_for_completion
|
33
33
|
return unless all_work_units_complete?
|
34
|
-
|
35
|
-
|
36
|
-
|
34
|
+
set_next_status
|
35
|
+
outs = gather_outputs_from_work_units
|
36
|
+
return queue_for_workers(outs) if merging?
|
37
37
|
if complete?
|
38
|
-
|
39
|
-
|
40
|
-
end
|
41
|
-
self.save
|
42
|
-
|
43
|
-
case self.status
|
44
|
-
when PROCESSING then queue_for_workers(output_list.map {|o| JSON.parse(o) }.flatten)
|
45
|
-
when MERGING then queue_for_workers(output_list.to_json)
|
46
|
-
else fire_callback
|
38
|
+
update_attributes(:outputs => outs, :time => time_taken)
|
39
|
+
fire_callback if callback_url
|
47
40
|
end
|
48
41
|
self
|
49
42
|
end
|
50
43
|
|
44
|
+
# Transition this Job's status to the appropriate next status.
|
45
|
+
def set_next_status
|
46
|
+
update_attribute(:status,
|
47
|
+
any_work_units_failed? ? FAILED :
|
48
|
+
self.splitting? ? PROCESSING :
|
49
|
+
self.mergeable? ? MERGING :
|
50
|
+
SUCCEEDED
|
51
|
+
)
|
52
|
+
end
|
53
|
+
|
51
54
|
# If a <tt>callback_url</tt> is defined, post the Job's JSON to it upon
|
52
55
|
# completion. The <tt>callback_url</tt> may include HTTP basic authentication,
|
53
56
|
# if you like:
|
54
57
|
# http://user:password@example.com/job_complete
|
58
|
+
# If the callback_url is successfully pinged, we proceed to clean up the job.
|
59
|
+
# TODO: This should be moved into a Work Unit...
|
55
60
|
def fire_callback
|
56
61
|
begin
|
57
|
-
RestClient.post(callback_url, {:job => self.to_json})
|
62
|
+
RestClient.post(callback_url, {:job => self.to_json})
|
63
|
+
self.destroy
|
58
64
|
rescue RestClient::Exception => e
|
59
65
|
puts "Failed to fire job callback. Hmmm, what should happen here?"
|
60
66
|
end
|
@@ -62,15 +68,12 @@ module CloudCrowd
|
|
62
68
|
|
63
69
|
# Cleaning up after a job will remove all of its files from S3. Destroying
|
64
70
|
# a Job calls cleanup_assets first.
|
71
|
+
# TODO: Convert this into a 'cleanup' work unit that gets run by a worker.
|
65
72
|
def cleanup_assets
|
66
73
|
AssetStore.new.cleanup(self)
|
67
74
|
end
|
68
75
|
|
69
76
|
# Have all of the WorkUnits finished?
|
70
|
-
#--
|
71
|
-
# We could trade reads for writes here
|
72
|
-
# by keeping a completed_count on the Job itself.
|
73
|
-
#++
|
74
77
|
def all_work_units_complete?
|
75
78
|
self.work_units.incomplete.count <= 0
|
76
79
|
end
|
@@ -85,6 +88,11 @@ module CloudCrowd
|
|
85
88
|
self.action_class.public_instance_methods.include? 'split'
|
86
89
|
end
|
87
90
|
|
91
|
+
# This job is done splitting if it's finished with its splitting work units.
|
92
|
+
def done_splitting?
|
93
|
+
splittable? && work_units.splitting.count <= 0
|
94
|
+
end
|
95
|
+
|
88
96
|
# This job is mergeable if its Action has a +merge+ method.
|
89
97
|
def mergeable?
|
90
98
|
self.processing? && self.action_class.public_instance_methods.include?('merge')
|
@@ -92,16 +100,19 @@ module CloudCrowd
|
|
92
100
|
|
93
101
|
# Retrieve the class for this Job's Action.
|
94
102
|
def action_class
|
95
|
-
|
96
|
-
return
|
103
|
+
@action_class ||= CloudCrowd.actions[self.action]
|
104
|
+
return @action_class if @action_class
|
97
105
|
raise Error::ActionNotFound, "no action named: '#{self.action}' could be found"
|
98
106
|
end
|
99
107
|
|
100
108
|
# How complete is this Job?
|
109
|
+
# Unfortunately, with the current processing sequence, the percent_complete
|
110
|
+
# can pull a fast one and go backwards. This happens when there's a single
|
111
|
+
# large input that takes a long time to split, and when it finally does it
|
112
|
+
# creates a whole swarm of work units. This seems unavoidable.
|
101
113
|
def percent_complete
|
102
|
-
return 0 if splitting?
|
103
|
-
return 100 if complete?
|
104
114
|
return 99 if merging?
|
115
|
+
return 100 if complete?
|
105
116
|
(work_units.complete.count / work_units.count.to_f * 100).round
|
106
117
|
end
|
107
118
|
|
@@ -136,20 +147,12 @@ module CloudCrowd
|
|
136
147
|
private
|
137
148
|
|
138
149
|
# When the WorkUnits are all finished, gather all their outputs together
|
139
|
-
# before removing them from the database entirely.
|
150
|
+
# before removing them from the database entirely. Returns their merged JSON.
|
140
151
|
def gather_outputs_from_work_units
|
141
152
|
units = self.work_units.complete
|
142
|
-
outs = self.work_units.complete.map {|u|
|
153
|
+
outs = self.work_units.complete.map {|u| u.parsed_output }
|
143
154
|
self.work_units.complete.destroy_all
|
144
|
-
outs
|
145
|
-
end
|
146
|
-
|
147
|
-
# Transition this Job's status to the appropriate next status.
|
148
|
-
def transition_to_next_phase
|
149
|
-
self.status = any_work_units_failed? ? FAILED :
|
150
|
-
self.splitting? ? PROCESSING :
|
151
|
-
self.mergeable? ? MERGING :
|
152
|
-
SUCCEEDED
|
155
|
+
outs.to_json
|
153
156
|
end
|
154
157
|
|
155
158
|
# When starting a new job, or moving to a new stage, split up the inputs
|
@@ -157,14 +160,8 @@ module CloudCrowd
|
|
157
160
|
# away.
|
158
161
|
def queue_for_workers(input=nil)
|
159
162
|
input ||= JSON.parse(self.inputs)
|
160
|
-
[input].flatten.each
|
161
|
-
|
162
|
-
:job => self,
|
163
|
-
:action => self.action,
|
164
|
-
:input => wu_input,
|
165
|
-
:status => self.status
|
166
|
-
)
|
167
|
-
end
|
163
|
+
[input].flatten.each {|i| WorkUnit.start(self, action, i, status) }
|
164
|
+
self
|
168
165
|
end
|
169
166
|
|
170
167
|
# A Job starts out either splitting or processing, depending on its action.
|