documentcloud-cloud-crowd 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/EPIGRAPHS +17 -0
- data/LICENSE +22 -0
- data/README +75 -0
- data/actions/graphics_magick.rb +1 -3
- data/actions/process_pdfs.rb +92 -0
- data/cloud-crowd.gemspec +24 -4
- data/config/config.example.yml +5 -0
- data/examples/graphics_magick_example.rb +48 -0
- data/examples/process_pdfs_example.rb +30 -0
- data/lib/cloud-crowd.rb +25 -20
- data/lib/cloud_crowd/action.rb +29 -24
- data/lib/cloud_crowd/app.rb +40 -13
- data/lib/cloud_crowd/asset_store.rb +13 -6
- data/lib/cloud_crowd/command_line.rb +11 -5
- data/lib/cloud_crowd/daemon.rb +7 -2
- data/lib/cloud_crowd/exceptions.rb +17 -0
- data/lib/cloud_crowd/helpers.rb +1 -1
- data/lib/cloud_crowd/helpers/authorization.rb +7 -3
- data/lib/cloud_crowd/helpers/resources.rb +12 -3
- data/lib/cloud_crowd/inflector.rb +1 -1
- data/lib/cloud_crowd/models/job.rb +75 -38
- data/lib/cloud_crowd/models/work_unit.rb +14 -8
- data/lib/cloud_crowd/schema.rb +3 -1
- data/lib/cloud_crowd/worker.rb +32 -15
- data/public/css/admin_console.css +51 -0
- data/public/css/reset.css +52 -0
- data/public/images/queue_fill.png +0 -0
- data/public/js/admin_console.js +51 -0
- data/public/js/jquery-1.3.2.js +4376 -0
- data/test/acceptance/test_failing_work_units.rb +2 -2
- data/test/blueprints.rb +1 -0
- data/test/config/config.ru +17 -0
- data/test/unit/test_job.rb +5 -5
- data/test/unit/test_work_unit.rb +1 -1
- data/views/index.erb +22 -0
- metadata +27 -8
data/lib/cloud_crowd/action.rb
CHANGED
@@ -1,52 +1,54 @@
|
|
1
1
|
module CloudCrowd
|
2
2
|
|
3
|
-
#
|
4
|
-
#
|
5
|
-
# Public API to CloudCrowd::Action subclasses:
|
6
|
-
# +input+, +input_path+, +file_name+, +work_directory+, +options+, +save+
|
7
|
-
#
|
8
|
-
# CloudCrowd::Actions must implement a +process+ method, which must return a
|
3
|
+
# As you write your custom actions, have them inherit from CloudCrowd::Action.
|
4
|
+
# All actions must implement a +process+ method, which should return a
|
9
5
|
# JSON-serializable object that will be used as the output for the work unit.
|
6
|
+
# See the default actions for examples.
|
7
|
+
#
|
10
8
|
# Optionally, actions may define +split+ and +merge+ methods to do mapping
|
11
|
-
# and reducing around the input
|
12
|
-
#
|
13
|
-
# +
|
14
|
-
#
|
9
|
+
# and reducing around the +input+. +split+ should return an array of URLs --
|
10
|
+
# to be mapped into WorkUnits and processed in parallel. In the +merge+ step,
|
11
|
+
# +input+ will be an array of all the resulting outputs from calling process.
|
12
|
+
#
|
13
|
+
# All actions have use of an individual +work_directory+, for scratch files,
|
14
|
+
# and spend their duration inside of it, so relative paths work well.
|
15
15
|
class Action
|
16
16
|
|
17
17
|
attr_reader :input, :input_path, :file_name, :options, :work_directory
|
18
18
|
|
19
|
-
#
|
19
|
+
# Initializing an Action sets up all of the read-only variables that
|
20
20
|
# form the bulk of the API for action subclasses. (Paths to read from and
|
21
|
-
# write to). It creates the work_directory and moves into it.
|
22
|
-
|
21
|
+
# write to). It creates the +work_directory+ and moves into it.
|
22
|
+
# If we're not merging multiple results, it downloads the input file into
|
23
|
+
# the +work_directory+ before starting.
|
24
|
+
def initialize(status, input, options, store)
|
23
25
|
@input, @options, @store = input, options, store
|
24
26
|
@job_id, @work_unit_id = options['job_id'], options['work_unit_id']
|
25
27
|
@work_directory = File.expand_path(File.join(@store.temp_storage_path, storage_prefix))
|
26
28
|
FileUtils.mkdir_p(@work_directory) unless File.exists?(@work_directory)
|
27
29
|
Dir.chdir @work_directory
|
28
30
|
unless status == MERGING
|
29
|
-
@input_path = File.join(@work_directory,
|
31
|
+
@input_path = File.join(@work_directory, safe_filename(@input))
|
30
32
|
@file_name = File.basename(@input_path, File.extname(@input_path))
|
31
33
|
download(@input, @input_path)
|
32
34
|
end
|
33
35
|
end
|
34
36
|
|
35
|
-
# Each
|
37
|
+
# Each Action subclass must implement a +process+ method, overriding this.
|
36
38
|
def process
|
37
39
|
raise NotImplementedError.new("CloudCrowd::Actions must override 'process' with their own processing code.")
|
38
40
|
end
|
39
41
|
|
40
|
-
# Download a file to the specified path
|
42
|
+
# Download a file to the specified path with *curl*.
|
41
43
|
def download(url, path)
|
42
|
-
`curl -s "#{url}" > #{path}`
|
44
|
+
`curl -s "#{url}" > "#{path}"`
|
43
45
|
path
|
44
46
|
end
|
45
47
|
|
46
|
-
# Takes a local filesystem path, and returns the
|
47
|
-
# url on S3 where the file
|
48
|
+
# Takes a local filesystem path, saves the file to S3, and returns the
|
49
|
+
# public (or authenticated) url on S3 where the file can be accessed.
|
48
50
|
def save(file_path)
|
49
|
-
save_path = File.join(
|
51
|
+
save_path = File.join(storage_prefix, File.basename(file_path))
|
50
52
|
@store.save(file_path, save_path)
|
51
53
|
return @store.url(save_path)
|
52
54
|
end
|
@@ -61,6 +63,13 @@ module CloudCrowd
|
|
61
63
|
|
62
64
|
private
|
63
65
|
|
66
|
+
# Convert an unsafe URL into a filesystem-friendly filename.
|
67
|
+
def safe_filename(url)
|
68
|
+
ext = File.extname(url)
|
69
|
+
name = File.basename(url).gsub(/%\d+/, '-').gsub(/[^a-zA-Z0-9_\-.]/, '')
|
70
|
+
File.basename(name, ext).gsub('.', '-') + ext
|
71
|
+
end
|
72
|
+
|
64
73
|
# The directory prefix to use for both local and S3 storage.
|
65
74
|
# [action_name]/job_[job_id]/unit_[work_unit_id]
|
66
75
|
def storage_prefix
|
@@ -71,10 +80,6 @@ module CloudCrowd
|
|
71
80
|
@storage_prefix ||= File.join(path_parts)
|
72
81
|
end
|
73
82
|
|
74
|
-
def s3_storage_path
|
75
|
-
@s3_storage_path ||= storage_prefix
|
76
|
-
end
|
77
|
-
|
78
83
|
end
|
79
84
|
|
80
85
|
end
|
data/lib/cloud_crowd/app.rb
CHANGED
@@ -1,5 +1,20 @@
|
|
1
1
|
module CloudCrowd
|
2
2
|
|
3
|
+
# The main CloudCrowd (Sinatra) application. The actions are:
|
4
|
+
#
|
5
|
+
# == Admin
|
6
|
+
# [get /] Render the admin console, with a progress meter for running jobs.
|
7
|
+
# [get /jobs] Get the combined JSON of every active job in the queue.
|
8
|
+
# [get /heartbeat] Returns 200 OK to let monitoring tools know the server's up.
|
9
|
+
#
|
10
|
+
# == Public API
|
11
|
+
# [post /jobs] Begin a new Job. Post with a JSON representation of the job-to-be. (see examples).
|
12
|
+
# [get /jobs/:job_id] Check the status of a Job. Response includes output, if the Job has finished.
|
13
|
+
# [delete /jobs/:job_id] Clean up a Job when you're done downloading the results. Removes all intermediate files.
|
14
|
+
#
|
15
|
+
# == Internal Workers API
|
16
|
+
# [post /work] Dequeue the next WorkUnit, and hand it off to the worker.
|
17
|
+
# [put /work/:unit_id] Mark a finished WorkUnit as completed or failed, with results.
|
3
18
|
class App < Sinatra::Default
|
4
19
|
|
5
20
|
set :root, ROOT
|
@@ -15,28 +30,46 @@ module CloudCrowd
|
|
15
30
|
login_required if CloudCrowd.config[:use_http_authentication]
|
16
31
|
end
|
17
32
|
|
33
|
+
# Render the admin console.
|
34
|
+
get '/' do
|
35
|
+
erb :index
|
36
|
+
end
|
37
|
+
|
38
|
+
# Get the JSON for every active job in the queue.
|
39
|
+
get '/jobs' do
|
40
|
+
json Job.incomplete
|
41
|
+
end
|
42
|
+
|
43
|
+
# To monitor the central server with Monit, God, Nagios, or another
|
44
|
+
# monitoring tool, you can hit /heartbeat to make sure.
|
45
|
+
get '/heartbeat' do
|
46
|
+
"buh-bump"
|
47
|
+
end
|
48
|
+
|
49
|
+
# PUBLIC API:
|
50
|
+
|
18
51
|
# Start a new job. Accepts a JSON representation of the job-to-be.
|
19
52
|
post '/jobs' do
|
20
|
-
Job.create_from_request(JSON.parse(params[:
|
53
|
+
json Job.create_from_request(JSON.parse(params[:job]))
|
21
54
|
end
|
22
55
|
|
23
56
|
# Check the status of a job, returning the output if finished, and the
|
24
57
|
# number of work units remaining otherwise.
|
25
58
|
get '/jobs/:job_id' do
|
26
|
-
current_job
|
59
|
+
json current_job
|
27
60
|
end
|
28
61
|
|
29
62
|
# Cleans up a Job's saved S3 files. Delete a Job after you're done
|
30
63
|
# downloading the results.
|
31
64
|
delete '/jobs/:job_id' do
|
32
65
|
current_job.cleanup
|
33
|
-
|
66
|
+
json nil
|
34
67
|
end
|
35
68
|
|
36
69
|
# Internal method for worker daemons to fetch the work unit at the front
|
37
70
|
# of the queue. Work unit is marked as taken and handed off to the worker.
|
38
|
-
|
39
|
-
dequeue_work_unit
|
71
|
+
post '/work' do
|
72
|
+
json dequeue_work_unit
|
40
73
|
end
|
41
74
|
|
42
75
|
# When workers are done with their unit, either successfully or in failure,
|
@@ -47,22 +80,16 @@ module CloudCrowd
|
|
47
80
|
case params[:status]
|
48
81
|
when 'succeeded'
|
49
82
|
current_work_unit.finish(params[:output], params[:time])
|
50
|
-
dequeue_work_unit
|
83
|
+
json dequeue_work_unit
|
51
84
|
when 'failed'
|
52
85
|
current_work_unit.fail(params[:output], params[:time])
|
53
|
-
dequeue_work_unit(1)
|
86
|
+
json dequeue_work_unit(1)
|
54
87
|
else
|
55
88
|
error(500, "Completing a work unit must specify status.")
|
56
89
|
end
|
57
90
|
end
|
58
91
|
end
|
59
92
|
|
60
|
-
# To monitor the central server with Monit, God, Nagios, or another
|
61
|
-
# monitoring tool, you can hit /heartbeat to make sure.
|
62
|
-
get '/heartbeat' do
|
63
|
-
"buh-bump"
|
64
|
-
end
|
65
|
-
|
66
93
|
end
|
67
94
|
|
68
95
|
end
|
@@ -2,24 +2,31 @@ require 'tmpdir'
|
|
2
2
|
|
3
3
|
module CloudCrowd
|
4
4
|
|
5
|
-
# The
|
6
|
-
#
|
7
|
-
# be the filesystem
|
5
|
+
# The AssetStore provides a common API for storing files and returning URLs
|
6
|
+
# that can access them. In production this will be S3 but in development
|
7
|
+
# it may be the filesystem.
|
8
|
+
#
|
9
|
+
# You shouldn't need to use the AssetStore directly -- Action's +download+
|
10
|
+
# and +save+ methods use it behind the scenes.
|
8
11
|
class AssetStore
|
9
12
|
include FileUtils
|
10
13
|
|
14
|
+
# Creating an AssetStore will determine whether to save private or public
|
15
|
+
# files on S3, depending on the value of <tt>use_s3_authentication</tt> in
|
16
|
+
# <tt>config.yml</tt>.
|
11
17
|
def initialize
|
12
18
|
@use_auth = CloudCrowd.config[:use_s3_authentication]
|
13
19
|
mkdir_p temp_storage_path unless File.exists? temp_storage_path
|
14
20
|
end
|
15
21
|
|
16
|
-
#
|
22
|
+
# Get the path to CloudCrowd's temporary local storage. All actions run
|
23
|
+
# in subdirectories of this.
|
17
24
|
def temp_storage_path
|
18
25
|
"#{Dir.tmpdir}/cloud_crowd_tmp"
|
19
26
|
end
|
20
27
|
|
21
|
-
# Copy a finished file from our local storage to S3. Save it publicly
|
22
|
-
# we're
|
28
|
+
# Copy a finished file from our local storage to S3. Save it publicly unless
|
29
|
+
# we're configured to use S3 authentication.
|
23
30
|
def save(local_path, save_path)
|
24
31
|
ensure_s3_connection
|
25
32
|
permission = @use_auth ? 'private' : 'public-read'
|
@@ -14,6 +14,8 @@ module CloudCrowd
|
|
14
14
|
|
15
15
|
# Command-line banner for the usage message.
|
16
16
|
BANNER = <<-EOS
|
17
|
+
CloudCrowd is a Ruby & AWS batch processing system, MapReduce style.
|
18
|
+
|
17
19
|
Usage: crowd COMMAND OPTIONS
|
18
20
|
|
19
21
|
COMMANDS:
|
@@ -45,6 +47,7 @@ OPTIONS:
|
|
45
47
|
def run_console
|
46
48
|
require 'irb'
|
47
49
|
require 'irb/completion'
|
50
|
+
require 'pp'
|
48
51
|
load_code
|
49
52
|
connect_to_database
|
50
53
|
IRB.start
|
@@ -60,9 +63,9 @@ OPTIONS:
|
|
60
63
|
require 'rubygems'
|
61
64
|
rackup_path = File.expand_path("#{@options[:config_path]}/config.ru")
|
62
65
|
if Gem.available? 'thin'
|
63
|
-
exec "thin -e
|
66
|
+
exec "thin -e #{@options[:environment]} -p #{@options[:port]} -R #{rackup_path} start"
|
64
67
|
else
|
65
|
-
exec "rackup -E
|
68
|
+
exec "rackup -E #{@options[:environment]} -p #{@options[:port]} #{rackup_path}"
|
66
69
|
end
|
67
70
|
end
|
68
71
|
|
@@ -127,7 +130,7 @@ OPTIONS:
|
|
127
130
|
|
128
131
|
# Print `crowd` usage.
|
129
132
|
def usage
|
130
|
-
puts @option_parser
|
133
|
+
puts "\n#{@option_parser}\n"
|
131
134
|
end
|
132
135
|
|
133
136
|
|
@@ -141,11 +144,11 @@ OPTIONS:
|
|
141
144
|
found ? @config_dir = true : config_not_found
|
142
145
|
end
|
143
146
|
|
144
|
-
# Parse all options for all
|
145
|
-
# TODO: Think about parsing options per sub-command separately.
|
147
|
+
# Parse all options for all commands.
|
146
148
|
def parse_options
|
147
149
|
@options = {
|
148
150
|
:port => 9173,
|
151
|
+
:environment => 'production',
|
149
152
|
:config_path => ENV['CLOUD_CROWD_CONFIG'] || '.'
|
150
153
|
}
|
151
154
|
@option_parser = OptionParser.new do |opts|
|
@@ -158,6 +161,9 @@ OPTIONS:
|
|
158
161
|
opts.on('-p', '--port PORT', 'central server port number') do |port_num|
|
159
162
|
@options[:port] = port_num
|
160
163
|
end
|
164
|
+
opts.on('-e', '--environment ENV', 'Sinatra environment (code reloading)') do |env|
|
165
|
+
@options[:environment] = env
|
166
|
+
end
|
161
167
|
opts.on_tail('-v', '--version', 'show version') do
|
162
168
|
load_code
|
163
169
|
puts "CloudCrowd version #{VERSION}"
|
data/lib/cloud_crowd/daemon.rb
CHANGED
@@ -6,8 +6,13 @@ module CloudCrowd
|
|
6
6
|
|
7
7
|
# A CloudCrowd::Daemon, started by the Daemons gem, runs a CloudCrowd::Worker in
|
8
8
|
# a loop, continually fetching and processing WorkUnits from the central
|
9
|
-
# server.
|
10
|
-
#
|
9
|
+
# server.
|
10
|
+
#
|
11
|
+
# The Daemon backs off and pings the central server less frequently
|
12
|
+
# when there isn't any work to be done, and speeds back up when there is.
|
13
|
+
#
|
14
|
+
# The `crowd` command responds to all the usual methods that the Daemons gem
|
15
|
+
# supports.
|
11
16
|
class Daemon
|
12
17
|
|
13
18
|
MIN_WAIT = CloudCrowd.config[:min_worker_wait]
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module CloudCrowd
|
2
|
+
|
3
|
+
# Base Error class which all custom CloudCrowd exceptions inherit from.
|
4
|
+
class Error < RuntimeError #:nodoc:
|
5
|
+
end
|
6
|
+
|
7
|
+
# ActionNotFound is raised when a job is created for an action that doesn't
|
8
|
+
# exist.
|
9
|
+
class ActionNotFound < Error #:nodoc:
|
10
|
+
end
|
11
|
+
|
12
|
+
# StatusUnspecified is raised when a WorkUnit returns without a valid
|
13
|
+
# status code.
|
14
|
+
class StatusUnspecified < Error #:nodoc:
|
15
|
+
end
|
16
|
+
|
17
|
+
end
|
data/lib/cloud_crowd/helpers.rb
CHANGED
@@ -1,7 +1,9 @@
|
|
1
|
-
# After sinatra-authorization...
|
2
|
-
|
3
1
|
module CloudCrowd
|
4
2
|
module Helpers
|
3
|
+
|
4
|
+
# Authorization takes after sinatra-authorization... See
|
5
|
+
# http://github.com/integrity/sinatra-authorization
|
6
|
+
# for the original.
|
5
7
|
module Authorization
|
6
8
|
|
7
9
|
# Ensure that the request includes the correct credentials.
|
@@ -19,7 +21,9 @@ module CloudCrowd
|
|
19
21
|
end
|
20
22
|
|
21
23
|
# A request is authorized if its login and password match those stored
|
22
|
-
# in config.yml, or if authentication is disabled.
|
24
|
+
# in config.yml, or if authentication is disabled. If authentication is
|
25
|
+
# turned on, then every request is authenticated, including between
|
26
|
+
# the worker daemons and the central server.
|
23
27
|
def authorize(login, password)
|
24
28
|
return true unless CloudCrowd.config[:use_http_authentication]
|
25
29
|
return CloudCrowd.config[:login] == login &&
|
@@ -2,10 +2,20 @@ module CloudCrowd
|
|
2
2
|
module Helpers
|
3
3
|
module Resources
|
4
4
|
|
5
|
+
# Convenience method for responding with JSON. Sets the content-type,
|
6
|
+
# serializes, and allows empty responses.
|
7
|
+
def json(obj)
|
8
|
+
content_type :json
|
9
|
+
return status(204) && '' if obj.nil?
|
10
|
+
obj.to_json
|
11
|
+
end
|
12
|
+
|
13
|
+
# Lazy-fetch the job specified by <tt>job_id</tt>.
|
5
14
|
def current_job
|
6
15
|
@job ||= Job.find_by_id(params[:job_id]) or raise Sinatra::NotFound
|
7
16
|
end
|
8
17
|
|
18
|
+
# Lazy-fetch the WorkUnit specified by <tt>work_unit_id</tt>.
|
9
19
|
def current_work_unit
|
10
20
|
@work_unit ||= WorkUnit.find_by_id(params[:work_unit_id]) or raise Sinatra::NotFound
|
11
21
|
end
|
@@ -14,9 +24,8 @@ module CloudCrowd
|
|
14
24
|
# with no content.
|
15
25
|
def dequeue_work_unit(offset=0)
|
16
26
|
handle_conflicts do
|
17
|
-
|
18
|
-
|
19
|
-
unit.to_json
|
27
|
+
actions = params[:enabled_actions].split(',')
|
28
|
+
WorkUnit.dequeue(actions, offset)
|
20
29
|
end
|
21
30
|
end
|
22
31
|
|
@@ -1,7 +1,7 @@
|
|
1
1
|
module CloudCrowd
|
2
2
|
|
3
3
|
# A chunk of work that will be farmed out into many WorkUnits to be processed
|
4
|
-
# in parallel by
|
4
|
+
# in parallel by each active CloudCrowd::Worker. Jobs are defined by a list
|
5
5
|
# of inputs (usually public urls to files), an action (the name of a script that
|
6
6
|
# CloudCrowd knows how to run), and, eventually a corresponding list of output.
|
7
7
|
class Job < ActiveRecord::Base
|
@@ -10,9 +10,13 @@ module CloudCrowd
|
|
10
10
|
has_many :work_units, :dependent => :destroy
|
11
11
|
|
12
12
|
validates_presence_of :status, :inputs, :action, :options
|
13
|
+
|
14
|
+
before_validation_on_create :set_initial_status
|
15
|
+
after_create :queue_for_workers
|
16
|
+
before_destroy :cleanup
|
13
17
|
|
14
18
|
# Create a Job from an incoming JSON or XML request, and add it to the queue.
|
15
|
-
# TODO:
|
19
|
+
# TODO: Think about XML support.
|
16
20
|
def self.create_from_request(h)
|
17
21
|
self.create(
|
18
22
|
:inputs => h['inputs'].to_json,
|
@@ -23,16 +27,6 @@ module CloudCrowd
|
|
23
27
|
)
|
24
28
|
end
|
25
29
|
|
26
|
-
# Creating a job creates its corresponding work units, adding them
|
27
|
-
# to the queue.
|
28
|
-
def after_create
|
29
|
-
self.queue_for_workers(JSON.parse(self.inputs))
|
30
|
-
end
|
31
|
-
|
32
|
-
def before_validation_on_create
|
33
|
-
self.status = self.splittable? ? SPLITTING : PROCESSING
|
34
|
-
end
|
35
|
-
|
36
30
|
# After work units are marked successful, we check to see if all of them have
|
37
31
|
# finished, if so, continue on to the next phase of the job.
|
38
32
|
def check_for_completion
|
@@ -54,15 +48,10 @@ module CloudCrowd
|
|
54
48
|
self
|
55
49
|
end
|
56
50
|
|
57
|
-
#
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
self.mergeable? ? MERGING :
|
62
|
-
SUCCEEDED
|
63
|
-
end
|
64
|
-
|
65
|
-
# If a callback_url is defined, post the Job's JSON to it upon completion.
|
51
|
+
# If a <tt>callback_url</tt> is defined, post the Job's JSON to it upon
|
52
|
+
# completion. The <tt>callback_url</tt> may include HTTP basic authentication,
|
53
|
+
# if you like:
|
54
|
+
# http://user:password@example.com/job_complete
|
66
55
|
def fire_callback
|
67
56
|
begin
|
68
57
|
RestClient.post(callback_url, {:job => self.to_json}) if callback_url
|
@@ -71,13 +60,17 @@ module CloudCrowd
|
|
71
60
|
end
|
72
61
|
end
|
73
62
|
|
74
|
-
# Cleaning up after a job will remove all of its files from S3.
|
63
|
+
# Cleaning up after a job will remove all of its files from S3. Destroying
|
64
|
+
# a Job calls cleanup first.
|
75
65
|
def cleanup
|
76
66
|
AssetStore.new.cleanup_job(self)
|
77
67
|
end
|
78
68
|
|
79
|
-
# Have all of the WorkUnits finished?
|
69
|
+
# Have all of the WorkUnits finished?
|
70
|
+
#--
|
71
|
+
# We could trade reads for writes here
|
80
72
|
# by keeping a completed_count on the Job itself.
|
73
|
+
#++
|
81
74
|
def all_work_units_complete?
|
82
75
|
self.work_units.incomplete.count <= 0
|
83
76
|
end
|
@@ -97,19 +90,14 @@ module CloudCrowd
|
|
97
90
|
self.processing? && self.action_class.public_instance_methods.include?('merge')
|
98
91
|
end
|
99
92
|
|
100
|
-
# Retrieve the class for this Job's Action
|
93
|
+
# Retrieve the class for this Job's Action.
|
101
94
|
def action_class
|
102
|
-
CloudCrowd.actions
|
103
|
-
|
104
|
-
|
105
|
-
# When the WorkUnits are all finished, gather all their outputs together
|
106
|
-
# before removing them from the database entirely.
|
107
|
-
def gather_outputs_from_work_units
|
108
|
-
outs = self.work_units.complete.map {|wu| wu.output }
|
109
|
-
self.work_units.complete.destroy_all
|
110
|
-
outs
|
95
|
+
klass = CloudCrowd.actions[self.action]
|
96
|
+
return klass if klass
|
97
|
+
raise ActionNotFound, "no action named: '#{self.action}' could be found"
|
111
98
|
end
|
112
99
|
|
100
|
+
# Get the displayable status name of the Job's status code.
|
113
101
|
def display_status
|
114
102
|
CloudCrowd.display_status(self.status)
|
115
103
|
end
|
@@ -122,22 +110,71 @@ module CloudCrowd
|
|
122
110
|
(work_units.complete.count / work_units.count.to_f * 100).round
|
123
111
|
end
|
124
112
|
|
113
|
+
# How long has this Job taken?
|
114
|
+
def time_taken
|
115
|
+
return self.time if self.time
|
116
|
+
Time.now - self.created_at
|
117
|
+
end
|
118
|
+
|
119
|
+
# Generate a stable 6-digit hex color code, based on the Job's id.
|
120
|
+
def color
|
121
|
+
@color ||= Digest::MD5.hexdigest(self.id.to_s)[-7...-1]
|
122
|
+
end
|
123
|
+
|
125
124
|
# A JSON representation of this job includes the statuses of its component
|
126
125
|
# WorkUnits, as well as any completed outputs.
|
127
126
|
def to_json(opts={})
|
128
|
-
atts = {
|
127
|
+
atts = {
|
128
|
+
'id' => self.id,
|
129
|
+
'color' => self.color,
|
130
|
+
'status' => self.display_status,
|
131
|
+
'percent_complete' => self.percent_complete,
|
132
|
+
'work_units' => self.work_units.count,
|
133
|
+
'time_taken' => self.time_taken
|
134
|
+
}
|
129
135
|
atts.merge!({'outputs' => JSON.parse(self.outputs)}) if self.outputs
|
130
|
-
atts.merge!({'time' => self.time}) if self.time
|
131
136
|
atts.to_json
|
132
137
|
end
|
138
|
+
|
139
|
+
|
140
|
+
private
|
141
|
+
|
142
|
+
# When the WorkUnits are all finished, gather all their outputs together
|
143
|
+
# before removing them from the database entirely.
|
144
|
+
def gather_outputs_from_work_units
|
145
|
+
units = self.work_units.complete
|
146
|
+
outs = self.work_units.complete.map {|u| JSON.parse(u.output)['output'] }
|
147
|
+
self.work_units.complete.destroy_all
|
148
|
+
outs
|
149
|
+
end
|
150
|
+
|
151
|
+
# Transition this Job's status to the appropriate next status.
|
152
|
+
def transition_to_next_phase
|
153
|
+
self.status = any_work_units_failed? ? FAILED :
|
154
|
+
self.splitting? ? PROCESSING :
|
155
|
+
self.mergeable? ? MERGING :
|
156
|
+
SUCCEEDED
|
157
|
+
end
|
133
158
|
|
134
159
|
# When starting a new job, or moving to a new stage, split up the inputs
|
135
|
-
# into WorkUnits, and queue them.
|
136
|
-
|
160
|
+
# into WorkUnits, and queue them. Workers will start picking them up right
|
161
|
+
# away.
|
162
|
+
def queue_for_workers(input=nil)
|
163
|
+
input ||= JSON.parse(self.inputs)
|
137
164
|
[input].flatten.each do |wu_input|
|
138
|
-
WorkUnit.create(
|
165
|
+
WorkUnit.create(
|
166
|
+
:job => self,
|
167
|
+
:action => self.action,
|
168
|
+
:input => wu_input,
|
169
|
+
:status => self.status
|
170
|
+
)
|
139
171
|
end
|
140
172
|
end
|
141
173
|
|
174
|
+
# A Job starts out either splitting or processing, depending on its action.
|
175
|
+
def set_initial_status
|
176
|
+
self.status = self.splittable? ? SPLITTING : PROCESSING
|
177
|
+
end
|
178
|
+
|
142
179
|
end
|
143
180
|
end
|