documentcloud-cloud-crowd 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/actions/graphics_magick.rb +44 -0
- data/bin/crowd +5 -0
- data/cloud-crowd.gemspec +71 -0
- data/config/config.example.ru +17 -0
- data/config/config.example.yml +11 -0
- data/config/database.example.yml +6 -0
- data/lib/cloud-crowd.rb +96 -0
- data/lib/cloud_crowd/action.rb +88 -0
- data/lib/cloud_crowd/app.rb +54 -0
- data/lib/cloud_crowd/asset_store.rb +58 -0
- data/lib/cloud_crowd/command_line.rb +198 -0
- data/lib/cloud_crowd/core_ext.rb +10 -0
- data/lib/cloud_crowd/daemon.rb +48 -0
- data/lib/cloud_crowd/helpers/resources.rb +15 -0
- data/lib/cloud_crowd/helpers/urls.rb +7 -0
- data/lib/cloud_crowd/helpers.rb +8 -0
- data/lib/cloud_crowd/models/job.rb +129 -0
- data/lib/cloud_crowd/models/work_unit.rb +62 -0
- data/lib/cloud_crowd/models.rb +31 -0
- data/lib/cloud_crowd/runner.rb +29 -0
- data/lib/cloud_crowd/schema.rb +34 -0
- data/lib/cloud_crowd/worker.rb +115 -0
- data/test/acceptance/test_failing_work_units.rb +32 -0
- data/test/blueprints.rb +15 -0
- data/test/config/test_config.yml +10 -0
- data/test/config/test_database.yml +6 -0
- data/test/test_helper.rb +18 -0
- data/test/unit/test_job.rb +70 -0
- data/test/unit/test_work_unit.rb +55 -0
- metadata +190 -0
@@ -0,0 +1,48 @@
|
|
1
|
+
# Configure CloudCrowd from the CLOUD_CROWD_CONFIG environment variable
# (presumably the path to a config YAML file -- confirm against
# CloudCrowd.configure). This runs first because the Daemon class body reads
# CloudCrowd.config when it is loaded.
CloudCrowd.configure(ENV['CLOUD_CROWD_CONFIG'])
|
2
|
+
|
3
|
+
require 'cloud_crowd/worker'
|
4
|
+
|
5
|
+
module CloudCrowd

  # A CloudCrowd::Daemon, started by the Daemons gem, runs a CloudCrowd::Worker in
  # a loop, continually fetching and processing WorkUnits from the central
  # server. The Daemon backs off and pings central less frequently when there
  # isn't any work to be done, and speeds back up when there is.
  class Daemon

    # Polling intervals (handed to +sleep+) and the back-off factor, read from
    # the CloudCrowd configuration when this class is loaded.
    DEFAULT_WAIT    = CloudCrowd.config[:default_worker_wait]
    MAX_WAIT        = CloudCrowd.config[:max_worker_wait]
    WAIT_MULTIPLIER = CloudCrowd.config[:worker_wait_multiplier]

    # Signals that make the daemon exit. KILL is deliberately not listed: it is
    # never delivered to a handler, and asking Signal.trap to trap it raises on
    # current Ruby versions, which would abort Daemon.new.
    EXIT_SIGNALS = %w(INT TERM).freeze

    # Start at the default polling interval, build the Worker that does the
    # actual fetching and processing, and install the exit-on-signal handlers.
    def initialize
      @wait_time = DEFAULT_WAIT
      @worker    = CloudCrowd::Worker.new
      EXIT_SIGNALS.each {|signal| Signal.trap(signal, 'EXIT') }
    end

    # Loop forever, fetching WorkUnits.
    # TODO: Workers busy with their work units won't die until the unit has
    # been finished. This should probably be wrapped in an appropriately lengthy
    # timeout, or should be killable from the outside by terminating the thread.
    # In either case, nasty un-cleaned-up bits might be left behind.
    def run
      loop do
        @worker.fetch_work_unit
        if @worker.has_work?
          @worker.run
          # There was work, so go back to polling at the default rate.
          @wait_time = DEFAULT_WAIT
          sleep 0.01 # So as to listen for incoming signals.
        else
          # Nothing to do: wait longer each time, capped at MAX_WAIT.
          @wait_time = [@wait_time * WAIT_MULTIPLIER, MAX_WAIT].min
          sleep @wait_time
        end
      end
    end

  end

end
|
47
|
+
|
48
|
+
# Script entry point: build a Daemon and enter its endless fetch/process loop.
CloudCrowd::Daemon.new.run
|
@@ -0,0 +1,15 @@
|
|
1
|
+
module CloudCrowd
  module Helpers

    # Helpers that look up the records named by the current request's params.
    # Each lookup is memoized in an instance variable and raises
    # Sinatra::NotFound when the record does not exist.
    module Resources

      # The Job identified by params[:job_id].
      def current_job
        @job ||= Job.find_by_id(params[:job_id])
        @job || raise(Sinatra::NotFound)
      end

      # The WorkUnit identified by params[:work_unit_id].
      def current_work_unit
        @work_unit ||= WorkUnit.find_by_id(params[:work_unit_id])
        @work_unit || raise(Sinatra::NotFound)
      end

    end

  end
end
|
@@ -0,0 +1,129 @@
|
|
1
|
+
# A Job is a chunk of work that gets farmed out as many WorkUnits, processed in
# parallel by all the active CloudCrowd::Workers. It is described by a list of
# inputs (usually public urls to files), an action (the name of a script that
# CloudCrowd knows how to run) and, eventually, a corresponding list of outputs.
# Inputs, options and outputs are stored as JSON strings.
class Job < ActiveRecord::Base
  include CloudCrowd::ModelStatus

  has_many :work_units, :dependent => :destroy

  validates_presence_of :status, :inputs, :action, :options

  # Create a Job from the parsed body of an incoming request (a Hash with
  # string keys), and add it to the queue. Missing options default to {}.
  # TODO: Add XML support.
  def self.create_from_request(h)
    attributes = {
      :inputs       => h['inputs'].to_json,
      :action       => h['action'],
      :options      => (h['options'] || {}).to_json,
      :owner_email  => h['owner_email'],
      :callback_url => h['callback_url']
    }
    create(attributes)
  end

  # Callback: once the Job row exists, queue one WorkUnit per input.
  def after_create
    parsed_inputs = JSON.parse(self.inputs)
    queue_for_workers(parsed_inputs)
  end

  # Callback: a new Job starts out SPLITTING when its action can split,
  # and PROCESSING otherwise.
  def before_validation_on_create
    initial_status = splittable? ? CloudCrowd::SPLITTING : CloudCrowd::PROCESSING
    self.status = initial_status
  end

  # After work units are marked successful, we check to see if all of them have
  # finished. If so, the job moves on to its next phase: either more WorkUnits
  # are queued (processing or merging), or the job is over and the callback
  # fires. Returns nil while units are still outstanding, self otherwise.
  def check_for_completion
    return unless all_work_units_complete?
    transition_to_next_phase
    results = gather_outputs_from_work_units

    if complete?
      self.outputs = results.to_json
      self.time    = Time.now - self.created_at
    end
    save

    case self.status
    when CloudCrowd::PROCESSING
      # Outputs of the split phase are JSON lists; each element becomes an input.
      queue_for_workers(results.map {|result| JSON.parse(result) }.flatten)
    when CloudCrowd::MERGING
      # The merge phase gets every output at once, as a single JSON input.
      queue_for_workers(results.to_json)
    else
      fire_callback
    end
    self
  end

  # Transition from the current phase to the next one. Any failed WorkUnit
  # fails the whole Job; otherwise splitting leads to processing, processing
  # leads to merging (when the action can merge), and everything else succeeds.
  def transition_to_next_phase
    self.status =
      if any_work_units_failed?
        CloudCrowd::FAILED
      elsif splitting?
        CloudCrowd::PROCESSING
      elsif should_merge?
        CloudCrowd::MERGING
      else
        CloudCrowd::SUCCEEDED
      end
  end

  # If a callback_url is defined, post the Job's JSON to it upon completion.
  # RestClient failures are reported on stdout and otherwise ignored.
  def fire_callback
    return unless callback_url
    RestClient.post(callback_url, {:job => self.to_json})
  rescue RestClient::Exception
    puts "Failed to fire job callback. Hmmm, what should happen here?"
  end

  # Cleaning up after a job will remove all of its files from S3.
  def cleanup
    store = CloudCrowd::AssetStore.new
    store.cleanup_job(self)
  end

  # Have all of the WorkUnits finished? We could trade reads for writes here
  # by keeping a completed_count on the Job itself.
  def all_work_units_complete?
    outstanding = work_units.incomplete.count
    outstanding <= 0
  end

  # Have any of the WorkUnits failed?
  def any_work_units_failed?
    failures = work_units.failed.count
    failures > 0
  end

  # Does this Job's action define a split step?
  def splittable?
    action_class.new.respond_to?(:split)
  end

  # A Job moves on to merging only from the processing phase, and only when
  # its action defines a merge step.
  def should_merge?
    return false unless processing?
    action_class.new.respond_to?(:merge)
  end

  # The action class registered with CloudCrowd under this Job's action name.
  def action_class
    CloudCrowd.actions(self.action)
  end

  # Collect the output of every complete WorkUnit, then destroy those units.
  def gather_outputs_from_work_units
    collected = work_units.complete.map {|unit| unit.output }
    work_units.complete.destroy_all
    collected
  end

  # The display form of this Job's status, as given by CloudCrowd.display_status.
  def display_status
    CloudCrowd.display_status(self.status)
  end

  # The number of WorkUnits that have not finished yet.
  def work_units_remaining
    work_units.incomplete.count
  end

  # A JSON representation of this job includes the statuses of its component
  # WorkUnits, as well as any completed outputs.
  def to_json(opts={})
    atts = {
      'id'                   => self.id,
      'status'               => display_status,
      'work_units_remaining' => work_units_remaining
    }
    atts['output'] = JSON.parse(self.outputs) if self.outputs
    atts['time']   = self.time if self.time
    atts.to_json
  end

  # When starting a new job, or moving to a new stage, split up the inputs
  # into WorkUnits, and queue them. Accepts a single input or a (nested) list.
  def queue_for_workers(input)
    [input].flatten.each do |unit_input|
      WorkUnit.create(:job => self, :input => unit_input, :status => self.status)
    end
  end

end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
# A WorkUnit is an atomic chunk of work from a job, processing a single input
# through a single action. All WorkUnits receive the same options.
class WorkUnit < ActiveRecord::Base
  include CloudCrowd::ModelStatus

  belongs_to :job

  validates_presence_of :job_id, :status, :input

  after_save :check_for_job_completion

  # After saving a WorkUnit, its Job should check whether it has just become
  # complete. Only complete (succeeded or failed) units trigger the check.
  def check_for_job_completion
    self.job.check_for_completion if complete?
  end

  # Mark this unit as having finished successfully, recording its output and
  # the time it took, and releasing it (taken => false).
  def finish(output, time_taken)
    update_attributes({
      :status   => CloudCrowd::SUCCEEDED,
      :taken    => false,
      :attempts => self.attempts + 1,
      :output   => output,
      :time     => time_taken
    })
  end

  # Mark this unit as having failed. While the number of attempts is still
  # below the configured :work_unit_retries, the unit is released for another
  # try instead, and the output and time are not recorded.
  def fail(output, time_taken)
    tries = self.attempts + 1
    return try_again if tries < CloudCrowd.config[:work_unit_retries]
    update_attributes({
      :status   => CloudCrowd::FAILED,
      :taken    => false,
      :attempts => tries,
      :output   => output,
      :time     => time_taken
    })
  end

  # Ever tried. Ever failed. No matter. Try again. Fail again. Fail better.
  # Releases the unit and counts the attempt, leaving its status untouched.
  def try_again
    update_attributes({
      :taken    => false,
      :attempts => self.attempts + 1
    })
  end

  # The JSON representation of a WorkUnit contains common elements of its job.
  # The optional argument is accepted (and ignored) so that JSON generators,
  # which pass state/options to to_json, can serialize a WorkUnit nested in
  # another structure; this also matches Job#to_json.
  def to_json(opts={})
    {
      'id'       => self.id,
      'job_id'   => self.job_id,
      'input'    => self.input,
      'attempts' => self.attempts,
      'action'   => self.job.action,
      'options'  => JSON.parse(self.job.options),
      'status'   => self.status
    }.to_json
  end

end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
module CloudCrowd

  # Status behaviour shared by the models that include it: one named scope and
  # one predicate method per CloudCrowd status.
  module ModelStatus

    # Define the status named scopes on the including class.
    def self.included(klass)
      klass.class_eval do
        # Note that COMPLETE and INCOMPLETE are unions of other states.
        [
          ['processing', CloudCrowd::PROCESSING],
          ['succeeded',  CloudCrowd::SUCCEEDED],
          ['failed',     CloudCrowd::FAILED],
          ['splitting',  CloudCrowd::SPLITTING],
          ['merging',    CloudCrowd::MERGING],
          ['complete',   CloudCrowd::COMPLETE],
          ['incomplete', CloudCrowd::INCOMPLETE]
        ].each do |scope_name, status_value|
          named_scope scope_name, :conditions => {:status => status_value}
        end
      end
    end

    # Predicates for the single states compare the status directly.

    def processing?
      status == CloudCrowd::PROCESSING
    end

    def succeeded?
      status == CloudCrowd::SUCCEEDED
    end

    def failed?
      status == CloudCrowd::FAILED
    end

    def splitting?
      status == CloudCrowd::SPLITTING
    end

    def merging?
      status == CloudCrowd::MERGING
    end

    # Predicates for the union states test membership instead.

    def complete?
      CloudCrowd::COMPLETE.include?(status)
    end

    def incomplete?
      CloudCrowd::INCOMPLETE.include?(status)
    end

  end
end
|
29
|
+
|
30
|
+
require 'cloud_crowd/models/job'
|
31
|
+
require 'cloud_crowd/models/work_unit'
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# This is the script that kicks off a single CloudCrowd::Daemon. Because the
|
2
|
+
# daemons don't load the entire rails stack, this file functions like a mini
|
3
|
+
# environment.rb, loading all the common gems that we need.
|
4
|
+
|
5
|
+
# CloudCrowd::App.root = File.expand_path(File.dirname(__FILE__) + '/../..') unless defined?(CloudCrowd::App.root)
|
6
|
+
|
7
|
+
# Standard Lib and Gems
|
8
|
+
require 'fileutils'
|
9
|
+
require 'rubygems'
|
10
|
+
require 'daemons'
|
11
|
+
require 'socket'
|
12
|
+
require 'yaml'
|
13
|
+
require 'json'
|
14
|
+
require 'rest_client'
|
15
|
+
require 'right_aws'
|
16
|
+
|
17
|
+
# Make sure the 'log' directory (the :dir handed to Daemons.run) exists.
# File.exist? is used because File.exists? was deprecated and has been removed
# from current Ruby versions.
FileUtils.mkdir('log') unless File.exist?('log')
|
18
|
+
|
19
|
+
# Daemon/Worker Dependencies.
|
20
|
+
require "#{File.dirname(__FILE__)}/../cloud-crowd"
|
21
|
+
|
22
|
+
# Hand the daemon script over to the Daemons gem. Its files go into the 'log'
# directory, and several workers may run side by side (:multiple).
daemon_script  = "#{CloudCrowd::App.root}/lib/cloud_crowd/daemon.rb"
daemon_options = {
  :app_name   => "cloud_crowd_worker",
  :dir_mode   => :normal,
  :dir        => 'log',
  :multiple   => true,
  :backtrace  => true,
  :log_output => true
}
Daemons.run(daemon_script, daemon_options)
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# Complete schema for CloudCrowd: the jobs table, the work_units table and the
# indexes on work_units. Both tables are created with :force => true.
ActiveRecord::Schema.define(:version => 1) do

  # One row per Job. Inputs, options and outputs are stored as text.
  create_table('jobs', :force => true) do |jobs|
    jobs.integer  'status',       :null => false
    jobs.text     'inputs',       :null => false
    jobs.string   'action',       :null => false
    jobs.text     'options',      :null => false
    jobs.text     'outputs'
    jobs.float    'time'
    jobs.string   'callback_url'
    jobs.string   'owner_email'
    jobs.integer  'lock_version', :default => 0, :null => false
    jobs.datetime 'created_at'
    jobs.datetime 'updated_at'
  end

  # One row per WorkUnit, pointing at its Job through job_id.
  create_table('work_units', :force => true) do |units|
    units.integer  'status',       :null => false
    units.integer  'job_id',       :null => false
    units.text     'input',        :null => false
    units.integer  'attempts',     :default => 0, :null => false
    units.integer  'lock_version', :default => 0, :null => false
    units.boolean  'taken',        :default => false, :null => false
    units.float    'time'
    units.text     'output'
    units.datetime 'created_at'
    units.datetime 'updated_at'
  end

  add_index('work_units', ['job_id'], :name => 'index_work_units_on_job_id')
  add_index('work_units', ['status', 'taken'], :name => 'index_work_units_on_status_and_taken')

end
|
@@ -0,0 +1,115 @@
|
|
1
|
+
module CloudCrowd

  # A Worker asks the central server for a WorkUnit over HTTP, runs the
  # matching action on it, and reports the result (or the failure) back.
  class Worker

    # Base URL of the central server, and the number of seconds to wait before
    # retrying a failed request to it. Both are read from the CloudCrowd
    # configuration when this class is loaded.
    CENTRAL_URL = CloudCrowd.config[:central_server]
    RETRY_WAIT  = CloudCrowd.config[:worker_retry_wait]

    attr_reader :action

    # Spinning up a worker will create a new AssetStore with a persistent
    # connection to S3. This AssetStore gets passed into each action, for use
    # as it is run.
    def initialize
      @id       = $$ # The process id, used to tag log lines.
      @hostname = Socket.gethostname
      @store    = CloudCrowd::AssetStore.new
    end

    # Ask the central server for a new WorkUnit. When there is none, nothing is
    # parsed and has_work? stays false.
    def fetch_work_unit
      keep_trying_to "fetch a new work unit" do
        unit_json = RestClient.get("#{CENTRAL_URL}/work")
        # No content means no work for us. An empty body counts as no content
        # too: it is truthy, and parsing it would fail and retry forever.
        return if unit_json.nil? || unit_json.to_s.strip.empty?
        @start_time = Time.now
        parse_work_unit unit_json
        log "fetched work unit for #{@action_name}"
      end
    end

    # Return output to the central server, marking the current work unit as done.
    def complete_work_unit(result)
      keep_trying_to "complete work unit" do
        data = completion_params.merge({:status => 'succeeded', :output => result})
        RestClient.put("#{CENTRAL_URL}/work/#{data[:id]}", data)
        log "finished #{@action_name} in #{data[:time]} seconds"
      end
    end

    # Mark the current work unit as failed, returning the exception to central.
    def fail_work_unit(exception)
      keep_trying_to "mark work unit as failed" do
        data = completion_params.merge({:status => 'failed', :output => exception.message})
        RestClient.put("#{CENTRAL_URL}/work/#{data[:id]}", data)
        log "failed #{@action_name} in #{data[:time]} seconds\n#{exception.message}\n#{exception.backtrace}"
      end
    end

    # Run the block until it gets through without an error, logging each
    # failure and sleeping RETRY_WAIT seconds between attempts. Only
    # StandardError is retried: SystemExit and signal exceptions must be able
    # to break out of this otherwise endless loop, or the daemon could not be
    # stopped while the central server is unreachable.
    def keep_trying_to(title)
      begin
        yield
      rescue StandardError => e
        log "failed to #{title} -- retry in #{RETRY_WAIT} seconds"
        log e.message
        log e.backtrace
        sleep RETRY_WAIT
        retry
      end
    end

    # Does this Worker have a job to do?
    def has_work?
      @action_name && @input && @options
    end

    # Executes the current work unit, catching all exceptions as failures.
    # The unit's status selects which step of the action is run.
    # NOTE(review): rescuing Exception here also reports SystemExit and signal
    # exceptions as work unit failures -- confirm that this is intended.
    def run
      begin
        @action = CloudCrowd.actions(@action_name).new
        @action.configure(@status, @input, @options, @store)
        result = case @status
        when CloudCrowd::PROCESSING then @action.process
        when CloudCrowd::SPLITTING  then @action.split
        when CloudCrowd::MERGING    then @action.merge
        else raise "Work units must specify their status."
        end
        complete_work_unit(result)
      rescue Exception => e
        fail_work_unit(e)
      ensure
        clear_work_unit
      end
    end


    private

    # Common parameters to send back to central, regardless of success or failure.
    def completion_params
      {:id => @options['work_unit_id'], :time => Time.now - @start_time}
    end

    # Extract our instance variables from a WorkUnit's JSON. The ids (and, when
    # the options do not already carry one, the attempt count) are folded into
    # the options.
    def parse_work_unit(unit_json)
      unit = JSON.parse(unit_json)
      @action_name, @input, @options, @status = unit['action'], unit['input'], unit['options'], unit['status']
      @options['job_id'] = unit['job_id']
      @options['work_unit_id'] = unit['id']
      @options['attempts'] ||= unit['attempts']
    end

    # Log a message to the daemon log. Includes PID for identification.
    def log(message)
      puts "Worker ##{@id}: #{message}"
    end

    # When we're done with a unit, clear out our ivars to make way for the next.
    # Also, remove all of the previous unit's temporary storage. The action may
    # be missing when looking it up or instantiating it was what failed; in
    # that case there is nothing to clean up, but the ivars must still be reset.
    def clear_work_unit
      @action.cleanup_work_directory if @action
      @action, @action_name, @input, @options, @status, @start_time = nil, nil, nil, nil, nil, nil
    end

  end

end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
# A Worker Daemon needs to be running to perform this integration test.
class FailingWorkUnitsTest < Test::Unit::TestCase

  # Posts a three-input job, fails every unit until one attempt is left and
  # checks that the units are still queued, then fails them one last time and
  # checks that the job itself has failed and its units are gone.
  should "retry work units when they fail" do
    browser = Rack::Test::Session.new(Rack::MockSession.new(CloudCrowd::App))

    browser.post '/jobs', :json => {
      'action'  => 'failure_testing',
      'inputs'  => ['one', 'two', 'three'],
      'options' => {}
    }.to_json
    assert browser.last_response.ok?

    retries = CloudCrowd.config[:work_unit_retries]
    job = Job.last
    (retries - 1).times do
      job.work_units.each {|unit| unit.fail('failed', 10) }
    end
    # assert_equal reports both values on failure, unlike a bare `assert a == b`.
    assert_equal 3, job.reload.work_units_remaining
    job.work_units.reload.each_with_index do |unit, i|
      assert unit.processing?
      assert_equal retries - 1, unit.attempts
      unit.fail('failed', 10)
      assert unit.job.any_work_units_failed? if i == 0
    end
    assert job.reload.failed?
    assert_equal 0, job.work_units.count
  end

end
|
32
|
+
|
data/test/blueprints.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
# A fake image url: "<domain name>/<domain word>.jpg".
Sham.url { "#{Faker::Internet.domain_name}/#{Faker::Internet.domain_word}.jpg" }

# Default attributes for a Job built in tests: a processing graphics_magick
# job with a single input url and empty options.
Job.blueprint do
  status  { CloudCrowd::PROCESSING }
  inputs  { ['http://www.google.com/intl/en_ALL/images/logo.gif'].to_json }
  action  { 'graphics_magick' }
  options { Hash.new.to_json }
end

# Default attributes for a WorkUnit built in tests: an untaken, processing
# unit with a fake url as input, attached to a freshly made Job.
WorkUnit.blueprint do
  job    { Job.make }
  status { CloudCrowd::PROCESSING }
  taken  { false }
  input  { Sham.url }
end
|
data/test/test_helper.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
|
3
|
+
# Load the library relative to this helper's directory, then point CloudCrowd
# at the test configuration and the test database settings.
test_dir = File.dirname(__FILE__)
require File.expand_path('../lib/cloud-crowd', test_dir)
CloudCrowd.configure("#{test_dir}/config/test_config.yml")
CloudCrowd.configure_database("#{test_dir}/config/test_database.yml")
|
7
|
+
|
8
|
+
require 'faker'
|
9
|
+
require 'sham'
|
10
|
+
require 'rack/test'
|
11
|
+
require 'shoulda/active_record'
|
12
|
+
require 'machinist/active_record'
|
13
|
+
require 'mocha'
|
14
|
+
require "#{CloudCrowd::App.root}/test/blueprints.rb"
|
15
|
+
|
16
|
+
# Mix the CloudCrowd module into every test case (presumably so that tests can
# refer to its constants without the CloudCrowd:: prefix -- confirm).
Test::Unit::TestCase.class_eval do
  include CloudCrowd
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
# Unit tests for Job. Comparisons use assert_equal / assert_operator so that a
# failure reports the values involved, unlike a bare `assert a == b`.
class JobTest < Test::Unit::TestCase

  context "A CloudCrowd Job" do

    setup do
      @job = Job.make
      @unit = @job.work_units.first
    end

    subject { @job }

    should_have_many :work_units

    should_validate_presence_of :status, :inputs, :action, :options

    should "create all of its work units as soon as the job is created" do
      assert_operator @job.work_units.count, :>=, 1
      assert_equal 1, @job.work_units_remaining
      assert @job.processing?
      assert @unit.processing?
      assert !@job.all_work_units_complete?
    end

    should "know its completion status" do
      assert !@job.all_work_units_complete?
      @unit.update_attributes(:status => CloudCrowd::SUCCEEDED, :output => 'hello')
      assert @job.reload.all_work_units_complete?
      assert_equal 0, @job.work_units_remaining
      assert_equal "[\"hello\"]", @job.outputs
    end

    should "be able to create a job from a JSON request" do
      job = Job.create_from_request(JSON.parse(<<-EOS
      { "inputs" : ["one", "two", "three"],
        "action" : "graphics_magick",
        "owner_email" : "bob@example.com",
        "callback_url" : "http://example.com/callback" }
      EOS
      ))
      assert_equal 3, job.work_units.count
      assert_equal 'graphics_magick', job.action
      assert_equal GraphicsMagick, job.action_class
      assert_equal "http://example.com/callback", job.callback_url
    end

    should "create jobs with a SPLITTING status for actions that have a split method defined" do
      job = Job.create_from_request({'inputs' => ['1'], 'action' => 'pdf_to_images'})
      assert job.splittable?
      assert job.splitting?
    end

    should "fire a callback when a job has finished, successfully or not" do
      Job.any_instance.expects(:fire_callback)
      @job.work_units.first.finish('output', 10)
      assert @job.all_work_units_complete?
    end

    should "have a 'pretty' display of the Job's status" do
      assert_equal 'processing', @job.display_status
      @job.update_attribute(:status, CloudCrowd::FAILED)
      assert_equal 'failed', @job.display_status
      @job.update_attribute(:status, CloudCrowd::MERGING)
      assert_equal 'merging', @job.display_status
    end

  end

end
|