documentcloud-cloud-crowd 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/actions/graphics_magick.rb +44 -0
- data/bin/crowd +5 -0
- data/cloud-crowd.gemspec +71 -0
- data/config/config.example.ru +17 -0
- data/config/config.example.yml +11 -0
- data/config/database.example.yml +6 -0
- data/lib/cloud-crowd.rb +96 -0
- data/lib/cloud_crowd/action.rb +88 -0
- data/lib/cloud_crowd/app.rb +54 -0
- data/lib/cloud_crowd/asset_store.rb +58 -0
- data/lib/cloud_crowd/command_line.rb +198 -0
- data/lib/cloud_crowd/core_ext.rb +10 -0
- data/lib/cloud_crowd/daemon.rb +48 -0
- data/lib/cloud_crowd/helpers/resources.rb +15 -0
- data/lib/cloud_crowd/helpers/urls.rb +7 -0
- data/lib/cloud_crowd/helpers.rb +8 -0
- data/lib/cloud_crowd/models/job.rb +129 -0
- data/lib/cloud_crowd/models/work_unit.rb +62 -0
- data/lib/cloud_crowd/models.rb +31 -0
- data/lib/cloud_crowd/runner.rb +29 -0
- data/lib/cloud_crowd/schema.rb +34 -0
- data/lib/cloud_crowd/worker.rb +115 -0
- data/test/acceptance/test_failing_work_units.rb +32 -0
- data/test/blueprints.rb +15 -0
- data/test/config/test_config.yml +10 -0
- data/test/config/test_database.yml +6 -0
- data/test/test_helper.rb +18 -0
- data/test/unit/test_job.rb +70 -0
- data/test/unit/test_work_unit.rb +55 -0
- metadata +190 -0
@@ -0,0 +1,48 @@
|
|
1
|
+
CloudCrowd.configure(ENV['CLOUD_CROWD_CONFIG'])
|
2
|
+
|
3
|
+
require 'cloud_crowd/worker'
|
4
|
+
|
5
|
+
module CloudCrowd
|
6
|
+
|
7
|
+
# A CloudCrowd::Daemon, started by the Daemons gem, runs a CloudCrowd::Worker in
|
8
|
+
# a loop, continually fetching and processing WorkUnits from the central
|
9
|
+
# server. The Daemon backs off and pings central less frequently when there
|
10
|
+
# isn't any work to be done, and speeds back up when there is.
|
11
|
+
class Daemon
|
12
|
+
|
13
|
+
DEFAULT_WAIT = CloudCrowd.config[:default_worker_wait]
|
14
|
+
MAX_WAIT = CloudCrowd.config[:max_worker_wait]
|
15
|
+
WAIT_MULTIPLIER = CloudCrowd.config[:worker_wait_multiplier]
|
16
|
+
|
17
|
+
# Build the Worker this daemon drives, start with the default polling
# interval, and make the process exit when it is interrupted or terminated.
#
# SIGKILL is intentionally not trapped: the operating system never hands it
# to the process, and asking Ruby to install a handler for it raises.
def initialize
  @wait_time = DEFAULT_WAIT
  @worker    = CloudCrowd::Worker.new
  %w[INT TERM].each { |signal| Signal.trap(signal, 'EXIT') }
end
|
24
|
+
|
25
|
+
# Loop forever, fetching WorkUnits.
|
26
|
+
# TODO: Workers busy with their work units won't die until the unit has
|
27
|
+
# been finished. This should probably be wrapped in an appropriately lengthy
|
28
|
+
# timeout, or should be killable from the outside by terminating the thread.
|
29
|
+
# In either case, nasty un-cleaned-up bits might be left behind.
|
30
|
+
def run
  loop do
    @worker.fetch_work_unit

    unless @worker.has_work?
      # Nothing to do: lengthen the pause, capped at MAX_WAIT.
      @wait_time = [@wait_time * WAIT_MULTIPLIER, MAX_WAIT].min
      sleep @wait_time
      next
    end

    @worker.run
    # Work was found, so drop back to the default polling interval.
    @wait_time = DEFAULT_WAIT
    sleep 0.01 # So as to listen for incoming signals.
  end
end
|
43
|
+
|
44
|
+
end
|
45
|
+
|
46
|
+
end
|
47
|
+
|
48
|
+
CloudCrowd::Daemon.new.run
|
@@ -0,0 +1,15 @@
|
|
1
|
+
module CloudCrowd
|
2
|
+
module Helpers
|
3
|
+
module Resources
|
4
|
+
|
5
|
+
# Look up (and memoize) the Job named by params[:job_id]; raises
# Sinatra::NotFound when no such Job exists.
def current_job
  @job ||= Job.find_by_id(params[:job_id])
  raise Sinatra::NotFound unless @job
  @job
end
|
8
|
+
|
9
|
+
# Look up (and memoize) the WorkUnit named by params[:work_unit_id]; raises
# Sinatra::NotFound when no such WorkUnit exists.
def current_work_unit
  @work_unit ||= WorkUnit.find_by_id(params[:work_unit_id])
  raise Sinatra::NotFound unless @work_unit
  @work_unit
end
|
12
|
+
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
@@ -0,0 +1,129 @@
|
|
1
|
+
# A chunk of work that will be farmed out into many WorkUnits to be processed
|
2
|
+
# in parallel by all the active CloudCrowd::Workers. Jobs are defined by a list
|
3
|
+
# of inputs (usually public urls to files), an action (the name of a script that
|
4
|
+
# CloudCrowd knows how to run), and, eventually, a corresponding list of outputs.
|
5
|
+
class Job < ActiveRecord::Base
|
6
|
+
include CloudCrowd::ModelStatus
|
7
|
+
|
8
|
+
has_many :work_units, :dependent => :destroy
|
9
|
+
|
10
|
+
validates_presence_of :status, :inputs, :action, :options
|
11
|
+
|
12
|
+
# Create a Job from an incoming JSON or XML request, and add it to the queue.
|
13
|
+
# TODO: Add XML support.
|
14
|
+
def self.create_from_request(h)
  # Inputs and options are stored as JSON text; options default to an
  # empty hash when the request does not supply any.
  attributes = {
    :inputs       => h['inputs'].to_json,
    :action       => h['action'],
    :options      => (h['options'] || {}).to_json,
    :owner_email  => h['owner_email'],
    :callback_url => h['callback_url']
  }
  self.create(attributes)
end
|
23
|
+
|
24
|
+
def after_create
|
25
|
+
self.queue_for_workers(JSON.parse(self.inputs))
|
26
|
+
end
|
27
|
+
|
28
|
+
def before_validation_on_create
|
29
|
+
self.status = self.splittable? ? CloudCrowd::SPLITTING : CloudCrowd::PROCESSING
|
30
|
+
end
|
31
|
+
|
32
|
+
# After work units are marked successful, we check to see if all of them have
|
33
|
+
# finished; if so, this job is complete.
|
34
|
+
def check_for_completion
|
35
|
+
return unless all_work_units_complete?
|
36
|
+
transition_to_next_phase
|
37
|
+
output_list = gather_outputs_from_work_units
|
38
|
+
|
39
|
+
if complete?
|
40
|
+
self.outputs = output_list.to_json
|
41
|
+
self.time = Time.now - self.created_at
|
42
|
+
end
|
43
|
+
self.save
|
44
|
+
|
45
|
+
case self.status
|
46
|
+
when CloudCrowd::PROCESSING then queue_for_workers(output_list.map {|o| JSON.parse(o) }.flatten)
|
47
|
+
when CloudCrowd::MERGING then queue_for_workers(output_list.to_json)
|
48
|
+
else fire_callback
|
49
|
+
end
|
50
|
+
self
|
51
|
+
end
|
52
|
+
|
53
|
+
# Transition from the current phase to the next one.
|
54
|
+
def transition_to_next_phase
  # The checks run in priority order: a failure wins, then a finished split
  # moves on to processing, then a finished processing phase may merge.
  self.status =
    if any_work_units_failed?
      CloudCrowd::FAILED
    elsif self.splitting?
      CloudCrowd::PROCESSING
    elsif self.should_merge?
      CloudCrowd::MERGING
    else
      CloudCrowd::SUCCEEDED
    end
end
|
60
|
+
|
61
|
+
# If a callback_url is defined, post the Job's JSON to it upon completion.
|
62
|
+
# POSTs this Job's JSON to callback_url. Does nothing when no callback_url
# is set. A RestClient failure is logged (with the target URL and the error)
# rather than raised, so a broken callback endpoint does not abort the caller.
def fire_callback
  return unless callback_url
  RestClient.post(callback_url, {:job => self.to_json})
rescue RestClient::Exception => e
  puts "Failed to fire job callback to #{callback_url}: #{e.class}: #{e.message}"
end
|
69
|
+
|
70
|
+
# Cleaning up after a job will remove all of its files from S3.
|
71
|
+
def cleanup
|
72
|
+
CloudCrowd::AssetStore.new.cleanup_job(self)
|
73
|
+
end
|
74
|
+
|
75
|
+
# Have all of the WorkUnits finished? We could trade reads for writes here
|
76
|
+
# by keeping a completed_count on the Job itself.
|
77
|
+
def all_work_units_complete?
|
78
|
+
self.work_units.incomplete.count <= 0
|
79
|
+
end
|
80
|
+
|
81
|
+
# Have any of the WorkUnits failed?
|
82
|
+
def any_work_units_failed?
|
83
|
+
self.work_units.failed.count > 0
|
84
|
+
end
|
85
|
+
|
86
|
+
def splittable?
|
87
|
+
self.action_class.new.respond_to? :split
|
88
|
+
end
|
89
|
+
|
90
|
+
def should_merge?
|
91
|
+
self.processing? && self.action_class.new.respond_to?(:merge)
|
92
|
+
end
|
93
|
+
|
94
|
+
def action_class
|
95
|
+
CloudCrowd.actions(self.action)
|
96
|
+
end
|
97
|
+
|
98
|
+
# Collects the output of every complete WorkUnit, destroys those units, and
# returns the list of outputs.
#
# The complete units are loaded exactly once, so the records destroyed are
# precisely the ones whose output was collected. Querying a second time
# could destroy a unit that completed in between, losing its output.
def gather_outputs_from_work_units
  finished = self.work_units.complete.to_a
  outs = finished.map {|wu| wu.output }
  finished.each {|wu| wu.destroy }
  outs
end
|
103
|
+
|
104
|
+
def display_status
|
105
|
+
CloudCrowd.display_status(self.status)
|
106
|
+
end
|
107
|
+
|
108
|
+
def work_units_remaining
|
109
|
+
self.work_units.incomplete.count
|
110
|
+
end
|
111
|
+
|
112
|
+
# A JSON representation of this job includes the statuses of its component
|
113
|
+
# WorkUnits, as well as any completed outputs.
|
114
|
+
def to_json(opts={})
  atts = {
    'id'                   => self.id,
    'status'               => self.display_status,
    'work_units_remaining' => self.work_units_remaining
  }
  # Outputs are stored as JSON text, so decode them before re-encoding.
  atts['output'] = JSON.parse(self.outputs) if self.outputs
  atts['time']   = self.time if self.time
  atts.to_json
end
|
120
|
+
|
121
|
+
# When starting a new job, or moving to a new stage, split up the inputs
|
122
|
+
# into WorkUnits, and queue them.
|
123
|
+
def queue_for_workers(input)
  # Accept either a single input or an arbitrarily nested list of inputs.
  unit_inputs = [input].flatten
  unit_inputs.each do |unit_input|
    attributes = {:job => self, :input => unit_input, :status => self.status}
    WorkUnit.create(attributes)
  end
end
|
128
|
+
|
129
|
+
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
# A WorkUnit is an atomic chunk of work from a job, processing a single input
|
2
|
+
# through a single action. All WorkUnits receive the same options.
|
3
|
+
class WorkUnit < ActiveRecord::Base
|
4
|
+
include CloudCrowd::ModelStatus
|
5
|
+
|
6
|
+
belongs_to :job
|
7
|
+
|
8
|
+
validates_presence_of :job_id, :status, :input
|
9
|
+
|
10
|
+
after_save :check_for_job_completion
|
11
|
+
|
12
|
+
# After saving a WorkUnit, its Job should check whether it has just become complete.
|
13
|
+
def check_for_job_completion
|
14
|
+
self.job.check_for_completion if complete?
|
15
|
+
end
|
16
|
+
|
17
|
+
# Mark this unit as having finished successfully.
|
18
|
+
def finish(output, time_taken)
|
19
|
+
update_attributes({
|
20
|
+
:status => CloudCrowd::SUCCEEDED,
|
21
|
+
:taken => false,
|
22
|
+
:attempts => self.attempts + 1,
|
23
|
+
:output => output,
|
24
|
+
:time => time_taken
|
25
|
+
})
|
26
|
+
end
|
27
|
+
|
28
|
+
# Mark this unit as having failed. May attempt a retry.
|
29
|
+
def fail(output, time_taken)
|
30
|
+
tries = self.attempts + 1
|
31
|
+
return try_again if tries < CloudCrowd.config[:work_unit_retries]
|
32
|
+
update_attributes({
|
33
|
+
:status => CloudCrowd::FAILED,
|
34
|
+
:taken => false,
|
35
|
+
:attempts => tries,
|
36
|
+
:output => output,
|
37
|
+
:time => time_taken
|
38
|
+
})
|
39
|
+
end
|
40
|
+
|
41
|
+
# Ever tried. Ever failed. No matter. Try again. Fail again. Fail better.
|
42
|
+
def try_again
|
43
|
+
update_attributes({
|
44
|
+
:taken => false,
|
45
|
+
:attempts => self.attempts + 1
|
46
|
+
})
|
47
|
+
end
|
48
|
+
|
49
|
+
# The JSON representation of a WorkUnit contains common elements of its job.
|
50
|
+
# Serializes this WorkUnit, together with the action and (decoded) options
# of its Job, as a JSON string.
#
# opts is accepted and ignored so that the method can be called the same way
# as Job#to_json(opts={}), and so that JSON encoders which pass an options
# or state argument to to_json do not raise ArgumentError.
def to_json(opts={})
  {
    'id'        => self.id,
    'job_id'    => self.job_id,
    'input'     => self.input,
    'attempts'  => self.attempts,
    'action'    => self.job.action,
    'options'   => JSON.parse(self.job.options),
    'status'    => self.status
  }.to_json
end
|
61
|
+
|
62
|
+
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
module CloudCrowd
|
2
|
+
module ModelStatus
|
3
|
+
|
4
|
+
def self.included(klass)
|
5
|
+
|
6
|
+
klass.class_eval do
|
7
|
+
# Note that COMPLETE and INCOMPLETE are unions of other states.
|
8
|
+
named_scope 'processing', :conditions => {:status => CloudCrowd::PROCESSING}
|
9
|
+
named_scope 'succeeded', :conditions => {:status => CloudCrowd::SUCCEEDED}
|
10
|
+
named_scope 'failed', :conditions => {:status => CloudCrowd::FAILED}
|
11
|
+
named_scope 'splitting', :conditions => {:status => CloudCrowd::SPLITTING}
|
12
|
+
named_scope 'merging', :conditions => {:status => CloudCrowd::MERGING}
|
13
|
+
named_scope 'complete', :conditions => {:status => CloudCrowd::COMPLETE}
|
14
|
+
named_scope 'incomplete', :conditions => {:status => CloudCrowd::INCOMPLETE}
|
15
|
+
end
|
16
|
+
|
17
|
+
end
|
18
|
+
|
19
|
+
def processing?; self.status == CloudCrowd::PROCESSING; end
|
20
|
+
def succeeded?; self.status == CloudCrowd::SUCCEEDED; end
|
21
|
+
def failed?; self.status == CloudCrowd::FAILED; end
|
22
|
+
def splitting?; self.status == CloudCrowd::SPLITTING; end
|
23
|
+
def merging?; self.status == CloudCrowd::MERGING; end
|
24
|
+
def complete?; CloudCrowd::COMPLETE.include?(self.status); end
|
25
|
+
def incomplete?; CloudCrowd::INCOMPLETE.include?(self.status); end
|
26
|
+
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
require 'cloud_crowd/models/job'
|
31
|
+
require 'cloud_crowd/models/work_unit'
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# This is the script that kicks off a single CloudCrowd::Daemon. Because the
|
2
|
+
# daemons don't load the entire rails stack, this file functions like a mini
|
3
|
+
# environment.rb, loading all the common gems that we need.
|
4
|
+
|
5
|
+
# CloudCrowd::App.root = File.expand_path(File.dirname(__FILE__) + '/../..') unless defined?(CloudCrowd::App.root)
|
6
|
+
|
7
|
+
# Standard Lib and Gems
|
8
|
+
require 'fileutils'
|
9
|
+
require 'rubygems'
|
10
|
+
require 'daemons'
|
11
|
+
require 'socket'
|
12
|
+
require 'yaml'
|
13
|
+
require 'json'
|
14
|
+
require 'rest_client'
|
15
|
+
require 'right_aws'
|
16
|
+
|
17
|
+
# Make sure the directory the daemons log into exists before they start.
# File.exist? is used because File.exists? is no longer available in current Ruby.
FileUtils.mkdir('log') unless File.exist?('log')
|
18
|
+
|
19
|
+
# Daemon/Worker Dependencies.
|
20
|
+
require "#{File.dirname(__FILE__)}/../cloud-crowd"
|
21
|
+
|
22
|
+
Daemons.run("#{CloudCrowd::App.root}/lib/cloud_crowd/daemon.rb", {
|
23
|
+
:app_name => "cloud_crowd_worker",
|
24
|
+
:dir_mode => :normal,
|
25
|
+
:dir => 'log',
|
26
|
+
:multiple => true,
|
27
|
+
:backtrace => true,
|
28
|
+
:log_output => true
|
29
|
+
})
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# Complete schema for CloudCrowd.
|
2
|
+
ActiveRecord::Schema.define(:version => 1) do
|
3
|
+
|
4
|
+
create_table "jobs", :force => true do |t|
|
5
|
+
t.integer "status", :null => false
|
6
|
+
t.text "inputs", :null => false
|
7
|
+
t.string "action", :null => false
|
8
|
+
t.text "options", :null => false
|
9
|
+
t.text "outputs"
|
10
|
+
t.float "time"
|
11
|
+
t.string "callback_url"
|
12
|
+
t.string "owner_email"
|
13
|
+
t.integer "lock_version", :default => 0, :null => false
|
14
|
+
t.datetime "created_at"
|
15
|
+
t.datetime "updated_at"
|
16
|
+
end
|
17
|
+
|
18
|
+
create_table "work_units", :force => true do |t|
|
19
|
+
t.integer "status", :null => false
|
20
|
+
t.integer "job_id", :null => false
|
21
|
+
t.text "input", :null => false
|
22
|
+
t.integer "attempts", :default => 0, :null => false
|
23
|
+
t.integer "lock_version", :default => 0, :null => false
|
24
|
+
t.boolean "taken", :default => false, :null => false
|
25
|
+
t.float "time"
|
26
|
+
t.text "output"
|
27
|
+
t.datetime "created_at"
|
28
|
+
t.datetime "updated_at"
|
29
|
+
end
|
30
|
+
|
31
|
+
add_index "work_units", ["job_id"], :name => "index_work_units_on_job_id"
|
32
|
+
add_index "work_units", ["status", "taken"], :name => "index_work_units_on_status_and_taken"
|
33
|
+
|
34
|
+
end
|
@@ -0,0 +1,115 @@
|
|
1
|
+
module CloudCrowd
|
2
|
+
|
3
|
+
class Worker
|
4
|
+
|
5
|
+
CENTRAL_URL = CloudCrowd.config[:central_server]
|
6
|
+
RETRY_WAIT = CloudCrowd.config[:worker_retry_wait]
|
7
|
+
|
8
|
+
attr_reader :action
|
9
|
+
|
10
|
+
# Spinning up a worker will create a new AssetStore with a persistent
|
11
|
+
# connection to S3. This AssetStore gets passed into each action, for use
|
12
|
+
# as it is run.
|
13
|
+
def initialize
|
14
|
+
@id = $$
|
15
|
+
@hostname = Socket.gethostname
|
16
|
+
@store = CloudCrowd::AssetStore.new
|
17
|
+
end
|
18
|
+
|
19
|
+
# Ask the central server for a new WorkUnit.
|
20
|
+
# Requests "#{CENTRAL_URL}/work" and, when a work unit comes back, records
# the start time and loads the unit into this worker via parse_work_unit.
#
# A nil, empty, or whitespace-only response body means there is no work; the
# method then returns without touching any state. Without the emptiness
# check, an empty string would be handed to the JSON parser, fail, and send
# the worker into the retry loop instead of reporting "no work".
def fetch_work_unit
  keep_trying_to "fetch a new work unit" do
    unit_json = RestClient.get("#{CENTRAL_URL}/work")
    return if unit_json.nil? || unit_json.to_s.strip.empty? # No content means no work for us.
    @start_time = Time.now
    parse_work_unit unit_json
    log "fetched work unit for #{@action_name}"
  end
end
|
29
|
+
|
30
|
+
# Return output to the central server, marking the current work unit as done.
|
31
|
+
def complete_work_unit(result)
  keep_trying_to "complete work unit" do
    # Common completion fields plus the success marker and the result.
    payload  = completion_params.merge({:status => 'succeeded', :output => result})
    endpoint = "#{CENTRAL_URL}/work/#{payload[:id]}"
    RestClient.put(endpoint, payload)
    log "finished #{@action_name} in #{payload[:time]} seconds"
  end
end
|
38
|
+
|
39
|
+
# Mark the current work unit as failed, returning the exception to central.
|
40
|
+
def fail_work_unit(exception)
  keep_trying_to "mark work unit as failed" do
    # Common completion fields plus the failure marker and the error message.
    payload  = completion_params.merge({:status => 'failed', :output => exception.message})
    endpoint = "#{CENTRAL_URL}/work/#{payload[:id]}"
    RestClient.put(endpoint, payload)
    log "failed #{@action_name} in #{payload[:time]} seconds\n#{exception.message}\n#{exception.backtrace}"
  end
end
|
47
|
+
|
48
|
+
# Runs the given block, and on error logs the failure (title, message and
# backtrace), sleeps RETRY_WAIT seconds, and runs the block again. It keeps
# retrying until the block finishes, then returns the block's value.
#
# Only StandardError is retried. SystemExit, Interrupt and other
# non-standard exceptions propagate, so a worker stuck in this loop can
# still be shut down by a signal or an exit request.
def keep_trying_to(title)
  begin
    yield
  rescue StandardError => e
    log "failed to #{title} -- retry in #{RETRY_WAIT} seconds"
    log e.message
    log e.backtrace
    sleep RETRY_WAIT
    retry
  end
end
|
59
|
+
|
60
|
+
# Does this Worker have a job to do?
|
61
|
+
def has_work?
|
62
|
+
@action_name && @input && @options
|
63
|
+
end
|
64
|
+
|
65
|
+
# Executes the current work unit, catching all exceptions as failures.
|
66
|
+
# Instantiates the action named by @action_name, configures it, and runs
# the phase selected by @status (process, split or merge). The result is
# sent back through complete_work_unit; any exception is reported through
# fail_work_unit. clear_work_unit always runs afterwards.
#
# SystemExit and SignalException are reported as a failure and then
# re-raised: they are shutdown requests (the daemon traps INT and TERM with
# 'EXIT'), and swallowing them would leave the worker running forever.
def run
  begin
    @action = CloudCrowd.actions(@action_name).new
    @action.configure(@status, @input, @options, @store)
    result = case @status
    when CloudCrowd::PROCESSING then @action.process
    when CloudCrowd::SPLITTING  then @action.split
    when CloudCrowd::MERGING    then @action.merge
    else raise "Work units must specify their status."
    end
    complete_work_unit(result)
  rescue SystemExit, SignalException => e
    fail_work_unit(e)
    raise
  rescue Exception => e
    fail_work_unit(e)
  ensure
    clear_work_unit
  end
end
|
83
|
+
|
84
|
+
|
85
|
+
private
|
86
|
+
|
87
|
+
# Common parameters to send back to central, regardless of success or failure.
|
88
|
+
def completion_params
|
89
|
+
{:id => @options['work_unit_id'], :time => Time.now - @start_time}
|
90
|
+
end
|
91
|
+
|
92
|
+
# Extract our instance variables from a WorkUnit's JSON.
|
93
|
+
def parse_work_unit(unit_json)
  unit = JSON.parse(unit_json)
  @action_name, @input, @options, @status = unit.values_at('action', 'input', 'options', 'status')
  # Fold the unit's identifiers into the options; an 'attempts' value
  # already present in the options is kept.
  @options['job_id']       = unit['job_id']
  @options['work_unit_id'] = unit['id']
  @options['attempts']   ||= unit['attempts']
end
|
100
|
+
|
101
|
+
# Log a message to the daemon log. Includes PID for identification.
|
102
|
+
def log(message)
|
103
|
+
puts "Worker ##{@id}: #{message}"
|
104
|
+
end
|
105
|
+
|
106
|
+
# When we're done with a unit, clear out our ivars to make way for the next.
|
107
|
+
# Also, remove all of the previous unit's temporary storage.
|
108
|
+
# Resets the worker after a unit: asks the action (if there is one) to clean
# up its work directory, then clears every per-unit instance variable.
#
# @action is nil when the action could not be instantiated; the guard keeps
# this cleanup step from raising in that case, so the per-unit state is
# still cleared. @status is cleared along with the rest.
def clear_work_unit
  @action.cleanup_work_directory if @action
  @action, @action_name, @input, @options, @status, @start_time = nil, nil, nil, nil, nil, nil
end
|
112
|
+
|
113
|
+
end
|
114
|
+
|
115
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
# A Worker Daemon needs to be running to perform this integration test.
|
4
|
+
class FailingWorkUnitsTest < Test::Unit::TestCase
|
5
|
+
|
6
|
+
should "retry work units when they fail" do
|
7
|
+
browser = Rack::Test::Session.new(Rack::MockSession.new(CloudCrowd::App))
|
8
|
+
|
9
|
+
browser.post '/jobs', :json => {
|
10
|
+
'action' => 'failure_testing',
|
11
|
+
'inputs' => ['one', 'two', 'three'],
|
12
|
+
'options' => {}
|
13
|
+
}.to_json
|
14
|
+
assert browser.last_response.ok?
|
15
|
+
|
16
|
+
job = Job.last
|
17
|
+
(CloudCrowd.config[:work_unit_retries] - 1).times do
|
18
|
+
job.work_units.each {|unit| unit.fail('failed', 10) }
|
19
|
+
end
|
20
|
+
assert job.reload.work_units_remaining == 3
|
21
|
+
job.work_units.reload.each_with_index do |unit, i|
|
22
|
+
assert unit.processing?
|
23
|
+
assert unit.attempts == CloudCrowd.config[:work_unit_retries] - 1
|
24
|
+
unit.fail('failed', 10)
|
25
|
+
assert unit.job.any_work_units_failed? if i == 0
|
26
|
+
end
|
27
|
+
assert job.reload.failed?
|
28
|
+
assert job.work_units.count == 0
|
29
|
+
end
|
30
|
+
|
31
|
+
end
|
32
|
+
|
data/test/blueprints.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
Sham.url { Faker::Internet.domain_name + "/" + Faker::Internet.domain_word + ".jpg" }
|
2
|
+
|
3
|
+
Job.blueprint do
|
4
|
+
status { CloudCrowd::PROCESSING }
|
5
|
+
inputs { ['http://www.google.com/intl/en_ALL/images/logo.gif'].to_json }
|
6
|
+
action { 'graphics_magick' }
|
7
|
+
options { {}.to_json }
|
8
|
+
end
|
9
|
+
|
10
|
+
WorkUnit.blueprint do
|
11
|
+
job { Job.make }
|
12
|
+
status { CloudCrowd::PROCESSING }
|
13
|
+
taken { false }
|
14
|
+
input { Sham.url }
|
15
|
+
end
|
data/test/test_helper.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
|
3
|
+
here = File.dirname(__FILE__)
|
4
|
+
require File.expand_path(here + "/../lib/cloud-crowd")
|
5
|
+
CloudCrowd.configure(here + '/config/test_config.yml')
|
6
|
+
CloudCrowd.configure_database(here + '/config/test_database.yml')
|
7
|
+
|
8
|
+
require 'faker'
|
9
|
+
require 'sham'
|
10
|
+
require 'rack/test'
|
11
|
+
require 'shoulda/active_record'
|
12
|
+
require 'machinist/active_record'
|
13
|
+
require 'mocha'
|
14
|
+
require "#{CloudCrowd::App.root}/test/blueprints.rb"
|
15
|
+
|
16
|
+
class Test::Unit::TestCase
|
17
|
+
include CloudCrowd
|
18
|
+
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
class JobTest < Test::Unit::TestCase
|
4
|
+
|
5
|
+
context "A CloudCrowd Job" do
|
6
|
+
|
7
|
+
setup do
|
8
|
+
@job = Job.make
|
9
|
+
@unit = @job.work_units.first
|
10
|
+
end
|
11
|
+
|
12
|
+
subject { @job }
|
13
|
+
|
14
|
+
should_have_many :work_units
|
15
|
+
|
16
|
+
should_validate_presence_of :status, :inputs, :action, :options
|
17
|
+
|
18
|
+
should "create all of its work units as soon as the job is created" do
|
19
|
+
assert @job.work_units.count >= 1
|
20
|
+
assert @job.work_units_remaining == 1
|
21
|
+
assert @job.processing?
|
22
|
+
assert @unit.processing?
|
23
|
+
assert !@job.all_work_units_complete?
|
24
|
+
end
|
25
|
+
|
26
|
+
should "know its completion status" do
|
27
|
+
assert !@job.all_work_units_complete?
|
28
|
+
@unit.update_attributes(:status => CloudCrowd::SUCCEEDED, :output => 'hello')
|
29
|
+
assert @job.reload.all_work_units_complete?
|
30
|
+
assert @job.work_units_remaining == 0
|
31
|
+
assert @job.outputs == "[\"hello\"]"
|
32
|
+
end
|
33
|
+
|
34
|
+
should "be able to create a job from a JSON request" do
|
35
|
+
job = Job.create_from_request(JSON.parse(<<-EOS
|
36
|
+
{ "inputs" : ["one", "two", "three"],
|
37
|
+
"action" : "graphics_magick",
|
38
|
+
"owner_email" : "bob@example.com",
|
39
|
+
"callback_url" : "http://example.com/callback" }
|
40
|
+
EOS
|
41
|
+
))
|
42
|
+
assert job.work_units.count == 3
|
43
|
+
assert job.action == 'graphics_magick'
|
44
|
+
assert job.action_class == GraphicsMagick
|
45
|
+
assert job.callback_url == "http://example.com/callback"
|
46
|
+
end
|
47
|
+
|
48
|
+
should "create jobs with a SPLITTING status for actions that have a split method defined" do
|
49
|
+
job = Job.create_from_request({'inputs' => ['1'], 'action' => 'pdf_to_images'})
|
50
|
+
assert job.splittable?
|
51
|
+
assert job.splitting?
|
52
|
+
end
|
53
|
+
|
54
|
+
should "fire a callback when a job has finished, successfully or not" do
|
55
|
+
Job.any_instance.expects(:fire_callback)
|
56
|
+
@job.work_units.first.finish('output', 10)
|
57
|
+
assert @job.all_work_units_complete?
|
58
|
+
end
|
59
|
+
|
60
|
+
should "have a 'pretty' display of the Job's status" do
|
61
|
+
assert @job.display_status == 'processing'
|
62
|
+
@job.update_attribute(:status, CloudCrowd::FAILED)
|
63
|
+
assert @job.display_status == 'failed'
|
64
|
+
@job.update_attribute(:status, CloudCrowd::MERGING)
|
65
|
+
assert @job.display_status == 'merging'
|
66
|
+
end
|
67
|
+
|
68
|
+
end
|
69
|
+
|
70
|
+
end
|