documentcloud-cloud-crowd 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/cloud-crowd.gemspec +1 -1
- data/config/config.example.ru +0 -1
- data/config/config.example.yml +41 -11
- data/config/database.example.yml +3 -0
- data/lib/cloud-crowd.rb +31 -5
- data/lib/cloud_crowd/app.rb +16 -21
- data/lib/cloud_crowd/daemon.rb +7 -5
- data/lib/cloud_crowd/helpers/resources.rb +21 -0
- data/lib/cloud_crowd/models/job.rb +123 -120
- data/lib/cloud_crowd/models/work_unit.rb +74 -61
- data/lib/cloud_crowd/models.rb +0 -2
- data/lib/cloud_crowd/runner.rb +4 -16
- data/lib/cloud_crowd/worker.rb +12 -9
- data/test/acceptance/test_failing_work_units.rb +1 -1
- data/test/blueprints.rb +3 -3
- data/test/config/config.yml +1 -1
- data/test/test_helper.rb +0 -2
- data/test/unit/test_job.rb +4 -4
- data/test/unit/test_work_unit.rb +2 -2
- metadata +1 -1
data/cloud-crowd.gemspec
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = 'cloud-crowd'
|
3
|
-
s.version = '0.0.
|
3
|
+
s.version = '0.0.3' # Keep version in sync with cloud-crowd.rb
|
4
4
|
s.date = '2009-08-23'
|
5
5
|
|
6
6
|
s.homepage = "http://documentcloud.org" # wiki page on github?
|
data/config/config.example.ru
CHANGED
data/config/config.example.yml
CHANGED
@@ -1,16 +1,46 @@
|
|
1
|
-
|
2
|
-
:default_worker_wait: 1
|
3
|
-
:max_worker_wait: 20
|
4
|
-
:worker_wait_multiplier: 1.3
|
5
|
-
:worker_retry_wait: 5
|
6
|
-
:work_unit_retries: 3
|
7
|
-
|
1
|
+
# The URL where you're planning on running the server/queue/database.
|
8
2
|
:central_server: http://localhost:9173
|
3
|
+
|
4
|
+
# Please provide your AWS credentials for S3 storage of job output.
|
5
|
+
:aws_access_key: [your AWS access key]
|
6
|
+
:aws_secret_key: [your AWS secret access key]
|
7
|
+
|
8
|
+
# Choose an S3 bucket to store all CloudCrowd output, and decide if you'd like
|
9
|
+
# to keep all resulting files on S3 private. If so, you'll receive authenticated
|
10
|
+
# S3 URLs as job output, good for 24 hours. If left public, you'll get the
|
11
|
+
# straight URLs to the files on S3.
|
12
|
+
:s3_bucket: [your CloudCrowd bucket]
|
13
|
+
:use_s3_authentication: no
|
14
|
+
|
15
|
+
# Use HTTP Basic Auth for all requests? (Includes all internal worker requests
|
16
|
+
# to the central server). If yes, specify the login and password that all
|
17
|
+
# requests must provide for authentication.
|
9
18
|
:use_http_authentication: no
|
10
19
|
:login: [your login name]
|
11
20
|
:password: [your password]
|
12
21
|
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
22
|
+
# Set the following numbers to tweak the configuration of your worker daemons.
|
23
|
+
# Optimum results will depend on the proportion of Memory/CPU/IO bottlenecks
|
24
|
+
# in your actions, the number of central servers you have running, and your
|
25
|
+
# desired balance between latency and traffic.
|
26
|
+
|
27
|
+
# The number of workers that `crowd workers start` spins up.
|
28
|
+
:num_workers: 4
|
29
|
+
|
30
|
+
# The minimum number of seconds a worker waits between checking the job queue.
|
31
|
+
:min_worker_wait: 1
|
32
|
+
|
33
|
+
# The maximum number of seconds a worker waits between checking the job queue.
|
34
|
+
:max_worker_wait: 20
|
35
|
+
|
36
|
+
# The backoff multiplier the worker uses to slow down the check interval when
|
37
|
+
# there's no work in the queue.
|
38
|
+
:worker_wait_multiplier: 1.3
|
39
|
+
|
40
|
+
# The number of seconds a worker waits to retry when there's some kind of
|
41
|
+
# internal error (i.e. the central server fails to respond)
|
42
|
+
:worker_retry_wait: 5
|
43
|
+
|
44
|
+
# The number of separate attempts that will be made to process an individual
|
45
|
+
# work unit, before marking it as having failed.
|
46
|
+
:work_unit_retries: 3
|
data/config/database.example.yml
CHANGED
data/lib/cloud-crowd.rb
CHANGED
@@ -1,21 +1,47 @@
|
|
1
|
+
# The Grand Central of code loading...
|
2
|
+
|
1
3
|
$LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__))
|
2
4
|
|
3
5
|
# Common Gems:
|
4
|
-
require '
|
5
|
-
|
6
|
-
|
6
|
+
require 'rubygems'
|
7
|
+
gem 'activerecord'
|
8
|
+
gem 'daemons'
|
9
|
+
gem 'json'
|
10
|
+
gem 'rest-client'
|
11
|
+
gem 'right_aws'
|
12
|
+
gem 'sinatra'
|
7
13
|
|
8
14
|
# Common CloudCrowd libs:
|
9
15
|
require 'cloud_crowd/core_ext'
|
10
|
-
|
16
|
+
|
17
|
+
# Autoloading for all the pieces which may or may not be needed:
|
18
|
+
autoload :ActiveRecord, 'activerecord'
|
19
|
+
autoload :Benchmark, 'benchmark'
|
20
|
+
autoload :Daemons, 'daemons'
|
21
|
+
autoload :ERB, 'erb'
|
22
|
+
autoload :FileUtils, 'fileutils'
|
23
|
+
autoload :JSON, 'json'
|
24
|
+
autoload :RestClient, 'rest_client'
|
25
|
+
autoload :RightAws, 'right_aws'
|
26
|
+
autoload :Sinatra, 'sinatra'
|
27
|
+
autoload :Socket, 'socket'
|
28
|
+
autoload :YAML, 'yaml'
|
11
29
|
|
12
30
|
module CloudCrowd
|
13
31
|
|
32
|
+
# Autoload all the CloudCrowd classes which may not be required.
|
33
|
+
autoload :App, 'cloud_crowd/app'
|
34
|
+
autoload :Action, 'cloud_crowd/action'
|
35
|
+
autoload :AssetStore, 'cloud_crowd/asset_store'
|
36
|
+
autoload :Helpers, 'cloud_crowd/helpers'
|
37
|
+
autoload :Job, 'cloud_crowd/models'
|
38
|
+
autoload :WorkUnit, 'cloud_crowd/models'
|
39
|
+
|
14
40
|
# Root directory of the CloudCrowd gem.
|
15
41
|
ROOT = File.expand_path(File.dirname(__FILE__) + '/..')
|
16
42
|
|
17
43
|
# Keep the version in sync with the gemspec.
|
18
|
-
VERSION = '0.0.
|
44
|
+
VERSION = '0.0.3'
|
19
45
|
|
20
46
|
# A Job is processing if its WorkUnits are in the queue to be handled by workers.
|
21
47
|
PROCESSING = 1
|
data/lib/cloud_crowd/app.rb
CHANGED
@@ -1,8 +1,3 @@
|
|
1
|
-
require 'erb'
|
2
|
-
require 'sinatra'
|
3
|
-
require 'cloud_crowd/models'
|
4
|
-
require 'cloud_crowd/helpers'
|
5
|
-
|
6
1
|
module CloudCrowd
|
7
2
|
|
8
3
|
class App < Sinatra::Default
|
@@ -10,7 +5,7 @@ module CloudCrowd
|
|
10
5
|
# static serves files from /public, methodoverride allows the _method param.
|
11
6
|
enable :static, :methodoverride
|
12
7
|
|
13
|
-
set :root,
|
8
|
+
set :root, CloudCrowd::ROOT
|
14
9
|
set :authorization_realm, "CloudCrowd"
|
15
10
|
|
16
11
|
helpers CloudCrowd::Helpers
|
@@ -40,29 +35,29 @@ module CloudCrowd
|
|
40
35
|
# Internal method for worker daemons to fetch the work unit at the front
|
41
36
|
# of the queue. Work unit is marked as taken and handed off to the worker.
|
42
37
|
get '/work' do
|
43
|
-
|
44
|
-
unit = WorkUnit.first(:conditions => {:status => CloudCrowd::INCOMPLETE, :taken => false}, :order => "created_at desc")
|
45
|
-
return status(204) && '' unless unit
|
46
|
-
unit.update_attributes(:taken => true)
|
47
|
-
unit.to_json
|
48
|
-
rescue ActiveRecord::StaleObjectError => e
|
49
|
-
return status(204) && ''
|
50
|
-
end
|
38
|
+
dequeue_work_unit
|
51
39
|
end
|
52
40
|
|
53
41
|
# When workers are done with their unit, either successfully or in failure,
|
54
|
-
# they mark it back on the central server.
|
42
|
+
# they mark it back on the central server and retrieve another. Failures
|
43
|
+
# pull from one down in the queue, so as to not repeat the same unit.
|
55
44
|
put '/work/:work_unit_id' do
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
45
|
+
handle_conflicts(409) do
|
46
|
+
case params[:status]
|
47
|
+
when 'succeeded'
|
48
|
+
current_work_unit.finish(params[:output], params[:time])
|
49
|
+
dequeue_work_unit
|
50
|
+
when 'failed'
|
51
|
+
current_work_unit.fail(params[:output], params[:time])
|
52
|
+
dequeue_work_unit(1)
|
53
|
+
else
|
54
|
+
return error(500, "Completing a work unit must specify status.")
|
55
|
+
end
|
60
56
|
end
|
61
|
-
return status(204) && ''
|
62
57
|
end
|
63
58
|
|
64
59
|
# To monitor the central server with Monit, God, Nagios, or another
|
65
|
-
# monitoring tool, you can hit /heartbeat to
|
60
|
+
# monitoring tool, you can hit /heartbeat to make sure it is still responding.
|
66
61
|
get '/heartbeat' do
|
67
62
|
"buh-bump"
|
68
63
|
end
|
data/lib/cloud_crowd/daemon.rb
CHANGED
@@ -10,12 +10,12 @@ module CloudCrowd
|
|
10
10
|
# isn't any work to be done, and speeds back up when there is.
|
11
11
|
class Daemon
|
12
12
|
|
13
|
-
|
13
|
+
MIN_WAIT = CloudCrowd.config[:min_worker_wait]
|
14
14
|
MAX_WAIT = CloudCrowd.config[:max_worker_wait]
|
15
15
|
WAIT_MULTIPLIER = CloudCrowd.config[:worker_wait_multiplier]
|
16
16
|
|
17
17
|
def initialize
|
18
|
-
@wait_time =
|
18
|
+
@wait_time = MIN_WAIT
|
19
19
|
@worker = CloudCrowd::Worker.new
|
20
20
|
Signal.trap('INT', 'EXIT')
|
21
21
|
Signal.trap('KILL', 'EXIT')
|
@@ -31,9 +31,11 @@ module CloudCrowd
|
|
31
31
|
loop do
|
32
32
|
@worker.fetch_work_unit
|
33
33
|
if @worker.has_work?
|
34
|
-
@
|
35
|
-
@
|
36
|
-
|
34
|
+
@wait_time = MIN_WAIT
|
35
|
+
while @worker.has_work?
|
36
|
+
@worker.run
|
37
|
+
sleep 0.01 # So as to listen for incoming signals.
|
38
|
+
end
|
37
39
|
else
|
38
40
|
@wait_time = [@wait_time * WAIT_MULTIPLIER, MAX_WAIT].min
|
39
41
|
sleep @wait_time
|
@@ -10,6 +10,27 @@ module CloudCrowd
|
|
10
10
|
@work_unit ||= WorkUnit.find_by_id(params[:work_unit_id]) or raise Sinatra::NotFound
|
11
11
|
end
|
12
12
|
|
13
|
+
# Try to fetch a work unit from the queue. If none are pending, respond
|
14
|
+
# with no content.
|
15
|
+
def dequeue_work_unit(offset=0)
|
16
|
+
handle_conflicts do
|
17
|
+
unit = WorkUnit.dequeue(offset)
|
18
|
+
return status(204) && '' unless unit
|
19
|
+
unit.to_json
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
# We're using ActiveRecord's optimistic locking, so stale work units
|
24
|
+
# may sometimes arise. handle_conflicts responds with the HTTP status
|
25
|
+
# code of your choosing if the update failed to be applied.
|
26
|
+
def handle_conflicts(code=204)
|
27
|
+
begin
|
28
|
+
yield
|
29
|
+
rescue ActiveRecord::StaleObjectError => e
|
30
|
+
return status(code) && ''
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
13
34
|
end
|
14
35
|
end
|
15
36
|
end
|
@@ -1,129 +1,132 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
#
|
4
|
-
#
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
# Create a Job from an incoming JSON or XML request, and add it to the queue.
|
13
|
-
# TODO: Add XML support.
|
14
|
-
def self.create_from_request(h)
|
15
|
-
self.create(
|
16
|
-
:inputs => h['inputs'].to_json,
|
17
|
-
:action => h['action'],
|
18
|
-
:options => (h['options'] || {}).to_json,
|
19
|
-
:owner_email => h['owner_email'],
|
20
|
-
:callback_url => h['callback_url']
|
21
|
-
)
|
22
|
-
end
|
23
|
-
|
24
|
-
def after_create
|
25
|
-
self.queue_for_workers(JSON.parse(self.inputs))
|
26
|
-
end
|
27
|
-
|
28
|
-
def before_validation_on_create
|
29
|
-
self.status = self.splittable? ? CloudCrowd::SPLITTING : CloudCrowd::PROCESSING
|
30
|
-
end
|
31
|
-
|
32
|
-
# After work units are marked successful, we check to see if all of them have
|
33
|
-
# finished, if so, this job is complete.
|
34
|
-
def check_for_completion
|
35
|
-
return unless all_work_units_complete?
|
36
|
-
transition_to_next_phase
|
37
|
-
output_list = gather_outputs_from_work_units
|
1
|
+
module CloudCrowd
|
2
|
+
|
3
|
+
# A chunk of work that will be farmed out into many WorkUnits to be processed
|
4
|
+
# in parallel by all the active CloudCrowd::Workers. Jobs are defined by a list
|
5
|
+
# of inputs (usually public urls to files), an action (the name of a script that
|
6
|
+
# CloudCrowd knows how to run), and, eventually a corresponding list of output.
|
7
|
+
class Job < ActiveRecord::Base
|
8
|
+
include CloudCrowd::ModelStatus
|
9
|
+
|
10
|
+
has_many :work_units, :dependent => :destroy
|
38
11
|
|
39
|
-
|
40
|
-
|
41
|
-
|
12
|
+
validates_presence_of :status, :inputs, :action, :options
|
13
|
+
|
14
|
+
# Create a Job from an incoming JSON or XML request, and add it to the queue.
|
15
|
+
# TODO: Add XML support.
|
16
|
+
def self.create_from_request(h)
|
17
|
+
self.create(
|
18
|
+
:inputs => h['inputs'].to_json,
|
19
|
+
:action => h['action'],
|
20
|
+
:options => (h['options'] || {}).to_json,
|
21
|
+
:owner_email => h['owner_email'],
|
22
|
+
:callback_url => h['callback_url']
|
23
|
+
)
|
42
24
|
end
|
43
|
-
self.save
|
44
25
|
|
45
|
-
|
46
|
-
|
47
|
-
when CloudCrowd::MERGING then queue_for_workers(output_list.to_json)
|
48
|
-
else fire_callback
|
26
|
+
def after_create
|
27
|
+
self.queue_for_workers(JSON.parse(self.inputs))
|
49
28
|
end
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
# Transition from the current phase to the next one.
|
54
|
-
def transition_to_next_phase
|
55
|
-
self.status = any_work_units_failed? ? CloudCrowd::FAILED :
|
56
|
-
self.splitting? ? CloudCrowd::PROCESSING :
|
57
|
-
self.should_merge? ? CloudCrowd::MERGING :
|
58
|
-
CloudCrowd::SUCCEEDED
|
59
|
-
end
|
60
|
-
|
61
|
-
# If a callback_url is defined, post the Job's JSON to it upon completion.
|
62
|
-
def fire_callback
|
63
|
-
begin
|
64
|
-
RestClient.post(callback_url, {:job => self.to_json}) if callback_url
|
65
|
-
rescue RestClient::Exception => e
|
66
|
-
puts "Failed to fire job callback. Hmmm, what should happen here?"
|
29
|
+
|
30
|
+
def before_validation_on_create
|
31
|
+
self.status = self.splittable? ? CloudCrowd::SPLITTING : CloudCrowd::PROCESSING
|
67
32
|
end
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
# Have all of the WorkUnits finished? We could trade reads for writes here
|
76
|
-
# by keeping a completed_count on the Job itself.
|
77
|
-
def all_work_units_complete?
|
78
|
-
self.work_units.incomplete.count <= 0
|
79
|
-
end
|
80
|
-
|
81
|
-
# Have any of the WorkUnits failed?
|
82
|
-
def any_work_units_failed?
|
83
|
-
self.work_units.failed.count > 0
|
84
|
-
end
|
85
|
-
|
86
|
-
def splittable?
|
87
|
-
self.action_class.new.respond_to? :split
|
88
|
-
end
|
89
|
-
|
90
|
-
def should_merge?
|
91
|
-
self.processing? && self.action_class.new.respond_to?(:merge)
|
92
|
-
end
|
93
|
-
|
94
|
-
def action_class
|
95
|
-
CloudCrowd.actions(self.action)
|
96
|
-
end
|
97
|
-
|
98
|
-
def gather_outputs_from_work_units
|
99
|
-
outs = self.work_units.complete.map {|wu| wu.output }
|
100
|
-
self.work_units.complete.destroy_all
|
101
|
-
outs
|
102
|
-
end
|
103
|
-
|
104
|
-
def display_status
|
105
|
-
CloudCrowd.display_status(self.status)
|
106
|
-
end
|
107
|
-
|
108
|
-
def work_units_remaining
|
109
|
-
self.work_units.incomplete.count
|
110
|
-
end
|
111
|
-
|
112
|
-
# A JSON representation of this job includes the statuses of its component
|
113
|
-
# WorkUnits, as well as any completed outputs.
|
114
|
-
def to_json(opts={})
|
115
|
-
atts = {'id' => self.id, 'status' => self.display_status, 'work_units_remaining' => self.work_units_remaining}
|
116
|
-
atts.merge!({'outputs' => JSON.parse(self.outputs)}) if self.outputs
|
117
|
-
atts.merge!({'time' => self.time}) if self.time
|
118
|
-
atts.to_json
|
119
|
-
end
|
33
|
+
|
34
|
+
# After work units are marked successful, we check to see if all of them have
|
35
|
+
# finished, if so, this job is complete.
|
36
|
+
def check_for_completion
|
37
|
+
return unless all_work_units_complete?
|
38
|
+
transition_to_next_phase
|
39
|
+
output_list = gather_outputs_from_work_units
|
120
40
|
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
41
|
+
if complete?
|
42
|
+
self.outputs = output_list.to_json
|
43
|
+
self.time = Time.now - self.created_at
|
44
|
+
end
|
45
|
+
self.save
|
46
|
+
|
47
|
+
case self.status
|
48
|
+
when CloudCrowd::PROCESSING then queue_for_workers(output_list.map {|o| JSON.parse(o) }.flatten)
|
49
|
+
when CloudCrowd::MERGING then queue_for_workers(output_list.to_json)
|
50
|
+
else fire_callback
|
51
|
+
end
|
52
|
+
self
|
53
|
+
end
|
54
|
+
|
55
|
+
# Transition from the current phase to the next one.
|
56
|
+
def transition_to_next_phase
|
57
|
+
self.status = any_work_units_failed? ? CloudCrowd::FAILED :
|
58
|
+
self.splitting? ? CloudCrowd::PROCESSING :
|
59
|
+
self.should_merge? ? CloudCrowd::MERGING :
|
60
|
+
CloudCrowd::SUCCEEDED
|
61
|
+
end
|
62
|
+
|
63
|
+
# If a callback_url is defined, post the Job's JSON to it upon completion.
|
64
|
+
def fire_callback
|
65
|
+
begin
|
66
|
+
RestClient.post(callback_url, {:job => self.to_json}) if callback_url
|
67
|
+
rescue RestClient::Exception => e
|
68
|
+
puts "Failed to fire job callback. Hmmm, what should happen here?"
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
72
|
+
# Cleaning up after a job will remove all of its files from S3.
|
73
|
+
def cleanup
|
74
|
+
CloudCrowd::AssetStore.new.cleanup_job(self)
|
75
|
+
end
|
76
|
+
|
77
|
+
# Have all of the WorkUnits finished? We could trade reads for writes here
|
78
|
+
# by keeping a completed_count on the Job itself.
|
79
|
+
def all_work_units_complete?
|
80
|
+
self.work_units.incomplete.count <= 0
|
81
|
+
end
|
82
|
+
|
83
|
+
# Have any of the WorkUnits failed?
|
84
|
+
def any_work_units_failed?
|
85
|
+
self.work_units.failed.count > 0
|
86
|
+
end
|
87
|
+
|
88
|
+
def splittable?
|
89
|
+
self.action_class.new.respond_to? :split
|
90
|
+
end
|
91
|
+
|
92
|
+
def should_merge?
|
93
|
+
self.processing? && self.action_class.new.respond_to?(:merge)
|
94
|
+
end
|
95
|
+
|
96
|
+
def action_class
|
97
|
+
CloudCrowd.actions(self.action)
|
98
|
+
end
|
99
|
+
|
100
|
+
def gather_outputs_from_work_units
|
101
|
+
outs = self.work_units.complete.map {|wu| wu.output }
|
102
|
+
self.work_units.complete.destroy_all
|
103
|
+
outs
|
104
|
+
end
|
105
|
+
|
106
|
+
def display_status
|
107
|
+
CloudCrowd.display_status(self.status)
|
108
|
+
end
|
109
|
+
|
110
|
+
def work_units_remaining
|
111
|
+
self.work_units.incomplete.count
|
112
|
+
end
|
113
|
+
|
114
|
+
# A JSON representation of this job includes the statuses of its component
|
115
|
+
# WorkUnits, as well as any completed outputs.
|
116
|
+
def to_json(opts={})
|
117
|
+
atts = {'id' => self.id, 'status' => self.display_status, 'work_units_remaining' => self.work_units_remaining}
|
118
|
+
atts.merge!({'outputs' => JSON.parse(self.outputs)}) if self.outputs
|
119
|
+
atts.merge!({'time' => self.time}) if self.time
|
120
|
+
atts.to_json
|
121
|
+
end
|
122
|
+
|
123
|
+
# When starting a new job, or moving to a new stage, split up the inputs
|
124
|
+
# into WorkUnits, and queue them.
|
125
|
+
def queue_for_workers(input)
|
126
|
+
[input].flatten.each do |wu_input|
|
127
|
+
WorkUnit.create(:job => self, :input => wu_input, :status => self.status)
|
128
|
+
end
|
126
129
|
end
|
130
|
+
|
127
131
|
end
|
128
|
-
|
129
132
|
end
|
@@ -1,62 +1,75 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
1
|
+
module CloudCrowd
|
2
|
+
|
3
|
+
# A WorkUnit is an atomic chunk of work from a job, processing a single input
|
4
|
+
# through a single action. All WorkUnits receive the same options.
|
5
|
+
class WorkUnit < ActiveRecord::Base
|
6
|
+
include CloudCrowd::ModelStatus
|
7
|
+
|
8
|
+
belongs_to :job
|
9
|
+
|
10
|
+
validates_presence_of :job_id, :status, :input
|
11
|
+
|
12
|
+
after_save :check_for_job_completion
|
13
|
+
|
14
|
+
# Find the Nth available WorkUnit in the queue, and take it out.
|
15
|
+
def self.dequeue(offset=0)
|
16
|
+
unit = self.first(
|
17
|
+
:conditions => {:status => CloudCrowd::INCOMPLETE, :taken => false},
|
18
|
+
:order => "created_at asc",
|
19
|
+
:offset => offset
|
20
|
+
)
|
21
|
+
unit ? unit.update_attributes(:taken => true) && unit : nil
|
22
|
+
end
|
23
|
+
|
24
|
+
# After saving a WorkUnit, its Job should check if it has just become complete.
|
25
|
+
def check_for_job_completion
|
26
|
+
self.job.check_for_completion if complete?
|
27
|
+
end
|
28
|
+
|
29
|
+
# Mark this unit as having finished successfully.
|
30
|
+
def finish(output, time_taken)
|
31
|
+
update_attributes({
|
32
|
+
:status => CloudCrowd::SUCCEEDED,
|
33
|
+
:taken => false,
|
34
|
+
:attempts => self.attempts + 1,
|
35
|
+
:output => output,
|
36
|
+
:time => time_taken
|
37
|
+
})
|
38
|
+
end
|
39
|
+
|
40
|
+
# Mark this unit as having failed. May attempt a retry.
|
41
|
+
def fail(output, time_taken)
|
42
|
+
tries = self.attempts + 1
|
43
|
+
return try_again if tries < CloudCrowd.config[:work_unit_retries]
|
44
|
+
update_attributes({
|
45
|
+
:status => CloudCrowd::FAILED,
|
46
|
+
:taken => false,
|
47
|
+
:attempts => tries,
|
48
|
+
:output => output,
|
49
|
+
:time => time_taken
|
50
|
+
})
|
51
|
+
end
|
52
|
+
|
53
|
+
# Ever tried. Ever failed. No matter. Try again. Fail again. Fail better.
|
54
|
+
def try_again
|
55
|
+
update_attributes({
|
56
|
+
:taken => false,
|
57
|
+
:attempts => self.attempts + 1
|
58
|
+
})
|
59
|
+
end
|
60
|
+
|
61
|
+
# The JSON representation of a WorkUnit contains common elements of its job.
|
62
|
+
def to_json
|
63
|
+
{
|
64
|
+
'id' => self.id,
|
65
|
+
'job_id' => self.job_id,
|
66
|
+
'input' => self.input,
|
67
|
+
'attempts' => self.attempts,
|
68
|
+
'action' => self.job.action,
|
69
|
+
'options' => JSON.parse(self.job.options),
|
70
|
+
'status' => self.status
|
71
|
+
}.to_json
|
72
|
+
end
|
73
|
+
|
15
74
|
end
|
16
|
-
|
17
|
-
# Mark this unit as having finished successfully.
|
18
|
-
def finish(output, time_taken)
|
19
|
-
update_attributes({
|
20
|
-
:status => CloudCrowd::SUCCEEDED,
|
21
|
-
:taken => false,
|
22
|
-
:attempts => self.attempts + 1,
|
23
|
-
:output => output,
|
24
|
-
:time => time_taken
|
25
|
-
})
|
26
|
-
end
|
27
|
-
|
28
|
-
# Mark this unit as having failed. May attempt a retry.
|
29
|
-
def fail(output, time_taken)
|
30
|
-
tries = self.attempts + 1
|
31
|
-
return try_again if tries < CloudCrowd.config[:work_unit_retries]
|
32
|
-
update_attributes({
|
33
|
-
:status => CloudCrowd::FAILED,
|
34
|
-
:taken => false,
|
35
|
-
:attempts => tries,
|
36
|
-
:output => output,
|
37
|
-
:time => time_taken
|
38
|
-
})
|
39
|
-
end
|
40
|
-
|
41
|
-
# Ever tried. Ever failed. No matter. Try again. Fail again. Fail better.
|
42
|
-
def try_again
|
43
|
-
update_attributes({
|
44
|
-
:taken => false,
|
45
|
-
:attempts => self.attempts + 1
|
46
|
-
})
|
47
|
-
end
|
48
|
-
|
49
|
-
# The JSON representation of a WorkUnit contains common elements of its job.
|
50
|
-
def to_json
|
51
|
-
{
|
52
|
-
'id' => self.id,
|
53
|
-
'job_id' => self.job_id,
|
54
|
-
'input' => self.input,
|
55
|
-
'attempts' => self.attempts,
|
56
|
-
'action' => self.job.action,
|
57
|
-
'options' => JSON.parse(self.job.options),
|
58
|
-
'status' => self.status
|
59
|
-
}.to_json
|
60
|
-
end
|
61
|
-
|
62
|
-
end
|
75
|
+
end
|
data/lib/cloud_crowd/models.rb
CHANGED
data/lib/cloud_crowd/runner.rb
CHANGED
@@ -1,22 +1,10 @@
|
|
1
|
-
# This is the script that kicks off a single CloudCrowd::Daemon.
|
2
|
-
#
|
3
|
-
# environment.rb, loading all the common gems that we need.
|
4
|
-
|
5
|
-
# Standard Libs
|
6
|
-
require 'fileutils'
|
7
|
-
require 'benchmark'
|
8
|
-
require 'socket'
|
9
|
-
|
10
|
-
# Gems
|
11
|
-
require 'rubygems'
|
12
|
-
require 'daemons'
|
13
|
-
require 'yaml'
|
14
|
-
|
15
|
-
FileUtils.mkdir('log') unless File.exists?('log')
|
1
|
+
# This is the script that kicks off a single CloudCrowd::Daemon. Rely on
|
2
|
+
# cloud-crowd.rb for autoloading of all the code we need.
|
16
3
|
|
17
4
|
# Daemon/Worker Dependencies.
|
18
5
|
require "#{File.dirname(__FILE__)}/../cloud-crowd"
|
19
|
-
|
6
|
+
|
7
|
+
FileUtils.mkdir('log') unless File.exists?('log')
|
20
8
|
|
21
9
|
Daemons.run("#{CloudCrowd::ROOT}/lib/cloud_crowd/daemon.rb", {
|
22
10
|
:app_name => "cloud_crowd_worker",
|
data/lib/cloud_crowd/worker.rb
CHANGED
@@ -22,10 +22,7 @@ module CloudCrowd
|
|
22
22
|
def fetch_work_unit
|
23
23
|
keep_trying_to "fetch a new work unit" do
|
24
24
|
unit_json = @server['/work'].get
|
25
|
-
|
26
|
-
@start_time = Time.now
|
27
|
-
parse_work_unit unit_json
|
28
|
-
log "fetched work unit for #{@action_name}"
|
25
|
+
setup_work_unit(unit_json)
|
29
26
|
end
|
30
27
|
end
|
31
28
|
|
@@ -33,8 +30,10 @@ module CloudCrowd
|
|
33
30
|
def complete_work_unit(result)
|
34
31
|
keep_trying_to "complete work unit" do
|
35
32
|
data = completion_params.merge({:status => 'succeeded', :output => result})
|
36
|
-
@server["/work/#{data[:id]}"].put(data)
|
33
|
+
unit_json = @server["/work/#{data[:id]}"].put(data)
|
37
34
|
log "finished #{@action_name} in #{data[:time]} seconds"
|
35
|
+
clear_work_unit
|
36
|
+
setup_work_unit(unit_json)
|
38
37
|
end
|
39
38
|
end
|
40
39
|
|
@@ -42,8 +41,10 @@ module CloudCrowd
|
|
42
41
|
def fail_work_unit(exception)
|
43
42
|
keep_trying_to "mark work unit as failed" do
|
44
43
|
data = completion_params.merge({:status => 'failed', :output => exception.message})
|
45
|
-
@server["/work/#{data[:id]}"].put(data)
|
44
|
+
unit_json = @server["/work/#{data[:id]}"].put(data)
|
46
45
|
log "failed #{@action_name} in #{data[:time]} seconds\n#{exception.message}\n#{exception.backtrace}"
|
46
|
+
clear_work_unit
|
47
|
+
setup_work_unit(unit_json)
|
47
48
|
end
|
48
49
|
end
|
49
50
|
|
@@ -78,8 +79,6 @@ module CloudCrowd
|
|
78
79
|
complete_work_unit(result)
|
79
80
|
rescue Exception => e
|
80
81
|
fail_work_unit(e)
|
81
|
-
ensure
|
82
|
-
clear_work_unit
|
83
82
|
end
|
84
83
|
end
|
85
84
|
|
@@ -107,12 +106,16 @@ module CloudCrowd
|
|
107
106
|
end
|
108
107
|
|
109
108
|
# Extract our instance variables from a WorkUnit's JSON.
|
110
|
-
def
|
109
|
+
def setup_work_unit(unit_json)
|
110
|
+
return false unless unit_json
|
111
111
|
unit = JSON.parse(unit_json)
|
112
|
+
@start_time = Time.now
|
112
113
|
@action_name, @input, @options, @status = unit['action'], unit['input'], unit['options'], unit['status']
|
113
114
|
@options['job_id'] = unit['job_id']
|
114
115
|
@options['work_unit_id'] = unit['id']
|
115
116
|
@options['attempts'] ||= unit['attempts']
|
117
|
+
log "fetched work unit for #{@action_name}"
|
118
|
+
return true
|
116
119
|
end
|
117
120
|
|
118
121
|
# Log a message to the daemon log. Includes PID for identification.
|
@@ -13,7 +13,7 @@ class FailingWorkUnitsTest < Test::Unit::TestCase
|
|
13
13
|
}.to_json
|
14
14
|
assert browser.last_response.ok?
|
15
15
|
|
16
|
-
job = Job.last
|
16
|
+
job = CloudCrowd::Job.last
|
17
17
|
(CloudCrowd.config[:work_unit_retries] - 1).times do
|
18
18
|
job.work_units.each {|unit| unit.fail('failed', 10) }
|
19
19
|
end
|
data/test/blueprints.rb
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
Sham.url { Faker::Internet.domain_name + "/" + Faker::Internet.domain_word + ".jpg" }
|
2
2
|
|
3
|
-
Job.blueprint do
|
3
|
+
CloudCrowd::Job.blueprint do
|
4
4
|
status { CloudCrowd::PROCESSING }
|
5
5
|
inputs { ['http://www.google.com/intl/en_ALL/images/logo.gif'].to_json }
|
6
6
|
action { 'graphics_magick' }
|
7
7
|
options { {}.to_json }
|
8
8
|
end
|
9
9
|
|
10
|
-
WorkUnit.blueprint do
|
11
|
-
job { Job.make }
|
10
|
+
CloudCrowd::WorkUnit.blueprint do
|
11
|
+
job { CloudCrowd::Job.make }
|
12
12
|
status { CloudCrowd::PROCESSING }
|
13
13
|
taken { false }
|
14
14
|
input { Sham.url }
|
data/test/config/config.yml
CHANGED
data/test/test_helper.rb
CHANGED
data/test/unit/test_job.rb
CHANGED
@@ -5,7 +5,7 @@ class JobTest < Test::Unit::TestCase
|
|
5
5
|
context "A CloudCrowd Job" do
|
6
6
|
|
7
7
|
setup do
|
8
|
-
@job = Job.make
|
8
|
+
@job = CloudCrowd::Job.make
|
9
9
|
@unit = @job.work_units.first
|
10
10
|
end
|
11
11
|
|
@@ -32,7 +32,7 @@ class JobTest < Test::Unit::TestCase
|
|
32
32
|
end
|
33
33
|
|
34
34
|
should "be able to create a job from a JSON request" do
|
35
|
-
job = Job.create_from_request(JSON.parse(<<-EOS
|
35
|
+
job = CloudCrowd::Job.create_from_request(JSON.parse(<<-EOS
|
36
36
|
{ "inputs" : ["one", "two", "three"],
|
37
37
|
"action" : "graphics_magick",
|
38
38
|
"owner_email" : "bob@example.com",
|
@@ -46,13 +46,13 @@ class JobTest < Test::Unit::TestCase
|
|
46
46
|
end
|
47
47
|
|
48
48
|
should "create jobs with a SPLITTING status for actions that have a split method defined" do
|
49
|
-
job = Job.create_from_request({'inputs' => ['1'], 'action' => 'pdf_to_images'})
|
49
|
+
job = CloudCrowd::Job.create_from_request({'inputs' => ['1'], 'action' => 'pdf_to_images'})
|
50
50
|
assert job.splittable?
|
51
51
|
assert job.splitting?
|
52
52
|
end
|
53
53
|
|
54
54
|
should "fire a callback when a job has finished, successfully or not" do
|
55
|
-
Job.any_instance.expects(:fire_callback)
|
55
|
+
CloudCrowd::Job.any_instance.expects(:fire_callback)
|
56
56
|
@job.work_units.first.finish('output', 10)
|
57
57
|
assert @job.all_work_units_complete?
|
58
58
|
end
|
data/test/unit/test_work_unit.rb
CHANGED
@@ -5,7 +5,7 @@ class WorkUnitTest < Test::Unit::TestCase
|
|
5
5
|
context "A WorkUnit" do
|
6
6
|
|
7
7
|
setup do
|
8
|
-
@unit = WorkUnit.make
|
8
|
+
@unit = CloudCrowd::WorkUnit.make
|
9
9
|
@job = @unit.job
|
10
10
|
end
|
11
11
|
|
@@ -26,7 +26,7 @@ class WorkUnitTest < Test::Unit::TestCase
|
|
26
26
|
end
|
27
27
|
|
28
28
|
should "have JSON that includes job attributes" do
|
29
|
-
job = Job.make
|
29
|
+
job = CloudCrowd::Job.make
|
30
30
|
unit_data = JSON.parse(job.work_units.first.to_json)
|
31
31
|
assert unit_data['job_id'] == job.id
|
32
32
|
assert unit_data['action'] == job.action
|