documentcloud-cloud-crowd 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/cloud-crowd.gemspec +1 -1
- data/config/config.example.ru +0 -1
- data/config/config.example.yml +41 -11
- data/config/database.example.yml +3 -0
- data/lib/cloud-crowd.rb +31 -5
- data/lib/cloud_crowd/app.rb +16 -21
- data/lib/cloud_crowd/daemon.rb +7 -5
- data/lib/cloud_crowd/helpers/resources.rb +21 -0
- data/lib/cloud_crowd/models/job.rb +123 -120
- data/lib/cloud_crowd/models/work_unit.rb +74 -61
- data/lib/cloud_crowd/models.rb +0 -2
- data/lib/cloud_crowd/runner.rb +4 -16
- data/lib/cloud_crowd/worker.rb +12 -9
- data/test/acceptance/test_failing_work_units.rb +1 -1
- data/test/blueprints.rb +3 -3
- data/test/config/config.yml +1 -1
- data/test/test_helper.rb +0 -2
- data/test/unit/test_job.rb +4 -4
- data/test/unit/test_work_unit.rb +2 -2
- metadata +1 -1
data/cloud-crowd.gemspec
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = 'cloud-crowd'
|
3
|
-
s.version = '0.0.
|
3
|
+
s.version = '0.0.3' # Keep version in sync with cloud-crowd.rb
|
4
4
|
s.date = '2009-08-23'
|
5
5
|
|
6
6
|
s.homepage = "http://documentcloud.org" # wiki page on github?
|
data/config/config.example.ru
CHANGED
data/config/config.example.yml
CHANGED
@@ -1,16 +1,46 @@
|
|
1
|
-
|
2
|
-
:default_worker_wait: 1
|
3
|
-
:max_worker_wait: 20
|
4
|
-
:worker_wait_multiplier: 1.3
|
5
|
-
:worker_retry_wait: 5
|
6
|
-
:work_unit_retries: 3
|
7
|
-
|
1
|
+
# The URL where you're planning on running the server/queue/database.
|
8
2
|
:central_server: http://localhost:9173
|
3
|
+
|
4
|
+
# Please provide your AWS credentials for S3 storage of job output.
|
5
|
+
:aws_access_key: [your AWS access key]
|
6
|
+
:aws_secret_key: [your AWS secret access key]
|
7
|
+
|
8
|
+
# Choose an S3 bucket to store all CloudCrowd output, and decide if you'd like
|
9
|
+
# to keep all resulting files on S3 private. If so, you'll receive authenticated
|
10
|
+
# S3 URLs as job output, good for 24 hours. If left public, you'll get the
|
11
|
+
# straight URLs to the files on S3.
|
12
|
+
:s3_bucket: [your CloudCrowd bucket]
|
13
|
+
:use_s3_authentication: no
|
14
|
+
|
15
|
+
# Use HTTP Basic Auth for all requests? (Includes all internal worker requests
|
16
|
+
# to the central server). If yes, specify the login and password that all
|
17
|
+
# requests must provide for authentication.
|
9
18
|
:use_http_authentication: no
|
10
19
|
:login: [your login name]
|
11
20
|
:password: [your password]
|
12
21
|
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
22
|
+
# Set the following numbers to tweak the configuration of your worker daemons.
|
23
|
+
# Optimum results will depend on proportion of the Memory/CPU/IO bottlenecks
|
24
|
+
# in your actions, the number of central servers you have running, and your
|
25
|
+
# desired balance between latency and traffic.
|
26
|
+
|
27
|
+
# The number of workers that `crowd workers start` spins up.
|
28
|
+
:num_workers: 4
|
29
|
+
|
30
|
+
# The minimum number of seconds a worker waits between checking the job queue.
|
31
|
+
:min_worker_wait: 1
|
32
|
+
|
33
|
+
# The maximum number of seconds a worker waits between checking the job queue.
|
34
|
+
:max_worker_wait: 20
|
35
|
+
|
36
|
+
# The backoff multiplier the worker uses to slow down the check interval when
|
37
|
+
# there's no work in the queue.
|
38
|
+
:worker_wait_multiplier: 1.3
|
39
|
+
|
40
|
+
# The number of seconds a worker waits to retry when there's some kind of
|
41
|
+
# internal error (i.e. the central server fails to respond)
|
42
|
+
:worker_retry_wait: 5
|
43
|
+
|
44
|
+
# The number of separate attempts that will be made to process an individual
|
45
|
+
# work unit, before marking it as having failed.
|
46
|
+
:work_unit_retries: 3
|
data/config/database.example.yml
CHANGED
data/lib/cloud-crowd.rb
CHANGED
@@ -1,21 +1,47 @@
|
|
1
|
+
# The Grand Central of code loading...
|
2
|
+
|
1
3
|
$LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__))
|
2
4
|
|
3
5
|
# Common Gems:
|
4
|
-
require '
|
5
|
-
|
6
|
-
|
6
|
+
require 'rubygems'
|
7
|
+
gem 'activerecord'
|
8
|
+
gem 'daemons'
|
9
|
+
gem 'json'
|
10
|
+
gem 'rest-client'
|
11
|
+
gem 'right_aws'
|
12
|
+
gem 'sinatra'
|
7
13
|
|
8
14
|
# Common CloudCrowd libs:
|
9
15
|
require 'cloud_crowd/core_ext'
|
10
|
-
|
16
|
+
|
17
|
+
# Autoloading for all the pieces which may or may not be needed:
|
18
|
+
autoload :ActiveRecord, 'activerecord'
|
19
|
+
autoload :Benchmark, 'benchmark'
|
20
|
+
autoload :Daemons, 'daemons'
|
21
|
+
autoload :ERB, 'erb'
|
22
|
+
autoload :FileUtils, 'fileutils'
|
23
|
+
autoload :JSON, 'json'
|
24
|
+
autoload :RestClient, 'rest_client'
|
25
|
+
autoload :RightAws, 'right_aws'
|
26
|
+
autoload :Sinatra, 'sinatra'
|
27
|
+
autoload :Socket, 'socket'
|
28
|
+
autoload :YAML, 'yaml'
|
11
29
|
|
12
30
|
module CloudCrowd
|
13
31
|
|
32
|
+
# Autoload all the CloudCrowd classes which may not be required.
|
33
|
+
autoload :App, 'cloud_crowd/app'
|
34
|
+
autoload :Action, 'cloud_crowd/action'
|
35
|
+
autoload :AssetStore, 'cloud_crowd/asset_store'
|
36
|
+
autoload :Helpers, 'cloud_crowd/helpers'
|
37
|
+
autoload :Job, 'cloud_crowd/models'
|
38
|
+
autoload :WorkUnit, 'cloud_crowd/models'
|
39
|
+
|
14
40
|
# Root directory of the CloudCrowd gem.
|
15
41
|
ROOT = File.expand_path(File.dirname(__FILE__) + '/..')
|
16
42
|
|
17
43
|
# Keep the version in sync with the gemspec.
|
18
|
-
VERSION = '0.0.
|
44
|
+
VERSION = '0.0.3'
|
19
45
|
|
20
46
|
# A Job is processing if its WorkUnits are in the queue to be handled by workers.
|
21
47
|
PROCESSING = 1
|
data/lib/cloud_crowd/app.rb
CHANGED
@@ -1,8 +1,3 @@
|
|
1
|
-
require 'erb'
|
2
|
-
require 'sinatra'
|
3
|
-
require 'cloud_crowd/models'
|
4
|
-
require 'cloud_crowd/helpers'
|
5
|
-
|
6
1
|
module CloudCrowd
|
7
2
|
|
8
3
|
class App < Sinatra::Default
|
@@ -10,7 +5,7 @@ module CloudCrowd
|
|
10
5
|
# static serves files from /public, methodoverride allows the _method param.
|
11
6
|
enable :static, :methodoverride
|
12
7
|
|
13
|
-
set :root,
|
8
|
+
set :root, CloudCrowd::ROOT
|
14
9
|
set :authorization_realm, "CloudCrowd"
|
15
10
|
|
16
11
|
helpers CloudCrowd::Helpers
|
@@ -40,29 +35,29 @@ module CloudCrowd
|
|
40
35
|
# Internal method for worker daemons to fetch the work unit at the front
|
41
36
|
# of the queue. Work unit is marked as taken and handed off to the worker.
|
42
37
|
get '/work' do
|
43
|
-
|
44
|
-
unit = WorkUnit.first(:conditions => {:status => CloudCrowd::INCOMPLETE, :taken => false}, :order => "created_at desc")
|
45
|
-
return status(204) && '' unless unit
|
46
|
-
unit.update_attributes(:taken => true)
|
47
|
-
unit.to_json
|
48
|
-
rescue ActiveRecord::StaleObjectError => e
|
49
|
-
return status(204) && ''
|
50
|
-
end
|
38
|
+
dequeue_work_unit
|
51
39
|
end
|
52
40
|
|
53
41
|
# When workers are done with their unit, either successfully or in failure,
|
54
|
-
# they mark it back on the central server.
|
42
|
+
# they mark it back on the central server and retrieve another. Failures
|
43
|
+
# pull from one down in the queue, so as to not repeat the same unit.
|
55
44
|
put '/work/:work_unit_id' do
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
45
|
+
handle_conflicts(409) do
|
46
|
+
case params[:status]
|
47
|
+
when 'succeeded'
|
48
|
+
current_work_unit.finish(params[:output], params[:time])
|
49
|
+
dequeue_work_unit
|
50
|
+
when 'failed'
|
51
|
+
current_work_unit.fail(params[:output], params[:time])
|
52
|
+
dequeue_work_unit(1)
|
53
|
+
else
|
54
|
+
return error(500, "Completing a work unit must specify status.")
|
55
|
+
end
|
60
56
|
end
|
61
|
-
return status(204) && ''
|
62
57
|
end
|
63
58
|
|
64
59
|
# To monitor the central server with Monit, God, Nagios, or another
|
65
|
-
# monitoring tool, you can hit /heartbeat to
|
60
|
+
# monitoring tool, you can hit /heartbeat to make sure.
|
66
61
|
get '/heartbeat' do
|
67
62
|
"buh-bump"
|
68
63
|
end
|
data/lib/cloud_crowd/daemon.rb
CHANGED
@@ -10,12 +10,12 @@ module CloudCrowd
|
|
10
10
|
# isn't any work to be done, and speeds back up when there is.
|
11
11
|
class Daemon
|
12
12
|
|
13
|
-
|
13
|
+
MIN_WAIT = CloudCrowd.config[:min_worker_wait]
|
14
14
|
MAX_WAIT = CloudCrowd.config[:max_worker_wait]
|
15
15
|
WAIT_MULTIPLIER = CloudCrowd.config[:worker_wait_multiplier]
|
16
16
|
|
17
17
|
def initialize
|
18
|
-
@wait_time =
|
18
|
+
@wait_time = MIN_WAIT
|
19
19
|
@worker = CloudCrowd::Worker.new
|
20
20
|
Signal.trap('INT', 'EXIT')
|
21
21
|
Signal.trap('KILL', 'EXIT')
|
@@ -31,9 +31,11 @@ module CloudCrowd
|
|
31
31
|
loop do
|
32
32
|
@worker.fetch_work_unit
|
33
33
|
if @worker.has_work?
|
34
|
-
@
|
35
|
-
@
|
36
|
-
|
34
|
+
@wait_time = MIN_WAIT
|
35
|
+
while @worker.has_work?
|
36
|
+
@worker.run
|
37
|
+
sleep 0.01 # So as to listen for incoming signals.
|
38
|
+
end
|
37
39
|
else
|
38
40
|
@wait_time = [@wait_time * WAIT_MULTIPLIER, MAX_WAIT].min
|
39
41
|
sleep @wait_time
|
@@ -10,6 +10,27 @@ module CloudCrowd
|
|
10
10
|
@work_unit ||= WorkUnit.find_by_id(params[:work_unit_id]) or raise Sinatra::NotFound
|
11
11
|
end
|
12
12
|
|
13
|
+
# Try to fetch a work unit from the queue. If none are pending, respond
|
14
|
+
# with no content.
|
15
|
+
def dequeue_work_unit(offset=0)
|
16
|
+
handle_conflicts do
|
17
|
+
unit = WorkUnit.dequeue(offset)
|
18
|
+
return status(204) && '' unless unit
|
19
|
+
unit.to_json
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
# We're using ActiveRecord's optimistic locking, so stale work units
|
24
|
+
# may sometimes arise. handle_conflicts responds with the HTTP status
|
25
|
+
# code of your choosing if the update failed to be applied.
|
26
|
+
def handle_conflicts(code=204)
|
27
|
+
begin
|
28
|
+
yield
|
29
|
+
rescue ActiveRecord::StaleObjectError => e
|
30
|
+
return status(code) && ''
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
13
34
|
end
|
14
35
|
end
|
15
36
|
end
|
@@ -1,129 +1,132 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
#
|
4
|
-
#
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
# Create a Job from an incoming JSON or XML request, and add it to the queue.
|
13
|
-
# TODO: Add XML support.
|
14
|
-
def self.create_from_request(h)
|
15
|
-
self.create(
|
16
|
-
:inputs => h['inputs'].to_json,
|
17
|
-
:action => h['action'],
|
18
|
-
:options => (h['options'] || {}).to_json,
|
19
|
-
:owner_email => h['owner_email'],
|
20
|
-
:callback_url => h['callback_url']
|
21
|
-
)
|
22
|
-
end
|
23
|
-
|
24
|
-
def after_create
|
25
|
-
self.queue_for_workers(JSON.parse(self.inputs))
|
26
|
-
end
|
27
|
-
|
28
|
-
def before_validation_on_create
|
29
|
-
self.status = self.splittable? ? CloudCrowd::SPLITTING : CloudCrowd::PROCESSING
|
30
|
-
end
|
31
|
-
|
32
|
-
# After work units are marked successful, we check to see if all of them have
|
33
|
-
# finished, if so, this job is complete.
|
34
|
-
def check_for_completion
|
35
|
-
return unless all_work_units_complete?
|
36
|
-
transition_to_next_phase
|
37
|
-
output_list = gather_outputs_from_work_units
|
1
|
+
module CloudCrowd
|
2
|
+
|
3
|
+
# A chunk of work that will be farmed out into many WorkUnits to be processed
|
4
|
+
# in parallel by all the active CloudCrowd::Workers. Jobs are defined by a list
|
5
|
+
# of inputs (usually public urls to files), an action (the name of a script that
|
6
|
+
# CloudCrowd knows how to run), and, eventually, a corresponding list of outputs.
|
7
|
+
class Job < ActiveRecord::Base
|
8
|
+
include CloudCrowd::ModelStatus
|
9
|
+
|
10
|
+
has_many :work_units, :dependent => :destroy
|
38
11
|
|
39
|
-
|
40
|
-
|
41
|
-
|
12
|
+
validates_presence_of :status, :inputs, :action, :options
|
13
|
+
|
14
|
+
# Create a Job from an incoming JSON or XML request, and add it to the queue.
|
15
|
+
# TODO: Add XML support.
|
16
|
+
def self.create_from_request(h)
|
17
|
+
self.create(
|
18
|
+
:inputs => h['inputs'].to_json,
|
19
|
+
:action => h['action'],
|
20
|
+
:options => (h['options'] || {}).to_json,
|
21
|
+
:owner_email => h['owner_email'],
|
22
|
+
:callback_url => h['callback_url']
|
23
|
+
)
|
42
24
|
end
|
43
|
-
self.save
|
44
25
|
|
45
|
-
|
46
|
-
|
47
|
-
when CloudCrowd::MERGING then queue_for_workers(output_list.to_json)
|
48
|
-
else fire_callback
|
26
|
+
def after_create
|
27
|
+
self.queue_for_workers(JSON.parse(self.inputs))
|
49
28
|
end
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
# Transition from the current phase to the next one.
|
54
|
-
def transition_to_next_phase
|
55
|
-
self.status = any_work_units_failed? ? CloudCrowd::FAILED :
|
56
|
-
self.splitting? ? CloudCrowd::PROCESSING :
|
57
|
-
self.should_merge? ? CloudCrowd::MERGING :
|
58
|
-
CloudCrowd::SUCCEEDED
|
59
|
-
end
|
60
|
-
|
61
|
-
# If a callback_url is defined, post the Job's JSON to it upon completion.
|
62
|
-
def fire_callback
|
63
|
-
begin
|
64
|
-
RestClient.post(callback_url, {:job => self.to_json}) if callback_url
|
65
|
-
rescue RestClient::Exception => e
|
66
|
-
puts "Failed to fire job callback. Hmmm, what should happen here?"
|
29
|
+
|
30
|
+
def before_validation_on_create
|
31
|
+
self.status = self.splittable? ? CloudCrowd::SPLITTING : CloudCrowd::PROCESSING
|
67
32
|
end
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
# Have all of the WorkUnits finished? We could trade reads for writes here
|
76
|
-
# by keeping a completed_count on the Job itself.
|
77
|
-
def all_work_units_complete?
|
78
|
-
self.work_units.incomplete.count <= 0
|
79
|
-
end
|
80
|
-
|
81
|
-
# Have any of the WorkUnits failed?
|
82
|
-
def any_work_units_failed?
|
83
|
-
self.work_units.failed.count > 0
|
84
|
-
end
|
85
|
-
|
86
|
-
def splittable?
|
87
|
-
self.action_class.new.respond_to? :split
|
88
|
-
end
|
89
|
-
|
90
|
-
def should_merge?
|
91
|
-
self.processing? && self.action_class.new.respond_to?(:merge)
|
92
|
-
end
|
93
|
-
|
94
|
-
def action_class
|
95
|
-
CloudCrowd.actions(self.action)
|
96
|
-
end
|
97
|
-
|
98
|
-
def gather_outputs_from_work_units
|
99
|
-
outs = self.work_units.complete.map {|wu| wu.output }
|
100
|
-
self.work_units.complete.destroy_all
|
101
|
-
outs
|
102
|
-
end
|
103
|
-
|
104
|
-
def display_status
|
105
|
-
CloudCrowd.display_status(self.status)
|
106
|
-
end
|
107
|
-
|
108
|
-
def work_units_remaining
|
109
|
-
self.work_units.incomplete.count
|
110
|
-
end
|
111
|
-
|
112
|
-
# A JSON representation of this job includes the statuses of its component
|
113
|
-
# WorkUnits, as well as any completed outputs.
|
114
|
-
def to_json(opts={})
|
115
|
-
atts = {'id' => self.id, 'status' => self.display_status, 'work_units_remaining' => self.work_units_remaining}
|
116
|
-
atts.merge!({'outputs' => JSON.parse(self.outputs)}) if self.outputs
|
117
|
-
atts.merge!({'time' => self.time}) if self.time
|
118
|
-
atts.to_json
|
119
|
-
end
|
33
|
+
|
34
|
+
# After work units are marked successful, we check to see if all of them have
|
35
|
+
# finished, if so, this job is complete.
|
36
|
+
def check_for_completion
|
37
|
+
return unless all_work_units_complete?
|
38
|
+
transition_to_next_phase
|
39
|
+
output_list = gather_outputs_from_work_units
|
120
40
|
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
41
|
+
if complete?
|
42
|
+
self.outputs = output_list.to_json
|
43
|
+
self.time = Time.now - self.created_at
|
44
|
+
end
|
45
|
+
self.save
|
46
|
+
|
47
|
+
case self.status
|
48
|
+
when CloudCrowd::PROCESSING then queue_for_workers(output_list.map {|o| JSON.parse(o) }.flatten)
|
49
|
+
when CloudCrowd::MERGING then queue_for_workers(output_list.to_json)
|
50
|
+
else fire_callback
|
51
|
+
end
|
52
|
+
self
|
53
|
+
end
|
54
|
+
|
55
|
+
# Transition from the current phase to the next one.
|
56
|
+
def transition_to_next_phase
|
57
|
+
self.status = any_work_units_failed? ? CloudCrowd::FAILED :
|
58
|
+
self.splitting? ? CloudCrowd::PROCESSING :
|
59
|
+
self.should_merge? ? CloudCrowd::MERGING :
|
60
|
+
CloudCrowd::SUCCEEDED
|
61
|
+
end
|
62
|
+
|
63
|
+
# If a callback_url is defined, post the Job's JSON to it upon completion.
|
64
|
+
def fire_callback
|
65
|
+
begin
|
66
|
+
RestClient.post(callback_url, {:job => self.to_json}) if callback_url
|
67
|
+
rescue RestClient::Exception => e
|
68
|
+
puts "Failed to fire job callback. Hmmm, what should happen here?"
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
72
|
+
# Cleaning up after a job will remove all of its files from S3.
|
73
|
+
def cleanup
|
74
|
+
CloudCrowd::AssetStore.new.cleanup_job(self)
|
75
|
+
end
|
76
|
+
|
77
|
+
# Have all of the WorkUnits finished? We could trade reads for writes here
|
78
|
+
# by keeping a completed_count on the Job itself.
|
79
|
+
def all_work_units_complete?
|
80
|
+
self.work_units.incomplete.count <= 0
|
81
|
+
end
|
82
|
+
|
83
|
+
# Have any of the WorkUnits failed?
|
84
|
+
def any_work_units_failed?
|
85
|
+
self.work_units.failed.count > 0
|
86
|
+
end
|
87
|
+
|
88
|
+
def splittable?
|
89
|
+
self.action_class.new.respond_to? :split
|
90
|
+
end
|
91
|
+
|
92
|
+
def should_merge?
|
93
|
+
self.processing? && self.action_class.new.respond_to?(:merge)
|
94
|
+
end
|
95
|
+
|
96
|
+
def action_class
|
97
|
+
CloudCrowd.actions(self.action)
|
98
|
+
end
|
99
|
+
|
100
|
+
def gather_outputs_from_work_units
|
101
|
+
outs = self.work_units.complete.map {|wu| wu.output }
|
102
|
+
self.work_units.complete.destroy_all
|
103
|
+
outs
|
104
|
+
end
|
105
|
+
|
106
|
+
def display_status
|
107
|
+
CloudCrowd.display_status(self.status)
|
108
|
+
end
|
109
|
+
|
110
|
+
def work_units_remaining
|
111
|
+
self.work_units.incomplete.count
|
112
|
+
end
|
113
|
+
|
114
|
+
# A JSON representation of this job includes the statuses of its component
|
115
|
+
# WorkUnits, as well as any completed outputs.
|
116
|
+
def to_json(opts={})
|
117
|
+
atts = {'id' => self.id, 'status' => self.display_status, 'work_units_remaining' => self.work_units_remaining}
|
118
|
+
atts.merge!({'outputs' => JSON.parse(self.outputs)}) if self.outputs
|
119
|
+
atts.merge!({'time' => self.time}) if self.time
|
120
|
+
atts.to_json
|
121
|
+
end
|
122
|
+
|
123
|
+
# When starting a new job, or moving to a new stage, split up the inputs
|
124
|
+
# into WorkUnits, and queue them.
|
125
|
+
def queue_for_workers(input)
|
126
|
+
[input].flatten.each do |wu_input|
|
127
|
+
WorkUnit.create(:job => self, :input => wu_input, :status => self.status)
|
128
|
+
end
|
126
129
|
end
|
130
|
+
|
127
131
|
end
|
128
|
-
|
129
132
|
end
|
@@ -1,62 +1,75 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
1
|
+
module CloudCrowd
|
2
|
+
|
3
|
+
# A WorkUnit is an atomic chunk of work from a job, processing a single input
|
4
|
+
# through a single action. All WorkUnits receive the same options.
|
5
|
+
class WorkUnit < ActiveRecord::Base
|
6
|
+
include CloudCrowd::ModelStatus
|
7
|
+
|
8
|
+
belongs_to :job
|
9
|
+
|
10
|
+
validates_presence_of :job_id, :status, :input
|
11
|
+
|
12
|
+
after_save :check_for_job_completion
|
13
|
+
|
14
|
+
# Find the Nth available WorkUnit in the queue, and take it out.
|
15
|
+
def self.dequeue(offset=0)
|
16
|
+
unit = self.first(
|
17
|
+
:conditions => {:status => CloudCrowd::INCOMPLETE, :taken => false},
|
18
|
+
:order => "created_at asc",
|
19
|
+
:offset => offset
|
20
|
+
)
|
21
|
+
unit ? unit.update_attributes(:taken => true) && unit : nil
|
22
|
+
end
|
23
|
+
|
24
|
+
# After saving a WorkUnit, its Job should check if it just became complete.
|
25
|
+
def check_for_job_completion
|
26
|
+
self.job.check_for_completion if complete?
|
27
|
+
end
|
28
|
+
|
29
|
+
# Mark this unit as having finished successfully.
|
30
|
+
def finish(output, time_taken)
|
31
|
+
update_attributes({
|
32
|
+
:status => CloudCrowd::SUCCEEDED,
|
33
|
+
:taken => false,
|
34
|
+
:attempts => self.attempts + 1,
|
35
|
+
:output => output,
|
36
|
+
:time => time_taken
|
37
|
+
})
|
38
|
+
end
|
39
|
+
|
40
|
+
# Mark this unit as having failed. May attempt a retry.
|
41
|
+
def fail(output, time_taken)
|
42
|
+
tries = self.attempts + 1
|
43
|
+
return try_again if tries < CloudCrowd.config[:work_unit_retries]
|
44
|
+
update_attributes({
|
45
|
+
:status => CloudCrowd::FAILED,
|
46
|
+
:taken => false,
|
47
|
+
:attempts => tries,
|
48
|
+
:output => output,
|
49
|
+
:time => time_taken
|
50
|
+
})
|
51
|
+
end
|
52
|
+
|
53
|
+
# Ever tried. Ever failed. No matter. Try again. Fail again. Fail better.
|
54
|
+
def try_again
|
55
|
+
update_attributes({
|
56
|
+
:taken => false,
|
57
|
+
:attempts => self.attempts + 1
|
58
|
+
})
|
59
|
+
end
|
60
|
+
|
61
|
+
# The JSON representation of a WorkUnit contains common elements of its job.
|
62
|
+
def to_json
|
63
|
+
{
|
64
|
+
'id' => self.id,
|
65
|
+
'job_id' => self.job_id,
|
66
|
+
'input' => self.input,
|
67
|
+
'attempts' => self.attempts,
|
68
|
+
'action' => self.job.action,
|
69
|
+
'options' => JSON.parse(self.job.options),
|
70
|
+
'status' => self.status
|
71
|
+
}.to_json
|
72
|
+
end
|
73
|
+
|
15
74
|
end
|
16
|
-
|
17
|
-
# Mark this unit as having finished successfully.
|
18
|
-
def finish(output, time_taken)
|
19
|
-
update_attributes({
|
20
|
-
:status => CloudCrowd::SUCCEEDED,
|
21
|
-
:taken => false,
|
22
|
-
:attempts => self.attempts + 1,
|
23
|
-
:output => output,
|
24
|
-
:time => time_taken
|
25
|
-
})
|
26
|
-
end
|
27
|
-
|
28
|
-
# Mark this unit as having failed. May attempt a retry.
|
29
|
-
def fail(output, time_taken)
|
30
|
-
tries = self.attempts + 1
|
31
|
-
return try_again if tries < CloudCrowd.config[:work_unit_retries]
|
32
|
-
update_attributes({
|
33
|
-
:status => CloudCrowd::FAILED,
|
34
|
-
:taken => false,
|
35
|
-
:attempts => tries,
|
36
|
-
:output => output,
|
37
|
-
:time => time_taken
|
38
|
-
})
|
39
|
-
end
|
40
|
-
|
41
|
-
# Ever tried. Ever failed. No matter. Try again. Fail again. Fail better.
|
42
|
-
def try_again
|
43
|
-
update_attributes({
|
44
|
-
:taken => false,
|
45
|
-
:attempts => self.attempts + 1
|
46
|
-
})
|
47
|
-
end
|
48
|
-
|
49
|
-
# The JSON representation of a WorkUnit contains common elements of its job.
|
50
|
-
def to_json
|
51
|
-
{
|
52
|
-
'id' => self.id,
|
53
|
-
'job_id' => self.job_id,
|
54
|
-
'input' => self.input,
|
55
|
-
'attempts' => self.attempts,
|
56
|
-
'action' => self.job.action,
|
57
|
-
'options' => JSON.parse(self.job.options),
|
58
|
-
'status' => self.status
|
59
|
-
}.to_json
|
60
|
-
end
|
61
|
-
|
62
|
-
end
|
75
|
+
end
|
data/lib/cloud_crowd/models.rb
CHANGED
data/lib/cloud_crowd/runner.rb
CHANGED
@@ -1,22 +1,10 @@
|
|
1
|
-
# This is the script that kicks off a single CloudCrowd::Daemon.
|
2
|
-
#
|
3
|
-
# environment.rb, loading all the common gems that we need.
|
4
|
-
|
5
|
-
# Standard Libs
|
6
|
-
require 'fileutils'
|
7
|
-
require 'benchmark'
|
8
|
-
require 'socket'
|
9
|
-
|
10
|
-
# Gems
|
11
|
-
require 'rubygems'
|
12
|
-
require 'daemons'
|
13
|
-
require 'yaml'
|
14
|
-
|
15
|
-
FileUtils.mkdir('log') unless File.exists?('log')
|
1
|
+
# This is the script that kicks off a single CloudCrowd::Daemon. Rely on
|
2
|
+
# cloud-crowd.rb for autoloading of all the code we need.
|
16
3
|
|
17
4
|
# Daemon/Worker Dependencies.
|
18
5
|
require "#{File.dirname(__FILE__)}/../cloud-crowd"
|
19
|
-
|
6
|
+
|
7
|
+
FileUtils.mkdir('log') unless File.exists?('log')
|
20
8
|
|
21
9
|
Daemons.run("#{CloudCrowd::ROOT}/lib/cloud_crowd/daemon.rb", {
|
22
10
|
:app_name => "cloud_crowd_worker",
|
data/lib/cloud_crowd/worker.rb
CHANGED
@@ -22,10 +22,7 @@ module CloudCrowd
|
|
22
22
|
def fetch_work_unit
|
23
23
|
keep_trying_to "fetch a new work unit" do
|
24
24
|
unit_json = @server['/work'].get
|
25
|
-
|
26
|
-
@start_time = Time.now
|
27
|
-
parse_work_unit unit_json
|
28
|
-
log "fetched work unit for #{@action_name}"
|
25
|
+
setup_work_unit(unit_json)
|
29
26
|
end
|
30
27
|
end
|
31
28
|
|
@@ -33,8 +30,10 @@ module CloudCrowd
|
|
33
30
|
def complete_work_unit(result)
|
34
31
|
keep_trying_to "complete work unit" do
|
35
32
|
data = completion_params.merge({:status => 'succeeded', :output => result})
|
36
|
-
@server["/work/#{data[:id]}"].put(data)
|
33
|
+
unit_json = @server["/work/#{data[:id]}"].put(data)
|
37
34
|
log "finished #{@action_name} in #{data[:time]} seconds"
|
35
|
+
clear_work_unit
|
36
|
+
setup_work_unit(unit_json)
|
38
37
|
end
|
39
38
|
end
|
40
39
|
|
@@ -42,8 +41,10 @@ module CloudCrowd
|
|
42
41
|
def fail_work_unit(exception)
|
43
42
|
keep_trying_to "mark work unit as failed" do
|
44
43
|
data = completion_params.merge({:status => 'failed', :output => exception.message})
|
45
|
-
@server["/work/#{data[:id]}"].put(data)
|
44
|
+
unit_json = @server["/work/#{data[:id]}"].put(data)
|
46
45
|
log "failed #{@action_name} in #{data[:time]} seconds\n#{exception.message}\n#{exception.backtrace}"
|
46
|
+
clear_work_unit
|
47
|
+
setup_work_unit(unit_json)
|
47
48
|
end
|
48
49
|
end
|
49
50
|
|
@@ -78,8 +79,6 @@ module CloudCrowd
|
|
78
79
|
complete_work_unit(result)
|
79
80
|
rescue Exception => e
|
80
81
|
fail_work_unit(e)
|
81
|
-
ensure
|
82
|
-
clear_work_unit
|
83
82
|
end
|
84
83
|
end
|
85
84
|
|
@@ -107,12 +106,16 @@ module CloudCrowd
|
|
107
106
|
end
|
108
107
|
|
109
108
|
# Extract our instance variables from a WorkUnit's JSON.
|
110
|
-
def
|
109
|
+
def setup_work_unit(unit_json)
|
110
|
+
return false unless unit_json
|
111
111
|
unit = JSON.parse(unit_json)
|
112
|
+
@start_time = Time.now
|
112
113
|
@action_name, @input, @options, @status = unit['action'], unit['input'], unit['options'], unit['status']
|
113
114
|
@options['job_id'] = unit['job_id']
|
114
115
|
@options['work_unit_id'] = unit['id']
|
115
116
|
@options['attempts'] ||= unit['attempts']
|
117
|
+
log "fetched work unit for #{@action_name}"
|
118
|
+
return true
|
116
119
|
end
|
117
120
|
|
118
121
|
# Log a message to the daemon log. Includes PID for identification.
|
@@ -13,7 +13,7 @@ class FailingWorkUnitsTest < Test::Unit::TestCase
|
|
13
13
|
}.to_json
|
14
14
|
assert browser.last_response.ok?
|
15
15
|
|
16
|
-
job = Job.last
|
16
|
+
job = CloudCrowd::Job.last
|
17
17
|
(CloudCrowd.config[:work_unit_retries] - 1).times do
|
18
18
|
job.work_units.each {|unit| unit.fail('failed', 10) }
|
19
19
|
end
|
data/test/blueprints.rb
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
Sham.url { Faker::Internet.domain_name + "/" + Faker::Internet.domain_word + ".jpg" }
|
2
2
|
|
3
|
-
Job.blueprint do
|
3
|
+
CloudCrowd::Job.blueprint do
|
4
4
|
status { CloudCrowd::PROCESSING }
|
5
5
|
inputs { ['http://www.google.com/intl/en_ALL/images/logo.gif'].to_json }
|
6
6
|
action { 'graphics_magick' }
|
7
7
|
options { {}.to_json }
|
8
8
|
end
|
9
9
|
|
10
|
-
WorkUnit.blueprint do
|
11
|
-
job { Job.make }
|
10
|
+
CloudCrowd::WorkUnit.blueprint do
|
11
|
+
job { CloudCrowd::Job.make }
|
12
12
|
status { CloudCrowd::PROCESSING }
|
13
13
|
taken { false }
|
14
14
|
input { Sham.url }
|
data/test/config/config.yml
CHANGED
data/test/test_helper.rb
CHANGED
data/test/unit/test_job.rb
CHANGED
@@ -5,7 +5,7 @@ class JobTest < Test::Unit::TestCase
|
|
5
5
|
context "A CloudCrowd Job" do
|
6
6
|
|
7
7
|
setup do
|
8
|
-
@job = Job.make
|
8
|
+
@job = CloudCrowd::Job.make
|
9
9
|
@unit = @job.work_units.first
|
10
10
|
end
|
11
11
|
|
@@ -32,7 +32,7 @@ class JobTest < Test::Unit::TestCase
|
|
32
32
|
end
|
33
33
|
|
34
34
|
should "be able to create a job from a JSON request" do
|
35
|
-
job = Job.create_from_request(JSON.parse(<<-EOS
|
35
|
+
job = CloudCrowd::Job.create_from_request(JSON.parse(<<-EOS
|
36
36
|
{ "inputs" : ["one", "two", "three"],
|
37
37
|
"action" : "graphics_magick",
|
38
38
|
"owner_email" : "bob@example.com",
|
@@ -46,13 +46,13 @@ class JobTest < Test::Unit::TestCase
|
|
46
46
|
end
|
47
47
|
|
48
48
|
should "create jobs with a SPLITTING status for actions that have a split method defined" do
|
49
|
-
job = Job.create_from_request({'inputs' => ['1'], 'action' => 'pdf_to_images'})
|
49
|
+
job = CloudCrowd::Job.create_from_request({'inputs' => ['1'], 'action' => 'pdf_to_images'})
|
50
50
|
assert job.splittable?
|
51
51
|
assert job.splitting?
|
52
52
|
end
|
53
53
|
|
54
54
|
should "fire a callback when a job has finished, successfully or not" do
|
55
|
-
Job.any_instance.expects(:fire_callback)
|
55
|
+
CloudCrowd::Job.any_instance.expects(:fire_callback)
|
56
56
|
@job.work_units.first.finish('output', 10)
|
57
57
|
assert @job.all_work_units_complete?
|
58
58
|
end
|
data/test/unit/test_work_unit.rb
CHANGED
@@ -5,7 +5,7 @@ class WorkUnitTest < Test::Unit::TestCase
|
|
5
5
|
context "A WorkUnit" do
|
6
6
|
|
7
7
|
setup do
|
8
|
-
@unit = WorkUnit.make
|
8
|
+
@unit = CloudCrowd::WorkUnit.make
|
9
9
|
@job = @unit.job
|
10
10
|
end
|
11
11
|
|
@@ -26,7 +26,7 @@ class WorkUnitTest < Test::Unit::TestCase
|
|
26
26
|
end
|
27
27
|
|
28
28
|
should "have JSON that includes job attributes" do
|
29
|
-
job = Job.make
|
29
|
+
job = CloudCrowd::Job.make
|
30
30
|
unit_data = JSON.parse(job.work_units.first.to_json)
|
31
31
|
assert unit_data['job_id'] == job.id
|
32
32
|
assert unit_data['action'] == job.action
|