documentcloud-cloud-crowd 0.0.5 → 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +59 -50
- data/actions/process_pdfs.rb +3 -3
- data/actions/word_count.rb +14 -0
- data/cloud-crowd.gemspec +27 -13
- data/config/config.example.yml +8 -11
- data/examples/graphics_magick_example.rb +40 -44
- data/examples/process_pdfs_example.rb +39 -29
- data/examples/word_count_example.rb +41 -0
- data/lib/cloud-crowd.rb +20 -17
- data/lib/cloud_crowd/action.rb +26 -9
- data/lib/cloud_crowd/app.rb +26 -4
- data/lib/cloud_crowd/asset_store.rb +69 -40
- data/lib/cloud_crowd/command_line.rb +6 -4
- data/lib/cloud_crowd/daemon.rb +65 -25
- data/lib/cloud_crowd/exceptions.rb +5 -0
- data/lib/cloud_crowd/helpers/resources.rb +2 -2
- data/lib/cloud_crowd/models/job.rb +9 -13
- data/lib/cloud_crowd/models/work_unit.rb +23 -15
- data/lib/cloud_crowd/models/worker_record.rb +61 -0
- data/lib/cloud_crowd/models.rb +7 -1
- data/lib/cloud_crowd/schema.rb +12 -3
- data/lib/cloud_crowd/worker.rb +48 -10
- data/public/css/admin_console.css +174 -4
- data/public/css/reset.css +17 -27
- data/public/images/bullet_green.png +0 -0
- data/public/images/bullet_white.png +0 -0
- data/public/images/cloud_hand.png +0 -0
- data/public/images/header_back.png +0 -0
- data/public/images/logo.png +0 -0
- data/public/images/server_error.png +0 -0
- data/public/images/sidebar_bottom.png +0 -0
- data/public/images/sidebar_top.png +0 -0
- data/public/images/worker_info.png +0 -0
- data/public/images/worker_info_loading.gif +0 -0
- data/public/js/admin_console.js +127 -10
- data/public/js/excanvas.pack.js +1 -0
- data/public/js/jquery-1.3.2.min.js +19 -0
- data/public/js/jquery.flot.pack.js +1 -0
- data/test/acceptance/test_word_count.rb +49 -0
- data/test/blueprints.rb +6 -5
- data/test/config/config.yml +1 -4
- data/test/test_helper.rb +1 -0
- data/test/unit/test_job.rb +12 -4
- data/test/unit/test_work_unit.rb +2 -2
- data/views/index.erb +69 -14
- metadata +23 -6
- data/public/js/jquery-1.3.2.js +0 -4376
data/lib/cloud_crowd/app.rb
CHANGED
@@ -4,7 +4,7 @@ module CloudCrowd
|
|
4
4
|
#
|
5
5
|
# == Admin
|
6
6
|
# [get /] Render the admin console, with a progress meter for running jobs.
|
7
|
-
# [get /
|
7
|
+
# [get /status] Get the combined JSON of every active job and worker.
|
8
8
|
# [get /heartbeat] Returns 200 OK to let monitoring tools know the server's up.
|
9
9
|
#
|
10
10
|
# == Public API
|
@@ -15,6 +15,7 @@ module CloudCrowd
|
|
15
15
|
# == Internal Workers API
|
16
16
|
# [post /work] Dequeue the next WorkUnit, and hand it off to the worker.
|
17
17
|
# [put /work/:unit_id] Mark a finished WorkUnit as completed or failed, with results.
|
18
|
+
# [put /worker] Keep a record of an actively running worker.
|
18
19
|
class App < Sinatra::Default
|
19
20
|
|
20
21
|
set :root, ROOT
|
@@ -35,9 +36,21 @@ module CloudCrowd
|
|
35
36
|
erb :index
|
36
37
|
end
|
37
38
|
|
38
|
-
# Get the JSON for every active job in the queue
|
39
|
-
get
|
40
|
-
|
39
|
+
# Get the JSON for every active job in the queue and every active worker
|
40
|
+
# in the system. This action may get a little worrisome as the system grows
|
41
|
+
# larger -- keep it in mind.
|
42
|
+
get '/status' do
|
43
|
+
json(
|
44
|
+
'jobs' => Job.incomplete,
|
45
|
+
'workers' => WorkerRecord.alive(:order => 'name desc'),
|
46
|
+
'work_unit_count' => WorkUnit.incomplete.count
|
47
|
+
)
|
48
|
+
end
|
49
|
+
|
50
|
+
# Get the JSON for a worker record's work unit, if one exists.
|
51
|
+
get '/worker/:name' do
|
52
|
+
record = WorkerRecord.find_by_name params[:name]
|
53
|
+
json((record && record.work_unit) || {})
|
41
54
|
end
|
42
55
|
|
43
56
|
# To monitor the central server with Monit, God, Nagios, or another
|
@@ -66,6 +79,8 @@ module CloudCrowd
|
|
66
79
|
json nil
|
67
80
|
end
|
68
81
|
|
82
|
+
# INTERNAL WORKER DAEMON API:
|
83
|
+
|
69
84
|
# Internal method for worker daemons to fetch the work unit at the front
|
70
85
|
# of the queue. Work unit is marked as taken and handed off to the worker.
|
71
86
|
post '/work' do
|
@@ -90,6 +105,13 @@ module CloudCrowd
|
|
90
105
|
end
|
91
106
|
end
|
92
107
|
|
108
|
+
# Every so often workers check in to let the central server know that
|
109
|
+
# they're still alive. Keep the worker records up to date.
|
110
|
+
put '/worker' do
|
111
|
+
params[:terminated] ? WorkerRecord.check_out(params) : WorkerRecord.check_in(params)
|
112
|
+
json nil
|
113
|
+
end
|
114
|
+
|
93
115
|
end
|
94
116
|
|
95
117
|
end
|
@@ -9,14 +9,20 @@ module CloudCrowd
|
|
9
9
|
# You shouldn't need to use the AssetStore directly -- Action's +download+
|
10
10
|
# and +save+ methods use it behind the scenes.
|
11
11
|
class AssetStore
|
12
|
-
include FileUtils
|
13
12
|
|
14
|
-
|
15
|
-
|
16
|
-
#
|
13
|
+
LOCAL_STORAGE_PATH = '/tmp/cloud_crowd_storage'
|
14
|
+
|
15
|
+
# Creating an AssetStore mixes in the specific storage implementation
|
16
|
+
# specified by 'storage' in <tt>config.yml</tt>.
|
17
17
|
def initialize
|
18
18
|
@use_auth = CloudCrowd.config[:use_s3_authentication]
|
19
|
-
|
19
|
+
@storage = CloudCrowd.config[:storage]
|
20
|
+
FileUtils.mkdir_p temp_storage_path unless File.exists? temp_storage_path
|
21
|
+
case @storage
|
22
|
+
when 's3' then extend S3Store
|
23
|
+
when 'filesystem' then extend FilesystemStore
|
24
|
+
else raise StorageNotFound, "#{@storage} is not a valid storage back end"
|
25
|
+
end
|
20
26
|
end
|
21
27
|
|
22
28
|
# Get the path to CloudCrowd's temporary local storage. All actions run
|
@@ -25,45 +31,68 @@ module CloudCrowd
|
|
25
31
|
"#{Dir.tmpdir}/cloud_crowd_tmp"
|
26
32
|
end
|
27
33
|
|
28
|
-
# Copy a finished file from our local storage to S3. Save it publicly unless
|
29
|
-
# we're configured to use S3 authentication.
|
30
|
-
def save(local_path, save_path)
|
31
|
-
ensure_s3_connection
|
32
|
-
permission = @use_auth ? 'private' : 'public-read'
|
33
|
-
@bucket.put(save_path, File.open(local_path), {}, permission)
|
34
|
-
end
|
35
34
|
|
36
|
-
#
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
35
|
+
# The S3Store is an implementation of an AssetStore that uses a bucket
|
36
|
+
# on S3 for all resulting files.
|
37
|
+
module S3Store
|
38
|
+
|
39
|
+
# Save a finished file from local storage to S3. Save it publicly unless
|
40
|
+
# we're configured to use S3 authentication.
|
41
|
+
def save(local_path, save_path)
|
42
|
+
ensure_s3_connection
|
43
|
+
permission = @use_auth ? 'private' : 'public-read'
|
44
|
+
@bucket.put(save_path, File.open(local_path), {}, permission)
|
45
|
+
end
|
46
|
+
|
47
|
+
# Return the S3 public URL for a finished file. Authenticated links expire
|
48
|
+
# after one day by default.
|
49
|
+
def url(save_path)
|
50
|
+
@use_auth ? @s3.interface.get_link(@bucket, save_path) :
|
51
|
+
@bucket.key(save_path).public_link
|
52
|
+
end
|
53
|
+
|
54
|
+
# Remove all of a Job's resulting files from S3, both intermediate and finished.
|
55
|
+
def cleanup_job(job)
|
56
|
+
ensure_s3_connection
|
57
|
+
@bucket.delete_folder("#{job.action}/job_#{job.id}")
|
58
|
+
end
|
59
|
+
|
60
|
+
# Workers, through the course of many WorkUnits, keep around an AssetStore.
|
61
|
+
# Ensure we have a persistent S3 connection after first use.
|
62
|
+
def ensure_s3_connection
|
63
|
+
unless @s3 && @bucket
|
64
|
+
params = {:port => 80, :protocol => 'http'}
|
65
|
+
@s3 = RightAws::S3.new(CloudCrowd.config[:aws_access_key], CloudCrowd.config[:aws_secret_key], params)
|
66
|
+
@bucket = @s3.bucket(CloudCrowd.config[:s3_bucket], true)
|
67
|
+
end
|
68
|
+
end
|
47
69
|
end
|
48
70
|
|
49
|
-
private
|
50
|
-
|
51
|
-
# Unused for the moment. Think about using the filesystem instead of S3
|
52
|
-
# in development.
|
53
|
-
def save_to_filesystem(local_path, save_path)
|
54
|
-
save_path = File.join("/tmp/cloud_crowd_storage", save_path)
|
55
|
-
save_dir = File.dirname(save_path)
|
56
|
-
mkdir_p save_dir unless File.exists? save_dir
|
57
|
-
cp(local_path, save_path)
|
58
|
-
end
|
59
71
|
|
60
|
-
#
|
61
|
-
#
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
72
|
+
# The FilesystemStore is an implementation of the AssetStore, good only for
|
73
|
+
# use in development, testing, or if you're only running a single-machine
|
74
|
+
# installation.
|
75
|
+
module FilesystemStore
|
76
|
+
|
77
|
+
# Save a file to somewhere semi-persistent on the filesystem. Can be used
|
78
|
+
# in development, when offline, or if you happen to have a single-machine
|
79
|
+
# CloudCrowd installation. To use, set 'storage' to 'filesystem' in config.yml.
|
80
|
+
def save(local_path, save_path)
|
81
|
+
save_path = File.join(LOCAL_STORAGE_PATH, save_path)
|
82
|
+
save_dir = File.dirname(save_path)
|
83
|
+
FileUtils.mkdir_p save_dir unless File.exists? save_dir
|
84
|
+
FileUtils.cp(local_path, save_path)
|
85
|
+
end
|
86
|
+
|
87
|
+
# Return the URL for a file saved to the local filesystem.
|
88
|
+
def url(save_path)
|
89
|
+
"file://#{File.expand_path(File.join(LOCAL_STORAGE_PATH, save_path))}"
|
90
|
+
end
|
91
|
+
|
92
|
+
# Remove all of a Job's result files from the filesystem.
|
93
|
+
def cleanup_job(job)
|
94
|
+
path = "#{LOCAL_STORAGE_PATH}/#{job.action}/job_#{job.id}"
|
95
|
+
FileUtils.rm_r(path) if File.exists?(path)
|
67
96
|
end
|
68
97
|
end
|
69
98
|
|
@@ -14,18 +14,20 @@ module CloudCrowd
|
|
14
14
|
|
15
15
|
# Command-line banner for the usage message.
|
16
16
|
BANNER = <<-EOS
|
17
|
-
CloudCrowd is a
|
17
|
+
CloudCrowd is a MapReduce-inspired Parallel Processing System for Ruby.
|
18
|
+
|
19
|
+
Wiki: http://wiki.github.com/documentcloud/cloud-crowd
|
18
20
|
|
19
21
|
Usage: crowd COMMAND OPTIONS
|
20
22
|
|
21
|
-
|
23
|
+
Commands:
|
22
24
|
install Install the CloudCrowd configuration files to the specified directory
|
23
25
|
server Start up the central server (requires a database)
|
24
26
|
workers Control worker daemons, use: (start | stop | restart | status | run)
|
25
27
|
console Launch a CloudCrowd console, connected to the central database
|
26
28
|
load_schema Load the schema into the database specified by database.yml
|
27
29
|
|
28
|
-
|
30
|
+
Options:
|
29
31
|
EOS
|
30
32
|
|
31
33
|
# Creating a CloudCrowd::CommandLine runs from the contents of ARGV.
|
@@ -161,7 +163,7 @@ OPTIONS:
|
|
161
163
|
opts.on('-p', '--port PORT', 'central server port number') do |port_num|
|
162
164
|
@options[:port] = port_num
|
163
165
|
end
|
164
|
-
opts.on('-e', '--environment ENV', '
|
166
|
+
opts.on('-e', '--environment ENV', 'server environment (sinatra)') do |env|
|
165
167
|
@options[:environment] = env
|
166
168
|
end
|
167
169
|
opts.on_tail('-v', '--version', 'show version') do
|
data/lib/cloud_crowd/daemon.rb
CHANGED
@@ -1,7 +1,5 @@
|
|
1
1
|
CloudCrowd.configure(ENV['CLOUD_CROWD_CONFIG'])
|
2
2
|
|
3
|
-
require 'cloud_crowd/worker'
|
4
|
-
|
5
3
|
module CloudCrowd
|
6
4
|
|
7
5
|
# A CloudCrowd::Daemon, started by the Daemons gem, runs a CloudCrowd::Worker in
|
@@ -15,39 +13,81 @@ module CloudCrowd
|
|
15
13
|
# supports.
|
16
14
|
class Daemon
|
17
15
|
|
18
|
-
|
19
|
-
|
20
|
-
WAIT_MULTIPLIER
|
16
|
+
# The back-off factor used to slow down requests for new work units
|
17
|
+
# when the queue is empty.
|
18
|
+
WAIT_MULTIPLIER = 1.5
|
19
|
+
|
20
|
+
MIN_WAIT = CloudCrowd.config[:min_worker_wait]
|
21
|
+
MAX_WAIT = CloudCrowd.config[:max_worker_wait]
|
21
22
|
|
22
23
|
def initialize
|
23
|
-
@wait_time
|
24
|
-
@worker
|
25
|
-
Signal.trap('INT'
|
26
|
-
Signal.trap('KILL'
|
27
|
-
Signal.trap('TERM'
|
24
|
+
@wait_time = MIN_WAIT
|
25
|
+
@worker = Worker.new
|
26
|
+
Signal.trap('INT') { kill_worker_and_exit }
|
27
|
+
Signal.trap('KILL') { kill_worker_and_exit }
|
28
|
+
Signal.trap('TERM') { kill_worker_and_exit }
|
28
29
|
end
|
29
30
|
|
30
|
-
#
|
31
|
-
#
|
32
|
-
#
|
33
|
-
# timeout, or should be killable from the outside by terminating the thread.
|
34
|
-
# In either case, nasty un-cleaned-up bits might be left behind.
|
31
|
+
# Spin up our worker and monitoring threads. The monitor's the boss, and
|
32
|
+
# will feel no compunction in killing the worker thread if necessary.
|
33
|
+
# Check in before starting up. If check in fails, there's no sense in going.
|
35
34
|
def run
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
35
|
+
@worker.check_in('starting')
|
36
|
+
@work_thread = run_worker
|
37
|
+
@monitor_thread = run_monitor
|
38
|
+
@monitor_thread.join
|
39
|
+
end
|
40
|
+
|
41
|
+
|
42
|
+
private
|
43
|
+
|
44
|
+
# Loop forever, fetching WorkUnits and processing them.
|
45
|
+
def run_worker
|
46
|
+
Thread.new do
|
47
|
+
loop do
|
48
|
+
@worker.fetch_work_unit
|
49
|
+
if @worker.has_work?
|
50
|
+
@wait_time = MIN_WAIT
|
51
|
+
while @worker.has_work?
|
52
|
+
@worker.run
|
53
|
+
sleep 0.01 # So as to listen for incoming signals.
|
54
|
+
end
|
55
|
+
else
|
56
|
+
@wait_time = [@wait_time * WAIT_MULTIPLIER, MAX_WAIT].min
|
57
|
+
sleep @wait_time
|
43
58
|
end
|
44
|
-
else
|
45
|
-
@wait_time = [@wait_time * WAIT_MULTIPLIER, MAX_WAIT].min
|
46
|
-
sleep @wait_time
|
47
59
|
end
|
48
60
|
end
|
49
61
|
end
|
50
62
|
|
63
|
+
# Checks in to let the central server know it's still alive every
|
64
|
+
# CHECK_IN_INTERVAL seconds. Restarts the work_thread if it has died.
|
65
|
+
def run_monitor
|
66
|
+
Thread.new do
|
67
|
+
sleep Worker::CHECK_IN_INTERVAL
|
68
|
+
loop do
|
69
|
+
@work_thread = run_worker unless @work_thread.alive? || @exit_started # respawn the dead worker thread (not a second monitor), unless we're shutting down
|
70
|
+
@worker.check_in(@work_thread.status)
|
71
|
+
sleep Worker::CHECK_IN_INTERVAL
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
def running?
|
77
|
+
@work_thread.alive? || @monitor_thread.alive?
|
78
|
+
end
|
79
|
+
|
80
|
+
# At exit, kill the worker thread, gently at first, then forcefully.
|
81
|
+
def kill_worker_and_exit
|
82
|
+
@worker.check_out
|
83
|
+
@exit_started = Time.now
|
84
|
+
@work_thread.kill && @monitor_thread.kill
|
85
|
+
sleep 0.3 while running? && Time.now - @exit_started < WORKER_EXIT_WAIT
|
86
|
+
return Process.exit unless running?
|
87
|
+
@work_thread.kill! && @monitor_thread.kill!
|
88
|
+
Process.exit
|
89
|
+
end
|
90
|
+
|
51
91
|
end
|
52
92
|
|
53
93
|
end
|
@@ -9,6 +9,11 @@ module CloudCrowd
|
|
9
9
|
class ActionNotFound < Error #:nodoc:
|
10
10
|
end
|
11
11
|
|
12
|
+
# StorageNotFound is raised when config.yml specifies a storage back end that
|
13
|
+
# doesn't exist.
|
14
|
+
class StorageNotFound < Error #:nodoc:
|
15
|
+
end
|
16
|
+
|
12
17
|
# StatusUnspecified is raised when a WorkUnit returns without a valid
|
13
18
|
# status code.
|
14
19
|
class StatusUnspecified < Error #:nodoc:
|
@@ -24,8 +24,8 @@ module CloudCrowd
|
|
24
24
|
# with no content.
|
25
25
|
def dequeue_work_unit(offset=0)
|
26
26
|
handle_conflicts do
|
27
|
-
actions = params[:
|
28
|
-
WorkUnit.dequeue(actions, offset)
|
27
|
+
worker, actions = params[:worker_name], params[:worker_actions].split(',')
|
28
|
+
WorkUnit.dequeue(worker, actions, offset)
|
29
29
|
end
|
30
30
|
end
|
31
31
|
|
@@ -22,7 +22,7 @@ module CloudCrowd
|
|
22
22
|
:inputs => h['inputs'].to_json,
|
23
23
|
:action => h['action'],
|
24
24
|
:options => (h['options'] || {}).to_json,
|
25
|
-
:
|
25
|
+
:email => h['email'],
|
26
26
|
:callback_url => h['callback_url']
|
27
27
|
)
|
28
28
|
end
|
@@ -97,11 +97,6 @@ module CloudCrowd
|
|
97
97
|
raise ActionNotFound, "no action named: '#{self.action}' could be found"
|
98
98
|
end
|
99
99
|
|
100
|
-
# Get the displayable status name of the Job's status code.
|
101
|
-
def display_status
|
102
|
-
CloudCrowd.display_status(self.status)
|
103
|
-
end
|
104
|
-
|
105
100
|
# How complete is this Job?
|
106
101
|
def percent_complete
|
107
102
|
return 0 if splitting?
|
@@ -125,14 +120,15 @@ module CloudCrowd
|
|
125
120
|
# WorkUnits, as well as any completed outputs.
|
126
121
|
def to_json(opts={})
|
127
122
|
atts = {
|
128
|
-
'id' =>
|
129
|
-
'color' =>
|
130
|
-
'status' =>
|
131
|
-
'percent_complete' =>
|
132
|
-
'work_units' =>
|
133
|
-
'time_taken' =>
|
123
|
+
'id' => id,
|
124
|
+
'color' => color,
|
125
|
+
'status' => display_status,
|
126
|
+
'percent_complete' => percent_complete,
|
127
|
+
'work_units' => work_units.count,
|
128
|
+
'time_taken' => time_taken
|
134
129
|
}
|
135
|
-
atts
|
130
|
+
atts['outputs'] = JSON.parse(outputs) if outputs
|
131
|
+
atts['email'] = email if email
|
136
132
|
atts.to_json
|
137
133
|
end
|
138
134
|
|
@@ -8,6 +8,7 @@ module CloudCrowd
|
|
8
8
|
include ModelStatus
|
9
9
|
|
10
10
|
belongs_to :job
|
11
|
+
belongs_to :worker_record
|
11
12
|
|
12
13
|
validates_presence_of :job_id, :status, :input, :action
|
13
14
|
|
@@ -17,13 +18,13 @@ module CloudCrowd
|
|
17
18
|
# +enabled_actions+ must be passed to whitelist the types of WorkUnits that
|
18
19
|
# can be retrieved for processing. Optionally, specify the +offset+ to peek
|
19
20
|
# further on in line.
|
20
|
-
def self.dequeue(enabled_actions=[], offset=0)
|
21
|
+
def self.dequeue(worker_name, enabled_actions=[], offset=0)
|
21
22
|
unit = self.first(
|
22
|
-
:conditions => {:status => INCOMPLETE, :
|
23
|
+
:conditions => {:status => INCOMPLETE, :worker_record_id => nil, :action => enabled_actions},
|
23
24
|
:order => "created_at asc",
|
24
25
|
:offset => offset
|
25
26
|
)
|
26
|
-
unit ? unit.
|
27
|
+
unit ? unit.assign_to(worker_name) : nil
|
27
28
|
end
|
28
29
|
|
29
30
|
# After saving a WorkUnit, its Job should check if it just became complete.
|
@@ -34,11 +35,11 @@ module CloudCrowd
|
|
34
35
|
# Mark this unit as having finished successfully.
|
35
36
|
def finish(output, time_taken)
|
36
37
|
update_attributes({
|
37
|
-
:status
|
38
|
-
:
|
39
|
-
:attempts
|
40
|
-
:output
|
41
|
-
:time
|
38
|
+
:status => SUCCEEDED,
|
39
|
+
:worker_record => nil,
|
40
|
+
:attempts => self.attempts + 1,
|
41
|
+
:output => output,
|
42
|
+
:time => time_taken
|
42
43
|
})
|
43
44
|
end
|
44
45
|
|
@@ -47,22 +48,29 @@ module CloudCrowd
|
|
47
48
|
tries = self.attempts + 1
|
48
49
|
return try_again if tries < CloudCrowd.config[:work_unit_retries]
|
49
50
|
update_attributes({
|
50
|
-
:status
|
51
|
-
:
|
52
|
-
:attempts
|
53
|
-
:output
|
54
|
-
:time
|
51
|
+
:status => FAILED,
|
52
|
+
:worker_record => nil,
|
53
|
+
:attempts => tries,
|
54
|
+
:output => output,
|
55
|
+
:time => time_taken
|
55
56
|
})
|
56
57
|
end
|
57
58
|
|
58
59
|
# Ever tried. Ever failed. No matter. Try again. Fail again. Fail better.
|
59
60
|
def try_again
|
60
61
|
update_attributes({
|
61
|
-
:
|
62
|
-
:attempts
|
62
|
+
:worker_record => nil,
|
63
|
+
:attempts => self.attempts + 1
|
63
64
|
})
|
64
65
|
end
|
65
66
|
|
67
|
+
# When a Worker checks out a WorkUnit, establish the connection between
|
68
|
+
# WorkUnit and WorkerRecord.
|
69
|
+
def assign_to(worker_name)
|
70
|
+
self.worker_record = WorkerRecord.find_by_name!(worker_name)
|
71
|
+
self.save ? self : nil
|
72
|
+
end
|
73
|
+
|
66
74
|
# The JSON representation of a WorkUnit shares the Job's options with all
|
67
75
|
# its sister WorkUnits.
|
68
76
|
def to_json
|
@@ -0,0 +1,61 @@
|
|
1
|
+
module CloudCrowd
|
2
|
+
|
3
|
+
# A WorkerRecord is a record of an active worker daemon running remotely.
|
4
|
+
# Every time it checks in, we keep track of its status. The attributes shown
|
5
|
+
# here may lag their actual values by up to Worker::CHECK_IN_INTERVAL seconds.
|
6
|
+
class WorkerRecord < ActiveRecord::Base
|
7
|
+
|
8
|
+
EXPIRES_AFTER = 2 * Worker::CHECK_IN_INTERVAL
|
9
|
+
|
10
|
+
has_one :work_unit
|
11
|
+
|
12
|
+
validates_presence_of :name, :thread_status
|
13
|
+
|
14
|
+
before_destroy :clear_work_units
|
15
|
+
|
16
|
+
named_scope :alive, lambda { {:conditions => ['updated_at > ?', Time.now - EXPIRES_AFTER]} }
|
17
|
+
named_scope :dead, lambda { {:conditions => ['updated_at <= ?', Time.now - EXPIRES_AFTER]} }
|
18
|
+
|
19
|
+
# Save a Worker's current status to the database.
|
20
|
+
def self.check_in(params)
|
21
|
+
attrs = {:thread_status => params[:thread_status], :updated_at => Time.now}
|
22
|
+
self.find_or_create_by_name(params[:name]).update_attributes!(attrs)
|
23
|
+
end
|
24
|
+
|
25
|
+
# Remove a terminated Worker's record from the database.
|
26
|
+
def self.check_out(params)
|
27
|
+
(record = self.find_by_name(params[:name])) && record.destroy # find_by_name returns nil for an unknown worker; nothing to remove then
|
28
|
+
end
|
29
|
+
|
30
|
+
# We consider the worker to be alive if it's checked in more recently
|
31
|
+
# than twice the expected interval ago.
|
32
|
+
def alive?
|
33
|
+
updated_at > Time.now - EXPIRES_AFTER
|
34
|
+
end
|
35
|
+
|
36
|
+
# Derive the Worker's PID on the remote machine from the name.
|
37
|
+
def pid
|
38
|
+
@pid ||= self.name.split('@').first
|
39
|
+
end
|
40
|
+
|
41
|
+
# Derive the hostname from the Worker's name.
|
42
|
+
def hostname
|
43
|
+
@hostname ||= self.name.split('@').last
|
44
|
+
end
|
45
|
+
|
46
|
+
def to_json(opts={})
|
47
|
+
{
|
48
|
+
'name' => name,
|
49
|
+
'status' => work_unit && work_unit.display_status,
|
50
|
+
}.to_json
|
51
|
+
end
|
52
|
+
|
53
|
+
|
54
|
+
private
|
55
|
+
|
56
|
+
def clear_work_units
|
57
|
+
WorkUnit.update_all('worker_record_id = null', "worker_record_id = #{id}")
|
58
|
+
end
|
59
|
+
|
60
|
+
end
|
61
|
+
end
|
data/lib/cloud_crowd/models.rb
CHANGED
@@ -27,8 +27,14 @@ module CloudCrowd
|
|
27
27
|
def complete?; COMPLETE.include?(self.status); end
|
28
28
|
def incomplete?; INCOMPLETE.include?(self.status); end
|
29
29
|
|
30
|
+
# Get the displayable status name of the model's status code.
|
31
|
+
def display_status
|
32
|
+
CloudCrowd.display_status(self.status)
|
33
|
+
end
|
34
|
+
|
30
35
|
end
|
31
36
|
end
|
32
37
|
|
33
38
|
require 'cloud_crowd/models/job'
|
34
|
-
require 'cloud_crowd/models/work_unit'
|
39
|
+
require 'cloud_crowd/models/work_unit'
|
40
|
+
require 'cloud_crowd/models/worker_record'
|
data/lib/cloud_crowd/schema.rb
CHANGED
@@ -9,7 +9,7 @@ ActiveRecord::Schema.define(:version => 1) do
|
|
9
9
|
t.text "outputs"
|
10
10
|
t.float "time"
|
11
11
|
t.string "callback_url"
|
12
|
-
t.string "
|
12
|
+
t.string "email"
|
13
13
|
t.integer "lock_version", :default => 0, :null => false
|
14
14
|
t.datetime "created_at"
|
15
15
|
t.datetime "updated_at"
|
@@ -22,15 +22,24 @@ ActiveRecord::Schema.define(:version => 1) do
|
|
22
22
|
t.string "action", :null => false
|
23
23
|
t.integer "attempts", :default => 0, :null => false
|
24
24
|
t.integer "lock_version", :default => 0, :null => false
|
25
|
-
t.
|
25
|
+
t.integer "worker_record_id"
|
26
26
|
t.float "time"
|
27
27
|
t.text "output"
|
28
28
|
t.datetime "created_at"
|
29
29
|
t.datetime "updated_at"
|
30
30
|
end
|
31
|
+
|
32
|
+
create_table "worker_records", :force => true do |t|
|
33
|
+
t.string "name", :null => false
|
34
|
+
t.string "thread_status", :null => false
|
35
|
+
t.datetime "created_at"
|
36
|
+
t.datetime "updated_at"
|
37
|
+
end
|
31
38
|
|
32
39
|
add_index "jobs", ["status"], :name => "index_jobs_on_status"
|
33
40
|
add_index "work_units", ["job_id"], :name => "index_work_units_on_job_id"
|
34
|
-
add_index "work_units", ["status", "
|
41
|
+
add_index "work_units", ["status", "worker_record_id", "action"], :name => "index_work_units_on_status_and_worker_record_id_and_action"
|
42
|
+
add_index "worker_records", ["name"], :name => "index_worker_records_on_name"
|
43
|
+
add_index "worker_records", ["updated_at"], :name => "index_worker_records_on_updated_at"
|
35
44
|
|
36
45
|
end
|