documentcloud-cloud-crowd 0.0.5 → 0.0.6
Sign up to get free protection for your applications and to get access to all the features.
- data/README +59 -50
- data/actions/process_pdfs.rb +3 -3
- data/actions/word_count.rb +14 -0
- data/cloud-crowd.gemspec +27 -13
- data/config/config.example.yml +8 -11
- data/examples/graphics_magick_example.rb +40 -44
- data/examples/process_pdfs_example.rb +39 -29
- data/examples/word_count_example.rb +41 -0
- data/lib/cloud-crowd.rb +20 -17
- data/lib/cloud_crowd/action.rb +26 -9
- data/lib/cloud_crowd/app.rb +26 -4
- data/lib/cloud_crowd/asset_store.rb +69 -40
- data/lib/cloud_crowd/command_line.rb +6 -4
- data/lib/cloud_crowd/daemon.rb +65 -25
- data/lib/cloud_crowd/exceptions.rb +5 -0
- data/lib/cloud_crowd/helpers/resources.rb +2 -2
- data/lib/cloud_crowd/models/job.rb +9 -13
- data/lib/cloud_crowd/models/work_unit.rb +23 -15
- data/lib/cloud_crowd/models/worker_record.rb +61 -0
- data/lib/cloud_crowd/models.rb +7 -1
- data/lib/cloud_crowd/schema.rb +12 -3
- data/lib/cloud_crowd/worker.rb +48 -10
- data/public/css/admin_console.css +174 -4
- data/public/css/reset.css +17 -27
- data/public/images/bullet_green.png +0 -0
- data/public/images/bullet_white.png +0 -0
- data/public/images/cloud_hand.png +0 -0
- data/public/images/header_back.png +0 -0
- data/public/images/logo.png +0 -0
- data/public/images/server_error.png +0 -0
- data/public/images/sidebar_bottom.png +0 -0
- data/public/images/sidebar_top.png +0 -0
- data/public/images/worker_info.png +0 -0
- data/public/images/worker_info_loading.gif +0 -0
- data/public/js/admin_console.js +127 -10
- data/public/js/excanvas.pack.js +1 -0
- data/public/js/jquery-1.3.2.min.js +19 -0
- data/public/js/jquery.flot.pack.js +1 -0
- data/test/acceptance/test_word_count.rb +49 -0
- data/test/blueprints.rb +6 -5
- data/test/config/config.yml +1 -4
- data/test/test_helper.rb +1 -0
- data/test/unit/test_job.rb +12 -4
- data/test/unit/test_work_unit.rb +2 -2
- data/views/index.erb +69 -14
- metadata +23 -6
- data/public/js/jquery-1.3.2.js +0 -4376
data/lib/cloud_crowd/app.rb
CHANGED
@@ -4,7 +4,7 @@ module CloudCrowd
|
|
4
4
|
#
|
5
5
|
# == Admin
|
6
6
|
# [get /] Render the admin console, with a progress meter for running jobs.
|
7
|
-
# [get /
|
7
|
+
# [get /status] Get the combined JSON of every active job and worker.
|
8
8
|
# [get /heartbeat] Returns 200 OK to let monitoring tools know the server's up.
|
9
9
|
#
|
10
10
|
# == Public API
|
@@ -15,6 +15,7 @@ module CloudCrowd
|
|
15
15
|
# == Internal Workers API
|
16
16
|
# [post /work] Dequeue the next WorkUnit, and hand it off to the worker.
|
17
17
|
# [put /work/:unit_id] Mark a finished WorkUnit as completed or failed, with results.
|
18
|
+
# [put /worker] Keep a record of an actively running worker.
|
18
19
|
class App < Sinatra::Default
|
19
20
|
|
20
21
|
set :root, ROOT
|
@@ -35,9 +36,21 @@ module CloudCrowd
|
|
35
36
|
erb :index
|
36
37
|
end
|
37
38
|
|
38
|
-
# Get the JSON for every active job in the queue
|
39
|
-
get
|
40
|
-
|
39
|
+
# Get the JSON for every active job in the queue and every active worker
|
40
|
+
# in the system. This action may get a little worrisome as the system grows
|
41
|
+
# larger -- keep it in mind.
|
42
|
+
get '/status' do
|
43
|
+
json(
|
44
|
+
'jobs' => Job.incomplete,
|
45
|
+
'workers' => WorkerRecord.alive(:order => 'name desc'),
|
46
|
+
'work_unit_count' => WorkUnit.incomplete.count
|
47
|
+
)
|
48
|
+
end
|
49
|
+
|
50
|
+
# Get the JSON for a worker record's work unit, if one exists.
|
51
|
+
get '/worker/:name' do
|
52
|
+
record = WorkerRecord.find_by_name params[:name]
|
53
|
+
json((record && record.work_unit) || {})
|
41
54
|
end
|
42
55
|
|
43
56
|
# To monitor the central server with Monit, God, Nagios, or another
|
@@ -66,6 +79,8 @@ module CloudCrowd
|
|
66
79
|
json nil
|
67
80
|
end
|
68
81
|
|
82
|
+
# INTERNAL WORKER DAEMON API:
|
83
|
+
|
69
84
|
# Internal method for worker daemons to fetch the work unit at the front
|
70
85
|
# of the queue. Work unit is marked as taken and handed off to the worker.
|
71
86
|
post '/work' do
|
@@ -90,6 +105,13 @@ module CloudCrowd
|
|
90
105
|
end
|
91
106
|
end
|
92
107
|
|
108
|
+
# Every so often workers check in to let the central server know that
|
109
|
+
# they're still alive. Keep up-to-date records
|
110
|
+
put '/worker' do
|
111
|
+
params[:terminated] ? WorkerRecord.check_out(params) : WorkerRecord.check_in(params)
|
112
|
+
json nil
|
113
|
+
end
|
114
|
+
|
93
115
|
end
|
94
116
|
|
95
117
|
end
|
@@ -9,14 +9,20 @@ module CloudCrowd
|
|
9
9
|
# You shouldn't need to use the AssetStore directly -- Action's +download+
|
10
10
|
# and +save+ methods use it behind the scenes.
|
11
11
|
class AssetStore
|
12
|
-
include FileUtils
|
13
12
|
|
14
|
-
|
15
|
-
|
16
|
-
#
|
13
|
+
LOCAL_STORAGE_PATH = '/tmp/cloud_crowd_storage'
|
14
|
+
|
15
|
+
# Creating an AssetStore mixes in the specific storage implementation
|
16
|
+
# specified by 'storage' in <tt>config.yml</tt>.
|
17
17
|
def initialize
|
18
18
|
@use_auth = CloudCrowd.config[:use_s3_authentication]
|
19
|
-
|
19
|
+
@storage = CloudCrowd.config[:storage]
|
20
|
+
FileUtils.mkdir_p temp_storage_path unless File.exists? temp_storage_path
|
21
|
+
case @storage
|
22
|
+
when 's3' then extend S3Store
|
23
|
+
when 'filesystem' then extend FilesystemStore
|
24
|
+
else raise StorageNotFound, "#{@storage} is not a valid storage back end"
|
25
|
+
end
|
20
26
|
end
|
21
27
|
|
22
28
|
# Get the path to CloudCrowd's temporary local storage. All actions run
|
@@ -25,45 +31,68 @@ module CloudCrowd
|
|
25
31
|
"#{Dir.tmpdir}/cloud_crowd_tmp"
|
26
32
|
end
|
27
33
|
|
28
|
-
# Copy a finished file from our local storage to S3. Save it publicly unless
|
29
|
-
# we're configured to use S3 authentication.
|
30
|
-
def save(local_path, save_path)
|
31
|
-
ensure_s3_connection
|
32
|
-
permission = @use_auth ? 'private' : 'public-read'
|
33
|
-
@bucket.put(save_path, File.open(local_path), {}, permission)
|
34
|
-
end
|
35
34
|
|
36
|
-
#
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
35
|
+
# The S3Store is an implementation of an AssetStore that uses a bucket
# on S3 for all resulting files. It is mixed into an AssetStore instance and
# relies on that instance's @use_auth flag.
module S3Store

  # Save a finished file from local storage to S3. Save it publicly unless
  # we're configured to use S3 authentication. The local file handle is
  # closed as soon as the upload call returns, so long-running workers don't
  # accumulate open descriptors. Returns whatever the bucket's +put+ returns.
  def save(local_path, save_path)
    ensure_s3_connection
    permission = @use_auth ? 'private' : 'public-read'
    File.open(local_path) do |file|
      @bucket.put(save_path, file, {}, permission)
    end
  end

  # Return the S3 URL for a finished file: a signed link when S3
  # authentication is enabled, the key's public link otherwise.
  # Authenticated links expire after one day by default.
  def url(save_path)
    # Connect first so that +url+ is safe to call before any +save+.
    ensure_s3_connection
    if @use_auth
      @s3.interface.get_link(@bucket, save_path)
    else
      @bucket.key(save_path).public_link
    end
  end

  # Remove all of a Job's resulting files from S3, both intermediate and finished.
  def cleanup_job(job)
    ensure_s3_connection
    @bucket.delete_folder("#{job.action}/job_#{job.id}")
  end

  # Workers, through the course of many WorkUnits, keep around an AssetStore.
  # Ensure we have a persistent S3 connection after first use: the connection
  # and bucket are created once and then reused.
  def ensure_s3_connection
    return if @s3 && @bucket
    params  = {:port => 80, :protocol => 'http'}
    @s3     = RightAws::S3.new(CloudCrowd.config[:aws_access_key], CloudCrowd.config[:aws_secret_key], params)
    @bucket = @s3.bucket(CloudCrowd.config[:s3_bucket], true)
  end
end
|
48
70
|
|
49
|
-
private
|
50
|
-
|
51
|
-
# Unused for the moment. Think about using the filesystem instead of S3
|
52
|
-
# in development.
|
53
|
-
def save_to_filesystem(local_path, save_path)
|
54
|
-
save_path = File.join("/tmp/cloud_crowd_storage", save_path)
|
55
|
-
save_dir = File.dirname(save_path)
|
56
|
-
mkdir_p save_dir unless File.exists? save_dir
|
57
|
-
cp(local_path, save_path)
|
58
|
-
end
|
59
71
|
|
60
|
-
#
|
61
|
-
#
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
72
|
+
# The FilesystemStore is an implementation of the AssetStore, good only for
# use in development, testing, or if you're only running a single-machine
# installation. All files are kept beneath LOCAL_STORAGE_PATH.
module FilesystemStore

  # Save a file to somewhere semi-persistent on the filesystem. Can be used
  # in development, when offline, or if you happen to have a single-machine
  # CloudCrowd installation. To use, set 'storage' to 'filesystem' in
  # config.yml. Copies +local_path+ to +save_path+ (relative to
  # LOCAL_STORAGE_PATH), creating intermediate directories as needed.
  def save(local_path, save_path)
    destination = File.join(LOCAL_STORAGE_PATH, save_path)
    # mkdir_p is a no-op for directories that already exist.
    FileUtils.mkdir_p(File.dirname(destination))
    FileUtils.cp(local_path, destination)
  end

  # Return the file:// URL for a file saved to the local filesystem.
  def url(save_path)
    "file://#{File.expand_path(File.join(LOCAL_STORAGE_PATH, save_path))}"
  end

  # Remove all of a Job's result files from the filesystem. Does nothing if
  # the Job's directory was never created.
  def cleanup_job(job)
    path = "#{LOCAL_STORAGE_PATH}/#{job.action}/job_#{job.id}"
    # File.exist? rather than the deprecated File.exists?, which newer Rubies
    # no longer provide.
    FileUtils.rm_r(path) if File.exist?(path)
  end
end
|
69
98
|
|
@@ -14,18 +14,20 @@ module CloudCrowd
|
|
14
14
|
|
15
15
|
# Command-line banner for the usage message.
|
16
16
|
BANNER = <<-EOS
|
17
|
-
CloudCrowd is a
|
17
|
+
CloudCrowd is a MapReduce-inspired Parallel Processing System for Ruby.
|
18
|
+
|
19
|
+
Wiki: http://wiki.github.com/documentcloud/cloud-crowd
|
18
20
|
|
19
21
|
Usage: crowd COMMAND OPTIONS
|
20
22
|
|
21
|
-
|
23
|
+
Commands:
|
22
24
|
install Install the CloudCrowd configuration files to the specified directory
|
23
25
|
server Start up the central server (requires a database)
|
24
26
|
workers Control worker daemons, use: (start | stop | restart | status | run)
|
25
27
|
console Launch a CloudCrowd console, connected to the central database
|
26
28
|
load_schema Load the schema into the database specified by database.yml
|
27
29
|
|
28
|
-
|
30
|
+
Options:
|
29
31
|
EOS
|
30
32
|
|
31
33
|
# Creating a CloudCrowd::CommandLine runs from the contents of ARGV.
|
@@ -161,7 +163,7 @@ OPTIONS:
|
|
161
163
|
opts.on('-p', '--port PORT', 'central server port number') do |port_num|
|
162
164
|
@options[:port] = port_num
|
163
165
|
end
|
164
|
-
opts.on('-e', '--environment ENV', '
|
166
|
+
opts.on('-e', '--environment ENV', 'server environment (sinatra)') do |env|
|
165
167
|
@options[:environment] = env
|
166
168
|
end
|
167
169
|
opts.on_tail('-v', '--version', 'show version') do
|
data/lib/cloud_crowd/daemon.rb
CHANGED
@@ -1,7 +1,5 @@
|
|
1
1
|
CloudCrowd.configure(ENV['CLOUD_CROWD_CONFIG'])
|
2
2
|
|
3
|
-
require 'cloud_crowd/worker'
|
4
|
-
|
5
3
|
module CloudCrowd
|
6
4
|
|
7
5
|
# A CloudCrowd::Daemon, started by the Daemons gem, runs a CloudCrowd::Worker in
|
@@ -15,39 +13,81 @@ module CloudCrowd
|
|
15
13
|
# supports.
|
16
14
|
class Daemon
|
17
15
|
|
18
|
-
|
19
|
-
|
20
|
-
WAIT_MULTIPLIER
|
16
|
+
# The back-off factor used to slow down requests for new work units
|
17
|
+
# when the queue is empty.
|
18
|
+
WAIT_MULTIPLIER = 1.5
|
19
|
+
|
20
|
+
MIN_WAIT = CloudCrowd.config[:min_worker_wait]
|
21
|
+
MAX_WAIT = CloudCrowd.config[:max_worker_wait]
|
21
22
|
|
22
23
|
def initialize
|
23
|
-
@wait_time
|
24
|
-
@worker
|
25
|
-
Signal.trap('INT'
|
26
|
-
Signal.trap('KILL'
|
27
|
-
Signal.trap('TERM'
|
24
|
+
@wait_time = MIN_WAIT
|
25
|
+
@worker = Worker.new
|
26
|
+
Signal.trap('INT') { kill_worker_and_exit }
|
27
|
+
Signal.trap('KILL') { kill_worker_and_exit }
|
28
|
+
Signal.trap('TERM') { kill_worker_and_exit }
|
28
29
|
end
|
29
30
|
|
30
|
-
#
|
31
|
-
#
|
32
|
-
#
|
33
|
-
# timeout, or should be killable from the outside by terminating the thread.
|
34
|
-
# In either case, nasty un-cleaned-up bits might be left behind.
|
31
|
+
# Spin up our worker and monitoring threads. The monitor's the boss, and
|
32
|
+
# will feel no compunction in killing the worker thread if necessary.
|
33
|
+
# Check in before starting up. If check in fails, there's no sense in going.
|
35
34
|
def run
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
35
|
+
@worker.check_in('starting')
|
36
|
+
@work_thread = run_worker
|
37
|
+
@monitor_thread = run_monitor
|
38
|
+
@monitor_thread.join
|
39
|
+
end
|
40
|
+
|
41
|
+
|
42
|
+
private
|
43
|
+
|
44
|
+
# Loop forever, fetching WorkUnits and processing them.
|
45
|
+
def run_worker
|
46
|
+
Thread.new do
|
47
|
+
loop do
|
48
|
+
@worker.fetch_work_unit
|
49
|
+
if @worker.has_work?
|
50
|
+
@wait_time = MIN_WAIT
|
51
|
+
while @worker.has_work?
|
52
|
+
@worker.run
|
53
|
+
sleep 0.01 # So as to listen for incoming signals.
|
54
|
+
end
|
55
|
+
else
|
56
|
+
@wait_time = [@wait_time * WAIT_MULTIPLIER, MAX_WAIT].min
|
57
|
+
sleep @wait_time
|
43
58
|
end
|
44
|
-
else
|
45
|
-
@wait_time = [@wait_time * WAIT_MULTIPLIER, MAX_WAIT].min
|
46
|
-
sleep @wait_time
|
47
59
|
end
|
48
60
|
end
|
49
61
|
end
|
50
62
|
|
63
|
+
# Checks in to let the central server know it's still alive every
# Worker::CHECK_IN_INTERVAL seconds. Restarts the work thread if it has died,
# unless an exit is already in progress. Returns the monitoring Thread.
def run_monitor
  Thread.new do
    sleep Worker::CHECK_IN_INTERVAL
    loop do
      # Revive the *worker* loop (not another monitor) when it has died.
      @work_thread = run_worker unless @work_thread.alive? || @exit_started
      # Report the work thread's current Thread#status with each check-in.
      @worker.check_in(@work_thread.status)
      sleep Worker::CHECK_IN_INTERVAL
    end
  end
end
|
75
|
+
|
76
|
+
# Is either of the daemon's threads (worker or monitor) still alive?
def running?
  [@work_thread, @monitor_thread].any? { |thread| thread.alive? }
end
|
79
|
+
|
80
|
+
# At exit, kill the worker thread, gently at first, then forcefully.
# Checks the worker out, asks both threads to die, and waits (polling every
# 0.3 seconds) until they have stopped or WORKER_EXIT_WAIT seconds have
# passed. WORKER_EXIT_WAIT is defined outside this method. The process
# always exits at the end.
def kill_worker_and_exit
  @worker.check_out
  @exit_started = Time.now
  threads = [@work_thread, @monitor_thread]
  threads.each { |thread| thread.kill }
  sleep 0.3 while running? && Time.now - @exit_started < WORKER_EXIT_WAIT
  if running?
    # Thread#kill! only exists on Ruby 1.8; elsewhere fall straight through to
    # Process.exit instead of dying on a NoMethodError and never exiting.
    threads.each { |thread| thread.kill! if thread.respond_to?(:kill!) }
  end
  Process.exit
end
|
90
|
+
|
51
91
|
end
|
52
92
|
|
53
93
|
end
|
@@ -9,6 +9,11 @@ module CloudCrowd
|
|
9
9
|
class ActionNotFound < Error #:nodoc:
|
10
10
|
end
|
11
11
|
|
12
|
+
# StorageNotFound is raised when config.yml specifies a storage back end that
|
13
|
+
# doesn't exist.
|
14
|
+
class StorageNotFound < Error #:nodoc:
|
15
|
+
end
|
16
|
+
|
12
17
|
# StatusUnspecified is raised when a WorkUnit returns without a valid
|
13
18
|
# status code.
|
14
19
|
class StatusUnspecified < Error #:nodoc:
|
@@ -24,8 +24,8 @@ module CloudCrowd
|
|
24
24
|
# with no content.
|
25
25
|
def dequeue_work_unit(offset=0)
|
26
26
|
handle_conflicts do
|
27
|
-
actions = params[:
|
28
|
-
WorkUnit.dequeue(actions, offset)
|
27
|
+
worker, actions = params[:worker_name], params[:worker_actions].split(',')
|
28
|
+
WorkUnit.dequeue(worker, actions, offset)
|
29
29
|
end
|
30
30
|
end
|
31
31
|
|
@@ -22,7 +22,7 @@ module CloudCrowd
|
|
22
22
|
:inputs => h['inputs'].to_json,
|
23
23
|
:action => h['action'],
|
24
24
|
:options => (h['options'] || {}).to_json,
|
25
|
-
:
|
25
|
+
:email => h['email'],
|
26
26
|
:callback_url => h['callback_url']
|
27
27
|
)
|
28
28
|
end
|
@@ -97,11 +97,6 @@ module CloudCrowd
|
|
97
97
|
raise ActionNotFound, "no action named: '#{self.action}' could be found"
|
98
98
|
end
|
99
99
|
|
100
|
-
# Get the displayable status name of the Job's status code.
|
101
|
-
def display_status
|
102
|
-
CloudCrowd.display_status(self.status)
|
103
|
-
end
|
104
|
-
|
105
100
|
# How complete is this Job?
|
106
101
|
def percent_complete
|
107
102
|
return 0 if splitting?
|
@@ -125,14 +120,15 @@ module CloudCrowd
|
|
125
120
|
# WorkUnits, as well as any completed outputs.
|
126
121
|
def to_json(opts={})
|
127
122
|
atts = {
|
128
|
-
'id' =>
|
129
|
-
'color' =>
|
130
|
-
'status' =>
|
131
|
-
'percent_complete' =>
|
132
|
-
'work_units' =>
|
133
|
-
'time_taken' =>
|
123
|
+
'id' => id,
|
124
|
+
'color' => color,
|
125
|
+
'status' => display_status,
|
126
|
+
'percent_complete' => percent_complete,
|
127
|
+
'work_units' => work_units.count,
|
128
|
+
'time_taken' => time_taken
|
134
129
|
}
|
135
|
-
atts
|
130
|
+
atts['outputs'] = JSON.parse(outputs) if outputs
|
131
|
+
atts['email'] = email if email
|
136
132
|
atts.to_json
|
137
133
|
end
|
138
134
|
|
@@ -8,6 +8,7 @@ module CloudCrowd
|
|
8
8
|
include ModelStatus
|
9
9
|
|
10
10
|
belongs_to :job
|
11
|
+
belongs_to :worker_record
|
11
12
|
|
12
13
|
validates_presence_of :job_id, :status, :input, :action
|
13
14
|
|
@@ -17,13 +18,13 @@ module CloudCrowd
|
|
17
18
|
# +enabled_actions+ must be passed to whitelist the types of WorkUnits that
|
18
19
|
# can be retrieved for processing. Optionally, specify the +offset+ to peek
|
19
20
|
# further on in line.
|
20
|
-
def self.dequeue(enabled_actions=[], offset=0)
|
21
|
+
def self.dequeue(worker_name, enabled_actions=[], offset=0)
|
21
22
|
unit = self.first(
|
22
|
-
:conditions => {:status => INCOMPLETE, :
|
23
|
+
:conditions => {:status => INCOMPLETE, :worker_record_id => nil, :action => enabled_actions},
|
23
24
|
:order => "created_at asc",
|
24
25
|
:offset => offset
|
25
26
|
)
|
26
|
-
unit ? unit.
|
27
|
+
unit ? unit.assign_to(worker_name) : nil
|
27
28
|
end
|
28
29
|
|
29
30
|
# After saving a WorkUnit, its Job should check if it just became complete.
|
@@ -34,11 +35,11 @@ module CloudCrowd
|
|
34
35
|
# Mark this unit as having finished successfully.
|
35
36
|
def finish(output, time_taken)
|
36
37
|
update_attributes({
|
37
|
-
:status
|
38
|
-
:
|
39
|
-
:attempts
|
40
|
-
:output
|
41
|
-
:time
|
38
|
+
:status => SUCCEEDED,
|
39
|
+
:worker_record => nil,
|
40
|
+
:attempts => self.attempts + 1,
|
41
|
+
:output => output,
|
42
|
+
:time => time_taken
|
42
43
|
})
|
43
44
|
end
|
44
45
|
|
@@ -47,22 +48,29 @@ module CloudCrowd
|
|
47
48
|
tries = self.attempts + 1
|
48
49
|
return try_again if tries < CloudCrowd.config[:work_unit_retries]
|
49
50
|
update_attributes({
|
50
|
-
:status
|
51
|
-
:
|
52
|
-
:attempts
|
53
|
-
:output
|
54
|
-
:time
|
51
|
+
:status => FAILED,
|
52
|
+
:worker_record => nil,
|
53
|
+
:attempts => tries,
|
54
|
+
:output => output,
|
55
|
+
:time => time_taken
|
55
56
|
})
|
56
57
|
end
|
57
58
|
|
58
59
|
# Ever tried. Ever failed. No matter. Try again. Fail again. Fail better.
|
59
60
|
def try_again
|
60
61
|
update_attributes({
|
61
|
-
:
|
62
|
-
:attempts
|
62
|
+
:worker_record => nil,
|
63
|
+
:attempts => self.attempts + 1
|
63
64
|
})
|
64
65
|
end
|
65
66
|
|
67
|
+
# When a Worker checks out a WorkUnit, establish the connection between
|
68
|
+
# WorkUnit and WorkerRecord.
|
69
|
+
def assign_to(worker_name)
|
70
|
+
self.worker_record = WorkerRecord.find_by_name!(worker_name)
|
71
|
+
self.save ? self : nil
|
72
|
+
end
|
73
|
+
|
66
74
|
# The JSON representation of a WorkUnit shares the Job's options with all
|
67
75
|
# its sister WorkUnits.
|
68
76
|
def to_json
|
@@ -0,0 +1,61 @@
|
|
1
|
+
module CloudCrowd

  # A WorkerRecord is a recording of an active worker daemon running remotely.
  # Every time it checks in, we keep track of its status. The attributes shown
  # here may lag their actual values by up to Worker::CHECK_IN_INTERVAL seconds.
  class WorkerRecord < ActiveRecord::Base

    # A worker that hasn't checked in for two intervals is considered dead.
    EXPIRES_AFTER = 2 * Worker::CHECK_IN_INTERVAL

    has_one :work_unit

    validates_presence_of :name, :thread_status

    before_destroy :clear_work_units

    named_scope :alive, lambda { {:conditions => ['updated_at > ?',  Time.now - EXPIRES_AFTER]} }
    named_scope :dead,  lambda { {:conditions => ['updated_at <= ?', Time.now - EXPIRES_AFTER]} }

    # Save a Worker's current status to the database, creating the record for
    # +params[:name]+ on first check-in. Raises if the record cannot be saved.
    def self.check_in(params)
      attrs = {:thread_status => params[:thread_status], :updated_at => Time.now}
      self.find_or_create_by_name(params[:name]).update_attributes!(attrs)
    end

    # Remove a terminated Worker's record from the database. Checking out a
    # worker that has no record is a no-op rather than an error.
    def self.check_out(params)
      record = self.find_by_name(params[:name])
      record.destroy if record
    end

    # We consider the worker to be alive if it's checked in more recently
    # than twice the expected interval ago.
    def alive?
      updated_at > Time.now - EXPIRES_AFTER
    end

    # Derive the Worker's PID on the remote machine from the name: the part
    # before the '@'. Returned as a String.
    def pid
      @pid ||= self.name.split('@').first
    end

    # Derive the hostname from the Worker's name: the part after the '@'.
    def hostname
      @hostname ||= self.name.split('@').last
    end

    # The JSON representation of a WorkerRecord: its name, plus the display
    # status of its current WorkUnit (nil when it has none).
    def to_json(opts={})
      {
        'name'   => name,
        'status' => work_unit && work_unit.display_status
      }.to_json
    end


    private

    # Release any WorkUnits still assigned to this record before it is
    # destroyed. The id is bound as a parameter rather than interpolated
    # into the SQL string.
    def clear_work_units
      WorkUnit.update_all('worker_record_id = null', ['worker_record_id = ?', id])
    end

  end
end
|
data/lib/cloud_crowd/models.rb
CHANGED
@@ -27,8 +27,14 @@ module CloudCrowd
|
|
27
27
|
def complete?; COMPLETE.include?(self.status); end
|
28
28
|
def incomplete?; INCOMPLETE.include?(self.status); end
|
29
29
|
|
30
|
+
# Get the displayable status name of the model's status code.
|
31
|
+
def display_status
|
32
|
+
CloudCrowd.display_status(self.status)
|
33
|
+
end
|
34
|
+
|
30
35
|
end
|
31
36
|
end
|
32
37
|
|
33
38
|
require 'cloud_crowd/models/job'
|
34
|
-
require 'cloud_crowd/models/work_unit'
|
39
|
+
require 'cloud_crowd/models/work_unit'
|
40
|
+
require 'cloud_crowd/models/worker_record'
|
data/lib/cloud_crowd/schema.rb
CHANGED
@@ -9,7 +9,7 @@ ActiveRecord::Schema.define(:version => 1) do
|
|
9
9
|
t.text "outputs"
|
10
10
|
t.float "time"
|
11
11
|
t.string "callback_url"
|
12
|
-
t.string "
|
12
|
+
t.string "email"
|
13
13
|
t.integer "lock_version", :default => 0, :null => false
|
14
14
|
t.datetime "created_at"
|
15
15
|
t.datetime "updated_at"
|
@@ -22,15 +22,24 @@ ActiveRecord::Schema.define(:version => 1) do
|
|
22
22
|
t.string "action", :null => false
|
23
23
|
t.integer "attempts", :default => 0, :null => false
|
24
24
|
t.integer "lock_version", :default => 0, :null => false
|
25
|
-
t.
|
25
|
+
t.integer "worker_record_id"
|
26
26
|
t.float "time"
|
27
27
|
t.text "output"
|
28
28
|
t.datetime "created_at"
|
29
29
|
t.datetime "updated_at"
|
30
30
|
end
|
31
|
+
|
32
|
+
create_table "worker_records", :force => true do |t|
|
33
|
+
t.string "name", :null => false
|
34
|
+
t.string "thread_status", :null => false
|
35
|
+
t.datetime "created_at"
|
36
|
+
t.datetime "updated_at"
|
37
|
+
end
|
31
38
|
|
32
39
|
add_index "jobs", ["status"], :name => "index_jobs_on_status"
|
33
40
|
add_index "work_units", ["job_id"], :name => "index_work_units_on_job_id"
|
34
|
-
add_index "work_units", ["status", "
|
41
|
+
add_index "work_units", ["status", "worker_record_id", "action"], :name => "index_work_units_on_status_and_worker_record_id_and_action"
|
42
|
+
add_index "worker_records", ["name"], :name => "index_worker_records_on_name"
|
43
|
+
add_index "worker_records", ["updated_at"], :name => "index_worker_records_on_updated_at"
|
35
44
|
|
36
45
|
end
|