documentcloud-cloud-crowd 0.0.5 → 0.0.6

This diff covers publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as published.
Files changed (47):
  1. data/README +59 -50
  2. data/actions/process_pdfs.rb +3 -3
  3. data/actions/word_count.rb +14 -0
  4. data/cloud-crowd.gemspec +27 -13
  5. data/config/config.example.yml +8 -11
  6. data/examples/graphics_magick_example.rb +40 -44
  7. data/examples/process_pdfs_example.rb +39 -29
  8. data/examples/word_count_example.rb +41 -0
  9. data/lib/cloud-crowd.rb +20 -17
  10. data/lib/cloud_crowd/action.rb +26 -9
  11. data/lib/cloud_crowd/app.rb +26 -4
  12. data/lib/cloud_crowd/asset_store.rb +69 -40
  13. data/lib/cloud_crowd/command_line.rb +6 -4
  14. data/lib/cloud_crowd/daemon.rb +65 -25
  15. data/lib/cloud_crowd/exceptions.rb +5 -0
  16. data/lib/cloud_crowd/helpers/resources.rb +2 -2
  17. data/lib/cloud_crowd/models/job.rb +9 -13
  18. data/lib/cloud_crowd/models/work_unit.rb +23 -15
  19. data/lib/cloud_crowd/models/worker_record.rb +61 -0
  20. data/lib/cloud_crowd/models.rb +7 -1
  21. data/lib/cloud_crowd/schema.rb +12 -3
  22. data/lib/cloud_crowd/worker.rb +48 -10
  23. data/public/css/admin_console.css +174 -4
  24. data/public/css/reset.css +17 -27
  25. data/public/images/bullet_green.png +0 -0
  26. data/public/images/bullet_white.png +0 -0
  27. data/public/images/cloud_hand.png +0 -0
  28. data/public/images/header_back.png +0 -0
  29. data/public/images/logo.png +0 -0
  30. data/public/images/server_error.png +0 -0
  31. data/public/images/sidebar_bottom.png +0 -0
  32. data/public/images/sidebar_top.png +0 -0
  33. data/public/images/worker_info.png +0 -0
  34. data/public/images/worker_info_loading.gif +0 -0
  35. data/public/js/admin_console.js +127 -10
  36. data/public/js/excanvas.pack.js +1 -0
  37. data/public/js/jquery-1.3.2.min.js +19 -0
  38. data/public/js/jquery.flot.pack.js +1 -0
  39. data/test/acceptance/test_word_count.rb +49 -0
  40. data/test/blueprints.rb +6 -5
  41. data/test/config/config.yml +1 -4
  42. data/test/test_helper.rb +1 -0
  43. data/test/unit/test_job.rb +12 -4
  44. data/test/unit/test_work_unit.rb +2 -2
  45. data/views/index.erb +69 -14
  46. metadata +23 -6
  47. data/public/js/jquery-1.3.2.js +0 -4376
--- data/lib/cloud_crowd/app.rb (0.0.5)
+++ data/lib/cloud_crowd/app.rb (0.0.6)
@@ -4,7 +4,7 @@ module CloudCrowd
   #
   # == Admin
   # [get /] Render the admin console, with a progress meter for running jobs.
-  # [get /jobs] Get the combined JSON of every active job in the queue.
+  # [get /status] Get the combined JSON of every active job and worker.
   # [get /heartbeat] Returns 200 OK to let monitoring tools know the server's up.
   #
   # == Public API
@@ -15,6 +15,7 @@ module CloudCrowd
   # == Internal Workers API
   # [post /work] Dequeue the next WorkUnit, and hand it off to the worker.
   # [put /work/:unit_id] Mark a finished WorkUnit as completed or failed, with results.
+  # [put /worker] Keep a record of an actively running worker.
   class App < Sinatra::Default
 
     set :root, ROOT
@@ -35,9 +36,21 @@ module CloudCrowd
       erb :index
     end
 
-    # Get the JSON for every active job in the queue.
-    get '/jobs' do
-      json Job.incomplete
+    # Get the JSON for every active job in the queue and every active worker
+    # in the system. This action may get a little worrisome as the system grows
+    # larger -- keep it in mind.
+    get '/status' do
+      json(
+        'jobs' => Job.incomplete,
+        'workers' => WorkerRecord.alive(:order => 'name desc'),
+        'work_unit_count' => WorkUnit.incomplete.count
+      )
+    end
+
+    # Get the JSON for a worker record's work unit, if one exists.
+    get '/worker/:name' do
+      record = WorkerRecord.find_by_name params[:name]
+      json((record && record.work_unit) || {})
     end
 
     # To monitor the central server with Monit, God, Nagios, or another
@@ -66,6 +79,8 @@ module CloudCrowd
       json nil
     end
 
+    # INTERNAL WORKER DAEMON API:
+
     # Internal method for worker daemons to fetch the work unit at the front
     # of the queue. Work unit is marked as taken and handed off to the worker.
     post '/work' do
@@ -90,6 +105,13 @@ module CloudCrowd
       end
     end
 
+    # Every so often workers check in to let the central server know that
+    # they're still alive. Keep up-to-date records
+    put '/worker' do
+      params[:terminated] ? WorkerRecord.check_out(params) : WorkerRecord.check_in(params)
+      json nil
+    end
+
   end
 
 end
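
For reference, the new /status route returns a single JSON document with 'jobs', 'workers', and 'work_unit_count' keys. A minimal sketch of polling it from a script; the host and port are placeholders for your own central server, and any configured HTTP auth is omitted:

    # Poll the central server's /status endpoint and print a short summary.
    require 'net/http'
    require 'uri'
    require 'json'

    status = JSON.parse(Net::HTTP.get(URI.parse('http://localhost:9173/status')))

    puts "incomplete work units: #{status['work_unit_count']}"
    status['workers'].each { |w| puts "worker #{w['name']}: #{w['status'] || 'idle'}" }
    status['jobs'].each { |j| puts "job #{j['id']}: #{j['percent_complete']}% (#{j['status']})" }
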
--- data/lib/cloud_crowd/asset_store.rb (0.0.5)
+++ data/lib/cloud_crowd/asset_store.rb (0.0.6)
@@ -9,14 +9,20 @@ module CloudCrowd
   # You shouldn't need to use the AssetStore directly -- Action's +download+
   # and +save+ methods use it behind the scenes.
   class AssetStore
-    include FileUtils
 
-    # Creating an AssetStore will determine wether to save private or public
-    # files on S3, depending on the value of <tt>use_s3_authentication</tt> in
-    # <tt>config.yml</tt>.
+    LOCAL_STORAGE_PATH = '/tmp/cloud_crowd_storage'
+
+    # Creating an AssetStore mixes in the specific storage implementation
+    # specified by 'storage' in <tt>config.yml</tt>.
     def initialize
       @use_auth = CloudCrowd.config[:use_s3_authentication]
-      mkdir_p temp_storage_path unless File.exists? temp_storage_path
+      @storage = CloudCrowd.config[:storage]
+      FileUtils.mkdir_p temp_storage_path unless File.exists? temp_storage_path
+      case @storage
+      when 's3' then extend S3Store
+      when 'filesystem' then extend FilesystemStore
+      else raise StorageNotFound, "#{@storage} is not a valid storage back end"
+      end
     end
 
     # Get the path to CloudCrowd's temporary local storage. All actions run
@@ -25,45 +31,68 @@ module CloudCrowd
       "#{Dir.tmpdir}/cloud_crowd_tmp"
     end
 
-    # Copy a finished file from our local storage to S3. Save it publicly unless
-    # we're configured to use S3 authentication.
-    def save(local_path, save_path)
-      ensure_s3_connection
-      permission = @use_auth ? 'private' : 'public-read'
-      @bucket.put(save_path, File.open(local_path), {}, permission)
-    end
 
-    # Cleanup all S3 files for a job that's been completed and retrieved.
-    def cleanup_job(job)
-      ensure_s3_connection
-      @bucket.delete_folder("#{job.action}/job_#{job.id}")
-    end
-
-    # Return the S3 public URL for a finshed file. Authenticated links expire
-    # after one day by default.
-    def url(save_path)
-      @use_auth ? @s3.interface.get_link(@bucket, save_path) :
-                  @bucket.key(save_path).public_link
+    # The S3Store is an implementation of an AssetStore that uses a bucket
+    # on S3 for all resulting files.
+    module S3Store
+
+      # Save a finished file from local storage to S3. Save it publicly unless
+      # we're configured to use S3 authentication.
+      def save(local_path, save_path)
+        ensure_s3_connection
+        permission = @use_auth ? 'private' : 'public-read'
+        @bucket.put(save_path, File.open(local_path), {}, permission)
+      end
+
+      # Return the S3 public URL for a finshed file. Authenticated links expire
+      # after one day by default.
+      def url(save_path)
+        @use_auth ? @s3.interface.get_link(@bucket, save_path) :
+                    @bucket.key(save_path).public_link
+      end
+
+      # Remove all of a Job's resulting files from S3, both intermediate and finished.
+      def cleanup_job(job)
+        ensure_s3_connection
+        @bucket.delete_folder("#{job.action}/job_#{job.id}")
+      end
+
+      # Workers, through the course of many WorkUnits, keep around an AssetStore.
+      # Ensure we have a persistent S3 connection after first use.
+      def ensure_s3_connection
+        unless @s3 && @bucket
+          params = {:port => 80, :protocol => 'http'}
+          @s3 = RightAws::S3.new(CloudCrowd.config[:aws_access_key], CloudCrowd.config[:aws_secret_key], params)
+          @bucket = @s3.bucket(CloudCrowd.config[:s3_bucket], true)
+        end
+      end
     end
 
-    private
-
-    # Unused for the moment. Think about using the filesystem instead of S3
-    # in development.
-    def save_to_filesystem(local_path, save_path)
-      save_path = File.join("/tmp/cloud_crowd_storage", save_path)
-      save_dir = File.dirname(save_path)
-      mkdir_p save_dir unless File.exists? save_dir
-      cp(local_path, save_path)
-    end
 
-    # Workers, through the course of many WorkUnits, keep around an AssetStore.
-    # Ensure we have a persistent S3 connection after first use.
-    def ensure_s3_connection
-      unless @s3 && @bucket
-        params = {:port => 80, :protocol => 'http'}
-        @s3 = RightAws::S3.new(CloudCrowd.config[:aws_access_key], CloudCrowd.config[:aws_secret_key], params)
-        @bucket = @s3.bucket(CloudCrowd.config[:s3_bucket], true)
+    # The FilesystemStore is an implementation of the AssetStore, good only for
+    # use in development, testing, or if you're only running a single-machine
+    # installation.
+    module FilesystemStore
+
+      # Save a file to somewhere semi-persistent on the filesystem. Can be used
+      # in development, when offline, or if you happen to have a single-machine
+      # CloudCrowd installation. To use, configure :local_storage.
+      def save(local_path, save_path)
+        save_path = File.join(LOCAL_STORAGE_PATH, save_path)
+        save_dir = File.dirname(save_path)
+        FileUtils.mkdir_p save_dir unless File.exists? save_dir
+        FileUtils.cp(local_path, save_path)
+      end
+
+      # Return the URL for a file saved to the local filesystem.
+      def url(save_path)
+        "file://#{File.expand_path(File.join(LOCAL_STORAGE_PATH, save_path))}"
+      end
+
+      # Remove all of a Job's result files from the filesystem.
+      def cleanup_job(job)
+        path = "#{LOCAL_STORAGE_PATH}/#{job.action}/job_#{job.id}"
+        FileUtils.rm_r(path) if File.exists?(path)
      end
     end
 
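
The rewritten constructor picks a storage back end at runtime by mixing a module into the single AssetStore instance with Object#extend. A stripped-down sketch of that pattern, using toy Store, S3Backend, and FilesystemBackend names rather than CloudCrowd's real classes:

    # Per-instance mixin dispatch: extend adds a module's methods to one object,
    # so each Store instance carries exactly one storage implementation.
    module S3Backend
      def save(path); "s3://my-bucket/#{path}"; end
    end

    module FilesystemBackend
      def save(path); "file:///tmp/storage/#{path}"; end
    end

    class Store
      def initialize(backend)
        case backend
        when 's3'         then extend S3Backend
        when 'filesystem' then extend FilesystemBackend
        else raise ArgumentError, "#{backend} is not a valid storage back end"
        end
      end
    end

    puts Store.new('filesystem').save('pdfs/page_1.txt')  # file:///tmp/storage/pdfs/page_1.txt
    puts Store.new('s3').save('pdfs/page_1.txt')          # s3://my-bucket/pdfs/page_1.txt
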
--- data/lib/cloud_crowd/command_line.rb (0.0.5)
+++ data/lib/cloud_crowd/command_line.rb (0.0.6)
@@ -14,18 +14,20 @@ module CloudCrowd
 
     # Command-line banner for the usage message.
     BANNER = <<-EOS
-CloudCrowd is a Ruby & AWS batch processing system, MapReduce style.
+CloudCrowd is a MapReduce-inspired Parallel Processing System for Ruby.
+
+Wiki: http://wiki.github.com/documentcloud/cloud-crowd
 
 Usage: crowd COMMAND OPTIONS
 
-COMMANDS:
+Commands:
   install       Install the CloudCrowd configuration files to the specified directory
   server        Start up the central server (requires a database)
   workers       Control worker daemons, use: (start | stop | restart | status | run)
   console       Launch a CloudCrowd console, connected to the central database
   load_schema   Load the schema into the database specified by database.yml
 
-OPTIONS:
+Options:
     EOS
 
     # Creating a CloudCrowd::CommandLine runs from the contents of ARGV.
@@ -161,7 +163,7 @@ OPTIONS:
       opts.on('-p', '--port PORT', 'central server port number') do |port_num|
         @options[:port] = port_num
       end
-      opts.on('-e', '--environment ENV', 'Sinatra environment (code reloading)') do |env|
+      opts.on('-e', '--environment ENV', 'server environment (sinatra)') do |env|
        @options[:environment] = env
      end
      opts.on_tail('-v', '--version', 'show version') do
--- data/lib/cloud_crowd/daemon.rb (0.0.5)
+++ data/lib/cloud_crowd/daemon.rb (0.0.6)
@@ -1,7 +1,5 @@
 CloudCrowd.configure(ENV['CLOUD_CROWD_CONFIG'])
 
-require 'cloud_crowd/worker'
-
 module CloudCrowd
 
   # A CloudCrowd::Daemon, started by the Daemons gem, runs a CloudCrowd::Worker in
@@ -15,39 +13,81 @@ module CloudCrowd
   # supports.
   class Daemon
 
-    MIN_WAIT = CloudCrowd.config[:min_worker_wait]
-    MAX_WAIT = CloudCrowd.config[:max_worker_wait]
-    WAIT_MULTIPLIER = CloudCrowd.config[:worker_wait_multiplier]
+    # The back-off factor used to slow down requests for new work units
+    # when the queue is empty.
+    WAIT_MULTIPLIER = 1.5
+
+    MIN_WAIT = CloudCrowd.config[:min_worker_wait]
+    MAX_WAIT = CloudCrowd.config[:max_worker_wait]
 
     def initialize
-      @wait_time = MIN_WAIT
-      @worker = Worker.new
-      Signal.trap('INT', 'EXIT')
-      Signal.trap('KILL', 'EXIT')
-      Signal.trap('TERM', 'EXIT')
+      @wait_time = MIN_WAIT
+      @worker = Worker.new
+      Signal.trap('INT') { kill_worker_and_exit }
+      Signal.trap('KILL') { kill_worker_and_exit }
+      Signal.trap('TERM') { kill_worker_and_exit }
     end
 
-    # Loop forever, fetching WorkUnits.
-    # TODO: Workers busy with their work units won't die until the unit has
-    # been finished. This should probably be wrapped in an appropriately lengthy
-    # timeout, or should be killable from the outside by terminating the thread.
-    # In either case, nasty un-cleaned-up bits might be left behind.
+    # Spin up our worker and monitoring threads. The monitor's the boss, and
+    # will feel no compunction in killing the worker thread if necessary.
+    # Check in before starting up. If check in fails, there's no sense in going.
     def run
-      loop do
-        @worker.fetch_work_unit
-        if @worker.has_work?
-          @wait_time = MIN_WAIT
-          while @worker.has_work?
-            @worker.run
-            sleep 0.01 # So as to listen for incoming signals.
+      @worker.check_in('starting')
+      @work_thread = run_worker
+      @monitor_thread = run_monitor
+      @monitor_thread.join
+    end
+
+
+    private
+
+    # Loop forever, fetching WorkUnits and processing them.
+    def run_worker
+      Thread.new do
+        loop do
+          @worker.fetch_work_unit
+          if @worker.has_work?
+            @wait_time = MIN_WAIT
+            while @worker.has_work?
+              @worker.run
+              sleep 0.01 # So as to listen for incoming signals.
+            end
+          else
+            @wait_time = [@wait_time * WAIT_MULTIPLIER, MAX_WAIT].min
+            sleep @wait_time
           end
-        else
-          @wait_time = [@wait_time * WAIT_MULTIPLIER, MAX_WAIT].min
-          sleep @wait_time
         end
       end
     end
 
+    # Checks in to let the central server know it's still alive every
+    # CHECK_IN_INTERVAL seconds. Restarts the work_thread if it has died.
+    def run_monitor
+      Thread.new do
+        sleep Worker::CHECK_IN_INTERVAL
+        loop do
+          @work_thread = run_monitor unless @work_thread.alive? || @exit_started
+          @worker.check_in(@work_thread.status)
+          sleep Worker::CHECK_IN_INTERVAL
+        end
+      end
+    end
+
+    def running?
+      @work_thread.alive? || @monitor_thread.alive?
+    end
+
+    # At exit, kill the worker thread, gently at first, then forcefully.
+    def kill_worker_and_exit
+      @worker.check_out
+      @exit_started = Time.now
+      @work_thread.kill && @monitor_thread.kill
+      sleep 0.3 while running? && Time.now - @exit_started < WORKER_EXIT_WAIT
+      return Process.exit unless running?
+      @work_thread.kill! && @monitor_thread.kill!
+      Process.exit
+    end
+
   end
 
 end
--- data/lib/cloud_crowd/exceptions.rb (0.0.5)
+++ data/lib/cloud_crowd/exceptions.rb (0.0.6)
@@ -9,6 +9,11 @@ module CloudCrowd
   class ActionNotFound < Error #:nodoc:
   end
 
+  # StorageNotFound is raised when config.yml specifies a storage back end that
+  # doesn't exist.
+  class StorageNotFound < Error #:nodoc:
+  end
+
   # StatusUnspecified is raised when a WorkUnit returns without a valid
   # status code.
   class StatusUnspecified < Error #:nodoc:
--- data/lib/cloud_crowd/helpers/resources.rb (0.0.5)
+++ data/lib/cloud_crowd/helpers/resources.rb (0.0.6)
@@ -24,8 +24,8 @@ module CloudCrowd
     # with no content.
     def dequeue_work_unit(offset=0)
       handle_conflicts do
-        actions = params[:enabled_actions].split(',')
-        WorkUnit.dequeue(actions, offset)
+        worker, actions = params[:worker_name], params[:worker_actions].split(',')
+        WorkUnit.dequeue(worker, actions, offset)
       end
     end
 
--- data/lib/cloud_crowd/models/job.rb (0.0.5)
+++ data/lib/cloud_crowd/models/job.rb (0.0.6)
@@ -22,7 +22,7 @@ module CloudCrowd
         :inputs => h['inputs'].to_json,
         :action => h['action'],
         :options => (h['options'] || {}).to_json,
-        :owner_email => h['owner_email'],
+        :email => h['email'],
         :callback_url => h['callback_url']
       )
     end
@@ -97,11 +97,6 @@ module CloudCrowd
       raise ActionNotFound, "no action named: '#{self.action}' could be found"
     end
 
-    # Get the displayable status name of the Job's status code.
-    def display_status
-      CloudCrowd.display_status(self.status)
-    end
-
    # How complete is this Job?
    def percent_complete
      return 0 if splitting?
@@ -125,14 +120,15 @@
     # WorkUnits, as well as any completed outputs.
     def to_json(opts={})
       atts = {
-        'id' => self.id,
-        'color' => self.color,
-        'status' => self.display_status,
-        'percent_complete' => self.percent_complete,
-        'work_units' => self.work_units.count,
-        'time_taken' => self.time_taken
+        'id' => id,
+        'color' => color,
+        'status' => display_status,
+        'percent_complete' => percent_complete,
+        'work_units' => work_units.count,
+        'time_taken' => time_taken
       }
-      atts.merge!({'outputs' => JSON.parse(self.outputs)}) if self.outputs
+      atts['outputs'] = JSON.parse(outputs) if outputs
+      atts['email'] = email if email
       atts.to_json
     end
 
--- data/lib/cloud_crowd/models/work_unit.rb (0.0.5)
+++ data/lib/cloud_crowd/models/work_unit.rb (0.0.6)
@@ -8,6 +8,7 @@ module CloudCrowd
     include ModelStatus
 
     belongs_to :job
+    belongs_to :worker_record
 
     validates_presence_of :job_id, :status, :input, :action
 
@@ -17,13 +18,13 @@ module CloudCrowd
     # +enabled_actions+ must be passed to whitelist the types of WorkUnits than
     # can be retrieved for processing. Optionally, specify the +offset+ to peek
     # further on in line.
-    def self.dequeue(enabled_actions=[], offset=0)
+    def self.dequeue(worker_name, enabled_actions=[], offset=0)
       unit = self.first(
-        :conditions => {:status => INCOMPLETE, :taken => false, :action => enabled_actions},
+        :conditions => {:status => INCOMPLETE, :worker_record_id => nil, :action => enabled_actions},
         :order => "created_at asc",
         :offset => offset
       )
-      unit ? unit.update_attributes(:taken => true) && unit : nil
+      unit ? unit.assign_to(worker_name) : nil
     end
 
     # After saving a WorkUnit, its Job should check if it just became complete.
@@ -34,11 +35,11 @@ module CloudCrowd
     # Mark this unit as having finished successfully.
     def finish(output, time_taken)
       update_attributes({
-        :status   => SUCCEEDED,
-        :taken    => false,
-        :attempts => self.attempts + 1,
-        :output   => output,
-        :time     => time_taken
+        :status        => SUCCEEDED,
+        :worker_record => nil,
+        :attempts      => self.attempts + 1,
+        :output        => output,
+        :time          => time_taken
       })
     end
 
@@ -47,22 +48,29 @@ module CloudCrowd
       tries = self.attempts + 1
       return try_again if tries < CloudCrowd.config[:work_unit_retries]
       update_attributes({
-        :status   => FAILED,
-        :taken    => false,
-        :attempts => tries,
-        :output   => output,
-        :time     => time_taken
+        :status        => FAILED,
+        :worker_record => nil,
+        :attempts      => tries,
+        :output        => output,
+        :time          => time_taken
       })
     end
 
     # Ever tried. Ever failed. No matter. Try again. Fail again. Fail better.
     def try_again
       update_attributes({
-        :taken    => false,
-        :attempts => self.attempts + 1
+        :worker_record => nil,
+        :attempts      => self.attempts + 1
       })
     end
 
+    # When a Worker checks out a WorkUnit, establish the connection between
+    # WorkUnit and WorkerRecord.
+    def assign_to(worker_name)
+      self.worker_record = WorkerRecord.find_by_name!(worker_name)
+      self.save ? self : nil
+    end
+
     # The JSON representation of a WorkUnit shares the Job's options with all
     # its sister WorkUnits.
     def to_json
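
WorkUnit.dequeue now takes the requesting worker's name, so a dequeued unit can be pinned to a WorkerRecord via assign_to. On the wire, that means the dequeue request carries worker_name and worker_actions parameters (see the post '/work' route and the dequeue_work_unit helper above). A rough sketch of that request; the host, port, and action list are placeholders, and in practice the Worker class issues this call itself:

    # Sketch of a worker daemon's dequeue request to the central server.
    require 'net/http'
    require 'socket'
    require 'uri'

    response = Net::HTTP.post_form(
      URI.parse('http://localhost:9173/work'),                    # placeholder central server
      'worker_name'    => "#{Process.pid}@#{Socket.gethostname}", # pid@hostname, as WorkerRecord expects
      'worker_actions' => 'word_count,process_pdfs'               # actions this worker has enabled
    )
    puts response.code   # the response body carries the dequeued WorkUnit's JSON, if any
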
--- /dev/null
+++ data/lib/cloud_crowd/models/worker_record.rb (0.0.6)
@@ -0,0 +1,61 @@
+module CloudCrowd
+
+  # A WorkerRecord is a recording of an active worker daemon running remotely.
+  # Every time it checks in, we keep track of its status. The attributes shown
+  # here may lag their actual values by up to Worker::CHECK_IN_INTERVAL seconds.
+  class WorkerRecord < ActiveRecord::Base
+
+    EXPIRES_AFTER = 2 * Worker::CHECK_IN_INTERVAL
+
+    has_one :work_unit
+
+    validates_presence_of :name, :thread_status
+
+    before_destroy :clear_work_units
+
+    named_scope :alive, lambda { {:conditions => ['updated_at > ?', Time.now - EXPIRES_AFTER]} }
+    named_scope :dead, lambda { {:conditions => ['updated_at <= ?', Time.now - EXPIRES_AFTER]} }
+
+    # Save a Worker's current status to the database.
+    def self.check_in(params)
+      attrs = {:thread_status => params[:thread_status], :updated_at => Time.now}
+      self.find_or_create_by_name(params[:name]).update_attributes!(attrs)
+    end
+
+    # Remove a terminated Worker's record from the database.
+    def self.check_out(params)
+      self.find_by_name(params[:name]).destroy
+    end
+
+    # We consider the worker to be alive if it's checked in more recently
+    # than twice the expected interval ago.
+    def alive?
+      updated_at > Time.now - EXPIRES_AFTER
+    end
+
+    # Derive the Worker's PID on the remote machine from the name.
+    def pid
+      @pid ||= self.name.split('@').first
+    end
+
+    # Derive the hostname from the Worker's name.
+    def hostname
+      @hostname ||= self.name.split('@').last
+    end
+
+    def to_json(opts={})
+      {
+        'name' => name,
+        'status' => work_unit && work_unit.display_status,
+      }.to_json
+    end
+
+
+    private
+
+    def clear_work_units
+      WorkUnit.update_all('worker_record_id = null', "worker_record_id = #{id}")
+    end
+
+  end
+end
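
A WorkerRecord counts as alive when its last check-in is newer than EXPIRES_AFTER, i.e. twice the worker's check-in interval. A tiny sketch of that liveness window, with a made-up interval standing in for Worker::CHECK_IN_INTERVAL:

    # Liveness window sketch: alive means "checked in within the last 2 * interval".
    CHECK_IN_INTERVAL = 60                     # placeholder for Worker::CHECK_IN_INTERVAL (seconds)
    EXPIRES_AFTER     = 2 * CHECK_IN_INTERVAL  # 120 seconds

    def alive?(last_check_in, now = Time.now)
      last_check_in > now - EXPIRES_AFTER
    end

    now = Time.now
    puts alive?(now - 90,  now)   # true  -- checked in 90 seconds ago
    puts alive?(now - 150, now)   # false -- silent for 150 seconds, treated as dead
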
--- data/lib/cloud_crowd/models.rb (0.0.5)
+++ data/lib/cloud_crowd/models.rb (0.0.6)
@@ -27,8 +27,14 @@ module CloudCrowd
     def complete?; COMPLETE.include?(self.status); end
     def incomplete?; INCOMPLETE.include?(self.status); end
 
+    # Get the displayable status name of the model's status code.
+    def display_status
+      CloudCrowd.display_status(self.status)
+    end
+
   end
 end
 
 require 'cloud_crowd/models/job'
-require 'cloud_crowd/models/work_unit'
+require 'cloud_crowd/models/work_unit'
+require 'cloud_crowd/models/worker_record'
--- data/lib/cloud_crowd/schema.rb (0.0.5)
+++ data/lib/cloud_crowd/schema.rb (0.0.6)
@@ -9,7 +9,7 @@ ActiveRecord::Schema.define(:version => 1) do
     t.text "outputs"
     t.float "time"
     t.string "callback_url"
-    t.string "owner_email"
+    t.string "email"
     t.integer "lock_version", :default => 0, :null => false
     t.datetime "created_at"
     t.datetime "updated_at"
@@ -22,15 +22,24 @@ ActiveRecord::Schema.define(:version => 1) do
     t.string "action", :null => false
     t.integer "attempts", :default => 0, :null => false
     t.integer "lock_version", :default => 0, :null => false
-    t.boolean "taken", :default => false, :null => false
+    t.integer "worker_record_id"
     t.float "time"
     t.text "output"
     t.datetime "created_at"
     t.datetime "updated_at"
   end
+
+  create_table "worker_records", :force => true do |t|
+    t.string "name", :null => false
+    t.string "thread_status", :null => false
+    t.datetime "created_at"
+    t.datetime "updated_at"
+  end
 
   add_index "jobs", ["status"], :name => "index_jobs_on_status"
   add_index "work_units", ["job_id"], :name => "index_work_units_on_job_id"
-  add_index "work_units", ["status", "taken", "action"], :name => "index_work_units_on_status_and_taken_and_action"
+  add_index "work_units", ["status", "worker_record_id", "action"], :name => "index_work_units_on_status_and_worker_record_id_and_action"
+  add_index "worker_records", ["name"], :name => "index_worker_records_on_name"
+  add_index "worker_records", ["updated_at"], :name => "index_worker_records_on_updated_at"
 
 end