documentcloud-cloud-crowd 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. data/README +8 -8
  2. data/cloud-crowd.gemspec +8 -8
  3. data/config/config.example.ru +8 -2
  4. data/config/config.example.yml +6 -15
  5. data/examples/process_pdfs_example.rb +1 -1
  6. data/examples/word_count_example.rb +1 -0
  7. data/lib/cloud-crowd.rb +6 -5
  8. data/lib/cloud_crowd/action.rb +11 -7
  9. data/lib/cloud_crowd/asset_store/filesystem_store.rb +5 -0
  10. data/lib/cloud_crowd/asset_store/s3_store.rb +7 -3
  11. data/lib/cloud_crowd/asset_store.rb +1 -1
  12. data/lib/cloud_crowd/command_line.rb +14 -53
  13. data/lib/cloud_crowd/exceptions.rb +4 -0
  14. data/lib/cloud_crowd/helpers/authorization.rb +2 -2
  15. data/lib/cloud_crowd/helpers/resources.rb +0 -20
  16. data/lib/cloud_crowd/models/job.rb +25 -26
  17. data/lib/cloud_crowd/models/node_record.rb +81 -0
  18. data/lib/cloud_crowd/models/work_unit.rb +70 -30
  19. data/lib/cloud_crowd/models.rb +1 -1
  20. data/lib/cloud_crowd/node.rb +87 -0
  21. data/lib/cloud_crowd/schema.rb +19 -16
  22. data/lib/cloud_crowd/{app.rb → server.rb} +25 -30
  23. data/lib/cloud_crowd/worker.rb +50 -74
  24. data/public/css/admin_console.css +26 -14
  25. data/public/images/server.png +0 -0
  26. data/public/js/admin_console.js +45 -18
  27. data/test/acceptance/test_failing_work_units.rb +1 -1
  28. data/test/acceptance/{test_app.rb → test_server.rb} +15 -15
  29. data/test/acceptance/test_word_count.rb +3 -9
  30. data/test/blueprints.rb +0 -1
  31. data/test/config/config.ru +1 -1
  32. data/test/config/config.yml +1 -3
  33. data/test/unit/test_configuration.rb +1 -1
  34. data/test/unit/test_job.rb +1 -0
  35. data/test/unit/test_work_unit.rb +2 -4
  36. data/views/index.erb +13 -8
  37. metadata +9 -9
  38. data/lib/cloud_crowd/daemon.rb +0 -95
  39. data/lib/cloud_crowd/models/worker_record.rb +0 -61
  40. data/lib/cloud_crowd/runner.rb +0 -15
@@ -0,0 +1,81 @@
1
module CloudCrowd

  # A NodeRecord is the record of a Node running remotely. We can use it to
  # assign work units to the node, and keep track of its status.
  class NodeRecord < ActiveRecord::Base

    has_many :work_units

    validates_presence_of :host, :ip_address, :port

    # Release any work units still pointing at this node before it is removed.
    before_destroy :clear_work_units

    # Available Nodes haven't used up their maximum number of workers yet.
    # A null max_workers places no limit on the node. Results are ordered by
    # updated_at, oldest first.
    named_scope :available, {
      :conditions => ['(max_workers is null or (select count(*) from work_units where node_record_id = node_records.id) < max_workers)'],
      :order      => 'updated_at asc'
    }

    # Save a Node's current status to the database, creating the record for
    # params[:host] if it doesn't exist yet. The IP address is read from the
    # +request+; port, max_workers and enabled_actions come from +params+.
    # Raises if the resulting record is invalid (update_attributes!).
    def self.check_in(params, request)
      attrs = {
        :ip_address       => request.ip,
        :port             => params[:port],
        :max_workers      => params[:max_workers],
        :enabled_actions  => params[:enabled_actions],
        :updated_at       => Time.now
      }
      self.find_or_create_by_host(params[:host]).update_attributes!(attrs)
    end

    # POST the work unit's JSON to the node's '/work' resource, then record the
    # 'pid' from the JSON response on the unit. Returns true when the unit was
    # handed off. If the connection is refused, the record destroys itself and
    # false is returned, so callers can tell the unit was NOT sent. (Returning
    # the result of +destroy+ here would be truthy.)
    def send_work_unit(unit)
      result = node['/work'].post(:work_unit => unit.to_json)
      unit.assign_to(self, JSON.parse(result)['pid'])
      touch
      true
    rescue Errno::ECONNREFUSED
      self.destroy # Couldn't post to node, assume it's gone away.
      false
    end

    # The list of action names this node has enabled, split out of the
    # comma-separated enabled_actions column. A nil column yields an empty list.
    def actions
      enabled_actions.to_s.split(',')
    end

    # Is the node at (or over) its worker limit? Falsy when max_workers is
    # not set.
    def busy?
      max_workers && work_units.count >= max_workers
    end

    # The base URL of the remote node, memoized.
    def url
      @url ||= "http://#{host}:#{port}"
    end

    # A memoized RestClient::Resource pointing at the node's URL. The
    # configured login and password are passed along when
    # :use_http_authentication is turned on.
    def node
      return @node if @node
      params = [url]
      params += [CloudCrowd.config[:login], CloudCrowd.config[:password]] if CloudCrowd.config[:use_http_authentication]
      @node = RestClient::Resource.new(*params)
    end

    # Human-readable status: 'busy' or 'available'.
    def display_status
      busy? ? 'busy' : 'available'
    end

    # The worker_pid of every work unit currently assigned to this node.
    def worker_pids
      work_units.all(:select => 'worker_pid').map(&:worker_pid)
    end

    # The JSON representation of a node: its host, worker pids and status.
    # +opts+ is accepted for interface compatibility but is not used.
    def to_json(opts={})
      { 'host'    => host,
        'workers' => worker_pids,
        'status'  => display_status,
      }.to_json
    end


    private

    # Detach every work unit from this node by nulling its node_record_id and
    # worker_pid. The id is passed as a bound condition rather than being
    # interpolated into the SQL string.
    def clear_work_units
      WorkUnit.update_all('node_record_id = null, worker_pid = null', ['node_record_id = ?', id])
    end

  end
end
@@ -8,39 +8,77 @@ module CloudCrowd
8
8
  include ModelStatus
9
9
 
10
10
  belongs_to :job
11
- belongs_to :worker_record
11
+ belongs_to :node_record
12
12
 
13
13
  validates_presence_of :job_id, :status, :input, :action
14
+
15
+ named_scope :taken, {:conditions => ["worker_pid is not null"]}
16
+ named_scope :available, {:conditions => {:worker_pid => nil, :status => INCOMPLETE}}
17
+ named_scope :reserved, {:conditions => {:worker_pid => 0}}
14
18
 
15
- after_save :check_for_job_completion
19
+ # Attempt to send a list of work_units to nodes with available capacity.
20
+ # Do this in a separate thread so that the request can return, satisfied.
21
+ # A single application server process stops the same WorkUnit from being
22
+ # distributed to multiple nodes by reserving all the available ones.
23
def self.distribute_to_nodes
  # Nothing was reserved, so there is nothing to hand out (and nothing to
  # un-reserve afterwards).
  return unless WorkUnit.reserve_available
  begin
    work_units      = WorkUnit.reserved
    available_nodes = NodeRecord.available
    until work_units.empty? do
      node = available_nodes.shift
      break unless node # Ran out of nodes; leftover units stay queued.
      unit = work_units.first
      # A node that can't run the unit at the head of the queue is dropped
      # from this round.
      next unless node.actions.include?(unit.action)
      if node.send_work_unit(unit)
        work_units.shift
        # Nodes with spare capacity go to the back of the line for reuse.
        available_nodes.push(node) unless node.busy?
      end
    end
  ensure
    # Always release whatever is still reserved, even if sending raised, so
    # that units are never left stuck with worker_pid = 0.
    WorkUnit.cancel_reservations
  end
end
40
+
41
+ # Reserves all available WorkUnits. Returns false if there were none
42
+ # available.
43
def self.reserve_available
  # update_all's result is compared against zero: any positive value means
  # at least one available unit was marked with the reservation pid (0).
  reserved_count = WorkUnit.available.update_all('worker_pid = 0')
  reserved_count > 0
end
16
46
 
17
- # Find the first available WorkUnit in the queue, and take it out.
18
- # +enabled_actions+ must be passed to whitelist the types of WorkUnits than
19
- # can be retrieved for processing. Optionally, specify the +offset+ to peek
20
- # further on in line.
21
- def self.dequeue(worker_name, enabled_actions=[], offset=0)
22
- unit = self.first(
23
- :conditions => {:status => INCOMPLETE, :worker_record_id => nil, :action => enabled_actions},
24
- :order => "created_at asc",
25
- :offset => offset
26
- )
27
- unit ? unit.assign_to(worker_name) : nil
47
# Release every reserved WorkUnit (worker_pid of 0) by setting its
# worker_pid back to null.
def self.cancel_reservations
  still_reserved = WorkUnit.reserved
  still_reserved.update_all('worker_pid = null')
end
29
50
 
30
- # After saving a WorkUnit, its Job should check if it just became complete.
31
- def check_for_job_completion
32
- self.job.check_for_completion if complete?
51
# Look up the WorkUnit for a worker +name+. The name is split on '@' into
# a pid and a host; the host selects the NodeRecord and the pid selects the
# unit among that node's work units. Falsy when no such node exists.
def self.find_by_worker_name(name)
  worker_pid, node_host = name.split('@')
  record = NodeRecord.find_by_host(node_host)
  record ? record.work_units.find_by_worker_pid(worker_pid) : record
end
34
56
 
35
57
  # Mark this unit as having finished successfully.
58
+ # TODO: Refactor alongside check_for_completion ... look into doubleparse.
36
59
  def finish(output, time_taken)
37
- update_attributes({
38
- :status => SUCCEEDED,
39
- :worker_record => nil,
40
- :attempts => self.attempts + 1,
41
- :output => output,
42
- :time => time_taken
43
- })
60
+ if splitting?
61
+ [JSON.parse(JSON.parse(output)['output'])].flatten.each do |wu_input|
62
+ WorkUnit.create(
63
+ :job => job,
64
+ :action => action,
65
+ :input => wu_input,
66
+ :status => PROCESSING
67
+ )
68
+ end
69
+ self.destroy
70
+ job.set_next_status if job.work_units.splitting.count <= 0
71
+ else
72
+ update_attributes({
73
+ :status => SUCCEEDED,
74
+ :node_record => nil,
75
+ :worker_pid => nil,
76
+ :attempts => attempts + 1,
77
+ :output => output,
78
+ :time => time_taken
79
+ })
80
+ job.check_for_completion
81
+ end
44
82
  end
45
83
 
46
84
  # Mark this unit as having failed. May attempt a retry.
@@ -49,26 +87,28 @@ module CloudCrowd
49
87
  return try_again if tries < CloudCrowd.config[:work_unit_retries]
50
88
  update_attributes({
51
89
  :status => FAILED,
52
- :worker_record => nil,
90
+ :node_record => nil,
91
+ :worker_pid => nil,
53
92
  :attempts => tries,
54
93
  :output => output,
55
94
  :time => time_taken
56
95
  })
96
+ self.job.check_for_completion
57
97
  end
58
98
 
59
99
  # Ever tried. Ever failed. No matter. Try again. Fail again. Fail better.
60
100
  def try_again
61
101
  update_attributes({
62
- :worker_record => nil,
63
- :attempts => self.attempts + 1
102
+ :node_record => nil,
103
+ :worker_pid => nil,
104
+ :attempts => self.attempts + 1
64
105
  })
65
106
  end
66
107
 
67
108
  # When a Worker checks out a WorkUnit, establish the connection between
68
- # WorkUnit and WorkerRecord.
69
- def assign_to(worker_name)
70
- self.worker_record = WorkerRecord.find_by_name!(worker_name)
71
- self.save ? self : nil
109
+ # WorkUnit and NodeRecord.
110
def assign_to(node_record, worker_pid)
  # Record both the owning node and the pid of the worker in a single
  # update; update_attributes! raises if the record can't be saved.
  assignment = {:node_record => node_record, :worker_pid => worker_pid}
  update_attributes!(assignment)
end
73
113
 
74
114
  # The JSON representation of a WorkUnit shares the Job's options with all
@@ -36,5 +36,5 @@ module CloudCrowd
36
36
  end
37
37
 
38
38
  require 'cloud_crowd/models/job'
39
+ require 'cloud_crowd/models/node_record'
39
40
  require 'cloud_crowd/models/work_unit'
40
- require 'cloud_crowd/models/worker_record'
@@ -0,0 +1,87 @@
1
module CloudCrowd

  # A Node is a small Sinatra application that receives work units over
  # HTTP, forks a Worker for each one, and checks in with (and out of) the
  # central server.
  class Node < Sinatra::Default

    # A Node's default port. You only run a single node per machine, so they
    # can all use the same port without problems.
    DEFAULT_PORT = 9063

    attr_reader :server, :asset_store

    set :root, ROOT
    set :authorization_realm, "CloudCrowd"

    helpers Helpers

    # methodoverride allows the _method param.
    enable :methodoverride

    # Enabling HTTP Authentication turns it on for all requests.
    before do
      login_required if CloudCrowd.config[:use_http_authentication]
    end

    # To monitor a Node with Monit, God, Nagios, or another tool, you can hit
    # /heartbeat to make sure it's still up.
    get '/heartbeat' do
      "buh-bump"
    end

    # Accept a work unit (JSON in the work_unit param), fork a child process
    # that builds a Worker for it, detach the child, and reply with its pid.
    post '/work' do
      pid = fork { Worker.new(self, JSON.parse(params[:work_unit])) }
      Process.detach(pid)
      json :pid => pid
    end

    # Set up the node's state, install signal handlers, start the HTTP server
    # in a background thread, check in with the central server, and then
    # block on the server thread. A nil +port+ falls back to DEFAULT_PORT.
    def initialize(port=DEFAULT_PORT)
      require 'json'
      @server           = CloudCrowd.central_server
      @host             = Socket.gethostname
      @enabled_actions  = CloudCrowd.actions.keys
      @asset_store      = AssetStore.new
      @port             = port || DEFAULT_PORT

      trap_signals
      start_server
      check_in
      @server_thread.join
    end

    # Tell the central server about this node: its port, its worker limit
    # and the comma-separated list of enabled actions. If the connection is
    # refused, print a message and exit.
    def check_in
      @server["/node/#{@host}"].put(
        :port             => @port,
        :max_workers      => CloudCrowd.config[:max_workers],
        :enabled_actions  => @enabled_actions.join(',')
      )
    rescue Errno::ECONNREFUSED
      puts "Failed to connect to the central server (#{@server.to_s}), exiting..."
      raise SystemExit
    end

    # Ask the central server to remove this node.
    def check_out
      @server["/node/#{@host}"].delete
    end

    # Run the Thin server for this app on all interfaces, in its own thread.
    # Thin's signal handling is turned off; this class installs its own.
    def start_server
      @server_thread = Thread.new do
        Thin::Server.start('0.0.0.0', @port, self, :signals => false)
      end
    end


    private

    # Shut down cleanly on INT and TERM. SIGKILL is deliberately not listed:
    # it can never be caught by a process, and Signal.trap rejects it.
    def trap_signals
      Signal.trap('INT')  { shut_down }
      Signal.trap('TERM') { shut_down }
    end

    # Check out from the central server and exit. The exit sits in an ensure
    # so the process still terminates when the check-out request fails.
    def shut_down
      check_out
    ensure
      Process.exit
    end

  end

end
@@ -10,7 +10,16 @@ ActiveRecord::Schema.define(:version => 1) do
10
10
  t.float "time"
11
11
  t.string "callback_url"
12
12
  t.string "email"
13
- t.integer "lock_version", :default => 0, :null => false
13
+ t.datetime "created_at"
14
+ t.datetime "updated_at"
15
+ end
16
+
17
+ create_table "node_records", :force => true do |t|
18
+ t.string "host", :null => false
19
+ t.string "ip_address", :null => false
20
+ t.integer "port", :null => false
21
+ t.string "enabled_actions", :default => '', :null => false
22
+ t.integer "max_workers"
14
23
  t.datetime "created_at"
15
24
  t.datetime "updated_at"
16
25
  end
@@ -21,25 +30,19 @@ ActiveRecord::Schema.define(:version => 1) do
21
30
  t.text "input", :null => false
22
31
  t.string "action", :null => false
23
32
  t.integer "attempts", :default => 0, :null => false
24
- t.integer "lock_version", :default => 0, :null => false
25
- t.integer "worker_record_id"
33
+ t.integer "node_record_id"
34
+ t.integer "worker_pid"
26
35
  t.float "time"
27
36
  t.text "output"
28
37
  t.datetime "created_at"
29
38
  t.datetime "updated_at"
30
39
  end
31
-
32
- create_table "worker_records", :force => true do |t|
33
- t.string "name", :null => false
34
- t.string "thread_status", :null => false
35
- t.datetime "created_at"
36
- t.datetime "updated_at"
37
- end
38
-
39
- add_index "jobs", ["status"], :name => "index_jobs_on_status"
40
- add_index "work_units", ["job_id"], :name => "index_work_units_on_job_id"
41
- add_index "work_units", ["status", "worker_record_id", "action"], :name => "index_work_units_on_status_and_worker_record_id_and_action"
42
- add_index "worker_records", ["name"], :name => "index_worker_records_on_name"
43
- add_index "worker_records", ["updated_at"], :name => "index_worker_records_on_updated_at"
44
40
 
41
+ # Here be indices. After looking, it seems faster not to have them at all.
42
+ #
43
+ # add_index "jobs", ["status"], :name => "index_jobs_on_status"
44
+ # add_index "work_units", ["job_id"], :name => "index_work_units_on_job_id"
45
+ # add_index "work_units", ["worker_pid"], :name => "index_work_units_on_worker_pid"
46
+ # add_index "work_units", ["worker_pid", "status"], :name => "index_work_units_on_worker_pid_and_status"
47
+ # add_index "work_units", ["worker_pid", "node_record_id"], :name => "index_work_units_on_worker_pid_and_node_record_id"
45
48
  end
@@ -16,7 +16,7 @@ module CloudCrowd
16
16
  # [post /work] Dequeue the next WorkUnit, and hand it off to the worker.
17
17
  # [put /work/:unit_id] Mark a finished WorkUnit as completed or failed, with results.
18
18
  # [put /worker] Keep a record of an actively running worker.
19
- class App < Sinatra::Default
19
+ class Server < Sinatra::Default
20
20
 
21
21
  set :root, ROOT
22
22
  set :authorization_realm, "CloudCrowd"
@@ -42,15 +42,14 @@ module CloudCrowd
42
42
  get '/status' do
43
43
  json(
44
44
  'jobs' => Job.incomplete,
45
- 'workers' => WorkerRecord.alive(:order => 'name desc'),
45
+ 'nodes' => NodeRecord.all(:order => 'host desc'),
46
46
  'work_unit_count' => WorkUnit.incomplete.count
47
47
  )
48
48
  end
49
49
 
50
- # Get the JSON for a worker record's work unit, if one exists.
50
+ # Get the JSON for what a worker is up to.
51
51
  get '/worker/:name' do
52
- record = WorkerRecord.find_by_name params[:name]
53
- json((record && record.work_unit) || {})
52
+ json WorkUnit.find_by_worker_name(params[:name]) || {}
54
53
  end
55
54
 
56
55
  # To monitor the central server with Monit, God, Nagios, or another
@@ -62,8 +61,11 @@ module CloudCrowd
62
61
  # PUBLIC API:
63
62
 
64
63
  # Start a new job. Accepts a JSON representation of the job-to-be.
64
+ # Distributes all work units to available nodes.
65
65
  post '/jobs' do
66
- json Job.create_from_request(JSON.parse(params[:job]))
66
+ job = Job.create_from_request(JSON.parse(params[:job]))
67
+ WorkUnit.distribute_to_nodes
68
+ json job
67
69
  end
68
70
 
69
71
  # Check the status of a job, returning the output if finished, and the
@@ -79,36 +81,29 @@ module CloudCrowd
79
81
  json nil
80
82
  end
81
83
 
82
- # INTERNAL WORKER DAEMON API:
84
+ # INTERNAL NODE API:
83
85
 
84
- # Internal method for worker daemons to fetch the work unit at the front
85
- # of the queue. Work unit is marked as taken and handed off to the worker.
86
- post '/work' do
87
- json dequeue_work_unit
86
+ put '/node/:host' do
87
+ NodeRecord.check_in(params, request)
88
+ WorkUnit.distribute_to_nodes
89
+ json nil
90
+ end
91
+
92
+ delete '/node/:host' do
93
+ NodeRecord.destroy_all(:host => params[:host])
94
+ json nil
88
95
  end
89
96
 
90
97
  # When workers are done with their unit, either successfully or in failure,
91
- # they mark it back on the central server and retrieve another. Failures
92
- # pull from one down in the queue, so as to not repeat the same unit.
98
+ # they mark it back on the central server and exit. Triggers distribution
99
+ # of pending work units.
93
100
  put '/work/:work_unit_id' do
94
- handle_conflicts(409) do
95
- case params[:status]
96
- when 'succeeded'
97
- current_work_unit.finish(params[:output], params[:time])
98
- json dequeue_work_unit
99
- when 'failed'
100
- current_work_unit.fail(params[:output], params[:time])
101
- json dequeue_work_unit(1)
102
- else
103
- error(500, "Completing a work unit must specify status.")
104
- end
101
+ case params[:status]
102
+ when 'succeeded' then current_work_unit.finish(params[:output], params[:time])
103
+ when 'failed' then current_work_unit.fail(params[:output], params[:time])
104
+ else error(500, "Completing a work unit must specify status.")
105
105
  end
106
- end
107
-
108
- # Every so often workers check in to let the central server know that
109
- # they're still alive. Keep up-to-date records
110
- put '/worker' do
111
- params[:terminated] ? WorkerRecord.check_out(params) : WorkerRecord.check_in(params)
106
+ WorkUnit.distribute_to_nodes
112
107
  json nil
113
108
  end
114
109
 
@@ -10,10 +10,6 @@ module CloudCrowd
10
10
  # having failed.
11
11
  class Worker
12
12
 
13
- # The time between worker check-ins with the central server, informing
14
- # it of the current status, and simply that it's still alive.
15
- CHECK_IN_INTERVAL = 60
16
-
17
13
  # Wait five seconds to retry, after internal communication errors.
18
14
  RETRY_WAIT = 5
19
15
 
@@ -22,32 +18,30 @@ module CloudCrowd
22
18
  # Spinning up a worker will create a new AssetStore with a persistent
23
19
  # connection to S3. This AssetStore gets passed into each action, for use
24
20
  # as it is run.
25
- def initialize
26
- @id = $$
27
- @hostname = Socket.gethostname
28
- @name = "#{@id}@#{@hostname}"
29
- @store = AssetStore.new
30
- @server = CloudCrowd.central_server
31
- @enabled_actions = CloudCrowd.actions.keys
32
- log 'started'
33
- end
34
-
35
- # Ask the central server for the first WorkUnit in line.
36
- def fetch_work_unit
37
- keep_trying_to "fetch a new work unit" do
38
- unit_json = @server['/work'].post(base_params)
39
- setup_work_unit(unit_json)
40
- end
41
- end
21
def initialize(node, work_unit)
  # Only INT and TERM are trapped. SIGKILL can never be caught by a
  # process, and Signal.trap rejects it, so trapping it would abort the
  # worker before it ever ran.
  Signal.trap('INT')  { shut_down }
  Signal.trap('TERM') { shut_down }
  @pid  = $$
  @node = node
  # Load the unit's details into instance variables, then process it
  # immediately: a worker is built for exactly one work unit.
  setup_work_unit(work_unit)
  run
end
30
+
31
+ # # Ask the central server for the first WorkUnit in line.
32
+ # def fetch_work_unit
33
+ # keep_trying_to "fetch a new work unit" do
34
+ # unit_json = @server['/work'].post(base_params)
35
+ # setup_work_unit(unit_json)
36
+ # end
37
+ # end
42
38
 
43
39
  # Return output to the central server, marking the current work unit as done.
44
40
  def complete_work_unit(result)
45
41
  keep_trying_to "complete work unit" do
46
42
  data = completion_params.merge({:status => 'succeeded', :output => result})
47
- unit_json = @server["/work/#{data[:id]}"].put(data)
43
+ @node.server["/work/#{data[:id]}"].put(data)
48
44
  log "finished #{display_work_unit} in #{data[:time]} seconds"
49
- clear_work_unit
50
- setup_work_unit(unit_json)
51
45
  end
52
46
  end
53
47
 
@@ -55,36 +49,11 @@ module CloudCrowd
55
49
  def fail_work_unit(exception)
56
50
  keep_trying_to "mark work unit as failed" do
57
51
  data = completion_params.merge({:status => 'failed', :output => {'output' => exception.message}.to_json})
58
- unit_json = @server["/work/#{data[:id]}"].put(data)
52
+ @node.server["/work/#{data[:id]}"].put(data)
59
53
  log "failed #{display_work_unit} in #{data[:time]} seconds\n#{exception.message}\n#{exception.backtrace}"
60
- clear_work_unit
61
- setup_work_unit(unit_json)
62
54
  end
63
55
  end
64
56
 
65
- # Check in with the central server. Let it know the condition of the work
66
- # thread, the action and status we're processing, and our hostname and PID.
67
- def check_in(thread_status)
68
- keep_trying_to "check in with central" do
69
- @server["/worker"].put({
70
- :name => @name,
71
- :thread_status => thread_status
72
- })
73
- end
74
- end
75
-
76
- # Inform the central server that this worker is finished. This is the only
77
- # remote method that doesn't retry on connection errors -- if the worker
78
- # can't connect to the central server while it's trying to shutdown, it
79
- # should close, regardless.
80
- def check_out
81
- @server["/worker"].put({
82
- :name => @name,
83
- :terminated => true
84
- })
85
- log 'exiting'
86
- end
87
-
88
57
  # We expect and require internal communication between the central server
89
58
  # and the workers to succeed. If it fails for any reason, log it, and then
90
59
  # keep trying the same request.
@@ -100,33 +69,31 @@ module CloudCrowd
100
69
  end
101
70
  end
102
71
 
103
- # Does this Worker have a job to do?
104
- def has_work?
105
- @action_name && @input && @options
106
- end
107
-
108
72
  # Loggable string of the current work unit.
109
73
  def display_work_unit
110
- "unit ##{@options['work_unit_id']} (#{@action_name})"
74
+ "unit ##{@options['work_unit_id']} (#{@action_name}/#{CloudCrowd.display_status(@status)})"
111
75
  end
112
76
 
113
77
  # Executes the current work unit, catching all exceptions as failures.
114
78
  def run_work_unit
115
- begin
116
- result = nil
117
- @action = CloudCrowd.actions[@action_name].new(@status, @input, @options, @store)
118
- Dir.chdir(@action.work_directory) do
119
- result = case @status
120
- when PROCESSING then @action.process
121
- when SPLITTING then @action.split
122
- when MERGING then @action.merge
123
- else raise Error::StatusUnspecified, "work units must specify their status"
79
+ @worker_thread = Thread.new do
80
+ begin
81
+ result = nil
82
+ @action = CloudCrowd.actions[@action_name].new(@status, @input, @options, @node.asset_store)
83
+ Dir.chdir(@action.work_directory) do
84
+ result = case @status
85
+ when PROCESSING then @action.process
86
+ when SPLITTING then @action.split
87
+ when MERGING then @action.merge
88
+ else raise Error::StatusUnspecified, "work units must specify their status"
89
+ end
124
90
  end
91
+ complete_work_unit({'output' => result}.to_json)
92
+ rescue Exception => e
93
+ fail_work_unit(e)
125
94
  end
126
- complete_work_unit({'output' => result}.to_json)
127
- rescue Exception => e
128
- fail_work_unit(e)
129
95
  end
96
+ @worker_thread.join
130
97
  end
131
98
 
132
99
  # Wraps <tt>run_work_unit</tt> to benchmark the execution time, if requested.
@@ -142,8 +109,7 @@ module CloudCrowd
142
109
  # Common parameters to send back to central.
143
110
  def base_params
144
111
  @base_params ||= {
145
- :worker_name => @name,
146
- :worker_actions => @enabled_actions.join(',')
112
+ :pid => @pid
147
113
  }
148
114
  end
149
115
 
@@ -157,9 +123,8 @@ module CloudCrowd
157
123
  end
158
124
 
159
125
  # Extract our instance variables from a WorkUnit's JSON.
160
- def setup_work_unit(unit_json)
161
- return false unless unit_json
162
- unit = JSON.parse(unit_json)
126
+ def setup_work_unit(unit)
127
+ return false unless unit
163
128
  @start_time = Time.now
164
129
  @action_name, @input, @options, @status = unit['action'], unit['input'], unit['options'], unit['status']
165
130
  @options['job_id'] = unit['job_id']
@@ -171,7 +136,7 @@ module CloudCrowd
171
136
 
172
137
  # Log a message to the daemon log. Includes PID for identification.
173
138
  def log(message)
174
- puts "Worker ##{@id}: #{message}" unless ENV['RACK_ENV'] == 'test'
139
+ puts "Worker ##{@pid}: #{message}" unless ENV['RACK_ENV'] == 'test'
175
140
  end
176
141
 
177
142
  # When we're done with a unit, clear out our instance variables to make way
@@ -181,6 +146,17 @@ module CloudCrowd
181
146
  @action, @action_name, @input, @options, @start_time = nil, nil, nil, nil, nil
182
147
  end
183
148
 
149
+ # Force the worker to quit, even if it's in the middle of processing.
150
+ # If it had checked out a work unit, the node should have released it on
151
+ # the central server already.
152
def shut_down
  if @worker_thread
    @worker_thread.kill
    # Thread#kill! only exists on Ruby 1.8. On later Rubies calling it would
    # raise NoMethodError and prevent the exit below, so it is only used
    # where available.
    if @worker_thread.alive? && @worker_thread.respond_to?(:kill!)
      @worker_thread.kill!
    end
  end
  Process.exit
end
159
+
184
160
  end
185
161
 
186
162
  end