documentcloud-cloud-crowd 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (40) hide show
  1. data/README +8 -8
  2. data/cloud-crowd.gemspec +8 -8
  3. data/config/config.example.ru +8 -2
  4. data/config/config.example.yml +6 -15
  5. data/examples/process_pdfs_example.rb +1 -1
  6. data/examples/word_count_example.rb +1 -0
  7. data/lib/cloud-crowd.rb +6 -5
  8. data/lib/cloud_crowd/action.rb +11 -7
  9. data/lib/cloud_crowd/asset_store/filesystem_store.rb +5 -0
  10. data/lib/cloud_crowd/asset_store/s3_store.rb +7 -3
  11. data/lib/cloud_crowd/asset_store.rb +1 -1
  12. data/lib/cloud_crowd/command_line.rb +14 -53
  13. data/lib/cloud_crowd/exceptions.rb +4 -0
  14. data/lib/cloud_crowd/helpers/authorization.rb +2 -2
  15. data/lib/cloud_crowd/helpers/resources.rb +0 -20
  16. data/lib/cloud_crowd/models/job.rb +25 -26
  17. data/lib/cloud_crowd/models/node_record.rb +81 -0
  18. data/lib/cloud_crowd/models/work_unit.rb +70 -30
  19. data/lib/cloud_crowd/models.rb +1 -1
  20. data/lib/cloud_crowd/node.rb +87 -0
  21. data/lib/cloud_crowd/schema.rb +19 -16
  22. data/lib/cloud_crowd/{app.rb → server.rb} +25 -30
  23. data/lib/cloud_crowd/worker.rb +50 -74
  24. data/public/css/admin_console.css +26 -14
  25. data/public/images/server.png +0 -0
  26. data/public/js/admin_console.js +45 -18
  27. data/test/acceptance/test_failing_work_units.rb +1 -1
  28. data/test/acceptance/{test_app.rb → test_server.rb} +15 -15
  29. data/test/acceptance/test_word_count.rb +3 -9
  30. data/test/blueprints.rb +0 -1
  31. data/test/config/config.ru +1 -1
  32. data/test/config/config.yml +1 -3
  33. data/test/unit/test_configuration.rb +1 -1
  34. data/test/unit/test_job.rb +1 -0
  35. data/test/unit/test_work_unit.rb +2 -4
  36. data/views/index.erb +13 -8
  37. metadata +9 -9
  38. data/lib/cloud_crowd/daemon.rb +0 -95
  39. data/lib/cloud_crowd/models/worker_record.rb +0 -61
  40. data/lib/cloud_crowd/runner.rb +0 -15
@@ -0,0 +1,81 @@
1
+ module CloudCrowd
2
+
3
+ # A NodeRecord is the record of a Node running remotely. We can use it to
4
+ # assign work units to the node, and keep track of its status.
5
+ class NodeRecord < ActiveRecord::Base
6
+
7
+ has_many :work_units
8
+
9
+ validates_presence_of :host, :ip_address, :port
10
+
11
+ before_destroy :clear_work_units
12
+
13
+ # Available Nodes haven't used up their maximum number of workers yet.
14
+ named_scope :available, {
15
+ :conditions => ['(max_workers is null or (select count(*) from work_units where node_record_id = node_records.id) < max_workers)'],
16
+ :order => 'updated_at asc'
17
+ }
18
+
19
+ # Save a Node's current status to the database.
20
+ def self.check_in(params, request)
21
+ attrs = {
22
+ :ip_address => request.ip,
23
+ :port => params[:port],
24
+ :max_workers => params[:max_workers],
25
+ :enabled_actions => params[:enabled_actions],
26
+ :updated_at => Time.now
27
+ }
28
+ self.find_or_create_by_host(params[:host]).update_attributes!(attrs)
29
+ end
30
+
31
+ def send_work_unit(unit)
32
+ result = node['/work'].post(:work_unit => unit.to_json)
33
+ unit.assign_to(self, JSON.parse(result)['pid'])
34
+ touch
35
+ rescue Errno::ECONNREFUSED
36
+ self.destroy # Couldn't post to node, assume it's gone away.
37
+ end
38
+
39
+ def actions
40
+ enabled_actions.split(',')
41
+ end
42
+
43
+ def busy?
44
+ max_workers && work_units.count >= max_workers
45
+ end
46
+
47
+ def url
48
+ @url ||= "http://#{host}:#{port}"
49
+ end
50
+
51
+ def node
52
+ return @node if @node
53
+ params = [url]
54
+ params += [CloudCrowd.config[:login], CloudCrowd.config[:password]] if CloudCrowd.config[:use_http_authentication]
55
+ @node = RestClient::Resource.new(*params)
56
+ end
57
+
58
+ def display_status
59
+ busy? ? 'busy' : 'available'
60
+ end
61
+
62
+ def worker_pids
63
+ work_units.all(:select => 'worker_pid').map(&:worker_pid)
64
+ end
65
+
66
+ def to_json(opts={})
67
+ { 'host' => host,
68
+ 'workers' => worker_pids,
69
+ 'status' => display_status,
70
+ }.to_json
71
+ end
72
+
73
+
74
+ private
75
+
76
+ def clear_work_units
77
+ WorkUnit.update_all('node_record_id = null, worker_pid = null', "node_record_id = #{id}")
78
+ end
79
+
80
+ end
81
+ end
@@ -8,39 +8,77 @@ module CloudCrowd
8
8
  include ModelStatus
9
9
 
10
10
  belongs_to :job
11
- belongs_to :worker_record
11
+ belongs_to :node_record
12
12
 
13
13
  validates_presence_of :job_id, :status, :input, :action
14
+
15
+ named_scope :taken, {:conditions => ["worker_pid is not null"]}
16
+ named_scope :available, {:conditions => {:worker_pid => nil, :status => INCOMPLETE}}
17
+ named_scope :reserved, {:conditions => {:worker_pid => 0}}
14
18
 
15
- after_save :check_for_job_completion
19
+ # Attempt to send a list of work_units to nodes with available capacity.
20
+ # Do this in a separate thread so that the request can return, satisfied.
21
+ # A single application server process stops the same WorkUnit from being
22
+ # distributed to multiple nodes by reserving all the available ones.
23
+ def self.distribute_to_nodes
24
+ return unless WorkUnit.reserve_available
25
+ work_units = WorkUnit.reserved
26
+ available_nodes = NodeRecord.available
27
+ until work_units.empty? do
28
+ node = available_nodes.shift
29
+ unit = work_units.first
30
+ break unless node
31
+ next unless node.actions.include? unit.action
32
+ sent = node.send_work_unit(unit)
33
+ if sent
34
+ work_units.shift
35
+ available_nodes.push(node) unless node.busy?
36
+ end
37
+ end
38
+ WorkUnit.cancel_reservations
39
+ end
40
+
41
+ # Reserves all available WorkUnits. Returns false if there were none
42
+ # available.
43
+ def self.reserve_available
44
+ WorkUnit.available.update_all('worker_pid = 0') > 0
45
+ end
16
46
 
17
- # Find the first available WorkUnit in the queue, and take it out.
18
- # +enabled_actions+ must be passed to whitelist the types of WorkUnits than
19
- # can be retrieved for processing. Optionally, specify the +offset+ to peek
20
- # further on in line.
21
- def self.dequeue(worker_name, enabled_actions=[], offset=0)
22
- unit = self.first(
23
- :conditions => {:status => INCOMPLETE, :worker_record_id => nil, :action => enabled_actions},
24
- :order => "created_at asc",
25
- :offset => offset
26
- )
27
- unit ? unit.assign_to(worker_name) : nil
47
+ def self.cancel_reservations
48
+ WorkUnit.reserved.update_all('worker_pid = null')
28
49
  end
29
50
 
30
- # After saving a WorkUnit, its Job should check if it just became complete.
31
- def check_for_job_completion
32
- self.job.check_for_completion if complete?
51
+ def self.find_by_worker_name(name)
52
+ pid, host = name.split('@')
53
+ node = NodeRecord.find_by_host(host)
54
+ node && node.work_units.find_by_worker_pid(pid)
33
55
  end
34
56
 
35
57
  # Mark this unit as having finished successfully.
58
+ # TODO: Refactor alongside check_for_completion ... look into doubleparse.
36
59
  def finish(output, time_taken)
37
- update_attributes({
38
- :status => SUCCEEDED,
39
- :worker_record => nil,
40
- :attempts => self.attempts + 1,
41
- :output => output,
42
- :time => time_taken
43
- })
60
+ if splitting?
61
+ [JSON.parse(JSON.parse(output)['output'])].flatten.each do |wu_input|
62
+ WorkUnit.create(
63
+ :job => job,
64
+ :action => action,
65
+ :input => wu_input,
66
+ :status => PROCESSING
67
+ )
68
+ end
69
+ self.destroy
70
+ job.set_next_status if job.work_units.splitting.count <= 0
71
+ else
72
+ update_attributes({
73
+ :status => SUCCEEDED,
74
+ :node_record => nil,
75
+ :worker_pid => nil,
76
+ :attempts => attempts + 1,
77
+ :output => output,
78
+ :time => time_taken
79
+ })
80
+ job.check_for_completion
81
+ end
44
82
  end
45
83
 
46
84
  # Mark this unit as having failed. May attempt a retry.
@@ -49,26 +87,28 @@ module CloudCrowd
49
87
  return try_again if tries < CloudCrowd.config[:work_unit_retries]
50
88
  update_attributes({
51
89
  :status => FAILED,
52
- :worker_record => nil,
90
+ :node_record => nil,
91
+ :worker_pid => nil,
53
92
  :attempts => tries,
54
93
  :output => output,
55
94
  :time => time_taken
56
95
  })
96
+ self.job.check_for_completion
57
97
  end
58
98
 
59
99
  # Ever tried. Ever failed. No matter. Try again. Fail again. Fail better.
60
100
  def try_again
61
101
  update_attributes({
62
- :worker_record => nil,
63
- :attempts => self.attempts + 1
102
+ :node_record => nil,
103
+ :worker_pid => nil,
104
+ :attempts => self.attempts + 1
64
105
  })
65
106
  end
66
107
 
67
108
  # When a Worker checks out a WorkUnit, establish the connection between
68
- # WorkUnit and WorkerRecord.
69
- def assign_to(worker_name)
70
- self.worker_record = WorkerRecord.find_by_name!(worker_name)
71
- self.save ? self : nil
109
+ # WorkUnit and NodeRecord.
110
+ def assign_to(node_record, worker_pid)
111
+ update_attributes!(:node_record => node_record, :worker_pid => worker_pid)
72
112
  end
73
113
 
74
114
  # The JSON representation of a WorkUnit shares the Job's options with all
@@ -36,5 +36,5 @@ module CloudCrowd
36
36
  end
37
37
 
38
38
  require 'cloud_crowd/models/job'
39
+ require 'cloud_crowd/models/node_record'
39
40
  require 'cloud_crowd/models/work_unit'
40
- require 'cloud_crowd/models/worker_record'
@@ -0,0 +1,87 @@
1
+ module CloudCrowd
2
+
3
+ class Node < Sinatra::Default
4
+
5
+ # A Node's default port. You only run a single node per machine, so they
6
+ # can all use the same port without problems.
7
+ DEFAULT_PORT = 9063
8
+
9
+ attr_reader :server, :asset_store
10
+
11
+ set :root, ROOT
12
+ set :authorization_realm, "CloudCrowd"
13
+
14
+ helpers Helpers
15
+
16
+ # methodoverride allows the _method param.
17
+ enable :methodoverride
18
+
19
+ # Enabling HTTP Authentication turns it on for all requests.
20
+ before do
21
+ login_required if CloudCrowd.config[:use_http_authentication]
22
+ end
23
+
24
+ # To monitor a Node with Monit, God, Nagios, or another tool, you can hit
25
+ # /heartbeat to make sure it's still up.
26
+ get '/heartbeat' do
27
+ "buh-bump"
28
+ end
29
+
30
+ post '/work' do
31
+ pid = fork { Worker.new(self, JSON.parse(params[:work_unit])) }
32
+ Process.detach(pid)
33
+ json :pid => pid
34
+ end
35
+
36
+ def initialize(port=DEFAULT_PORT)
37
+ require 'json'
38
+ @server = CloudCrowd.central_server
39
+ @host = Socket.gethostname
40
+ @enabled_actions = CloudCrowd.actions.keys
41
+ @asset_store = AssetStore.new
42
+ @port = port || DEFAULT_PORT
43
+
44
+ trap_signals
45
+ start_server
46
+ check_in
47
+ @server_thread.join
48
+ end
49
+
50
+ def check_in
51
+ @server["/node/#{@host}"].put(
52
+ :port => @port,
53
+ :max_workers => CloudCrowd.config[:max_workers],
54
+ :enabled_actions => @enabled_actions.join(',')
55
+ )
56
+ rescue Errno::ECONNREFUSED
57
+ puts "Failed to connect to the central server (#{@server.to_s}), exiting..."
58
+ raise SystemExit
59
+ end
60
+
61
+ def check_out
62
+ @server["/node/#{@host}"].delete
63
+ end
64
+
65
+ def start_server
66
+ @server_thread = Thread.new do
67
+ Thin::Server.start('0.0.0.0', @port, self, :signals => false)
68
+ end
69
+ end
70
+
71
+
72
+ private
73
+
74
+ def trap_signals
75
+ Signal.trap('INT') { shut_down }
76
+ Signal.trap('KILL') { shut_down }
77
+ Signal.trap('TERM') { shut_down }
78
+ end
79
+
80
+ def shut_down
81
+ check_out
82
+ Process.exit
83
+ end
84
+
85
+ end
86
+
87
+ end
@@ -10,7 +10,16 @@ ActiveRecord::Schema.define(:version => 1) do
10
10
  t.float "time"
11
11
  t.string "callback_url"
12
12
  t.string "email"
13
- t.integer "lock_version", :default => 0, :null => false
13
+ t.datetime "created_at"
14
+ t.datetime "updated_at"
15
+ end
16
+
17
+ create_table "node_records", :force => true do |t|
18
+ t.string "host", :null => false
19
+ t.string "ip_address", :null => false
20
+ t.integer "port", :null => false
21
+ t.string "enabled_actions", :default => '', :null => false
22
+ t.integer "max_workers"
14
23
  t.datetime "created_at"
15
24
  t.datetime "updated_at"
16
25
  end
@@ -21,25 +30,19 @@ ActiveRecord::Schema.define(:version => 1) do
21
30
  t.text "input", :null => false
22
31
  t.string "action", :null => false
23
32
  t.integer "attempts", :default => 0, :null => false
24
- t.integer "lock_version", :default => 0, :null => false
25
- t.integer "worker_record_id"
33
+ t.integer "node_record_id"
34
+ t.integer "worker_pid"
26
35
  t.float "time"
27
36
  t.text "output"
28
37
  t.datetime "created_at"
29
38
  t.datetime "updated_at"
30
39
  end
31
-
32
- create_table "worker_records", :force => true do |t|
33
- t.string "name", :null => false
34
- t.string "thread_status", :null => false
35
- t.datetime "created_at"
36
- t.datetime "updated_at"
37
- end
38
-
39
- add_index "jobs", ["status"], :name => "index_jobs_on_status"
40
- add_index "work_units", ["job_id"], :name => "index_work_units_on_job_id"
41
- add_index "work_units", ["status", "worker_record_id", "action"], :name => "index_work_units_on_status_and_worker_record_id_and_action"
42
- add_index "worker_records", ["name"], :name => "index_worker_records_on_name"
43
- add_index "worker_records", ["updated_at"], :name => "index_worker_records_on_updated_at"
44
40
 
41
+ # Here be indices. After looking, it seems faster not to have them at all.
42
+ #
43
+ # add_index "jobs", ["status"], :name => "index_jobs_on_status"
44
+ # add_index "work_units", ["job_id"], :name => "index_work_units_on_job_id"
45
+ # add_index "work_units", ["worker_pid"], :name => "index_work_units_on_worker_pid"
46
+ # add_index "work_units", ["worker_pid", "status"], :name => "index_work_units_on_worker_pid_and_status"
47
+ # add_index "work_units", ["worker_pid", "node_record_id"], :name => "index_work_units_on_worker_pid_and_node_record_id"
45
48
  end
@@ -16,7 +16,7 @@ module CloudCrowd
16
16
  # [post /work] Dequeue the next WorkUnit, and hand it off to the worker.
17
17
  # [put /work/:unit_id] Mark a finished WorkUnit as completed or failed, with results.
18
18
  # [put /worker] Keep a record of an actively running worker.
19
- class App < Sinatra::Default
19
+ class Server < Sinatra::Default
20
20
 
21
21
  set :root, ROOT
22
22
  set :authorization_realm, "CloudCrowd"
@@ -42,15 +42,14 @@ module CloudCrowd
42
42
  get '/status' do
43
43
  json(
44
44
  'jobs' => Job.incomplete,
45
- 'workers' => WorkerRecord.alive(:order => 'name desc'),
45
+ 'nodes' => NodeRecord.all(:order => 'host desc'),
46
46
  'work_unit_count' => WorkUnit.incomplete.count
47
47
  )
48
48
  end
49
49
 
50
- # Get the JSON for a worker record's work unit, if one exists.
50
+ # Get the JSON for what a worker is up to.
51
51
  get '/worker/:name' do
52
- record = WorkerRecord.find_by_name params[:name]
53
- json((record && record.work_unit) || {})
52
+ json WorkUnit.find_by_worker_name(params[:name]) || {}
54
53
  end
55
54
 
56
55
  # To monitor the central server with Monit, God, Nagios, or another
@@ -62,8 +61,11 @@ module CloudCrowd
62
61
  # PUBLIC API:
63
62
 
64
63
  # Start a new job. Accepts a JSON representation of the job-to-be.
64
+ # Distributes all work units to available nodes.
65
65
  post '/jobs' do
66
- json Job.create_from_request(JSON.parse(params[:job]))
66
+ job = Job.create_from_request(JSON.parse(params[:job]))
67
+ WorkUnit.distribute_to_nodes
68
+ json job
67
69
  end
68
70
 
69
71
  # Check the status of a job, returning the output if finished, and the
@@ -79,36 +81,29 @@ module CloudCrowd
79
81
  json nil
80
82
  end
81
83
 
82
- # INTERNAL WORKER DAEMON API:
84
+ # INTERNAL NODE API:
83
85
 
84
- # Internal method for worker daemons to fetch the work unit at the front
85
- # of the queue. Work unit is marked as taken and handed off to the worker.
86
- post '/work' do
87
- json dequeue_work_unit
86
+ put '/node/:host' do
87
+ NodeRecord.check_in(params, request)
88
+ WorkUnit.distribute_to_nodes
89
+ json nil
90
+ end
91
+
92
+ delete '/node/:host' do
93
+ NodeRecord.destroy_all(:host => params[:host])
94
+ json nil
88
95
  end
89
96
 
90
97
  # When workers are done with their unit, either successfully or in failure,
91
- # they mark it back on the central server and retrieve another. Failures
92
- # pull from one down in the queue, so as to not repeat the same unit.
98
+ # they mark it back on the central server and exit. Triggers distribution
99
+ # of pending work units.
93
100
  put '/work/:work_unit_id' do
94
- handle_conflicts(409) do
95
- case params[:status]
96
- when 'succeeded'
97
- current_work_unit.finish(params[:output], params[:time])
98
- json dequeue_work_unit
99
- when 'failed'
100
- current_work_unit.fail(params[:output], params[:time])
101
- json dequeue_work_unit(1)
102
- else
103
- error(500, "Completing a work unit must specify status.")
104
- end
101
+ case params[:status]
102
+ when 'succeeded' then current_work_unit.finish(params[:output], params[:time])
103
+ when 'failed' then current_work_unit.fail(params[:output], params[:time])
104
+ else error(500, "Completing a work unit must specify status.")
105
105
  end
106
- end
107
-
108
- # Every so often workers check in to let the central server know that
109
- # they're still alive. Keep up-to-date records
110
- put '/worker' do
111
- params[:terminated] ? WorkerRecord.check_out(params) : WorkerRecord.check_in(params)
106
+ WorkUnit.distribute_to_nodes
112
107
  json nil
113
108
  end
114
109
 
@@ -10,10 +10,6 @@ module CloudCrowd
10
10
  # having failed.
11
11
  class Worker
12
12
 
13
- # The time between worker check-ins with the central server, informing
14
- # it of the current status, and simply that it's still alive.
15
- CHECK_IN_INTERVAL = 60
16
-
17
13
  # Wait five seconds to retry, after internal communication errors.
18
14
  RETRY_WAIT = 5
19
15
 
@@ -22,32 +18,30 @@ module CloudCrowd
22
18
  # Spinning up a worker will create a new AssetStore with a persistent
23
19
  # connection to S3. This AssetStore gets passed into each action, for use
24
20
  # as it is run.
25
- def initialize
26
- @id = $$
27
- @hostname = Socket.gethostname
28
- @name = "#{@id}@#{@hostname}"
29
- @store = AssetStore.new
30
- @server = CloudCrowd.central_server
31
- @enabled_actions = CloudCrowd.actions.keys
32
- log 'started'
33
- end
34
-
35
- # Ask the central server for the first WorkUnit in line.
36
- def fetch_work_unit
37
- keep_trying_to "fetch a new work unit" do
38
- unit_json = @server['/work'].post(base_params)
39
- setup_work_unit(unit_json)
40
- end
41
- end
21
+ def initialize(node, work_unit)
22
+ Signal.trap('INT') { shut_down }
23
+ Signal.trap('KILL') { shut_down }
24
+ Signal.trap('TERM') { shut_down }
25
+ @pid = $$
26
+ @node = node
27
+ setup_work_unit(work_unit)
28
+ run
29
+ end
30
+
31
+ # # Ask the central server for the first WorkUnit in line.
32
+ # def fetch_work_unit
33
+ # keep_trying_to "fetch a new work unit" do
34
+ # unit_json = @server['/work'].post(base_params)
35
+ # setup_work_unit(unit_json)
36
+ # end
37
+ # end
42
38
 
43
39
  # Return output to the central server, marking the current work unit as done.
44
40
  def complete_work_unit(result)
45
41
  keep_trying_to "complete work unit" do
46
42
  data = completion_params.merge({:status => 'succeeded', :output => result})
47
- unit_json = @server["/work/#{data[:id]}"].put(data)
43
+ @node.server["/work/#{data[:id]}"].put(data)
48
44
  log "finished #{display_work_unit} in #{data[:time]} seconds"
49
- clear_work_unit
50
- setup_work_unit(unit_json)
51
45
  end
52
46
  end
53
47
 
@@ -55,36 +49,11 @@ module CloudCrowd
55
49
  def fail_work_unit(exception)
56
50
  keep_trying_to "mark work unit as failed" do
57
51
  data = completion_params.merge({:status => 'failed', :output => {'output' => exception.message}.to_json})
58
- unit_json = @server["/work/#{data[:id]}"].put(data)
52
+ @node.server["/work/#{data[:id]}"].put(data)
59
53
  log "failed #{display_work_unit} in #{data[:time]} seconds\n#{exception.message}\n#{exception.backtrace}"
60
- clear_work_unit
61
- setup_work_unit(unit_json)
62
54
  end
63
55
  end
64
56
 
65
- # Check in with the central server. Let it know the condition of the work
66
- # thread, the action and status we're processing, and our hostname and PID.
67
- def check_in(thread_status)
68
- keep_trying_to "check in with central" do
69
- @server["/worker"].put({
70
- :name => @name,
71
- :thread_status => thread_status
72
- })
73
- end
74
- end
75
-
76
- # Inform the central server that this worker is finished. This is the only
77
- # remote method that doesn't retry on connection errors -- if the worker
78
- # can't connect to the central server while it's trying to shutdown, it
79
- # should close, regardless.
80
- def check_out
81
- @server["/worker"].put({
82
- :name => @name,
83
- :terminated => true
84
- })
85
- log 'exiting'
86
- end
87
-
88
57
  # We expect and require internal communication between the central server
89
58
  # and the workers to succeed. If it fails for any reason, log it, and then
90
59
  # keep trying the same request.
@@ -100,33 +69,31 @@ module CloudCrowd
100
69
  end
101
70
  end
102
71
 
103
- # Does this Worker have a job to do?
104
- def has_work?
105
- @action_name && @input && @options
106
- end
107
-
108
72
  # Loggable string of the current work unit.
109
73
  def display_work_unit
110
- "unit ##{@options['work_unit_id']} (#{@action_name})"
74
+ "unit ##{@options['work_unit_id']} (#{@action_name}/#{CloudCrowd.display_status(@status)})"
111
75
  end
112
76
 
113
77
  # Executes the current work unit, catching all exceptions as failures.
114
78
  def run_work_unit
115
- begin
116
- result = nil
117
- @action = CloudCrowd.actions[@action_name].new(@status, @input, @options, @store)
118
- Dir.chdir(@action.work_directory) do
119
- result = case @status
120
- when PROCESSING then @action.process
121
- when SPLITTING then @action.split
122
- when MERGING then @action.merge
123
- else raise Error::StatusUnspecified, "work units must specify their status"
79
+ @worker_thread = Thread.new do
80
+ begin
81
+ result = nil
82
+ @action = CloudCrowd.actions[@action_name].new(@status, @input, @options, @node.asset_store)
83
+ Dir.chdir(@action.work_directory) do
84
+ result = case @status
85
+ when PROCESSING then @action.process
86
+ when SPLITTING then @action.split
87
+ when MERGING then @action.merge
88
+ else raise Error::StatusUnspecified, "work units must specify their status"
89
+ end
124
90
  end
91
+ complete_work_unit({'output' => result}.to_json)
92
+ rescue Exception => e
93
+ fail_work_unit(e)
125
94
  end
126
- complete_work_unit({'output' => result}.to_json)
127
- rescue Exception => e
128
- fail_work_unit(e)
129
95
  end
96
+ @worker_thread.join
130
97
  end
131
98
 
132
99
  # Wraps <tt>run_work_unit</tt> to benchmark the execution time, if requested.
@@ -142,8 +109,7 @@ module CloudCrowd
142
109
  # Common parameters to send back to central.
143
110
  def base_params
144
111
  @base_params ||= {
145
- :worker_name => @name,
146
- :worker_actions => @enabled_actions.join(',')
112
+ :pid => @pid
147
113
  }
148
114
  end
149
115
 
@@ -157,9 +123,8 @@ module CloudCrowd
157
123
  end
158
124
 
159
125
  # Extract our instance variables from a WorkUnit's JSON.
160
- def setup_work_unit(unit_json)
161
- return false unless unit_json
162
- unit = JSON.parse(unit_json)
126
+ def setup_work_unit(unit)
127
+ return false unless unit
163
128
  @start_time = Time.now
164
129
  @action_name, @input, @options, @status = unit['action'], unit['input'], unit['options'], unit['status']
165
130
  @options['job_id'] = unit['job_id']
@@ -171,7 +136,7 @@ module CloudCrowd
171
136
 
172
137
  # Log a message to the daemon log. Includes PID for identification.
173
138
  def log(message)
174
- puts "Worker ##{@id}: #{message}" unless ENV['RACK_ENV'] == 'test'
139
+ puts "Worker ##{@pid}: #{message}" unless ENV['RACK_ENV'] == 'test'
175
140
  end
176
141
 
177
142
  # When we're done with a unit, clear out our instance variables to make way
@@ -181,6 +146,17 @@ module CloudCrowd
181
146
  @action, @action_name, @input, @options, @start_time = nil, nil, nil, nil, nil
182
147
  end
183
148
 
149
+ # Force the worker to quit, even if it's in the middle of processing.
150
+ # If it had checked out a work unit, the node should have released it on
151
+ # the central server already.
152
+ def shut_down
153
+ if @worker_thread
154
+ @worker_thread.kill
155
+ @worker_thread.kill! if @worker_thread.alive?
156
+ end
157
+ Process.exit
158
+ end
159
+
184
160
  end
185
161
 
186
162
  end