mooktakim-cloud-crowd 0.3.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (71) hide show
  1. data/EPIGRAPHS +17 -0
  2. data/LICENSE +22 -0
  3. data/README +93 -0
  4. data/actions/graphics_magick.rb +43 -0
  5. data/actions/process_pdfs.rb +92 -0
  6. data/actions/word_count.rb +16 -0
  7. data/bin/crowd +5 -0
  8. data/config/config.example.ru +23 -0
  9. data/config/config.example.yml +55 -0
  10. data/config/database.example.yml +16 -0
  11. data/examples/graphics_magick_example.rb +44 -0
  12. data/examples/process_pdfs_example.rb +40 -0
  13. data/examples/word_count_example.rb +42 -0
  14. data/lib/cloud-crowd.rb +188 -0
  15. data/lib/cloud_crowd/action.rb +125 -0
  16. data/lib/cloud_crowd/asset_store/filesystem_store.rb +39 -0
  17. data/lib/cloud_crowd/asset_store/s3_store.rb +43 -0
  18. data/lib/cloud_crowd/asset_store.rb +41 -0
  19. data/lib/cloud_crowd/command_line.rb +242 -0
  20. data/lib/cloud_crowd/exceptions.rb +46 -0
  21. data/lib/cloud_crowd/helpers/authorization.rb +52 -0
  22. data/lib/cloud_crowd/helpers/resources.rb +25 -0
  23. data/lib/cloud_crowd/helpers.rb +8 -0
  24. data/lib/cloud_crowd/inflector.rb +19 -0
  25. data/lib/cloud_crowd/models/job.rb +190 -0
  26. data/lib/cloud_crowd/models/node_record.rb +107 -0
  27. data/lib/cloud_crowd/models/work_unit.rb +170 -0
  28. data/lib/cloud_crowd/models.rb +40 -0
  29. data/lib/cloud_crowd/node.rb +199 -0
  30. data/lib/cloud_crowd/schema.rb +50 -0
  31. data/lib/cloud_crowd/server.rb +123 -0
  32. data/lib/cloud_crowd/worker.rb +149 -0
  33. data/mooktakim-cloud-crowd.gemspec +116 -0
  34. data/public/css/admin_console.css +243 -0
  35. data/public/css/reset.css +42 -0
  36. data/public/images/bullet_green.png +0 -0
  37. data/public/images/bullet_white.png +0 -0
  38. data/public/images/cloud_hand.png +0 -0
  39. data/public/images/header_back.png +0 -0
  40. data/public/images/logo.png +0 -0
  41. data/public/images/queue_fill.png +0 -0
  42. data/public/images/server.png +0 -0
  43. data/public/images/server_busy.png +0 -0
  44. data/public/images/server_error.png +0 -0
  45. data/public/images/sidebar_bottom.png +0 -0
  46. data/public/images/sidebar_top.png +0 -0
  47. data/public/images/worker_info.png +0 -0
  48. data/public/images/worker_info_loading.gif +0 -0
  49. data/public/js/admin_console.js +197 -0
  50. data/public/js/excanvas.js +1 -0
  51. data/public/js/flot.js +1 -0
  52. data/public/js/jquery.js +19 -0
  53. data/test/acceptance/test_failing_work_units.rb +33 -0
  54. data/test/acceptance/test_node.rb +20 -0
  55. data/test/acceptance/test_server.rb +66 -0
  56. data/test/acceptance/test_word_count.rb +40 -0
  57. data/test/blueprints.rb +25 -0
  58. data/test/config/actions/failure_testing.rb +13 -0
  59. data/test/config/config.ru +17 -0
  60. data/test/config/config.yml +6 -0
  61. data/test/config/database.yml +3 -0
  62. data/test/test_helper.rb +19 -0
  63. data/test/unit/test_action.rb +70 -0
  64. data/test/unit/test_configuration.rb +48 -0
  65. data/test/unit/test_job.rb +103 -0
  66. data/test/unit/test_node.rb +41 -0
  67. data/test/unit/test_node_record.rb +42 -0
  68. data/test/unit/test_work_unit.rb +53 -0
  69. data/test/unit/test_worker.rb +48 -0
  70. data/views/operations_center.erb +82 -0
  71. metadata +290 -0
@@ -0,0 +1,46 @@
1
module CloudCrowd

  # Base Error class which all custom CloudCrowd exceptions inherit from.
  # Rescuing CloudCrowd::Error (or RuntimeError) will get all custom exceptions.
  # If your cluster is correctly configured, you should never expect to see any
  # of these.
  class Error < RuntimeError

    # ActionNotFound is raised when a job is created for an action that doesn't
    # exist.
    class ActionNotFound < Error
    end

    # StorageNotFound is raised when config.yml specifies a storage back-end that
    # doesn't exist.
    class StorageNotFound < Error
    end

    # StorageNotWritable is raised if the AssetStore can't write to its scratch
    # directory.
    class StorageNotWritable < Error
    end

    # StatusUnspecified is raised when a WorkUnit returns without a valid
    # status code.
    class StatusUnspecified < Error
    end

    # MissingConfiguration is raised when we're trying to run a method that
    # needs configuration not present in config.yml.
    class MissingConfiguration < Error
    end

    # CommandFailed is raised when an action shells out, and the external
    # command returns a non-zero exit code.
    class CommandFailed < Error
      # The exit code reported by the external command (nil if unknown).
      attr_reader :exit_code

      # Both arguments are optional so that the standard forms
      # <tt>raise CommandFailed</tt> and <tt>raise CommandFailed, "message"</tt>
      # work like they do for every other exception class, instead of failing
      # with an ArgumentError that masks the real problem.
      def initialize(message = nil, exit_code = nil)
        super(message)
        @exit_code = exit_code
      end
    end

  end

end
@@ -0,0 +1,52 @@
1
module CloudCrowd
  module Helpers

    # HTTP Basic authentication helpers, modelled on sinatra-authorization.
    # The original lives at:
    #   http://github.com/integrity/sinatra-authorization
    module Authorization

      # Guard for routes that need credentials. Does nothing when the request
      # is already authenticated; otherwise halts with 401 (no or wrong
      # credentials) or 400 (non-Basic scheme), and on success records the
      # username in the Rack environment.
      def login_required
        unless authorized?
          unauthorized! unless auth.provided?
          bad_request!  unless auth.basic?
          unauthorized! unless authorize(*auth.credentials)
          request.env['REMOTE_USER'] = auth.username
        end
      end

      # True once a REMOTE_USER has been recorded for this request.
      def authorized?
        request.env['REMOTE_USER'] ? true : false
      end

      # Check a login/password pair against the values in CloudCrowd.config.
      # When :http_authentication is switched off, every pair is accepted.
      # When it is on, every request is checked -- including the traffic
      # between the nodes and the central server.
      def authorize(login, password)
        if CloudCrowd.config[:http_authentication]
          CloudCrowd.config[:login] == login &&
            CloudCrowd.config[:password] == password
        else
          true
        end
      end


      private

      # Memoized Rack wrapper around the request's Authorization header.
      def auth
        @auth ||= Rack::Auth::Basic::Request.new(request.env)
      end

      # Halt with 401 and a Basic challenge, so that browsers prompt for
      # credentials.
      def unauthorized!(realm = Server.authorization_realm)
        challenge = "Basic realm=\"#{realm}\""
        response['WWW-Authenticate'] = challenge
        halt 401, 'Authorization Required'
      end

      # Halt with 400 for requests using an unsupported authorization scheme.
      def bad_request!
        halt 400, 'Bad Request'
      end
    end
  end
end
@@ -0,0 +1,25 @@
1
module CloudCrowd
  module Helpers

    # Request helpers for rendering JSON and looking up the records named by
    # the route parameters.
    module Resources

      # Respond with JSON: sets the content-type and serializes +obj+. A nil
      # +obj+ produces an empty body with a 204 status instead.
      def json(obj)
        content_type :json
        if obj.nil?
          status(204) && ''
        else
          obj.to_json
        end
      end

      # The Job named by <tt>params[:job_id]</tt>, looked up once and cached.
      # Raises Sinatra::NotFound when there is no such Job.
      def current_job
        @job ||= Job.find_by_id(params[:job_id])
        raise Sinatra::NotFound unless @job
        @job
      end

      # The WorkUnit named by <tt>params[:work_unit_id]</tt>, looked up once
      # and cached. Raises Sinatra::NotFound when there is no such WorkUnit.
      def current_work_unit
        @work_unit ||= WorkUnit.find_by_id(params[:work_unit_id])
        raise Sinatra::NotFound unless @work_unit
        @work_unit
      end

    end
  end
end
@@ -0,0 +1,8 @@
1
+ require 'cloud_crowd/helpers/authorization'
2
+ require 'cloud_crowd/helpers/resources'
3
+
4
module CloudCrowd

  # Umbrella mixin bundling the Authorization and Resources helpers.
  module Helpers
    # Listed last-first: a later +include+ lands earlier in the ancestor chain,
    # so Authorization still precedes Resources, exactly as a single
    # <tt>include Authorization, Resources</tt> would arrange them.
    # (Rack::Utils was once part of this list and is deliberately left out.)
    include Resources
    include Authorization
  end
end
@@ -0,0 +1,19 @@
1
module CloudCrowd

  # String-casing helpers, borrowed in part from ActiveSupport::Inflector.
  module Inflector
    class << self

      # Convert a snake_case path such as "asset_store/s3_store" into a
      # constant name such as "AssetStore::S3Store". Accepts anything that
      # responds to +to_s+.
      def camelize(word)
        namespaced = word.to_s.gsub(/\/(.?)/) { "::#{Regexp.last_match(1).upcase}" }
        namespaced.gsub(/(?:^|_)(.)/) { Regexp.last_match(1).upcase }
      end

      # Convert a constant name such as "CloudCrowd::AssetStore" into a
      # snake_case path such as "cloud_crowd/asset_store". Accepts anything
      # that responds to +to_s+.
      def underscore(word)
        path = word.to_s.gsub(/::/, '/')
        # Break an acronym away from the capitalized word that follows it.
        path = path.gsub(/([A-Z]+)([A-Z][a-z])/,'\1_\2')
        # Break at every lower-case-or-digit to upper-case boundary.
        path = path.gsub(/([a-z\d])([A-Z])/,'\1_\2')
        path.tr("-", "_").downcase
      end

    end
  end
end
@@ -0,0 +1,190 @@
1
module CloudCrowd

  # A chunk of work that will be farmed out into many WorkUnits to be processed
  # in parallel by each active CloudCrowd::Worker. Jobs are defined by a list
  # of inputs (usually public urls to files), an action (the name of a script that
  # CloudCrowd knows how to run), and, eventually a corresponding list of output.
  class Job < ActiveRecord::Base
    include ModelStatus

    CLEANUP_GRACE_PERIOD = 7 # That's a week.

    has_many :work_units, :dependent => :destroy

    validates_presence_of :status, :inputs, :action, :options

    before_validation_on_create :set_initial_status
    after_create                :queue_for_workers
    before_destroy              :cleanup_assets

    # Jobs that were last updated more than N days ago.
    named_scope :older_than, lambda {|num| {:conditions => ['updated_at < ?', num.days.ago]} }

    # Create a Job from an incoming JSON request, and add it to the queue.
    # +h+ is the parsed request hash with string keys: 'inputs', 'action',
    # 'options' (defaults to an empty hash), 'email' and 'callback_url'.
    # Inputs and options are stored as JSON strings.
    def self.create_from_request(h)
      self.create(
        :inputs       => h['inputs'].to_json,
        :action       => h['action'],
        :options      => (h['options'] || {}).to_json,
        :email        => h['email'],
        :callback_url => h['callback_url']
      )
    end

    # Clean up all complete jobs beyond a certain age. Pass <tt>:days</tt> to
    # override the CLEANUP_GRACE_PERIOD. Jobs are destroyed in batches of 100.
    def self.cleanup_all(opts = {})
      days = opts[:days] || CLEANUP_GRACE_PERIOD
      self.complete.older_than(days).find_in_batches(:batch_size => 100) do |jobs|
        jobs.each {|job| job.destroy }
      end
    end

    # After work units are marked successful, we check to see if all of them have
    # finished, if so, continue on to the next phase of the job. Returns nil
    # while work units are still outstanding.
    def check_for_completion
      return unless all_work_units_complete?
      set_next_status
      outs = gather_outputs_from_work_units
      # A merging job feeds all of the gathered outputs to a single WorkUnit.
      return queue_for_workers([outs]) if merging?
      if complete?
        update_attributes(:outputs => outs, :time => time_taken)
        Thread.new { fire_callback } if callback_url
      end
      self
    end

    # Transition this Job's current status to the appropriate next one, based
    # on the state of the WorkUnits and the nature of the Action.
    def set_next_status
      next_status =
        if    any_work_units_failed? then FAILED
        elsif splitting?             then PROCESSING
        elsif mergeable?             then MERGING
        else                              SUCCEEDED
        end
      update_attribute(:status, next_status)
    end

    # If a <tt>callback_url</tt> is defined, post the Job's JSON to it upon
    # completion. The <tt>callback_url</tt> may include HTTP basic authentication,
    # if you like:
    #   http://user:password@example.com/job_complete
    # If the callback URL returns a '201 Created' HTTP status code, CloudCrowd
    # will assume that the resource has been successfully created, and the Job
    # will be cleaned up.
    #
    # Failures are reported on stdout rather than raised. That covers refused
    # connections and timeouts as well as RestClient errors, so that an
    # unreachable callback host is logged instead of killing the calling thread
    # without a trace.
    def fire_callback
      response = RestClient.post(callback_url, {:job => self.to_json})
      Thread.new { self.destroy } if response && response.code == 201
    rescue RestClient::Exception, Errno::ECONNREFUSED, Timeout::Error
      puts "Job ##{id} failed to fire callback: #{callback_url}"
    end

    # Cleaning up after a job will remove all of its files from S3 or the
    # filesystem. Destroying a Job will cleanup_assets first (it is the
    # before_destroy callback). The cleanup itself runs synchronously; it is
    # fire_callback that destroys the Job from a separate thread.
    # TODO: Convert this into a 'cleanup' work unit that gets run by a worker.
    def cleanup_assets
      AssetStore.new.cleanup(self)
    end

    # Have all of the WorkUnits finished?
    def all_work_units_complete?
      self.work_units.incomplete.count <= 0
    end

    # Have any of the WorkUnits failed?
    def any_work_units_failed?
      self.work_units.failed.count > 0
    end

    # This job is splittable if its Action has a public +split+ method.
    def splittable?
      self.action_class.public_method_defined?(:split)
    end

    # This job is done splitting if it's finished with its splitting work units.
    def done_splitting?
      splittable? && work_units.splitting.count <= 0
    end

    # This job is mergeable if it is currently processing and its Action has a
    # public +merge+ method.
    def mergeable?
      self.processing? && self.action_class.public_method_defined?(:merge)
    end

    # Retrieve the class for this Job's Action. Raises Error::ActionNotFound
    # if CloudCrowd has no action registered under that name.
    def action_class
      @action_class ||= CloudCrowd.actions[self.action]
      return @action_class if @action_class
      raise Error::ActionNotFound, "no action named: '#{self.action}' could be found"
    end

    # How complete is this Job? Returns an integer percentage.
    # Unfortunately, with the current processing sequence, the percent_complete
    # can pull a fast one and go backwards. This happens when there's a single
    # large input that takes a long time to split, and when it finally does it
    # creates a whole swarm of work units. This seems unavoidable.
    def percent_complete
      return 99 if merging?
      return 100 if complete?
      unit_count = work_units.count
      return 100 if unit_count <= 0
      (work_units.complete.count / unit_count.to_f * 100).round
    end

    # How long has this Job taken? The stored +time+ once the Job has finished,
    # otherwise the time elapsed since creation.
    def time_taken
      return self.time if self.time
      Time.now - self.created_at
    end

    # Generate a stable six-digit hex color code, based on the Job's id: six
    # characters taken from the end of the id's MD5 digest, leaving off the
    # very last character.
    def color
      @color ||= Digest::MD5.hexdigest(self.id.to_s)[-7...-1]
    end

    # A JSON representation of this job includes the statuses of its component
    # WorkUnits, as well as any completed outputs. +opts+ is accepted for
    # compatibility with the usual to_json signature and is ignored.
    def to_json(opts={})
      atts = {
        'id'                => id,
        'color'             => color,
        'status'            => display_status,
        'percent_complete'  => percent_complete,
        'work_units'        => work_units.count,
        'time_taken'        => time_taken
      }
      atts['outputs'] = JSON.parse(outputs) if outputs
      atts['email']   = email if email
      atts.to_json
    end


    private

    # When the WorkUnits are all finished, gather all their outputs together
    # before removing them from the database entirely. Returns their merged JSON.
    def gather_outputs_from_work_units
      outs = self.work_units.complete.map {|u| u.parsed_output }
      self.work_units.complete.destroy_all
      outs.to_json
    end

    # When starting a new job, or moving to a new stage, split up the inputs
    # into WorkUnits, and queue them. Workers will start picking them up right
    # away. Without an explicit +input+ list, the Job's stored inputs are used.
    def queue_for_workers(input=nil)
      input ||= JSON.parse(self.inputs)
      input.each {|i| WorkUnit.start(self, action, i, status) }
      self
    end

    # A Job starts out either splitting or processing, depending on its action.
    def set_initial_status
      self.status = self.splittable? ? SPLITTING : PROCESSING
    end

  end
end
@@ -0,0 +1,107 @@
1
module CloudCrowd

  # A NodeRecord is the central server's record of a Node running remotely. We
  # can use it to assign WorkUnits to the Node, and keep track of its status.
  # When a Node exits, it destroys this record.
  class NodeRecord < ActiveRecord::Base

    has_many :work_units

    validates_presence_of :host, :ip_address, :port, :enabled_actions

    after_destroy :redistribute_work_units

    # Available Nodes haven't used up their maximum number of workers yet.
    # Least recently updated nodes come first.
    named_scope :available, {
      :conditions => ['(max_workers is null or (select count(*) from work_units where node_record_id = node_records.id) < max_workers)'],
      :order => 'updated_at asc'
    }

    # Register a Node with the central server. Currently this only happens at
    # Node startup. The record is keyed on <tt>params[:host]</tt>; the IP
    # address is taken from the request itself. Raises if the record is invalid.
    def self.check_in(params, request)
      attrs = {
        :ip_address       => request.ip,
        :port             => params[:port],
        :busy             => params[:busy],
        :max_workers      => params[:max_workers],
        :enabled_actions  => params[:enabled_actions]
      }
      # Initialize rather than create: a record holding nothing but a host can
      # never pass validation, so the only save that can succeed is the one
      # below, after the remaining attributes have been filled in.
      self.find_or_initialize_by_host(params[:host]).update_attributes!(attrs)
    end

    # Dispatch a WorkUnit to this node. Places the node back at the end of
    # the rotation. If we fail to send the WorkUnit, we consider the node to be
    # down, and remove this record, freeing up all of its checked-out work units.
    # If the Node responds that it's overloaded, we mark it as busy. Returns
    # true if the WorkUnit was dispatched successfully.
    #
    # Any other failed request (one that is not the 503 "overloaded" response)
    # is re-raised to the caller: an exception raised inside one rescue clause
    # is not handled by the clause that follows it.
    def send_work_unit(unit)
      result = node['/work'].post(:work_unit => unit.to_json)
      unit.assign_to(self, JSON.parse(result)['pid'])
      touch && true
    rescue RestClient::RequestFailed => e
      raise e unless e.http_code == 503 && e.http_body == Node::OVERLOADED_MESSAGE
      update_attribute(:busy, true) && false
    rescue RestClient::Exception, Errno::ECONNREFUSED, Timeout::Error
      # Couldn't post to node, assume it's gone away.
      destroy && false
    end

    # What Actions is this Node able to run? Parsed once from the
    # comma-separated +enabled_actions+ column.
    def actions
      @actions ||= enabled_actions.split(',')
    end

    # Is this Node too busy for more work? True when the +busy+ flag is set,
    # or when the node already holds +max_workers+ work units.
    def busy?
      busy || (max_workers && work_units.count >= max_workers)
    end

    # The URL at which this Node may be reached.
    # TODO: Make sure that the host actually has externally accessible DNS.
    def url
      @url ||= "http://#{host}:#{port}"
    end

    # Keep a RestClient::Resource handy for contacting the Node, built with
    # CloudCrowd.client_options.
    def node
      @node ||= RestClient::Resource.new(url, CloudCrowd.client_options)
    end

    # The printable status of the Node.
    def display_status
      busy? ? 'busy' : 'available'
    end

    # A list of the process ids of the workers currently being run by the Node.
    def worker_pids
      work_units.all(:select => 'worker_pid').map(&:worker_pid)
    end

    # Release all of this Node's WorkUnits for other nodes to take.
    def release_work_units
      # Bind the id as a parameter instead of splicing it into the SQL string.
      WorkUnit.update_all('node_record_id = null, worker_pid = null', ['node_record_id = ?', id])
    end

    # The JSON representation of a NodeRecord includes its worker_pids.
    # +opts+ is accepted for compatibility with the usual to_json signature and
    # is ignored.
    def to_json(opts={})
      { 'host'    => host,
        'workers' => worker_pids,
        'status'  => display_status
      }.to_json
    end


    private

    # When a Node exits, release its WorkUnits and redistribute them to others.
    # Redistribute in a separate thread to avoid delaying shutdown.
    def redistribute_work_units
      release_work_units
      Thread.new { WorkUnit.distribute_to_nodes }
    end

  end
end
@@ -0,0 +1,170 @@
1
module CloudCrowd

  # A WorkUnit is an atomic chunk of work from a job, processing a single input
  # through a single action. The WorkUnits are run in parallel, with each worker
  # daemon processing one at a time. The splitting and merging stages of a job
  # are each run as a single WorkUnit.
  class WorkUnit < ActiveRecord::Base
    include ModelStatus

    # We use a random number in (0...MAX_RESERVATION) to reserve work units.
    # The size of the maximum signed integer in MySQL -- SQLite has no limit.
    MAX_RESERVATION = 2147483647

    # We only reserve a certain number of WorkUnits in a single go, to avoid
    # reserving the entire table.
    RESERVATION_LIMIT = 25

    belongs_to :job
    belongs_to :node_record

    validates_presence_of :job_id, :status, :input, :action

    # Available WorkUnits are waiting to be distributed to Nodes for processing.
    named_scope :available, {:conditions => {:reservation => nil, :worker_pid => nil, :status => INCOMPLETE}}
    # Reserved WorkUnits have been marked for distribution by a central server process.
    named_scope :reserved, lambda {|reservation|
      {:conditions => {:reservation => reservation}, :order => 'updated_at asc'}
    }

    # Attempt to send a list of WorkUnits to nodes with available capacity.
    # A single central server process stops the same WorkUnit from being
    # distributed to multiple nodes by reserving it first. The algorithm used
    # should be lock-free.
    #
    # We reserve WorkUnits for this process in chunks of RESERVATION_LIMIT size,
    # and try to match them to Nodes that are capable of handling the Action.
    # WorkUnits get removed from the availability list when they are
    # successfully sent, and Nodes get removed when they are busy or have the
    # action in question disabled.
    def self.distribute_to_nodes
      reservation = nil
      loop do
        # Release the previous round before reserving the next one. Nothing
        # else clears a unit's reservation, and only unreserved units count as
        # available, so a reservation left behind here would keep its units
        # from ever being retried or handed to another node.
        WorkUnit.cancel_reservations(reservation) if reservation
        reservation = WorkUnit.reserve_available(:limit => RESERVATION_LIMIT)
        return unless reservation
        work_units      = WorkUnit.reserved(reservation)
        available_nodes = NodeRecord.available
        while (node = available_nodes.shift) && (unit = work_units.shift) do
          if node.actions.include? unit.action
            if node.send_work_unit(unit)
              # The node goes to the back of the rotation if it can take more.
              available_nodes.push(node) unless node.busy?
              next
            end
          end
          # Not sent: the unit goes back in line, and the node is dropped.
          work_units.push(unit)
        end
        # Leftover units, or no nodes left to offer them to: we're done.
        return if work_units.any? || available_nodes.empty?
      end
    ensure
      WorkUnit.cancel_reservations(reservation) if reservation
    end

    # Reserves available WorkUnits for this process, subject to +options+
    # (such as <tt>:limit</tt>). Returns the reservation number, or false if
    # there were none available.
    def self.reserve_available(options={})
      reservation = ActiveSupport::SecureRandom.random_number(MAX_RESERVATION)
      any = WorkUnit.available.update_all("reservation = #{reservation}", nil, options) > 0
      any && reservation
    end

    # Cancels all outstanding WorkUnit reservations for this process.
    def self.cancel_reservations(reservation)
      WorkUnit.reserved(reservation).update_all('reservation = null')
    end

    # Cancels all outstanding WorkUnit reservations for all processes. (Useful
    # in the console for debugging.)
    def self.cancel_all_reservations
      WorkUnit.update_all('reservation = null')
    end

    # Look up a WorkUnit by the worker that's currently processing it. Specified
    # by <tt>pid@host</tt>. Returns nil when the host is unknown.
    def self.find_by_worker_name(name)
      pid, host = name.split('@')
      node = NodeRecord.find_by_host(host)
      node && node.work_units.find_by_worker_pid(pid)
    end

    # Convenience method for starting a new WorkUnit. Inputs that aren't
    # already strings are serialized to JSON.
    def self.start(job, action, input, status)
      input = input.to_json unless input.is_a? String
      self.create(:job => job, :action => action, :input => input, :status => status)
    end

    # Mark this unit as having finished successfully.
    # Splitting work units are handled differently (an optimization) -- they
    # immediately fire off all of their resulting WorkUnits for processing,
    # without waiting for the rest of their splitting cousins to complete.
    def finish(result, time_taken)
      if splitting?
        [parsed_output(result)].flatten.each do |new_input|
          WorkUnit.start(job, action, new_input, PROCESSING)
        end
        self.destroy
        job.set_next_status if job && job.done_splitting?
      else
        update_attributes({
          :status         => SUCCEEDED,
          :node_record    => nil,
          :worker_pid     => nil,
          :attempts       => attempts + 1,
          :output         => result,
          :time           => time_taken
        })
        job && job.check_for_completion
      end
    end

    # Mark this unit as having failed. Retries instead, for as long as the
    # number of attempts stays below the configured :work_unit_retries.
    def fail(output, time_taken)
      tries = self.attempts + 1
      return try_again if tries < CloudCrowd.config[:work_unit_retries]
      update_attributes({
        :status         => FAILED,
        :node_record    => nil,
        :worker_pid     => nil,
        :attempts       => tries,
        :output         => output,
        :time           => time_taken
      })
      job && job.check_for_completion
    end

    # Ever tried. Ever failed. No matter. Try again. Fail again. Fail better.
    # Detaches the unit from its node and worker and counts the attempt.
    def try_again
      update_attributes({
        :node_record  => nil,
        :worker_pid   => nil,
        :attempts     => self.attempts + 1
      })
    end

    # When a Node checks out a WorkUnit, establish the connection between
    # WorkUnit and NodeRecord and record the worker_pid.
    def assign_to(node_record, worker_pid)
      update_attributes!(:node_record => node_record, :worker_pid => worker_pid)
    end

    # All output needs to be wrapped in a JSON object for consistency
    # (unfortunately, JSON.parse needs the top-level to be an object or array).
    # Convenience method to provide the parsed version: the value stored under
    # the 'output' key.
    def parsed_output(out = self.output)
      JSON.parse(out)['output']
    end

    # The JSON representation of a WorkUnit shares the Job's options with all
    # its cousin WorkUnits. +opts+ is accepted (and ignored) to match the
    # to_json signatures of Job and NodeRecord, and so that serializers which
    # pass an argument don't raise an ArgumentError.
    def to_json(opts={})
      {
        'id'        => self.id,
        'job_id'    => self.job_id,
        'input'     => self.input,
        'attempts'  => self.attempts,
        'action'    => self.action,
        'options'   => JSON.parse(self.job.options),
        'status'    => self.status
      }.to_json
    end

  end
end
@@ -0,0 +1,40 @@
1
module CloudCrowd

  # Mixin giving Jobs and WorkUnits a named scope and a predicate method for
  # every CloudCrowd status.
  module ModelStatus

    # Declare one named scope per status on the including class.
    # Note that COMPLETE and INCOMPLETE are unions of other states.
    def self.included(klass)
      scopes = [
        ['processing', PROCESSING],
        ['succeeded',  SUCCEEDED],
        ['failed',     FAILED],
        ['splitting',  SPLITTING],
        ['merging',    MERGING],
        ['complete',   COMPLETE],
        ['incomplete', INCOMPLETE]
      ]
      klass.class_eval do
        scopes.each do |name, state|
          named_scope name, :conditions => {:status => state}
        end
      end
    end

    def processing?
      status == PROCESSING
    end

    def succeeded?
      status == SUCCEEDED
    end

    def failed?
      status == FAILED
    end

    def splitting?
      status == SPLITTING
    end

    def merging?
      status == MERGING
    end

    def complete?
      COMPLETE.include?(status)
    end

    def incomplete?
      INCOMPLETE.include?(status)
    end

    # The human-readable name for the model's status code.
    def display_status
      CloudCrowd.display_status(status)
    end

  end
end
37
+
38
+ require 'cloud_crowd/models/job'
39
+ require 'cloud_crowd/models/node_record'
40
+ require 'cloud_crowd/models/work_unit'