mooktakim-cloud-crowd 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. data/EPIGRAPHS +17 -0
  2. data/LICENSE +22 -0
  3. data/README +93 -0
  4. data/actions/graphics_magick.rb +43 -0
  5. data/actions/process_pdfs.rb +92 -0
  6. data/actions/word_count.rb +16 -0
  7. data/bin/crowd +5 -0
  8. data/config/config.example.ru +23 -0
  9. data/config/config.example.yml +55 -0
  10. data/config/database.example.yml +16 -0
  11. data/examples/graphics_magick_example.rb +44 -0
  12. data/examples/process_pdfs_example.rb +40 -0
  13. data/examples/word_count_example.rb +42 -0
  14. data/lib/cloud-crowd.rb +188 -0
  15. data/lib/cloud_crowd/action.rb +125 -0
  16. data/lib/cloud_crowd/asset_store/filesystem_store.rb +39 -0
  17. data/lib/cloud_crowd/asset_store/s3_store.rb +43 -0
  18. data/lib/cloud_crowd/asset_store.rb +41 -0
  19. data/lib/cloud_crowd/command_line.rb +242 -0
  20. data/lib/cloud_crowd/exceptions.rb +46 -0
  21. data/lib/cloud_crowd/helpers/authorization.rb +52 -0
  22. data/lib/cloud_crowd/helpers/resources.rb +25 -0
  23. data/lib/cloud_crowd/helpers.rb +8 -0
  24. data/lib/cloud_crowd/inflector.rb +19 -0
  25. data/lib/cloud_crowd/models/job.rb +190 -0
  26. data/lib/cloud_crowd/models/node_record.rb +107 -0
  27. data/lib/cloud_crowd/models/work_unit.rb +170 -0
  28. data/lib/cloud_crowd/models.rb +40 -0
  29. data/lib/cloud_crowd/node.rb +199 -0
  30. data/lib/cloud_crowd/schema.rb +50 -0
  31. data/lib/cloud_crowd/server.rb +123 -0
  32. data/lib/cloud_crowd/worker.rb +149 -0
  33. data/mooktakim-cloud-crowd.gemspec +116 -0
  34. data/public/css/admin_console.css +243 -0
  35. data/public/css/reset.css +42 -0
  36. data/public/images/bullet_green.png +0 -0
  37. data/public/images/bullet_white.png +0 -0
  38. data/public/images/cloud_hand.png +0 -0
  39. data/public/images/header_back.png +0 -0
  40. data/public/images/logo.png +0 -0
  41. data/public/images/queue_fill.png +0 -0
  42. data/public/images/server.png +0 -0
  43. data/public/images/server_busy.png +0 -0
  44. data/public/images/server_error.png +0 -0
  45. data/public/images/sidebar_bottom.png +0 -0
  46. data/public/images/sidebar_top.png +0 -0
  47. data/public/images/worker_info.png +0 -0
  48. data/public/images/worker_info_loading.gif +0 -0
  49. data/public/js/admin_console.js +197 -0
  50. data/public/js/excanvas.js +1 -0
  51. data/public/js/flot.js +1 -0
  52. data/public/js/jquery.js +19 -0
  53. data/test/acceptance/test_failing_work_units.rb +33 -0
  54. data/test/acceptance/test_node.rb +20 -0
  55. data/test/acceptance/test_server.rb +66 -0
  56. data/test/acceptance/test_word_count.rb +40 -0
  57. data/test/blueprints.rb +25 -0
  58. data/test/config/actions/failure_testing.rb +13 -0
  59. data/test/config/config.ru +17 -0
  60. data/test/config/config.yml +6 -0
  61. data/test/config/database.yml +3 -0
  62. data/test/test_helper.rb +19 -0
  63. data/test/unit/test_action.rb +70 -0
  64. data/test/unit/test_configuration.rb +48 -0
  65. data/test/unit/test_job.rb +103 -0
  66. data/test/unit/test_node.rb +41 -0
  67. data/test/unit/test_node_record.rb +42 -0
  68. data/test/unit/test_work_unit.rb +53 -0
  69. data/test/unit/test_worker.rb +48 -0
  70. data/views/operations_center.erb +82 -0
  71. metadata +290 -0
@@ -0,0 +1,46 @@
1
module CloudCrowd

  # Root of the CloudCrowd exception hierarchy. Every custom exception below
  # descends from it, so rescuing CloudCrowd::Error (or RuntimeError) catches
  # them all. On a correctly configured cluster none of these should surface.
  class Error < RuntimeError

    # Raised when a job is created for an action that doesn't exist.
    class ActionNotFound < Error; end

    # Raised when config.yml names a storage back-end that doesn't exist.
    class StorageNotFound < Error; end

    # Raised when the AssetStore can't write to its scratch directory.
    class StorageNotWritable < Error; end

    # Raised when a WorkUnit returns without a valid status code.
    class StatusUnspecified < Error; end

    # Raised when a method needs configuration that is absent from config.yml.
    class MissingConfiguration < Error; end

    # Raised when an action shells out and the external command returns a
    # non-zero exit code. The code is kept alongside the message.
    class CommandFailed < Error
      # The exit code reported by the failed command.
      attr_reader :exit_code

      # message   - the error message handed to RuntimeError.
      # exit_code - the exit status of the external command.
      def initialize(message, exit_code)
        @exit_code = exit_code
        super(message)
      end
    end

  end

end
@@ -0,0 +1,52 @@
1
module CloudCrowd
  module Helpers

    # HTTP Basic authentication helpers, modelled on sinatra-authorization.
    # See http://github.com/integrity/sinatra-authorization for the original.
    module Authorization

      # Make sure the request carries the correct credentials. A request that
      # is already authorized passes straight through; otherwise each failed
      # check calls +halt+ (401 for missing or wrong credentials, 400 for a
      # non-Basic scheme). On success the username is stored in the Rack env.
      def login_required
        unless authorized?
          unauthorized! unless auth.provided?
          bad_request!  unless auth.basic?
          unauthorized! unless authorize(*auth.credentials)
          request.env['REMOTE_USER'] = auth.username
        end
      end

      # Has this request already been authenticated? True once REMOTE_USER
      # has been set in the Rack env.
      def authorized?
        request.env['REMOTE_USER'] ? true : false
      end

      # Check a login/password pair against the values stored in config.yml.
      # Always succeeds when HTTP authentication is switched off. When it is
      # switched on, every request is authenticated, including the traffic
      # between the nodes and the central server.
      def authorize(login, password)
        settings = CloudCrowd.config
        return true unless settings[:http_authentication]
        settings[:login] == login && settings[:password] == password
      end


      private

      # Memoized Rack Basic-auth request wrapper for the current request.
      def auth
        @auth ||= Rack::Auth::Basic::Request.new(request.env)
      end

      # Respond with 401 and a WWW-Authenticate challenge, which prompts the
      # browser to ask for credentials.
      def unauthorized!(realm = Server.authorization_realm)
        response['WWW-Authenticate'] = %(Basic realm="#{realm}")
        halt(401, 'Authorization Required')
      end

      # Respond with 400 when the Authorization header isn't Basic auth.
      def bad_request!
        halt(400, 'Bad Request')
      end
    end
  end
end
@@ -0,0 +1,25 @@
1
module CloudCrowd
  module Helpers

    # Route helpers for rendering JSON and for looking up the records named
    # in the request parameters.
    module Resources

      # Respond with JSON: sets the content-type, then serializes +obj+.
      # A nil +obj+ yields an empty body with a 204 status instead.
      def json(obj)
        content_type :json
        obj.nil? ? (status(204) && '') : obj.to_json
      end

      # Lazily fetch (and memoize) the Job named by <tt>params[:job_id]</tt>.
      # Raises Sinatra::NotFound when no such Job exists.
      def current_job
        @job ||= Job.find_by_id(params[:job_id]) || raise(Sinatra::NotFound)
      end

      # Lazily fetch (and memoize) the WorkUnit named by
      # <tt>params[:work_unit_id]</tt>. Raises Sinatra::NotFound when no such
      # WorkUnit exists.
      def current_work_unit
        @work_unit ||= WorkUnit.find_by_id(params[:work_unit_id]) || raise(Sinatra::NotFound)
      end

    end
  end
end
@@ -0,0 +1,8 @@
1
+ require 'cloud_crowd/helpers/authorization'
2
+ require 'cloud_crowd/helpers/resources'
3
+
4
module CloudCrowd

  # Umbrella mixin that bundles the Authorization and Resources helpers, so
  # that including Helpers brings in both.
  module Helpers
    # A single `include A, B` mixes its arguments in right-to-left, so these
    # two calls yield the same ancestor order: Authorization ahead of
    # Resources. Rack::Utils was commented out of the list and stays excluded.
    include Resources
    include Authorization
  end
end
@@ -0,0 +1,19 @@
1
module CloudCrowd

  # String inflections, pilfered in parts from ActiveSupport::Inflector.
  module Inflector

    # Turn an underscored path into a constant name:
    #   'cloud_crowd/asset_store' => 'CloudCrowd::AssetStore'
    # Accepts anything that responds to +to_s+.
    def self.camelize(word)
      # '/x' becomes '::X' first, then each leading or '_'-prefixed character
      # is upcased and the underscore dropped.
      namespaced = word.to_s.gsub(%r{/(.?)}) { '::' + $1.upcase }
      namespaced.gsub(/(?:^|_)(.)/) { $1.upcase }
    end

    # Turn a constant name into an underscored path:
    #   'CloudCrowd::AssetStore' => 'cloud_crowd/asset_store'
    # Accepts anything that responds to +to_s+.
    def self.underscore(word)
      path = word.to_s.gsub(/::/, '/')
      # Split an acronym from a following capitalized word: 'HTTPServer'.
      path = path.gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2')
      # Split a lowercase letter or digit from a following capital.
      path = path.gsub(/([a-z\d])([A-Z])/, '\1_\2')
      path.tr('-', '_').downcase
    end

  end
end
@@ -0,0 +1,190 @@
1
module CloudCrowd

  # A chunk of work that will be farmed out into many WorkUnits to be processed
  # in parallel by each active CloudCrowd::Worker. Jobs are defined by a list
  # of inputs (usually public urls to files), an action (the name of a script
  # that CloudCrowd knows how to run), and, eventually, a corresponding list
  # of outputs.
  class Job < ActiveRecord::Base
    include ModelStatus

    # Default age, in days, beyond which cleanup_all removes completed jobs.
    CLEANUP_GRACE_PERIOD = 7 # That's a week.

    has_many :work_units, :dependent => :destroy

    validates_presence_of :status, :inputs, :action, :options

    before_validation_on_create :set_initial_status
    after_create                :queue_for_workers
    before_destroy              :cleanup_assets

    # Jobs that were last updated more than N days ago.
    named_scope :older_than, lambda {|num| {:conditions => ['updated_at < ?', num.days.ago]} }

    # Create a Job from an incoming JSON request, and add it to the queue.
    # +h+ is the parsed request hash with string keys; 'inputs' and 'options'
    # are re-serialized to JSON for storage, and 'options' defaults to {}.
    def self.create_from_request(h)
      self.create(
        :inputs       => h['inputs'].to_json,
        :action       => h['action'],
        :options      => (h['options'] || {}).to_json,
        :email        => h['email'],
        :callback_url => h['callback_url']
      )
    end

    # Destroy all complete jobs beyond a certain age, in batches of 100.
    # opts[:days] overrides CLEANUP_GRACE_PERIOD.
    def self.cleanup_all(opts = {})
      days = opts[:days] || CLEANUP_GRACE_PERIOD
      self.complete.older_than(days).find_in_batches(:batch_size => 100) do |jobs|
        jobs.each {|job| job.destroy }
      end
    end

    # After work units are marked successful, we check to see if all of them
    # have finished; if so, continue on to the next phase of the job.
    # Returns nil while work units are still outstanding, otherwise self.
    def check_for_completion
      return unless all_work_units_complete?
      set_next_status
      outs = gather_outputs_from_work_units
      # A merging job feeds all gathered outputs to a single merge WorkUnit.
      return queue_for_workers([outs]) if merging?
      if complete?
        update_attributes(:outputs => outs, :time => time_taken)
        Thread.new { fire_callback } if callback_url
      end
      self
    end

    # Transition this Job's current status to the appropriate next one, based
    # on the state of the WorkUnits and the nature of the Action. The checks
    # run in this order: any failure, still splitting, mergeable, else done.
    def set_next_status
      next_status =
        if    any_work_units_failed? then FAILED
        elsif self.splitting?        then PROCESSING
        elsif self.mergeable?        then MERGING
        else                              SUCCEEDED
        end
      update_attribute(:status, next_status)
    end

    # If a <tt>callback_url</tt> is defined, post the Job's JSON to it upon
    # completion. The <tt>callback_url</tt> may include HTTP basic
    # authentication, if you like:
    #   http://user:password@example.com/job_complete
    # If the callback URL returns a '201 Created' HTTP status code, CloudCrowd
    # will assume that the resource has been successfully created, and the Job
    # will be cleaned up. A RestClient failure is reported on stdout and
    # otherwise ignored.
    def fire_callback
      response = RestClient.post(callback_url, {:job => self.to_json})
      Thread.new { self.destroy } if response && response.code == 201
    rescue RestClient::Exception
      puts "Job ##{id} failed to fire callback: #{callback_url}"
    end

    # Cleaning up after a job will remove all of its files from S3 or the
    # filesystem. Registered as a before_destroy callback, so destroying a
    # Job cleans up its assets first. The cleanup runs inline.
    # TODO: Convert this into a 'cleanup' work unit that gets run by a worker.
    def cleanup_assets
      AssetStore.new.cleanup(self)
    end

    # Have all of the WorkUnits finished?
    def all_work_units_complete?
      self.work_units.incomplete.count <= 0
    end

    # Have any of the WorkUnits failed?
    def any_work_units_failed?
      self.work_units.failed.count > 0
    end

    # This job is splittable if its Action has a +split+ method.
    def splittable?
      self.action_class.public_instance_methods.map {|m| m.to_sym }.include? :split
    end

    # This job is done splitting if it's finished with its splitting work units.
    def done_splitting?
      splittable? && work_units.splitting.count <= 0
    end

    # This job is mergeable if it is currently processing and its Action has
    # a +merge+ method.
    def mergeable?
      self.processing? && self.action_class.public_instance_methods.map {|m| m.to_sym }.include?(:merge)
    end

    # Retrieve (and memoize) the class for this Job's Action.
    # Raises Error::ActionNotFound if no action is registered under that name.
    def action_class
      @action_class ||= CloudCrowd.actions[self.action]
      return @action_class if @action_class
      raise Error::ActionNotFound, "no action named: '#{self.action}' could be found"
    end

    # How complete is this Job, as a whole-number percentage?
    # Unfortunately, with the current processing sequence, the percent_complete
    # can pull a fast one and go backwards. This happens when there's a single
    # large input that takes a long time to split, and when it finally does it
    # creates a whole swarm of work units. This seems unavoidable.
    def percent_complete
      return 99  if merging?
      return 100 if complete?
      unit_count = work_units.count
      return 100 if unit_count <= 0
      (work_units.complete.count / unit_count.to_f * 100).round
    end

    # How long has this Job taken? Uses the stored +time+ once it has been
    # recorded, otherwise the time elapsed since creation.
    def time_taken
      return self.time if self.time
      Time.now - self.created_at
    end

    # Generate a stable six-digit hex color code, based on the Job's id
    # (six characters taken from the MD5 hex digest of the id).
    def color
      @color ||= Digest::MD5.hexdigest(self.id.to_s)[-7...-1]
    end

    # A JSON representation of this job includes the count of its component
    # WorkUnits, as well as any completed outputs. +opts+ is accepted for
    # compatibility with the usual to_json signature and is not used.
    def to_json(opts={})
      atts = {
        'id'                => id,
        'color'             => color,
        'status'            => display_status,
        'percent_complete'  => percent_complete,
        'work_units'        => work_units.count,
        'time_taken'        => time_taken
      }
      atts['outputs'] = JSON.parse(outputs) if outputs
      atts['email']   = email if email
      atts.to_json
    end


    private

    # When the WorkUnits are all finished, gather all their outputs together
    # before removing them from the database entirely. Returns the outputs
    # as a JSON array.
    def gather_outputs_from_work_units
      outs = self.work_units.complete.map {|u| u.parsed_output }
      self.work_units.complete.destroy_all
      outs.to_json
    end

    # When starting a new job, or moving to a new stage, split up the inputs
    # into WorkUnits, and queue them. Workers will start picking them up right
    # away. +input+ defaults to the Job's stored inputs. Returns self.
    def queue_for_workers(input=nil)
      input ||= JSON.parse(self.inputs)
      input.each {|i| WorkUnit.start(self, action, i, status) }
      self
    end

    # A Job starts out either splitting or processing, depending on its action.
    def set_initial_status
      self.status = self.splittable? ? SPLITTING : PROCESSING
    end

  end
end
@@ -0,0 +1,107 @@
1
module CloudCrowd

  # The central server's record of a Node that is running remotely. It is
  # used to assign WorkUnits to the Node and to keep track of its status.
  # When a Node exits, it destroys this record.
  class NodeRecord < ActiveRecord::Base

    has_many :work_units

    validates_presence_of :host, :ip_address, :port, :enabled_actions

    after_destroy :redistribute_work_units

    # Available Nodes haven't used up their maximum number of workers yet.
    # Ordered so that the least recently updated Node comes first.
    named_scope :available, {
      :conditions => ['(max_workers is null or (select count(*) from work_units where node_record_id = node_records.id) < max_workers)'],
      :order      => 'updated_at asc'
    }

    # Register a Node with the central server, creating or updating the
    # record for <tt>params[:host]</tt>. Currently this only happens at Node
    # startup. Raises if the record fails to save (update_attributes!).
    def self.check_in(params, request)
      attrs = {}
      attrs[:ip_address]      = request.ip
      attrs[:port]            = params[:port]
      attrs[:busy]            = params[:busy]
      attrs[:max_workers]     = params[:max_workers]
      attrs[:enabled_actions] = params[:enabled_actions]
      record = self.find_or_create_by_host(params[:host])
      record.update_attributes!(attrs)
    end

    # Dispatch a WorkUnit to this node and move the node to the back of the
    # rotation (by touching it). If the Node answers 503 with the overloaded
    # message, it is marked busy; any other failed request is re-raised. If
    # the Node can't be reached at all, it is considered down and this record
    # is destroyed, freeing up all of its checked-out work units. Returns
    # true if the WorkUnit was dispatched successfully.
    def send_work_unit(unit)
      begin
        response = node['/work'].post(:work_unit => unit.to_json)
        pid      = JSON.parse(response)['pid']
        unit.assign_to(self, pid)
        touch && true
      rescue RestClient::RequestFailed => err
        overloaded = err.http_code == 503 && err.http_body == Node::OVERLOADED_MESSAGE
        raise err unless overloaded
        update_attribute(:busy, true) && false
      rescue RestClient::Exception, Errno::ECONNREFUSED, Timeout::Error
        # Couldn't post to node, assume it's gone away.
        destroy && false
      end
    end

    # What Actions is this Node able to run? Parsed once from the
    # comma-separated +enabled_actions+ column.
    def actions
      @actions ||= enabled_actions.split(',')
    end

    # Is this Node too busy for more work? Either the +busy+ flag is set, or
    # the Node already holds +max_workers+ work units.
    def busy?
      return busy if busy
      max_workers && work_units.count >= max_workers
    end

    # The URL at which this Node may be reached.
    # TODO: Make sure that the host actually has externally accessible DNS.
    def url
      @url ||= "http://#{host}:#{port}"
    end

    # Keep a RestClient::Resource handy for contacting the Node, built with
    # CloudCrowd.client_options (HTTP authentication, if configured).
    def node
      @node ||= RestClient::Resource.new(url, CloudCrowd.client_options)
    end

    # The printable status of the Node: 'busy' or 'available'.
    def display_status
      if busy?
        'busy'
      else
        'available'
      end
    end

    # A list of the process ids of the workers currently being run by the Node.
    def worker_pids
      units = work_units.all(:select => 'worker_pid')
      units.map {|unit| unit.worker_pid }
    end

    # Release all of this Node's WorkUnits for other nodes to take, by
    # clearing their node and worker pid in a single UPDATE.
    def release_work_units
      WorkUnit.update_all('node_record_id = null, worker_pid = null', "node_record_id = #{id}")
    end

    # The JSON representation of a NodeRecord includes its worker_pids.
    # +opts+ is accepted for signature compatibility and is not used.
    def to_json(opts={})
      summary = {
        'host'    => host,
        'workers' => worker_pids,
        'status'  => display_status
      }
      summary.to_json
    end


    private

    # When a Node exits, release its WorkUnits and redistribute them to others.
    # Redistribute in a separate thread to avoid delaying shutdown.
    def redistribute_work_units
      release_work_units
      Thread.new { WorkUnit.distribute_to_nodes }
    end

  end
end
@@ -0,0 +1,170 @@
1
module CloudCrowd

  # A WorkUnit is an atomic chunk of work from a job, processing a single input
  # through a single action. The WorkUnits are run in parallel, with each worker
  # daemon processing one at a time. The splitting and merging stages of a job
  # are each run as a single WorkUnit.
  class WorkUnit < ActiveRecord::Base
    include ModelStatus

    # We use a random number in (0...MAX_RESERVATION) to reserve work units.
    # The size of the maximum signed integer in MySQL -- SQLite has no limit.
    MAX_RESERVATION = 2147483647

    # We only reserve a certain number of WorkUnits in a single go, to avoid
    # reserving the entire table.
    RESERVATION_LIMIT = 25

    belongs_to :job
    belongs_to :node_record

    validates_presence_of :job_id, :status, :input, :action

    # Available WorkUnits are waiting to be distributed to Nodes for processing.
    named_scope :available, {:conditions => {:reservation => nil, :worker_pid => nil, :status => INCOMPLETE}}
    # Reserved WorkUnits have been marked for distribution by a central server process.
    named_scope :reserved, lambda {|reservation|
      {:conditions => {:reservation => reservation}, :order => 'updated_at asc'}
    }

    # Attempt to send a list of WorkUnits to nodes with available capacity.
    # A single central server process stops the same WorkUnit from being
    # distributed to multiple nodes by reserving it first. The algorithm used
    # should be lock-free.
    #
    # We reserve WorkUnits for this process in chunks of RESERVATION_LIMIT size,
    # and try to match them to Nodes that are capable of handling the Action.
    # WorkUnits get removed from the availability list when they are
    # successfully sent, and Nodes get removed when they are busy or have the
    # action in question disabled.
    def self.distribute_to_nodes
      reservation = nil
      loop do
        # Clear the previous batch's reservation before taking a new one.
        # The ensure below only sees the final reservation, so without this
        # the units dispatched in earlier batches would keep a stale
        # reservation and never match the +available+ scope again after a
        # retry or after their node releases them.
        WorkUnit.cancel_reservations(reservation) if reservation
        return unless reservation = WorkUnit.reserve_available(:limit => RESERVATION_LIMIT)
        work_units      = WorkUnit.reserved(reservation)
        available_nodes = NodeRecord.available
        while (node = available_nodes.shift) && (unit = work_units.shift) do
          if node.actions.include? unit.action
            if node.send_work_unit(unit)
              # Keep the node in rotation while it still has capacity.
              available_nodes.push(node) unless node.busy?
              next
            end
          end
          # Not sent: put the unit back; the node is dropped for this pass.
          work_units.push(unit)
        end
        # Stop when units are left over or no nodes remain to take them.
        return if work_units.any? || available_nodes.empty?
      end
    ensure
      WorkUnit.cancel_reservations(reservation) if reservation
    end

    # Reserves available WorkUnits for this process, tagging them with a
    # random reservation number. +options+ is passed through to update_all
    # (e.g. :limit). Returns the reservation number, or false if there were
    # no WorkUnits available.
    def self.reserve_available(options={})
      reservation = ActiveSupport::SecureRandom.random_number(MAX_RESERVATION)
      any = WorkUnit.available.update_all("reservation = #{reservation}", nil, options) > 0
      any && reservation
    end

    # Cancels all outstanding WorkUnit reservations for this process.
    def self.cancel_reservations(reservation)
      WorkUnit.reserved(reservation).update_all('reservation = null')
    end

    # Cancels all outstanding WorkUnit reservations for all processes. (Useful
    # in the console for debugging.)
    def self.cancel_all_reservations
      WorkUnit.update_all('reservation = null')
    end

    # Look up a WorkUnit by the worker that's currently processing it. Specified
    # by <tt>pid@host</tt>. Returns nil if the host has no NodeRecord.
    def self.find_by_worker_name(name)
      pid, host = name.split('@')
      node = NodeRecord.find_by_host(host)
      node && node.work_units.find_by_worker_pid(pid)
    end

    # Convenience method for starting a new WorkUnit. Non-String input is
    # serialized to JSON before being stored.
    def self.start(job, action, input, status)
      input = input.to_json unless input.is_a? String
      self.create(:job => job, :action => action, :input => input, :status => status)
    end

    # Mark this unit as having finished successfully.
    # Splitting work units are handled differently (an optimization) -- they
    # immediately fire off all of their resulting WorkUnits for processing,
    # without waiting for the rest of their splitting cousins to complete.
    def finish(result, time_taken)
      if splitting?
        [parsed_output(result)].flatten.each do |new_input|
          WorkUnit.start(job, action, new_input, PROCESSING)
        end
        self.destroy
        job.set_next_status if job && job.done_splitting?
      else
        update_attributes({
          :status         => SUCCEEDED,
          :node_record    => nil,
          :worker_pid     => nil,
          :attempts       => attempts + 1,
          :output         => result,
          :time           => time_taken
        })
        job && job.check_for_completion
      end
    end

    # Mark this unit as having failed. Retries instead while the attempt
    # count is below the configured :work_unit_retries.
    def fail(output, time_taken)
      tries = self.attempts + 1
      return try_again if tries < CloudCrowd.config[:work_unit_retries]
      update_attributes({
        :status         => FAILED,
        :node_record    => nil,
        :worker_pid     => nil,
        :attempts       => tries,
        :output         => output,
        :time           => time_taken
      })
      job && job.check_for_completion
    end

    # Ever tried. Ever failed. No matter. Try again. Fail again. Fail better.
    # Detaches the unit from its node and worker and bumps the attempt count.
    def try_again
      update_attributes({
        :node_record  => nil,
        :worker_pid   => nil,
        :attempts     => self.attempts + 1
      })
    end

    # When a Node checks out a WorkUnit, establish the connection between
    # WorkUnit and NodeRecord and record the worker_pid.
    def assign_to(node_record, worker_pid)
      update_attributes!(:node_record => node_record, :worker_pid => worker_pid)
    end

    # All output needs to be wrapped in a JSON object for consistency
    # (unfortunately, JSON.parse needs the top-level to be an object or array).
    # Convenience method to provide the parsed version: the value stored
    # under the 'output' key. Defaults to this unit's own output.
    def parsed_output(out = self.output)
      JSON.parse(out)['output']
    end

    # The JSON representation of a WorkUnit shares the Job's options with all
    # its cousin WorkUnits. +opts+ is accepted (and ignored) so the signature
    # matches Job#to_json and NodeRecord#to_json, and so callers that pass
    # options or generator state don't raise ArgumentError.
    def to_json(opts={})
      {
        'id'        => self.id,
        'job_id'    => self.job_id,
        'input'     => self.input,
        'attempts'  => self.attempts,
        'action'    => self.action,
        'options'   => JSON.parse(self.job.options),
        'status'    => self.status
      }.to_json
    end

  end
end
@@ -0,0 +1,40 @@
1
module CloudCrowd

  # Mixin that gives a model a named scope and a query method for every
  # CloudCrowd status. Shared by both Jobs and WorkUnits.
  module ModelStatus

    # Hook run when the module is included: defines one named scope per
    # status on the including class.
    def self.included(klass)
      # Note that COMPLETE and INCOMPLETE are unions of other states.
      scopes = [
        ['processing', PROCESSING],
        ['succeeded',  SUCCEEDED],
        ['failed',     FAILED],
        ['splitting',  SPLITTING],
        ['merging',    MERGING],
        ['complete',   COMPLETE],
        ['incomplete', INCOMPLETE]
      ]

      klass.class_eval do
        scopes.each do |scope_name, code|
          named_scope scope_name, :conditions => {:status => code}
        end
      end
    end

    # Is the model in the PROCESSING state?
    def processing?
      status == PROCESSING
    end

    # Is the model in the SUCCEEDED state?
    def succeeded?
      status == SUCCEEDED
    end

    # Is the model in the FAILED state?
    def failed?
      status == FAILED
    end

    # Is the model in the SPLITTING state?
    def splitting?
      status == SPLITTING
    end

    # Is the model in the MERGING state?
    def merging?
      status == MERGING
    end

    # Is the model's status one of the COMPLETE states?
    def complete?
      COMPLETE.include?(status)
    end

    # Is the model's status one of the INCOMPLETE states?
    def incomplete?
      INCOMPLETE.include?(status)
    end

    # Get the displayable status name of the model's status code.
    def display_status
      CloudCrowd.display_status(status)
    end

  end
end
37
+
38
+ require 'cloud_crowd/models/job'
39
+ require 'cloud_crowd/models/node_record'
40
+ require 'cloud_crowd/models/work_unit'