documentcloud-cloud-crowd 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/cloud-crowd.gemspec CHANGED
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'cloud-crowd'
3
- s.version = '0.0.3' # Keep version in sync with cloud-cloud.rb
3
+ s.version = '0.0.4' # Keep version in sync with cloud-cloud.rb
4
4
  s.date = '2009-08-23'
5
5
 
6
6
  s.homepage = "http://documentcloud.org" # wiki page on github?
@@ -49,12 +49,11 @@ lib/cloud_crowd/action.rb
49
49
  lib/cloud_crowd/app.rb
50
50
  lib/cloud_crowd/asset_store.rb
51
51
  lib/cloud_crowd/command_line.rb
52
- lib/cloud_crowd/core_ext.rb
53
52
  lib/cloud_crowd/daemon.rb
54
53
  lib/cloud_crowd/helpers/authorization.rb
55
54
  lib/cloud_crowd/helpers/resources.rb
56
- lib/cloud_crowd/helpers/urls.rb
57
55
  lib/cloud_crowd/helpers.rb
56
+ lib/cloud_crowd/inflector.rb
58
57
  lib/cloud_crowd/models/job.rb
59
58
  lib/cloud_crowd/models/work_unit.rb
60
59
  lib/cloud_crowd/models.rb
data/lib/cloud-crowd.rb CHANGED
@@ -11,9 +11,6 @@ gem 'rest-client'
11
11
  gem 'right_aws'
12
12
  gem 'sinatra'
13
13
 
14
- # Common CloudCrowd libs:
15
- require 'cloud_crowd/core_ext'
16
-
17
14
  # Autoloading for all the pieces which may or may not be needed:
18
15
  autoload :ActiveRecord, 'activerecord'
19
16
  autoload :Benchmark, 'benchmark'
@@ -34,6 +31,7 @@ module CloudCrowd
34
31
  autoload :Action, 'cloud_crowd/action'
35
32
  autoload :AssetStore, 'cloud_crowd/asset_store'
36
33
  autoload :Helpers, 'cloud_crowd/helpers'
34
+ autoload :Inflector, 'cloud_crowd/inflector'
37
35
  autoload :Job, 'cloud_crowd/models'
38
36
  autoload :WorkUnit, 'cloud_crowd/models'
39
37
 
@@ -41,7 +39,7 @@ module CloudCrowd
41
39
  ROOT = File.expand_path(File.dirname(__FILE__) + '/..')
42
40
 
43
41
  # Keep the version in sync with the gemspec.
44
- VERSION = '0.0.3'
42
+ VERSION = '0.0.4'
45
43
 
46
44
  # A Job is processing if its WorkUnits in the queue to be handled by workers.
47
45
  PROCESSING = 1
@@ -88,6 +86,15 @@ module CloudCrowd
88
86
  configuration = YAML.load_file(config_path)
89
87
  ActiveRecord::Base.establish_connection(configuration)
90
88
  end
89
+
90
+ # Keep an authenticated (if configured to enable authentication) resource
91
+ # for the central server.
92
+ def central_server
93
+ return @central_server if @central_server
94
+ params = [CloudCrowd.config[:central_server]]
95
+ params += [CloudCrowd.config[:login], CloudCrowd.config[:password]] if CloudCrowd.config[:use_http_authentication]
96
+ @central_server = RestClient::Resource.new(*params)
97
+ end
91
98
 
92
99
  # Return the readable status name of an internal CloudCrowd status number.
93
100
  def display_status(status)
@@ -98,13 +105,13 @@ module CloudCrowd
98
105
  # so we lazy-load them. Think about a variant of this for installing and
99
106
  # loading actions into a running CloudCrowd cluster on the fly.
100
107
  def actions(name)
101
- action_class = name.camelize
108
+ action_class = Inflector.camelize(name)
102
109
  begin
103
110
  raise NameError, "can't find the #{action_class} Action" unless Module.constants.include?(action_class)
104
111
  Module.const_get(action_class)
105
112
  rescue NameError => e
106
113
  user_action = "#{@config_path}/actions/#{name}"
107
- default_action = "#{CloudCrowd::ROOT}/actions/#{name}"
114
+ default_action = "#{ROOT}/actions/#{name}"
108
115
  require user_action and retry if File.exists? "#{user_action}.rb"
109
116
  require default_action and retry if File.exists? "#{default_action}.rb"
110
117
  raise e
@@ -18,14 +18,14 @@ module CloudCrowd
18
18
 
19
19
  # Configuring a new Action sets up all of the read-only variables that
20
20
  # form the bulk of the API for action subclasses. (Paths to read from and
21
- # write to).
21
+ # write to). It creates the work_directory and moves into it.
22
22
  def configure(status, input, options, store)
23
23
  @input, @options, @store = input, options, store
24
24
  @job_id, @work_unit_id = options['job_id'], options['work_unit_id']
25
25
  @work_directory = File.expand_path(File.join(@store.temp_storage_path, storage_prefix))
26
26
  FileUtils.mkdir_p(@work_directory) unless File.exists?(@work_directory)
27
27
  Dir.chdir @work_directory
28
- unless status == CloudCrowd::MERGING
28
+ unless status == MERGING
29
29
  @input_path = File.join(@work_directory, File.basename(@input))
30
30
  @file_name = File.basename(@input_path, File.extname(@input_path))
31
31
  download(@input, @input_path)
@@ -43,15 +43,16 @@ module CloudCrowd
43
43
  path
44
44
  end
45
45
 
46
- # Takes a local filesystem path, and returns the public url on S3 where the
47
- # file was saved.
46
+ # Takes a local filesystem path, and returns the public (or authenticated)
47
+ # url on S3 where the file was saved.
48
48
  def save(file_path)
49
49
  save_path = File.join(s3_storage_path, File.basename(file_path))
50
50
  @store.save(file_path, save_path)
51
51
  return @store.url(save_path)
52
52
  end
53
53
 
54
- # After the Action has finished, we remove the work directory.
54
+ # After the Action has finished, we remove the work directory and return
55
+ # to the root directory (where daemons run by default).
55
56
  def cleanup_work_directory
56
57
  Dir.chdir '/'
57
58
  FileUtils.rm_r(@work_directory)
@@ -64,7 +65,7 @@ module CloudCrowd
64
65
  # [action_name]/job_[job_id]/unit_[work_unit_it]
65
66
  def storage_prefix
66
67
  path_parts = []
67
- path_parts << underscore(self.class.to_s)
68
+ path_parts << Inflector.underscore(self.class)
68
69
  path_parts << "job_#{@job_id}"
69
70
  path_parts << "unit_#{@work_unit_id}" if @work_unit_id
70
71
  @storage_prefix ||= File.join(path_parts)
@@ -74,15 +75,6 @@ module CloudCrowd
74
75
  @s3_storage_path ||= storage_prefix
75
76
  end
76
77
 
77
- # Pilfered from the ActiveSupport::Inflector.
78
- def underscore(word)
79
- word.to_s.gsub(/::/, '/').
80
- gsub(/([A-Z]+)([A-Z][a-z])/,'\1_\2').
81
- gsub(/([a-z\d])([A-Z])/,'\1_\2').
82
- tr("-", "_").
83
- downcase
84
- end
85
-
86
78
  end
87
79
 
88
80
  end
@@ -1,15 +1,16 @@
1
1
  module CloudCrowd
2
2
 
3
3
  class App < Sinatra::Default
4
-
5
- # static serves files from /public, methodoverride allows the _method param.
6
- enable :static, :methodoverride
7
4
 
8
- set :root, CloudCrowd::ROOT
5
+ set :root, ROOT
9
6
  set :authorization_realm, "CloudCrowd"
10
7
 
11
- helpers CloudCrowd::Helpers
8
+ helpers Helpers
9
+
10
+ # static serves files from /public, methodoverride allows the _method param.
11
+ enable :static, :methodoverride
12
12
 
13
+ # Enabling HTTP Authentication turns it on for all requests.
13
14
  before do
14
15
  login_required if CloudCrowd.config[:use_http_authentication]
15
16
  end
@@ -51,7 +52,7 @@ module CloudCrowd
51
52
  current_work_unit.fail(params[:output], params[:time])
52
53
  dequeue_work_unit(1)
53
54
  else
54
- return error(500, "Completing a work unit must specify status.")
55
+ error(500, "Completing a work unit must specify status.")
55
56
  end
56
57
  end
57
58
  end
@@ -6,7 +6,7 @@ module CloudCrowd
6
6
  # Configuration files required for the `crowd` command to function.
7
7
  CONFIG_FILES = ['config.yml', 'config.ru', 'database.yml']
8
8
 
9
- # Reference the absolute path to the root, because we're about to chdir.
9
+ # Reference the absolute path to the root.
10
10
  CC_ROOT = File.expand_path(File.dirname(__FILE__) + '/../..')
11
11
 
12
12
  # Path to the Daemons gem script which launches workers.
@@ -58,7 +58,7 @@ OPTIONS:
58
58
  def run_server
59
59
  ensure_config
60
60
  require 'rubygems'
61
- rackup_path = File.expand_path('config.ru')
61
+ rackup_path = File.expand_path("#{@options[:config_path]}/config.ru")
62
62
  if Gem.available? 'thin'
63
63
  exec "thin -e production -p #{@options[:port]} -R #{rackup_path} start"
64
64
  else
@@ -106,13 +106,13 @@ OPTIONS:
106
106
  load_code
107
107
  num_workers = @options[:num_workers] || CloudCrowd.config[:num_workers]
108
108
  num_workers.times do
109
- `CLOUD_CROWD_CONFIG='#{File.expand_path('config.yml')}' ruby #{WORKER_RUNNER} start`
109
+ `CLOUD_CROWD_CONFIG='#{File.expand_path(@options[:config_path] + "/config.yml")}' ruby #{WORKER_RUNNER} start`
110
110
  end
111
111
  end
112
112
 
113
113
  # For debugging, run a single worker in the current process, showing output.
114
114
  def run_worker
115
- exec "CLOUD_CROWD_CONFIG='#{File.expand_path('config.yml')}' ruby #{WORKER_RUNNER} run"
115
+ exec "CLOUD_CROWD_CONFIG='#{File.expand_path(@options[:config_path] + "/config.yml")}' ruby #{WORKER_RUNNER} run"
116
116
  end
117
117
 
118
118
  # Stop all active workers.
@@ -137,25 +137,21 @@ OPTIONS:
137
137
  # the CLOUD_CROWD_CONFIG environment variable. Exit if they're not found.
138
138
  def ensure_config
139
139
  return if @config_found
140
- Dir.chdir @options[:config_path]
141
- CONFIG_FILES.all? {|f| File.exists? f } ? @config_dir = true : config_not_found
140
+ found = CONFIG_FILES.all? {|f| File.exists? "#{@options[:config_path]}/#{f}" }
141
+ found ? @config_dir = true : config_not_found
142
142
  end
143
143
 
144
144
  # Parse all options for all actions.
145
145
  # TODO: Think about parsing options per sub-command separately.
146
146
  def parse_options
147
147
  @options = {
148
- :db_config => 'database.yml',
149
148
  :port => 9173,
150
- :config_path => ENV['CLOUD_CROWD_CONFIG'] || '.',
149
+ :config_path => ENV['CLOUD_CROWD_CONFIG'] || '.'
151
150
  }
152
151
  @option_parser = OptionParser.new do |opts|
153
152
  opts.on('-c', '--config PATH', 'path to configuration directory') do |conf_path|
154
153
  @options[:config_path] = conf_path
155
154
  end
156
- opts.on('-d', '--database-config PATH', 'path to database.yml') do |conf_path|
157
- @options[:db_config] = conf_path
158
- end
159
155
  opts.on('-n', '--num-workers NUM', OptionParser::DecimalInteger, 'number of worker processes') do |num|
160
156
  @options[:num_workers] = num
161
157
  end
@@ -164,7 +160,7 @@ OPTIONS:
164
160
  end
165
161
  opts.on_tail('-v', '--version', 'show version') do
166
162
  load_code
167
- puts "CloudCrowd version #{CloudCrowd::VERSION}"
163
+ puts "CloudCrowd version #{VERSION}"
168
164
  exit
169
165
  end
170
166
  end
@@ -178,14 +174,14 @@ OPTIONS:
178
174
  ensure_config
179
175
  require 'rubygems'
180
176
  require "#{CC_ROOT}/lib/cloud-crowd"
181
- CloudCrowd.configure('config.yml')
177
+ CloudCrowd.configure("#{@options[:config_path]}/config.yml")
182
178
  end
183
179
 
184
180
  # Establish a connection to the central server's database. Not all commands
185
181
  # require this.
186
182
  def connect_to_database
187
183
  require 'cloud_crowd/models'
188
- CloudCrowd.configure_database(@options[:db_config])
184
+ CloudCrowd.configure_database("#{@options[:config_path]}/database.yml")
189
185
  end
190
186
 
191
187
  # Exit with an explanation if the configuration files couldn't be found.
@@ -16,7 +16,7 @@ module CloudCrowd
16
16
 
17
17
  def initialize
18
18
  @wait_time = MIN_WAIT
19
- @worker = CloudCrowd::Worker.new
19
+ @worker = Worker.new
20
20
  Signal.trap('INT', 'EXIT')
21
21
  Signal.trap('KILL', 'EXIT')
22
22
  Signal.trap('TERM', 'EXIT')
@@ -1,9 +1,8 @@
1
1
  require 'cloud_crowd/helpers/authorization'
2
2
  require 'cloud_crowd/helpers/resources'
3
- require 'cloud_crowd/helpers/urls'
4
3
 
5
4
  module CloudCrowd
6
5
  module Helpers
7
- include Authorization, Resources, Urls #, Rack::Utils
6
+ include Authorization, Resources #, Rack::Utils
8
7
  end
9
8
  end
@@ -4,6 +4,7 @@ module CloudCrowd
4
4
  module Helpers
5
5
  module Authorization
6
6
 
7
+ # Ensure that the request includes the correct credentials.
7
8
  def login_required
8
9
  return if authorized?
9
10
  unauthorized! unless auth.provided?
@@ -12,14 +13,13 @@ module CloudCrowd
12
13
  request.env['REMOTE_USER'] = auth.username
13
14
  end
14
15
 
16
+ # Has the request been authenticated?
15
17
  def authorized?
16
18
  !!request.env['REMOTE_USER']
17
19
  end
18
20
 
19
- def current_user
20
- request.env['REMOTE_USER']
21
- end
22
-
21
+ # A request is authorized if its login and password match those stored
22
+ # in config.yml, or if authentication is disabled.
23
23
  def authorize(login, password)
24
24
  return true unless CloudCrowd.config[:use_http_authentication]
25
25
  return CloudCrowd.config[:login] == login &&
@@ -33,7 +33,7 @@ module CloudCrowd
33
33
  @auth ||= Rack::Auth::Basic::Request.new(request.env)
34
34
  end
35
35
 
36
- def unauthorized!(realm = CloudCrowd::App.authorization_realm)
36
+ def unauthorized!(realm = App.authorization_realm)
37
37
  response['WWW-Authenticate'] = "Basic realm=\"#{realm}\""
38
38
  halt 401, 'Authorization Required'
39
39
  end
@@ -0,0 +1,19 @@
1
+ module CloudCrowd
2
+
3
+ # Pilfered in parts from the ActiveSupport::Inflector.
4
+ module Inflector
5
+
6
+ def self.camelize(word)
7
+ word.to_s.gsub(/\/(.?)/) { "::#{$1.upcase}" }.gsub(/(?:^|_)(.)/) { $1.upcase }
8
+ end
9
+
10
+ def self.underscore(word)
11
+ word.to_s.gsub(/::/, '/').
12
+ gsub(/([A-Z]+)([A-Z][a-z])/,'\1_\2').
13
+ gsub(/([a-z\d])([A-Z])/,'\1_\2').
14
+ tr("-", "_").
15
+ downcase
16
+ end
17
+
18
+ end
19
+ end
@@ -1,28 +1,31 @@
1
1
  module CloudCrowd
2
+
3
+ # Adds named scopes and query methods for every CloudCrowd status to
4
+ # both Jobs and WorkUnits.
2
5
  module ModelStatus
3
6
 
4
7
  def self.included(klass)
5
8
 
6
9
  klass.class_eval do
7
10
  # Note that COMPLETE and INCOMPLETE are unions of other states.
8
- named_scope 'processing', :conditions => {:status => CloudCrowd::PROCESSING}
9
- named_scope 'succeeded', :conditions => {:status => CloudCrowd::SUCCEEDED}
10
- named_scope 'failed', :conditions => {:status => CloudCrowd::FAILED}
11
- named_scope 'splitting', :conditions => {:status => CloudCrowd::SPLITTING}
12
- named_scope 'merging', :conditions => {:status => CloudCrowd::MERGING}
13
- named_scope 'complete', :conditions => {:status => CloudCrowd::COMPLETE}
14
- named_scope 'incomplete', :conditions => {:status => CloudCrowd::INCOMPLETE}
11
+ named_scope 'processing', :conditions => {:status => PROCESSING}
12
+ named_scope 'succeeded', :conditions => {:status => SUCCEEDED}
13
+ named_scope 'failed', :conditions => {:status => FAILED}
14
+ named_scope 'splitting', :conditions => {:status => SPLITTING}
15
+ named_scope 'merging', :conditions => {:status => MERGING}
16
+ named_scope 'complete', :conditions => {:status => COMPLETE}
17
+ named_scope 'incomplete', :conditions => {:status => INCOMPLETE}
15
18
  end
16
19
 
17
20
  end
18
21
 
19
- def processing?; self.status == CloudCrowd::PROCESSING; end
20
- def succeeded?; self.status == CloudCrowd::SUCCEEDED; end
21
- def failed?; self.status == CloudCrowd::FAILED; end
22
- def splitting?; self.status == CloudCrowd::SPLITTING; end
23
- def merging?; self.status == CloudCrowd::MERGING; end
24
- def complete?; CloudCrowd::COMPLETE.include?(self.status); end
25
- def incomplete?; CloudCrowd::INCOMPLETE.include?(self.status); end
22
+ def processing?; self.status == PROCESSING; end
23
+ def succeeded?; self.status == SUCCEEDED; end
24
+ def failed?; self.status == FAILED; end
25
+ def splitting?; self.status == SPLITTING; end
26
+ def merging?; self.status == MERGING; end
27
+ def complete?; COMPLETE.include?(self.status); end
28
+ def incomplete?; INCOMPLETE.include?(self.status); end
26
29
 
27
30
  end
28
31
  end
@@ -5,7 +5,7 @@ module CloudCrowd
5
5
  # of inputs (usually public urls to files), an action (the name of a script that
6
6
  # CloudCrowd knows how to run), and, eventually a corresponding list of output.
7
7
  class Job < ActiveRecord::Base
8
- include CloudCrowd::ModelStatus
8
+ include ModelStatus
9
9
 
10
10
  has_many :work_units, :dependent => :destroy
11
11
 
@@ -23,16 +23,18 @@ module CloudCrowd
23
23
  )
24
24
  end
25
25
 
26
+ # Creating a job creates its corresponding work units, adding them
27
+ # to the queue.
26
28
  def after_create
27
29
  self.queue_for_workers(JSON.parse(self.inputs))
28
30
  end
29
31
 
30
32
  def before_validation_on_create
31
- self.status = self.splittable? ? CloudCrowd::SPLITTING : CloudCrowd::PROCESSING
33
+ self.status = self.splittable? ? SPLITTING : PROCESSING
32
34
  end
33
35
 
34
36
  # After work units are marked successful, we check to see if all of them have
35
- # finished, if so, this job is complete.
37
+ # finished, if so, continue on to the next phase of the job.
36
38
  def check_for_completion
37
39
  return unless all_work_units_complete?
38
40
  transition_to_next_phase
@@ -45,19 +47,19 @@ module CloudCrowd
45
47
  self.save
46
48
 
47
49
  case self.status
48
- when CloudCrowd::PROCESSING then queue_for_workers(output_list.map {|o| JSON.parse(o) }.flatten)
49
- when CloudCrowd::MERGING then queue_for_workers(output_list.to_json)
50
- else fire_callback
50
+ when PROCESSING then queue_for_workers(output_list.map {|o| JSON.parse(o) }.flatten)
51
+ when MERGING then queue_for_workers(output_list.to_json)
52
+ else fire_callback
51
53
  end
52
54
  self
53
55
  end
54
56
 
55
- # Transition from the current phase to the next one.
57
+ # Transition this Job's status to the following one.
56
58
  def transition_to_next_phase
57
- self.status = any_work_units_failed? ? CloudCrowd::FAILED :
58
- self.splitting? ? CloudCrowd::PROCESSING :
59
- self.should_merge? ? CloudCrowd::MERGING :
60
- CloudCrowd::SUCCEEDED
59
+ self.status = any_work_units_failed? ? FAILED :
60
+ self.splitting? ? PROCESSING :
61
+ self.mergeable? ? MERGING :
62
+ SUCCEEDED
61
63
  end
62
64
 
63
65
  # If a callback_url is defined, post the Job's JSON to it upon completion.
@@ -71,7 +73,7 @@ module CloudCrowd
71
73
 
72
74
  # Cleaning up after a job will remove all of its files from S3.
73
75
  def cleanup
74
- CloudCrowd::AssetStore.new.cleanup_job(self)
76
+ AssetStore.new.cleanup_job(self)
75
77
  end
76
78
 
77
79
  # Have all of the WorkUnits finished? We could trade reads for writes here
@@ -85,18 +87,23 @@ module CloudCrowd
85
87
  self.work_units.failed.count > 0
86
88
  end
87
89
 
90
+ # This job is splittable if its Action has a +split+ method.
88
91
  def splittable?
89
- self.action_class.new.respond_to? :split
92
+ self.action_class.public_instance_methods.include? 'split'
90
93
  end
91
94
 
92
- def should_merge?
93
- self.processing? && self.action_class.new.respond_to?(:merge)
95
+ # This job is mergeable if its Action has a +merge+ method.
96
+ def mergeable?
97
+ self.processing? && self.action_class.public_instance_methods.include?('merge')
94
98
  end
95
99
 
100
+ # Retrieve the class for this Job's Action, loading it if necessary.
96
101
  def action_class
97
102
  CloudCrowd.actions(self.action)
98
103
  end
99
104
 
105
+ # When the WorkUnits are all finished, gather all their outputs together
106
+ # before removing them from the database entirely.
100
107
  def gather_outputs_from_work_units
101
108
  outs = self.work_units.complete.map {|wu| wu.output }
102
109
  self.work_units.complete.destroy_all
@@ -107,14 +114,18 @@ module CloudCrowd
107
114
  CloudCrowd.display_status(self.status)
108
115
  end
109
116
 
110
- def work_units_remaining
111
- self.work_units.incomplete.count
117
+ # How complete is this Job?
118
+ def percent_complete
119
+ return 0 if splitting?
120
+ return 100 if complete?
121
+ return 99 if merging?
122
+ (work_units.complete.count / work_units.count.to_f * 100).round
112
123
  end
113
124
 
114
125
  # A JSON representation of this job includes the statuses of its component
115
126
  # WorkUnits, as well as any completed outputs.
116
127
  def to_json(opts={})
117
- atts = {'id' => self.id, 'status' => self.display_status, 'work_units_remaining' => self.work_units_remaining}
128
+ atts = {'id' => self.id, 'status' => self.display_status, 'percent_complete' => self.percent_complete}
118
129
  atts.merge!({'outputs' => JSON.parse(self.outputs)}) if self.outputs
119
130
  atts.merge!({'time' => self.time}) if self.time
120
131
  atts.to_json
@@ -3,7 +3,7 @@ module CloudCrowd
3
3
  # A WorkUnit is an atomic chunk of work from a job, processing a single input
4
4
  # through a single action. All WorkUnits receive the same options.
5
5
  class WorkUnit < ActiveRecord::Base
6
- include CloudCrowd::ModelStatus
6
+ include ModelStatus
7
7
 
8
8
  belongs_to :job
9
9
 
@@ -14,7 +14,7 @@ module CloudCrowd
14
14
  # Find the Nth available WorkUnit in the queue, and take it out.
15
15
  def self.dequeue(offset=0)
16
16
  unit = self.first(
17
- :conditions => {:status => CloudCrowd::INCOMPLETE, :taken => false},
17
+ :conditions => {:status => INCOMPLETE, :taken => false},
18
18
  :order => "created_at asc",
19
19
  :offset => offset
20
20
  )
@@ -29,7 +29,7 @@ module CloudCrowd
29
29
  # Mark this unit as having finished successfully.
30
30
  def finish(output, time_taken)
31
31
  update_attributes({
32
- :status => CloudCrowd::SUCCEEDED,
32
+ :status => SUCCEEDED,
33
33
  :taken => false,
34
34
  :attempts => self.attempts + 1,
35
35
  :output => output,
@@ -42,7 +42,7 @@ module CloudCrowd
42
42
  tries = self.attempts + 1
43
43
  return try_again if tries < CloudCrowd.config[:work_unit_retries]
44
44
  update_attributes({
45
- :status => CloudCrowd::FAILED,
45
+ :status => FAILED,
46
46
  :taken => false,
47
47
  :attempts => tries,
48
48
  :output => output,
@@ -1,7 +1,6 @@
1
1
  # This is the script that kicks off a single CloudCrowd::Daemon. Rely on
2
2
  # cloud-crowd.rb for autoloading of all the code we need.
3
3
 
4
- # Daemon/Worker Dependencies.
5
4
  require "#{File.dirname(__FILE__)}/../cloud-crowd"
6
5
 
7
6
  FileUtils.mkdir('log') unless File.exists?('log')
@@ -1,10 +1,7 @@
1
1
  module CloudCrowd
2
2
 
3
3
  class Worker
4
-
5
- CENTRAL_URL = CloudCrowd.config[:central_server]
6
- RETRY_WAIT = CloudCrowd.config[:worker_retry_wait]
7
-
4
+
8
5
  attr_reader :action
9
6
 
10
7
  # Spinning up a worker will create a new AssetStore with a persistent
@@ -13,8 +10,8 @@ module CloudCrowd
13
10
  def initialize
14
11
  @id = $$
15
12
  @hostname = Socket.gethostname
16
- @store = CloudCrowd::AssetStore.new
17
- @server = central_server_resource
13
+ @store = AssetStore.new
14
+ @server = CloudCrowd.central_server
18
15
  log 'started'
19
16
  end
20
17
 
@@ -48,14 +45,18 @@ module CloudCrowd
48
45
  end
49
46
  end
50
47
 
48
+ # We expect and require internal communication between the central server
49
+ # and the workers to succeed. If it fails for any reason, log it, and then
50
+ # keep trying the same request.
51
51
  def keep_trying_to(title)
52
52
  begin
53
53
  yield
54
54
  rescue Exception => e
55
- log "failed to #{title} -- retry in #{RETRY_WAIT} seconds"
55
+ wait_time = CloudCrowd.config[:worker_retry_wait]
56
+ log "failed to #{title} -- retry in #{wait_time} seconds"
56
57
  log e.message
57
58
  log e.backtrace
58
- sleep RETRY_WAIT
59
+ sleep wait_time
59
60
  retry
60
61
  end
61
62
  end
@@ -71,9 +72,9 @@ module CloudCrowd
71
72
  @action = CloudCrowd.actions(@action_name).new
72
73
  @action.configure(@status, @input, @options, @store)
73
74
  result = case @status
74
- when CloudCrowd::PROCESSING then @action.process
75
- when CloudCrowd::SPLITTING then @action.split
76
- when CloudCrowd::MERGING then @action.merge
75
+ when PROCESSING then @action.process
76
+ when SPLITTING then @action.split
77
+ when MERGING then @action.merge
77
78
  else raise "Work units must specify their status."
78
79
  end
79
80
  complete_work_unit(result)
@@ -92,14 +93,6 @@ module CloudCrowd
92
93
 
93
94
  private
94
95
 
95
- # Keep an authenticated (if configured to enable authentication) resource
96
- # for the central server.
97
- def central_server_resource
98
- params = [CENTRAL_URL]
99
- params += [CloudCrowd.config[:login], CloudCrowd.config[:password]] if CloudCrowd.config[:use_http_authentication]
100
- RestClient::Resource.new(*params)
101
- end
102
-
103
96
  # Common parameters to send back to central, regardless of success or failure.
104
97
  def completion_params
105
98
  {:id => @options['work_unit_id'], :time => Time.now - @start_time}
@@ -17,7 +17,7 @@ class FailingWorkUnitsTest < Test::Unit::TestCase
17
17
  (CloudCrowd.config[:work_unit_retries] - 1).times do
18
18
  job.work_units.each {|unit| unit.fail('failed', 10) }
19
19
  end
20
- assert job.reload.work_units_remaining == 3
20
+ assert job.reload.percent_complete == 0
21
21
  job.work_units.reload.each_with_index do |unit, i|
22
22
  assert unit.processing?
23
23
  assert unit.attempts == CloudCrowd.config[:work_unit_retries] - 1
@@ -17,7 +17,7 @@ class JobTest < Test::Unit::TestCase
17
17
 
18
18
  should "create all of its work units as soon as the job is created" do
19
19
  assert @job.work_units.count >= 1
20
- assert @job.work_units_remaining == 1
20
+ assert @job.percent_complete == 0
21
21
  assert @job.processing?
22
22
  assert @unit.processing?
23
23
  assert !@job.all_work_units_complete?
@@ -27,7 +27,7 @@ class JobTest < Test::Unit::TestCase
27
27
  assert !@job.all_work_units_complete?
28
28
  @unit.update_attributes(:status => CloudCrowd::SUCCEEDED, :output => 'hello')
29
29
  assert @job.reload.all_work_units_complete?
30
- assert @job.work_units_remaining == 0
30
+ assert @job.percent_complete == 100
31
31
  assert @job.outputs == "[\"hello\"]"
32
32
  end
33
33
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: documentcloud-cloud-crowd
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jeremy Ashkenas
@@ -141,12 +141,11 @@ files:
141
141
  - lib/cloud_crowd/app.rb
142
142
  - lib/cloud_crowd/asset_store.rb
143
143
  - lib/cloud_crowd/command_line.rb
144
- - lib/cloud_crowd/core_ext.rb
145
144
  - lib/cloud_crowd/daemon.rb
146
145
  - lib/cloud_crowd/helpers/authorization.rb
147
146
  - lib/cloud_crowd/helpers/resources.rb
148
- - lib/cloud_crowd/helpers/urls.rb
149
147
  - lib/cloud_crowd/helpers.rb
148
+ - lib/cloud_crowd/inflector.rb
150
149
  - lib/cloud_crowd/models/job.rb
151
150
  - lib/cloud_crowd/models/work_unit.rb
152
151
  - lib/cloud_crowd/models.rb
@@ -163,6 +162,7 @@ files:
163
162
  - test/unit/test_work_unit.rb
164
163
  has_rdoc: true
165
164
  homepage: http://documentcloud.org
165
+ licenses:
166
166
  post_install_message:
167
167
  rdoc_options: []
168
168
 
@@ -183,7 +183,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
183
183
  requirements: []
184
184
 
185
185
  rubyforge_project: cloud-crowd
186
- rubygems_version: 1.2.0
186
+ rubygems_version: 1.3.5
187
187
  signing_key:
188
188
  specification_version: 2
189
189
  summary: Better living through Map --> Ruby --> Reduce
@@ -1,10 +0,0 @@
1
- # Extensions to core Ruby.
2
-
3
- class String
4
-
5
- # Stolen-ish in parts from ActiveSupport::Inflector.
6
- def camelize
7
- self.gsub(/\/(.?)/) { "::#{$1.upcase}" }.gsub(/(?:^|_)(.)/) { $1.upcase }
8
- end
9
-
10
- end
@@ -1,7 +0,0 @@
1
- module CloudCrowd
2
- module Helpers
3
- module Urls
4
-
5
- end
6
- end
7
- end