documentcloud-cloud-crowd 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
data/cloud-crowd.gemspec CHANGED
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'cloud-crowd'
3
- s.version = '0.0.3' # Keep version in sync with cloud-cloud.rb
3
+ s.version = '0.0.4' # Keep version in sync with cloud-crowd.rb
4
4
  s.date = '2009-08-23'
5
5
 
6
6
  s.homepage = "http://documentcloud.org" # wiki page on github?
@@ -49,12 +49,11 @@ lib/cloud_crowd/action.rb
49
49
  lib/cloud_crowd/app.rb
50
50
  lib/cloud_crowd/asset_store.rb
51
51
  lib/cloud_crowd/command_line.rb
52
- lib/cloud_crowd/core_ext.rb
53
52
  lib/cloud_crowd/daemon.rb
54
53
  lib/cloud_crowd/helpers/authorization.rb
55
54
  lib/cloud_crowd/helpers/resources.rb
56
- lib/cloud_crowd/helpers/urls.rb
57
55
  lib/cloud_crowd/helpers.rb
56
+ lib/cloud_crowd/inflector.rb
58
57
  lib/cloud_crowd/models/job.rb
59
58
  lib/cloud_crowd/models/work_unit.rb
60
59
  lib/cloud_crowd/models.rb
data/lib/cloud-crowd.rb CHANGED
@@ -11,9 +11,6 @@ gem 'rest-client'
11
11
  gem 'right_aws'
12
12
  gem 'sinatra'
13
13
 
14
- # Common CloudCrowd libs:
15
- require 'cloud_crowd/core_ext'
16
-
17
14
  # Autoloading for all the pieces which may or may not be needed:
18
15
  autoload :ActiveRecord, 'activerecord'
19
16
  autoload :Benchmark, 'benchmark'
@@ -34,6 +31,7 @@ module CloudCrowd
34
31
  autoload :Action, 'cloud_crowd/action'
35
32
  autoload :AssetStore, 'cloud_crowd/asset_store'
36
33
  autoload :Helpers, 'cloud_crowd/helpers'
34
+ autoload :Inflector, 'cloud_crowd/inflector'
37
35
  autoload :Job, 'cloud_crowd/models'
38
36
  autoload :WorkUnit, 'cloud_crowd/models'
39
37
 
@@ -41,7 +39,7 @@ module CloudCrowd
41
39
  ROOT = File.expand_path(File.dirname(__FILE__) + '/..')
42
40
 
43
41
  # Keep the version in sync with the gemspec.
44
- VERSION = '0.0.3'
42
+ VERSION = '0.0.4'
45
43
 
46
44
  # A Job is processing if its WorkUnits are in the queue to be handled by workers.
47
45
  PROCESSING = 1
@@ -88,6 +86,15 @@ module CloudCrowd
88
86
  configuration = YAML.load_file(config_path)
89
87
  ActiveRecord::Base.establish_connection(configuration)
90
88
  end
89
+
90
+ # Keep an authenticated (if configured to enable authentication) resource
91
+ # for the central server.
92
+ def central_server
93
+ return @central_server if @central_server
94
+ params = [CloudCrowd.config[:central_server]]
95
+ params += [CloudCrowd.config[:login], CloudCrowd.config[:password]] if CloudCrowd.config[:use_http_authentication]
96
+ @central_server = RestClient::Resource.new(*params)
97
+ end
91
98
 
92
99
  # Return the readable status name of an internal CloudCrowd status number.
93
100
  def display_status(status)
@@ -98,13 +105,13 @@ module CloudCrowd
98
105
  # so we lazy-load them. Think about a variant of this for installing and
99
106
  # loading actions into a running CloudCrowd cluster on the fly.
100
107
  def actions(name)
101
- action_class = name.camelize
108
+ action_class = Inflector.camelize(name)
102
109
  begin
103
110
  raise NameError, "can't find the #{action_class} Action" unless Module.constants.include?(action_class)
104
111
  Module.const_get(action_class)
105
112
  rescue NameError => e
106
113
  user_action = "#{@config_path}/actions/#{name}"
107
- default_action = "#{CloudCrowd::ROOT}/actions/#{name}"
114
+ default_action = "#{ROOT}/actions/#{name}"
108
115
  require user_action and retry if File.exists? "#{user_action}.rb"
109
116
  require default_action and retry if File.exists? "#{default_action}.rb"
110
117
  raise e
@@ -18,14 +18,14 @@ module CloudCrowd
18
18
 
19
19
  # Configuring a new Action sets up all of the read-only variables that
20
20
  # form the bulk of the API for action subclasses. (Paths to read from and
21
- # write to).
21
+ # write to). It creates the work_directory and moves into it.
22
22
  def configure(status, input, options, store)
23
23
  @input, @options, @store = input, options, store
24
24
  @job_id, @work_unit_id = options['job_id'], options['work_unit_id']
25
25
  @work_directory = File.expand_path(File.join(@store.temp_storage_path, storage_prefix))
26
26
  FileUtils.mkdir_p(@work_directory) unless File.exists?(@work_directory)
27
27
  Dir.chdir @work_directory
28
- unless status == CloudCrowd::MERGING
28
+ unless status == MERGING
29
29
  @input_path = File.join(@work_directory, File.basename(@input))
30
30
  @file_name = File.basename(@input_path, File.extname(@input_path))
31
31
  download(@input, @input_path)
@@ -43,15 +43,16 @@ module CloudCrowd
43
43
  path
44
44
  end
45
45
 
46
- # Takes a local filesystem path, and returns the public url on S3 where the
47
- # file was saved.
46
+ # Takes a local filesystem path, and returns the public (or authenticated)
47
+ # url on S3 where the file was saved.
48
48
  def save(file_path)
49
49
  save_path = File.join(s3_storage_path, File.basename(file_path))
50
50
  @store.save(file_path, save_path)
51
51
  return @store.url(save_path)
52
52
  end
53
53
 
54
- # After the Action has finished, we remove the work directory.
54
+ # After the Action has finished, we remove the work directory and return
55
+ # to the root directory (where daemons run by default).
55
56
  def cleanup_work_directory
56
57
  Dir.chdir '/'
57
58
  FileUtils.rm_r(@work_directory)
@@ -64,7 +65,7 @@ module CloudCrowd
64
65
  # [action_name]/job_[job_id]/unit_[work_unit_id]
65
66
  def storage_prefix
66
67
  path_parts = []
67
- path_parts << underscore(self.class.to_s)
68
+ path_parts << Inflector.underscore(self.class)
68
69
  path_parts << "job_#{@job_id}"
69
70
  path_parts << "unit_#{@work_unit_id}" if @work_unit_id
70
71
  @storage_prefix ||= File.join(path_parts)
@@ -74,15 +75,6 @@ module CloudCrowd
74
75
  @s3_storage_path ||= storage_prefix
75
76
  end
76
77
 
77
- # Pilfered from the ActiveSupport::Inflector.
78
- def underscore(word)
79
- word.to_s.gsub(/::/, '/').
80
- gsub(/([A-Z]+)([A-Z][a-z])/,'\1_\2').
81
- gsub(/([a-z\d])([A-Z])/,'\1_\2').
82
- tr("-", "_").
83
- downcase
84
- end
85
-
86
78
  end
87
79
 
88
80
  end
@@ -1,15 +1,16 @@
1
1
  module CloudCrowd
2
2
 
3
3
  class App < Sinatra::Default
4
-
5
- # static serves files from /public, methodoverride allows the _method param.
6
- enable :static, :methodoverride
7
4
 
8
- set :root, CloudCrowd::ROOT
5
+ set :root, ROOT
9
6
  set :authorization_realm, "CloudCrowd"
10
7
 
11
- helpers CloudCrowd::Helpers
8
+ helpers Helpers
9
+
10
+ # static serves files from /public, methodoverride allows the _method param.
11
+ enable :static, :methodoverride
12
12
 
13
+ # Enabling HTTP Authentication turns it on for all requests.
13
14
  before do
14
15
  login_required if CloudCrowd.config[:use_http_authentication]
15
16
  end
@@ -51,7 +52,7 @@ module CloudCrowd
51
52
  current_work_unit.fail(params[:output], params[:time])
52
53
  dequeue_work_unit(1)
53
54
  else
54
- return error(500, "Completing a work unit must specify status.")
55
+ error(500, "Completing a work unit must specify status.")
55
56
  end
56
57
  end
57
58
  end
@@ -6,7 +6,7 @@ module CloudCrowd
6
6
  # Configuration files required for the `crowd` command to function.
7
7
  CONFIG_FILES = ['config.yml', 'config.ru', 'database.yml']
8
8
 
9
- # Reference the absolute path to the root, because we're about to chdir.
9
+ # Reference the absolute path to the root.
10
10
  CC_ROOT = File.expand_path(File.dirname(__FILE__) + '/../..')
11
11
 
12
12
  # Path to the Daemons gem script which launches workers.
@@ -58,7 +58,7 @@ OPTIONS:
58
58
  def run_server
59
59
  ensure_config
60
60
  require 'rubygems'
61
- rackup_path = File.expand_path('config.ru')
61
+ rackup_path = File.expand_path("#{@options[:config_path]}/config.ru")
62
62
  if Gem.available? 'thin'
63
63
  exec "thin -e production -p #{@options[:port]} -R #{rackup_path} start"
64
64
  else
@@ -106,13 +106,13 @@ OPTIONS:
106
106
  load_code
107
107
  num_workers = @options[:num_workers] || CloudCrowd.config[:num_workers]
108
108
  num_workers.times do
109
- `CLOUD_CROWD_CONFIG='#{File.expand_path('config.yml')}' ruby #{WORKER_RUNNER} start`
109
+ `CLOUD_CROWD_CONFIG='#{File.expand_path(@options[:config_path] + "/config.yml")}' ruby #{WORKER_RUNNER} start`
110
110
  end
111
111
  end
112
112
 
113
113
  # For debugging, run a single worker in the current process, showing output.
114
114
  def run_worker
115
- exec "CLOUD_CROWD_CONFIG='#{File.expand_path('config.yml')}' ruby #{WORKER_RUNNER} run"
115
+ exec "CLOUD_CROWD_CONFIG='#{File.expand_path(@options[:config_path] + "/config.yml")}' ruby #{WORKER_RUNNER} run"
116
116
  end
117
117
 
118
118
  # Stop all active workers.
@@ -137,25 +137,21 @@ OPTIONS:
137
137
  # the CLOUD_CROWD_CONFIG environment variable. Exit if they're not found.
138
138
  def ensure_config
139
139
  return if @config_found
140
- Dir.chdir @options[:config_path]
141
- CONFIG_FILES.all? {|f| File.exists? f } ? @config_dir = true : config_not_found
140
+ found = CONFIG_FILES.all? {|f| File.exists? "#{@options[:config_path]}/#{f}" }
141
+ found ? @config_dir = true : config_not_found
142
142
  end
143
143
 
144
144
  # Parse all options for all actions.
145
145
  # TODO: Think about parsing options per sub-command separately.
146
146
  def parse_options
147
147
  @options = {
148
- :db_config => 'database.yml',
149
148
  :port => 9173,
150
- :config_path => ENV['CLOUD_CROWD_CONFIG'] || '.',
149
+ :config_path => ENV['CLOUD_CROWD_CONFIG'] || '.'
151
150
  }
152
151
  @option_parser = OptionParser.new do |opts|
153
152
  opts.on('-c', '--config PATH', 'path to configuration directory') do |conf_path|
154
153
  @options[:config_path] = conf_path
155
154
  end
156
- opts.on('-d', '--database-config PATH', 'path to database.yml') do |conf_path|
157
- @options[:db_config] = conf_path
158
- end
159
155
  opts.on('-n', '--num-workers NUM', OptionParser::DecimalInteger, 'number of worker processes') do |num|
160
156
  @options[:num_workers] = num
161
157
  end
@@ -164,7 +160,7 @@ OPTIONS:
164
160
  end
165
161
  opts.on_tail('-v', '--version', 'show version') do
166
162
  load_code
167
- puts "CloudCrowd version #{CloudCrowd::VERSION}"
163
+ puts "CloudCrowd version #{VERSION}"
168
164
  exit
169
165
  end
170
166
  end
@@ -178,14 +174,14 @@ OPTIONS:
178
174
  ensure_config
179
175
  require 'rubygems'
180
176
  require "#{CC_ROOT}/lib/cloud-crowd"
181
- CloudCrowd.configure('config.yml')
177
+ CloudCrowd.configure("#{@options[:config_path]}/config.yml")
182
178
  end
183
179
 
184
180
  # Establish a connection to the central server's database. Not all commands
185
181
  # require this.
186
182
  def connect_to_database
187
183
  require 'cloud_crowd/models'
188
- CloudCrowd.configure_database(@options[:db_config])
184
+ CloudCrowd.configure_database("#{@options[:config_path]}/database.yml")
189
185
  end
190
186
 
191
187
  # Exit with an explanation if the configuration files couldn't be found.
@@ -16,7 +16,7 @@ module CloudCrowd
16
16
 
17
17
  def initialize
18
18
  @wait_time = MIN_WAIT
19
- @worker = CloudCrowd::Worker.new
19
+ @worker = Worker.new
20
20
  Signal.trap('INT', 'EXIT')
21
21
  Signal.trap('KILL', 'EXIT')
22
22
  Signal.trap('TERM', 'EXIT')
@@ -1,9 +1,8 @@
1
1
  require 'cloud_crowd/helpers/authorization'
2
2
  require 'cloud_crowd/helpers/resources'
3
- require 'cloud_crowd/helpers/urls'
4
3
 
5
4
  module CloudCrowd
6
5
  module Helpers
7
- include Authorization, Resources, Urls #, Rack::Utils
6
+ include Authorization, Resources #, Rack::Utils
8
7
  end
9
8
  end
@@ -4,6 +4,7 @@ module CloudCrowd
4
4
  module Helpers
5
5
  module Authorization
6
6
 
7
+ # Ensure that the request includes the correct credentials.
7
8
  def login_required
8
9
  return if authorized?
9
10
  unauthorized! unless auth.provided?
@@ -12,14 +13,13 @@ module CloudCrowd
12
13
  request.env['REMOTE_USER'] = auth.username
13
14
  end
14
15
 
16
+ # Has the request been authenticated?
15
17
  def authorized?
16
18
  !!request.env['REMOTE_USER']
17
19
  end
18
20
 
19
- def current_user
20
- request.env['REMOTE_USER']
21
- end
22
-
21
+ # A request is authorized if its login and password match those stored
22
+ # in config.yml, or if authentication is disabled.
23
23
  def authorize(login, password)
24
24
  return true unless CloudCrowd.config[:use_http_authentication]
25
25
  return CloudCrowd.config[:login] == login &&
@@ -33,7 +33,7 @@ module CloudCrowd
33
33
  @auth ||= Rack::Auth::Basic::Request.new(request.env)
34
34
  end
35
35
 
36
- def unauthorized!(realm = CloudCrowd::App.authorization_realm)
36
+ def unauthorized!(realm = App.authorization_realm)
37
37
  response['WWW-Authenticate'] = "Basic realm=\"#{realm}\""
38
38
  halt 401, 'Authorization Required'
39
39
  end
@@ -0,0 +1,19 @@
1
+ module CloudCrowd
2
+
3
+ # Pilfered in parts from the ActiveSupport::Inflector.
4
+ module Inflector
5
+
6
+ def self.camelize(word)
7
+ word.to_s.gsub(/\/(.?)/) { "::#{$1.upcase}" }.gsub(/(?:^|_)(.)/) { $1.upcase }
8
+ end
9
+
10
+ def self.underscore(word)
11
+ word.to_s.gsub(/::/, '/').
12
+ gsub(/([A-Z]+)([A-Z][a-z])/,'\1_\2').
13
+ gsub(/([a-z\d])([A-Z])/,'\1_\2').
14
+ tr("-", "_").
15
+ downcase
16
+ end
17
+
18
+ end
19
+ end
@@ -1,28 +1,31 @@
1
1
  module CloudCrowd
2
+
3
+ # Adds named scopes and query methods for every CloudCrowd status to
4
+ # both Jobs and WorkUnits.
2
5
  module ModelStatus
3
6
 
4
7
  def self.included(klass)
5
8
 
6
9
  klass.class_eval do
7
10
  # Note that COMPLETE and INCOMPLETE are unions of other states.
8
- named_scope 'processing', :conditions => {:status => CloudCrowd::PROCESSING}
9
- named_scope 'succeeded', :conditions => {:status => CloudCrowd::SUCCEEDED}
10
- named_scope 'failed', :conditions => {:status => CloudCrowd::FAILED}
11
- named_scope 'splitting', :conditions => {:status => CloudCrowd::SPLITTING}
12
- named_scope 'merging', :conditions => {:status => CloudCrowd::MERGING}
13
- named_scope 'complete', :conditions => {:status => CloudCrowd::COMPLETE}
14
- named_scope 'incomplete', :conditions => {:status => CloudCrowd::INCOMPLETE}
11
+ named_scope 'processing', :conditions => {:status => PROCESSING}
12
+ named_scope 'succeeded', :conditions => {:status => SUCCEEDED}
13
+ named_scope 'failed', :conditions => {:status => FAILED}
14
+ named_scope 'splitting', :conditions => {:status => SPLITTING}
15
+ named_scope 'merging', :conditions => {:status => MERGING}
16
+ named_scope 'complete', :conditions => {:status => COMPLETE}
17
+ named_scope 'incomplete', :conditions => {:status => INCOMPLETE}
15
18
  end
16
19
 
17
20
  end
18
21
 
19
- def processing?; self.status == CloudCrowd::PROCESSING; end
20
- def succeeded?; self.status == CloudCrowd::SUCCEEDED; end
21
- def failed?; self.status == CloudCrowd::FAILED; end
22
- def splitting?; self.status == CloudCrowd::SPLITTING; end
23
- def merging?; self.status == CloudCrowd::MERGING; end
24
- def complete?; CloudCrowd::COMPLETE.include?(self.status); end
25
- def incomplete?; CloudCrowd::INCOMPLETE.include?(self.status); end
22
+ def processing?; self.status == PROCESSING; end
23
+ def succeeded?; self.status == SUCCEEDED; end
24
+ def failed?; self.status == FAILED; end
25
+ def splitting?; self.status == SPLITTING; end
26
+ def merging?; self.status == MERGING; end
27
+ def complete?; COMPLETE.include?(self.status); end
28
+ def incomplete?; INCOMPLETE.include?(self.status); end
26
29
 
27
30
  end
28
31
  end
@@ -5,7 +5,7 @@ module CloudCrowd
5
5
  # of inputs (usually public urls to files), an action (the name of a script that
6
6
  # CloudCrowd knows how to run), and, eventually a corresponding list of output.
7
7
  class Job < ActiveRecord::Base
8
- include CloudCrowd::ModelStatus
8
+ include ModelStatus
9
9
 
10
10
  has_many :work_units, :dependent => :destroy
11
11
 
@@ -23,16 +23,18 @@ module CloudCrowd
23
23
  )
24
24
  end
25
25
 
26
+ # Creating a job creates its corresponding work units, adding them
27
+ # to the queue.
26
28
  def after_create
27
29
  self.queue_for_workers(JSON.parse(self.inputs))
28
30
  end
29
31
 
30
32
  def before_validation_on_create
31
- self.status = self.splittable? ? CloudCrowd::SPLITTING : CloudCrowd::PROCESSING
33
+ self.status = self.splittable? ? SPLITTING : PROCESSING
32
34
  end
33
35
 
34
36
  # After work units are marked successful, we check to see if all of them have
35
- # finished, if so, this job is complete.
37
+ # finished; if so, continue on to the next phase of the job.
36
38
  def check_for_completion
37
39
  return unless all_work_units_complete?
38
40
  transition_to_next_phase
@@ -45,19 +47,19 @@ module CloudCrowd
45
47
  self.save
46
48
 
47
49
  case self.status
48
- when CloudCrowd::PROCESSING then queue_for_workers(output_list.map {|o| JSON.parse(o) }.flatten)
49
- when CloudCrowd::MERGING then queue_for_workers(output_list.to_json)
50
- else fire_callback
50
+ when PROCESSING then queue_for_workers(output_list.map {|o| JSON.parse(o) }.flatten)
51
+ when MERGING then queue_for_workers(output_list.to_json)
52
+ else fire_callback
51
53
  end
52
54
  self
53
55
  end
54
56
 
55
- # Transition from the current phase to the next one.
57
+ # Transition this Job's status to the following one.
56
58
  def transition_to_next_phase
57
- self.status = any_work_units_failed? ? CloudCrowd::FAILED :
58
- self.splitting? ? CloudCrowd::PROCESSING :
59
- self.should_merge? ? CloudCrowd::MERGING :
60
- CloudCrowd::SUCCEEDED
59
+ self.status = any_work_units_failed? ? FAILED :
60
+ self.splitting? ? PROCESSING :
61
+ self.mergeable? ? MERGING :
62
+ SUCCEEDED
61
63
  end
62
64
 
63
65
  # If a callback_url is defined, post the Job's JSON to it upon completion.
@@ -71,7 +73,7 @@ module CloudCrowd
71
73
 
72
74
  # Cleaning up after a job will remove all of its files from S3.
73
75
  def cleanup
74
- CloudCrowd::AssetStore.new.cleanup_job(self)
76
+ AssetStore.new.cleanup_job(self)
75
77
  end
76
78
 
77
79
  # Have all of the WorkUnits finished? We could trade reads for writes here
@@ -85,18 +87,23 @@ module CloudCrowd
85
87
  self.work_units.failed.count > 0
86
88
  end
87
89
 
90
+ # This job is splittable if its Action has a +split+ method.
88
91
  def splittable?
89
- self.action_class.new.respond_to? :split
92
+ self.action_class.public_instance_methods.include? 'split'
90
93
  end
91
94
 
92
- def should_merge?
93
- self.processing? && self.action_class.new.respond_to?(:merge)
95
+ # This job is mergeable if its Action has a +merge+ method.
96
+ def mergeable?
97
+ self.processing? && self.action_class.public_instance_methods.include?('merge')
94
98
  end
95
99
 
100
+ # Retrieve the class for this Job's Action, loading it if necessary.
96
101
  def action_class
97
102
  CloudCrowd.actions(self.action)
98
103
  end
99
104
 
105
+ # When the WorkUnits are all finished, gather all their outputs together
106
+ # before removing them from the database entirely.
100
107
  def gather_outputs_from_work_units
101
108
  outs = self.work_units.complete.map {|wu| wu.output }
102
109
  self.work_units.complete.destroy_all
@@ -107,14 +114,18 @@ module CloudCrowd
107
114
  CloudCrowd.display_status(self.status)
108
115
  end
109
116
 
110
- def work_units_remaining
111
- self.work_units.incomplete.count
117
+ # How complete is this Job?
118
+ def percent_complete
119
+ return 0 if splitting?
120
+ return 100 if complete?
121
+ return 99 if merging?
122
+ (work_units.complete.count / work_units.count.to_f * 100).round
112
123
  end
113
124
 
114
125
  # A JSON representation of this job includes the statuses of its component
115
126
  # WorkUnits, as well as any completed outputs.
116
127
  def to_json(opts={})
117
- atts = {'id' => self.id, 'status' => self.display_status, 'work_units_remaining' => self.work_units_remaining}
128
+ atts = {'id' => self.id, 'status' => self.display_status, 'percent_complete' => self.percent_complete}
118
129
  atts.merge!({'outputs' => JSON.parse(self.outputs)}) if self.outputs
119
130
  atts.merge!({'time' => self.time}) if self.time
120
131
  atts.to_json
@@ -3,7 +3,7 @@ module CloudCrowd
3
3
  # A WorkUnit is an atomic chunk of work from a job, processing a single input
4
4
  # through a single action. All WorkUnits receive the same options.
5
5
  class WorkUnit < ActiveRecord::Base
6
- include CloudCrowd::ModelStatus
6
+ include ModelStatus
7
7
 
8
8
  belongs_to :job
9
9
 
@@ -14,7 +14,7 @@ module CloudCrowd
14
14
  # Find the Nth available WorkUnit in the queue, and take it out.
15
15
  def self.dequeue(offset=0)
16
16
  unit = self.first(
17
- :conditions => {:status => CloudCrowd::INCOMPLETE, :taken => false},
17
+ :conditions => {:status => INCOMPLETE, :taken => false},
18
18
  :order => "created_at asc",
19
19
  :offset => offset
20
20
  )
@@ -29,7 +29,7 @@ module CloudCrowd
29
29
  # Mark this unit as having finished successfully.
30
30
  def finish(output, time_taken)
31
31
  update_attributes({
32
- :status => CloudCrowd::SUCCEEDED,
32
+ :status => SUCCEEDED,
33
33
  :taken => false,
34
34
  :attempts => self.attempts + 1,
35
35
  :output => output,
@@ -42,7 +42,7 @@ module CloudCrowd
42
42
  tries = self.attempts + 1
43
43
  return try_again if tries < CloudCrowd.config[:work_unit_retries]
44
44
  update_attributes({
45
- :status => CloudCrowd::FAILED,
45
+ :status => FAILED,
46
46
  :taken => false,
47
47
  :attempts => tries,
48
48
  :output => output,
@@ -1,7 +1,6 @@
1
1
  # This is the script that kicks off a single CloudCrowd::Daemon. Rely on
2
2
  # cloud-crowd.rb for autoloading of all the code we need.
3
3
 
4
- # Daemon/Worker Dependencies.
5
4
  require "#{File.dirname(__FILE__)}/../cloud-crowd"
6
5
 
7
6
  FileUtils.mkdir('log') unless File.exists?('log')
@@ -1,10 +1,7 @@
1
1
  module CloudCrowd
2
2
 
3
3
  class Worker
4
-
5
- CENTRAL_URL = CloudCrowd.config[:central_server]
6
- RETRY_WAIT = CloudCrowd.config[:worker_retry_wait]
7
-
4
+
8
5
  attr_reader :action
9
6
 
10
7
  # Spinning up a worker will create a new AssetStore with a persistent
@@ -13,8 +10,8 @@ module CloudCrowd
13
10
  def initialize
14
11
  @id = $$
15
12
  @hostname = Socket.gethostname
16
- @store = CloudCrowd::AssetStore.new
17
- @server = central_server_resource
13
+ @store = AssetStore.new
14
+ @server = CloudCrowd.central_server
18
15
  log 'started'
19
16
  end
20
17
 
@@ -48,14 +45,18 @@ module CloudCrowd
48
45
  end
49
46
  end
50
47
 
48
+ # We expect and require internal communication between the central server
49
+ # and the workers to succeed. If it fails for any reason, log it, and then
50
+ # keep trying the same request.
51
51
  def keep_trying_to(title)
52
52
  begin
53
53
  yield
54
54
  rescue Exception => e
55
- log "failed to #{title} -- retry in #{RETRY_WAIT} seconds"
55
+ wait_time = CloudCrowd.config[:worker_retry_wait]
56
+ log "failed to #{title} -- retry in #{wait_time} seconds"
56
57
  log e.message
57
58
  log e.backtrace
58
- sleep RETRY_WAIT
59
+ sleep wait_time
59
60
  retry
60
61
  end
61
62
  end
@@ -71,9 +72,9 @@ module CloudCrowd
71
72
  @action = CloudCrowd.actions(@action_name).new
72
73
  @action.configure(@status, @input, @options, @store)
73
74
  result = case @status
74
- when CloudCrowd::PROCESSING then @action.process
75
- when CloudCrowd::SPLITTING then @action.split
76
- when CloudCrowd::MERGING then @action.merge
75
+ when PROCESSING then @action.process
76
+ when SPLITTING then @action.split
77
+ when MERGING then @action.merge
77
78
  else raise "Work units must specify their status."
78
79
  end
79
80
  complete_work_unit(result)
@@ -92,14 +93,6 @@ module CloudCrowd
92
93
 
93
94
  private
94
95
 
95
- # Keep an authenticated (if configured to enable authentication) resource
96
- # for the central server.
97
- def central_server_resource
98
- params = [CENTRAL_URL]
99
- params += [CloudCrowd.config[:login], CloudCrowd.config[:password]] if CloudCrowd.config[:use_http_authentication]
100
- RestClient::Resource.new(*params)
101
- end
102
-
103
96
  # Common parameters to send back to central, regardless of success or failure.
104
97
  def completion_params
105
98
  {:id => @options['work_unit_id'], :time => Time.now - @start_time}
@@ -17,7 +17,7 @@ class FailingWorkUnitsTest < Test::Unit::TestCase
17
17
  (CloudCrowd.config[:work_unit_retries] - 1).times do
18
18
  job.work_units.each {|unit| unit.fail('failed', 10) }
19
19
  end
20
- assert job.reload.work_units_remaining == 3
20
+ assert job.reload.percent_complete == 0
21
21
  job.work_units.reload.each_with_index do |unit, i|
22
22
  assert unit.processing?
23
23
  assert unit.attempts == CloudCrowd.config[:work_unit_retries] - 1
@@ -17,7 +17,7 @@ class JobTest < Test::Unit::TestCase
17
17
 
18
18
  should "create all of its work units as soon as the job is created" do
19
19
  assert @job.work_units.count >= 1
20
- assert @job.work_units_remaining == 1
20
+ assert @job.percent_complete == 0
21
21
  assert @job.processing?
22
22
  assert @unit.processing?
23
23
  assert !@job.all_work_units_complete?
@@ -27,7 +27,7 @@ class JobTest < Test::Unit::TestCase
27
27
  assert !@job.all_work_units_complete?
28
28
  @unit.update_attributes(:status => CloudCrowd::SUCCEEDED, :output => 'hello')
29
29
  assert @job.reload.all_work_units_complete?
30
- assert @job.work_units_remaining == 0
30
+ assert @job.percent_complete == 100
31
31
  assert @job.outputs == "[\"hello\"]"
32
32
  end
33
33
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: documentcloud-cloud-crowd
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jeremy Ashkenas
@@ -141,12 +141,11 @@ files:
141
141
  - lib/cloud_crowd/app.rb
142
142
  - lib/cloud_crowd/asset_store.rb
143
143
  - lib/cloud_crowd/command_line.rb
144
- - lib/cloud_crowd/core_ext.rb
145
144
  - lib/cloud_crowd/daemon.rb
146
145
  - lib/cloud_crowd/helpers/authorization.rb
147
146
  - lib/cloud_crowd/helpers/resources.rb
148
- - lib/cloud_crowd/helpers/urls.rb
149
147
  - lib/cloud_crowd/helpers.rb
148
+ - lib/cloud_crowd/inflector.rb
150
149
  - lib/cloud_crowd/models/job.rb
151
150
  - lib/cloud_crowd/models/work_unit.rb
152
151
  - lib/cloud_crowd/models.rb
@@ -163,6 +162,7 @@ files:
163
162
  - test/unit/test_work_unit.rb
164
163
  has_rdoc: true
165
164
  homepage: http://documentcloud.org
165
+ licenses:
166
166
  post_install_message:
167
167
  rdoc_options: []
168
168
 
@@ -183,7 +183,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
183
183
  requirements: []
184
184
 
185
185
  rubyforge_project: cloud-crowd
186
- rubygems_version: 1.2.0
186
+ rubygems_version: 1.3.5
187
187
  signing_key:
188
188
  specification_version: 2
189
189
  summary: Better living through Map --> Ruby --> Reduce
@@ -1,10 +0,0 @@
1
- # Extensions to core Ruby.
2
-
3
- class String
4
-
5
- # Stolen-ish in parts from ActiveSupport::Inflector.
6
- def camelize
7
- self.gsub(/\/(.?)/) { "::#{$1.upcase}" }.gsub(/(?:^|_)(.)/) { $1.upcase }
8
- end
9
-
10
- end
@@ -1,7 +0,0 @@
1
- module CloudCrowd
2
- module Helpers
3
- module Urls
4
-
5
- end
6
- end
7
- end