RubyGems - documentcloud-cloud-crowd - Versions diffs - 0.0.3 → 0.0.4 - Mend

documentcloud-cloud-crowd 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19)

data/cloud-crowd.gemspec +2 -3
data/lib/cloud-crowd.rb +13 -6
data/lib/cloud_crowd/action.rb +7 -15
data/lib/cloud_crowd/app.rb +7 -6
data/lib/cloud_crowd/command_line.rb +10 -14
data/lib/cloud_crowd/daemon.rb +1 -1
data/lib/cloud_crowd/helpers.rb +1 -2
data/lib/cloud_crowd/helpers/authorization.rb +5 -5
data/lib/cloud_crowd/inflector.rb +19 -0
data/lib/cloud_crowd/models.rb +17 -14
data/lib/cloud_crowd/models/job.rb +29 -18
data/lib/cloud_crowd/models/work_unit.rb +4 -4
data/lib/cloud_crowd/runner.rb +0 -1
data/lib/cloud_crowd/worker.rb +12 -19
data/test/acceptance/test_failing_work_units.rb +1 -1
data/test/unit/test_job.rb +2 -2
metadata +4 -4
data/lib/cloud_crowd/core_ext.rb +0 -10
data/lib/cloud_crowd/helpers/urls.rb +0 -7

data/cloud-crowd.gemspec CHANGED Viewed

@@ -1,6 +1,6 @@
 Gem::Specification.new do |s|
   s.name      = 'cloud-crowd'
-  s.version   = '0.0.3'         # Keep version in sync with cloud-cloud.rb
+  s.version   = '0.0.4'         # Keep version in sync with cloud-cloud.rb
   s.date      = '2009-08-23'
   s.homepage    = "http://documentcloud.org" # wiki page on github?
@@ -49,12 +49,11 @@ lib/cloud_crowd/action.rb
 lib/cloud_crowd/app.rb
 lib/cloud_crowd/asset_store.rb
 lib/cloud_crowd/command_line.rb
-lib/cloud_crowd/core_ext.rb
 lib/cloud_crowd/daemon.rb
 lib/cloud_crowd/helpers/authorization.rb
 lib/cloud_crowd/helpers/resources.rb
-lib/cloud_crowd/helpers/urls.rb
 lib/cloud_crowd/helpers.rb
+lib/cloud_crowd/inflector.rb
 lib/cloud_crowd/models/job.rb
 lib/cloud_crowd/models/work_unit.rb
 lib/cloud_crowd/models.rb

data/lib/cloud-crowd.rb CHANGED Viewed

@@ -11,9 +11,6 @@ gem 'rest-client'
 gem 'right_aws'
 gem 'sinatra'
-# Common CloudCrowd libs:
-require 'cloud_crowd/core_ext'
 # Autoloading for all the pieces which may or may not be needed:
 autoload :ActiveRecord, 'activerecord'
 autoload :Benchmark,    'benchmark'
@@ -34,6 +31,7 @@ module CloudCrowd
   autoload :Action,     'cloud_crowd/action'
   autoload :AssetStore, 'cloud_crowd/asset_store'
   autoload :Helpers,    'cloud_crowd/helpers'
+  autoload :Inflector,  'cloud_crowd/inflector'
   autoload :Job,        'cloud_crowd/models'
   autoload :WorkUnit,   'cloud_crowd/models'
@@ -41,7 +39,7 @@ module CloudCrowd
   ROOT        = File.expand_path(File.dirname(__FILE__) + '/..')
   # Keep the version in sync with the gemspec.
-  VERSION     = '0.0.3'
+  VERSION     = '0.0.4'
   # A Job is processing if its WorkUnits in the queue to be handled by workers.
   PROCESSING  = 1
@@ -88,6 +86,15 @@ module CloudCrowd
       configuration = YAML.load_file(config_path)
       ActiveRecord::Base.establish_connection(configuration)
     end
+    # Keep an authenticated (if configured to enable authentication) resource
+    # for the central server.
+    def central_server
+      return @central_server if @central_server
+      params = [CloudCrowd.config[:central_server]]
+      params += [CloudCrowd.config[:login], CloudCrowd.config[:password]] if CloudCrowd.config[:use_http_authentication]
+      @central_server = RestClient::Resource.new(*params)
+    end
     # Return the readable status name of an internal CloudCrowd status number.
     def display_status(status)
@@ -98,13 +105,13 @@ module CloudCrowd
     # so we lazy-load them. Think about a variant of this for installing and
     # loading actions into a running CloudCrowd cluster on the fly.
     def actions(name)
-      action_class = name.camelize
+      action_class = Inflector.camelize(name)
       begin
         raise NameError, "can't find the #{action_class} Action" unless Module.constants.include?(action_class)
         Module.const_get(action_class)
       rescue NameError => e
         user_action     = "#{@config_path}/actions/#{name}"
-        default_action  = "#{CloudCrowd::ROOT}/actions/#{name}"
+        default_action  = "#{ROOT}/actions/#{name}"
         require user_action and retry    if File.exists? "#{user_action}.rb"
         require default_action and retry if File.exists? "#{default_action}.rb"
         raise e

data/lib/cloud_crowd/action.rb CHANGED Viewed

@@ -18,14 +18,14 @@ module CloudCrowd
     # Configuring a new Action sets up all of the read-only variables that
     # form the bulk of the API for action subclasses. (Paths to read from and
-    # write to).
+    # write to). It creates the work_directory and moves into it.
     def configure(status, input, options, store)
       @input, @options, @store = input, options, store
       @job_id, @work_unit_id = options['job_id'], options['work_unit_id']
       @work_directory = File.expand_path(File.join(@store.temp_storage_path, storage_prefix))
       FileUtils.mkdir_p(@work_directory) unless File.exists?(@work_directory)
       Dir.chdir @work_directory
-      unless status == CloudCrowd::MERGING
+      unless status == MERGING
         @input_path = File.join(@work_directory, File.basename(@input))
         @file_name = File.basename(@input_path, File.extname(@input_path))
         download(@input, @input_path)
@@ -43,15 +43,16 @@ module CloudCrowd
       path
     end
-    # Takes a local filesystem path, and returns the public url on S3 where the
-    # file was saved.
+    # Takes a local filesystem path, and returns the public (or authenticated)
+    # url on S3 where the file was saved.
     def save(file_path)
       save_path = File.join(s3_storage_path, File.basename(file_path))
       @store.save(file_path, save_path)
       return @store.url(save_path)
     end
-    # After the Action has finished, we remove the work directory.
+    # After the Action has finished, we remove the work directory and return
+    # to the root directory (where daemons run by default).
     def cleanup_work_directory
       Dir.chdir '/'
       FileUtils.rm_r(@work_directory)
@@ -64,7 +65,7 @@ module CloudCrowd
     # [action_name]/job_[job_id]/unit_[work_unit_it]
     def storage_prefix
       path_parts = []
-      path_parts << underscore(self.class.to_s)
+      path_parts << Inflector.underscore(self.class)
       path_parts << "job_#{@job_id}"
       path_parts << "unit_#{@work_unit_id}" if @work_unit_id
       @storage_prefix ||= File.join(path_parts)
@@ -74,15 +75,6 @@ module CloudCrowd
       @s3_storage_path ||= storage_prefix
     end
-    # Pilfered from the ActiveSupport::Inflector.
-    def underscore(word)
-      word.to_s.gsub(/::/, '/').
-        gsub(/([A-Z]+)([A-Z][a-z])/,'\1_\2').
-        gsub(/([a-z\d])([A-Z])/,'\1_\2').
-        tr("-", "_").
-        downcase
-    end
   end
 end

data/lib/cloud_crowd/app.rb CHANGED Viewed

@@ -1,15 +1,16 @@
 module CloudCrowd
   class App < Sinatra::Default
-    # static serves files from /public, methodoverride allows the _method param.
-    enable :static, :methodoverride
-    set :root, CloudCrowd::ROOT
+    set :root, ROOT
     set :authorization_realm, "CloudCrowd"
-    helpers CloudCrowd::Helpers
+    helpers Helpers
+    # static serves files from /public, methodoverride allows the _method param.
+    enable :static, :methodoverride
+    # Enabling HTTP Authentication turns it on for all requests.
     before do
       login_required if CloudCrowd.config[:use_http_authentication]
     end
@@ -51,7 +52,7 @@ module CloudCrowd
           current_work_unit.fail(params[:output], params[:time])
           dequeue_work_unit(1)
         else
-          return error(500, "Completing a work unit must specify status.")
+          error(500, "Completing a work unit must specify status.")
         end
       end
     end

data/lib/cloud_crowd/command_line.rb CHANGED Viewed

@@ -6,7 +6,7 @@ module CloudCrowd
     # Configuration files required for the `crowd` command to function.
     CONFIG_FILES = ['config.yml', 'config.ru', 'database.yml']
-    # Reference the absolute path to the root, because we're about to chdir.
+    # Reference the absolute path to the root.
     CC_ROOT = File.expand_path(File.dirname(__FILE__) + '/../..')
     # Path to the Daemons gem script which launches workers.
@@ -58,7 +58,7 @@ OPTIONS:
     def run_server
       ensure_config
       require 'rubygems'
-      rackup_path = File.expand_path('config.ru')
+      rackup_path = File.expand_path("#{@options[:config_path]}/config.ru")
       if Gem.available? 'thin'
         exec "thin -e production -p #{@options[:port]} -R #{rackup_path} start"
       else
@@ -106,13 +106,13 @@ OPTIONS:
       load_code
       num_workers = @options[:num_workers] || CloudCrowd.config[:num_workers]
       num_workers.times do
-        `CLOUD_CROWD_CONFIG='#{File.expand_path('config.yml')}' ruby #{WORKER_RUNNER} start`
+        `CLOUD_CROWD_CONFIG='#{File.expand_path(@options[:config_path] + "/config.yml")}' ruby #{WORKER_RUNNER} start`
       end
     end
     # For debugging, run a single worker in the current process, showing output.
     def run_worker
-      exec "CLOUD_CROWD_CONFIG='#{File.expand_path('config.yml')}' ruby #{WORKER_RUNNER} run"
+      exec "CLOUD_CROWD_CONFIG='#{File.expand_path(@options[:config_path] + "/config.yml")}' ruby #{WORKER_RUNNER} run"
     end
     # Stop all active workers.
@@ -137,25 +137,21 @@ OPTIONS:
     # the CLOUD_CROWD_CONFIG environment variable. Exit if they're not found.
     def ensure_config
       return if @config_found
-      Dir.chdir @options[:config_path]
-      CONFIG_FILES.all? {|f| File.exists? f } ? @config_dir = true : config_not_found
+      found = CONFIG_FILES.all? {|f| File.exists? "#{@options[:config_path]}/#{f}" }
+      found ? @config_dir = true : config_not_found
     end
     # Parse all options for all actions.
     # TODO: Think about parsing options per sub-command separately.
     def parse_options
       @options = {
-        :db_config    => 'database.yml',
         :port         => 9173,
-        :config_path  => ENV['CLOUD_CROWD_CONFIG'] || '.',
+        :config_path  => ENV['CLOUD_CROWD_CONFIG'] || '.'
       }
       @option_parser = OptionParser.new do |opts|
         opts.on('-c', '--config PATH', 'path to configuration directory') do |conf_path|
           @options[:config_path] = conf_path
         end
-        opts.on('-d', '--database-config PATH', 'path to database.yml') do |conf_path|
-          @options[:db_config] = conf_path
-        end
         opts.on('-n', '--num-workers NUM', OptionParser::DecimalInteger, 'number of worker processes') do |num|
           @options[:num_workers] = num
         end
@@ -164,7 +160,7 @@ OPTIONS:
         end
         opts.on_tail('-v', '--version', 'show version') do
           load_code
-          puts "CloudCrowd version #{CloudCrowd::VERSION}"
+          puts "CloudCrowd version #{VERSION}"
           exit
         end
       end
@@ -178,14 +174,14 @@ OPTIONS:
       ensure_config
       require 'rubygems'
       require "#{CC_ROOT}/lib/cloud-crowd"
-      CloudCrowd.configure('config.yml')
+      CloudCrowd.configure("#{@options[:config_path]}/config.yml")
     end
     # Establish a connection to the central server's database. Not all commands
     # require this.
     def connect_to_database
       require 'cloud_crowd/models'
-      CloudCrowd.configure_database(@options[:db_config])
+      CloudCrowd.configure_database("#{@options[:config_path]}/database.yml")
     end
     # Exit with an explanation if the configuration files couldn't be found.

data/lib/cloud_crowd/daemon.rb CHANGED Viewed

@@ -16,7 +16,7 @@ module CloudCrowd
     def initialize
       @wait_time = MIN_WAIT
-      @worker = CloudCrowd::Worker.new
+      @worker = Worker.new
       Signal.trap('INT',  'EXIT')
       Signal.trap('KILL', 'EXIT')
       Signal.trap('TERM', 'EXIT')

data/lib/cloud_crowd/helpers.rb CHANGED Viewed

@@ -1,9 +1,8 @@
 require 'cloud_crowd/helpers/authorization'
 require 'cloud_crowd/helpers/resources'
-require 'cloud_crowd/helpers/urls'
 module CloudCrowd
   module Helpers
-    include Authorization, Resources, Urls #, Rack::Utils
+    include Authorization, Resources #, Rack::Utils
   end
 end

data/lib/cloud_crowd/helpers/authorization.rb CHANGED Viewed

@@ -4,6 +4,7 @@ module CloudCrowd
   module Helpers
     module Authorization
+      # Ensure that the request includes the correct credentials.
       def login_required
         return if authorized?
         unauthorized! unless auth.provided?
@@ -12,14 +13,13 @@ module CloudCrowd
         request.env['REMOTE_USER'] = auth.username
       end
+      # Has the request been authenticated?
       def authorized?
         !!request.env['REMOTE_USER']
       end
-      def current_user
-        request.env['REMOTE_USER']
-      end
+      # A request is authorized if its login and password match those stored
+      # in config.yml, or if authentication is disabled.
       def authorize(login, password)
         return true unless CloudCrowd.config[:use_http_authentication]
         return CloudCrowd.config[:login] == login &&
@@ -33,7 +33,7 @@ module CloudCrowd
         @auth ||= Rack::Auth::Basic::Request.new(request.env)
       end
-      def unauthorized!(realm = CloudCrowd::App.authorization_realm)
+      def unauthorized!(realm = App.authorization_realm)
         response['WWW-Authenticate'] = "Basic realm=\"#{realm}\""
         halt 401, 'Authorization Required'
       end

data/lib/cloud_crowd/inflector.rb ADDED Viewed

@@ -0,0 +1,19 @@
+module CloudCrowd
+  # Pilfered in parts from the ActiveSupport::Inflector.
+  module Inflector
+    def self.camelize(word)
+      word.to_s.gsub(/\/(.?)/) { "::#{$1.upcase}" }.gsub(/(?:^|_)(.)/) { $1.upcase }
+    end
+    def self.underscore(word)
+      word.to_s.gsub(/::/, '/').
+        gsub(/([A-Z]+)([A-Z][a-z])/,'\1_\2').
+        gsub(/([a-z\d])([A-Z])/,'\1_\2').
+        tr("-", "_").
+        downcase
+    end
+  end
+end

data/lib/cloud_crowd/models.rb CHANGED Viewed

@@ -1,28 +1,31 @@
 module CloudCrowd
+  # Adds named scopes and query methods for every CloudCrowd status to
+  # both Jobs and WorkUnits.
   module ModelStatus
     def self.included(klass)
       klass.class_eval do
         # Note that COMPLETE and INCOMPLETE are unions of other states.
-        named_scope 'processing', :conditions => {:status => CloudCrowd::PROCESSING}
-        named_scope 'succeeded',  :conditions => {:status => CloudCrowd::SUCCEEDED}
-        named_scope 'failed',     :conditions => {:status => CloudCrowd::FAILED}
-        named_scope 'splitting',  :conditions => {:status => CloudCrowd::SPLITTING}
-        named_scope 'merging',    :conditions => {:status => CloudCrowd::MERGING}
-        named_scope 'complete',   :conditions => {:status => CloudCrowd::COMPLETE}
-        named_scope 'incomplete', :conditions => {:status => CloudCrowd::INCOMPLETE}
+        named_scope 'processing', :conditions => {:status => PROCESSING}
+        named_scope 'succeeded',  :conditions => {:status => SUCCEEDED}
+        named_scope 'failed',     :conditions => {:status => FAILED}
+        named_scope 'splitting',  :conditions => {:status => SPLITTING}
+        named_scope 'merging',    :conditions => {:status => MERGING}
+        named_scope 'complete',   :conditions => {:status => COMPLETE}
+        named_scope 'incomplete', :conditions => {:status => INCOMPLETE}
       end
     end
-    def processing?;  self.status == CloudCrowd::PROCESSING;          end
-    def succeeded?;   self.status == CloudCrowd::SUCCEEDED;           end
-    def failed?;      self.status == CloudCrowd::FAILED;              end
-    def splitting?;   self.status == CloudCrowd::SPLITTING;           end
-    def merging?;     self.status == CloudCrowd::MERGING;             end
-    def complete?;    CloudCrowd::COMPLETE.include?(self.status);     end
-    def incomplete?;  CloudCrowd::INCOMPLETE.include?(self.status);   end
+    def processing?;  self.status == PROCESSING;          end
+    def succeeded?;   self.status == SUCCEEDED;           end
+    def failed?;      self.status == FAILED;              end
+    def splitting?;   self.status == SPLITTING;           end
+    def merging?;     self.status == MERGING;             end
+    def complete?;    COMPLETE.include?(self.status);     end
+    def incomplete?;  INCOMPLETE.include?(self.status);   end
   end
 end

data/lib/cloud_crowd/models/job.rb CHANGED Viewed

@@ -5,7 +5,7 @@ module CloudCrowd
   # of inputs (usually public urls to files), an action (the name of a script that
   # CloudCrowd knows how to run), and, eventually a corresponding list of output.
   class Job < ActiveRecord::Base
-    include CloudCrowd::ModelStatus
+    include ModelStatus
     has_many :work_units, :dependent => :destroy
@@ -23,16 +23,18 @@ module CloudCrowd
       )
     end
+    # Creating a job creates its corresponding work units, adding them
+    # to the queue.
     def after_create
       self.queue_for_workers(JSON.parse(self.inputs))
     end
     def before_validation_on_create
-      self.status = self.splittable? ? CloudCrowd::SPLITTING : CloudCrowd::PROCESSING
+      self.status = self.splittable? ? SPLITTING : PROCESSING
     end
     # After work units are marked successful, we check to see if all of them have
-    # finished, if so, this job is complete.
+    # finished, if so, continue on to the next phase of the job.
     def check_for_completion
       return unless all_work_units_complete?
       transition_to_next_phase
@@ -45,19 +47,19 @@ module CloudCrowd
       self.save
       case self.status
-      when CloudCrowd::PROCESSING then queue_for_workers(output_list.map {|o| JSON.parse(o) }.flatten)
-      when CloudCrowd::MERGING    then queue_for_workers(output_list.to_json)
-      else                             fire_callback
+      when PROCESSING then queue_for_workers(output_list.map {|o| JSON.parse(o) }.flatten)
+      when MERGING    then queue_for_workers(output_list.to_json)
+      else                 fire_callback
       end
       self
     end
-    # Transition from the current phase to the next one.
+    # Transition this Job's status to the following one.
     def transition_to_next_phase
-      self.status = any_work_units_failed? ? CloudCrowd::FAILED     :
-                    self.splitting?        ? CloudCrowd::PROCESSING :
-                    self.should_merge?     ? CloudCrowd::MERGING    :
-                                             CloudCrowd::SUCCEEDED
+      self.status = any_work_units_failed? ? FAILED     :
+                    self.splitting?        ? PROCESSING :
+                    self.mergeable?        ? MERGING    :
+                                             SUCCEEDED
     end
     # If a callback_url is defined, post the Job's JSON to it upon completion.
@@ -71,7 +73,7 @@ module CloudCrowd
     # Cleaning up after a job will remove all of its files from S3.
     def cleanup
-      CloudCrowd::AssetStore.new.cleanup_job(self)
+      AssetStore.new.cleanup_job(self)
     end
     # Have all of the WorkUnits finished? We could trade reads for writes here
@@ -85,18 +87,23 @@ module CloudCrowd
       self.work_units.failed.count > 0
     end
+    # This job is splittable if its Action has a +split+ method.
     def splittable?
-      self.action_class.new.respond_to? :split
+      self.action_class.public_instance_methods.include? 'split'
     end
-    def should_merge?
-      self.processing? && self.action_class.new.respond_to?(:merge)
+    # This job is mergeable if its Action has a +merge+ method.
+    def mergeable?
+      self.processing? && self.action_class.public_instance_methods.include?('merge')
     end
+    # Retrieve the class for this Job's Action, loading it if necessary.
     def action_class
       CloudCrowd.actions(self.action)
     end
+    # When the WorkUnits are all finished, gather all their outputs together
+    # before removing them from the database entirely.
     def gather_outputs_from_work_units
       outs = self.work_units.complete.map {|wu| wu.output }
       self.work_units.complete.destroy_all
@@ -107,14 +114,18 @@ module CloudCrowd
       CloudCrowd.display_status(self.status)
     end
-    def work_units_remaining
-      self.work_units.incomplete.count
+    # How complete is this Job?
+    def percent_complete
+      return 0   if splitting?
+      return 100 if complete?
+      return 99  if merging?
+      (work_units.complete.count / work_units.count.to_f * 100).round
     end
     # A JSON representation of this job includes the statuses of its component
     # WorkUnits, as well as any completed outputs.
     def to_json(opts={})
-      atts = {'id' => self.id, 'status' => self.display_status, 'work_units_remaining' => self.work_units_remaining}
+      atts = {'id' => self.id, 'status' => self.display_status, 'percent_complete' => self.percent_complete}
       atts.merge!({'outputs' => JSON.parse(self.outputs)}) if self.outputs
       atts.merge!({'time' => self.time}) if self.time
       atts.to_json

data/lib/cloud_crowd/models/work_unit.rb CHANGED Viewed

@@ -3,7 +3,7 @@ module CloudCrowd
   # A WorkUnit is an atomic chunk of work from a job, processing a single input
   # through a single action. All WorkUnits receive the same options.
   class WorkUnit < ActiveRecord::Base
-    include CloudCrowd::ModelStatus
+    include ModelStatus
     belongs_to :job
@@ -14,7 +14,7 @@ module CloudCrowd
     # Find the Nth available WorkUnit in the queue, and take it out.
     def self.dequeue(offset=0)
       unit = self.first(
-        :conditions => {:status => CloudCrowd::INCOMPLETE, :taken => false},
+        :conditions => {:status => INCOMPLETE, :taken => false},
         :order      => "created_at asc",
         :offset     => offset
       )
@@ -29,7 +29,7 @@ module CloudCrowd
     # Mark this unit as having finished successfully.
     def finish(output, time_taken)
       update_attributes({
-        :status   => CloudCrowd::SUCCEEDED,
+        :status   => SUCCEEDED,
         :taken    => false,
         :attempts => self.attempts + 1,
         :output   => output,
@@ -42,7 +42,7 @@ module CloudCrowd
       tries = self.attempts + 1
       return try_again if tries < CloudCrowd.config[:work_unit_retries]
       update_attributes({
-        :status   => CloudCrowd::FAILED,
+        :status   => FAILED,
         :taken    => false,
         :attempts => tries,
         :output   => output,

data/lib/cloud_crowd/runner.rb CHANGED Viewed

@@ -1,7 +1,6 @@
 # This is the script that kicks off a single CloudCrowd::Daemon. Rely on
 # cloud-crowd.rb for autoloading of all the code we need.
-# Daemon/Worker Dependencies.
 require "#{File.dirname(__FILE__)}/../cloud-crowd"
 FileUtils.mkdir('log') unless File.exists?('log')

data/lib/cloud_crowd/worker.rb CHANGED Viewed

@@ -1,10 +1,7 @@
 module CloudCrowd
   class Worker
-    CENTRAL_URL = CloudCrowd.config[:central_server]
-    RETRY_WAIT = CloudCrowd.config[:worker_retry_wait]
     attr_reader :action
     # Spinning up a worker will create a new AssetStore with a persistent
@@ -13,8 +10,8 @@ module CloudCrowd
     def initialize
       @id       = $$
       @hostname = Socket.gethostname
-      @store    = CloudCrowd::AssetStore.new
-      @server   = central_server_resource
+      @store    = AssetStore.new
+      @server   = CloudCrowd.central_server
       log 'started'
     end
@@ -48,14 +45,18 @@ module CloudCrowd
       end
     end
+    # We expect and require internal communication between the central server
+    # and the workers to succeed. If it fails for any reason, log it, and then
+    # keep trying the same request.
     def keep_trying_to(title)
       begin
         yield
       rescue Exception => e
-        log "failed to #{title} -- retry in #{RETRY_WAIT} seconds"
+        wait_time = CloudCrowd.config[:worker_retry_wait]
+        log "failed to #{title} -- retry in #{wait_time} seconds"
         log e.message
         log e.backtrace
-        sleep RETRY_WAIT
+        sleep wait_time
         retry
       end
     end
@@ -71,9 +72,9 @@ module CloudCrowd
         @action = CloudCrowd.actions(@action_name).new
         @action.configure(@status, @input, @options, @store)
         result = case @status
-        when CloudCrowd::PROCESSING then @action.process
-        when CloudCrowd::SPLITTING  then @action.split
-        when CloudCrowd::MERGING    then @action.merge
+        when PROCESSING then @action.process
+        when SPLITTING  then @action.split
+        when MERGING    then @action.merge
         else raise "Work units must specify their status."
         end
         complete_work_unit(result)
@@ -92,14 +93,6 @@ module CloudCrowd
     private
-    # Keep an authenticated (if configured to enable authentication) resource
-    # for the central server.
-    def central_server_resource
-      params = [CENTRAL_URL]
-      params += [CloudCrowd.config[:login], CloudCrowd.config[:password]] if CloudCrowd.config[:use_http_authentication]
-      RestClient::Resource.new(*params)
-    end
     # Common parameters to send back to central, regardless of success or failure.
     def completion_params
       {:id => @options['work_unit_id'], :time => Time.now - @start_time}

data/test/acceptance/test_failing_work_units.rb CHANGED Viewed

@@ -17,7 +17,7 @@ class FailingWorkUnitsTest < Test::Unit::TestCase
     (CloudCrowd.config[:work_unit_retries] - 1).times do
       job.work_units.each {|unit| unit.fail('failed', 10) }
     end
-    assert job.reload.work_units_remaining == 3
+    assert job.reload.percent_complete == 0
     job.work_units.reload.each_with_index do |unit, i|
       assert unit.processing?
       assert unit.attempts == CloudCrowd.config[:work_unit_retries] - 1

data/test/unit/test_job.rb CHANGED Viewed

@@ -17,7 +17,7 @@ class JobTest < Test::Unit::TestCase
     should "create all of its work units as soon as the job is created" do
       assert @job.work_units.count >= 1
-      assert @job.work_units_remaining == 1
+      assert @job.percent_complete == 0
       assert @job.processing?
       assert @unit.processing?
       assert !@job.all_work_units_complete?
@@ -27,7 +27,7 @@ class JobTest < Test::Unit::TestCase
       assert !@job.all_work_units_complete?
       @unit.update_attributes(:status => CloudCrowd::SUCCEEDED, :output => 'hello')
       assert @job.reload.all_work_units_complete?
-      assert @job.work_units_remaining == 0
+      assert @job.percent_complete == 100
       assert @job.outputs == "[\"hello\"]"
     end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: documentcloud-cloud-crowd
 version: !ruby/object:Gem::Version
-  version: 0.0.3
+  version: 0.0.4
 platform: ruby
 authors:
 - Jeremy Ashkenas
@@ -141,12 +141,11 @@ files:
 - lib/cloud_crowd/app.rb
 - lib/cloud_crowd/asset_store.rb
 - lib/cloud_crowd/command_line.rb
-- lib/cloud_crowd/core_ext.rb
 - lib/cloud_crowd/daemon.rb
 - lib/cloud_crowd/helpers/authorization.rb
 - lib/cloud_crowd/helpers/resources.rb
-- lib/cloud_crowd/helpers/urls.rb
 - lib/cloud_crowd/helpers.rb
+- lib/cloud_crowd/inflector.rb
 - lib/cloud_crowd/models/job.rb
 - lib/cloud_crowd/models/work_unit.rb
 - lib/cloud_crowd/models.rb
@@ -163,6 +162,7 @@ files:
 - test/unit/test_work_unit.rb
 has_rdoc: true
 homepage: http://documentcloud.org
+licenses:
 post_install_message:
 rdoc_options: []
@@ -183,7 +183,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
 requirements: []
 rubyforge_project: cloud-crowd
-rubygems_version: 1.2.0
+rubygems_version: 1.3.5
 signing_key:
 specification_version: 2
 summary: Better living through Map --> Ruby --> Reduce

data/lib/cloud_crowd/core_ext.rb DELETED Viewed

@@ -1,10 +0,0 @@
-# Extensions to core Ruby.
-class String
-  # Stolen-ish in parts from ActiveSupport::Inflector.
-  def camelize
-    self.gsub(/\/(.?)/) { "::#{$1.upcase}" }.gsub(/(?:^|_)(.)/) { $1.upcase }
-  end
-end

data/lib/cloud_crowd/helpers/urls.rb DELETED Viewed

@@ -1,7 +0,0 @@
-module CloudCrowd
-  module Helpers
-    module Urls
-    end
-  end
-end