RubyGems - documentcloud-cloud-crowd - Versions diffs - 0.0.2 → 0.0.3 - Mend

documentcloud-cloud-crowd 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

data/cloud-crowd.gemspec +1 -1
data/config/config.example.ru +0 -1
data/config/config.example.yml +41 -11
data/config/database.example.yml +3 -0
data/lib/cloud-crowd.rb +31 -5
data/lib/cloud_crowd/app.rb +16 -21
data/lib/cloud_crowd/daemon.rb +7 -5
data/lib/cloud_crowd/helpers/resources.rb +21 -0
data/lib/cloud_crowd/models/job.rb +123 -120
data/lib/cloud_crowd/models/work_unit.rb +74 -61
data/lib/cloud_crowd/models.rb +0 -2
data/lib/cloud_crowd/runner.rb +4 -16
data/lib/cloud_crowd/worker.rb +12 -9
data/test/acceptance/test_failing_work_units.rb +1 -1
data/test/blueprints.rb +3 -3
data/test/config/config.yml +1 -1
data/test/test_helper.rb +0 -2
data/test/unit/test_job.rb +4 -4
data/test/unit/test_work_unit.rb +2 -2
metadata +1 -1

data/cloud-crowd.gemspec CHANGED Viewed

@@ -1,6 +1,6 @@
 Gem::Specification.new do |s|
   s.name      = 'cloud-crowd'
-  s.version   = '0.0.2'         # Keep version in sync with cloud-crowd.rb
+  s.version   = '0.0.3'         # Keep version in sync with cloud-crowd.rb
   s.date      = '2009-08-23'
   s.homepage    = "http://documentcloud.org" # wiki page on github?

data/config/config.example.ru CHANGED Viewed

@@ -8,7 +8,6 @@
 require 'rubygems'
 require 'cloud-crowd'
-require 'cloud_crowd/app'
 CloudCrowd.configure(File.dirname(__FILE__) + '/config.yml')
 CloudCrowd.configure_database(File.dirname(__FILE__) + '/database.yml')

data/config/config.example.yml CHANGED Viewed

@@ -1,16 +1,46 @@
-:num_workers:             4
-:default_worker_wait:     1
-:max_worker_wait:         20
-:worker_wait_multiplier:  1.3
-:worker_retry_wait:       5
-:work_unit_retries:       3
+# The URL where you're planning on running the server/queue/database.
 :central_server:          http://localhost:9173
+# Please provide your AWS credentials for S3 storage of job output.
+:aws_access_key:          [your AWS access key]
+:aws_secret_key:          [your AWS secret access key]
+# Choose an S3 bucket to store all CloudCrowd output, and decide if you'd like
+# to keep all resulting files on S3 private. If so, you'll receive authenticated
+# S3 URLs as job output, good for 24 hours. If left public, you'll get the
+# straight URLs to the files on S3.
+:s3_bucket:               [your CloudCrowd bucket]
+:use_s3_authentication:   no
+# Use HTTP Basic Auth for all requests? (Includes all internal worker requests
+# to the central server). If yes, specify the login and password that all
+# requests must provide for authentication.
 :use_http_authentication: no
 :login:                   [your login name]
 :password:                [your password]
-:use_s3_authentication:   no
-:s3_bucket:               [your CloudCrowd bucket]
-:aws_access_key:          [your AWS access key]
-:aws_secret_key:          [your AWS secret access key]
+# Set the following numbers to tweak the configuration of your worker daemons.
+# Optimum results will depend on proportion of the Memory/CPU/IO bottlenecks
+# in your actions, the number of central servers you have running, and your
+# desired balance between latency and traffic.
+# The number of workers that `crowd workers start` spins up.
+:num_workers:             4
+# The minimum number of seconds a worker waits between checking the job queue.
+:min_worker_wait:         1
+# The maximum number of seconds a worker waits between checking the job queue.
+:max_worker_wait:         20
+# The backoff multiplier the worker uses to slow down the check interval when
+# there's no work in the queue.
+:worker_wait_multiplier:  1.3
+# The number of seconds a worker waits to retry when there's some kind of
+# internal error (i.e. the central server fails to respond)
+:worker_retry_wait:       5
+# The number of separate attempts that will be made to process an individual
+# work unit, before marking it as having failed.
+:work_unit_retries:       3

data/config/database.example.yml CHANGED Viewed

@@ -1,3 +1,6 @@
+# This is a standard ActiveRecord database.yml file. You can configure it
+# to use any database that ActiveRecord supports.
 :adapter:  mysql
 :encoding: utf8
 :username: root

data/lib/cloud-crowd.rb CHANGED Viewed

@@ -1,21 +1,47 @@
+# The Grand Central of code loading...
 $LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__))
 # Common Gems:
-require 'json'
-require 'rest_client'
-require 'right_aws'
+require 'rubygems'
+gem 'activerecord'
+gem 'daemons'
+gem 'json'
+gem 'rest-client'
+gem 'right_aws'
+gem 'sinatra'
 # Common CloudCrowd libs:
 require 'cloud_crowd/core_ext'
-require 'cloud_crowd/action'
+# Autoloading for all the pieces which may or may not be needed:
+autoload :ActiveRecord, 'activerecord'
+autoload :Benchmark,    'benchmark'
+autoload :Daemons,      'daemons'
+autoload :ERB,          'erb'
+autoload :FileUtils,    'fileutils'
+autoload :JSON,         'json'
+autoload :RestClient,   'rest_client'
+autoload :RightAws,     'right_aws'
+autoload :Sinatra,      'sinatra'
+autoload :Socket,       'socket'
+autoload :YAML,         'yaml'
 module CloudCrowd
+  # Autoload all the CloudCrowd classes which may not be required.
+  autoload :App,        'cloud_crowd/app'
+  autoload :Action,     'cloud_crowd/action'
+  autoload :AssetStore, 'cloud_crowd/asset_store'
+  autoload :Helpers,    'cloud_crowd/helpers'
+  autoload :Job,        'cloud_crowd/models'
+  autoload :WorkUnit,   'cloud_crowd/models'
   # Root directory of the CloudCrowd gem.
   ROOT        = File.expand_path(File.dirname(__FILE__) + '/..')
   # Keep the version in sync with the gemspec.
-  VERSION     = '0.0.2'
+  VERSION     = '0.0.3'
   # A Job is processing if its WorkUnits are in the queue to be handled by workers.
   PROCESSING  = 1

data/lib/cloud_crowd/app.rb CHANGED Viewed

@@ -1,8 +1,3 @@
-require 'erb'
-require 'sinatra'
-require 'cloud_crowd/models'
-require 'cloud_crowd/helpers'
 module CloudCrowd
   class App < Sinatra::Default
@@ -10,7 +5,7 @@ module CloudCrowd
     # static serves files from /public, methodoverride allows the _method param.
     enable :static, :methodoverride
-    set :root,                CloudCrowd::ROOT
+    set :root, CloudCrowd::ROOT
     set :authorization_realm, "CloudCrowd"
     helpers CloudCrowd::Helpers
@@ -40,29 +35,29 @@ module CloudCrowd
     # Internal method for worker daemons to fetch the work unit at the front
     # of the queue. Work unit is marked as taken and handed off to the worker.
     get '/work' do
-      begin
-        unit = WorkUnit.first(:conditions => {:status => CloudCrowd::INCOMPLETE, :taken => false}, :order => "created_at desc")
-        return status(204) && '' unless unit
-        unit.update_attributes(:taken => true)
-        unit.to_json
-      rescue ActiveRecord::StaleObjectError => e
-        return status(204) && ''
-      end
+      dequeue_work_unit
     end
     # When workers are done with their unit, either successfully or in failure,
-    # they mark it back on the central server.
+    # they mark it back on the central server and retrieve another. Failures
+    # pull from one down in the queue, so as to not repeat the same unit.
     put '/work/:work_unit_id' do
-      case params[:status]
-      when 'succeeded' then current_work_unit.finish(params[:output], params[:time])
-      when 'failed'    then current_work_unit.fail(params[:output], params[:time])
-      else             return error(500, "Completing a work unit must specify status.")
+      handle_conflicts(409) do
+        case params[:status]
+        when 'succeeded'
+          current_work_unit.finish(params[:output], params[:time])
+          dequeue_work_unit
+        when 'failed'
+          current_work_unit.fail(params[:output], params[:time])
+          dequeue_work_unit(1)
+        else
+          return error(500, "Completing a work unit must specify status.")
+        end
       end
-      return status(204) && ''
     end
     # To monitor the central server with Monit, God, Nagios, or another
-    # monitoring tool, you can hit /heartbeat to check.
+    # monitoring tool, you can hit /heartbeat to make sure.
     get '/heartbeat' do
       "buh-bump"
     end

data/lib/cloud_crowd/daemon.rb CHANGED Viewed

@@ -10,12 +10,12 @@ module CloudCrowd
   # isn't any work to be done, and speeds back up when there is.
   class Daemon
-    DEFAULT_WAIT    = CloudCrowd.config[:default_worker_wait]
+    MIN_WAIT        = CloudCrowd.config[:min_worker_wait]
     MAX_WAIT        = CloudCrowd.config[:max_worker_wait]
     WAIT_MULTIPLIER = CloudCrowd.config[:worker_wait_multiplier]
     def initialize
-      @wait_time = DEFAULT_WAIT
+      @wait_time = MIN_WAIT
       @worker = CloudCrowd::Worker.new
       Signal.trap('INT',  'EXIT')
       Signal.trap('KILL', 'EXIT')
@@ -31,9 +31,11 @@ module CloudCrowd
       loop do
         @worker.fetch_work_unit
         if @worker.has_work?
-          @worker.run
-          @wait_time = DEFAULT_WAIT
-          sleep 0.01 # So as to listen for incoming signals.
+          @wait_time = MIN_WAIT
+          while @worker.has_work?
+            @worker.run
+            sleep 0.01 # So as to listen for incoming signals.
+          end
         else
           @wait_time = [@wait_time * WAIT_MULTIPLIER, MAX_WAIT].min
           sleep @wait_time

data/lib/cloud_crowd/helpers/resources.rb CHANGED Viewed

@@ -10,6 +10,27 @@ module CloudCrowd
         @work_unit ||= WorkUnit.find_by_id(params[:work_unit_id]) or raise Sinatra::NotFound
       end
+      # Try to fetch a work unit from the queue. If none are pending, respond
+      # with no content.
+      def dequeue_work_unit(offset=0)
+        handle_conflicts do
+          unit = WorkUnit.dequeue(offset)
+          return status(204) && '' unless unit
+          unit.to_json
+        end
+      end
+      # We're using ActiveRecord's optimistic locking, so stale work units
+      # may sometimes arise. handle_conflicts responds with the HTTP status
+      # code of your choosing if the update failed to be applied.
+      def handle_conflicts(code=204)
+        begin
+          yield
+        rescue ActiveRecord::StaleObjectError => e
+          return status(code) && ''
+        end
+      end
     end
   end
 end

data/lib/cloud_crowd/models/job.rb CHANGED Viewed

@@ -1,129 +1,132 @@
-# A chunk of work that will be farmed out into many WorkUnits to be processed
-# in parallel by all the active CloudCrowd::Workers. Jobs are defined by a list
-# of inputs (usually public urls to files), an action (the name of a script that
-# CloudCrowd knows how to run), and, eventually a corresponding list of output.
-class Job < ActiveRecord::Base
-  include CloudCrowd::ModelStatus
-  has_many :work_units, :dependent => :destroy
-  validates_presence_of :status, :inputs, :action, :options
-  # Create a Job from an incoming JSON or XML request, and add it to the queue.
-  # TODO: Add XML support.
-  def self.create_from_request(h)
-    self.create(
-      :inputs       => h['inputs'].to_json,
-      :action       => h['action'],
-      :options      => (h['options'] || {}).to_json,
-      :owner_email  => h['owner_email'],
-      :callback_url => h['callback_url']
-    )
-  end
-  def after_create
-    self.queue_for_workers(JSON.parse(self.inputs))
-  end
-  def before_validation_on_create
-    self.status = self.splittable? ? CloudCrowd::SPLITTING : CloudCrowd::PROCESSING
-  end
-  # After work units are marked successful, we check to see if all of them have
-  # finished, if so, this job is complete.
-  def check_for_completion
-    return unless all_work_units_complete?
-    transition_to_next_phase
-    output_list = gather_outputs_from_work_units
+module CloudCrowd
+  # A chunk of work that will be farmed out into many WorkUnits to be processed
+  # in parallel by all the active CloudCrowd::Workers. Jobs are defined by a list
+  # of inputs (usually public urls to files), an action (the name of a script that
+  # CloudCrowd knows how to run), and, eventually a corresponding list of output.
+  class Job < ActiveRecord::Base
+    include CloudCrowd::ModelStatus
+    has_many :work_units, :dependent => :destroy
-    if complete?
-      self.outputs = output_list.to_json
-      self.time = Time.now - self.created_at
+    validates_presence_of :status, :inputs, :action, :options
+    # Create a Job from an incoming JSON or XML request, and add it to the queue.
+    # TODO: Add XML support.
+    def self.create_from_request(h)
+      self.create(
+        :inputs       => h['inputs'].to_json,
+        :action       => h['action'],
+        :options      => (h['options'] || {}).to_json,
+        :owner_email  => h['owner_email'],
+        :callback_url => h['callback_url']
+      )
     end
-    self.save
-    case self.status
-    when CloudCrowd::PROCESSING then queue_for_workers(output_list.map {|o| JSON.parse(o) }.flatten)
-    when CloudCrowd::MERGING    then queue_for_workers(output_list.to_json)
-    else                             fire_callback
+    def after_create
+      self.queue_for_workers(JSON.parse(self.inputs))
     end
-    self
-  end
-  # Transition from the current phase to the next one.
-  def transition_to_next_phase
-    self.status = any_work_units_failed? ? CloudCrowd::FAILED     :
-                  self.splitting?        ? CloudCrowd::PROCESSING :
-                  self.should_merge?     ? CloudCrowd::MERGING    :
-                                           CloudCrowd::SUCCEEDED
-  end
-  # If a callback_url is defined, post the Job's JSON to it upon completion.
-  def fire_callback
-    begin
-      RestClient.post(callback_url, {:job => self.to_json}) if callback_url
-    rescue RestClient::Exception => e
-      puts "Failed to fire job callback. Hmmm, what should happen here?"
+    def before_validation_on_create
+      self.status = self.splittable? ? CloudCrowd::SPLITTING : CloudCrowd::PROCESSING
     end
-  end
-  # Cleaning up after a job will remove all of its files from S3.
-  def cleanup
-    CloudCrowd::AssetStore.new.cleanup_job(self)
-  end
-  # Have all of the WorkUnits finished? We could trade reads for writes here
-  # by keeping a completed_count on the Job itself.
-  def all_work_units_complete?
-    self.work_units.incomplete.count <= 0
-  end
-  # Have any of the WorkUnits failed?
-  def any_work_units_failed?
-    self.work_units.failed.count > 0
-  end
-  def splittable?
-    self.action_class.new.respond_to? :split
-  end
-  def should_merge?
-    self.processing? && self.action_class.new.respond_to?(:merge)
-  end
-  def action_class
-    CloudCrowd.actions(self.action)
-  end
-  def gather_outputs_from_work_units
-    outs = self.work_units.complete.map {|wu| wu.output }
-    self.work_units.complete.destroy_all
-    outs
-  end
-  def display_status
-    CloudCrowd.display_status(self.status)
-  end
-  def work_units_remaining
-    self.work_units.incomplete.count
-  end
-  # A JSON representation of this job includes the statuses of its component
-  # WorkUnits, as well as any completed outputs.
-  def to_json(opts={})
-    atts = {'id' => self.id, 'status' => self.display_status, 'work_units_remaining' => self.work_units_remaining}
-    atts.merge!({'outputs' => JSON.parse(self.outputs)}) if self.outputs
-    atts.merge!({'time' => self.time}) if self.time
-    atts.to_json
-  end
+    # After work units are marked successful, we check to see if all of them have
+    # finished, if so, this job is complete.
+    def check_for_completion
+      return unless all_work_units_complete?
+      transition_to_next_phase
+      output_list = gather_outputs_from_work_units
-  # When starting a new job, or moving to a new stage, split up the inputs
-  # into WorkUnits, and queue them.
-  def queue_for_workers(input)
-    [input].flatten.each do |wu_input|
-      WorkUnit.create(:job => self, :input => wu_input, :status => self.status)
+      if complete?
+        self.outputs = output_list.to_json
+        self.time = Time.now - self.created_at
+      end
+      self.save
+      case self.status
+      when CloudCrowd::PROCESSING then queue_for_workers(output_list.map {|o| JSON.parse(o) }.flatten)
+      when CloudCrowd::MERGING    then queue_for_workers(output_list.to_json)
+      else                             fire_callback
+      end
+      self
+    end
+    # Transition from the current phase to the next one.
+    def transition_to_next_phase
+      self.status = any_work_units_failed? ? CloudCrowd::FAILED     :
+                    self.splitting?        ? CloudCrowd::PROCESSING :
+                    self.should_merge?     ? CloudCrowd::MERGING    :
+                                             CloudCrowd::SUCCEEDED
+    end
+    # If a callback_url is defined, post the Job's JSON to it upon completion.
+    def fire_callback
+      begin
+        RestClient.post(callback_url, {:job => self.to_json}) if callback_url
+      rescue RestClient::Exception => e
+        puts "Failed to fire job callback. Hmmm, what should happen here?"
+      end
+    end
+    # Cleaning up after a job will remove all of its files from S3.
+    def cleanup
+      CloudCrowd::AssetStore.new.cleanup_job(self)
+    end
+    # Have all of the WorkUnits finished? We could trade reads for writes here
+    # by keeping a completed_count on the Job itself.
+    def all_work_units_complete?
+      self.work_units.incomplete.count <= 0
+    end
+    # Have any of the WorkUnits failed?
+    def any_work_units_failed?
+      self.work_units.failed.count > 0
+    end
+    def splittable?
+      self.action_class.new.respond_to? :split
+    end
+    def should_merge?
+      self.processing? && self.action_class.new.respond_to?(:merge)
+    end
+    def action_class
+      CloudCrowd.actions(self.action)
+    end
+    def gather_outputs_from_work_units
+      outs = self.work_units.complete.map {|wu| wu.output }
+      self.work_units.complete.destroy_all
+      outs
+    end
+    def display_status
+      CloudCrowd.display_status(self.status)
+    end
+    def work_units_remaining
+      self.work_units.incomplete.count
+    end
+    # A JSON representation of this job includes the statuses of its component
+    # WorkUnits, as well as any completed outputs.
+    def to_json(opts={})
+      atts = {'id' => self.id, 'status' => self.display_status, 'work_units_remaining' => self.work_units_remaining}
+      atts.merge!({'outputs' => JSON.parse(self.outputs)}) if self.outputs
+      atts.merge!({'time' => self.time}) if self.time
+      atts.to_json
+    end
+    # When starting a new job, or moving to a new stage, split up the inputs
+    # into WorkUnits, and queue them.
+    def queue_for_workers(input)
+      [input].flatten.each do |wu_input|
+        WorkUnit.create(:job => self, :input => wu_input, :status => self.status)
+      end
     end
   end
 end

data/lib/cloud_crowd/models/work_unit.rb CHANGED Viewed

@@ -1,62 +1,75 @@
-# A WorkUnit is an atomic chunk of work from a job, processing a single input
-# through a single action. All WorkUnits receive the same options.
-class WorkUnit < ActiveRecord::Base
-  include CloudCrowd::ModelStatus
-  belongs_to :job
-  validates_presence_of :job_id, :status, :input
-  after_save :check_for_job_completion
-  # After saving a WorkUnit, it's Job should check if it just become complete.
-  def check_for_job_completion
-    self.job.check_for_completion if complete?
+module CloudCrowd
+  # A WorkUnit is an atomic chunk of work from a job, processing a single input
+  # through a single action. All WorkUnits receive the same options.
+  class WorkUnit < ActiveRecord::Base
+    include CloudCrowd::ModelStatus
+    belongs_to :job
+    validates_presence_of :job_id, :status, :input
+    after_save :check_for_job_completion
+    # Find the Nth available WorkUnit in the queue, and take it out.
+    def self.dequeue(offset=0)
+      unit = self.first(
+        :conditions => {:status => CloudCrowd::INCOMPLETE, :taken => false},
+        :order      => "created_at asc",
+        :offset     => offset
+      )
+      unit ? unit.update_attributes(:taken => true) && unit : nil
+    end
+    # After saving a WorkUnit, its Job should check if it just became complete.
+    def check_for_job_completion
+      self.job.check_for_completion if complete?
+    end
+    # Mark this unit as having finished successfully.
+    def finish(output, time_taken)
+      update_attributes({
+        :status   => CloudCrowd::SUCCEEDED,
+        :taken    => false,
+        :attempts => self.attempts + 1,
+        :output   => output,
+        :time     => time_taken
+      })
+    end
+    # Mark this unit as having failed. May attempt a retry.
+    def fail(output, time_taken)
+      tries = self.attempts + 1
+      return try_again if tries < CloudCrowd.config[:work_unit_retries]
+      update_attributes({
+        :status   => CloudCrowd::FAILED,
+        :taken    => false,
+        :attempts => tries,
+        :output   => output,
+        :time     => time_taken
+      })
+    end
+    # Ever tried. Ever failed. No matter. Try again. Fail again. Fail better.
+    def try_again
+      update_attributes({
+        :taken    => false,
+        :attempts => self.attempts + 1
+      })
+    end
+    # The JSON representation of a WorkUnit contains common elements of its job.
+    def to_json
+      {
+        'id'        => self.id,
+        'job_id'    => self.job_id,
+        'input'     => self.input,
+        'attempts'  => self.attempts,
+        'action'    => self.job.action,
+        'options'   => JSON.parse(self.job.options),
+        'status'    => self.status
+      }.to_json
+    end
   end
-  # Mark this unit as having finished successfully.
-  def finish(output, time_taken)
-    update_attributes({
-      :status   => CloudCrowd::SUCCEEDED,
-      :taken    => false,
-      :attempts => self.attempts + 1,
-      :output   => output,
-      :time     => time_taken
-    })
-  end
-  # Mark this unit as having failed. May attempt a retry.
-  def fail(output, time_taken)
-    tries = self.attempts + 1
-    return try_again if tries < CloudCrowd.config[:work_unit_retries]
-    update_attributes({
-      :status   => CloudCrowd::FAILED,
-      :taken    => false,
-      :attempts => tries,
-      :output   => output,
-      :time     => time_taken
-    })
-  end
-  # Ever tried. Ever failed. No matter. Try again. Fail again. Fail better.
-  def try_again
-    update_attributes({
-      :taken    => false,
-      :attempts => self.attempts + 1
-    })
-  end
-  # The JSON representation of a WorkUnit contains common elements of its job.
-  def to_json
-    {
-      'id'        => self.id,
-      'job_id'    => self.job_id,
-      'input'     => self.input,
-      'attempts'  => self.attempts,
-      'action'    => self.job.action,
-      'options'   => JSON.parse(self.job.options),
-      'status'    => self.status
-    }.to_json
-  end
-end
+end

data/lib/cloud_crowd/models.rb CHANGED Viewed

@@ -1,5 +1,3 @@
-require 'activerecord'
 module CloudCrowd
   module ModelStatus

data/lib/cloud_crowd/runner.rb CHANGED Viewed

@@ -1,22 +1,10 @@
-# This is the script that kicks off a single CloudCrowd::Daemon. Because the
-# daemons don't load the entire rails stack, this file functions like a mini
-# environment.rb, loading all the common gems that we need.
-# Standard Libs
-require 'fileutils'
-require 'benchmark'
-require 'socket'
-# Gems
-require 'rubygems'
-require 'daemons'
-require 'yaml'
-FileUtils.mkdir('log') unless File.exists?('log')
+# This is the script that kicks off a single CloudCrowd::Daemon. Rely on
+# cloud-crowd.rb for autoloading of all the code we need.
 # Daemon/Worker Dependencies.
 require "#{File.dirname(__FILE__)}/../cloud-crowd"
-require 'cloud_crowd/asset_store'
+FileUtils.mkdir('log') unless File.exists?('log')
 Daemons.run("#{CloudCrowd::ROOT}/lib/cloud_crowd/daemon.rb", {
   :app_name   => "cloud_crowd_worker",

data/lib/cloud_crowd/worker.rb CHANGED Viewed

@@ -22,10 +22,7 @@ module CloudCrowd
     def fetch_work_unit
       keep_trying_to "fetch a new work unit" do
         unit_json = @server['/work'].get
-        return unless unit_json # No content means no work for us.
-        @start_time = Time.now
-        parse_work_unit unit_json
-        log "fetched work unit for #{@action_name}"
+        setup_work_unit(unit_json)
       end
     end
@@ -33,8 +30,10 @@ module CloudCrowd
     def complete_work_unit(result)
       keep_trying_to "complete work unit" do
         data = completion_params.merge({:status => 'succeeded', :output => result})
-        @server["/work/#{data[:id]}"].put(data)
+        unit_json = @server["/work/#{data[:id]}"].put(data)
         log "finished #{@action_name} in #{data[:time]} seconds"
+        clear_work_unit
+        setup_work_unit(unit_json)
       end
     end
@@ -42,8 +41,10 @@ module CloudCrowd
     def fail_work_unit(exception)
       keep_trying_to "mark work unit as failed" do
         data = completion_params.merge({:status => 'failed', :output => exception.message})
-        @server["/work/#{data[:id]}"].put(data)
+        unit_json = @server["/work/#{data[:id]}"].put(data)
         log "failed #{@action_name} in #{data[:time]} seconds\n#{exception.message}\n#{exception.backtrace}"
+        clear_work_unit
+        setup_work_unit(unit_json)
       end
     end
@@ -78,8 +79,6 @@ module CloudCrowd
         complete_work_unit(result)
       rescue Exception => e
         fail_work_unit(e)
-      ensure
-        clear_work_unit
       end
     end
@@ -107,12 +106,16 @@ module CloudCrowd
     end
     # Extract our instance variables from a WorkUnit's JSON.
-    def parse_work_unit(unit_json)
+    def setup_work_unit(unit_json)
+      return false unless unit_json
       unit = JSON.parse(unit_json)
+      @start_time = Time.now
       @action_name, @input, @options, @status = unit['action'], unit['input'], unit['options'], unit['status']
       @options['job_id'] = unit['job_id']
       @options['work_unit_id'] = unit['id']
       @options['attempts'] ||= unit['attempts']
+      log "fetched work unit for #{@action_name}"
+      return true
     end
     # Log a message to the daemon log. Includes PID for identification.

data/test/acceptance/test_failing_work_units.rb CHANGED Viewed

@@ -13,7 +13,7 @@ class FailingWorkUnitsTest < Test::Unit::TestCase
     }.to_json
     assert browser.last_response.ok?
-    job = Job.last
+    job = CloudCrowd::Job.last
     (CloudCrowd.config[:work_unit_retries] - 1).times do
       job.work_units.each {|unit| unit.fail('failed', 10) }
     end

data/test/blueprints.rb CHANGED Viewed

@@ -1,14 +1,14 @@
 Sham.url        { Faker::Internet.domain_name + "/" + Faker::Internet.domain_word + ".jpg" }
-Job.blueprint do
+CloudCrowd::Job.blueprint do
   status  { CloudCrowd::PROCESSING }
   inputs  { ['http://www.google.com/intl/en_ALL/images/logo.gif'].to_json }
   action  { 'graphics_magick' }
   options { {}.to_json }
 end
-WorkUnit.blueprint do
-  job    { Job.make }
+CloudCrowd::WorkUnit.blueprint do
+  job    { CloudCrowd::Job.make }
   status { CloudCrowd::PROCESSING }
   taken  { false }
   input  { Sham.url }

data/test/config/config.yml CHANGED Viewed

@@ -1,5 +1,5 @@
 :num_workers:             4
-:default_worker_wait:     1
+:min_worker_wait:         1
 :max_worker_wait:         20
 :worker_wait_multiplier:  1.3
 :worker_retry_wait:       5

data/test/test_helper.rb CHANGED Viewed

@@ -2,8 +2,6 @@ require 'rubygems'
 here = File.dirname(__FILE__)
 require File.expand_path(here + "/../lib/cloud-crowd")
-require 'cloud_crowd/app'
 CloudCrowd.configure(here + '/config/config.yml')
 CloudCrowd.configure_database(here + '/config/database.yml')

data/test/unit/test_job.rb CHANGED Viewed

@@ -5,7 +5,7 @@ class JobTest < Test::Unit::TestCase
   context "A CloudCrowd Job" do
     setup do
-      @job = Job.make
+      @job = CloudCrowd::Job.make
       @unit = @job.work_units.first
     end
@@ -32,7 +32,7 @@ class JobTest < Test::Unit::TestCase
     end
     should "be able to create a job from a JSON request" do
-      job = Job.create_from_request(JSON.parse(<<-EOS
+      job = CloudCrowd::Job.create_from_request(JSON.parse(<<-EOS
       { "inputs"       : ["one", "two", "three"],
         "action"       : "graphics_magick",
         "owner_email"  : "bob@example.com",
@@ -46,13 +46,13 @@ class JobTest < Test::Unit::TestCase
     end
     should "create jobs with a SPLITTING status for actions that have a split method defined" do
-      job = Job.create_from_request({'inputs' => ['1'], 'action' => 'pdf_to_images'})
+      job = CloudCrowd::Job.create_from_request({'inputs' => ['1'], 'action' => 'pdf_to_images'})
       assert job.splittable?
       assert job.splitting?
     end
     should "fire a callback when a job has finished, successfully or not" do
-      Job.any_instance.expects(:fire_callback)
+      CloudCrowd::Job.any_instance.expects(:fire_callback)
       @job.work_units.first.finish('output', 10)
       assert @job.all_work_units_complete?
     end

data/test/unit/test_work_unit.rb CHANGED Viewed

@@ -5,7 +5,7 @@ class WorkUnitTest < Test::Unit::TestCase
   context "A WorkUnit" do
     setup do
-      @unit = WorkUnit.make
+      @unit = CloudCrowd::WorkUnit.make
       @job = @unit.job
     end
@@ -26,7 +26,7 @@ class WorkUnitTest < Test::Unit::TestCase
     end
     should "have JSON that includes job attributes" do
-      job = Job.make
+      job = CloudCrowd::Job.make
       unit_data = JSON.parse(job.work_units.first.to_json)
       assert unit_data['job_id'] == job.id
       assert unit_data['action'] == job.action

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: documentcloud-cloud-crowd
 version: !ruby/object:Gem::Version
-  version: 0.0.2
+  version: 0.0.3
 platform: ruby
 authors:
 - Jeremy Ashkenas