RubyGems - cloud-crowd - Versions diffs - 0.3.0 → 0.3.1 - Mend

cloud-crowd 0.3.0 → 0.3.1

Files changed (6) hide show

@@ -1,7 +1,7 @@
 Gem::Specification.new do |s|
   s.name      = 'cloud-crowd'
-  s.version   = '0.3.0'         # Keep version in sync with cloud-cloud.rb
-  s.date      = '2009-11-06'
+  s.version   = '0.3.1'         # Keep version in sync with cloud-crowd.rb
+  s.date      = '2009-11-19'
   s.homepage    = "http://wiki.github.com/documentcloud/cloud-crowd"
   s.summary     = "Parallel Processing for the Rest of Us"
@@ -12,21 +12,21 @@ Gem::Specification.new do |s|
     everywhere is black with people and more come streaming from all sides as though
     streets had only one direction.
   EOS
   s.authors           = ['Jeremy Ashkenas']
   s.email             = 'jeremy@documentcloud.org'
   s.rubyforge_project = 'cloud-crowd'
   s.require_paths     = ['lib']
   s.executables       = ['crowd']
   s.has_rdoc          = true
   s.extra_rdoc_files  = ['README']
   s.rdoc_options      << '--title'    << 'CloudCrowd | Parallel Processing for the Rest of Us' <<
                          '--exclude'  << 'test' <<
                          '--main'     << 'README' <<
                          '--all'
   s.add_dependency 'sinatra',       ['>= 0.9.4']
   s.add_dependency 'activerecord',  ['>= 2.3.3']
   s.add_dependency 'json',          ['>= 1.1.7']
@@ -41,7 +41,7 @@ Gem::Specification.new do |s|
     s.add_development_dependency 'rack-test',           ['>= 0.4.1']
     s.add_development_dependency 'mocha',               ['>= 0.9.7']
   end
   s.files = %w(
 actions/graphics_magick.rb
 actions/process_pdfs.rb

data/config/config.example.ru CHANGED

@@ -15,8 +15,8 @@
 require 'rubygems'
 require 'cloud-crowd'
-CloudCrowd.configure(File.dirname(__FILE__) + '/config.yml')
-CloudCrowd.configure_database(File.dirname(__FILE__) + '/database.yml')
+CloudCrowd.configure(::File.dirname(__FILE__) + '/config.yml')
+CloudCrowd.configure_database(::File.dirname(__FILE__) + '/database.yml')
 map '/' do
   run CloudCrowd::Server

data/lib/cloud-crowd.rb CHANGED

@@ -29,7 +29,7 @@ require 'socket'
 require 'cloud_crowd/exceptions'
 module CloudCrowd
   # Autoload all the CloudCrowd internals.
   autoload :Action,       'cloud_crowd/action'
   autoload :AssetStore,   'cloud_crowd/asset_store'
@@ -42,53 +42,53 @@ module CloudCrowd
   autoload :Server,       'cloud_crowd/server'
   autoload :Worker,       'cloud_crowd/worker'
   autoload :WorkUnit,     'cloud_crowd/models'
   # Keep this version in sync with the gemspec.
-  VERSION        = '0.3.0'
+  VERSION        = '0.3.1'
   # Increment the schema version when there's a backwards incompatible change.
   SCHEMA_VERSION = 3
   # Root directory of the CloudCrowd gem.
   ROOT           = File.expand_path(File.dirname(__FILE__) + '/..')
   # Default folder to log daemonized servers and nodes into.
   LOG_PATH       = 'log'
   # Default folder to contain the pids of daemonized servers and nodes.
   PID_PATH       = 'tmp/pids'
   # A Job is processing if its WorkUnits are in the queue to be handled by nodes.
   PROCESSING     = 1
   # A Job has succeeded if all of its WorkUnits have finished successfully.
   SUCCEEDED      = 2
   # A Job has failed if even a single one of its WorkUnits has failed (they may
   # be attempted multiple times on failure, however).
   FAILED         = 3
   # A Job is splitting if it's in the process of dividing its inputs up into
   # multiple WorkUnits.
   SPLITTING      = 4
   # A Job is merging if it's busy collecting all of its successful WorkUnits
   # back together into the final result.
   MERGING        = 5
   # A Job is considered to be complete if it succeeded or if it failed.
   COMPLETE       = [SUCCEEDED, FAILED]
   # A Job is considered incomplete if it's being processed, split up or merged.
   INCOMPLETE     = [PROCESSING, SPLITTING, MERGING]
   # Mapping of statuses to their display strings.
   DISPLAY_STATUS_MAP = ['unknown', 'processing', 'succeeded', 'failed', 'splitting', 'merging']
   class << self
     attr_reader :config
     attr_accessor :identity
     # Configure CloudCrowd by passing in the path to <tt>config.yml</tt>.
     def configure(config_path)
       @config_path = File.expand_path(File.dirname(config_path))
@@ -96,7 +96,7 @@ module CloudCrowd
     end
     # Configure the CloudCrowd central database (and connect to it), by passing
-    # in a path to <tt>database.yml</tt>. The file should use the standard
+    # in a path to <tt>database.yml</tt>. The file should use the standard
     # ActiveRecord connection format.
     def configure_database(config_path, validate_schema=true)
       configuration = YAML.load_file(config_path)
@@ -108,25 +108,25 @@ module CloudCrowd
         exit
       end
     end
-    # Get a reference to the central server, including authentication if
+    # Get a reference to the central server, including authentication if
     # configured.
     def central_server
       @central_server ||= RestClient::Resource.new(CloudCrowd.config[:central_server], CloudCrowd.client_options)
     end
     # The path that daemonized servers and nodes will log to.
     def log_path(log_file=nil)
       @log_path ||= config[:log_path] || LOG_PATH
       log_file ? File.join(@log_path, log_file) : @log_path
     end
     # The path in which daemonized servers and nodes will store their pids.
     def pid_path(pid_file=nil)
       @pid_path ||= config[:pid_path] || PID_PATH
       pid_file ? File.join(@pid_path, pid_file) : @pid_path
     end
     # The standard RestClient options for the central server talking to nodes,
     # as well as the other way around. There's a timeout of 5 seconds to open
     # a connection, and a timeout of 30 to finish reading it.
@@ -145,11 +145,11 @@ module CloudCrowd
     def display_status(status)
       DISPLAY_STATUS_MAP[status] || 'unknown'
     end
     # CloudCrowd::Actions are requested dynamically by name. Access them through
     # this actions property, which behaves like a hash. At load time, we
     # load all installed Actions and CloudCrowd's default Actions into it.
-    # If you wish to have certain nodes be specialized to only handle certain
+    # If you wish to have certain nodes be specialized to only handle certain
     # Actions, then install only those into the actions directory.
     def actions
       return @actions if @actions
@@ -160,10 +160,10 @@ module CloudCrowd
         memo
       end
     rescue NameError => e
-      adjusted_message = "One of your actions failed to load. Please ensure that the name of your action class can be deduced from the name of the file. ex: 'word_count.rb' => 'WordCount'\n#{e.message}"
+      adjusted_message = "One of your actions failed to load. Please ensure that the name of your action class can be deduced from the name of the file. ex: 'word_count.rb' => 'WordCount'\n#{e.message}"
       raise NameError.new(adjusted_message, e.name)
     end
     # Retrieve the list of every installed Action for this node or server.
     def action_paths
       default_actions   = Dir["#{ROOT}/actions/*.rb"]
@@ -171,18 +171,18 @@ module CloudCrowd
       custom_actions    = CloudCrowd.config[:actions_path] ? Dir["#{CloudCrowd.config[:actions_path]}/*.rb"] : []
       default_actions + installed_actions + custom_actions
     end
     # Is this CloudCrowd instance a server? Useful for avoiding loading unneeded
     # code from actions.
     def server?
       @identity == :server
     end
     # Or is it a node?
     def node?
       @identity == :node
     end
   end
 end

data/lib/cloud_crowd/worker.rb CHANGED

@@ -1,21 +1,21 @@
 module CloudCrowd
-  # The Worker, forked off from the Node when a new WorkUnit is received,
+  # The Worker, forked off from the Node when a new WorkUnit is received,
   # launches an Action for processing. Workers will only ever receive WorkUnits
-  # that they are able to handle (for which they have a corresponding action in
-  # their actions directory). If communication with the central server is
-  # interrupted, the Worker will repeatedly attempt to complete its unit --
-  # every Worker::RETRY_WAIT seconds. Any exceptions that take place during
-  # the course of the Action will cause the Worker to mark the WorkUnit as
+  # that they are able to handle (for which they have a corresponding action in
+  # their actions directory). If communication with the central server is
+  # interrupted, the Worker will repeatedly attempt to complete its unit --
+  # every Worker::RETRY_WAIT seconds. Any exceptions that take place during
+  # the course of the Action will cause the Worker to mark the WorkUnit as
   # having failed. When finished, the Worker's process exits, minimizing the
   # potential for memory leaks.
   class Worker
     # Wait five seconds to retry, after internal communication errors.
     RETRY_WAIT = 5
     attr_reader :pid, :node, :unit, :status
     # A new Worker customizes itself to its WorkUnit at instantiation.
     def initialize(node, unit)
       @start_time = Time.now
@@ -25,7 +25,7 @@ module CloudCrowd
       @status     = @unit['status']
       @retry_wait = RETRY_WAIT
     end
     # Return output to the central server, marking the WorkUnit done.
     def complete_work_unit(result)
       keep_trying_to "complete work unit" do
@@ -34,7 +34,7 @@ module CloudCrowd
         log "finished #{display_work_unit} in #{data[:time]} seconds"
       end
     end
     # Mark the WorkUnit failed, returning the exception to central.
     def fail_work_unit(exception)
       keep_trying_to "mark work unit as failed" do
@@ -43,9 +43,9 @@ module CloudCrowd
         log "failed #{display_work_unit} in #{data[:time]} seconds\n#{exception.message}\n#{exception.backtrace}"
       end
     end
     # We expect and require internal communication between the central server
-    # and the workers to succeed. If it fails for any reason, log it, and then
+    # and the workers to succeed. If it fails for any reason, log it, and then
     # keep trying the same request.
     def keep_trying_to(title)
       begin
@@ -60,13 +60,13 @@ module CloudCrowd
         retry
       end
     end
     # Loggable details describing what the Worker is up to.
     def display_work_unit
       "unit ##{@unit['id']} (#{@unit['action']}/#{CloudCrowd.display_status(@status)})"
     end
-    # Executes the WorkUnit by running the Action, catching all exceptions as
+    # Executes the WorkUnit by running the Action, catching all exceptions as
     # failures. We capture the thread so that we can kill it from the outside,
     # when exiting.
     def run_work_unit
@@ -82,14 +82,14 @@ module CloudCrowd
           else raise Error::StatusUnspecified, "work units must specify their status"
           end
         end
+        action.cleanup_work_directory if action
         complete_work_unit({'output' => result}.to_json)
       rescue Exception => e
-        fail_work_unit(e)
-      ensure
         action.cleanup_work_directory if action
+        fail_work_unit(e)
       end
     end
     # Run this worker inside of a fork. Attempts to exit cleanly.
     # Wraps run_work_unit to benchmark the execution time, if requested.
     def run
@@ -102,39 +102,39 @@ module CloudCrowd
       end
       Process.exit!
     end
-    # There are some potentially important attributes of the WorkUnit that we'd
-    # like to pass into the Action -- in case it needs to know them. They will
+    # There are some potentially important attributes of the WorkUnit that we'd
+    # like to pass into the Action -- in case it needs to know them. They will
     # always be made available in the options hash.
     def enhanced_unit_options
       @unit['options'].merge({
         'job_id'        => @unit['job_id'],
         'work_unit_id'  => @unit['id'],
-        'attempts'      => @unit['attempts']
+        'attempts'      => @unit['attempts']
       })
     end
     # How long has this worker been running for?
     def time_taken
       Time.now - @start_time
     end
     private
-    # Common parameters to send back to central upon unit completion,
+    # Common parameters to send back to central upon unit completion,
     # regardless of success or failure.
     def base_params
       { :pid  => @pid,
-        :id   => @unit['id'],
+        :id   => @unit['id'],
         :time => time_taken }
     end
     # Log a message to the daemon log. Includes PID for identification.
     def log(message)
       puts "Worker ##{@pid}: #{message}" unless ENV['RACK_ENV'] == 'test'
     end
     # When signaled to exit, make sure that the Worker shuts down without firing
     # the Node's at_exit callbacks.
     def trap_signals
@@ -143,7 +143,7 @@ module CloudCrowd
       Signal.trap('KILL') { Process.exit! }
       Signal.trap('TERM') { Process.exit! }
     end
   end
 end

data/test/config/config.ru CHANGED

@@ -9,8 +9,8 @@
 require 'rubygems'
 require 'cloud-crowd'
-CloudCrowd.configure(File.dirname(__FILE__) + '/config.yml')
-CloudCrowd.configure_database(File.dirname(__FILE__) + '/database.yml')
+CloudCrowd.configure(::File.dirname(__FILE__) + '/config.yml')
+CloudCrowd.configure_database(::File.dirname(__FILE__) + '/database.yml')
 map '/' do
   run CloudCrowd::Server

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: cloud-crowd
 version: !ruby/object:Gem::Version
-  version: 0.3.0
+  version: 0.3.1
 platform: ruby
 authors:
 - Jeremy Ashkenas
@@ -9,7 +9,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2009-11-06 00:00:00 -05:00
+date: 2009-11-19 00:00:00 -05:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency