RubyGems - cloud-crowd - Versions diffs - 0.3.0 → 0.3.1 - Mend

cloud-crowd 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

data/cloud-crowd.gemspec CHANGED

@@ -1,7 +1,7 @@
 Gem::Specification.new do |s|
   s.name      = 'cloud-crowd'
-  s.version   = '0.3.0'         # Keep version in sync with cloud-cloud.rb
-  s.date      = '2009-11-06'
+  s.version   = '0.3.1'         # Keep version in sync with cloud-cloud.rb
+  s.date      = '2009-11-19'
   s.homepage    = "http://wiki.github.com/documentcloud/cloud-crowd"
   s.summary     = "Parallel Processing for the Rest of Us"
@@ -12,21 +12,21 @@ Gem::Specification.new do |s|
     everywhere is black with people and more come streaming from all sides as though
     streets had only one direction.
   EOS
   s.authors           = ['Jeremy Ashkenas']
   s.email             = 'jeremy@documentcloud.org'
   s.rubyforge_project = 'cloud-crowd'
   s.require_paths     = ['lib']
   s.executables       = ['crowd']
   s.has_rdoc          = true
   s.extra_rdoc_files  = ['README']
   s.rdoc_options      << '--title'    << 'CloudCrowd | Parallel Processing for the Rest of Us' <<
                          '--exclude'  << 'test' <<
                          '--main'     << 'README' <<
                          '--all'
   s.add_dependency 'sinatra',       ['>= 0.9.4']
   s.add_dependency 'activerecord',  ['>= 2.3.3']
   s.add_dependency 'json',          ['>= 1.1.7']
@@ -41,7 +41,7 @@ Gem::Specification.new do |s|
     s.add_development_dependency 'rack-test',           ['>= 0.4.1']
     s.add_development_dependency 'mocha',               ['>= 0.9.7']
   end
   s.files = %w(
 actions/graphics_magick.rb
 actions/process_pdfs.rb

data/config/config.example.ru CHANGED

@@ -15,8 +15,8 @@
 require 'rubygems'
 require 'cloud-crowd'
-CloudCrowd.configure(File.dirname(__FILE__) + '/config.yml')
-CloudCrowd.configure_database(File.dirname(__FILE__) + '/database.yml')
+CloudCrowd.configure(::File.dirname(__FILE__) + '/config.yml')
+CloudCrowd.configure_database(::File.dirname(__FILE__) + '/database.yml')
 map '/' do
   run CloudCrowd::Server

data/lib/cloud-crowd.rb CHANGED

@@ -29,7 +29,7 @@ require 'socket'
 require 'cloud_crowd/exceptions'
 module CloudCrowd
   # Autoload all the CloudCrowd internals.
   autoload :Action,       'cloud_crowd/action'
   autoload :AssetStore,   'cloud_crowd/asset_store'
@@ -42,53 +42,53 @@ module CloudCrowd
   autoload :Server,       'cloud_crowd/server'
   autoload :Worker,       'cloud_crowd/worker'
   autoload :WorkUnit,     'cloud_crowd/models'
   # Keep this version in sync with the gemspec.
-  VERSION        = '0.3.0'
+  VERSION        = '0.3.1'
   # Increment the schema version when there's a backwards incompatible change.
   SCHEMA_VERSION = 3
   # Root directory of the CloudCrowd gem.
   ROOT           = File.expand_path(File.dirname(__FILE__) + '/..')
   # Default folder to log daemonized servers and nodes into.
   LOG_PATH       = 'log'
   # Default folder to contain the pids of daemonized servers and nodes.
   PID_PATH       = 'tmp/pids'
   # A Job is processing if its WorkUnits are in the queue to be handled by nodes.
   PROCESSING     = 1
   # A Job has succeeded if all of its WorkUnits have finished successfully.
   SUCCEEDED      = 2
   # A Job has failed if even a single one of its WorkUnits has failed (they may
   # be attempted multiple times on failure, however).
   FAILED         = 3
   # A Job is splitting if it's in the process of dividing its inputs up into
   # multiple WorkUnits.
   SPLITTING      = 4
   # A Job is merging if it's busy collecting all of its successful WorkUnits
   # back together into the final result.
   MERGING        = 5
   # A Job is considered to be complete if it succeeded or if it failed.
   COMPLETE       = [SUCCEEDED, FAILED]
   # A Job is considered incomplete if it's being processed, split up or merged.
   INCOMPLETE     = [PROCESSING, SPLITTING, MERGING]
   # Mapping of statuses to their display strings.
   DISPLAY_STATUS_MAP = ['unknown', 'processing', 'succeeded', 'failed', 'splitting', 'merging']
   class << self
     attr_reader :config
     attr_accessor :identity
     # Configure CloudCrowd by passing in the path to <tt>config.yml</tt>.
     def configure(config_path)
       @config_path = File.expand_path(File.dirname(config_path))
@@ -96,7 +96,7 @@ module CloudCrowd
     end
     # Configure the CloudCrowd central database (and connect to it), by passing
-    # in a path to <tt>database.yml</tt>. The file should use the standard
+    # in a path to <tt>database.yml</tt>. The file should use the standard
     # ActiveRecord connection format.
     def configure_database(config_path, validate_schema=true)
       configuration = YAML.load_file(config_path)
@@ -108,25 +108,25 @@ module CloudCrowd
         exit
       end
     end
-    # Get a reference to the central server, including authentication if
+    # Get a reference to the central server, including authentication if
     # configured.
     def central_server
       @central_server ||= RestClient::Resource.new(CloudCrowd.config[:central_server], CloudCrowd.client_options)
     end
     # The path that daemonized servers and nodes will log to.
     def log_path(log_file=nil)
       @log_path ||= config[:log_path] || LOG_PATH
       log_file ? File.join(@log_path, log_file) : @log_path
     end
     # The path in which daemonized servers and nodes will store their pids.
     def pid_path(pid_file=nil)
       @pid_path ||= config[:pid_path] || PID_PATH
       pid_file ? File.join(@pid_path, pid_file) : @pid_path
     end
     # The standard RestClient options for the central server talking to nodes,
     # as well as the other way around. There's a timeout of 5 seconds to open
     # a connection, and a timeout of 30 to finish reading it.
@@ -145,11 +145,11 @@ module CloudCrowd
     def display_status(status)
       DISPLAY_STATUS_MAP[status] || 'unknown'
     end
     # CloudCrowd::Actions are requested dynamically by name. Access them through
     # this actions property, which behaves like a hash. At load time, we
     # load all installed Actions and CloudCrowd's default Actions into it.
-    # If you wish to have certain nodes be specialized to only handle certain
+    # If you wish to have certain nodes be specialized to only handle certain
     # Actions, then install only those into the actions directory.
     def actions
       return @actions if @actions
@@ -160,10 +160,10 @@ module CloudCrowd
         memo
       end
     rescue NameError => e
-      adjusted_message = "One of your actions failed to load. Please ensure that the name of your action class can be deduced from the name of the file. ex: 'word_count.rb' => 'WordCount'\n#{e.message}"
+      adjusted_message = "One of your actions failed to load. Please ensure that the name of your action class can be deduced from the name of the file. ex: 'word_count.rb' => 'WordCount'\n#{e.message}"
       raise NameError.new(adjusted_message, e.name)
     end
     # Retrieve the list of every installed Action for this node or server.
     def action_paths
       default_actions   = Dir["#{ROOT}/actions/*.rb"]
@@ -171,18 +171,18 @@ module CloudCrowd
       custom_actions    = CloudCrowd.config[:actions_path] ? Dir["#{CloudCrowd.config[:actions_path]}/*.rb"] : []
       default_actions + installed_actions + custom_actions
     end
     # Is this CloudCrowd instance a server? Useful for avoiding loading unneeded
     # code from actions.
     def server?
       @identity == :server
     end
     # Or is it a node?
     def node?
       @identity == :node
     end
   end
 end

data/lib/cloud_crowd/worker.rb CHANGED

@@ -1,21 +1,21 @@
 module CloudCrowd
-  # The Worker, forked off from the Node when a new WorkUnit is received,
+  # The Worker, forked off from the Node when a new WorkUnit is received,
   # launches an Action for processing. Workers will only ever receive WorkUnits
-  # that they are able to handle (for which they have a corresponding action in
-  # their actions directory). If communication with the central server is
-  # interrupted, the Worker will repeatedly attempt to complete its unit --
-  # every Worker::RETRY_WAIT seconds. Any exceptions that take place during
-  # the course of the Action will cause the Worker to mark the WorkUnit as
+  # that they are able to handle (for which they have a corresponding action in
+  # their actions directory). If communication with the central server is
+  # interrupted, the Worker will repeatedly attempt to complete its unit --
+  # every Worker::RETRY_WAIT seconds. Any exceptions that take place during
+  # the course of the Action will cause the Worker to mark the WorkUnit as
   # having failed. When finished, the Worker's process exits, minimizing the
   # potential for memory leaks.
   class Worker
     # Wait five seconds to retry, after internal communcication errors.
     RETRY_WAIT = 5
     attr_reader :pid, :node, :unit, :status
     # A new Worker customizes itself to its WorkUnit at instantiation.
     def initialize(node, unit)
       @start_time = Time.now
@@ -25,7 +25,7 @@ module CloudCrowd
       @status     = @unit['status']
       @retry_wait = RETRY_WAIT
     end
     # Return output to the central server, marking the WorkUnit done.
     def complete_work_unit(result)
       keep_trying_to "complete work unit" do
@@ -34,7 +34,7 @@ module CloudCrowd
         log "finished #{display_work_unit} in #{data[:time]} seconds"
       end
     end
     # Mark the WorkUnit failed, returning the exception to central.
     def fail_work_unit(exception)
       keep_trying_to "mark work unit as failed" do
@@ -43,9 +43,9 @@ module CloudCrowd
         log "failed #{display_work_unit} in #{data[:time]} seconds\n#{exception.message}\n#{exception.backtrace}"
       end
     end
     # We expect and require internal communication between the central server
-    # and the workers to succeed. If it fails for any reason, log it, and then
+    # and the workers to succeed. If it fails for any reason, log it, and then
     # keep trying the same request.
     def keep_trying_to(title)
       begin
@@ -60,13 +60,13 @@ module CloudCrowd
         retry
       end
     end
     # Loggable details describing what the Worker is up to.
     def display_work_unit
       "unit ##{@unit['id']} (#{@unit['action']}/#{CloudCrowd.display_status(@status)})"
     end
-    # Executes the WorkUnit by running the Action, catching all exceptions as
+    # Executes the WorkUnit by running the Action, catching all exceptions as
     # failures. We capture the thread so that we can kill it from the outside,
     # when exiting.
     def run_work_unit
@@ -82,14 +82,14 @@ module CloudCrowd
           else raise Error::StatusUnspecified, "work units must specify their status"
           end
         end
+        action.cleanup_work_directory if action
         complete_work_unit({'output' => result}.to_json)
       rescue Exception => e
-        fail_work_unit(e)
-      ensure
         action.cleanup_work_directory if action
+        fail_work_unit(e)
       end
     end
     # Run this worker inside of a fork. Attempts to exit cleanly.
     # Wraps run_work_unit to benchmark the execution time, if requested.
     def run
@@ -102,39 +102,39 @@ module CloudCrowd
       end
       Process.exit!
     end
-    # There are some potentially important attributes of the WorkUnit that we'd
-    # like to pass into the Action -- in case it needs to know them. They will
+    # There are some potentially important attributes of the WorkUnit that we'd
+    # like to pass into the Action -- in case it needs to know them. They will
     # always be made available in the options hash.
     def enhanced_unit_options
       @unit['options'].merge({
         'job_id'        => @unit['job_id'],
         'work_unit_id'  => @unit['id'],
-        'attempts'      => @unit['attempts']
+        'attempts'      => @unit['attempts']
       })
     end
     # How long has this worker been running for?
     def time_taken
       Time.now - @start_time
     end
     private
-    # Common parameters to send back to central upon unit completion,
+    # Common parameters to send back to central upon unit completion,
     # regardless of success or failure.
     def base_params
       { :pid  => @pid,
-        :id   => @unit['id'],
+        :id   => @unit['id'],
         :time => time_taken }
     end
     # Log a message to the daemon log. Includes PID for identification.
     def log(message)
       puts "Worker ##{@pid}: #{message}" unless ENV['RACK_ENV'] == 'test'
     end
     # When signaled to exit, make sure that the Worker shuts down without firing
     # the Node's at_exit callbacks.
     def trap_signals
@@ -143,7 +143,7 @@ module CloudCrowd
       Signal.trap('KILL') { Process.exit! }
       Signal.trap('TERM') { Process.exit! }
     end
   end
 end

data/test/config/config.ru CHANGED

@@ -9,8 +9,8 @@
 require 'rubygems'
 require 'cloud-crowd'
-CloudCrowd.configure(File.dirname(__FILE__) + '/config.yml')
-CloudCrowd.configure_database(File.dirname(__FILE__) + '/database.yml')
+CloudCrowd.configure(::File.dirname(__FILE__) + '/config.yml')
+CloudCrowd.configure_database(::File.dirname(__FILE__) + '/database.yml')
 map '/' do
   run CloudCrowd::Server

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: cloud-crowd
 version: !ruby/object:Gem::Version
-  version: 0.3.0
+  version: 0.3.1
 platform: ruby
 authors:
 - Jeremy Ashkenas
@@ -9,7 +9,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2009-11-06 00:00:00 -05:00
+date: 2009-11-19 00:00:00 -05:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency