RubyGems - cloud-crowd - Versions diffs - 0.3.1 → 0.3.2 - Mend

cloud-crowd 0.3.1 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

data/actions/word_count.rb +6 -4
data/cloud-crowd.gemspec +2 -2
data/lib/cloud-crowd.rb +3 -3
data/lib/cloud_crowd/models/job.rb +40 -40
metadata +2 -2

data/actions/word_count.rb CHANGED

@@ -1,14 +1,16 @@
-# A parallel WordCount. Depends on the 'wc' utility.
+# A parallel WordCount. Depends on the 'wc' utility.
 class WordCount < CloudCrowd::Action
   # Count the words in a single book.
+  # Pretend that this takes longer than it really does, for demonstration purposes.
   def process
+    sleep 5
     (`wc -w #{input_path}`).match(/\A\s*(\d+)/)[1].to_i
   end
   # Sum the total word count.
   def merge
     input.inject(0) {|sum, count| sum + count }
   end
 end

data/cloud-crowd.gemspec CHANGED

@@ -1,7 +1,7 @@
 Gem::Specification.new do |s|
   s.name      = 'cloud-crowd'
-  s.version   = '0.3.1'         # Keep version in sync with cloud-cloud.rb
-  s.date      = '2009-11-19'
+  s.version   = '0.3.2'         # Keep version in sync with cloud-cloud.rb
+  s.date      = '2010-01-08'
   s.homepage    = "http://wiki.github.com/documentcloud/cloud-crowd"
   s.summary     = "Parallel Processing for the Rest of Us"

data/lib/cloud-crowd.rb CHANGED

@@ -12,13 +12,13 @@ gem 'sinatra'
 gem 'thin'
 # Autoloading for all the pieces which may or may not be needed:
-autoload :ActiveRecord, 'activerecord'
+autoload :ActiveRecord, 'active_record'
 autoload :Benchmark,    'benchmark'
 autoload :Digest,       'digest'
 autoload :ERB,          'erb'
 autoload :FileUtils,    'fileutils'
 autoload :JSON,         'json'
-autoload :RestClient,   'restclient'
+autoload :RestClient,   'rest_client'
 autoload :RightAws,     'right_aws'
 autoload :Sinatra,      'sinatra'
 autoload :Thin,         'thin'
@@ -44,7 +44,7 @@ module CloudCrowd
   autoload :WorkUnit,     'cloud_crowd/models'
   # Keep this version in sync with the gemspec.
-  VERSION        = '0.3.1'
+  VERSION        = '0.3.2'
   # Increment the schema version when there's a backwards incompatible change.
   SCHEMA_VERSION = 3

data/lib/cloud_crowd/models/job.rb CHANGED

@@ -1,25 +1,25 @@
 module CloudCrowd
   # A chunk of work that will be farmed out into many WorkUnits to be processed
   # in parallel by each active CloudCrowd::Worker. Jobs are defined by a list
-  # of inputs (usually public urls to files), an action (the name of a script that
+  # of inputs (usually public urls to files), an action (the name of a script that
   # CloudCrowd knows how to run), and, eventually a corresponding list of output.
   class Job < ActiveRecord::Base
     include ModelStatus
     CLEANUP_GRACE_PERIOD = 7 # That's a week.
     has_many :work_units, :dependent => :destroy
     validates_presence_of :status, :inputs, :action, :options
     before_validation_on_create :set_initial_status
     after_create                :queue_for_workers
     before_destroy              :cleanup_assets
     # Jobs that were last updated more than N days ago.
     named_scope :older_than, lambda {|num| {:conditions => ['updated_at < ?', num.days.ago]} }
     # Create a Job from an incoming JSON request, and add it to the queue.
     def self.create_from_request(h)
       self.create(
@@ -30,7 +30,7 @@ module CloudCrowd
         :callback_url => h['callback_url']
       )
     end
     # Clean up all jobs beyond a certain age.
     def self.cleanup_all(opts = {})
       days = opts[:days] || CLEANUP_GRACE_PERIOD
@@ -38,12 +38,12 @@ module CloudCrowd
         jobs.each {|job| job.destroy }
       end
     end
     # After work units are marked successful, we check to see if all of them have
-    # finished, if so, continue on to the next phase of the job.
+    # finished, if so, continue on to the next phase of the job.
     def check_for_completion
       return unless all_work_units_complete?
-      set_next_status
+      set_next_status
       outs = gather_outputs_from_work_units
       return queue_for_workers([outs]) if merging?
       if complete?
@@ -52,7 +52,7 @@ module CloudCrowd
       end
       self
     end
     # Transition this Job's current status to the appropriate next one, based
     # on the state of the WorkUnits and the nature of the Action.
     def set_next_status
@@ -63,12 +63,12 @@ module CloudCrowd
                                  SUCCEEDED
       )
     end
-    # If a <tt>callback_url</tt> is defined, post the Job's JSON to it upon
+    # If a <tt>callback_url</tt> is defined, post the Job's JSON to it upon
     # completion. The <tt>callback_url</tt> may include HTTP basic authentication,
     # if you like:
     #   http://user:password@example.com/job_complete
-    # If the callback URL returns a '201 Created' HTTP status code, CloudCrowd
+    # If the callback URL returns a '201 Created' HTTP status code, CloudCrowd
     # will assume that the resource has been successfully created, and the Job
     # will be cleaned up.
     def fire_callback
@@ -76,54 +76,54 @@ module CloudCrowd
         response = RestClient.post(callback_url, {:job => self.to_json})
         Thread.new { self.destroy } if response && response.code == 201
       rescue RestClient::Exception => e
-        puts "Failed to fire job callback. Hmmm, what should happen here?"
+        puts "Job ##{id} failed to fire callback: #{callback_url}"
       end
     end
     # Cleaning up after a job will remove all of its files from S3 or the
-    # filesystem. Destroying a Job will cleanup_assets first. Run this in a
+    # filesystem. Destroying a Job will cleanup_assets first. Run this in a
     # separate thread to get out of the transaction's way.
     # TODO: Convert this into a 'cleanup' work unit that gets run by a worker.
     def cleanup_assets
       AssetStore.new.cleanup(self)
     end
-    # Have all of the WorkUnits finished?
+    # Have all of the WorkUnits finished?
     def all_work_units_complete?
       self.work_units.incomplete.count <= 0
     end
     # Have any of the WorkUnits failed?
     def any_work_units_failed?
       self.work_units.failed.count > 0
     end
     # This job is splittable if its Action has a +split+ method.
     def splittable?
       self.action_class.public_instance_methods.map {|m| m.to_sym }.include? :split
     end
     # This job is done splitting if it's finished with its splitting work units.
     def done_splitting?
       splittable? && work_units.splitting.count <= 0
     end
     # This job is mergeable if its Action has a +merge+ method.
     def mergeable?
       self.processing? && self.action_class.public_instance_methods.map {|m| m.to_sym }.include?(:merge)
     end
     # Retrieve the class for this Job's Action.
     def action_class
       @action_class ||= CloudCrowd.actions[self.action]
       return @action_class if @action_class
       raise Error::ActionNotFound, "no action named: '#{self.action}' could be found"
     end
     # How complete is this Job?
     # Unfortunately, with the current processing sequence, the percent_complete
     # can pull a fast one and go backwards. This happens when there's a single
-    # large input that takes a long time to split, and when it finally does it
+    # large input that takes a long time to split, and when it finally does it
     # creates a whole swarm of work units. This seems unavoidable.
     def percent_complete
       return 99  if merging?
@@ -132,25 +132,25 @@ module CloudCrowd
       return 100 if unit_count <= 0
       (work_units.complete.count / unit_count.to_f * 100).round
     end
     # How long has this Job taken?
     def time_taken
       return self.time if self.time
       Time.now - self.created_at
     end
     # Generate a stable 8-bit Hex color code, based on the Job's id.
     def color
       @color ||= Digest::MD5.hexdigest(self.id.to_s)[-7...-1]
     end
     # A JSON representation of this job includes the statuses of its component
     # WorkUnits, as well as any completed outputs.
     def to_json(opts={})
       atts = {
         'id'                => id,
         'color'             => color,
-        'status'            => display_status,
+        'status'            => display_status,
         'percent_complete'  => percent_complete,
         'work_units'        => work_units.count,
         'time_taken'        => time_taken
@@ -159,10 +159,10 @@ module CloudCrowd
       atts['email']   = email               if email
       atts.to_json
     end
     private
     # When the WorkUnits are all finished, gather all their outputs together
     # before removing them from the database entirely. Returns their merged JSON.
     def gather_outputs_from_work_units
@@ -171,20 +171,20 @@ module CloudCrowd
       self.work_units.complete.destroy_all
       outs.to_json
     end
-    # When starting a new job, or moving to a new stage, split up the inputs
+    # When starting a new job, or moving to a new stage, split up the inputs
     # into WorkUnits, and queue them. Workers will start picking them up right
     # away.
     def queue_for_workers(input=nil)
       input ||= JSON.parse(self.inputs)
-      input.each {|i| WorkUnit.start(self, action, i, status) }
+      input.each {|i| WorkUnit.start(self, action, i, status) }
       self
     end
     # A Job starts out either splitting or processing, depending on its action.
     def set_initial_status
       self.status = self.splittable? ? SPLITTING : PROCESSING
     end
   end
 end

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: cloud-crowd
 version: !ruby/object:Gem::Version
-  version: 0.3.1
+  version: 0.3.2
 platform: ruby
 authors:
 - Jeremy Ashkenas
@@ -9,7 +9,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2009-11-19 00:00:00 -05:00
+date: 2010-01-08 00:00:00 -05:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency