RubyGems - cloud-crowd - Versions diffs - 0.5.2 → 0.6.0 - Mend

cloud-crowd 0.5.2 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

data/cloud-crowd.gemspec +2 -2
data/lib/cloud-crowd.rb +1 -1
data/lib/cloud_crowd/action.rb +14 -10
data/lib/cloud_crowd/models/work_unit.rb +18 -7
data/lib/cloud_crowd/schema.rb +0 -2
data/test/unit/test_action.rb +3 -3
metadata +7 -7

data/cloud-crowd.gemspec CHANGED

@@ -1,7 +1,7 @@
 Gem::Specification.new do |s|
   s.name      = 'cloud-crowd'
-  s.version   = '0.5.2'         # Keep version in sync with cloud-cloud.rb
-  s.date      = '2010-08-03'
+  s.version   = '0.6.0'         # Keep version in sync with cloud-cloud.rb
+  s.date      = '2011-03-04'
   s.homepage    = "http://wiki.github.com/documentcloud/cloud-crowd"
   s.summary     = "Parallel Processing for the Rest of Us"

data/lib/cloud-crowd.rb CHANGED

@@ -45,7 +45,7 @@ module CloudCrowd
   autoload :WorkUnit,     'cloud_crowd/models'
   # Keep this version in sync with the gemspec.
-  VERSION        = '0.5.2'
+  VERSION        = '0.6.0'
   # Increment the schema version when there's a backwards incompatible change.
   SCHEMA_VERSION = 4

data/lib/cloud_crowd/action.rb CHANGED

@@ -29,7 +29,7 @@ module CloudCrowd
     def initialize(status, input, options, store)
       @input, @options, @store = input, options, store
       @job_id, @work_unit_id = options['job_id'], options['work_unit_id']
-      @work_directory = File.expand_path(File.join(@store.temp_storage_path, storage_prefix))
+      @work_directory = File.expand_path(File.join(@store.temp_storage_path, local_storage_prefix))
       FileUtils.mkdir_p(@work_directory) unless File.exists?(@work_directory)
       parse_input
       download_input
@@ -59,7 +59,7 @@ module CloudCrowd
     # Takes a local filesystem path, saves the file to S3, and returns the
     # public (or authenticated) url on S3 where the file can be accessed.
     def save(file_path)
-      save_path = File.join(storage_prefix, File.basename(file_path))
+      save_path = File.join(remote_storage_prefix, File.basename(file_path))
       @store.save(file_path, save_path)
     end
@@ -89,14 +89,18 @@ module CloudCrowd
       File.basename(name, ext).gsub('.', '-') + ext
     end
-    # The directory prefix to use for both local and S3 storage.
-    # [action]/job_[job_id]/unit_[work_unit_it]
-    def storage_prefix
-      path_parts = []
-      path_parts << Inflector.underscore(self.class)
-      path_parts << "job_#{@job_id}"
-      path_parts << "unit_#{@work_unit_id}" if @work_unit_id
-      @storage_prefix ||= File.join(path_parts)
+    # The directory prefix to use for remote storage.
+    # [action]/job_[job_id]
+    def remote_storage_prefix
+      @remote_storage_prefix ||= Inflector.underscore(self.class) +
+        "/job_#{@job_id}" + (@work_unit_id ? "/unit_#{@work_unit_id}" : '')
+    end
+    # The directory prefix to use for local storage.
+    # [action]/unit_[work_unit_id]
+    def local_storage_prefix
+      @local_storage_prefix ||= Inflector.underscore(self.class) +
+        (@work_unit_id ? "/unit_#{@work_unit_id}" : '')
     end
     # If we think that the input is JSON, replace it with the parsed form.

data/lib/cloud_crowd/models/work_unit.rb CHANGED

@@ -39,13 +39,22 @@ module CloudCrowd
     # action in question disabled.
     def self.distribute_to_nodes
       reservation = nil
-      filter = {}
       loop do
+        # Find the available nodes, and determine what actions we're capable
+        # of running at the moment.
+        available_nodes   = NodeRecord.available
+        available_actions = available_nodes.map {|node| node.actions }.flatten.uniq
+        filter            = "action in (#{available_actions.map{|a| "'#{a}'"}.join(',')})"
+        # Reserve a handful of available work units.
         WorkUnit.cancel_reservations(reservation) if reservation
         return unless reservation = WorkUnit.reserve_available(:limit => RESERVATION_LIMIT, :conditions => filter)
         work_units = WorkUnit.reserved(reservation)
-        available_nodes = NodeRecord.available
-        while node = available_nodes.shift and unit = work_units.shift do
+        # Round robin through the nodes and units, sending the unit if the node
+        # is able to process it.
+        while (node = available_nodes.shift) && (unit = work_units.shift) do
           if node.actions.include?(unit.action)
             if node.send_work_unit(unit)
               available_nodes.push(node) unless node.busy?
@@ -56,10 +65,12 @@ module CloudCrowd
           end
           work_units.push(unit)
         end
-        if work_units.any? && available_nodes.any?
-          filter = {:action => available_nodes.map {|node| node.actions }.flatten.uniq }
-          next
-        end
+        # If there are both units and nodes left over, try again.
+        next if work_units.any? && available_nodes.any?
+        # If we still have units at this point, or we're fresh out of nodes,
+        # that means we're done.
         return if work_units.any? || available_nodes.empty?
       end
     ensure

data/lib/cloud_crowd/schema.rb CHANGED

@@ -41,8 +41,6 @@ ActiveRecord::Schema.define(:version => CloudCrowd::SCHEMA_VERSION) do
     t.datetime "updated_at"
   end
-  # Here be indices. After looking, it seems faster not to have them at all.
-  #
   add_index "jobs", ["status"], :name => "index_jobs_on_status"
   add_index "work_units", ["job_id"], :name => "index_work_units_on_job_id"
   add_index "work_units", ["worker_pid"], :name => "index_work_units_on_worker_pid"

data/test/unit/test_action.rb CHANGED

@@ -50,7 +50,7 @@ class ActionTest < Test::Unit::TestCase
     end
     should "be able to count the number of words in this file" do
-      assert @action.process == 274
+      assert @action.process == 266
     end
     should "raise an exception when backticks fail" do
@@ -60,8 +60,8 @@ class ActionTest < Test::Unit::TestCase
     should "be able to download a remote file" do
       path = "temp.txt"
-      @action.download('http://example.com', path)
-      assert File.read(path).match(/These domain names are reserved for use in documentation/)
+      @action.download('http://www.w3.org', path)
+      assert File.read(path).match(/standards/i)
       FileUtils.rm path
     end

metadata CHANGED

@@ -1,13 +1,13 @@
 --- !ruby/object:Gem::Specification
 name: cloud-crowd
 version: !ruby/object:Gem::Version
-  hash: 15
-  prerelease: false
+  hash: 7
+  prerelease:
   segments:
   - 0
-  - 5
-  - 2
-  version: 0.5.2
+  - 6
+  - 0
+  version: 0.6.0
 platform: ruby
 authors:
 - Jeremy Ashkenas
@@ -15,7 +15,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2010-08-03 00:00:00 -04:00
+date: 2011-03-04 00:00:00 -05:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency
@@ -291,7 +291,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
 requirements: []
 rubyforge_project: cloud-crowd
-rubygems_version: 1.3.7
+rubygems_version: 1.4.2
 signing_key:
 specification_version: 3
 summary: Parallel Processing for the Rest of Us