RubyGems - documentcloud-cloud-crowd - Versions diffs - 0.2.0 → 0.2.1 - Mend

documentcloud-cloud-crowd 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

data/cloud-crowd.gemspec +7 -3
data/config/config.example.yml +10 -3
data/config/database.example.yml +7 -1
data/lib/cloud-crowd.rb +2 -2
data/lib/cloud_crowd/action.rb +16 -4
data/lib/cloud_crowd/exceptions.rb +12 -1
data/lib/cloud_crowd/models/job.rb +6 -5
data/lib/cloud_crowd/models/node_record.rb +23 -12
data/lib/cloud_crowd/models/work_unit.rb +9 -3
data/lib/cloud_crowd/node.rb +79 -10
data/lib/cloud_crowd/schema.rb +5 -4
data/lib/cloud_crowd/worker.rb +43 -34
data/test/acceptance/test_failing_work_units.rb +1 -0
data/test/acceptance/test_node.rb +20 -0
data/test/acceptance/test_server.rb +1 -0
data/test/acceptance/test_word_count.rb +2 -5
data/test/blueprints.rb +13 -4
data/test/config/database.yml +3 -6
data/test/unit/test_node.rb +38 -0
data/test/unit/test_node_record.rb +42 -0
data/test/unit/test_worker.rb +48 -0
metadata +7 -3

data/cloud-crowd.gemspec CHANGED Viewed

@@ -1,7 +1,7 @@
 Gem::Specification.new do |s|
   s.name      = 'cloud-crowd'
-  s.version   = '0.2.0'         # Keep version in sync with cloud-cloud.rb
-  s.date      = '2009-09-17'
+  s.version   = '0.2.1'         # Keep version in sync with cloud-crowd.rb
+  s.date      = '2009-09-18'
   s.homepage    = "http://wiki.github.com/documentcloud/cloud-crowd"
   s.summary     = "Parallel Processing for the Rest of Us"
@@ -94,8 +94,9 @@ public/js/excanvas.js
 public/js/flot.js
 public/js/jquery.js
 README
-test/acceptance/test_server.rb
+test/acceptance/test_node.rb
 test/acceptance/test_failing_work_units.rb
+test/acceptance/test_server.rb
 test/acceptance/test_word_count.rb
 test/blueprints.rb
 test/config/config.ru
@@ -105,7 +106,10 @@ test/config/actions/failure_testing.rb
 test/test_helper.rb
 test/unit/test_action.rb
 test/unit/test_configuration.rb
+test/unit/test_node.rb
+test/unit/test_node_record.rb
 test/unit/test_job.rb
+test/unit/test_worker.rb
 test/unit/test_work_unit.rb
 views/operations_center.erb
 )

data/config/config.example.yml CHANGED Viewed

@@ -1,14 +1,21 @@
 # The URL where you're planning on running the central server/queue/database.
 :central_server:      http://localhost:9173
-# Set the maximum number of workers allowed per-node. Workers only run while
-# there's work to be done. It's best to set 'max_workers' below the point where
-# you'd start to swap or peg your CPU (as determined by experiment).
+# The following settings allow you to control the number of workers that can run
+# on a given node, to prevent the node from becoming overloaded. 'max_workers'
+# is a simple cap on the maximum number of workers a node is allowed to run
+# concurrently. 'max_load' is the maximum (one-minute) load average, above which
+# a node will refuse to take new work. 'min_free_memory' is the minimum amount
+# of free RAM (in megabytes) a node is allowed to have, below which no new
+# workers are run. These settings may be used in any combination.
 :max_workers:         5
+# :max_load:            5.0
+# :min_free_memory:     150
 # The storage back-end that you'd like to use for intermediate and final results
 # of processing. 's3' and 'filesystem' are supported. 'filesystem' should only
 # be used in development, on single-machine installations, or networked drives.
+# If you *are* developing an action, filesystem is certainly faster and easier.
 :storage:             s3
 # Please provide your AWS credentials for S3 storage of job output.

data/config/database.example.yml CHANGED Viewed

@@ -6,4 +6,10 @@
 :username: root
 :password:
 :socket:   /tmp/mysql.sock
-:database: cloud_crowd
+:database: cloud_crowd
+# If you'd prefer to use an SQLite database instead, the following configuration
+# will do nicely:
+#
+# :adapter:  sqlite3
+# :database: cloud_crowd.db

data/lib/cloud-crowd.rb CHANGED Viewed

@@ -43,10 +43,10 @@ module CloudCrowd
   autoload :WorkUnit,     'cloud_crowd/models'
   # Keep this version in sync with the gemspec.
-  VERSION        = '0.2.0'
+  VERSION        = '0.2.1'
   # Increment the schema version when there's a backwards incompatible change.
-  SCHEMA_VERSION = 2
+  SCHEMA_VERSION = 3
   # Root directory of the CloudCrowd gem.
   ROOT           = File.expand_path(File.dirname(__FILE__) + '/..')

data/lib/cloud_crowd/action.rb CHANGED Viewed

@@ -12,6 +12,9 @@ module CloudCrowd
   #
   # All actions have use of an individual +work_directory+, for scratch files,
   # and spend their duration inside of it, so relative paths work well.
+  #
+  # Note that Actions inherit a backticks (`) method that raises an Exception
+  # if the external command fails.
   class Action
     FILE_URL = /\Afile:\/\//
@@ -33,7 +36,7 @@ module CloudCrowd
     # Each Action subclass must implement a +process+ method, overriding this.
     def process
-      raise NotImplementedError.new("CloudCrowd::Actions must override 'process' with their own processing code.")
+      raise NotImplementedError, "CloudCrowd::Actions must override 'process' with their own processing code."
     end
     # Download a file to the specified path.
@@ -66,6 +69,15 @@ module CloudCrowd
       FileUtils.rm_r(@work_directory) if File.exists?(@work_directory)
     end
+    # Actions have a backticks command that raises a CommandFailed exception
+    # on failure, so that processing doesn't just blithely continue.
+    def `(command)
+      result    = super(command)
+      exit_code = $?.to_i
+      raise Error::CommandFailed.new(result, exit_code) unless exit_code == 0
+      result
+    end
     private
@@ -77,7 +89,7 @@ module CloudCrowd
     end
     # The directory prefix to use for both local and S3 storage.
-    # [action_name]/job_[job_id]/unit_[work_unit_it]
+    # [action]/job_[job_id]/unit_[work_unit_id]
     def storage_prefix
       path_parts = []
       path_parts << Inflector.underscore(self.class)
@@ -93,9 +105,9 @@ module CloudCrowd
     # If the input is a URL, download the file before beginning processing.
     def download_input
+      input_is_url = !!URI.parse(@input) rescue false
+      return unless input_is_url
       Dir.chdir(@work_directory) do
-        input_is_url = !!URI.parse(@input) rescue false
-        return unless input_is_url
         @input_path = File.join(@work_directory, safe_filename(@input))
         @file_name = File.basename(@input_path, File.extname(@input_path))
         download(@input, @input_path)

data/lib/cloud_crowd/exceptions.rb CHANGED Viewed

@@ -11,7 +11,7 @@ module CloudCrowd
     class ActionNotFound < Error
     end
-    # StorageNotFound is raised when config.yml specifies a storage back end that
+    # StorageNotFound is raised when config.yml specifies a storage back-end that
     # doesn't exist.
     class StorageNotFound < Error
     end
@@ -30,6 +30,17 @@ module CloudCrowd
     class MissingConfiguration < Error
     end
+    # CommandFailed is raised when an action shells out, and the external
+    # command returns a non-zero exit code.
+    class CommandFailed < Error
+      attr_reader :exit_code
+      def initialize(message, exit_code)
+        super(message)
+        @exit_code = exit_code
+      end
+    end
   end
 end

data/lib/cloud_crowd/models/job.rb CHANGED Viewed

@@ -15,8 +15,7 @@ module CloudCrowd
     after_create                :queue_for_workers
     before_destroy              :cleanup_assets
-    # Create a Job from an incoming JSON or XML request, and add it to the queue.
-    # TODO: Think about XML support.
+    # Create a Job from an incoming JSON request, and add it to the queue.
     def self.create_from_request(h)
       self.create(
         :inputs       => h['inputs'].to_json,
@@ -41,7 +40,8 @@ module CloudCrowd
       self
     end
-    # Transition this Job's status to the appropriate next status.
+    # Transition this Job's current status to the appropriate next one, based
+    # on the state of the WorkUnits and the nature of the Action.
     def set_next_status
       update_attribute(:status,
         any_work_units_failed? ? FAILED     :
@@ -66,8 +66,9 @@ module CloudCrowd
       end
     end
-    # Cleaning up after a job will remove all of its files from S3. Destroying
-    # a Job calls cleanup_assets first.
+    # Cleaning up after a job will remove all of its files from S3 or the
+    # filesystem. Destroying a Job will cleanup_assets first. Run this in a
+    # separate thread to get out of the transaction's way.
     # TODO: Convert this into a 'cleanup' work unit that gets run by a worker.
     def cleanup_assets
       AssetStore.new.cleanup(self)

data/lib/cloud_crowd/models/node_record.rb CHANGED Viewed

@@ -7,9 +7,9 @@ module CloudCrowd
     has_many :work_units
-    validates_presence_of :host, :ip_address, :port
+    validates_presence_of :host, :ip_address, :port, :enabled_actions
-    before_destroy :clear_work_units
+    after_destroy :redistribute_work_units
     # Available Nodes haven't used up their maximum number of workers yet.
     named_scope :available, {
@@ -23,6 +23,7 @@ module CloudCrowd
       attrs = {
         :ip_address       => request.ip,
         :port             => params[:port],
+        :busy             => params[:busy],
         :max_workers      => params[:max_workers],
         :enabled_actions  => params[:enabled_actions]
       }
@@ -32,12 +33,17 @@ module CloudCrowd
     # Dispatch a WorkUnit to this node. Places the node back at the end of
     # the rotation. If we fail to send the WorkUnit, we consider the node to be
     # down, and remove this record, freeing up all of its checked-out work units.
+    # If the Node responds that it's overloaded, we mark it as busy. Returns
+    # true if the WorkUnit was dispatched successfully.
     def send_work_unit(unit)
       result = node['/work'].post(:work_unit => unit.to_json)
       unit.assign_to(self, JSON.parse(result)['pid'])
-      touch
-    rescue Errno::ECONNREFUSED
-      self.destroy # Couldn't post to node, assume it's gone away.
+      touch && true
+    rescue Errno::ECONNREFUSED # Couldn't post to node, assume it's gone away.
+      destroy && false
+    rescue RestClient::RequestFailed => e
+      raise e unless e.http_code == 503 && e.http_body == Node::OVERLOADED_MESSAGE
+      update_attribute(:busy, true) && false
     end
     # What Actions is this Node able to run?
@@ -45,9 +51,10 @@ module CloudCrowd
       enabled_actions.split(',')
     end
-    # Is this Node too busy for more work? (Determined by number of workers.)
+    # Is this Node too busy for more work? Determined by number of workers, or
+    # the Node's load average, as configured in config.yml.
     def busy?
-      max_workers && work_units.count >= max_workers
+      busy || (max_workers && work_units.count >= max_workers)
     end
     # The URL at which this Node may be reached.
@@ -72,6 +79,11 @@ module CloudCrowd
       work_units.all(:select => 'worker_pid').map(&:worker_pid)
     end
+    # Release all of this Node's WorkUnits for other nodes to take.
+    def release_work_units
+      WorkUnit.update_all('node_record_id = null, worker_pid = null', "node_record_id = #{id}")
+    end
     # The JSON representation of a NodeRecord includes its worker_pids.
     def to_json(opts={})
       { 'host'    => host,
@@ -83,11 +95,10 @@ module CloudCrowd
     private
-    # When a Node shuts down, we free up all of the WorkUnits that it had
-    # reserved, and they become available for others to pick up. Redistribute
-    # the WorkUnits in a separate thread to avoid delaying Node shutdown.
-    def clear_work_units
-      WorkUnit.update_all('node_record_id = null, worker_pid = null', "node_record_id = #{id}")
+    # When a Node exits, release its WorkUnits and redistribute them to others.
+    # Redistribute in a separate thread to avoid delaying shutdown.
+    def redistribute_work_units
+      release_work_units
       Thread.new { WorkUnit.distribute_to_nodes }
     end

data/lib/cloud_crowd/models/work_unit.rb CHANGED Viewed

@@ -28,7 +28,7 @@ module CloudCrowd
       until work_units.empty? do
         node = available_nodes.shift
         unit = work_units.first
-        break unless node
+        break unless node && unit
         next unless node.actions.include? unit.action
         sent = node.send_work_unit(unit)
         if sent
@@ -51,6 +51,12 @@ module CloudCrowd
       WorkUnit.reserved.update_all('reservation = null')
     end
+    # Cancels all outstanding WorkUnit reservations for all processes. (Useful
+    # in the console for debugging.)
+    def self.cancel_all_reservations
+      WorkUnit.update_all('reservation = null')
+    end
     # Look up a WorkUnit by the worker that's currently processing it. Specified
     # by <tt>pid@host</tt>.
     def self.find_by_worker_name(name)
@@ -74,7 +80,7 @@ module CloudCrowd
           WorkUnit.start(job, action, new_input, PROCESSING)
         end
         self.destroy
-        job.set_next_status if job.done_splitting?
+        job.set_next_status if job && job.done_splitting?
       else
         update_attributes({
           :status         => SUCCEEDED,
@@ -84,7 +90,7 @@ module CloudCrowd
           :output         => result,
           :time           => time_taken
         })
-        job.check_for_completion
+        job && job.check_for_completion
       end
     end

data/lib/cloud_crowd/node.rb CHANGED Viewed

@@ -10,9 +10,24 @@ module CloudCrowd
     # A Node's default port. You only run a single node per machine, so they
     # can all use the same port without any problems.
-    DEFAULT_PORT = 9063
+    DEFAULT_PORT        = 9063
-    attr_reader :server, :asset_store
+    # A list of regex scrapers, which let us extract the one-minute load
+    # average and the amount of free memory on different flavors of UNIX.
+    SCRAPE_UPTIME       = /\d+\.\d+/
+    SCRAPE_LINUX_MEMORY = /MemFree:\s+(\d+) kB/
+    SCRAPE_MAC_MEMORY   = /Pages free:\s+(\d+)./
+    SCRAPE_MAC_PAGE     = /page size of (\d+) bytes/
+    # The interval at which the node monitors the machine's load and memory use
+    # (if configured to do so in config.yml).
+    MONITOR_INTERVAL    = 3
+    # The response sent back when this node is overloaded.
+    OVERLOADED_MESSAGE  = 'Node Overloaded'
+    attr_reader :asset_store, :enabled_actions, :host, :port, :server
     set :root, ROOT
     set :authorization_realm, "CloudCrowd"
@@ -35,14 +50,15 @@ module CloudCrowd
     end
     # Posts a WorkUnit to this Node. Forks a Worker and returns the process id.
+    # Returns a 503 if this Node is overloaded.
     post '/work' do
-      pid = fork { Worker.new(self, JSON.parse(params[:work_unit])) }
+      throw :halt, [503, OVERLOADED_MESSAGE] if @overloaded
+      pid = fork { Worker.new(self, JSON.parse(params[:work_unit])).run }
       Process.detach(pid)
       json :pid => pid
     end
-    # Creating a Node registers with the central server and starts listening for
-    # incoming WorkUnits.
+    # When creating a node, specify the port it should run on.
     def initialize(port=DEFAULT_PORT)
       require 'json'
       @server           = CloudCrowd.central_server
@@ -50,25 +66,35 @@ module CloudCrowd
       @enabled_actions  = CloudCrowd.actions.keys
       @asset_store      = AssetStore.new
       @port             = port || DEFAULT_PORT
+      @overloaded       = false
+      @max_load         = CloudCrowd.config[:max_load]
+      @min_memory       = CloudCrowd.config[:min_free_memory]
+      start unless test?
+    end
+    # Starting up a Node registers with the central server and begins to listen
+    # for incoming WorkUnits.
+    def start
       trap_signals
       start_server
-      check_in
+      monitor_system if @max_load || @min_memory
+      check_in(true)
       @server_thread.join
     end
     # Checking in with the central server informs it of the location and
     # configuration of this Node. If it can't check-in, there's no point in
     # starting.
-    def check_in
+    def check_in(critical=false)
       @server["/node/#{@host}"].put(
         :port             => @port,
+        :busy             => @overloaded,
         :max_workers      => CloudCrowd.config[:max_workers],
         :enabled_actions  => @enabled_actions.join(',')
       )
     rescue Errno::ECONNREFUSED
-      puts "Failed to connect to the central server (#{@server.to_s}), exiting..."
-      raise SystemExit
+      puts "Failed to connect to the central server (#{@server.to_s})."
+      raise SystemExit if critical
     end
     # Before exiting, the Node checks out with the central server, releasing all
@@ -77,6 +103,33 @@ module CloudCrowd
       @server["/node/#{@host}"].delete
     end
+    # Is the node overloaded? If configured, checks if the load average is
+    # greater than 'max_load', or if the available RAM is less than
+    # 'min_free_memory'.
+    def overloaded?
+      (@max_load && load_average > @max_load) ||
+      (@min_memory && free_memory < @min_memory)
+    end
+    # The current one-minute load average.
+    def load_average
+      `uptime`.match(SCRAPE_UPTIME).to_s.to_f
+    end
+    # The current amount of free memory in megabytes.
+    def free_memory
+      case RUBY_PLATFORM
+      when /darwin/
+        stats = `vm_stat`
+        @mac_page_size ||= stats.match(SCRAPE_MAC_PAGE)[1].to_f / 1048576.0
+        stats.match(SCRAPE_MAC_MEMORY)[1].to_f * @mac_page_size
+      when /linux/
+        `cat /proc/meminfo`.match(SCRAPE_LINUX_MEMORY)[1].to_f / 1024.0
+      else
+        raise NotImplementedError, "'min_free_memory' is not yet implemented on your platform"
+      end
+    end
     private
@@ -87,6 +140,20 @@ module CloudCrowd
       end
     end
+    # Launch a monitoring thread that periodically checks the node's load
+    # average and the amount of free memory remaining. If we transition out of
+    # the overloaded state, let central know.
+    def monitor_system
+      @monitor_thread = Thread.new do
+        loop do
+          was_overloaded = @overloaded
+          @overloaded = overloaded?
+          check_in if was_overloaded && !@overloaded
+          sleep MONITOR_INTERVAL
+        end
+      end
+    end
     # Trap exit signals in order to shut down cleanly.
     def trap_signals
       Signal.trap('INT')  { shut_down }
@@ -96,7 +163,9 @@ module CloudCrowd
     # At shut down, de-register with the central server before exiting.
     def shut_down
+      @monitor_thread.kill if @monitor_thread
       check_out
+      @server_thread.kill
       Process.exit
     end

data/lib/cloud_crowd/schema.rb CHANGED Viewed

@@ -15,10 +15,11 @@ ActiveRecord::Schema.define(:version => CloudCrowd::SCHEMA_VERSION) do
   end
   create_table "node_records", :force => true do |t|
-    t.string   "host",                            :null => false
-    t.string   "ip_address",                      :null => false
-    t.integer  "port",                            :null => false
-    t.string   "enabled_actions", :default => '', :null => false
+    t.string   "host",                                :null => false
+    t.string   "ip_address",                          :null => false
+    t.integer  "port",                                :null => false
+    t.string   "enabled_actions", :default => '',     :null => false
+    t.boolean  "busy",            :default => false,  :null => false
     t.integer  "max_workers"
     t.datetime "created_at"
     t.datetime "updated_at"

data/lib/cloud_crowd/worker.rb CHANGED Viewed

@@ -14,15 +14,16 @@ module CloudCrowd
     # Wait five seconds to retry, after internal communication errors.
     RETRY_WAIT = 5
-    attr_reader :action
+    attr_reader :pid, :node, :unit, :status
-    # A new Worker begins processing its WorkUnit straight off.
-    def initialize(node, work_unit)
-      @pid  = $$
-      @node = node
-      trap_signals
-      setup_work_unit(work_unit)
-      run
+    # A new Worker customizes itself to its WorkUnit at instantiation.
+    def initialize(node, unit)
+      @start_time = Time.now
+      @pid        = $$
+      @node       = node
+      @unit       = unit
+      @status     = @unit['status']
+      @retry_wait = RETRY_WAIT
     end
     # Return output to the central server, marking the WorkUnit done.
@@ -49,18 +50,20 @@ module CloudCrowd
     def keep_trying_to(title)
       begin
         yield
+      rescue RestClient::ResourceNotFound => e
+        log "work unit ##{@unit['id']} doesn't exist. discarding..."
       rescue Exception => e
-        log "failed to #{title} -- retry in #{RETRY_WAIT} seconds"
+        log "failed to #{title} -- retry in #{@retry_wait} seconds"
         log e.message
         log e.backtrace
-        sleep RETRY_WAIT
+        sleep @retry_wait
         retry
       end
     end
     # Loggable details describing what the Worker is up to.
     def display_work_unit
-      "unit ##{@options['work_unit_id']} (#{@action_name}/#{CloudCrowd.display_status(@status)})"
+      "unit ##{@unit['id']} (#{@unit['action']}/#{CloudCrowd.display_status(@status)})"
     end
     # Executes the WorkUnit by running the Action, catching all exceptions as
@@ -70,12 +73,13 @@ module CloudCrowd
       @worker_thread = Thread.new do
         begin
           result = nil
-          @action = CloudCrowd.actions[@action_name].new(@status, @input, @options, @node.asset_store)
-          Dir.chdir(@action.work_directory) do
+          action_class = CloudCrowd.actions[@unit['action']]
+          action = action_class.new(@status, @unit['input'], enhanced_unit_options, @node.asset_store)
+          Dir.chdir(action.work_directory) do
             result = case @status
-            when PROCESSING then @action.process
-            when SPLITTING  then @action.split
-            when MERGING    then @action.merge
+            when PROCESSING then action.process
+            when SPLITTING  then action.split
+            when MERGING    then action.merge
             else raise Error::StatusUnspecified, "work units must specify their status"
             end
           end
@@ -83,7 +87,7 @@ module CloudCrowd
         rescue Exception => e
           fail_work_unit(e)
         ensure
-          @action.cleanup_work_directory
+          action.cleanup_work_directory if action
         end
       end
       @worker_thread.join
@@ -91,9 +95,26 @@ module CloudCrowd
     # Wraps run_work_unit to benchmark the execution time, if requested.
     def run
-      return run_work_unit unless @options['benchmark']
-      status = CloudCrowd.display_status(@status)
-      log("ran #{@action_name}/#{status} in " + Benchmark.measure { run_work_unit }.to_s)
+      trap_signals
+      log "starting #{display_work_unit}"
+      return run_work_unit unless @unit['options']['benchmark']
+      log("ran #{display_work_unit} in " + Benchmark.measure { run_work_unit }.to_s)
+    end
+    # There are some potentially important attributes of the WorkUnit that we'd
+    # like to pass into the Action -- in case it needs to know them. They will
+    # always be made available in the options hash.
+    def enhanced_unit_options
+      @unit['options'].merge({
+        'job_id'        => @unit['job_id'],
+        'work_unit_id'  => @unit['id'],
+        'attempts'      => @unit['attempts']
+      })
+    end
+    # How long has this worker been running for?
+    def time_taken
+      Time.now - @start_time
     end
@@ -103,20 +124,8 @@ module CloudCrowd
     # regardless of success or failure.
     def base_params
       { :pid  => @pid,
-        :id   => @options['work_unit_id'],
-        :time => Time.now - @start_time }
-    end
-    # Extract the Worker's instance variables from a WorkUnit's JSON.
-    def setup_work_unit(unit)
-      return false unless unit
-      @start_time = Time.now
-      @action_name, @input, @options, @status = unit['action'], unit['input'], unit['options'], unit['status']
-      @options['job_id'] = unit['job_id']
-      @options['work_unit_id'] = unit['id']
-      @options['attempts'] ||= unit['attempts']
-      log "fetched #{display_work_unit}"
-      return true
+        :id   => @unit['id'],
+        :time => time_taken }
     end
     # Log a message to the daemon log. Includes PID for identification.

data/test/acceptance/test_failing_work_units.rb CHANGED Viewed

@@ -4,6 +4,7 @@ require 'test_helper'
 class FailingWorkUnitsTest < Test::Unit::TestCase
   should "retry work units when they fail" do
+    WorkUnit.expects(:distribute_to_nodes).returns(true)
     browser = Rack::Test::Session.new(Rack::MockSession.new(CloudCrowd::Server))
     browser.post '/jobs', :job => {

data/test/acceptance/test_node.rb ADDED Viewed

@@ -0,0 +1,20 @@
+require 'test_helper'
+class NodeAcceptanceTest < Test::Unit::TestCase
+  include Rack::Test::Methods
+  def app
+    CloudCrowd::Node
+  end
+  context "The CloudCrowd::Node (Sinatra)" do
+    should "have a heartbeat" do
+      get '/heartbeat'
+      assert last_response.body == 'buh-bump'
+    end
+  end
+end

data/test/acceptance/test_server.rb CHANGED Viewed

@@ -46,6 +46,7 @@ class ServerTest < Test::Unit::TestCase
     end
     should "be able to create a job" do
+      WorkUnit.expects(:distribute_to_nodes).returns(true)
       post('/jobs', :job => '{"action":"graphics_magick","inputs":["http://www.google.com/"]}')
       assert last_response.ok?
       job_info = JSON.parse(last_response.body)

data/test/acceptance/test_word_count.rb CHANGED Viewed

@@ -5,16 +5,13 @@ class WordCountTest < Test::Unit::TestCase
   context "the word_count action" do
     setup do
+      WorkUnit.expects(:distribute_to_nodes).returns(true)
       @asset_store = AssetStore.new
       @browser = Rack::Test::Session.new(Rack::MockSession.new(CloudCrowd::Server))
       @browser.put('/worker', :name => 'test_worker', :thread_status => 'sleeping')
       post_job_to_count_words_in_this_file
       @job_id = JSON.parse(@browser.last_response.body)['id']
     end
-    teardown do
-      CloudCrowd::Job.destroy_all
-    end
     should "be able to create a word_count job" do
       assert @browser.last_response.ok?
@@ -26,7 +23,7 @@ class WordCountTest < Test::Unit::TestCase
     should "be able to perform the processing stage of a word_count" do
       action = CloudCrowd.actions['word_count'].new(1, "file://#{File.expand_path(__FILE__)}", {}, @asset_store)
       count = action.process
-      assert count == 104
+      assert count == 101
     end
   end

data/test/blueprints.rb CHANGED Viewed

@@ -1,4 +1,5 @@
 Sham.url        { Faker::Internet.domain_name + "/" + Faker::Internet.domain_word + ".jpg" }
+Sham.host       { Faker::Internet.domain_name + '.local' }
 CloudCrowd::Job.blueprint do
   status  { CloudCrowd::PROCESSING }
@@ -8,9 +9,17 @@ CloudCrowd::Job.blueprint do
   email   { 'noone@example.com' }
 end
+CloudCrowd::NodeRecord.blueprint do
+  host
+  ip_address      { '127.0.0.1' }
+  port            { 6093 }
+  enabled_actions { 'graphics_magick,word_count' }
+  max_workers     { 3 }
+end
 CloudCrowd::WorkUnit.blueprint do
-  job               { CloudCrowd::Job.make }
-  status            { CloudCrowd::PROCESSING }
-  input             { Sham.url }
-  action            { 'graphics_magick' }
+  job     { CloudCrowd::Job.make }
+  status  { CloudCrowd::PROCESSING }
+  input   { '{"key":"value"}' }
+  action  { 'graphics_magick' }
 end

data/test/config/database.yml CHANGED Viewed

@@ -1,6 +1,3 @@
-:adapter:  mysql
-:encoding: utf8
-:username: root
-:password:
-:socket:   /tmp/mysql.sock
-:database: cloud_crowd_test
+:adapter:   sqlite3
+:database:  test/cloud_crowd_test.db
+:timeout:   5000

data/test/unit/test_node.rb ADDED Viewed

@@ -0,0 +1,38 @@
+require 'test_helper'
+class NodeUnitTest < Test::Unit::TestCase
+  context "A Node" do
+    setup do
+      @node = Node.new(11011).instance_variable_get(:@app)
+    end
+    should "instantiate correctly" do
+      assert @node.server.to_s == "http://localhost:9173"
+      assert @node.port == 11011
+      assert @node.host == Socket.gethostname
+      assert @node.enabled_actions.length > 2
+      assert @node.asset_store.is_a? AssetStore::FilesystemStore
+    end
+    should "trap signals and launch a server at start" do
+      Signal.expects(:trap).times(3)
+      Thin::Server.expects(:start)
+      @node.expects(:check_in)
+      @node.start
+    end
+    should "be able to determine if the node is overloaded" do
+      assert !@node.overloaded?
+      @node.instance_variable_set :@max_load, 0.01
+      assert @node.overloaded?
+      @node.instance_variable_set :@max_load, nil
+      assert !@node.overloaded?
+      @node.instance_variable_set :@min_memory, 8000
+      assert @node.overloaded?
+    end
+  end
+end

data/test/unit/test_node_record.rb ADDED Viewed

@@ -0,0 +1,42 @@
+require 'test_helper'
+class NodeRecordTest < Test::Unit::TestCase
+  context "A NodeRecord" do
+    setup do
+      @node = CloudCrowd::NodeRecord.make
+    end
+    subject { @node }
+    should_have_many :work_units
+    should_validate_presence_of :host, :ip_address, :port, :enabled_actions
+    should "be available" do
+      assert NodeRecord.available.map(&:id).include? @node.id
+    end
+    should "know its enabled actions" do
+      assert @node.actions.include? 'graphics_magick'
+      assert @node.actions.include? 'word_count'
+    end
+    should "know if the node is busy" do
+      assert !@node.busy?
+      assert @node.display_status == 'available'
+      (@node.max_workers + 1).times { WorkUnit.make(:node_record => @node) }
+      assert @node.busy?
+      assert @node.display_status == 'busy'
+      @node.release_work_units
+      assert !@node.busy?
+    end
+    should "be reachable at a URL" do
+      assert !!URI.parse(@node.url)
+    end
+  end
+end

data/test/unit/test_worker.rb ADDED Viewed

@@ -0,0 +1,48 @@
+require 'test_helper'
+class WorkerTest < Test::Unit::TestCase
+  context "A CloudCrowd::Worker" do
+    setup do
+      @node = Node.new.instance_variable_get(:@app)
+      @unit = WorkUnit.make
+      @worker = Worker.new(@node, JSON.parse(@unit.to_json))
+    end
+    should "instantiate correctly" do
+      assert @worker.pid == $$
+      assert @worker.unit['id'] == @unit.id
+      assert @worker.status == @unit.status
+      assert @worker.node == @node
+      assert @worker.time_taken > 0
+    end
+    should "be able to retry operations that must succeed" do
+      @worker.instance_variable_set :@retry_wait, 0.01
+      @worker.expects(:log).at_least(3)
+      tries = 0
+      @worker.keep_trying_to("do something critical") do
+        tries += 1;
+        raise 'hell' unless tries > 3
+        assert "made it through"
+      end
+    end
+    should "be able to run an action and try to complete it" do
+      GraphicsMagick.any_instance.expects(:process).returns('the answer')
+      GraphicsMagick.any_instance.expects(:cleanup_work_directory)
+      @worker.expects(:complete_work_unit).with({'output' => 'the answer'}.to_json)
+      @worker.run_work_unit
+    end
+    should "enchance the options that an action receives with extra info" do
+      opts = @worker.enhanced_unit_options
+      assert opts['work_unit_id'] == @unit.id
+      assert opts['job_id'] == @unit.job.id
+      assert opts['attempts'] == @unit.attempts
+    end
+  end
+end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: documentcloud-cloud-crowd
 version: !ruby/object:Gem::Version
-  version: 0.2.0
+  version: 0.2.1
 platform: ruby
 authors:
 - Jeremy Ashkenas
@@ -9,7 +9,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2009-09-17 00:00:00 -07:00
+date: 2009-09-18 00:00:00 -07:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency
@@ -182,8 +182,9 @@ files:
 - public/js/flot.js
 - public/js/jquery.js
 - README
-- test/acceptance/test_server.rb
+- test/acceptance/test_node.rb
 - test/acceptance/test_failing_work_units.rb
+- test/acceptance/test_server.rb
 - test/acceptance/test_word_count.rb
 - test/blueprints.rb
 - test/config/config.ru
@@ -193,7 +194,10 @@ files:
 - test/test_helper.rb
 - test/unit/test_action.rb
 - test/unit/test_configuration.rb
+- test/unit/test_node.rb
+- test/unit/test_node_record.rb
 - test/unit/test_job.rb
+- test/unit/test_worker.rb
 - test/unit/test_work_unit.rb
 - views/operations_center.erb
 has_rdoc: true