documentcloud-cloud-crowd 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/cloud-crowd.gemspec +7 -3
 - data/config/config.example.yml +10 -3
 - data/config/database.example.yml +7 -1
 - data/lib/cloud-crowd.rb +2 -2
 - data/lib/cloud_crowd/action.rb +16 -4
 - data/lib/cloud_crowd/exceptions.rb +12 -1
 - data/lib/cloud_crowd/models/job.rb +6 -5
 - data/lib/cloud_crowd/models/node_record.rb +23 -12
 - data/lib/cloud_crowd/models/work_unit.rb +9 -3
 - data/lib/cloud_crowd/node.rb +79 -10
 - data/lib/cloud_crowd/schema.rb +5 -4
 - data/lib/cloud_crowd/worker.rb +43 -34
 - data/test/acceptance/test_failing_work_units.rb +1 -0
 - data/test/acceptance/test_node.rb +20 -0
 - data/test/acceptance/test_server.rb +1 -0
 - data/test/acceptance/test_word_count.rb +2 -5
 - data/test/blueprints.rb +13 -4
 - data/test/config/database.yml +3 -6
 - data/test/unit/test_node.rb +38 -0
 - data/test/unit/test_node_record.rb +42 -0
 - data/test/unit/test_worker.rb +48 -0
 - metadata +7 -3
 
    
        data/cloud-crowd.gemspec
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            Gem::Specification.new do |s|
         
     | 
| 
       2 
2 
     | 
    
         
             
              s.name      = 'cloud-crowd'
         
     | 
| 
       3 
     | 
    
         
            -
              s.version   = '0.2. 
     | 
| 
       4 
     | 
    
         
            -
              s.date      = '2009-09- 
     | 
| 
      
 3 
     | 
    
         
            +
              s.version   = '0.2.1'         # Keep version in sync with cloud-crowd.rb
         
     | 
| 
      
 4 
     | 
    
         
            +
              s.date      = '2009-09-18'
         
     | 
| 
       5 
5 
     | 
    
         | 
| 
       6 
6 
     | 
    
         
             
              s.homepage    = "http://wiki.github.com/documentcloud/cloud-crowd"
         
     | 
| 
       7 
7 
     | 
    
         
             
              s.summary     = "Parallel Processing for the Rest of Us"
         
     | 
| 
         @@ -94,8 +94,9 @@ public/js/excanvas.js 
     | 
|
| 
       94 
94 
     | 
    
         
             
            public/js/flot.js
         
     | 
| 
       95 
95 
     | 
    
         
             
            public/js/jquery.js
         
     | 
| 
       96 
96 
     | 
    
         
             
            README
         
     | 
| 
       97 
     | 
    
         
            -
            test/acceptance/ 
     | 
| 
      
 97 
     | 
    
         
            +
            test/acceptance/test_node.rb
         
     | 
| 
       98 
98 
     | 
    
         
             
            test/acceptance/test_failing_work_units.rb
         
     | 
| 
      
 99 
     | 
    
         
            +
            test/acceptance/test_server.rb
         
     | 
| 
       99 
100 
     | 
    
         
             
            test/acceptance/test_word_count.rb
         
     | 
| 
       100 
101 
     | 
    
         
             
            test/blueprints.rb
         
     | 
| 
       101 
102 
     | 
    
         
             
            test/config/config.ru
         
     | 
| 
         @@ -105,7 +106,10 @@ test/config/actions/failure_testing.rb 
     | 
|
| 
       105 
106 
     | 
    
         
             
            test/test_helper.rb
         
     | 
| 
       106 
107 
     | 
    
         
             
            test/unit/test_action.rb
         
     | 
| 
       107 
108 
     | 
    
         
             
            test/unit/test_configuration.rb
         
     | 
| 
      
 109 
     | 
    
         
            +
            test/unit/test_node.rb
         
     | 
| 
      
 110 
     | 
    
         
            +
            test/unit/test_node_record.rb
         
     | 
| 
       108 
111 
     | 
    
         
             
            test/unit/test_job.rb
         
     | 
| 
      
 112 
     | 
    
         
            +
            test/unit/test_worker.rb
         
     | 
| 
       109 
113 
     | 
    
         
             
            test/unit/test_work_unit.rb
         
     | 
| 
       110 
114 
     | 
    
         
             
            views/operations_center.erb
         
     | 
| 
       111 
115 
     | 
    
         
             
            )
         
     | 
    
        data/config/config.example.yml
    CHANGED
    
    | 
         @@ -1,14 +1,21 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            # The URL where you're planning on running the central server/queue/database.
         
     | 
| 
       2 
2 
     | 
    
         
             
            :central_server:      http://localhost:9173
         
     | 
| 
       3 
3 
     | 
    
         | 
| 
       4 
     | 
    
         
            -
            #  
     | 
| 
       5 
     | 
    
         
            -
            #  
     | 
| 
       6 
     | 
    
         
            -
            #  
     | 
| 
      
 4 
     | 
    
         
            +
            # The following settings allow you to control the number of workers that can run
         
     | 
| 
      
 5 
     | 
    
         
            +
            # on a given node, to prevent the node from becoming overloaded. 'max_workers' 
         
     | 
| 
      
 6 
     | 
    
         
            +
            # is a simple cap on the maximum number of workers a node is allowed to run
         
     | 
| 
      
 7 
     | 
    
         
            +
            # concurrently. 'max_load' is the maximum (one-minute) load average, above which
         
     | 
| 
      
 8 
     | 
    
         
            +
            # a node will refuse to take new work. 'min_free_memory' is the minimum amount
         
     | 
| 
      
 9 
     | 
    
         
            +
            # of free RAM (in megabytes) a node is allowed to have, below which no new 
         
     | 
| 
      
 10 
     | 
    
         
            +
            # workers are run. These settings may be used in any combination.
         
     | 
| 
       7 
11 
     | 
    
         
             
            :max_workers:         5
         
     | 
| 
      
 12 
     | 
    
         
            +
            # :max_load:            5.0
         
     | 
| 
      
 13 
     | 
    
         
            +
            # :min_free_memory:     150
         
     | 
| 
       8 
14 
     | 
    
         | 
| 
       9 
15 
     | 
    
         
             
            # The storage back-end that you'd like to use for intermediate and final results
         
     | 
| 
       10 
16 
     | 
    
         
             
            # of processing. 's3' and 'filesystem' are supported. 'filesystem' should only
         
     | 
| 
       11 
17 
     | 
    
         
             
            # be used in development, on single-machine installations, or networked drives.
         
     | 
| 
      
 18 
     | 
    
         
            +
            # If you *are* developing an action, filesystem is certainly faster and easier.
         
     | 
| 
       12 
19 
     | 
    
         
             
            :storage:             s3
         
     | 
| 
       13 
20 
     | 
    
         | 
| 
       14 
21 
     | 
    
         
             
            # Please provide your AWS credentials for S3 storage of job output.
         
     | 
    
        data/config/database.example.yml
    CHANGED
    
    | 
         @@ -6,4 +6,10 @@ 
     | 
|
| 
       6 
6 
     | 
    
         
             
            :username: root
         
     | 
| 
       7 
7 
     | 
    
         
             
            :password:
         
     | 
| 
       8 
8 
     | 
    
         
             
            :socket:   /tmp/mysql.sock
         
     | 
| 
       9 
     | 
    
         
            -
            :database: cloud_crowd
         
     | 
| 
      
 9 
     | 
    
         
            +
            :database: cloud_crowd
         
     | 
| 
      
 10 
     | 
    
         
            +
             
     | 
| 
      
 11 
     | 
    
         
            +
            # If you'd prefer to use an SQLite database instead, the following configuration
         
     | 
| 
      
 12 
     | 
    
         
            +
            # will do nicely:
         
     | 
| 
      
 13 
     | 
    
         
            +
            #
         
     | 
| 
      
 14 
     | 
    
         
            +
            # :adapter:  sqlite3
         
     | 
| 
      
 15 
     | 
    
         
            +
            # :database: cloud_crowd.db
         
     | 
    
        data/lib/cloud-crowd.rb
    CHANGED
    
    | 
         @@ -43,10 +43,10 @@ module CloudCrowd 
     | 
|
| 
       43 
43 
     | 
    
         
             
              autoload :WorkUnit,     'cloud_crowd/models'
         
     | 
| 
       44 
44 
     | 
    
         | 
| 
       45 
45 
     | 
    
         
             
              # Keep this version in sync with the gemspec.
         
     | 
| 
       46 
     | 
    
         
            -
              VERSION        = '0.2. 
     | 
| 
      
 46 
     | 
    
         
            +
              VERSION        = '0.2.1'
         
     | 
| 
       47 
47 
     | 
    
         | 
| 
       48 
48 
     | 
    
         
             
              # Increment the schema version when there's a backwards incompatible change.
         
     | 
| 
       49 
     | 
    
         
            -
              SCHEMA_VERSION =  
     | 
| 
      
 49 
     | 
    
         
            +
              SCHEMA_VERSION = 3
         
     | 
| 
       50 
50 
     | 
    
         | 
| 
       51 
51 
     | 
    
         
             
              # Root directory of the CloudCrowd gem.
         
     | 
| 
       52 
52 
     | 
    
         
             
              ROOT           = File.expand_path(File.dirname(__FILE__) + '/..')
         
     | 
    
        data/lib/cloud_crowd/action.rb
    CHANGED
    
    | 
         @@ -12,6 +12,9 @@ module CloudCrowd 
     | 
|
| 
       12 
12 
     | 
    
         
             
              #
         
     | 
| 
       13 
13 
     | 
    
         
             
              # All actions have use of an individual +work_directory+, for scratch files,
         
     | 
| 
       14 
14 
     | 
    
         
             
              # and spend their duration inside of it, so relative paths work well.
         
     | 
| 
      
 15 
     | 
    
         
            +
              #
         
     | 
| 
      
 16 
     | 
    
         
            +
              # Note that Actions inherit a backticks (`) method that raises an Exception
         
     | 
| 
      
 17 
     | 
    
         
            +
              # if the external command fails.
         
     | 
| 
       15 
18 
     | 
    
         
             
              class Action
         
     | 
| 
       16 
19 
     | 
    
         | 
| 
       17 
20 
     | 
    
         
             
                FILE_URL = /\Afile:\/\//
         
     | 
| 
         @@ -33,7 +36,7 @@ module CloudCrowd 
     | 
|
| 
       33 
36 
     | 
    
         | 
| 
       34 
37 
     | 
    
         
             
                # Each Action subclass must implement a +process+ method, overriding this.
         
     | 
| 
       35 
38 
     | 
    
         
             
                def process
         
     | 
| 
       36 
     | 
    
         
            -
                  raise NotImplementedError 
     | 
| 
      
 39 
     | 
    
         
            +
                  raise NotImplementedError, "CloudCrowd::Actions must override 'process' with their own processing code."
         
     | 
| 
       37 
40 
     | 
    
         
             
                end
         
     | 
| 
       38 
41 
     | 
    
         | 
| 
       39 
42 
     | 
    
         
             
                # Download a file to the specified path.
         
     | 
| 
         @@ -66,6 +69,15 @@ module CloudCrowd 
     | 
|
| 
       66 
69 
     | 
    
         
             
                  FileUtils.rm_r(@work_directory) if File.exists?(@work_directory)
         
     | 
| 
       67 
70 
     | 
    
         
             
                end
         
     | 
| 
       68 
71 
     | 
    
         | 
| 
      
 72 
     | 
    
         
            +
                # Actions have a backticks command that raises a CommandFailed exception 
         
     | 
| 
      
 73 
     | 
    
         
            +
                # on failure, so that processing doesn't just blithely continue.
         
     | 
| 
      
 74 
     | 
    
         
            +
                def `(command)
         
     | 
| 
      
 75 
     | 
    
         
            +
                  result    = super(command)
         
     | 
| 
      
 76 
     | 
    
         
            +
                  exit_code = $?.to_i
         
     | 
| 
      
 77 
     | 
    
         
            +
                  raise Error::CommandFailed.new(result, exit_code) unless exit_code == 0
         
     | 
| 
      
 78 
     | 
    
         
            +
                  result
         
     | 
| 
      
 79 
     | 
    
         
            +
                end
         
     | 
| 
      
 80 
     | 
    
         
            +
                
         
     | 
| 
       69 
81 
     | 
    
         | 
| 
       70 
82 
     | 
    
         
             
                private
         
     | 
| 
       71 
83 
     | 
    
         | 
| 
         @@ -77,7 +89,7 @@ module CloudCrowd 
     | 
|
| 
       77 
89 
     | 
    
         
             
                end
         
     | 
| 
       78 
90 
     | 
    
         | 
| 
       79 
91 
     | 
    
         
             
                # The directory prefix to use for both local and S3 storage.
         
     | 
| 
       80 
     | 
    
         
            -
                # [ 
     | 
| 
      
 92 
     | 
    
         
            +
                # [action]/job_[job_id]/unit_[work_unit_id]
         
     | 
| 
       81 
93 
     | 
    
         
             
                def storage_prefix
         
     | 
| 
       82 
94 
     | 
    
         
             
                  path_parts = []
         
     | 
| 
       83 
95 
     | 
    
         
             
                  path_parts << Inflector.underscore(self.class)
         
     | 
| 
         @@ -93,9 +105,9 @@ module CloudCrowd 
     | 
|
| 
       93 
105 
     | 
    
         | 
| 
       94 
106 
     | 
    
         
             
                # If the input is a URL, download the file before beginning processing.
         
     | 
| 
       95 
107 
     | 
    
         
             
                def download_input
         
     | 
| 
      
 108 
     | 
    
         
            +
                  input_is_url = !!URI.parse(@input) rescue false
         
     | 
| 
      
 109 
     | 
    
         
            +
                  return unless input_is_url
         
     | 
| 
       96 
110 
     | 
    
         
             
                  Dir.chdir(@work_directory) do
         
     | 
| 
       97 
     | 
    
         
            -
                    input_is_url = !!URI.parse(@input) rescue false
         
     | 
| 
       98 
     | 
    
         
            -
                    return unless input_is_url
         
     | 
| 
       99 
111 
     | 
    
         
             
                    @input_path = File.join(@work_directory, safe_filename(@input))
         
     | 
| 
       100 
112 
     | 
    
         
             
                    @file_name = File.basename(@input_path, File.extname(@input_path))
         
     | 
| 
       101 
113 
     | 
    
         
             
                    download(@input, @input_path)
         
     | 
| 
         @@ -11,7 +11,7 @@ module CloudCrowd 
     | 
|
| 
       11 
11 
     | 
    
         
             
                class ActionNotFound < Error
         
     | 
| 
       12 
12 
     | 
    
         
             
                end
         
     | 
| 
       13 
13 
     | 
    
         | 
| 
       14 
     | 
    
         
            -
                # StorageNotFound is raised when config.yml specifies a storage back 
     | 
| 
      
 14 
     | 
    
         
            +
                # StorageNotFound is raised when config.yml specifies a storage back-end that
         
     | 
| 
       15 
15 
     | 
    
         
             
                # doesn't exist.
         
     | 
| 
       16 
16 
     | 
    
         
             
                class StorageNotFound < Error
         
     | 
| 
       17 
17 
     | 
    
         
             
                end
         
     | 
| 
         @@ -30,6 +30,17 @@ module CloudCrowd 
     | 
|
| 
       30 
30 
     | 
    
         
             
                class MissingConfiguration < Error
         
     | 
| 
       31 
31 
     | 
    
         
             
                end
         
     | 
| 
       32 
32 
     | 
    
         | 
| 
      
 33 
     | 
    
         
            +
                # CommandFailed is raised when an action shells out, and the external 
         
     | 
| 
      
 34 
     | 
    
         
            +
                # command returns a non-zero exit code.
         
     | 
| 
      
 35 
     | 
    
         
            +
                class CommandFailed < Error
         
     | 
| 
      
 36 
     | 
    
         
            +
                  attr_reader :exit_code
         
     | 
| 
      
 37 
     | 
    
         
            +
                  
         
     | 
| 
      
 38 
     | 
    
         
            +
                  def initialize(message, exit_code)
         
     | 
| 
      
 39 
     | 
    
         
            +
                    super(message)
         
     | 
| 
      
 40 
     | 
    
         
            +
                    @exit_code = exit_code
         
     | 
| 
      
 41 
     | 
    
         
            +
                  end
         
     | 
| 
      
 42 
     | 
    
         
            +
                end
         
     | 
| 
      
 43 
     | 
    
         
            +
                
         
     | 
| 
       33 
44 
     | 
    
         
             
              end
         
     | 
| 
       34 
45 
     | 
    
         | 
| 
       35 
46 
     | 
    
         
             
            end
         
     | 
| 
         @@ -15,8 +15,7 @@ module CloudCrowd 
     | 
|
| 
       15 
15 
     | 
    
         
             
                after_create                :queue_for_workers
         
     | 
| 
       16 
16 
     | 
    
         
             
                before_destroy              :cleanup_assets
         
     | 
| 
       17 
17 
     | 
    
         | 
| 
       18 
     | 
    
         
            -
                # Create a Job from an incoming JSON  
     | 
| 
       19 
     | 
    
         
            -
                # TODO: Think about XML support.
         
     | 
| 
      
 18 
     | 
    
         
            +
                # Create a Job from an incoming JSON request, and add it to the queue.
         
     | 
| 
       20 
19 
     | 
    
         
             
                def self.create_from_request(h)
         
     | 
| 
       21 
20 
     | 
    
         
             
                  self.create(
         
     | 
| 
       22 
21 
     | 
    
         
             
                    :inputs       => h['inputs'].to_json,
         
     | 
| 
         @@ -41,7 +40,8 @@ module CloudCrowd 
     | 
|
| 
       41 
40 
     | 
    
         
             
                  self
         
     | 
| 
       42 
41 
     | 
    
         
             
                end
         
     | 
| 
       43 
42 
     | 
    
         | 
| 
       44 
     | 
    
         
            -
                # Transition this Job's status to the appropriate next  
     | 
| 
      
 43 
     | 
    
         
            +
                # Transition this Job's current status to the appropriate next one, based
         
     | 
| 
      
 44 
     | 
    
         
            +
                # on the state of the WorkUnits and the nature of the Action.
         
     | 
| 
       45 
45 
     | 
    
         
             
                def set_next_status
         
     | 
| 
       46 
46 
     | 
    
         
             
                  update_attribute(:status,
         
     | 
| 
       47 
47 
     | 
    
         
             
                    any_work_units_failed? ? FAILED     :
         
     | 
| 
         @@ -66,8 +66,9 @@ module CloudCrowd 
     | 
|
| 
       66 
66 
     | 
    
         
             
                  end
         
     | 
| 
       67 
67 
     | 
    
         
             
                end
         
     | 
| 
       68 
68 
     | 
    
         | 
| 
       69 
     | 
    
         
            -
                # Cleaning up after a job will remove all of its files from S3 
     | 
| 
       70 
     | 
    
         
            -
                # a Job  
     | 
| 
      
 69 
     | 
    
         
            +
                # Cleaning up after a job will remove all of its files from S3 or the
         
     | 
| 
      
 70 
     | 
    
         
            +
                # filesystem. Destroying a Job will cleanup_assets first. Run this in a 
         
     | 
| 
      
 71 
     | 
    
         
            +
                # separate thread to get out of the transaction's way.
         
     | 
| 
       71 
72 
     | 
    
         
             
                # TODO: Convert this into a 'cleanup' work unit that gets run by a worker.
         
     | 
| 
       72 
73 
     | 
    
         
             
                def cleanup_assets
         
     | 
| 
       73 
74 
     | 
    
         
             
                  AssetStore.new.cleanup(self)
         
     | 
| 
         @@ -7,9 +7,9 @@ module CloudCrowd 
     | 
|
| 
       7 
7 
     | 
    
         | 
| 
       8 
8 
     | 
    
         
             
                has_many :work_units
         
     | 
| 
       9 
9 
     | 
    
         | 
| 
       10 
     | 
    
         
            -
                validates_presence_of :host, :ip_address, :port
         
     | 
| 
      
 10 
     | 
    
         
            +
                validates_presence_of :host, :ip_address, :port, :enabled_actions
         
     | 
| 
       11 
11 
     | 
    
         | 
| 
       12 
     | 
    
         
            -
                 
     | 
| 
      
 12 
     | 
    
         
            +
                after_destroy :redistribute_work_units
         
     | 
| 
       13 
13 
     | 
    
         | 
| 
       14 
14 
     | 
    
         
             
                # Available Nodes haven't used up their maximum number of workers yet.
         
     | 
| 
       15 
15 
     | 
    
         
             
                named_scope :available, {
         
     | 
| 
         @@ -23,6 +23,7 @@ module CloudCrowd 
     | 
|
| 
       23 
23 
     | 
    
         
             
                  attrs = {
         
     | 
| 
       24 
24 
     | 
    
         
             
                    :ip_address       => request.ip,
         
     | 
| 
       25 
25 
     | 
    
         
             
                    :port             => params[:port],
         
     | 
| 
      
 26 
     | 
    
         
            +
                    :busy             => params[:busy],
         
     | 
| 
       26 
27 
     | 
    
         
             
                    :max_workers      => params[:max_workers],
         
     | 
| 
       27 
28 
     | 
    
         
             
                    :enabled_actions  => params[:enabled_actions]
         
     | 
| 
       28 
29 
     | 
    
         
             
                  }
         
     | 
| 
         @@ -32,12 +33,17 @@ module CloudCrowd 
     | 
|
| 
       32 
33 
     | 
    
         
             
                # Dispatch a WorkUnit to this node. Places the node back at the end of
         
     | 
| 
       33 
34 
     | 
    
         
             
                # the rotation. If we fail to send the WorkUnit, we consider the node to be
         
     | 
| 
       34 
35 
     | 
    
         
             
                # down, and remove this record, freeing up all of its checked-out work units.
         
     | 
| 
      
 36 
     | 
    
         
            +
                # If the Node responds that it's overloaded, we mark it as busy. Returns 
         
     | 
| 
      
 37 
     | 
    
         
            +
                # true if the WorkUnit was dispatched successfully.
         
     | 
| 
       35 
38 
     | 
    
         
             
                def send_work_unit(unit)
         
     | 
| 
       36 
39 
     | 
    
         
             
                  result = node['/work'].post(:work_unit => unit.to_json)
         
     | 
| 
       37 
40 
     | 
    
         
             
                  unit.assign_to(self, JSON.parse(result)['pid'])
         
     | 
| 
       38 
     | 
    
         
            -
                  touch
         
     | 
| 
       39 
     | 
    
         
            -
                rescue Errno::ECONNREFUSED
         
     | 
| 
       40 
     | 
    
         
            -
                   
     | 
| 
      
 41 
     | 
    
         
            +
                  touch && true
         
     | 
| 
      
 42 
     | 
    
         
            +
                rescue Errno::ECONNREFUSED # Couldn't post to node, assume it's gone away.
         
     | 
| 
      
 43 
     | 
    
         
            +
                  destroy && false
         
     | 
| 
      
 44 
     | 
    
         
            +
                rescue RestClient::RequestFailed => e
         
     | 
| 
      
 45 
     | 
    
         
            +
                  raise e unless e.http_code == 503 && e.http_body == Node::OVERLOADED_MESSAGE
         
     | 
| 
      
 46 
     | 
    
         
            +
                  update_attribute(:busy, true) && false
         
     | 
| 
       41 
47 
     | 
    
         
             
                end
         
     | 
| 
       42 
48 
     | 
    
         | 
| 
       43 
49 
     | 
    
         
             
                # What Actions is this Node able to run?
         
     | 
| 
         @@ -45,9 +51,10 @@ module CloudCrowd 
     | 
|
| 
       45 
51 
     | 
    
         
             
                  enabled_actions.split(',')
         
     | 
| 
       46 
52 
     | 
    
         
             
                end
         
     | 
| 
       47 
53 
     | 
    
         | 
| 
       48 
     | 
    
         
            -
                # Is this Node too busy for more work?  
     | 
| 
      
 54 
     | 
    
         
            +
                # Is this Node too busy for more work? Determined by number of workers, or 
         
     | 
| 
      
 55 
     | 
    
         
            +
                # the Node's load average, as configured in config.yml.
         
     | 
| 
       49 
56 
     | 
    
         
             
                def busy?
         
     | 
| 
       50 
     | 
    
         
            -
                  max_workers && work_units.count >= max_workers
         
     | 
| 
      
 57 
     | 
    
         
            +
                  busy || (max_workers && work_units.count >= max_workers)
         
     | 
| 
       51 
58 
     | 
    
         
             
                end
         
     | 
| 
       52 
59 
     | 
    
         | 
| 
       53 
60 
     | 
    
         
             
                # The URL at which this Node may be reached.
         
     | 
| 
         @@ -72,6 +79,11 @@ module CloudCrowd 
     | 
|
| 
       72 
79 
     | 
    
         
             
                  work_units.all(:select => 'worker_pid').map(&:worker_pid)
         
     | 
| 
       73 
80 
     | 
    
         
             
                end
         
     | 
| 
       74 
81 
     | 
    
         | 
| 
      
 82 
     | 
    
         
            +
                # Release all of this Node's WorkUnits for other nodes to take.
         
     | 
| 
      
 83 
     | 
    
         
            +
                def release_work_units
         
     | 
| 
      
 84 
     | 
    
         
            +
                  WorkUnit.update_all('node_record_id = null, worker_pid = null', "node_record_id = #{id}")
         
     | 
| 
      
 85 
     | 
    
         
            +
                end
         
     | 
| 
      
 86 
     | 
    
         
            +
                
         
     | 
| 
       75 
87 
     | 
    
         
             
                # The JSON representation of a NodeRecord includes its worker_pids.
         
     | 
| 
       76 
88 
     | 
    
         
             
                def to_json(opts={})
         
     | 
| 
       77 
89 
     | 
    
         
             
                  { 'host'    => host,
         
     | 
| 
         @@ -83,11 +95,10 @@ module CloudCrowd 
     | 
|
| 
       83 
95 
     | 
    
         | 
| 
       84 
96 
     | 
    
         
             
                private
         
     | 
| 
       85 
97 
     | 
    
         | 
| 
       86 
     | 
    
         
            -
                # When a Node  
     | 
| 
       87 
     | 
    
         
            -
                #  
     | 
| 
       88 
     | 
    
         
            -
                 
     | 
| 
       89 
     | 
    
         
            -
             
     | 
| 
       90 
     | 
    
         
            -
                  WorkUnit.update_all('node_record_id = null, worker_pid = null', "node_record_id = #{id}")
         
     | 
| 
      
 98 
     | 
    
         
            +
                # When a Node exits, release its WorkUnits and redistribute them to others. 
         
     | 
| 
      
 99 
     | 
    
         
            +
                # Redistribute in a separate thread to avoid delaying shutdown.
         
     | 
| 
      
 100 
     | 
    
         
            +
                def redistribute_work_units
         
     | 
| 
      
 101 
     | 
    
         
            +
                  release_work_units
         
     | 
| 
       91 
102 
     | 
    
         
             
                  Thread.new { WorkUnit.distribute_to_nodes }
         
     | 
| 
       92 
103 
     | 
    
         
             
                end
         
     | 
| 
       93 
104 
     | 
    
         | 
| 
         @@ -28,7 +28,7 @@ module CloudCrowd 
     | 
|
| 
       28 
28 
     | 
    
         
             
                  until work_units.empty? do
         
     | 
| 
       29 
29 
     | 
    
         
             
                    node = available_nodes.shift
         
     | 
| 
       30 
30 
     | 
    
         
             
                    unit = work_units.first
         
     | 
| 
       31 
     | 
    
         
            -
                    break unless node
         
     | 
| 
      
 31 
     | 
    
         
            +
                    break unless node && unit
         
     | 
| 
       32 
32 
     | 
    
         
             
                    next unless node.actions.include? unit.action
         
     | 
| 
       33 
33 
     | 
    
         
             
                    sent = node.send_work_unit(unit)
         
     | 
| 
       34 
34 
     | 
    
         
             
                    if sent
         
     | 
| 
         @@ -51,6 +51,12 @@ module CloudCrowd 
     | 
|
| 
       51 
51 
     | 
    
         
             
                  WorkUnit.reserved.update_all('reservation = null')
         
     | 
| 
       52 
52 
     | 
    
         
             
                end
         
     | 
| 
       53 
53 
     | 
    
         | 
| 
      
 54 
     | 
    
         
            +
                # Cancels all outstanding WorkUnit reservations for all processes. (Useful
         
     | 
| 
      
 55 
     | 
    
         
            +
                # in the console for debugging.)
         
     | 
| 
      
 56 
     | 
    
         
            +
                def self.cancel_all_reservations
         
     | 
| 
      
 57 
     | 
    
         
            +
                  WorkUnit.update_all('reservation = null')
         
     | 
| 
      
 58 
     | 
    
         
            +
                end
         
     | 
| 
      
 59 
     | 
    
         
            +
                
         
     | 
| 
       54 
60 
     | 
    
         
             
                # Look up a WorkUnit by the worker that's currently processing it. Specified
         
     | 
| 
       55 
61 
     | 
    
         
             
                # by <tt>pid@host</tt>.
         
     | 
| 
       56 
62 
     | 
    
         
             
                def self.find_by_worker_name(name)
         
     | 
| 
         @@ -74,7 +80,7 @@ module CloudCrowd 
     | 
|
| 
       74 
80 
     | 
    
         
             
                      WorkUnit.start(job, action, new_input, PROCESSING)
         
     | 
| 
       75 
81 
     | 
    
         
             
                    end
         
     | 
| 
       76 
82 
     | 
    
         
             
                    self.destroy
         
     | 
| 
       77 
     | 
    
         
            -
                    job.set_next_status if job.done_splitting?
         
     | 
| 
      
 83 
     | 
    
         
            +
                    job.set_next_status if job && job.done_splitting?
         
     | 
| 
       78 
84 
     | 
    
         
             
                  else
         
     | 
| 
       79 
85 
     | 
    
         
             
                    update_attributes({
         
     | 
| 
       80 
86 
     | 
    
         
             
                      :status         => SUCCEEDED,
         
     | 
| 
         @@ -84,7 +90,7 @@ module CloudCrowd 
     | 
|
| 
       84 
90 
     | 
    
         
             
                      :output         => result,
         
     | 
| 
       85 
91 
     | 
    
         
             
                      :time           => time_taken
         
     | 
| 
       86 
92 
     | 
    
         
             
                    })
         
     | 
| 
       87 
     | 
    
         
            -
                    job.check_for_completion
         
     | 
| 
      
 93 
     | 
    
         
            +
                    job && job.check_for_completion
         
     | 
| 
       88 
94 
     | 
    
         
             
                  end
         
     | 
| 
       89 
95 
     | 
    
         
             
                end
         
     | 
| 
       90 
96 
     | 
    
         | 
    
        data/lib/cloud_crowd/node.rb
    CHANGED
    
    | 
         @@ -10,9 +10,24 @@ module CloudCrowd 
     | 
|
| 
       10 
10 
     | 
    
         | 
| 
       11 
11 
     | 
    
         
             
                # A Node's default port. You only run a single node per machine, so they
         
     | 
| 
       12 
12 
     | 
    
         
             
                # can all use the same port without any problems.
         
     | 
| 
       13 
     | 
    
         
            -
                DEFAULT_PORT 
     | 
| 
      
 13 
     | 
    
         
            +
                DEFAULT_PORT        = 9063
         
     | 
| 
       14 
14 
     | 
    
         | 
| 
       15 
     | 
    
         
            -
                 
     | 
| 
      
 15 
     | 
    
         
            +
                # A list of regex scrapers, which let us extract the one-minute load 
         
     | 
| 
      
 16 
     | 
    
         
            +
                # average and the amount of free memory on different flavors of UNIX.
         
     | 
| 
      
 17 
     | 
    
         
            +
                
         
     | 
| 
      
 18 
     | 
    
         
            +
                SCRAPE_UPTIME       = /\d+\.\d+/
         
     | 
| 
      
 19 
     | 
    
         
            +
                SCRAPE_LINUX_MEMORY = /MemFree:\s+(\d+) kB/
         
     | 
| 
      
 20 
     | 
    
         
            +
                SCRAPE_MAC_MEMORY   = /Pages free:\s+(\d+)./   
         
     | 
| 
      
 21 
     | 
    
         
            +
                SCRAPE_MAC_PAGE     = /page size of (\d+) bytes/
         
     | 
| 
      
 22 
     | 
    
         
            +
                
         
     | 
| 
      
 23 
     | 
    
         
            +
                # The interval at which the node monitors the machine's load and memory use
         
     | 
| 
      
 24 
     | 
    
         
            +
                # (if configured to do so in config.yml).
         
     | 
| 
      
 25 
     | 
    
         
            +
                MONITOR_INTERVAL    = 3
         
     | 
| 
      
 26 
     | 
    
         
            +
                
         
     | 
| 
      
 27 
     | 
    
         
            +
                # The response sent back when this node is overloaded.
         
     | 
| 
      
 28 
     | 
    
         
            +
                OVERLOADED_MESSAGE  = 'Node Overloaded'
         
     | 
| 
      
 29 
     | 
    
         
            +
                
         
     | 
| 
      
 30 
     | 
    
         
            +
                attr_reader :asset_store, :enabled_actions, :host, :port, :server
         
     | 
| 
       16 
31 
     | 
    
         | 
| 
       17 
32 
     | 
    
         
             
                set :root, ROOT
         
     | 
| 
       18 
33 
     | 
    
         
             
                set :authorization_realm, "CloudCrowd"
         
     | 
| 
         @@ -35,14 +50,15 @@ module CloudCrowd 
     | 
|
| 
       35 
50 
     | 
    
         
             
                end
         
     | 
| 
       36 
51 
     | 
    
         | 
| 
       37 
52 
     | 
    
         
             
                # Posts a WorkUnit to this Node. Forks a Worker and returns the process id.
         
     | 
| 
      
 53 
     | 
    
         
            +
                # Returns a 503 if this Node is overloaded.
         
     | 
| 
       38 
54 
     | 
    
         
             
                post '/work' do
         
     | 
| 
       39 
     | 
    
         
            -
                   
     | 
| 
      
 55 
     | 
    
         
            +
                  throw :halt, [503, OVERLOADED_MESSAGE] if @overloaded
         
     | 
| 
      
 56 
     | 
    
         
            +
                  pid = fork { Worker.new(self, JSON.parse(params[:work_unit])).run }
         
     | 
| 
       40 
57 
     | 
    
         
             
                  Process.detach(pid)
         
     | 
| 
       41 
58 
     | 
    
         
             
                  json :pid => pid
         
     | 
| 
       42 
59 
     | 
    
         
             
                end
         
     | 
| 
       43 
60 
     | 
    
         | 
| 
       44 
     | 
    
         
            -
                #  
     | 
| 
       45 
     | 
    
         
            -
                # incoming WorkUnits.
         
     | 
| 
      
 61 
     | 
    
         
            +
                # When creating a node, specify the port it should run on.
         
     | 
| 
       46 
62 
     | 
    
         
             
                def initialize(port=DEFAULT_PORT)
         
     | 
| 
       47 
63 
     | 
    
         
             
                  require 'json'
         
     | 
| 
       48 
64 
     | 
    
         
             
                  @server           = CloudCrowd.central_server
         
     | 
| 
         @@ -50,25 +66,35 @@ module CloudCrowd 
     | 
|
| 
       50 
66 
     | 
    
         
             
                  @enabled_actions  = CloudCrowd.actions.keys
         
     | 
| 
       51 
67 
     | 
    
         
             
                  @asset_store      = AssetStore.new
         
     | 
| 
       52 
68 
     | 
    
         
             
                  @port             = port || DEFAULT_PORT
         
     | 
| 
       53 
     | 
    
         
            -
                  
         
     | 
| 
      
 69 
     | 
    
         
            +
                  @overloaded       = false
         
     | 
| 
      
 70 
     | 
    
         
            +
                  @max_load         = CloudCrowd.config[:max_load]
         
     | 
| 
      
 71 
     | 
    
         
            +
                  @min_memory       = CloudCrowd.config[:min_free_memory]
         
     | 
| 
      
 72 
     | 
    
         
            +
                  start unless test?
         
     | 
| 
      
 73 
     | 
    
         
            +
                end
         
     | 
| 
      
 74 
     | 
    
         
            +
                
         
     | 
| 
      
 75 
     | 
    
         
            +
                # Starting up a Node registers with the central server and begins to listen
         
     | 
| 
      
 76 
     | 
    
         
            +
                # for incoming WorkUnits.
         
     | 
| 
      
 77 
     | 
    
         
            +
                def start
         
     | 
| 
       54 
78 
     | 
    
         
             
                  trap_signals
         
     | 
| 
       55 
79 
     | 
    
         
             
                  start_server
         
     | 
| 
       56 
     | 
    
         
            -
                   
     | 
| 
      
 80 
     | 
    
         
            +
                  monitor_system if @max_load || @min_memory
         
     | 
| 
      
 81 
     | 
    
         
            +
                  check_in(true)
         
     | 
| 
       57 
82 
     | 
    
         
             
                  @server_thread.join
         
     | 
| 
       58 
83 
     | 
    
         
             
                end
         
     | 
| 
       59 
84 
     | 
    
         | 
| 
       60 
85 
     | 
    
         
             
                # Checking in with the central server informs it of the location and 
         
     | 
| 
       61 
86 
     | 
    
         
             
                # configuration of this Node. If it can't check in, there's no point in 
         
     | 
| 
       62 
87 
     | 
    
         
             
                # starting.
         
     | 
| 
       63 
     | 
    
         
            -
                def check_in
         
     | 
| 
      
 88 
     | 
    
         
            +
                def check_in(critical=false)
         
     | 
| 
       64 
89 
     | 
    
         
             
                  @server["/node/#{@host}"].put(
         
     | 
| 
       65 
90 
     | 
    
         
             
                    :port             => @port,
         
     | 
| 
      
 91 
     | 
    
         
            +
                    :busy             => @overloaded,
         
     | 
| 
       66 
92 
     | 
    
         
             
                    :max_workers      => CloudCrowd.config[:max_workers],
         
     | 
| 
       67 
93 
     | 
    
         
             
                    :enabled_actions  => @enabled_actions.join(',')
         
     | 
| 
       68 
94 
     | 
    
         
             
                  )
         
     | 
| 
       69 
95 
     | 
    
         
             
                rescue Errno::ECONNREFUSED
         
     | 
| 
       70 
     | 
    
         
            -
                  puts "Failed to connect to the central server (#{@server.to_s}) 
     | 
| 
       71 
     | 
    
         
            -
                  raise SystemExit
         
     | 
| 
      
 96 
     | 
    
         
            +
                  puts "Failed to connect to the central server (#{@server.to_s})."
         
     | 
| 
      
 97 
     | 
    
         
            +
                  raise SystemExit if critical
         
     | 
| 
       72 
98 
     | 
    
         
             
                end
         
     | 
| 
       73 
99 
     | 
    
         | 
| 
       74 
100 
     | 
    
         
             
                # Before exiting, the Node checks out with the central server, releasing all
         
     | 
| 
         @@ -77,6 +103,33 @@ module CloudCrowd 
     | 
|
| 
       77 
103 
     | 
    
         
             
                  @server["/node/#{@host}"].delete
         
     | 
| 
       78 
104 
     | 
    
         
             
                end
         
     | 
| 
       79 
105 
     | 
    
         | 
| 
      
 106 
     | 
    
         
            +
                # Is the node overloaded? If configured, checks if the load average is 
         
     | 
| 
      
 107 
     | 
    
         
            +
                # greater than 'max_load', or if the available RAM is less than
         
     | 
| 
      
 108 
     | 
    
         
            +
                # 'min_free_memory'.
         
     | 
| 
      
 109 
     | 
    
         
            +
                def overloaded?
         
     | 
| 
      
 110 
     | 
    
         
            +
                  (@max_load && load_average > @max_load) ||
         
     | 
| 
      
 111 
     | 
    
         
            +
                  (@min_memory && free_memory < @min_memory)
         
     | 
| 
      
 112 
     | 
    
         
            +
                end
         
     | 
| 
      
 113 
     | 
    
         
            +
                
         
     | 
| 
      
 114 
     | 
    
         
            +
                # The current one-minute load average.
         
     | 
| 
      
 115 
     | 
    
         
            +
                def load_average
         
     | 
| 
      
 116 
     | 
    
         
            +
                  `uptime`.match(SCRAPE_UPTIME).to_s.to_f
         
     | 
| 
      
 117 
     | 
    
         
            +
                end
         
     | 
| 
      
 118 
     | 
    
         
            +
                
         
     | 
| 
      
 119 
     | 
    
         
            +
                # The current amount of free memory in megabytes.
         
     | 
| 
      
 120 
     | 
    
         
            +
                def free_memory
         
     | 
| 
      
 121 
     | 
    
         
            +
                  case RUBY_PLATFORM
         
     | 
| 
      
 122 
     | 
    
         
            +
                  when /darwin/
         
     | 
| 
      
 123 
     | 
    
         
            +
                    stats = `vm_stat`
         
     | 
| 
      
 124 
     | 
    
         
            +
                    @mac_page_size ||= stats.match(SCRAPE_MAC_PAGE)[1].to_f / 1048576.0
         
     | 
| 
      
 125 
     | 
    
         
            +
                    stats.match(SCRAPE_MAC_MEMORY)[1].to_f * @mac_page_size
         
     | 
| 
      
 126 
     | 
    
         
            +
                  when /linux/
         
     | 
| 
      
 127 
     | 
    
         
            +
                    `cat /proc/meminfo`.match(SCRAPE_LINUX_MEMORY)[1].to_f / 1024.0
         
     | 
| 
      
 128 
     | 
    
         
            +
                  else
         
     | 
| 
      
 129 
     | 
    
         
            +
                    raise NotImplementedError, "'min_free_memory' is not yet implemented on your platform"
         
     | 
| 
      
 130 
     | 
    
         
            +
                  end
         
     | 
| 
      
 131 
     | 
    
         
            +
                end
         
     | 
| 
      
 132 
     | 
    
         
            +
                
         
     | 
| 
       80 
133 
     | 
    
         | 
| 
       81 
134 
     | 
    
         
             
                private
         
     | 
| 
       82 
135 
     | 
    
         | 
| 
         @@ -87,6 +140,20 @@ module CloudCrowd 
     | 
|
| 
       87 
140 
     | 
    
         
             
                  end
         
     | 
| 
       88 
141 
     | 
    
         
             
                end
         
     | 
| 
       89 
142 
     | 
    
         | 
| 
      
 143 
     | 
    
         
            +
                # Launch a monitoring thread that periodically checks the node's load 
         
     | 
| 
      
 144 
     | 
    
         
            +
                # average and the amount of free memory remaining. If we transition out of 
         
     | 
| 
      
 145 
     | 
    
         
            +
                # the overloaded state, let central know.
         
     | 
| 
      
 146 
     | 
    
         
            +
                def monitor_system
         
     | 
| 
      
 147 
     | 
    
         
            +
                  @monitor_thread = Thread.new do
         
     | 
| 
      
 148 
     | 
    
         
            +
                    loop do
         
     | 
| 
      
 149 
     | 
    
         
            +
                      was_overloaded = @overloaded
         
     | 
| 
      
 150 
     | 
    
         
            +
                      @overloaded = overloaded?
         
     | 
| 
      
 151 
     | 
    
         
            +
                      check_in if was_overloaded && !@overloaded
         
     | 
| 
      
 152 
     | 
    
         
            +
                      sleep MONITOR_INTERVAL
         
     | 
| 
      
 153 
     | 
    
         
            +
                    end
         
     | 
| 
      
 154 
     | 
    
         
            +
                  end
         
     | 
| 
      
 155 
     | 
    
         
            +
                end
         
     | 
| 
      
 156 
     | 
    
         
            +
                
         
     | 
| 
       90 
157 
     | 
    
         
             
                # Trap exit signals in order to shut down cleanly.
         
     | 
| 
       91 
158 
     | 
    
         
             
                def trap_signals
         
     | 
| 
       92 
159 
     | 
    
         
             
                  Signal.trap('INT')  { shut_down }
         
     | 
| 
         @@ -96,7 +163,9 @@ module CloudCrowd 
     | 
|
| 
       96 
163 
     | 
    
         | 
| 
       97 
164 
     | 
    
         
             
                # At shut down, de-register with the central server before exiting.
         
     | 
| 
       98 
165 
     | 
    
         
             
                def shut_down
         
     | 
| 
      
 166 
     | 
    
         
            +
                  @monitor_thread.kill if @monitor_thread
         
     | 
| 
       99 
167 
     | 
    
         
             
                  check_out
         
     | 
| 
      
 168 
     | 
    
         
            +
                  @server_thread.kill
         
     | 
| 
       100 
169 
     | 
    
         
             
                  Process.exit
         
     | 
| 
       101 
170 
     | 
    
         
             
                end
         
     | 
| 
       102 
171 
     | 
    
         | 
    
        data/lib/cloud_crowd/schema.rb
    CHANGED
    
    | 
         @@ -15,10 +15,11 @@ ActiveRecord::Schema.define(:version => CloudCrowd::SCHEMA_VERSION) do 
     | 
|
| 
       15 
15 
     | 
    
         
             
              end
         
     | 
| 
       16 
16 
     | 
    
         | 
| 
       17 
17 
     | 
    
         
             
              create_table "node_records", :force => true do |t|
         
     | 
| 
       18 
     | 
    
         
            -
                t.string   "host", 
     | 
| 
       19 
     | 
    
         
            -
                t.string   "ip_address", 
     | 
| 
       20 
     | 
    
         
            -
                t.integer  "port", 
     | 
| 
       21 
     | 
    
         
            -
                t.string   "enabled_actions", :default => '', 
     | 
| 
      
 18 
     | 
    
         
            +
                t.string   "host",                                :null => false
         
     | 
| 
      
 19 
     | 
    
         
            +
                t.string   "ip_address",                          :null => false
         
     | 
| 
      
 20 
     | 
    
         
            +
                t.integer  "port",                                :null => false
         
     | 
| 
      
 21 
     | 
    
         
            +
                t.string   "enabled_actions", :default => '',     :null => false
         
     | 
| 
      
 22 
     | 
    
         
            +
                t.boolean  "busy",            :default => false,  :null => false
         
     | 
| 
       22 
23 
     | 
    
         
             
                t.integer  "max_workers"
         
     | 
| 
       23 
24 
     | 
    
         
             
                t.datetime "created_at"
         
     | 
| 
       24 
25 
     | 
    
         
             
                t.datetime "updated_at"
         
     | 
    
        data/lib/cloud_crowd/worker.rb
    CHANGED
    
    | 
         @@ -14,15 +14,16 @@ module CloudCrowd 
     | 
|
| 
       14 
14 
     | 
    
         
             
                # Wait five seconds to retry, after internal communication errors.
         
     | 
| 
       15 
15 
     | 
    
         
             
                RETRY_WAIT = 5
         
     | 
| 
       16 
16 
     | 
    
         | 
| 
       17 
     | 
    
         
            -
                attr_reader : 
     | 
| 
      
 17 
     | 
    
         
            +
                attr_reader :pid, :node, :unit, :status
         
     | 
| 
       18 
18 
     | 
    
         | 
| 
       19 
     | 
    
         
            -
                # A new Worker  
     | 
| 
       20 
     | 
    
         
            -
                def initialize(node,  
     | 
| 
       21 
     | 
    
         
            -
                  @ 
     | 
| 
       22 
     | 
    
         
            -
                  @ 
     | 
| 
       23 
     | 
    
         
            -
                   
     | 
| 
       24 
     | 
    
         
            -
                   
     | 
| 
       25 
     | 
    
         
            -
                   
     | 
| 
      
 19 
     | 
    
         
            +
                # A new Worker customizes itself to its WorkUnit at instantiation.
         
     | 
| 
      
 20 
     | 
    
         
            +
                def initialize(node, unit)
         
     | 
| 
      
 21 
     | 
    
         
            +
                  @start_time = Time.now
         
     | 
| 
      
 22 
     | 
    
         
            +
                  @pid        = $$
         
     | 
| 
      
 23 
     | 
    
         
            +
                  @node       = node
         
     | 
| 
      
 24 
     | 
    
         
            +
                  @unit       = unit
         
     | 
| 
      
 25 
     | 
    
         
            +
                  @status     = @unit['status']
         
     | 
| 
      
 26 
     | 
    
         
            +
                  @retry_wait = RETRY_WAIT
         
     | 
| 
       26 
27 
     | 
    
         
             
                end
         
     | 
| 
       27 
28 
     | 
    
         | 
| 
       28 
29 
     | 
    
         
             
                # Return output to the central server, marking the WorkUnit done.
         
     | 
| 
         @@ -49,18 +50,20 @@ module CloudCrowd 
     | 
|
| 
       49 
50 
     | 
    
         
             
                def keep_trying_to(title)
         
     | 
| 
       50 
51 
     | 
    
         
             
                  begin
         
     | 
| 
       51 
52 
     | 
    
         
             
                    yield
         
     | 
| 
      
 53 
     | 
    
         
            +
                  rescue RestClient::ResourceNotFound => e
         
     | 
| 
      
 54 
     | 
    
         
            +
                    log "work unit ##{@unit['id']} doesn't exist. discarding..."
         
     | 
| 
       52 
55 
     | 
    
         
             
                  rescue Exception => e
         
     | 
| 
       53 
     | 
    
         
            -
                    log "failed to #{title} -- retry in #{ 
     | 
| 
      
 56 
     | 
    
         
            +
                    log "failed to #{title} -- retry in #{@retry_wait} seconds"
         
     | 
| 
       54 
57 
     | 
    
         
             
                    log e.message
         
     | 
| 
       55 
58 
     | 
    
         
             
                    log e.backtrace
         
     | 
| 
       56 
     | 
    
         
            -
                    sleep  
     | 
| 
      
 59 
     | 
    
         
            +
                    sleep @retry_wait
         
     | 
| 
       57 
60 
     | 
    
         
             
                    retry
         
     | 
| 
       58 
61 
     | 
    
         
             
                  end
         
     | 
| 
       59 
62 
     | 
    
         
             
                end
         
     | 
| 
       60 
63 
     | 
    
         | 
| 
       61 
64 
     | 
    
         
             
                # Loggable details describing what the Worker is up to.
         
     | 
| 
       62 
65 
     | 
    
         
             
                def display_work_unit
         
     | 
| 
       63 
     | 
    
         
            -
                  "unit ##{@ 
     | 
| 
      
 66 
     | 
    
         
            +
                  "unit ##{@unit['id']} (#{@unit['action']}/#{CloudCrowd.display_status(@status)})"
         
     | 
| 
       64 
67 
     | 
    
         
             
                end
         
     | 
| 
       65 
68 
     | 
    
         | 
| 
       66 
69 
     | 
    
         
             
                # Executes the WorkUnit by running the Action, catching all exceptions as 
         
     | 
| 
         @@ -70,12 +73,13 @@ module CloudCrowd 
     | 
|
| 
       70 
73 
     | 
    
         
             
                  @worker_thread = Thread.new do
         
     | 
| 
       71 
74 
     | 
    
         
             
                    begin
         
     | 
| 
       72 
75 
     | 
    
         
             
                      result = nil
         
     | 
| 
       73 
     | 
    
         
            -
                       
     | 
| 
       74 
     | 
    
         
            -
                       
     | 
| 
      
 76 
     | 
    
         
            +
                      action_class = CloudCrowd.actions[@unit['action']]
         
     | 
| 
      
 77 
     | 
    
         
            +
                      action = action_class.new(@status, @unit['input'], enhanced_unit_options, @node.asset_store)
         
     | 
| 
      
 78 
     | 
    
         
            +
                      Dir.chdir(action.work_directory) do
         
     | 
| 
       75 
79 
     | 
    
         
             
                        result = case @status
         
     | 
| 
       76 
     | 
    
         
            -
                        when PROCESSING then  
     | 
| 
       77 
     | 
    
         
            -
                        when SPLITTING  then  
     | 
| 
       78 
     | 
    
         
            -
                        when MERGING    then  
     | 
| 
      
 80 
     | 
    
         
            +
                        when PROCESSING then action.process
         
     | 
| 
      
 81 
     | 
    
         
            +
                        when SPLITTING  then action.split
         
     | 
| 
      
 82 
     | 
    
         
            +
                        when MERGING    then action.merge
         
     | 
| 
       79 
83 
     | 
    
         
             
                        else raise Error::StatusUnspecified, "work units must specify their status"
         
     | 
| 
       80 
84 
     | 
    
         
             
                        end
         
     | 
| 
       81 
85 
     | 
    
         
             
                      end
         
     | 
| 
         @@ -83,7 +87,7 @@ module CloudCrowd 
     | 
|
| 
       83 
87 
     | 
    
         
             
                    rescue Exception => e
         
     | 
| 
       84 
88 
     | 
    
         
             
                      fail_work_unit(e)
         
     | 
| 
       85 
89 
     | 
    
         
             
                    ensure
         
     | 
| 
       86 
     | 
    
         
            -
                       
     | 
| 
      
 90 
     | 
    
         
            +
                      action.cleanup_work_directory if action
         
     | 
| 
       87 
91 
     | 
    
         
             
                    end
         
     | 
| 
       88 
92 
     | 
    
         
             
                  end
         
     | 
| 
       89 
93 
     | 
    
         
             
                  @worker_thread.join
         
     | 
| 
         @@ -91,9 +95,26 @@ module CloudCrowd 
     | 
|
| 
       91 
95 
     | 
    
         | 
| 
       92 
96 
     | 
    
         
             
                # Wraps run_work_unit to benchmark the execution time, if requested.
         
     | 
| 
       93 
97 
     | 
    
         
             
                def run
         
     | 
| 
       94 
     | 
    
         
            -
                   
     | 
| 
       95 
     | 
    
         
            -
                   
     | 
| 
       96 
     | 
    
         
            -
                   
     | 
| 
      
 98 
     | 
    
         
            +
                  trap_signals
         
     | 
| 
      
 99 
     | 
    
         
            +
                  log "starting #{display_work_unit}"
         
     | 
| 
      
 100 
     | 
    
         
            +
                  return run_work_unit unless @unit['options']['benchmark']
         
     | 
| 
      
 101 
     | 
    
         
            +
                  log("ran #{display_work_unit} in " + Benchmark.measure { run_work_unit }.to_s)
         
     | 
| 
      
 102 
     | 
    
         
            +
                end
         
     | 
| 
      
 103 
     | 
    
         
            +
                
         
     | 
| 
      
 104 
     | 
    
         
            +
                # There are some potentially important attributes of the WorkUnit that we'd 
         
     | 
| 
      
 105 
     | 
    
         
            +
                # like to pass into the Action -- in case it needs to know them. They will 
         
     | 
| 
      
 106 
     | 
    
         
            +
                # always be made available in the options hash.
         
     | 
| 
      
 107 
     | 
    
         
            +
                def enhanced_unit_options
         
     | 
| 
      
 108 
     | 
    
         
            +
                  @unit['options'].merge({
         
     | 
| 
      
 109 
     | 
    
         
            +
                    'job_id'        => @unit['job_id'],
         
     | 
| 
      
 110 
     | 
    
         
            +
                    'work_unit_id'  => @unit['id'],
         
     | 
| 
      
 111 
     | 
    
         
            +
                    'attempts'      => @unit['attempts'] 
         
     | 
| 
      
 112 
     | 
    
         
            +
                  })
         
     | 
| 
      
 113 
     | 
    
         
            +
                end
         
     | 
| 
      
 114 
     | 
    
         
            +
                
         
     | 
| 
      
 115 
     | 
    
         
            +
                # How long has this worker been running for?
         
     | 
| 
      
 116 
     | 
    
         
            +
                def time_taken
         
     | 
| 
      
 117 
     | 
    
         
            +
                  Time.now - @start_time
         
     | 
| 
       97 
118 
     | 
    
         
             
                end
         
     | 
| 
       98 
119 
     | 
    
         | 
| 
       99 
120 
     | 
    
         | 
| 
         @@ -103,20 +124,8 @@ module CloudCrowd 
     | 
|
| 
       103 
124 
     | 
    
         
             
                # regardless of success or failure.
         
     | 
| 
       104 
125 
     | 
    
         
             
                def base_params
         
     | 
| 
       105 
126 
     | 
    
         
             
                  { :pid  => @pid,
         
     | 
| 
       106 
     | 
    
         
            -
                    :id   => @ 
     | 
| 
       107 
     | 
    
         
            -
                    :time =>  
     | 
| 
       108 
     | 
    
         
            -
                end
         
     | 
| 
       109 
     | 
    
         
            -
                
         
     | 
| 
       110 
     | 
    
         
            -
                # Extract the Worker's instance variables from a WorkUnit's JSON.
         
     | 
| 
       111 
     | 
    
         
            -
                def setup_work_unit(unit)
         
     | 
| 
       112 
     | 
    
         
            -
                  return false unless unit
         
     | 
| 
       113 
     | 
    
         
            -
                  @start_time = Time.now
         
     | 
| 
       114 
     | 
    
         
            -
                  @action_name, @input, @options, @status = unit['action'], unit['input'], unit['options'], unit['status']
         
     | 
| 
       115 
     | 
    
         
            -
                  @options['job_id'] = unit['job_id']
         
     | 
| 
       116 
     | 
    
         
            -
                  @options['work_unit_id'] = unit['id']
         
     | 
| 
       117 
     | 
    
         
            -
                  @options['attempts'] ||= unit['attempts']
         
     | 
| 
       118 
     | 
    
         
            -
                  log "fetched #{display_work_unit}"
         
     | 
| 
       119 
     | 
    
         
            -
                  return true
         
     | 
| 
      
 127 
     | 
    
         
            +
                    :id   => @unit['id'], 
         
     | 
| 
      
 128 
     | 
    
         
            +
                    :time => time_taken }
         
     | 
| 
       120 
129 
     | 
    
         
             
                end
         
     | 
| 
       121 
130 
     | 
    
         | 
| 
       122 
131 
     | 
    
         
             
                # Log a message to the daemon log. Includes PID for identification.
         
     | 
| 
         @@ -4,6 +4,7 @@ require 'test_helper' 
     | 
|
| 
       4 
4 
     | 
    
         
             
            class FailingWorkUnitsTest < Test::Unit::TestCase
         
     | 
| 
       5 
5 
     | 
    
         | 
| 
       6 
6 
     | 
    
         
             
              should "retry work units when they fail" do
         
     | 
| 
      
 7 
     | 
    
         
            +
                WorkUnit.expects(:distribute_to_nodes).returns(true)
         
     | 
| 
       7 
8 
     | 
    
         
             
                browser = Rack::Test::Session.new(Rack::MockSession.new(CloudCrowd::Server))
         
     | 
| 
       8 
9 
     | 
    
         | 
| 
       9 
10 
     | 
    
         
             
                browser.post '/jobs', :job => {
         
     | 
| 
         @@ -0,0 +1,20 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require 'test_helper'
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            class NodeAcceptanceTest < Test::Unit::TestCase
         
     | 
| 
      
 4 
     | 
    
         
            +
              
         
     | 
| 
      
 5 
     | 
    
         
            +
              include Rack::Test::Methods
         
     | 
| 
      
 6 
     | 
    
         
            +
              
         
     | 
| 
      
 7 
     | 
    
         
            +
              def app
         
     | 
| 
      
 8 
     | 
    
         
            +
                CloudCrowd::Node
         
     | 
| 
      
 9 
     | 
    
         
            +
              end
         
     | 
| 
      
 10 
     | 
    
         
            +
              
         
     | 
| 
      
 11 
     | 
    
         
            +
              context "The CloudCrowd::Node (Sinatra)" do
         
     | 
| 
      
 12 
     | 
    
         
            +
                
         
     | 
| 
      
 13 
     | 
    
         
            +
                should "have a heartbeat" do
         
     | 
| 
      
 14 
     | 
    
         
            +
                  get '/heartbeat'
         
     | 
| 
      
 15 
     | 
    
         
            +
                  assert last_response.body == 'buh-bump'
         
     | 
| 
      
 16 
     | 
    
         
            +
                end
         
     | 
| 
      
 17 
     | 
    
         
            +
              
         
     | 
| 
      
 18 
     | 
    
         
            +
              end
         
     | 
| 
      
 19 
     | 
    
         
            +
              
         
     | 
| 
      
 20 
     | 
    
         
            +
            end
         
     | 
| 
         @@ -46,6 +46,7 @@ class ServerTest < Test::Unit::TestCase 
     | 
|
| 
       46 
46 
     | 
    
         
             
                end
         
     | 
| 
       47 
47 
     | 
    
         | 
| 
       48 
48 
     | 
    
         
             
                should "be able to create a job" do
         
     | 
| 
      
 49 
     | 
    
         
            +
                  WorkUnit.expects(:distribute_to_nodes).returns(true)
         
     | 
| 
       49 
50 
     | 
    
         
             
                  post('/jobs', :job => '{"action":"graphics_magick","inputs":["http://www.google.com/"]}')
         
     | 
| 
       50 
51 
     | 
    
         
             
                  assert last_response.ok?
         
     | 
| 
       51 
52 
     | 
    
         
             
                  job_info = JSON.parse(last_response.body)
         
     | 
| 
         @@ -5,16 +5,13 @@ class WordCountTest < Test::Unit::TestCase 
     | 
|
| 
       5 
5 
     | 
    
         
             
              context "the word_count action" do
         
     | 
| 
       6 
6 
     | 
    
         | 
| 
       7 
7 
     | 
    
         
             
                setup do
         
     | 
| 
      
 8 
     | 
    
         
            +
                  WorkUnit.expects(:distribute_to_nodes).returns(true)
         
     | 
| 
       8 
9 
     | 
    
         
             
                  @asset_store = AssetStore.new
         
     | 
| 
       9 
10 
     | 
    
         
             
                  @browser = Rack::Test::Session.new(Rack::MockSession.new(CloudCrowd::Server))
         
     | 
| 
       10 
11 
     | 
    
         
             
                  @browser.put('/worker', :name => 'test_worker', :thread_status => 'sleeping')
         
     | 
| 
       11 
12 
     | 
    
         
             
                  post_job_to_count_words_in_this_file
         
     | 
| 
       12 
13 
     | 
    
         
             
                  @job_id = JSON.parse(@browser.last_response.body)['id']
         
     | 
| 
       13 
14 
     | 
    
         
             
                end
         
     | 
| 
       14 
     | 
    
         
            -
                
         
     | 
| 
       15 
     | 
    
         
            -
                teardown do
         
     | 
| 
       16 
     | 
    
         
            -
                  CloudCrowd::Job.destroy_all
         
     | 
| 
       17 
     | 
    
         
            -
                end
         
     | 
| 
       18 
15 
     | 
    
         | 
| 
       19 
16 
     | 
    
         
             
                should "be able to create a word_count job" do
         
     | 
| 
       20 
17 
     | 
    
         
             
                  assert @browser.last_response.ok? 
         
     | 
| 
         @@ -26,7 +23,7 @@ class WordCountTest < Test::Unit::TestCase 
     | 
|
| 
       26 
23 
     | 
    
         
             
                should "be able to perform the processing stage of a word_count" do
         
     | 
| 
       27 
24 
     | 
    
         
             
                  action = CloudCrowd.actions['word_count'].new(1, "file://#{File.expand_path(__FILE__)}", {}, @asset_store)
         
     | 
| 
       28 
25 
     | 
    
         
             
                  count = action.process
         
     | 
| 
       29 
     | 
    
         
            -
                  assert count ==  
     | 
| 
      
 26 
     | 
    
         
            +
                  assert count == 101
         
     | 
| 
       30 
27 
     | 
    
         
             
                end
         
     | 
| 
       31 
28 
     | 
    
         | 
| 
       32 
29 
     | 
    
         
             
              end
         
     | 
    
        data/test/blueprints.rb
    CHANGED
    
    | 
         @@ -1,4 +1,5 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            Sham.url        { Faker::Internet.domain_name + "/" + Faker::Internet.domain_word + ".jpg" }
         
     | 
| 
      
 2 
     | 
    
         
            +
            Sham.host       { Faker::Internet.domain_name + '.local' }
         
     | 
| 
       2 
3 
     | 
    
         | 
| 
       3 
4 
     | 
    
         
             
            CloudCrowd::Job.blueprint do
         
     | 
| 
       4 
5 
     | 
    
         
             
              status  { CloudCrowd::PROCESSING }
         
     | 
| 
         @@ -8,9 +9,17 @@ CloudCrowd::Job.blueprint do 
     | 
|
| 
       8 
9 
     | 
    
         
             
              email   { 'noone@example.com' }
         
     | 
| 
       9 
10 
     | 
    
         
             
            end
         
     | 
| 
       10 
11 
     | 
    
         | 
| 
      
 12 
     | 
    
         
            +
            CloudCrowd::NodeRecord.blueprint do
         
     | 
| 
      
 13 
     | 
    
         
            +
              host
         
     | 
| 
      
 14 
     | 
    
         
            +
              ip_address      { '127.0.0.1' }
         
     | 
| 
      
 15 
     | 
    
         
            +
              port            { 6093 }
         
     | 
| 
      
 16 
     | 
    
         
            +
              enabled_actions { 'graphics_magick,word_count' }
         
     | 
| 
      
 17 
     | 
    
         
            +
              max_workers     { 3 }
         
     | 
| 
      
 18 
     | 
    
         
            +
            end
         
     | 
| 
      
 19 
     | 
    
         
            +
             
     | 
| 
       11 
20 
     | 
    
         
             
            CloudCrowd::WorkUnit.blueprint do
         
     | 
| 
       12 
     | 
    
         
            -
              job 
     | 
| 
       13 
     | 
    
         
            -
              status 
     | 
| 
       14 
     | 
    
         
            -
              input 
     | 
| 
       15 
     | 
    
         
            -
              action 
     | 
| 
      
 21 
     | 
    
         
            +
              job     { CloudCrowd::Job.make }
         
     | 
| 
      
 22 
     | 
    
         
            +
              status  { CloudCrowd::PROCESSING }
         
     | 
| 
      
 23 
     | 
    
         
            +
              input   { '{"key":"value"}' }
         
     | 
| 
      
 24 
     | 
    
         
            +
              action  { 'graphics_magick' }
         
     | 
| 
       16 
25 
     | 
    
         
             
            end
         
     | 
    
        data/test/config/database.yml
    CHANGED
    
    
| 
         @@ -0,0 +1,38 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require 'test_helper'
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            class NodeUnitTest < Test::Unit::TestCase
         
     | 
| 
      
 4 
     | 
    
         
            +
              
         
     | 
| 
      
 5 
     | 
    
         
            +
              context "A Node" do
         
     | 
| 
      
 6 
     | 
    
         
            +
                
         
     | 
| 
      
 7 
     | 
    
         
            +
                setup do
         
     | 
| 
      
 8 
     | 
    
         
            +
                  @node = Node.new(11011).instance_variable_get(:@app)
         
     | 
| 
      
 9 
     | 
    
         
            +
                end
         
     | 
| 
      
 10 
     | 
    
         
            +
                
         
     | 
| 
      
 11 
     | 
    
         
            +
                should "instantiate correctly" do
         
     | 
| 
      
 12 
     | 
    
         
            +
                  assert @node.server.to_s == "http://localhost:9173"
         
     | 
| 
      
 13 
     | 
    
         
            +
                  assert @node.port == 11011
         
     | 
| 
      
 14 
     | 
    
         
            +
                  assert @node.host == Socket.gethostname
         
     | 
| 
      
 15 
     | 
    
         
            +
                  assert @node.enabled_actions.length > 2
         
     | 
| 
      
 16 
     | 
    
         
            +
                  assert @node.asset_store.is_a? AssetStore::FilesystemStore
         
     | 
| 
      
 17 
     | 
    
         
            +
                end
         
     | 
| 
      
 18 
     | 
    
         
            +
                
         
     | 
| 
      
 19 
     | 
    
         
            +
                should "trap signals and launch a server at start" do
         
     | 
| 
      
 20 
     | 
    
         
            +
                  Signal.expects(:trap).times(3)
         
     | 
| 
      
 21 
     | 
    
         
            +
                  Thin::Server.expects(:start)
         
     | 
| 
      
 22 
     | 
    
         
            +
                  @node.expects(:check_in)
         
     | 
| 
      
 23 
     | 
    
         
            +
                  @node.start
         
     | 
| 
      
 24 
     | 
    
         
            +
                end
         
     | 
| 
      
 25 
     | 
    
         
            +
                
         
     | 
| 
      
 26 
     | 
    
         
            +
                should "be able to determine if the node is overloaded" do
         
     | 
| 
      
 27 
     | 
    
         
            +
                  assert !@node.overloaded?
         
     | 
| 
      
 28 
     | 
    
         
            +
                  @node.instance_variable_set :@max_load, 0.01
         
     | 
| 
      
 29 
     | 
    
         
            +
                  assert @node.overloaded?
         
     | 
| 
      
 30 
     | 
    
         
            +
                  @node.instance_variable_set :@max_load, nil
         
     | 
| 
      
 31 
     | 
    
         
            +
                  assert !@node.overloaded?
         
     | 
| 
      
 32 
     | 
    
         
            +
                  @node.instance_variable_set :@min_memory, 8000
         
     | 
| 
      
 33 
     | 
    
         
            +
                  assert @node.overloaded?
         
     | 
| 
      
 34 
     | 
    
         
            +
                end
         
     | 
| 
      
 35 
     | 
    
         
            +
                  
         
     | 
| 
      
 36 
     | 
    
         
            +
              end
         
     | 
| 
      
 37 
     | 
    
         
            +
              
         
     | 
| 
      
 38 
     | 
    
         
            +
            end
         
     | 
| 
         @@ -0,0 +1,42 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require 'test_helper'
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            class NodeRecordTest < Test::Unit::TestCase
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
              context "A NodeRecord" do
         
     | 
| 
      
 6 
     | 
    
         
            +
                    
         
     | 
| 
      
 7 
     | 
    
         
            +
                setup do
         
     | 
| 
      
 8 
     | 
    
         
            +
                  @node = CloudCrowd::NodeRecord.make
         
     | 
| 
      
 9 
     | 
    
         
            +
                end
         
     | 
| 
      
 10 
     | 
    
         
            +
                
         
     | 
| 
      
 11 
     | 
    
         
            +
                subject { @node }
         
     | 
| 
      
 12 
     | 
    
         
            +
                
         
     | 
| 
      
 13 
     | 
    
         
            +
                should_have_many :work_units
         
     | 
| 
      
 14 
     | 
    
         
            +
                
         
     | 
| 
      
 15 
     | 
    
         
            +
                should_validate_presence_of :host, :ip_address, :port, :enabled_actions
         
     | 
| 
      
 16 
     | 
    
         
            +
                
         
     | 
| 
      
 17 
     | 
    
         
            +
                should "be available" do
         
     | 
| 
      
 18 
     | 
    
         
            +
                  assert NodeRecord.available.map(&:id).include? @node.id
         
     | 
| 
      
 19 
     | 
    
         
            +
                end
         
     | 
| 
      
 20 
     | 
    
         
            +
                
         
     | 
| 
      
 21 
     | 
    
         
            +
                should "know its enabled actions" do
         
     | 
| 
      
 22 
     | 
    
         
            +
                  assert @node.actions.include? 'graphics_magick'
         
     | 
| 
      
 23 
     | 
    
         
            +
                  assert @node.actions.include? 'word_count'
         
     | 
| 
      
 24 
     | 
    
         
            +
                end
         
     | 
| 
      
 25 
     | 
    
         
            +
                
         
     | 
| 
      
 26 
     | 
    
         
            +
                should "know if the node is busy" do
         
     | 
| 
      
 27 
     | 
    
         
            +
                  assert !@node.busy?
         
     | 
| 
      
 28 
     | 
    
         
            +
                  assert @node.display_status == 'available'
         
     | 
| 
      
 29 
     | 
    
         
            +
                  (@node.max_workers + 1).times { WorkUnit.make(:node_record => @node) }
         
     | 
| 
      
 30 
     | 
    
         
            +
                  assert @node.busy?
         
     | 
| 
      
 31 
     | 
    
         
            +
                  assert @node.display_status == 'busy'
         
     | 
| 
      
 32 
     | 
    
         
            +
                  @node.release_work_units
         
     | 
| 
      
 33 
     | 
    
         
            +
                  assert !@node.busy?
         
     | 
| 
      
 34 
     | 
    
         
            +
                end
         
     | 
| 
      
 35 
     | 
    
         
            +
                
         
     | 
| 
      
 36 
     | 
    
         
            +
                should "be reachable at a URL" do
         
     | 
| 
      
 37 
     | 
    
         
            +
                  assert !!URI.parse(@node.url)
         
     | 
| 
      
 38 
     | 
    
         
            +
                end
         
     | 
| 
      
 39 
     | 
    
         
            +
                
         
     | 
| 
      
 40 
     | 
    
         
            +
              end
         
     | 
| 
      
 41 
     | 
    
         
            +
              
         
     | 
| 
      
 42 
     | 
    
         
            +
            end
         
     | 
| 
         @@ -0,0 +1,48 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require 'test_helper'
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            class WorkerTest < Test::Unit::TestCase
         
     | 
| 
      
 4 
     | 
    
         
            +
              
         
     | 
| 
      
 5 
     | 
    
         
            +
              context "A CloudCrowd::Worker" do
         
     | 
| 
      
 6 
     | 
    
         
            +
                    
         
     | 
| 
      
 7 
     | 
    
         
            +
                setup do
         
     | 
| 
      
 8 
     | 
    
         
            +
                  @node = Node.new.instance_variable_get(:@app)
         
     | 
| 
      
 9 
     | 
    
         
            +
                  @unit = WorkUnit.make
         
     | 
| 
      
 10 
     | 
    
         
            +
                  @worker = Worker.new(@node, JSON.parse(@unit.to_json))
         
     | 
| 
      
 11 
     | 
    
         
            +
                end
         
     | 
| 
      
 12 
     | 
    
         
            +
                
         
     | 
| 
      
 13 
     | 
    
         
            +
                should "instantiate correctly" do
         
     | 
| 
      
 14 
     | 
    
         
            +
                  assert @worker.pid == $$
         
     | 
| 
      
 15 
     | 
    
         
            +
                  assert @worker.unit['id'] == @unit.id
         
     | 
| 
      
 16 
     | 
    
         
            +
                  assert @worker.status == @unit.status
         
     | 
| 
      
 17 
     | 
    
         
            +
                  assert @worker.node == @node
         
     | 
| 
      
 18 
     | 
    
         
            +
                  assert @worker.time_taken > 0
         
     | 
| 
      
 19 
     | 
    
         
            +
                end
         
     | 
| 
      
 20 
     | 
    
         
            +
                
         
     | 
| 
      
 21 
     | 
    
         
            +
                should "be able to retry operations that must succeed" do
         
     | 
| 
      
 22 
     | 
    
         
            +
                  @worker.instance_variable_set :@retry_wait, 0.01
         
     | 
| 
      
 23 
     | 
    
         
            +
                  @worker.expects(:log).at_least(3)
         
     | 
| 
      
 24 
     | 
    
         
            +
                  tries = 0
         
     | 
| 
      
 25 
     | 
    
         
            +
                  @worker.keep_trying_to("do something critical") do
         
     | 
| 
      
 26 
     | 
    
         
            +
                    tries += 1;
         
     | 
| 
      
 27 
     | 
    
         
            +
                    raise 'hell' unless tries > 3
         
     | 
| 
      
 28 
     | 
    
         
            +
                    assert "made it through"
         
     | 
| 
      
 29 
     | 
    
         
            +
                  end    
         
     | 
| 
      
 30 
     | 
    
         
            +
                end
         
     | 
| 
      
 31 
     | 
    
         
            +
                
         
     | 
| 
      
 32 
     | 
    
         
            +
                should "be able to run an action and try to complete it" do
         
     | 
| 
      
 33 
     | 
    
         
            +
                  GraphicsMagick.any_instance.expects(:process).returns('the answer')
         
     | 
| 
      
 34 
     | 
    
         
            +
                  GraphicsMagick.any_instance.expects(:cleanup_work_directory)
         
     | 
| 
      
 35 
     | 
    
         
            +
                  @worker.expects(:complete_work_unit).with({'output' => 'the answer'}.to_json)
         
     | 
| 
      
 36 
     | 
    
         
            +
                  @worker.run_work_unit
         
     | 
| 
      
 37 
     | 
    
         
            +
                end
         
     | 
| 
      
 38 
     | 
    
         
            +
                
         
     | 
| 
      
 39 
     | 
    
         
            +
                should "enchance the options that an action receives with extra info" do
         
     | 
| 
      
 40 
     | 
    
         
            +
                  opts = @worker.enhanced_unit_options
         
     | 
| 
      
 41 
     | 
    
         
            +
                  assert opts['work_unit_id'] == @unit.id
         
     | 
| 
      
 42 
     | 
    
         
            +
                  assert opts['job_id'] == @unit.job.id
         
     | 
| 
      
 43 
     | 
    
         
            +
                  assert opts['attempts'] == @unit.attempts
         
     | 
| 
      
 44 
     | 
    
         
            +
                end
         
     | 
| 
      
 45 
     | 
    
         
            +
                  
         
     | 
| 
      
 46 
     | 
    
         
            +
              end
         
     | 
| 
      
 47 
     | 
    
         
            +
              
         
     | 
| 
      
 48 
     | 
    
         
            +
            end
         
     | 
    
        metadata
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            --- !ruby/object:Gem::Specification 
         
     | 
| 
       2 
2 
     | 
    
         
             
            name: documentcloud-cloud-crowd
         
     | 
| 
       3 
3 
     | 
    
         
             
            version: !ruby/object:Gem::Version 
         
     | 
| 
       4 
     | 
    
         
            -
              version: 0.2.0
     | 
| 
      
 4 
     | 
    
         
            +
              version: 0.2.1
         
     | 
| 
       5 
5 
     | 
    
         
             
            platform: ruby
         
     | 
| 
       6 
6 
     | 
    
         
             
            authors: 
         
     | 
| 
       7 
7 
     | 
    
         
             
            - Jeremy Ashkenas
         
     | 
| 
         @@ -9,7 +9,7 @@ autorequire: 
     | 
|
| 
       9 
9 
     | 
    
         
             
            bindir: bin
         
     | 
| 
       10 
10 
     | 
    
         
             
            cert_chain: []
         
     | 
| 
       11 
11 
     | 
    
         | 
| 
       12 
     | 
    
         
            -
            date: 2009-09- 
     | 
| 
      
 12 
     | 
    
         
            +
            date: 2009-09-18 00:00:00 -07:00
         
     | 
| 
       13 
13 
     | 
    
         
             
            default_executable: 
         
     | 
| 
       14 
14 
     | 
    
         
             
            dependencies: 
         
     | 
| 
       15 
15 
     | 
    
         
             
            - !ruby/object:Gem::Dependency 
         
     | 
| 
         @@ -182,8 +182,9 @@ files: 
     | 
|
| 
       182 
182 
     | 
    
         
             
            - public/js/flot.js
         
     | 
| 
       183 
183 
     | 
    
         
             
            - public/js/jquery.js
         
     | 
| 
       184 
184 
     | 
    
         
             
            - README
         
     | 
| 
       185 
     | 
    
         
            -
            - test/acceptance/ 
     | 
| 
      
 185 
     | 
    
         
            +
            - test/acceptance/test_node.rb
         
     | 
| 
       186 
186 
     | 
    
         
             
            - test/acceptance/test_failing_work_units.rb
         
     | 
| 
      
 187 
     | 
    
         
            +
            - test/acceptance/test_server.rb
         
     | 
| 
       187 
188 
     | 
    
         
             
            - test/acceptance/test_word_count.rb
         
     | 
| 
       188 
189 
     | 
    
         
             
            - test/blueprints.rb
         
     | 
| 
       189 
190 
     | 
    
         
             
            - test/config/config.ru
         
     | 
| 
         @@ -193,7 +194,10 @@ files: 
     | 
|
| 
       193 
194 
     | 
    
         
             
            - test/test_helper.rb
         
     | 
| 
       194 
195 
     | 
    
         
             
            - test/unit/test_action.rb
         
     | 
| 
       195 
196 
     | 
    
         
             
            - test/unit/test_configuration.rb
         
     | 
| 
      
 197 
     | 
    
         
            +
            - test/unit/test_node.rb
         
     | 
| 
      
 198 
     | 
    
         
            +
            - test/unit/test_node_record.rb
         
     | 
| 
       196 
199 
     | 
    
         
             
            - test/unit/test_job.rb
         
     | 
| 
      
 200 
     | 
    
         
            +
            - test/unit/test_worker.rb
         
     | 
| 
       197 
201 
     | 
    
         
             
            - test/unit/test_work_unit.rb
         
     | 
| 
       198 
202 
     | 
    
         
             
            - views/operations_center.erb
         
     | 
| 
       199 
203 
     | 
    
         
             
            has_rdoc: true
         
     |