documentcloud-cloud-crowd 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
data/cloud-crowd.gemspec CHANGED
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'cloud-crowd'
3
- s.version = '0.2.0' # Keep version in sync with cloud-crowd.rb
4
- s.date = '2009-09-17'
3
+ s.version = '0.2.1' # Keep version in sync with cloud-crowd.rb
4
+ s.date = '2009-09-18'
5
5
 
6
6
  s.homepage = "http://wiki.github.com/documentcloud/cloud-crowd"
7
7
  s.summary = "Parallel Processing for the Rest of Us"
@@ -94,8 +94,9 @@ public/js/excanvas.js
94
94
  public/js/flot.js
95
95
  public/js/jquery.js
96
96
  README
97
- test/acceptance/test_server.rb
97
+ test/acceptance/test_node.rb
98
98
  test/acceptance/test_failing_work_units.rb
99
+ test/acceptance/test_server.rb
99
100
  test/acceptance/test_word_count.rb
100
101
  test/blueprints.rb
101
102
  test/config/config.ru
@@ -105,7 +106,10 @@ test/config/actions/failure_testing.rb
105
106
  test/test_helper.rb
106
107
  test/unit/test_action.rb
107
108
  test/unit/test_configuration.rb
109
+ test/unit/test_node.rb
110
+ test/unit/test_node_record.rb
108
111
  test/unit/test_job.rb
112
+ test/unit/test_worker.rb
109
113
  test/unit/test_work_unit.rb
110
114
  views/operations_center.erb
111
115
  )
@@ -1,14 +1,21 @@
1
1
  # The URL where you're planning on running the central server/queue/database.
2
2
  :central_server: http://localhost:9173
3
3
 
4
- # Set the maximum number of workers allowed per-node. Workers only run while
5
- # there's work to be done. It's best to set 'max_workers' below the point where
6
- # you'd start to swap or peg your CPU (as determined by experiment).
4
+ # The following settings allow you to control the number of workers that can run
5
+ # on a given node, to prevent the node from becoming overloaded. 'max_workers'
6
+ # is a simple cap on the maximum number of workers a node is allowed to run
7
+ # concurrently. 'max_load' is the maximum (one-minute) load average, above which
8
+ # a node will refuse to take new work. 'min_free_memory' is the minimum amount
9
+ # of free RAM (in megabytes) a node is allowed to have, below which no new
10
+ # workers are run. These settings may be used in any combination.
7
11
  :max_workers: 5
12
+ # :max_load: 5.0
13
+ # :min_free_memory: 150
8
14
 
9
15
  # The storage back-end that you'd like to use for intermediate and final results
10
16
  # of processing. 's3' and 'filesystem' are supported. 'filesystem' should only
11
17
  # be used in development, on single-machine installations, or networked drives.
18
+ # If you *are* developing an action, filesystem is certainly faster and easier.
12
19
  :storage: s3
13
20
 
14
21
  # Please provide your AWS credentials for S3 storage of job output.
@@ -6,4 +6,10 @@
6
6
  :username: root
7
7
  :password:
8
8
  :socket: /tmp/mysql.sock
9
- :database: cloud_crowd
9
+ :database: cloud_crowd
10
+
11
+ # If you'd prefer to use an SQLite database instead, the following configuration
12
+ # will do nicely:
13
+ #
14
+ # :adapter: sqlite3
15
+ # :database: cloud_crowd.db
data/lib/cloud-crowd.rb CHANGED
@@ -43,10 +43,10 @@ module CloudCrowd
43
43
  autoload :WorkUnit, 'cloud_crowd/models'
44
44
 
45
45
  # Keep this version in sync with the gemspec.
46
- VERSION = '0.2.0'
46
+ VERSION = '0.2.1'
47
47
 
48
48
  # Increment the schema version when there's a backwards incompatible change.
49
- SCHEMA_VERSION = 2
49
+ SCHEMA_VERSION = 3
50
50
 
51
51
  # Root directory of the CloudCrowd gem.
52
52
  ROOT = File.expand_path(File.dirname(__FILE__) + '/..')
@@ -12,6 +12,9 @@ module CloudCrowd
12
12
  #
13
13
  # All actions have use of an individual +work_directory+, for scratch files,
14
14
  # and spend their duration inside of it, so relative paths work well.
15
+ #
16
+ # Note that Actions inherit a backticks (`) method that raises an Exception
17
+ # if the external command fails.
15
18
  class Action
16
19
 
17
20
  FILE_URL = /\Afile:\/\//
@@ -33,7 +36,7 @@ module CloudCrowd
33
36
 
34
37
  # Each Action subclass must implement a +process+ method, overriding this.
35
38
  def process
36
- raise NotImplementedError.new("CloudCrowd::Actions must override 'process' with their own processing code.")
39
+ raise NotImplementedError, "CloudCrowd::Actions must override 'process' with their own processing code."
37
40
  end
38
41
 
39
42
  # Download a file to the specified path.
@@ -66,6 +69,15 @@ module CloudCrowd
66
69
  FileUtils.rm_r(@work_directory) if File.exists?(@work_directory)
67
70
  end
68
71
 
72
+ # Actions have a backticks command that raises a CommandFailed exception
73
+ # on failure, so that processing doesn't just blithely continue.
74
+ def `(command)
75
+ result = super(command)
76
+ exit_code = $?.to_i
77
+ raise Error::CommandFailed.new(result, exit_code) unless exit_code == 0
78
+ result
79
+ end
80
+
69
81
 
70
82
  private
71
83
 
@@ -77,7 +89,7 @@ module CloudCrowd
77
89
  end
78
90
 
79
91
  # The directory prefix to use for both local and S3 storage.
80
- # [action_name]/job_[job_id]/unit_[work_unit_it]
92
+ # [action]/job_[job_id]/unit_[work_unit_id]
81
93
  def storage_prefix
82
94
  path_parts = []
83
95
  path_parts << Inflector.underscore(self.class)
@@ -93,9 +105,9 @@ module CloudCrowd
93
105
 
94
106
  # If the input is a URL, download the file before beginning processing.
95
107
  def download_input
108
+ input_is_url = !!URI.parse(@input) rescue false
109
+ return unless input_is_url
96
110
  Dir.chdir(@work_directory) do
97
- input_is_url = !!URI.parse(@input) rescue false
98
- return unless input_is_url
99
111
  @input_path = File.join(@work_directory, safe_filename(@input))
100
112
  @file_name = File.basename(@input_path, File.extname(@input_path))
101
113
  download(@input, @input_path)
@@ -11,7 +11,7 @@ module CloudCrowd
11
11
  class ActionNotFound < Error
12
12
  end
13
13
 
14
- # StorageNotFound is raised when config.yml specifies a storage back end that
14
+ # StorageNotFound is raised when config.yml specifies a storage back-end that
15
15
  # doesn't exist.
16
16
  class StorageNotFound < Error
17
17
  end
@@ -30,6 +30,17 @@ module CloudCrowd
30
30
  class MissingConfiguration < Error
31
31
  end
32
32
 
33
+ # CommandFailed is raised when an action shells out, and the external
34
+ # command returns a non-zero exit code.
35
+ class CommandFailed < Error
36
+ attr_reader :exit_code
37
+
38
+ def initialize(message, exit_code)
39
+ super(message)
40
+ @exit_code = exit_code
41
+ end
42
+ end
43
+
33
44
  end
34
45
 
35
46
  end
@@ -15,8 +15,7 @@ module CloudCrowd
15
15
  after_create :queue_for_workers
16
16
  before_destroy :cleanup_assets
17
17
 
18
- # Create a Job from an incoming JSON or XML request, and add it to the queue.
19
- # TODO: Think about XML support.
18
+ # Create a Job from an incoming JSON request, and add it to the queue.
20
19
  def self.create_from_request(h)
21
20
  self.create(
22
21
  :inputs => h['inputs'].to_json,
@@ -41,7 +40,8 @@ module CloudCrowd
41
40
  self
42
41
  end
43
42
 
44
- # Transition this Job's status to the appropriate next status.
43
+ # Transition this Job's current status to the appropriate next one, based
44
+ # on the state of the WorkUnits and the nature of the Action.
45
45
  def set_next_status
46
46
  update_attribute(:status,
47
47
  any_work_units_failed? ? FAILED :
@@ -66,8 +66,9 @@ module CloudCrowd
66
66
  end
67
67
  end
68
68
 
69
- # Cleaning up after a job will remove all of its files from S3. Destroying
70
- # a Job calls cleanup_assets first.
69
+ # Cleaning up after a job will remove all of its files from S3 or the
70
+ # filesystem. Destroying a Job will cleanup_assets first. Run this in a
71
+ # separate thread to get out of the transaction's way.
71
72
  # TODO: Convert this into a 'cleanup' work unit that gets run by a worker.
72
73
  def cleanup_assets
73
74
  AssetStore.new.cleanup(self)
@@ -7,9 +7,9 @@ module CloudCrowd
7
7
 
8
8
  has_many :work_units
9
9
 
10
- validates_presence_of :host, :ip_address, :port
10
+ validates_presence_of :host, :ip_address, :port, :enabled_actions
11
11
 
12
- before_destroy :clear_work_units
12
+ after_destroy :redistribute_work_units
13
13
 
14
14
  # Available Nodes haven't used up their maximum number of workers yet.
15
15
  named_scope :available, {
@@ -23,6 +23,7 @@ module CloudCrowd
23
23
  attrs = {
24
24
  :ip_address => request.ip,
25
25
  :port => params[:port],
26
+ :busy => params[:busy],
26
27
  :max_workers => params[:max_workers],
27
28
  :enabled_actions => params[:enabled_actions]
28
29
  }
@@ -32,12 +33,17 @@ module CloudCrowd
32
33
  # Dispatch a WorkUnit to this node. Places the node back at the end of
33
34
  # the rotation. If we fail to send the WorkUnit, we consider the node to be
34
35
  # down, and remove this record, freeing up all of its checked-out work units.
36
+ # If the Node responds that it's overloaded, we mark it as busy. Returns
37
+ # true if the WorkUnit was dispatched successfully.
35
38
  def send_work_unit(unit)
36
39
  result = node['/work'].post(:work_unit => unit.to_json)
37
40
  unit.assign_to(self, JSON.parse(result)['pid'])
38
- touch
39
- rescue Errno::ECONNREFUSED
40
- self.destroy # Couldn't post to node, assume it's gone away.
41
+ touch && true
42
+ rescue Errno::ECONNREFUSED # Couldn't post to node, assume it's gone away.
43
+ destroy && false
44
+ rescue RestClient::RequestFailed => e
45
+ raise e unless e.http_code == 503 && e.http_body == Node::OVERLOADED_MESSAGE
46
+ update_attribute(:busy, true) && false
41
47
  end
42
48
 
43
49
  # What Actions is this Node able to run?
@@ -45,9 +51,10 @@ module CloudCrowd
45
51
  enabled_actions.split(',')
46
52
  end
47
53
 
48
- # Is this Node too busy for more work? (Determined by number of workers.)
54
+ # Is this Node too busy for more work? Determined by number of workers, or
55
+ # the Node's load average, as configured in config.yml.
49
56
  def busy?
50
- max_workers && work_units.count >= max_workers
57
+ busy || (max_workers && work_units.count >= max_workers)
51
58
  end
52
59
 
53
60
  # The URL at which this Node may be reached.
@@ -72,6 +79,11 @@ module CloudCrowd
72
79
  work_units.all(:select => 'worker_pid').map(&:worker_pid)
73
80
  end
74
81
 
82
+ # Release all of this Node's WorkUnits for other nodes to take.
83
+ def release_work_units
84
+ WorkUnit.update_all('node_record_id = null, worker_pid = null', "node_record_id = #{id}")
85
+ end
86
+
75
87
  # The JSON representation of a NodeRecord includes its worker_pids.
76
88
  def to_json(opts={})
77
89
  { 'host' => host,
@@ -83,11 +95,10 @@ module CloudCrowd
83
95
 
84
96
  private
85
97
 
86
- # When a Node shuts down, we free up all of the WorkUnits that it had
87
- # reserved, and they become available for others to pick up. Redistribute
88
- # the WorkUnits in a separate thread to avoid delaying Node shutdown.
89
- def clear_work_units
90
- WorkUnit.update_all('node_record_id = null, worker_pid = null', "node_record_id = #{id}")
98
+ # When a Node exits, release its WorkUnits and redistribute them to others.
99
+ # Redistribute in a separate thread to avoid delaying shutdown.
100
+ def redistribute_work_units
101
+ release_work_units
91
102
  Thread.new { WorkUnit.distribute_to_nodes }
92
103
  end
93
104
 
@@ -28,7 +28,7 @@ module CloudCrowd
28
28
  until work_units.empty? do
29
29
  node = available_nodes.shift
30
30
  unit = work_units.first
31
- break unless node
31
+ break unless node && unit
32
32
  next unless node.actions.include? unit.action
33
33
  sent = node.send_work_unit(unit)
34
34
  if sent
@@ -51,6 +51,12 @@ module CloudCrowd
51
51
  WorkUnit.reserved.update_all('reservation = null')
52
52
  end
53
53
 
54
+ # Cancels all outstanding WorkUnit reservations for all processes. (Useful
55
+ # in the console for debugging.)
56
+ def self.cancel_all_reservations
57
+ WorkUnit.update_all('reservation = null')
58
+ end
59
+
54
60
  # Look up a WorkUnit by the worker that's currently processing it. Specified
55
61
  # by <tt>pid@host</tt>.
56
62
  def self.find_by_worker_name(name)
@@ -74,7 +80,7 @@ module CloudCrowd
74
80
  WorkUnit.start(job, action, new_input, PROCESSING)
75
81
  end
76
82
  self.destroy
77
- job.set_next_status if job.done_splitting?
83
+ job.set_next_status if job && job.done_splitting?
78
84
  else
79
85
  update_attributes({
80
86
  :status => SUCCEEDED,
@@ -84,7 +90,7 @@ module CloudCrowd
84
90
  :output => result,
85
91
  :time => time_taken
86
92
  })
87
- job.check_for_completion
93
+ job && job.check_for_completion
88
94
  end
89
95
  end
90
96
 
@@ -10,9 +10,24 @@ module CloudCrowd
10
10
 
11
11
  # A Node's default port. You only run a single node per machine, so they
12
12
  # can all use the same port without any problems.
13
- DEFAULT_PORT = 9063
13
+ DEFAULT_PORT = 9063
14
14
 
15
- attr_reader :server, :asset_store
15
+ # A list of regex scrapers, which let us extract the one-minute load
16
+ # average and the amount of free memory on different flavors of UNIX.
17
+
18
+ SCRAPE_UPTIME = /\d+\.\d+/
19
+ SCRAPE_LINUX_MEMORY = /MemFree:\s+(\d+) kB/
20
+ SCRAPE_MAC_MEMORY = /Pages free:\s+(\d+)./
21
+ SCRAPE_MAC_PAGE = /page size of (\d+) bytes/
22
+
23
+ # The interval at which the node monitors the machine's load and memory use
24
+ # (if configured to do so in config.yml).
25
+ MONITOR_INTERVAL = 3
26
+
27
+ # The response sent back when this node is overloaded.
28
+ OVERLOADED_MESSAGE = 'Node Overloaded'
29
+
30
+ attr_reader :asset_store, :enabled_actions, :host, :port, :server
16
31
 
17
32
  set :root, ROOT
18
33
  set :authorization_realm, "CloudCrowd"
@@ -35,14 +50,15 @@ module CloudCrowd
35
50
  end
36
51
 
37
52
  # Posts a WorkUnit to this Node. Forks a Worker and returns the process id.
53
+ # Returns a 503 if this Node is overloaded.
38
54
  post '/work' do
39
- pid = fork { Worker.new(self, JSON.parse(params[:work_unit])) }
55
+ throw :halt, [503, OVERLOADED_MESSAGE] if @overloaded
56
+ pid = fork { Worker.new(self, JSON.parse(params[:work_unit])).run }
40
57
  Process.detach(pid)
41
58
  json :pid => pid
42
59
  end
43
60
 
44
- # Creating a Node registers with the central server and starts listening for
45
- # incoming WorkUnits.
61
+ # When creating a node, specify the port it should run on.
46
62
  def initialize(port=DEFAULT_PORT)
47
63
  require 'json'
48
64
  @server = CloudCrowd.central_server
@@ -50,25 +66,35 @@ module CloudCrowd
50
66
  @enabled_actions = CloudCrowd.actions.keys
51
67
  @asset_store = AssetStore.new
52
68
  @port = port || DEFAULT_PORT
53
-
69
+ @overloaded = false
70
+ @max_load = CloudCrowd.config[:max_load]
71
+ @min_memory = CloudCrowd.config[:min_free_memory]
72
+ start unless test?
73
+ end
74
+
75
+ # Starting up a Node registers with the central server and begins to listen
76
+ # for incoming WorkUnits.
77
+ def start
54
78
  trap_signals
55
79
  start_server
56
- check_in
80
+ monitor_system if @max_load || @min_memory
81
+ check_in(true)
57
82
  @server_thread.join
58
83
  end
59
84
 
60
85
  # Checking in with the central server informs it of the location and
61
86
  # configuration of this Node. If it can't check-in, there's no point in
62
87
  # starting.
63
- def check_in
88
+ def check_in(critical=false)
64
89
  @server["/node/#{@host}"].put(
65
90
  :port => @port,
91
+ :busy => @overloaded,
66
92
  :max_workers => CloudCrowd.config[:max_workers],
67
93
  :enabled_actions => @enabled_actions.join(',')
68
94
  )
69
95
  rescue Errno::ECONNREFUSED
70
- puts "Failed to connect to the central server (#{@server.to_s}), exiting..."
71
- raise SystemExit
96
+ puts "Failed to connect to the central server (#{@server.to_s})."
97
+ raise SystemExit if critical
72
98
  end
73
99
 
74
100
  # Before exiting, the Node checks out with the central server, releasing all
@@ -77,6 +103,33 @@ module CloudCrowd
77
103
  @server["/node/#{@host}"].delete
78
104
  end
79
105
 
106
+ # Is the node overloaded? If configured, checks if the load average is
107
+ # greater than 'max_load', or if the available RAM is less than
108
+ # 'min_free_memory'.
109
+ def overloaded?
110
+ (@max_load && load_average > @max_load) ||
111
+ (@min_memory && free_memory < @min_memory)
112
+ end
113
+
114
+ # The current one-minute load average.
115
+ def load_average
116
+ `uptime`.match(SCRAPE_UPTIME).to_s.to_f
117
+ end
118
+
119
+ # The current amount of free memory in megabytes.
120
+ def free_memory
121
+ case RUBY_PLATFORM
122
+ when /darwin/
123
+ stats = `vm_stat`
124
+ @mac_page_size ||= stats.match(SCRAPE_MAC_PAGE)[1].to_f / 1048576.0
125
+ stats.match(SCRAPE_MAC_MEMORY)[1].to_f * @mac_page_size
126
+ when /linux/
127
+ `cat /proc/meminfo`.match(SCRAPE_LINUX_MEMORY)[1].to_f / 1024.0
128
+ else
129
+ raise NotImplementedError, "'min_free_memory' is not yet implemented on your platform"
130
+ end
131
+ end
132
+
80
133
 
81
134
  private
82
135
 
@@ -87,6 +140,20 @@ module CloudCrowd
87
140
  end
88
141
  end
89
142
 
143
+ # Launch a monitoring thread that periodically checks the node's load
144
+ # average and the amount of free memory remaining. If we transition out of
145
+ # the overloaded state, let central know.
146
+ def monitor_system
147
+ @monitor_thread = Thread.new do
148
+ loop do
149
+ was_overloaded = @overloaded
150
+ @overloaded = overloaded?
151
+ check_in if was_overloaded && !@overloaded
152
+ sleep MONITOR_INTERVAL
153
+ end
154
+ end
155
+ end
156
+
90
157
  # Trap exit signals in order to shut down cleanly.
91
158
  def trap_signals
92
159
  Signal.trap('INT') { shut_down }
@@ -96,7 +163,9 @@ module CloudCrowd
96
163
 
97
164
  # At shut down, de-register with the central server before exiting.
98
165
  def shut_down
166
+ @monitor_thread.kill if @monitor_thread
99
167
  check_out
168
+ @server_thread.kill
100
169
  Process.exit
101
170
  end
102
171
 
@@ -15,10 +15,11 @@ ActiveRecord::Schema.define(:version => CloudCrowd::SCHEMA_VERSION) do
15
15
  end
16
16
 
17
17
  create_table "node_records", :force => true do |t|
18
- t.string "host", :null => false
19
- t.string "ip_address", :null => false
20
- t.integer "port", :null => false
21
- t.string "enabled_actions", :default => '', :null => false
18
+ t.string "host", :null => false
19
+ t.string "ip_address", :null => false
20
+ t.integer "port", :null => false
21
+ t.string "enabled_actions", :default => '', :null => false
22
+ t.boolean "busy", :default => false, :null => false
22
23
  t.integer "max_workers"
23
24
  t.datetime "created_at"
24
25
  t.datetime "updated_at"
@@ -14,15 +14,16 @@ module CloudCrowd
14
14
  # Wait five seconds to retry, after internal communication errors.
15
15
  RETRY_WAIT = 5
16
16
 
17
- attr_reader :action
17
+ attr_reader :pid, :node, :unit, :status
18
18
 
19
- # A new Worker begins processing its WorkUnit straight off.
20
- def initialize(node, work_unit)
21
- @pid = $$
22
- @node = node
23
- trap_signals
24
- setup_work_unit(work_unit)
25
- run
19
+ # A new Worker customizes itself to its WorkUnit at instantiation.
20
+ def initialize(node, unit)
21
+ @start_time = Time.now
22
+ @pid = $$
23
+ @node = node
24
+ @unit = unit
25
+ @status = @unit['status']
26
+ @retry_wait = RETRY_WAIT
26
27
  end
27
28
 
28
29
  # Return output to the central server, marking the WorkUnit done.
@@ -49,18 +50,20 @@ module CloudCrowd
49
50
  def keep_trying_to(title)
50
51
  begin
51
52
  yield
53
+ rescue RestClient::ResourceNotFound => e
54
+ log "work unit ##{@unit['id']} doesn't exist. discarding..."
52
55
  rescue Exception => e
53
- log "failed to #{title} -- retry in #{RETRY_WAIT} seconds"
56
+ log "failed to #{title} -- retry in #{@retry_wait} seconds"
54
57
  log e.message
55
58
  log e.backtrace
56
- sleep RETRY_WAIT
59
+ sleep @retry_wait
57
60
  retry
58
61
  end
59
62
  end
60
63
 
61
64
  # Loggable details describing what the Worker is up to.
62
65
  def display_work_unit
63
- "unit ##{@options['work_unit_id']} (#{@action_name}/#{CloudCrowd.display_status(@status)})"
66
+ "unit ##{@unit['id']} (#{@unit['action']}/#{CloudCrowd.display_status(@status)})"
64
67
  end
65
68
 
66
69
  # Executes the WorkUnit by running the Action, catching all exceptions as
@@ -70,12 +73,13 @@ module CloudCrowd
70
73
  @worker_thread = Thread.new do
71
74
  begin
72
75
  result = nil
73
- @action = CloudCrowd.actions[@action_name].new(@status, @input, @options, @node.asset_store)
74
- Dir.chdir(@action.work_directory) do
76
+ action_class = CloudCrowd.actions[@unit['action']]
77
+ action = action_class.new(@status, @unit['input'], enhanced_unit_options, @node.asset_store)
78
+ Dir.chdir(action.work_directory) do
75
79
  result = case @status
76
- when PROCESSING then @action.process
77
- when SPLITTING then @action.split
78
- when MERGING then @action.merge
80
+ when PROCESSING then action.process
81
+ when SPLITTING then action.split
82
+ when MERGING then action.merge
79
83
  else raise Error::StatusUnspecified, "work units must specify their status"
80
84
  end
81
85
  end
@@ -83,7 +87,7 @@ module CloudCrowd
83
87
  rescue Exception => e
84
88
  fail_work_unit(e)
85
89
  ensure
86
- @action.cleanup_work_directory
90
+ action.cleanup_work_directory if action
87
91
  end
88
92
  end
89
93
  @worker_thread.join
@@ -91,9 +95,26 @@ module CloudCrowd
91
95
 
92
96
  # Wraps run_work_unit to benchmark the execution time, if requested.
93
97
  def run
94
- return run_work_unit unless @options['benchmark']
95
- status = CloudCrowd.display_status(@status)
96
- log("ran #{@action_name}/#{status} in " + Benchmark.measure { run_work_unit }.to_s)
98
+ trap_signals
99
+ log "starting #{display_work_unit}"
100
+ return run_work_unit unless @unit['options']['benchmark']
101
+ log("ran #{display_work_unit} in " + Benchmark.measure { run_work_unit }.to_s)
102
+ end
103
+
104
+ # There are some potentially important attributes of the WorkUnit that we'd
105
+ # like to pass into the Action -- in case it needs to know them. They will
106
+ # always be made available in the options hash.
107
+ def enhanced_unit_options
108
+ @unit['options'].merge({
109
+ 'job_id' => @unit['job_id'],
110
+ 'work_unit_id' => @unit['id'],
111
+ 'attempts' => @unit['attempts']
112
+ })
113
+ end
114
+
115
+ # How long has this worker been running for?
116
+ def time_taken
117
+ Time.now - @start_time
97
118
  end
98
119
 
99
120
 
@@ -103,20 +124,8 @@ module CloudCrowd
103
124
  # regardless of success or failure.
104
125
  def base_params
105
126
  { :pid => @pid,
106
- :id => @options['work_unit_id'],
107
- :time => Time.now - @start_time }
108
- end
109
-
110
- # Extract the Worker's instance variables from a WorkUnit's JSON.
111
- def setup_work_unit(unit)
112
- return false unless unit
113
- @start_time = Time.now
114
- @action_name, @input, @options, @status = unit['action'], unit['input'], unit['options'], unit['status']
115
- @options['job_id'] = unit['job_id']
116
- @options['work_unit_id'] = unit['id']
117
- @options['attempts'] ||= unit['attempts']
118
- log "fetched #{display_work_unit}"
119
- return true
127
+ :id => @unit['id'],
128
+ :time => time_taken }
120
129
  end
121
130
 
122
131
  # Log a message to the daemon log. Includes PID for identification.
@@ -4,6 +4,7 @@ require 'test_helper'
4
4
  class FailingWorkUnitsTest < Test::Unit::TestCase
5
5
 
6
6
  should "retry work units when they fail" do
7
+ WorkUnit.expects(:distribute_to_nodes).returns(true)
7
8
  browser = Rack::Test::Session.new(Rack::MockSession.new(CloudCrowd::Server))
8
9
 
9
10
  browser.post '/jobs', :job => {
@@ -0,0 +1,20 @@
1
+ require 'test_helper'
2
+
3
+ class NodeAcceptanceTest < Test::Unit::TestCase
4
+
5
+ include Rack::Test::Methods
6
+
7
+ def app
8
+ CloudCrowd::Node
9
+ end
10
+
11
+ context "The CloudCrowd::Node (Sinatra)" do
12
+
13
+ should "have a heartbeat" do
14
+ get '/heartbeat'
15
+ assert last_response.body == 'buh-bump'
16
+ end
17
+
18
+ end
19
+
20
+ end
@@ -46,6 +46,7 @@ class ServerTest < Test::Unit::TestCase
46
46
  end
47
47
 
48
48
  should "be able to create a job" do
49
+ WorkUnit.expects(:distribute_to_nodes).returns(true)
49
50
  post('/jobs', :job => '{"action":"graphics_magick","inputs":["http://www.google.com/"]}')
50
51
  assert last_response.ok?
51
52
  job_info = JSON.parse(last_response.body)
@@ -5,16 +5,13 @@ class WordCountTest < Test::Unit::TestCase
5
5
  context "the word_count action" do
6
6
 
7
7
  setup do
8
+ WorkUnit.expects(:distribute_to_nodes).returns(true)
8
9
  @asset_store = AssetStore.new
9
10
  @browser = Rack::Test::Session.new(Rack::MockSession.new(CloudCrowd::Server))
10
11
  @browser.put('/worker', :name => 'test_worker', :thread_status => 'sleeping')
11
12
  post_job_to_count_words_in_this_file
12
13
  @job_id = JSON.parse(@browser.last_response.body)['id']
13
14
  end
14
-
15
- teardown do
16
- CloudCrowd::Job.destroy_all
17
- end
18
15
 
19
16
  should "be able to create a word_count job" do
20
17
  assert @browser.last_response.ok?
@@ -26,7 +23,7 @@ class WordCountTest < Test::Unit::TestCase
26
23
  should "be able to perform the processing stage of a word_count" do
27
24
  action = CloudCrowd.actions['word_count'].new(1, "file://#{File.expand_path(__FILE__)}", {}, @asset_store)
28
25
  count = action.process
29
- assert count == 104
26
+ assert count == 101
30
27
  end
31
28
 
32
29
  end
data/test/blueprints.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  Sham.url { Faker::Internet.domain_name + "/" + Faker::Internet.domain_word + ".jpg" }
2
+ Sham.host { Faker::Internet.domain_name + '.local' }
2
3
 
3
4
  CloudCrowd::Job.blueprint do
4
5
  status { CloudCrowd::PROCESSING }
@@ -8,9 +9,17 @@ CloudCrowd::Job.blueprint do
8
9
  email { 'noone@example.com' }
9
10
  end
10
11
 
12
+ CloudCrowd::NodeRecord.blueprint do
13
+ host
14
+ ip_address { '127.0.0.1' }
15
+ port { 6093 }
16
+ enabled_actions { 'graphics_magick,word_count' }
17
+ max_workers { 3 }
18
+ end
19
+
11
20
  CloudCrowd::WorkUnit.blueprint do
12
- job { CloudCrowd::Job.make }
13
- status { CloudCrowd::PROCESSING }
14
- input { Sham.url }
15
- action { 'graphics_magick' }
21
+ job { CloudCrowd::Job.make }
22
+ status { CloudCrowd::PROCESSING }
23
+ input { '{"key":"value"}' }
24
+ action { 'graphics_magick' }
16
25
  end
@@ -1,6 +1,3 @@
1
- :adapter: mysql
2
- :encoding: utf8
3
- :username: root
4
- :password:
5
- :socket: /tmp/mysql.sock
6
- :database: cloud_crowd_test
1
+ :adapter: sqlite3
2
+ :database: test/cloud_crowd_test.db
3
+ :timeout: 5000
@@ -0,0 +1,38 @@
1
+ require 'test_helper'
2
+
3
+ class NodeUnitTest < Test::Unit::TestCase
4
+
5
+ context "A Node" do
6
+
7
+ setup do
8
+ @node = Node.new(11011).instance_variable_get(:@app)
9
+ end
10
+
11
+ should "instantiate correctly" do
12
+ assert @node.server.to_s == "http://localhost:9173"
13
+ assert @node.port == 11011
14
+ assert @node.host == Socket.gethostname
15
+ assert @node.enabled_actions.length > 2
16
+ assert @node.asset_store.is_a? AssetStore::FilesystemStore
17
+ end
18
+
19
+ should "trap signals and launch a server at start" do
20
+ Signal.expects(:trap).times(3)
21
+ Thin::Server.expects(:start)
22
+ @node.expects(:check_in)
23
+ @node.start
24
+ end
25
+
26
+ should "be able to determine if the node is overloaded" do
27
+ assert !@node.overloaded?
28
+ @node.instance_variable_set :@max_load, 0.01
29
+ assert @node.overloaded?
30
+ @node.instance_variable_set :@max_load, nil
31
+ assert !@node.overloaded?
32
+ @node.instance_variable_set :@min_memory, 8000
33
+ assert @node.overloaded?
34
+ end
35
+
36
+ end
37
+
38
+ end
@@ -0,0 +1,42 @@
1
+ require 'test_helper'
2
+
3
+ class NodeRecordTest < Test::Unit::TestCase
4
+
5
+ context "A NodeRecord" do
6
+
7
+ setup do
8
+ @node = CloudCrowd::NodeRecord.make
9
+ end
10
+
11
+ subject { @node }
12
+
13
+ should_have_many :work_units
14
+
15
+ should_validate_presence_of :host, :ip_address, :port, :enabled_actions
16
+
17
+ should "be available" do
18
+ assert NodeRecord.available.map(&:id).include? @node.id
19
+ end
20
+
21
+ should "know its enabled actions" do
22
+ assert @node.actions.include? 'graphics_magick'
23
+ assert @node.actions.include? 'word_count'
24
+ end
25
+
26
+ should "know if the node is busy" do
27
+ assert !@node.busy?
28
+ assert @node.display_status == 'available'
29
+ (@node.max_workers + 1).times { WorkUnit.make(:node_record => @node) }
30
+ assert @node.busy?
31
+ assert @node.display_status == 'busy'
32
+ @node.release_work_units
33
+ assert !@node.busy?
34
+ end
35
+
36
+ should "be reachable at a URL" do
37
+ assert !!URI.parse(@node.url)
38
+ end
39
+
40
+ end
41
+
42
+ end
@@ -0,0 +1,48 @@
1
+ require 'test_helper'
2
+
3
+ class WorkerTest < Test::Unit::TestCase
4
+
5
+ context "A CloudCrowd::Worker" do
6
+
7
+ setup do
8
+ @node = Node.new.instance_variable_get(:@app)
9
+ @unit = WorkUnit.make
10
+ @worker = Worker.new(@node, JSON.parse(@unit.to_json))
11
+ end
12
+
13
+ should "instantiate correctly" do
14
+ assert @worker.pid == $$
15
+ assert @worker.unit['id'] == @unit.id
16
+ assert @worker.status == @unit.status
17
+ assert @worker.node == @node
18
+ assert @worker.time_taken > 0
19
+ end
20
+
21
+ should "be able to retry operations that must succeed" do
22
+ @worker.instance_variable_set :@retry_wait, 0.01
23
+ @worker.expects(:log).at_least(3)
24
+ tries = 0
25
+ @worker.keep_trying_to("do something critical") do
26
+ tries += 1;
27
+ raise 'hell' unless tries > 3
28
+ assert "made it through"
29
+ end
30
+ end
31
+
32
+ should "be able to run an action and try to complete it" do
33
+ GraphicsMagick.any_instance.expects(:process).returns('the answer')
34
+ GraphicsMagick.any_instance.expects(:cleanup_work_directory)
35
+ @worker.expects(:complete_work_unit).with({'output' => 'the answer'}.to_json)
36
+ @worker.run_work_unit
37
+ end
38
+
39
+ should "enchance the options that an action receives with extra info" do
40
+ opts = @worker.enhanced_unit_options
41
+ assert opts['work_unit_id'] == @unit.id
42
+ assert opts['job_id'] == @unit.job.id
43
+ assert opts['attempts'] == @unit.attempts
44
+ end
45
+
46
+ end
47
+
48
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: documentcloud-cloud-crowd
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jeremy Ashkenas
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-09-17 00:00:00 -07:00
12
+ date: 2009-09-18 00:00:00 -07:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -182,8 +182,9 @@ files:
182
182
  - public/js/flot.js
183
183
  - public/js/jquery.js
184
184
  - README
185
- - test/acceptance/test_server.rb
185
+ - test/acceptance/test_node.rb
186
186
  - test/acceptance/test_failing_work_units.rb
187
+ - test/acceptance/test_server.rb
187
188
  - test/acceptance/test_word_count.rb
188
189
  - test/blueprints.rb
189
190
  - test/config/config.ru
@@ -193,7 +194,10 @@ files:
193
194
  - test/test_helper.rb
194
195
  - test/unit/test_action.rb
195
196
  - test/unit/test_configuration.rb
197
+ - test/unit/test_node.rb
198
+ - test/unit/test_node_record.rb
196
199
  - test/unit/test_job.rb
200
+ - test/unit/test_worker.rb
197
201
  - test/unit/test_work_unit.rb
198
202
  - views/operations_center.erb
199
203
  has_rdoc: true