cloud-crowd 0.2.3 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'cloud-crowd'
3
- s.version = '0.2.3' # Keep version in sync with cloud-cloud.rb
4
- s.date = '2009-09-23'
3
+ s.version = '0.2.4' # Keep version in sync with cloud-crowd.rb
4
+ s.date = '2009-09-28'
5
5
 
6
6
  s.homepage = "http://wiki.github.com/documentcloud/cloud-crowd"
7
7
  s.summary = "Parallel Processing for the Rest of Us"
@@ -44,7 +44,7 @@ module CloudCrowd
44
44
  autoload :WorkUnit, 'cloud_crowd/models'
45
45
 
46
46
  # Keep this version in sync with the gemspec.
47
- VERSION = '0.2.3'
47
+ VERSION = '0.2.4'
48
48
 
49
49
  # Increment the schema version when there's a backwards incompatible change.
50
50
  SCHEMA_VERSION = 3
@@ -162,6 +162,9 @@ module CloudCrowd
162
162
  @actions[name] = Module.const_get(Inflector.camelize(name))
163
163
  end
164
164
  @actions
165
+ rescue NameError => e
166
+ adjusted_message = "One of your actions failed to load. Please ensure that the name of your action class can be deduced from the name of the file. ex: 'word_count.rb' => 'WordCount'\n#{e.message}"
167
+ raise NameError.new(adjusted_message, e.name)
165
168
  end
166
169
  end
167
170
 
@@ -35,7 +35,7 @@ module CloudCrowd
35
35
  return queue_for_workers(outs) if merging?
36
36
  if complete?
37
37
  update_attributes(:outputs => outs, :time => time_taken)
38
- fire_callback if callback_url
38
+ Thread.new { fire_callback } if callback_url
39
39
  end
40
40
  self
41
41
  end
@@ -39,11 +39,12 @@ module CloudCrowd
39
39
  result = node['/work'].post(:work_unit => unit.to_json)
40
40
  unit.assign_to(self, JSON.parse(result)['pid'])
41
41
  touch && true
42
- rescue Errno::ECONNREFUSED # Couldn't post to node, assume it's gone away.
43
- destroy && false
44
42
  rescue RestClient::RequestFailed => e
45
43
  raise e unless e.http_code == 503 && e.http_body == Node::OVERLOADED_MESSAGE
46
44
  update_attribute(:busy, true) && false
45
+ rescue RestClient::Exception, Errno::ECONNREFUSED
46
+ # Couldn't post to node, assume it's gone away.
47
+ destroy && false
47
48
  end
48
49
 
49
50
  # What Actions is this Node able to run?
@@ -7,6 +7,10 @@ module CloudCrowd
7
7
  class WorkUnit < ActiveRecord::Base
8
8
  include ModelStatus
9
9
 
10
+ # We use a random number in (0...MAX_RESERVATION) to reserve work units.
11
+ # MAX_RESERVATION is the maximum signed integer in MySQL -- SQLite has no limit.
12
+ MAX_RESERVATION = 2147483647
13
+
10
14
  belongs_to :job
11
15
  belongs_to :node_record
12
16
 
@@ -15,7 +19,9 @@ module CloudCrowd
15
19
  # Available WorkUnits are waiting to be distributed to Nodes for processing.
16
20
  named_scope :available, {:conditions => {:reservation => nil, :worker_pid => nil, :status => INCOMPLETE}}
17
21
  # Reserved WorkUnits have been marked for distribution by a central server process.
18
- named_scope :reserved, {:conditions => {:reservation => $$}, :order => 'updated_at asc'}
22
+ named_scope :reserved, lambda {|reservation_number|
23
+ {:conditions => {:reservation => reservation_number}, :order => 'updated_at asc'}
24
+ }
19
25
 
20
26
  # Attempt to send a list of WorkUnits to nodes with available capacity.
21
27
  # A single central server process stops the same WorkUnit from being
@@ -27,8 +33,8 @@ module CloudCrowd
27
33
  # from the availability list when they are successfully sent, and Nodes get
28
34
  # removed when they are busy or have the action in question disabled.
29
35
  def self.distribute_to_nodes
30
- return unless WorkUnit.reserve_available
31
- work_units = WorkUnit.reserved
36
+ return unless reservation_number = WorkUnit.reserve_available
37
+ work_units = WorkUnit.reserved(reservation_number)
32
38
  available_nodes = NodeRecord.available
33
39
  while node = available_nodes.shift and unit = work_units.shift do
34
40
  if node.actions.include? unit.action
@@ -40,18 +46,20 @@ module CloudCrowd
40
46
  work_units.push(unit)
41
47
  end
42
48
  ensure
43
- WorkUnit.cancel_reservations
49
+ WorkUnit.cancel_reservations(reservation_number)
44
50
  end
45
51
 
46
52
  # Reserves all available WorkUnits under a freshly generated random
47
53
  # reservation number. Returns that number, or false if none were available.
48
54
  def self.reserve_available
49
- WorkUnit.available.update_all("reservation = #{$$}") > 0
55
+ reservation_number = ActiveSupport::SecureRandom.random_number(MAX_RESERVATION)
56
+ any = WorkUnit.available.update_all("reservation = #{reservation_number}") > 0
57
+ any && reservation_number
50
58
  end
51
59
 
52
60
  # Cancels all outstanding WorkUnit reservations held under the given reservation number.
53
- def self.cancel_reservations
54
- WorkUnit.reserved.update_all('reservation = null')
61
+ def self.cancel_reservations(reservation_number)
62
+ WorkUnit.reserved(reservation_number).update_all('reservation = null')
55
63
  end
56
64
 
57
65
  # Cancels all outstanding WorkUnit reservations for all processes. (Useful
@@ -24,6 +24,9 @@ module CloudCrowd
24
24
  # (if configured to do so in config.yml).
25
25
  MONITOR_INTERVAL = 3
26
26
 
27
+ # The interval at which the node regularly checks in with central (5 min).
28
+ CHECK_IN_INTERVAL = 300
29
+
27
30
  # The response sent back when this node is overloaded.
28
31
  OVERLOADED_MESSAGE = 'Node Overloaded'
29
32
 
@@ -86,6 +89,7 @@ module CloudCrowd
86
89
  asset_store
87
90
  @server_thread = Thread.new { @server.start }
88
91
  check_in(true)
92
+ check_in_periodically
89
93
  monitor_system if @max_load || @min_memory
90
94
  @server_thread.join
91
95
  end
@@ -100,7 +104,7 @@ module CloudCrowd
100
104
  :max_workers => CloudCrowd.config[:max_workers],
101
105
  :enabled_actions => @enabled_actions.join(',')
102
106
  )
103
- rescue Errno::ECONNREFUSED
107
+ rescue RestClient::Exception, Errno::ECONNREFUSED
104
108
  puts "Failed to connect to the central server (#{@central.to_s})."
105
109
  raise SystemExit if critical
106
110
  end
@@ -160,6 +164,18 @@ module CloudCrowd
160
164
  end
161
165
  end
162
166
 
167
+ # If communication is interrupted for external reasons, the central server
168
+ # will assume that the node has gone down. Checking in will let central know
169
+ # it's still online.
170
+ def check_in_periodically
171
+ @check_in_thread = Thread.new do
172
+ loop do
173
+ sleep CHECK_IN_INTERVAL
174
+ check_in
175
+ end
176
+ end
177
+ end
178
+
163
179
  # Trap exit signals in order to shut down cleanly.
164
180
  def trap_signals
165
181
  Signal.trap('QUIT') { shut_down }
@@ -170,6 +186,7 @@ module CloudCrowd
170
186
 
171
187
  # At shut down, de-register with the central server before exiting.
172
188
  def shut_down
189
+ @check_in_thread.kill if @check_in_thread
173
190
  @monitor_thread.kill if @monitor_thread
174
191
  check_out
175
192
  @server_thread.kill if @server_thread
@@ -2,7 +2,7 @@
2
2
  # all of its retries.
3
3
  class FailureTesting < CloudCrowd::Action
4
4
 
5
- def run
5
+ def process
6
6
  if options['attempts'] + 1 >= CloudCrowd.config[:work_unit_retries]
7
7
  return 'made it!'
8
8
  else
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cloud-crowd
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.3
4
+ version: 0.2.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jeremy Ashkenas
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-09-23 00:00:00 -04:00
12
+ date: 2009-09-28 00:00:00 -04:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency