cloud-crowd 0.2.3 → 0.2.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'cloud-crowd'
3
- s.version = '0.2.3' # Keep version in sync with cloud-cloud.rb
4
- s.date = '2009-09-23'
3
+ s.version = '0.2.4' # Keep version in sync with cloud-crowd.rb
4
+ s.date = '2009-09-28'
5
5
 
6
6
  s.homepage = "http://wiki.github.com/documentcloud/cloud-crowd"
7
7
  s.summary = "Parallel Processing for the Rest of Us"
@@ -44,7 +44,7 @@ module CloudCrowd
44
44
  autoload :WorkUnit, 'cloud_crowd/models'
45
45
 
46
46
  # Keep this version in sync with the gemspec.
47
- VERSION = '0.2.3'
47
+ VERSION = '0.2.4'
48
48
 
49
49
  # Increment the schema version when there's a backwards incompatible change.
50
50
  SCHEMA_VERSION = 3
@@ -162,6 +162,9 @@ module CloudCrowd
162
162
  @actions[name] = Module.const_get(Inflector.camelize(name))
163
163
  end
164
164
  @actions
165
+ rescue NameError => e
166
+ adjusted_message = "One of your actions failed to load. Please ensure that the name of your action class can be deduced from the name of the file. ex: 'word_count.rb' => 'WordCount'\n#{e.message}"
167
+ raise NameError.new(adjusted_message, e.name)
165
168
  end
166
169
  end
167
170
 
@@ -35,7 +35,7 @@ module CloudCrowd
35
35
  return queue_for_workers(outs) if merging?
36
36
  if complete?
37
37
  update_attributes(:outputs => outs, :time => time_taken)
38
- fire_callback if callback_url
38
+ Thread.new { fire_callback } if callback_url
39
39
  end
40
40
  self
41
41
  end
@@ -39,11 +39,12 @@ module CloudCrowd
39
39
  result = node['/work'].post(:work_unit => unit.to_json)
40
40
  unit.assign_to(self, JSON.parse(result)['pid'])
41
41
  touch && true
42
- rescue Errno::ECONNREFUSED # Couldn't post to node, assume it's gone away.
43
- destroy && false
44
42
  rescue RestClient::RequestFailed => e
45
43
  raise e unless e.http_code == 503 && e.http_body == Node::OVERLOADED_MESSAGE
46
44
  update_attribute(:busy, true) && false
45
+ rescue RestClient::Exception, Errno::ECONNREFUSED
46
+ # Couldn't post to node, assume it's gone away.
47
+ destroy && false
47
48
  end
48
49
 
49
50
  # What Actions is this Node able to run?
@@ -7,6 +7,10 @@ module CloudCrowd
7
7
  class WorkUnit < ActiveRecord::Base
8
8
  include ModelStatus
9
9
 
10
+ # We use a random number in (0...MAX_RESERVATION) to reserve work units.
11
+ # MAX_RESERVATION is the maximum signed integer in MySQL -- SQLite has no limit.
12
+ MAX_RESERVATION = 2147483647
13
+
10
14
  belongs_to :job
11
15
  belongs_to :node_record
12
16
 
@@ -15,7 +19,9 @@ module CloudCrowd
15
19
  # Available WorkUnits are waiting to be distributed to Nodes for processing.
16
20
  named_scope :available, {:conditions => {:reservation => nil, :worker_pid => nil, :status => INCOMPLETE}}
17
21
  # Reserved WorkUnits have been marked for distribution by a central server process.
18
- named_scope :reserved, {:conditions => {:reservation => $$}, :order => 'updated_at asc'}
22
+ named_scope :reserved, lambda {|reservation_number|
23
+ {:conditions => {:reservation => reservation_number}, :order => 'updated_at asc'}
24
+ }
19
25
 
20
26
  # Attempt to send a list of WorkUnits to nodes with available capacity.
21
27
  # A single central server process stops the same WorkUnit from being
@@ -27,8 +33,8 @@ module CloudCrowd
27
33
  # from the availability list when they are successfully sent, and Nodes get
28
34
  # removed when they are busy or have the action in question disabled.
29
35
  def self.distribute_to_nodes
30
- return unless WorkUnit.reserve_available
31
- work_units = WorkUnit.reserved
36
+ return unless reservation_number = WorkUnit.reserve_available
37
+ work_units = WorkUnit.reserved(reservation_number)
32
38
  available_nodes = NodeRecord.available
33
39
  while node = available_nodes.shift and unit = work_units.shift do
34
40
  if node.actions.include? unit.action
@@ -40,18 +46,20 @@ module CloudCrowd
40
46
  work_units.push(unit)
41
47
  end
42
48
  ensure
43
- WorkUnit.cancel_reservations
49
+ WorkUnit.cancel_reservations(reservation_number)
44
50
  end
45
51
 
46
52
  # Reserves all available WorkUnits for this process. Returns false if there
47
53
  # were none available.
48
54
  def self.reserve_available
49
- WorkUnit.available.update_all("reservation = #{$$}") > 0
55
+ reservation_number = ActiveSupport::SecureRandom.random_number(MAX_RESERVATION)
56
+ any = WorkUnit.available.update_all("reservation = #{reservation_number}") > 0
57
+ any && reservation_number
50
58
  end
51
59
 
52
60
  # Cancels all outstanding WorkUnit reservations for this process.
53
- def self.cancel_reservations
54
- WorkUnit.reserved.update_all('reservation = null')
61
+ def self.cancel_reservations(reservation_number)
62
+ WorkUnit.reserved(reservation_number).update_all('reservation = null')
55
63
  end
56
64
 
57
65
  # Cancels all outstanding WorkUnit reservations for all processes. (Useful
@@ -24,6 +24,9 @@ module CloudCrowd
24
24
  # (if configured to do so in config.yml).
25
25
  MONITOR_INTERVAL = 3
26
26
 
27
+ # The interval at which the node regularly checks in with central (5 min).
28
+ CHECK_IN_INTERVAL = 300
29
+
27
30
  # The response sent back when this node is overloaded.
28
31
  OVERLOADED_MESSAGE = 'Node Overloaded'
29
32
 
@@ -86,6 +89,7 @@ module CloudCrowd
86
89
  asset_store
87
90
  @server_thread = Thread.new { @server.start }
88
91
  check_in(true)
92
+ check_in_periodically
89
93
  monitor_system if @max_load || @min_memory
90
94
  @server_thread.join
91
95
  end
@@ -100,7 +104,7 @@ module CloudCrowd
100
104
  :max_workers => CloudCrowd.config[:max_workers],
101
105
  :enabled_actions => @enabled_actions.join(',')
102
106
  )
103
- rescue Errno::ECONNREFUSED
107
+ rescue RestClient::Exception, Errno::ECONNREFUSED
104
108
  puts "Failed to connect to the central server (#{@central.to_s})."
105
109
  raise SystemExit if critical
106
110
  end
@@ -160,6 +164,18 @@ module CloudCrowd
160
164
  end
161
165
  end
162
166
 
167
+ # If communication is interrupted for external reasons, the central server
168
+ # will assume that the node has gone down. Checking in will let central know
169
+ # it's still online.
170
+ def check_in_periodically
171
+ @check_in_thread = Thread.new do
172
+ loop do
173
+ sleep CHECK_IN_INTERVAL
174
+ check_in
175
+ end
176
+ end
177
+ end
178
+
163
179
  # Trap exit signals in order to shut down cleanly.
164
180
  def trap_signals
165
181
  Signal.trap('QUIT') { shut_down }
@@ -170,6 +186,7 @@ module CloudCrowd
170
186
 
171
187
  # At shut down, de-register with the central server before exiting.
172
188
  def shut_down
189
+ @check_in_thread.kill if @check_in_thread
173
190
  @monitor_thread.kill if @monitor_thread
174
191
  check_out
175
192
  @server_thread.kill if @server_thread
@@ -2,7 +2,7 @@
2
2
  # all of its retries.
3
3
  class FailureTesting < CloudCrowd::Action
4
4
 
5
- def run
5
+ def process
6
6
  if options['attempts'] + 1 >= CloudCrowd.config[:work_unit_retries]
7
7
  return 'made it!'
8
8
  else
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cloud-crowd
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.3
4
+ version: 0.2.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jeremy Ashkenas
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-09-23 00:00:00 -04:00
12
+ date: 2009-09-28 00:00:00 -04:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency