cloud-crowd 0.2.8 → 0.2.9

Sign up to get free protection for your applications and to get access to all the features.
@@ -21,7 +21,7 @@ class ProcessPdfs < CloudCrowd::Action
21
21
  batch_pdfs = pdfs[batch_num*batch_size...(batch_num + 1)*batch_size]
22
22
  `tar -czf #{tar_path} #{batch_pdfs.join(' ')}`
23
23
  end
24
- Dir["*.tar"].map {|tar| save(tar) }.to_json
24
+ Dir["*.tar"].map {|tar| save(tar) }
25
25
  end
26
26
 
27
27
  # Convert a pdf page into different-sized thumbnails. Grab the text.
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'cloud-crowd'
3
- s.version = '0.2.8' # Keep version in sync with cloud-crowd.rb
4
- s.date = '2009-10-27'
3
+ s.version = '0.2.9' # Keep version in sync with cloud-crowd.rb
4
+ s.date = '2009-11-03'
5
5
 
6
6
  s.homepage = "http://wiki.github.com/documentcloud/cloud-crowd"
7
7
  s.summary = "Parallel Processing for the Rest of Us"
@@ -44,7 +44,7 @@ module CloudCrowd
44
44
  autoload :WorkUnit, 'cloud_crowd/models'
45
45
 
46
46
  # Keep this version in sync with the gemspec.
47
- VERSION = '0.2.8'
47
+ VERSION = '0.2.9'
48
48
 
49
49
  # Increment the schema version when there's a backwards incompatible change.
50
50
  SCHEMA_VERSION = 3
@@ -87,6 +87,7 @@ module CloudCrowd
87
87
 
88
88
  class << self
89
89
  attr_reader :config
90
+ attr_accessor :identity
90
91
 
91
92
  # Configure CloudCrowd by passing in the path to <tt>config.yml</tt>.
92
93
  def configure(config_path)
@@ -171,6 +172,17 @@ module CloudCrowd
171
172
  default_actions + installed_actions + custom_actions
172
173
  end
173
174
 
175
+ # Is this CloudCrowd instance a server? Useful for avoiding loading unneeded
176
+ # code from actions.
177
+ def server?
178
+ @identity == :server
179
+ end
180
+
181
+ # Or is it a node?
182
+ def node?
183
+ @identity == :node
184
+ end
185
+
174
186
  end
175
187
 
176
188
  end
@@ -31,7 +31,8 @@ module CloudCrowd
31
31
  @job_id, @work_unit_id = options['job_id'], options['work_unit_id']
32
32
  @work_directory = File.expand_path(File.join(@store.temp_storage_path, storage_prefix))
33
33
  FileUtils.mkdir_p(@work_directory) unless File.exists?(@work_directory)
34
- status == MERGING ? parse_input : download_input
34
+ parse_input
35
+ download_input
35
36
  end
36
37
 
37
38
  # Each Action subclass must implement a +process+ method, overriding this.
@@ -98,9 +99,11 @@ module CloudCrowd
98
99
  @storage_prefix ||= File.join(path_parts)
99
100
  end
100
101
 
101
- # If we know that the input is JSON, replace it with the parsed form.
102
+ # If we think that the input is JSON, replace it with the parsed form.
103
+ # It would be great if the JSON module had an is_json? method.
102
104
  def parse_input
103
- @input = JSON.parse(@input)
105
+ return unless ['[', '{'].include? @input[0..0]
106
+ @input = JSON.parse(@input) rescue @input
104
107
  end
105
108
 
106
109
  def input_is_url?
@@ -23,8 +23,8 @@ module CloudCrowd
23
23
  # Available WorkUnits are waiting to be distributed to Nodes for processing.
24
24
  named_scope :available, {:conditions => {:reservation => nil, :worker_pid => nil, :status => INCOMPLETE}}
25
25
  # Reserved WorkUnits have been marked for distribution by a central server process.
26
- named_scope :reserved, lambda {|reservation_number|
27
- {:conditions => {:reservation => reservation_number}, :order => 'updated_at asc'}
26
+ named_scope :reserved, lambda {|reservation|
27
+ {:conditions => {:reservation => reservation}, :order => 'updated_at asc'}
28
28
  }
29
29
 
30
30
  # Attempt to send a list of WorkUnits to nodes with available capacity.
@@ -38,9 +38,10 @@ module CloudCrowd
38
38
  # successfully sent, and Nodes get removed when they are busy or have the
39
39
  # action in question disabled.
40
40
  def self.distribute_to_nodes
41
- begin
42
- return unless reservation_number = WorkUnit.reserve_available(:limit => RESERVATION_LIMIT)
43
- work_units = WorkUnit.reserved(reservation_number)
41
+ reservation = nil
42
+ loop do
43
+ return unless reservation = WorkUnit.reserve_available(:limit => RESERVATION_LIMIT)
44
+ work_units = WorkUnit.reserved(reservation)
44
45
  available_nodes = NodeRecord.available
45
46
  while node = available_nodes.shift and unit = work_units.shift do
46
47
  if node.actions.include? unit.action
@@ -51,23 +52,23 @@ module CloudCrowd
51
52
  end
52
53
  work_units.push(unit)
53
54
  end
54
- retry if work_units.empty? && !available_nodes.empty?
55
- ensure
56
- WorkUnit.cancel_reservations(reservation_number) if reservation_number
55
+ return if work_units.any? || available_nodes.empty?
57
56
  end
57
+ ensure
58
+ WorkUnit.cancel_reservations(reservation) if reservation
58
59
  end
59
60
 
60
61
  # Reserves all available WorkUnits for this process. Returns false if there
61
62
  # were none available.
62
63
  def self.reserve_available(options={})
63
- reservation_number = ActiveSupport::SecureRandom.random_number(MAX_RESERVATION)
64
- any = WorkUnit.available.update_all("reservation = #{reservation_number}", nil, options) > 0
65
- any && reservation_number
64
+ reservation = ActiveSupport::SecureRandom.random_number(MAX_RESERVATION)
65
+ any = WorkUnit.available.update_all("reservation = #{reservation}", nil, options) > 0
66
+ any && reservation
66
67
  end
67
68
 
68
69
  # Cancels all outstanding WorkUnit reservations for this process.
69
- def self.cancel_reservations(reservation_number)
70
- WorkUnit.reserved(reservation_number).update_all('reservation = null')
70
+ def self.cancel_reservations(reservation)
71
+ WorkUnit.reserved(reservation).update_all('reservation = null')
71
72
  end
72
73
 
73
74
  # Cancels all outstanding WorkUnit reservations for all processes. (Useful
@@ -95,7 +96,8 @@ module CloudCrowd
95
96
  # without waiting for the rest of their splitting cousins to complete.
96
97
  def finish(result, time_taken)
97
98
  if splitting?
98
- [JSON.parse(parsed_output(result))].flatten.each do |new_input|
99
+ [parsed_output(result)].flatten.each do |new_input|
100
+ new_input = new_input.to_json unless new_input.is_a? String
99
101
  WorkUnit.start(job, action, new_input, PROCESSING)
100
102
  end
101
103
  self.destroy
@@ -65,6 +65,7 @@ module CloudCrowd
65
65
  # When creating a node, specify the port it should run on.
66
66
  def initialize(port=nil, daemon=false)
67
67
  require 'json'
68
+ CloudCrowd.identity = :node
68
69
  @central = CloudCrowd.central_server
69
70
  @host = Socket.gethostname
70
71
  @enabled_actions = CloudCrowd.actions.keys
@@ -112,6 +112,12 @@ module CloudCrowd
112
112
  json nil
113
113
  end
114
114
 
115
+ # At initialization record the identity of this Ruby instance as a server.
116
+ def initialize(*args)
117
+ super(*args)
118
+ CloudCrowd.identity = :server
119
+ end
120
+
115
121
  end
116
122
 
117
123
  end
@@ -15,6 +15,11 @@ class ServerTest < Test::Unit::TestCase
15
15
  2.times { Job.make }
16
16
  end
17
17
 
18
+ should "set the identity of the Ruby instance" do
19
+ app.new
20
+ assert CloudCrowd.server?
21
+ end
22
+
18
23
  should "be able to render the Operations Center (GET /)" do
19
24
  get '/'
20
25
  assert last_response.body.include? '<div id="nodes">'
@@ -29,18 +34,6 @@ class ServerTest < Test::Unit::TestCase
29
34
  assert resp['work_unit_count'] == 2
30
35
  end
31
36
 
32
- # should "be able to check in a worker daemon, and then check out a work unit" do
33
- # put '/worker', :name => '101@localhost', :thread_status => 'sleeping'
34
- # assert last_response.successful? && last_response.empty?
35
- # post '/work', :worker_name => '101@localhost', :worker_actions => 'graphics_magick'
36
- # checked_out = JSON.parse(last_response.body)
37
- # assert checked_out['action'] == 'graphics_magick'
38
- # assert checked_out['attempts'] == 0
39
- # assert checked_out['status'] == CloudCrowd::PROCESSING
40
- # status_check = JSON.parse(get('/worker/101@localhost').body)
41
- # assert checked_out == status_check
42
- # end
43
-
44
37
  should "have a heartbeat" do
45
38
  assert get('/heartbeat').body == 'buh-bump'
46
39
  end
@@ -8,6 +8,10 @@ class NodeUnitTest < Test::Unit::TestCase
8
8
  @node = Node.new(11011).instance_variable_get(:@app)
9
9
  end
10
10
 
11
+ should "set the identity of the Ruby instance" do
12
+ assert CloudCrowd.node?
13
+ end
14
+
11
15
  should "instantiate correctly" do
12
16
  assert @node.central.to_s == "http://localhost:9173"
13
17
  assert @node.port == 11011
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cloud-crowd
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.8
4
+ version: 0.2.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jeremy Ashkenas
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-10-27 00:00:00 -04:00
12
+ date: 2009-11-03 00:00:00 -05:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency