cloud-crowd 0.2.8 → 0.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -21,7 +21,7 @@ class ProcessPdfs < CloudCrowd::Action
21
21
  batch_pdfs = pdfs[batch_num*batch_size...(batch_num + 1)*batch_size]
22
22
  `tar -czf #{tar_path} #{batch_pdfs.join(' ')}`
23
23
  end
24
- Dir["*.tar"].map {|tar| save(tar) }.to_json
24
+ Dir["*.tar"].map {|tar| save(tar) }
25
25
  end
26
26
 
27
27
  # Convert a pdf page into different-sized thumbnails. Grab the text.
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'cloud-crowd'
3
- s.version = '0.2.8' # Keep version in sync with cloud-cloud.rb
4
- s.date = '2009-10-27'
3
+ s.version = '0.2.9' # Keep version in sync with cloud-cloud.rb
4
+ s.date = '2009-11-03'
5
5
 
6
6
  s.homepage = "http://wiki.github.com/documentcloud/cloud-crowd"
7
7
  s.summary = "Parallel Processing for the Rest of Us"
@@ -44,7 +44,7 @@ module CloudCrowd
44
44
  autoload :WorkUnit, 'cloud_crowd/models'
45
45
 
46
46
  # Keep this version in sync with the gemspec.
47
- VERSION = '0.2.8'
47
+ VERSION = '0.2.9'
48
48
 
49
49
  # Increment the schema version when there's a backwards incompatible change.
50
50
  SCHEMA_VERSION = 3
@@ -87,6 +87,7 @@ module CloudCrowd
87
87
 
88
88
  class << self
89
89
  attr_reader :config
90
+ attr_accessor :identity
90
91
 
91
92
  # Configure CloudCrowd by passing in the path to <tt>config.yml</tt>.
92
93
  def configure(config_path)
@@ -171,6 +172,17 @@ module CloudCrowd
171
172
  default_actions + installed_actions + custom_actions
172
173
  end
173
174
 
175
+ # Is this CloudCrowd instance a server? Useful for avoiding loading unneeded
176
+ # code from actions.
177
+ def server?
178
+ @identity == :server
179
+ end
180
+
181
+ # Or is it a node?
182
+ def node?
183
+ @identity == :node
184
+ end
185
+
174
186
  end
175
187
 
176
188
  end
@@ -31,7 +31,8 @@ module CloudCrowd
31
31
  @job_id, @work_unit_id = options['job_id'], options['work_unit_id']
32
32
  @work_directory = File.expand_path(File.join(@store.temp_storage_path, storage_prefix))
33
33
  FileUtils.mkdir_p(@work_directory) unless File.exists?(@work_directory)
34
- status == MERGING ? parse_input : download_input
34
+ parse_input
35
+ download_input
35
36
  end
36
37
 
37
38
  # Each Action subclass must implement a +process+ method, overriding this.
@@ -98,9 +99,11 @@ module CloudCrowd
98
99
  @storage_prefix ||= File.join(path_parts)
99
100
  end
100
101
 
101
- # If we know that the input is JSON, replace it with the parsed form.
102
+ # If we think that the input is JSON, replace it with the parsed form.
103
+ # It would be great if the JSON module had an is_json? method.
102
104
  def parse_input
103
- @input = JSON.parse(@input)
105
+ return unless ['[', '{'].include? @input[0..0]
106
+ @input = JSON.parse(@input) rescue @input
104
107
  end
105
108
 
106
109
  def input_is_url?
@@ -23,8 +23,8 @@ module CloudCrowd
23
23
  # Available WorkUnits are waiting to be distributed to Nodes for processing.
24
24
  named_scope :available, {:conditions => {:reservation => nil, :worker_pid => nil, :status => INCOMPLETE}}
25
25
  # Reserved WorkUnits have been marked for distribution by a central server process.
26
- named_scope :reserved, lambda {|reservation_number|
27
- {:conditions => {:reservation => reservation_number}, :order => 'updated_at asc'}
26
+ named_scope :reserved, lambda {|reservation|
27
+ {:conditions => {:reservation => reservation}, :order => 'updated_at asc'}
28
28
  }
29
29
 
30
30
  # Attempt to send a list of WorkUnits to nodes with available capacity.
@@ -38,9 +38,10 @@ module CloudCrowd
38
38
  # successfully sent, and Nodes get removed when they are busy or have the
39
39
  # action in question disabled.
40
40
  def self.distribute_to_nodes
41
- begin
42
- return unless reservation_number = WorkUnit.reserve_available(:limit => RESERVATION_LIMIT)
43
- work_units = WorkUnit.reserved(reservation_number)
41
+ reservation = nil
42
+ loop do
43
+ return unless reservation = WorkUnit.reserve_available(:limit => RESERVATION_LIMIT)
44
+ work_units = WorkUnit.reserved(reservation)
44
45
  available_nodes = NodeRecord.available
45
46
  while node = available_nodes.shift and unit = work_units.shift do
46
47
  if node.actions.include? unit.action
@@ -51,23 +52,23 @@ module CloudCrowd
51
52
  end
52
53
  work_units.push(unit)
53
54
  end
54
- retry if work_units.empty? && !available_nodes.empty?
55
- ensure
56
- WorkUnit.cancel_reservations(reservation_number) if reservation_number
55
+ return if work_units.any? || available_nodes.empty?
57
56
  end
57
+ ensure
58
+ WorkUnit.cancel_reservations(reservation) if reservation
58
59
  end
59
60
 
60
61
  # Reserves all available WorkUnits for this process. Returns false if there
61
62
  # were none available.
62
63
  def self.reserve_available(options={})
63
- reservation_number = ActiveSupport::SecureRandom.random_number(MAX_RESERVATION)
64
- any = WorkUnit.available.update_all("reservation = #{reservation_number}", nil, options) > 0
65
- any && reservation_number
64
+ reservation = ActiveSupport::SecureRandom.random_number(MAX_RESERVATION)
65
+ any = WorkUnit.available.update_all("reservation = #{reservation}", nil, options) > 0
66
+ any && reservation
66
67
  end
67
68
 
68
69
  # Cancels all outstanding WorkUnit reservations for this process.
69
- def self.cancel_reservations(reservation_number)
70
- WorkUnit.reserved(reservation_number).update_all('reservation = null')
70
+ def self.cancel_reservations(reservation)
71
+ WorkUnit.reserved(reservation).update_all('reservation = null')
71
72
  end
72
73
 
73
74
  # Cancels all outstanding WorkUnit reservations for all processes. (Useful
@@ -95,7 +96,8 @@ module CloudCrowd
95
96
  # without waiting for the rest of their splitting cousins to complete.
96
97
  def finish(result, time_taken)
97
98
  if splitting?
98
- [JSON.parse(parsed_output(result))].flatten.each do |new_input|
99
+ [parsed_output(result)].flatten.each do |new_input|
100
+ new_input = new_input.to_json unless new_input.is_a? String
99
101
  WorkUnit.start(job, action, new_input, PROCESSING)
100
102
  end
101
103
  self.destroy
@@ -65,6 +65,7 @@ module CloudCrowd
65
65
  # When creating a node, specify the port it should run on.
66
66
  def initialize(port=nil, daemon=false)
67
67
  require 'json'
68
+ CloudCrowd.identity = :node
68
69
  @central = CloudCrowd.central_server
69
70
  @host = Socket.gethostname
70
71
  @enabled_actions = CloudCrowd.actions.keys
@@ -112,6 +112,12 @@ module CloudCrowd
112
112
  json nil
113
113
  end
114
114
 
115
+ # At initialization record the identity of this Ruby instance as a server.
116
+ def initialize(*args)
117
+ super(*args)
118
+ CloudCrowd.identity = :server
119
+ end
120
+
115
121
  end
116
122
 
117
123
  end
@@ -15,6 +15,11 @@ class ServerTest < Test::Unit::TestCase
15
15
  2.times { Job.make }
16
16
  end
17
17
 
18
+ should "set the identity of the Ruby instance" do
19
+ app.new
20
+ assert CloudCrowd.server?
21
+ end
22
+
18
23
  should "be able to render the Operations Center (GET /)" do
19
24
  get '/'
20
25
  assert last_response.body.include? '<div id="nodes">'
@@ -29,18 +34,6 @@ class ServerTest < Test::Unit::TestCase
29
34
  assert resp['work_unit_count'] == 2
30
35
  end
31
36
 
32
- # should "be able to check in a worker daemon, and then check out a work unit" do
33
- # put '/worker', :name => '101@localhost', :thread_status => 'sleeping'
34
- # assert last_response.successful? && last_response.empty?
35
- # post '/work', :worker_name => '101@localhost', :worker_actions => 'graphics_magick'
36
- # checked_out = JSON.parse(last_response.body)
37
- # assert checked_out['action'] == 'graphics_magick'
38
- # assert checked_out['attempts'] == 0
39
- # assert checked_out['status'] == CloudCrowd::PROCESSING
40
- # status_check = JSON.parse(get('/worker/101@localhost').body)
41
- # assert checked_out == status_check
42
- # end
43
-
44
37
  should "have a heartbeat" do
45
38
  assert get('/heartbeat').body == 'buh-bump'
46
39
  end
@@ -8,6 +8,10 @@ class NodeUnitTest < Test::Unit::TestCase
8
8
  @node = Node.new(11011).instance_variable_get(:@app)
9
9
  end
10
10
 
11
+ should "set the identity of the Ruby instance" do
12
+ assert CloudCrowd.node?
13
+ end
14
+
11
15
  should "instantiate correctly" do
12
16
  assert @node.central.to_s == "http://localhost:9173"
13
17
  assert @node.port == 11011
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cloud-crowd
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.8
4
+ version: 0.2.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jeremy Ashkenas
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-10-27 00:00:00 -04:00
12
+ date: 2009-11-03 00:00:00 -05:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency