cloud-crowd 0.5.2 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'cloud-crowd'
3
- s.version = '0.5.2' # Keep version in sync with cloud-cloud.rb
4
- s.date = '2010-08-03'
3
+ s.version = '0.6.0' # Keep version in sync with cloud-crowd.rb
4
+ s.date = '2011-03-04'
5
5
 
6
6
  s.homepage = "http://wiki.github.com/documentcloud/cloud-crowd"
7
7
  s.summary = "Parallel Processing for the Rest of Us"
@@ -45,7 +45,7 @@ module CloudCrowd
45
45
  autoload :WorkUnit, 'cloud_crowd/models'
46
46
 
47
47
  # Keep this version in sync with the gemspec.
48
- VERSION = '0.5.2'
48
+ VERSION = '0.6.0'
49
49
 
50
50
  # Increment the schema version when there's a backwards incompatible change.
51
51
  SCHEMA_VERSION = 4
@@ -29,7 +29,7 @@ module CloudCrowd
29
29
  def initialize(status, input, options, store)
30
30
  @input, @options, @store = input, options, store
31
31
  @job_id, @work_unit_id = options['job_id'], options['work_unit_id']
32
- @work_directory = File.expand_path(File.join(@store.temp_storage_path, storage_prefix))
32
+ @work_directory = File.expand_path(File.join(@store.temp_storage_path, local_storage_prefix))
33
33
  FileUtils.mkdir_p(@work_directory) unless File.exists?(@work_directory)
34
34
  parse_input
35
35
  download_input
@@ -59,7 +59,7 @@ module CloudCrowd
59
59
  # Takes a local filesystem path, saves the file to S3, and returns the
60
60
  # public (or authenticated) url on S3 where the file can be accessed.
61
61
  def save(file_path)
62
- save_path = File.join(storage_prefix, File.basename(file_path))
62
+ save_path = File.join(remote_storage_prefix, File.basename(file_path))
63
63
  @store.save(file_path, save_path)
64
64
  end
65
65
 
@@ -89,14 +89,18 @@ module CloudCrowd
89
89
  File.basename(name, ext).gsub('.', '-') + ext
90
90
  end
91
91
 
92
- # The directory prefix to use for both local and S3 storage.
93
- # [action]/job_[job_id]/unit_[work_unit_it]
94
- def storage_prefix
95
- path_parts = []
96
- path_parts << Inflector.underscore(self.class)
97
- path_parts << "job_#{@job_id}"
98
- path_parts << "unit_#{@work_unit_id}" if @work_unit_id
99
- @storage_prefix ||= File.join(path_parts)
92
+ # The directory prefix to use for remote storage.
93
+ # [action]/job_[job_id]/unit_[work_unit_id] (unit segment only when a work unit id is set)
94
+ def remote_storage_prefix
95
+ @remote_storage_prefix ||= Inflector.underscore(self.class) +
96
+ "/job_#{@job_id}" + (@work_unit_id ? "/unit_#{@work_unit_id}" : '')
97
+ end
98
+
99
+ # The directory prefix to use for local storage.
100
+ # [action]/unit_[work_unit_id]
101
+ def local_storage_prefix
102
+ @local_storage_prefix ||= Inflector.underscore(self.class) +
103
+ (@work_unit_id ? "/unit_#{@work_unit_id}" : '')
100
104
  end
101
105
 
102
106
  # If we think that the input is JSON, replace it with the parsed form.
@@ -39,13 +39,22 @@ module CloudCrowd
39
39
  # action in question disabled.
40
40
  def self.distribute_to_nodes
41
41
  reservation = nil
42
- filter = {}
43
42
  loop do
43
+
44
+ # Find the available nodes, and determine what actions we're capable
45
+ # of running at the moment.
46
+ available_nodes = NodeRecord.available
47
+ available_actions = available_nodes.map {|node| node.actions }.flatten.uniq
48
+ filter = "action in (#{available_actions.map{|a| "'#{a}'"}.join(',')})"
49
+
50
+ # Reserve a handful of available work units.
44
51
  WorkUnit.cancel_reservations(reservation) if reservation
45
52
  return unless reservation = WorkUnit.reserve_available(:limit => RESERVATION_LIMIT, :conditions => filter)
46
53
  work_units = WorkUnit.reserved(reservation)
47
- available_nodes = NodeRecord.available
48
- while node = available_nodes.shift and unit = work_units.shift do
54
+
55
+ # Round robin through the nodes and units, sending the unit if the node
56
+ # is able to process it.
57
+ while (node = available_nodes.shift) && (unit = work_units.shift) do
49
58
  if node.actions.include?(unit.action)
50
59
  if node.send_work_unit(unit)
51
60
  available_nodes.push(node) unless node.busy?
@@ -56,10 +65,12 @@ module CloudCrowd
56
65
  end
57
66
  work_units.push(unit)
58
67
  end
59
- if work_units.any? && available_nodes.any?
60
- filter = {:action => available_nodes.map {|node| node.actions }.flatten.uniq }
61
- next
62
- end
68
+
69
+ # If there are both units and nodes left over, try again.
70
+ next if work_units.any? && available_nodes.any?
71
+
72
+ # If we still have units at this point, or we're fresh out of nodes,
73
+ # that means we're done.
63
74
  return if work_units.any? || available_nodes.empty?
64
75
  end
65
76
  ensure
@@ -41,8 +41,6 @@ ActiveRecord::Schema.define(:version => CloudCrowd::SCHEMA_VERSION) do
41
41
  t.datetime "updated_at"
42
42
  end
43
43
 
44
- # Here be indices. After looking, it seems faster not to have them at all.
45
- #
46
44
  add_index "jobs", ["status"], :name => "index_jobs_on_status"
47
45
  add_index "work_units", ["job_id"], :name => "index_work_units_on_job_id"
48
46
  add_index "work_units", ["worker_pid"], :name => "index_work_units_on_worker_pid"
@@ -50,7 +50,7 @@ class ActionTest < Test::Unit::TestCase
50
50
  end
51
51
 
52
52
  should "be able to count the number of words in this file" do
53
- assert @action.process == 274
53
+ assert @action.process == 266
54
54
  end
55
55
 
56
56
  should "raise an exception when backticks fail" do
@@ -60,8 +60,8 @@ class ActionTest < Test::Unit::TestCase
60
60
 
61
61
  should "be able to download a remote file" do
62
62
  path = "temp.txt"
63
- @action.download('http://example.com', path)
64
- assert File.read(path).match(/These domain names are reserved for use in documentation/)
63
+ @action.download('http://www.w3.org', path)
64
+ assert File.read(path).match(/standards/i)
65
65
  FileUtils.rm path
66
66
  end
67
67
 
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cloud-crowd
3
3
  version: !ruby/object:Gem::Version
4
- hash: 15
5
- prerelease: false
4
+ hash: 7
5
+ prerelease:
6
6
  segments:
7
7
  - 0
8
- - 5
9
- - 2
10
- version: 0.5.2
8
+ - 6
9
+ - 0
10
+ version: 0.6.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Jeremy Ashkenas
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-08-03 00:00:00 -04:00
18
+ date: 2011-03-04 00:00:00 -05:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -291,7 +291,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
291
291
  requirements: []
292
292
 
293
293
  rubyforge_project: cloud-crowd
294
- rubygems_version: 1.3.7
294
+ rubygems_version: 1.4.2
295
295
  signing_key:
296
296
  specification_version: 3
297
297
  summary: Parallel Processing for the Rest of Us