cloud-crowd 0.5.2 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'cloud-crowd'
3
- s.version = '0.5.2' # Keep version in sync with cloud-cloud.rb
4
- s.date = '2010-08-03'
3
+ s.version = '0.6.0' # Keep version in sync with cloud-crowd.rb
4
+ s.date = '2011-03-04'
5
5
 
6
6
  s.homepage = "http://wiki.github.com/documentcloud/cloud-crowd"
7
7
  s.summary = "Parallel Processing for the Rest of Us"
@@ -45,7 +45,7 @@ module CloudCrowd
45
45
  autoload :WorkUnit, 'cloud_crowd/models'
46
46
 
47
47
  # Keep this version in sync with the gemspec.
48
- VERSION = '0.5.2'
48
+ VERSION = '0.6.0'
49
49
 
50
50
  # Increment the schema version when there's a backwards incompatible change.
51
51
  SCHEMA_VERSION = 4
@@ -29,7 +29,7 @@ module CloudCrowd
29
29
  def initialize(status, input, options, store)
30
30
  @input, @options, @store = input, options, store
31
31
  @job_id, @work_unit_id = options['job_id'], options['work_unit_id']
32
- @work_directory = File.expand_path(File.join(@store.temp_storage_path, storage_prefix))
32
+ @work_directory = File.expand_path(File.join(@store.temp_storage_path, local_storage_prefix))
33
33
  FileUtils.mkdir_p(@work_directory) unless File.exists?(@work_directory)
34
34
  parse_input
35
35
  download_input
@@ -59,7 +59,7 @@ module CloudCrowd
59
59
  # Takes a local filesystem path, saves the file to S3, and returns the
60
60
  # public (or authenticated) url on S3 where the file can be accessed.
61
61
  def save(file_path)
62
- save_path = File.join(storage_prefix, File.basename(file_path))
62
+ save_path = File.join(remote_storage_prefix, File.basename(file_path))
63
63
  @store.save(file_path, save_path)
64
64
  end
65
65
 
@@ -89,14 +89,18 @@ module CloudCrowd
89
89
  File.basename(name, ext).gsub('.', '-') + ext
90
90
  end
91
91
 
92
- # The directory prefix to use for both local and S3 storage.
93
- # [action]/job_[job_id]/unit_[work_unit_it]
94
- def storage_prefix
95
- path_parts = []
96
- path_parts << Inflector.underscore(self.class)
97
- path_parts << "job_#{@job_id}"
98
- path_parts << "unit_#{@work_unit_id}" if @work_unit_id
99
- @storage_prefix ||= File.join(path_parts)
92
+ # The directory prefix to use for remote storage.
93
+ # [action]/job_[job_id]/unit_[work_unit_id] (the unit segment is omitted when there is no work unit id)
94
+ def remote_storage_prefix
95
+ @remote_storage_prefix ||= Inflector.underscore(self.class) +
96
+ "/job_#{@job_id}" + (@work_unit_id ? "/unit_#{@work_unit_id}" : '')
97
+ end
98
+
99
+ # The directory prefix to use for local storage.
100
+ # [action]/unit_[work_unit_id]
101
+ def local_storage_prefix
102
+ @local_storage_prefix ||= Inflector.underscore(self.class) +
103
+ (@work_unit_id ? "/unit_#{@work_unit_id}" : '')
100
104
  end
101
105
 
102
106
  # If we think that the input is JSON, replace it with the parsed form.
@@ -39,13 +39,22 @@ module CloudCrowd
39
39
  # action in question disabled.
40
40
  def self.distribute_to_nodes
41
41
  reservation = nil
42
- filter = {}
43
42
  loop do
43
+
44
+ # Find the available nodes, and determine what actions we're capable
45
+ # of running at the moment.
46
+ available_nodes = NodeRecord.available
47
+ available_actions = available_nodes.map {|node| node.actions }.flatten.uniq
48
+ filter = "action in (#{available_actions.map{|a| "'#{a}'"}.join(',')})"
49
+
50
+ # Reserve a handful of available work units.
44
51
  WorkUnit.cancel_reservations(reservation) if reservation
45
52
  return unless reservation = WorkUnit.reserve_available(:limit => RESERVATION_LIMIT, :conditions => filter)
46
53
  work_units = WorkUnit.reserved(reservation)
47
- available_nodes = NodeRecord.available
48
- while node = available_nodes.shift and unit = work_units.shift do
54
+
55
+ # Round robin through the nodes and units, sending the unit if the node
56
+ # is able to process it.
57
+ while (node = available_nodes.shift) && (unit = work_units.shift) do
49
58
  if node.actions.include?(unit.action)
50
59
  if node.send_work_unit(unit)
51
60
  available_nodes.push(node) unless node.busy?
@@ -56,10 +65,12 @@ module CloudCrowd
56
65
  end
57
66
  work_units.push(unit)
58
67
  end
59
- if work_units.any? && available_nodes.any?
60
- filter = {:action => available_nodes.map {|node| node.actions }.flatten.uniq }
61
- next
62
- end
68
+
69
+ # If there are both units and nodes left over, try again.
70
+ next if work_units.any? && available_nodes.any?
71
+
72
+ # If we still have units at this point, or we're fresh out of nodes,
73
+ # that means we're done.
63
74
  return if work_units.any? || available_nodes.empty?
64
75
  end
65
76
  ensure
@@ -41,8 +41,6 @@ ActiveRecord::Schema.define(:version => CloudCrowd::SCHEMA_VERSION) do
41
41
  t.datetime "updated_at"
42
42
  end
43
43
 
44
- # Here be indices. After looking, it seems faster not to have them at all.
45
- #
46
44
  add_index "jobs", ["status"], :name => "index_jobs_on_status"
47
45
  add_index "work_units", ["job_id"], :name => "index_work_units_on_job_id"
48
46
  add_index "work_units", ["worker_pid"], :name => "index_work_units_on_worker_pid"
@@ -50,7 +50,7 @@ class ActionTest < Test::Unit::TestCase
50
50
  end
51
51
 
52
52
  should "be able to count the number of words in this file" do
53
- assert @action.process == 274
53
+ assert @action.process == 266
54
54
  end
55
55
 
56
56
  should "raise an exception when backticks fail" do
@@ -60,8 +60,8 @@ class ActionTest < Test::Unit::TestCase
60
60
 
61
61
  should "be able to download a remote file" do
62
62
  path = "temp.txt"
63
- @action.download('http://example.com', path)
64
- assert File.read(path).match(/These domain names are reserved for use in documentation/)
63
+ @action.download('http://www.w3.org', path)
64
+ assert File.read(path).match(/standards/i)
65
65
  FileUtils.rm path
66
66
  end
67
67
 
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cloud-crowd
3
3
  version: !ruby/object:Gem::Version
4
- hash: 15
5
- prerelease: false
4
+ hash: 7
5
+ prerelease:
6
6
  segments:
7
7
  - 0
8
- - 5
9
- - 2
10
- version: 0.5.2
8
+ - 6
9
+ - 0
10
+ version: 0.6.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Jeremy Ashkenas
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-08-03 00:00:00 -04:00
18
+ date: 2011-03-04 00:00:00 -05:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -291,7 +291,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
291
291
  requirements: []
292
292
 
293
293
  rubyforge_project: cloud-crowd
294
- rubygems_version: 1.3.7
294
+ rubygems_version: 1.4.2
295
295
  signing_key:
296
296
  specification_version: 3
297
297
  summary: Parallel Processing for the Rest of Us