cloud-crowd 0.5.2 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/cloud-crowd.gemspec +2 -2
- data/lib/cloud-crowd.rb +1 -1
- data/lib/cloud_crowd/action.rb +14 -10
- data/lib/cloud_crowd/models/work_unit.rb +18 -7
- data/lib/cloud_crowd/schema.rb +0 -2
- data/test/unit/test_action.rb +3 -3
- metadata +7 -7
data/cloud-crowd.gemspec
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = 'cloud-crowd'
|
3
|
-
s.version = '0.
|
4
|
-
s.date = '
|
3
|
+
s.version = '0.6.0' # Keep version in sync with cloud-crowd.rb
|
4
|
+
s.date = '2011-03-04'
|
5
5
|
|
6
6
|
s.homepage = "http://wiki.github.com/documentcloud/cloud-crowd"
|
7
7
|
s.summary = "Parallel Processing for the Rest of Us"
|
data/lib/cloud-crowd.rb
CHANGED
@@ -45,7 +45,7 @@ module CloudCrowd
|
|
45
45
|
autoload :WorkUnit, 'cloud_crowd/models'
|
46
46
|
|
47
47
|
# Keep this version in sync with the gemspec.
|
48
|
-
VERSION = '0.
|
48
|
+
VERSION = '0.6.0'
|
49
49
|
|
50
50
|
# Increment the schema version when there's a backwards incompatible change.
|
51
51
|
SCHEMA_VERSION = 4
|
data/lib/cloud_crowd/action.rb
CHANGED
@@ -29,7 +29,7 @@ module CloudCrowd
|
|
29
29
|
def initialize(status, input, options, store)
|
30
30
|
@input, @options, @store = input, options, store
|
31
31
|
@job_id, @work_unit_id = options['job_id'], options['work_unit_id']
|
32
|
-
@work_directory = File.expand_path(File.join(@store.temp_storage_path,
|
32
|
+
@work_directory = File.expand_path(File.join(@store.temp_storage_path, local_storage_prefix))
|
33
33
|
FileUtils.mkdir_p(@work_directory) unless File.exists?(@work_directory)
|
34
34
|
parse_input
|
35
35
|
download_input
|
@@ -59,7 +59,7 @@ module CloudCrowd
|
|
59
59
|
# Takes a local filesystem path, saves the file to S3, and returns the
|
60
60
|
# public (or authenticated) url on S3 where the file can be accessed.
|
61
61
|
def save(file_path)
|
62
|
-
save_path = File.join(
|
62
|
+
save_path = File.join(remote_storage_prefix, File.basename(file_path))
|
63
63
|
@store.save(file_path, save_path)
|
64
64
|
end
|
65
65
|
|
@@ -89,14 +89,18 @@ module CloudCrowd
|
|
89
89
|
File.basename(name, ext).gsub('.', '-') + ext
|
90
90
|
end
|
91
91
|
|
92
|
-
# The directory prefix to use for
|
93
|
-
# [action]/job_[job_id]
|
94
|
-
def
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
92
|
+
# The directory prefix to use for remote storage.
|
93
|
+
# [action]/job_[job_id]/unit_[work_unit_id] (unit segment omitted when there is no work unit id)
|
94
|
+
def remote_storage_prefix
|
95
|
+
@remote_storage_prefix ||= Inflector.underscore(self.class) +
|
96
|
+
"/job_#{@job_id}" + (@work_unit_id ? "/unit_#{@work_unit_id}" : '')
|
97
|
+
end
|
98
|
+
|
99
|
+
# The directory prefix to use for local storage.
|
100
|
+
# [action]/unit_[work_unit_id]
|
101
|
+
def local_storage_prefix
|
102
|
+
@local_storage_prefix ||= Inflector.underscore(self.class) +
|
103
|
+
(@work_unit_id ? "/unit_#{@work_unit_id}" : '')
|
100
104
|
end
|
101
105
|
|
102
106
|
# If we think that the input is JSON, replace it with the parsed form.
|
@@ -39,13 +39,22 @@ module CloudCrowd
|
|
39
39
|
# action in question disabled.
|
40
40
|
def self.distribute_to_nodes
|
41
41
|
reservation = nil
|
42
|
-
filter = {}
|
43
42
|
loop do
|
43
|
+
|
44
|
+
# Find the available nodes, and determine what actions we're capable
|
45
|
+
# of running at the moment.
|
46
|
+
available_nodes = NodeRecord.available
|
47
|
+
available_actions = available_nodes.map {|node| node.actions }.flatten.uniq
|
48
|
+
filter = "action in (#{available_actions.map{|a| "'#{a}'"}.join(',')})"
|
49
|
+
|
50
|
+
# Reserve a handful of available work units.
|
44
51
|
WorkUnit.cancel_reservations(reservation) if reservation
|
45
52
|
return unless reservation = WorkUnit.reserve_available(:limit => RESERVATION_LIMIT, :conditions => filter)
|
46
53
|
work_units = WorkUnit.reserved(reservation)
|
47
|
-
|
48
|
-
|
54
|
+
|
55
|
+
# Round robin through the nodes and units, sending the unit if the node
|
56
|
+
# is able to process it.
|
57
|
+
while (node = available_nodes.shift) && (unit = work_units.shift) do
|
49
58
|
if node.actions.include?(unit.action)
|
50
59
|
if node.send_work_unit(unit)
|
51
60
|
available_nodes.push(node) unless node.busy?
|
@@ -56,10 +65,12 @@ module CloudCrowd
|
|
56
65
|
end
|
57
66
|
work_units.push(unit)
|
58
67
|
end
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
68
|
+
|
69
|
+
# If there are both units and nodes left over, try again.
|
70
|
+
next if work_units.any? && available_nodes.any?
|
71
|
+
|
72
|
+
# If we still have units at this point, or we're fresh out of nodes,
|
73
|
+
# that means we're done.
|
63
74
|
return if work_units.any? || available_nodes.empty?
|
64
75
|
end
|
65
76
|
ensure
|
data/lib/cloud_crowd/schema.rb
CHANGED
@@ -41,8 +41,6 @@ ActiveRecord::Schema.define(:version => CloudCrowd::SCHEMA_VERSION) do
|
|
41
41
|
t.datetime "updated_at"
|
42
42
|
end
|
43
43
|
|
44
|
-
# Here be indices. After looking, it seems faster not to have them at all.
|
45
|
-
#
|
46
44
|
add_index "jobs", ["status"], :name => "index_jobs_on_status"
|
47
45
|
add_index "work_units", ["job_id"], :name => "index_work_units_on_job_id"
|
48
46
|
add_index "work_units", ["worker_pid"], :name => "index_work_units_on_worker_pid"
|
data/test/unit/test_action.rb
CHANGED
@@ -50,7 +50,7 @@ class ActionTest < Test::Unit::TestCase
|
|
50
50
|
end
|
51
51
|
|
52
52
|
should "be able to count the number of words in this file" do
|
53
|
-
assert @action.process ==
|
53
|
+
assert @action.process == 266
|
54
54
|
end
|
55
55
|
|
56
56
|
should "raise an exception when backticks fail" do
|
@@ -60,8 +60,8 @@ class ActionTest < Test::Unit::TestCase
|
|
60
60
|
|
61
61
|
should "be able to download a remote file" do
|
62
62
|
path = "temp.txt"
|
63
|
-
@action.download('http://
|
64
|
-
assert File.read(path).match(/
|
63
|
+
@action.download('http://www.w3.org', path)
|
64
|
+
assert File.read(path).match(/standards/i)
|
65
65
|
FileUtils.rm path
|
66
66
|
end
|
67
67
|
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cloud-crowd
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
5
|
-
prerelease:
|
4
|
+
hash: 7
|
5
|
+
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
-
|
9
|
-
-
|
10
|
-
version: 0.
|
8
|
+
- 6
|
9
|
+
- 0
|
10
|
+
version: 0.6.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Jeremy Ashkenas
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date:
|
18
|
+
date: 2011-03-04 00:00:00 -05:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -291,7 +291,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
291
291
|
requirements: []
|
292
292
|
|
293
293
|
rubyforge_project: cloud-crowd
|
294
|
-
rubygems_version: 1.
|
294
|
+
rubygems_version: 1.4.2
|
295
295
|
signing_key:
|
296
296
|
specification_version: 3
|
297
297
|
summary: Parallel Processing for the Rest of Us
|