cloud-crowd 0.5.2 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- data/cloud-crowd.gemspec +2 -2
- data/lib/cloud-crowd.rb +1 -1
- data/lib/cloud_crowd/action.rb +14 -10
- data/lib/cloud_crowd/models/work_unit.rb +18 -7
- data/lib/cloud_crowd/schema.rb +0 -2
- data/test/unit/test_action.rb +3 -3
- metadata +7 -7
data/cloud-crowd.gemspec
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = 'cloud-crowd'
|
3
|
-
s.version = '0.5.2'
|
4
|
-
s.date = '
|
3
|
+
s.version = '0.6.0' # Keep version in sync with cloud-crowd.rb
|
4
|
+
s.date = '2011-03-04'
|
5
5
|
|
6
6
|
s.homepage = "http://wiki.github.com/documentcloud/cloud-crowd"
|
7
7
|
s.summary = "Parallel Processing for the Rest of Us"
|
data/lib/cloud-crowd.rb
CHANGED
@@ -45,7 +45,7 @@ module CloudCrowd
|
|
45
45
|
autoload :WorkUnit, 'cloud_crowd/models'
|
46
46
|
|
47
47
|
# Keep this version in sync with the gemspec.
|
48
|
-
VERSION = '0.5.2'
|
48
|
+
VERSION = '0.6.0'
|
49
49
|
|
50
50
|
# Increment the schema version when there's a backwards incompatible change.
|
51
51
|
SCHEMA_VERSION = 4
|
data/lib/cloud_crowd/action.rb
CHANGED
@@ -29,7 +29,7 @@ module CloudCrowd
|
|
29
29
|
def initialize(status, input, options, store)
|
30
30
|
@input, @options, @store = input, options, store
|
31
31
|
@job_id, @work_unit_id = options['job_id'], options['work_unit_id']
|
32
|
-
@work_directory = File.expand_path(File.join(@store.temp_storage_path,
|
32
|
+
@work_directory = File.expand_path(File.join(@store.temp_storage_path, local_storage_prefix))
|
33
33
|
FileUtils.mkdir_p(@work_directory) unless File.exists?(@work_directory)
|
34
34
|
parse_input
|
35
35
|
download_input
|
@@ -59,7 +59,7 @@ module CloudCrowd
|
|
59
59
|
# Takes a local filesystem path, saves the file to S3, and returns the
|
60
60
|
# public (or authenticated) url on S3 where the file can be accessed.
|
61
61
|
def save(file_path)
|
62
|
-
save_path = File.join(
|
62
|
+
save_path = File.join(remote_storage_prefix, File.basename(file_path))
|
63
63
|
@store.save(file_path, save_path)
|
64
64
|
end
|
65
65
|
|
@@ -89,14 +89,18 @@ module CloudCrowd
|
|
89
89
|
File.basename(name, ext).gsub('.', '-') + ext
|
90
90
|
end
|
91
91
|
|
92
|
-
# The directory prefix to use for
|
93
|
-
# [action]/job_[job_id]
|
94
|
-
def
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
92
|
+
# The directory prefix to use for remote storage.
|
93
|
+
# [action]/job_[job_id]
|
94
|
+
def remote_storage_prefix
|
95
|
+
@remote_storage_prefix ||= Inflector.underscore(self.class) +
|
96
|
+
"/job_#{@job_id}" + (@work_unit_id ? "/unit_#{@work_unit_id}" : '')
|
97
|
+
end
|
98
|
+
|
99
|
+
# The directory prefix to use for local storage.
|
100
|
+
# [action]/unit_[work_unit_id]
|
101
|
+
def local_storage_prefix
|
102
|
+
@local_storage_prefix ||= Inflector.underscore(self.class) +
|
103
|
+
(@work_unit_id ? "/unit_#{@work_unit_id}" : '')
|
100
104
|
end
|
101
105
|
|
102
106
|
# If we think that the input is JSON, replace it with the parsed form.
|
@@ -39,13 +39,22 @@ module CloudCrowd
|
|
39
39
|
# action in question disabled.
|
40
40
|
def self.distribute_to_nodes
|
41
41
|
reservation = nil
|
42
|
-
filter = {}
|
43
42
|
loop do
|
43
|
+
|
44
|
+
# Find the available nodes, and determine what actions we're capable
|
45
|
+
# of running at the moment.
|
46
|
+
available_nodes = NodeRecord.available
|
47
|
+
available_actions = available_nodes.map {|node| node.actions }.flatten.uniq
|
48
|
+
filter = "action in (#{available_actions.map{|a| "'#{a}'"}.join(',')})"
|
49
|
+
|
50
|
+
# Reserve a handful of available work units.
|
44
51
|
WorkUnit.cancel_reservations(reservation) if reservation
|
45
52
|
return unless reservation = WorkUnit.reserve_available(:limit => RESERVATION_LIMIT, :conditions => filter)
|
46
53
|
work_units = WorkUnit.reserved(reservation)
|
47
|
-
|
48
|
-
|
54
|
+
|
55
|
+
# Round robin through the nodes and units, sending the unit if the node
|
56
|
+
# is able to process it.
|
57
|
+
while (node = available_nodes.shift) && (unit = work_units.shift) do
|
49
58
|
if node.actions.include?(unit.action)
|
50
59
|
if node.send_work_unit(unit)
|
51
60
|
available_nodes.push(node) unless node.busy?
|
@@ -56,10 +65,12 @@ module CloudCrowd
|
|
56
65
|
end
|
57
66
|
work_units.push(unit)
|
58
67
|
end
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
68
|
+
|
69
|
+
# If there are both units and nodes left over, try again.
|
70
|
+
next if work_units.any? && available_nodes.any?
|
71
|
+
|
72
|
+
# If we still have units at this point, or we're fresh out of nodes,
|
73
|
+
# that means we're done.
|
63
74
|
return if work_units.any? || available_nodes.empty?
|
64
75
|
end
|
65
76
|
ensure
|
data/lib/cloud_crowd/schema.rb
CHANGED
@@ -41,8 +41,6 @@ ActiveRecord::Schema.define(:version => CloudCrowd::SCHEMA_VERSION) do
|
|
41
41
|
t.datetime "updated_at"
|
42
42
|
end
|
43
43
|
|
44
|
-
# Here be indices. After looking, it seems faster not to have them at all.
|
45
|
-
#
|
46
44
|
add_index "jobs", ["status"], :name => "index_jobs_on_status"
|
47
45
|
add_index "work_units", ["job_id"], :name => "index_work_units_on_job_id"
|
48
46
|
add_index "work_units", ["worker_pid"], :name => "index_work_units_on_worker_pid"
|
data/test/unit/test_action.rb
CHANGED
@@ -50,7 +50,7 @@ class ActionTest < Test::Unit::TestCase
|
|
50
50
|
end
|
51
51
|
|
52
52
|
should "be able to count the number of words in this file" do
|
53
|
-
assert @action.process ==
|
53
|
+
assert @action.process == 266
|
54
54
|
end
|
55
55
|
|
56
56
|
should "raise an exception when backticks fail" do
|
@@ -60,8 +60,8 @@ class ActionTest < Test::Unit::TestCase
|
|
60
60
|
|
61
61
|
should "be able to download a remote file" do
|
62
62
|
path = "temp.txt"
|
63
|
-
@action.download('http://
|
64
|
-
assert File.read(path).match(/
|
63
|
+
@action.download('http://www.w3.org', path)
|
64
|
+
assert File.read(path).match(/standards/i)
|
65
65
|
FileUtils.rm path
|
66
66
|
end
|
67
67
|
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cloud-crowd
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
5
|
-
prerelease:
|
4
|
+
hash: 7
|
5
|
+
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
- 5
|
9
|
-
- 2
|
10
|
-
version: 0.5.2
|
8
|
+
- 6
|
9
|
+
- 0
|
10
|
+
version: 0.6.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Jeremy Ashkenas
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date:
|
18
|
+
date: 2011-03-04 00:00:00 -05:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -291,7 +291,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
291
291
|
requirements: []
|
292
292
|
|
293
293
|
rubyforge_project: cloud-crowd
|
294
|
-
rubygems_version: 1.
|
294
|
+
rubygems_version: 1.4.2
|
295
295
|
signing_key:
|
296
296
|
specification_version: 3
|
297
297
|
summary: Parallel Processing for the Rest of Us
|