cloud-crowd 0.2.8 → 0.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/actions/process_pdfs.rb +1 -1
- data/cloud-crowd.gemspec +2 -2
- data/lib/cloud-crowd.rb +13 -1
- data/lib/cloud_crowd/action.rb +6 -3
- data/lib/cloud_crowd/models/work_unit.rb +16 -14
- data/lib/cloud_crowd/node.rb +1 -0
- data/lib/cloud_crowd/server.rb +6 -0
- data/test/acceptance/test_server.rb +5 -12
- data/test/unit/test_node.rb +4 -0
- metadata +2 -2
data/actions/process_pdfs.rb
CHANGED
|
@@ -21,7 +21,7 @@ class ProcessPdfs < CloudCrowd::Action
|
|
|
21
21
|
batch_pdfs = pdfs[batch_num*batch_size...(batch_num + 1)*batch_size]
|
|
22
22
|
`tar -czf #{tar_path} #{batch_pdfs.join(' ')}`
|
|
23
23
|
end
|
|
24
|
-
Dir["*.tar"].map {|tar| save(tar) }
|
|
24
|
+
Dir["*.tar"].map {|tar| save(tar) }
|
|
25
25
|
end
|
|
26
26
|
|
|
27
27
|
# Convert a pdf page into different-sized thumbnails. Grab the text.
|
data/cloud-crowd.gemspec
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Gem::Specification.new do |s|
|
|
2
2
|
s.name = 'cloud-crowd'
|
|
3
|
-
s.version = '0.2.8'
|
|
4
|
-
s.date = '2009-
|
|
3
|
+
s.version = '0.2.9' # Keep version in sync with cloud-crowd.rb
|
|
4
|
+
s.date = '2009-11-03'
|
|
5
5
|
|
|
6
6
|
s.homepage = "http://wiki.github.com/documentcloud/cloud-crowd"
|
|
7
7
|
s.summary = "Parallel Processing for the Rest of Us"
|
data/lib/cloud-crowd.rb
CHANGED
|
@@ -44,7 +44,7 @@ module CloudCrowd
|
|
|
44
44
|
autoload :WorkUnit, 'cloud_crowd/models'
|
|
45
45
|
|
|
46
46
|
# Keep this version in sync with the gemspec.
|
|
47
|
-
VERSION = '0.2.8'
|
|
47
|
+
VERSION = '0.2.9'
|
|
48
48
|
|
|
49
49
|
# Increment the schema version when there's a backwards incompatible change.
|
|
50
50
|
SCHEMA_VERSION = 3
|
|
@@ -87,6 +87,7 @@ module CloudCrowd
|
|
|
87
87
|
|
|
88
88
|
class << self
|
|
89
89
|
attr_reader :config
|
|
90
|
+
attr_accessor :identity
|
|
90
91
|
|
|
91
92
|
# Configure CloudCrowd by passing in the path to <tt>config.yml</tt>.
|
|
92
93
|
def configure(config_path)
|
|
@@ -171,6 +172,17 @@ module CloudCrowd
|
|
|
171
172
|
default_actions + installed_actions + custom_actions
|
|
172
173
|
end
|
|
173
174
|
|
|
175
|
+
# Is this CloudCrowd instance a server? Useful for avoiding loading unneeded
|
|
176
|
+
# code from actions.
|
|
177
|
+
def server?
|
|
178
|
+
@identity == :server
|
|
179
|
+
end
|
|
180
|
+
|
|
181
|
+
# Or is it a node?
|
|
182
|
+
def node?
|
|
183
|
+
@identity == :node
|
|
184
|
+
end
|
|
185
|
+
|
|
174
186
|
end
|
|
175
187
|
|
|
176
188
|
end
|
data/lib/cloud_crowd/action.rb
CHANGED
|
@@ -31,7 +31,8 @@ module CloudCrowd
|
|
|
31
31
|
@job_id, @work_unit_id = options['job_id'], options['work_unit_id']
|
|
32
32
|
@work_directory = File.expand_path(File.join(@store.temp_storage_path, storage_prefix))
|
|
33
33
|
FileUtils.mkdir_p(@work_directory) unless File.exists?(@work_directory)
|
|
34
|
-
|
|
34
|
+
parse_input
|
|
35
|
+
download_input
|
|
35
36
|
end
|
|
36
37
|
|
|
37
38
|
# Each Action subclass must implement a +process+ method, overriding this.
|
|
@@ -98,9 +99,11 @@ module CloudCrowd
|
|
|
98
99
|
@storage_prefix ||= File.join(path_parts)
|
|
99
100
|
end
|
|
100
101
|
|
|
101
|
-
# If we
|
|
102
|
+
# If we think that the input is JSON, replace it with the parsed form.
|
|
103
|
+
# It would be great if the JSON module had an is_json? method.
|
|
102
104
|
def parse_input
|
|
103
|
-
|
|
105
|
+
return unless ['[', '{'].include? @input[0..0]
|
|
106
|
+
@input = JSON.parse(@input) rescue @input
|
|
104
107
|
end
|
|
105
108
|
|
|
106
109
|
def input_is_url?
|
data/lib/cloud_crowd/models/work_unit.rb
CHANGED
|
@@ -23,8 +23,8 @@ module CloudCrowd
|
|
|
23
23
|
# Available WorkUnits are waiting to be distributed to Nodes for processing.
|
|
24
24
|
named_scope :available, {:conditions => {:reservation => nil, :worker_pid => nil, :status => INCOMPLETE}}
|
|
25
25
|
# Reserved WorkUnits have been marked for distribution by a central server process.
|
|
26
|
-
named_scope :reserved, lambda {|
|
|
27
|
-
{:conditions => {:reservation =>
|
|
26
|
+
named_scope :reserved, lambda {|reservation|
|
|
27
|
+
{:conditions => {:reservation => reservation}, :order => 'updated_at asc'}
|
|
28
28
|
}
|
|
29
29
|
|
|
30
30
|
# Attempt to send a list of WorkUnits to nodes with available capacity.
|
|
@@ -38,9 +38,10 @@ module CloudCrowd
|
|
|
38
38
|
# successfully sent, and Nodes get removed when they are busy or have the
|
|
39
39
|
# action in question disabled.
|
|
40
40
|
def self.distribute_to_nodes
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
41
|
+
reservation = nil
|
|
42
|
+
loop do
|
|
43
|
+
return unless reservation = WorkUnit.reserve_available(:limit => RESERVATION_LIMIT)
|
|
44
|
+
work_units = WorkUnit.reserved(reservation)
|
|
44
45
|
available_nodes = NodeRecord.available
|
|
45
46
|
while node = available_nodes.shift and unit = work_units.shift do
|
|
46
47
|
if node.actions.include? unit.action
|
|
@@ -51,23 +52,23 @@ module CloudCrowd
|
|
|
51
52
|
end
|
|
52
53
|
work_units.push(unit)
|
|
53
54
|
end
|
|
54
|
-
|
|
55
|
-
ensure
|
|
56
|
-
WorkUnit.cancel_reservations(reservation_number) if reservation_number
|
|
55
|
+
return if work_units.any? || available_nodes.empty?
|
|
57
56
|
end
|
|
57
|
+
ensure
|
|
58
|
+
WorkUnit.cancel_reservations(reservation) if reservation
|
|
58
59
|
end
|
|
59
60
|
|
|
60
61
|
# Reserves all available WorkUnits for this process. Returns false if there
|
|
61
62
|
# were none available.
|
|
62
63
|
def self.reserve_available(options={})
|
|
63
|
-
|
|
64
|
-
any = WorkUnit.available.update_all("reservation = #{
|
|
65
|
-
any &&
|
|
64
|
+
reservation = ActiveSupport::SecureRandom.random_number(MAX_RESERVATION)
|
|
65
|
+
any = WorkUnit.available.update_all("reservation = #{reservation}", nil, options) > 0
|
|
66
|
+
any && reservation
|
|
66
67
|
end
|
|
67
68
|
|
|
68
69
|
# Cancels all outstanding WorkUnit reservations for this process.
|
|
69
|
-
def self.cancel_reservations(
|
|
70
|
-
WorkUnit.reserved(
|
|
70
|
+
def self.cancel_reservations(reservation)
|
|
71
|
+
WorkUnit.reserved(reservation).update_all('reservation = null')
|
|
71
72
|
end
|
|
72
73
|
|
|
73
74
|
# Cancels all outstanding WorkUnit reservations for all processes. (Useful
|
|
@@ -95,7 +96,8 @@ module CloudCrowd
|
|
|
95
96
|
# without waiting for the rest of their splitting cousins to complete.
|
|
96
97
|
def finish(result, time_taken)
|
|
97
98
|
if splitting?
|
|
98
|
-
[
|
|
99
|
+
[parsed_output(result)].flatten.each do |new_input|
|
|
100
|
+
new_input = new_input.to_json unless new_input.is_a? String
|
|
99
101
|
WorkUnit.start(job, action, new_input, PROCESSING)
|
|
100
102
|
end
|
|
101
103
|
self.destroy
|
data/lib/cloud_crowd/node.rb
CHANGED
|
@@ -65,6 +65,7 @@ module CloudCrowd
|
|
|
65
65
|
# When creating a node, specify the port it should run on.
|
|
66
66
|
def initialize(port=nil, daemon=false)
|
|
67
67
|
require 'json'
|
|
68
|
+
CloudCrowd.identity = :node
|
|
68
69
|
@central = CloudCrowd.central_server
|
|
69
70
|
@host = Socket.gethostname
|
|
70
71
|
@enabled_actions = CloudCrowd.actions.keys
|
data/lib/cloud_crowd/server.rb
CHANGED
|
[hunk (+6 -0) not captured in this listing]
|
data/test/acceptance/test_server.rb
CHANGED
|
@@ -15,6 +15,11 @@ class ServerTest < Test::Unit::TestCase
|
|
|
15
15
|
2.times { Job.make }
|
|
16
16
|
end
|
|
17
17
|
|
|
18
|
+
should "set the identity of the Ruby instance" do
|
|
19
|
+
app.new
|
|
20
|
+
assert CloudCrowd.server?
|
|
21
|
+
end
|
|
22
|
+
|
|
18
23
|
should "be able to render the Operations Center (GET /)" do
|
|
19
24
|
get '/'
|
|
20
25
|
assert last_response.body.include? '<div id="nodes">'
|
|
@@ -29,18 +34,6 @@ class ServerTest < Test::Unit::TestCase
|
|
|
29
34
|
assert resp['work_unit_count'] == 2
|
|
30
35
|
end
|
|
31
36
|
|
|
32
|
-
# should "be able to check in a worker daemon, and then check out a work unit" do
|
|
33
|
-
# put '/worker', :name => '101@localhost', :thread_status => 'sleeping'
|
|
34
|
-
# assert last_response.successful? && last_response.empty?
|
|
35
|
-
# post '/work', :worker_name => '101@localhost', :worker_actions => 'graphics_magick'
|
|
36
|
-
# checked_out = JSON.parse(last_response.body)
|
|
37
|
-
# assert checked_out['action'] == 'graphics_magick'
|
|
38
|
-
# assert checked_out['attempts'] == 0
|
|
39
|
-
# assert checked_out['status'] == CloudCrowd::PROCESSING
|
|
40
|
-
# status_check = JSON.parse(get('/worker/101@localhost').body)
|
|
41
|
-
# assert checked_out == status_check
|
|
42
|
-
# end
|
|
43
|
-
|
|
44
37
|
should "have a heartbeat" do
|
|
45
38
|
assert get('/heartbeat').body == 'buh-bump'
|
|
46
39
|
end
|
data/test/unit/test_node.rb
CHANGED
|
@@ -8,6 +8,10 @@ class NodeUnitTest < Test::Unit::TestCase
|
|
|
8
8
|
@node = Node.new(11011).instance_variable_get(:@app)
|
|
9
9
|
end
|
|
10
10
|
|
|
11
|
+
should "set the identity of the Ruby instance" do
|
|
12
|
+
assert CloudCrowd.node?
|
|
13
|
+
end
|
|
14
|
+
|
|
11
15
|
should "instantiate correctly" do
|
|
12
16
|
assert @node.central.to_s == "http://localhost:9173"
|
|
13
17
|
assert @node.port == 11011
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: cloud-crowd
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.8
|
|
4
|
+
version: 0.2.9
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Jeremy Ashkenas
|
|
@@ -9,7 +9,7 @@ autorequire:
|
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
11
|
|
|
12
|
-
date: 2009-
|
|
12
|
+
date: 2009-11-03 00:00:00 -05:00
|
|
13
13
|
default_executable:
|
|
14
14
|
dependencies:
|
|
15
15
|
- !ruby/object:Gem::Dependency
|