cloud-crowd 0.2.8 → 0.2.9
Sign up to get free protection for your applications and to get access to all the features.
- data/actions/process_pdfs.rb +1 -1
- data/cloud-crowd.gemspec +2 -2
- data/lib/cloud-crowd.rb +13 -1
- data/lib/cloud_crowd/action.rb +6 -3
- data/lib/cloud_crowd/models/work_unit.rb +16 -14
- data/lib/cloud_crowd/node.rb +1 -0
- data/lib/cloud_crowd/server.rb +6 -0
- data/test/acceptance/test_server.rb +5 -12
- data/test/unit/test_node.rb +4 -0
- metadata +2 -2
data/actions/process_pdfs.rb
CHANGED
@@ -21,7 +21,7 @@ class ProcessPdfs < CloudCrowd::Action
|
|
21
21
|
batch_pdfs = pdfs[batch_num*batch_size...(batch_num + 1)*batch_size]
|
22
22
|
`tar -czf #{tar_path} #{batch_pdfs.join(' ')}`
|
23
23
|
end
|
24
|
-
Dir["*.tar"].map {|tar| save(tar) }
|
24
|
+
Dir["*.tar"].map {|tar| save(tar) }
|
25
25
|
end
|
26
26
|
|
27
27
|
# Convert a pdf page into different-sized thumbnails. Grab the text.
|
data/cloud-crowd.gemspec
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = 'cloud-crowd'
|
3
|
-
s.version = '0.2.8'
|
4
|
-
s.date = '2009-
|
3
|
+
s.version = '0.2.9' # Keep version in sync with cloud-crowd.rb
|
4
|
+
s.date = '2009-11-03'
|
5
5
|
|
6
6
|
s.homepage = "http://wiki.github.com/documentcloud/cloud-crowd"
|
7
7
|
s.summary = "Parallel Processing for the Rest of Us"
|
data/lib/cloud-crowd.rb
CHANGED
@@ -44,7 +44,7 @@ module CloudCrowd
|
|
44
44
|
autoload :WorkUnit, 'cloud_crowd/models'
|
45
45
|
|
46
46
|
# Keep this version in sync with the gemspec.
|
47
|
-
VERSION = '0.2.8'
|
47
|
+
VERSION = '0.2.9'
|
48
48
|
|
49
49
|
# Increment the schema version when there's a backwards incompatible change.
|
50
50
|
SCHEMA_VERSION = 3
|
@@ -87,6 +87,7 @@ module CloudCrowd
|
|
87
87
|
|
88
88
|
class << self
|
89
89
|
attr_reader :config
|
90
|
+
attr_accessor :identity
|
90
91
|
|
91
92
|
# Configure CloudCrowd by passing in the path to <tt>config.yml</tt>.
|
92
93
|
def configure(config_path)
|
@@ -171,6 +172,17 @@ module CloudCrowd
|
|
171
172
|
default_actions + installed_actions + custom_actions
|
172
173
|
end
|
173
174
|
|
175
|
+
# Is this CloudCrowd instance a server? Useful for avoiding loading unneeded
|
176
|
+
# code from actions.
|
177
|
+
def server?
|
178
|
+
@identity == :server
|
179
|
+
end
|
180
|
+
|
181
|
+
# Or is it a node?
|
182
|
+
def node?
|
183
|
+
@identity == :node
|
184
|
+
end
|
185
|
+
|
174
186
|
end
|
175
187
|
|
176
188
|
end
|
data/lib/cloud_crowd/action.rb
CHANGED
@@ -31,7 +31,8 @@ module CloudCrowd
|
|
31
31
|
@job_id, @work_unit_id = options['job_id'], options['work_unit_id']
|
32
32
|
@work_directory = File.expand_path(File.join(@store.temp_storage_path, storage_prefix))
|
33
33
|
FileUtils.mkdir_p(@work_directory) unless File.exists?(@work_directory)
|
34
|
-
|
34
|
+
parse_input
|
35
|
+
download_input
|
35
36
|
end
|
36
37
|
|
37
38
|
# Each Action subclass must implement a +process+ method, overriding this.
|
@@ -98,9 +99,11 @@ module CloudCrowd
|
|
98
99
|
@storage_prefix ||= File.join(path_parts)
|
99
100
|
end
|
100
101
|
|
101
|
-
# If we
|
102
|
+
# If we think that the input is JSON, replace it with the parsed form.
|
103
|
+
# It would be great if the JSON module had an is_json? method.
|
102
104
|
def parse_input
|
103
|
-
|
105
|
+
return unless ['[', '{'].include? @input[0..0]
|
106
|
+
@input = JSON.parse(@input) rescue @input
|
104
107
|
end
|
105
108
|
|
106
109
|
def input_is_url?
|
data/lib/cloud_crowd/models/work_unit.rb
CHANGED
@@ -23,8 +23,8 @@ module CloudCrowd
|
|
23
23
|
# Available WorkUnits are waiting to be distributed to Nodes for processing.
|
24
24
|
named_scope :available, {:conditions => {:reservation => nil, :worker_pid => nil, :status => INCOMPLETE}}
|
25
25
|
# Reserved WorkUnits have been marked for distribution by a central server process.
|
26
|
-
named_scope :reserved, lambda {|
|
27
|
-
{:conditions => {:reservation =>
|
26
|
+
named_scope :reserved, lambda {|reservation|
|
27
|
+
{:conditions => {:reservation => reservation}, :order => 'updated_at asc'}
|
28
28
|
}
|
29
29
|
|
30
30
|
# Attempt to send a list of WorkUnits to nodes with available capacity.
|
@@ -38,9 +38,10 @@ module CloudCrowd
|
|
38
38
|
# successfully sent, and Nodes get removed when they are busy or have the
|
39
39
|
# action in question disabled.
|
40
40
|
def self.distribute_to_nodes
|
41
|
-
|
42
|
-
|
43
|
-
|
41
|
+
reservation = nil
|
42
|
+
loop do
|
43
|
+
return unless reservation = WorkUnit.reserve_available(:limit => RESERVATION_LIMIT)
|
44
|
+
work_units = WorkUnit.reserved(reservation)
|
44
45
|
available_nodes = NodeRecord.available
|
45
46
|
while node = available_nodes.shift and unit = work_units.shift do
|
46
47
|
if node.actions.include? unit.action
|
@@ -51,23 +52,23 @@ module CloudCrowd
|
|
51
52
|
end
|
52
53
|
work_units.push(unit)
|
53
54
|
end
|
54
|
-
|
55
|
-
ensure
|
56
|
-
WorkUnit.cancel_reservations(reservation_number) if reservation_number
|
55
|
+
return if work_units.any? || available_nodes.empty?
|
57
56
|
end
|
57
|
+
ensure
|
58
|
+
WorkUnit.cancel_reservations(reservation) if reservation
|
58
59
|
end
|
59
60
|
|
60
61
|
# Reserves all available WorkUnits for this process. Returns false if there
|
61
62
|
# were none available.
|
62
63
|
def self.reserve_available(options={})
|
63
|
-
|
64
|
-
any = WorkUnit.available.update_all("reservation = #{
|
65
|
-
any &&
|
64
|
+
reservation = ActiveSupport::SecureRandom.random_number(MAX_RESERVATION)
|
65
|
+
any = WorkUnit.available.update_all("reservation = #{reservation}", nil, options) > 0
|
66
|
+
any && reservation
|
66
67
|
end
|
67
68
|
|
68
69
|
# Cancels all outstanding WorkUnit reservations for this process.
|
69
|
-
def self.cancel_reservations(
|
70
|
-
WorkUnit.reserved(
|
70
|
+
def self.cancel_reservations(reservation)
|
71
|
+
WorkUnit.reserved(reservation).update_all('reservation = null')
|
71
72
|
end
|
72
73
|
|
73
74
|
# Cancels all outstanding WorkUnit reservations for all processes. (Useful
|
@@ -95,7 +96,8 @@ module CloudCrowd
|
|
95
96
|
# without waiting for the rest of their splitting cousins to complete.
|
96
97
|
def finish(result, time_taken)
|
97
98
|
if splitting?
|
98
|
-
[
|
99
|
+
[parsed_output(result)].flatten.each do |new_input|
|
100
|
+
new_input = new_input.to_json unless new_input.is_a? String
|
99
101
|
WorkUnit.start(job, action, new_input, PROCESSING)
|
100
102
|
end
|
101
103
|
self.destroy
|
data/lib/cloud_crowd/node.rb
CHANGED
@@ -65,6 +65,7 @@ module CloudCrowd
|
|
65
65
|
# When creating a node, specify the port it should run on.
|
66
66
|
def initialize(port=nil, daemon=false)
|
67
67
|
require 'json'
|
68
|
+
CloudCrowd.identity = :node
|
68
69
|
@central = CloudCrowd.central_server
|
69
70
|
@host = Socket.gethostname
|
70
71
|
@enabled_actions = CloudCrowd.actions.keys
|
data/lib/cloud_crowd/server.rb
CHANGED
data/test/acceptance/test_server.rb
CHANGED
@@ -15,6 +15,11 @@ class ServerTest < Test::Unit::TestCase
|
|
15
15
|
2.times { Job.make }
|
16
16
|
end
|
17
17
|
|
18
|
+
should "set the identity of the Ruby instance" do
|
19
|
+
app.new
|
20
|
+
assert CloudCrowd.server?
|
21
|
+
end
|
22
|
+
|
18
23
|
should "be able to render the Operations Center (GET /)" do
|
19
24
|
get '/'
|
20
25
|
assert last_response.body.include? '<div id="nodes">'
|
@@ -29,18 +34,6 @@ class ServerTest < Test::Unit::TestCase
|
|
29
34
|
assert resp['work_unit_count'] == 2
|
30
35
|
end
|
31
36
|
|
32
|
-
# should "be able to check in a worker daemon, and then check out a work unit" do
|
33
|
-
# put '/worker', :name => '101@localhost', :thread_status => 'sleeping'
|
34
|
-
# assert last_response.successful? && last_response.empty?
|
35
|
-
# post '/work', :worker_name => '101@localhost', :worker_actions => 'graphics_magick'
|
36
|
-
# checked_out = JSON.parse(last_response.body)
|
37
|
-
# assert checked_out['action'] == 'graphics_magick'
|
38
|
-
# assert checked_out['attempts'] == 0
|
39
|
-
# assert checked_out['status'] == CloudCrowd::PROCESSING
|
40
|
-
# status_check = JSON.parse(get('/worker/101@localhost').body)
|
41
|
-
# assert checked_out == status_check
|
42
|
-
# end
|
43
|
-
|
44
37
|
should "have a heartbeat" do
|
45
38
|
assert get('/heartbeat').body == 'buh-bump'
|
46
39
|
end
|
data/test/unit/test_node.rb
CHANGED
@@ -8,6 +8,10 @@ class NodeUnitTest < Test::Unit::TestCase
|
|
8
8
|
@node = Node.new(11011).instance_variable_get(:@app)
|
9
9
|
end
|
10
10
|
|
11
|
+
should "set the identity of the Ruby instance" do
|
12
|
+
assert CloudCrowd.node?
|
13
|
+
end
|
14
|
+
|
11
15
|
should "instantiate correctly" do
|
12
16
|
assert @node.central.to_s == "http://localhost:9173"
|
13
17
|
assert @node.port == 11011
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cloud-crowd
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.8
|
4
|
+
version: 0.2.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jeremy Ashkenas
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-
|
12
|
+
date: 2009-11-03 00:00:00 -05:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|