documentcloud-cloud-crowd 0.0.6 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README +12 -3
- data/cloud-crowd.gemspec +9 -4
- data/config/config.example.yml +2 -2
- data/lib/cloud-crowd.rb +1 -1
- data/lib/cloud_crowd/action.rb +9 -10
- data/lib/cloud_crowd/app.rb +1 -1
- data/lib/cloud_crowd/asset_store.rb +12 -72
- data/lib/cloud_crowd/asset_store/filesystem_store.rb +28 -0
- data/lib/cloud_crowd/asset_store/s3_store.rb +40 -0
- data/lib/cloud_crowd/command_line.rb +1 -0
- data/lib/cloud_crowd/exceptions.rb +21 -15
- data/lib/cloud_crowd/helpers.rb +1 -1
- data/lib/cloud_crowd/inflector.rb +1 -1
- data/lib/cloud_crowd/models/job.rb +5 -5
- data/lib/cloud_crowd/worker.rb +8 -5
- data/public/js/{excanvas.pack.js → excanvas.js} +0 -0
- data/public/js/{jquery.flot.pack.js → flot.js} +0 -0
- data/public/js/{jquery-1.3.2.min.js → jquery.js} +0 -0
- data/test/acceptance/test_app.rb +72 -0
- data/test/unit/test_action.rb +49 -0
- data/test/unit/test_configuration.rb +28 -0
- data/views/index.erb +3 -3
- metadata +9 -4
data/README
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
|
1
|
+
=
|
2
2
|
_ _
|
3
3
|
( ` )_
|
4
4
|
( ) `)
|
@@ -29,11 +29,20 @@
|
|
29
29
|
* Built for Amazon EC2 and S3
|
30
30
|
* split -> process -> merge
|
31
31
|
* As easy as `gem install cloud-crowd`
|
32
|
+
|
33
|
+
Well-suited for:
|
34
|
+
|
35
|
+
* Generating or resizing images.
|
36
|
+
* Encoding video.
|
37
|
+
* Running text extraction or OCR on PDFs.
|
38
|
+
* Migrating a large file set or database.
|
39
|
+
* Web scraping.
|
32
40
|
|
33
41
|
|
34
|
-
~
|
42
|
+
~ Documentation ~
|
35
43
|
|
36
|
-
http://wiki.github.com/documentcloud/cloud-crowd
|
44
|
+
Wiki: http://wiki.github.com/documentcloud/cloud-crowd
|
45
|
+
Rdoc: http://rdoc.info/projects/documentcloud/cloud-crowd
|
37
46
|
|
38
47
|
|
39
48
|
~ Getting started ~
|
data/cloud-crowd.gemspec
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = 'cloud-crowd'
|
3
|
-
s.version = '0.0.6'
|
3
|
+
s.version = '0.1.0' # Keep version in sync with cloud-crowd.rb
|
4
4
|
s.date = '2009-09-01'
|
5
5
|
|
6
6
|
s.homepage = "http://wiki.github.com/documentcloud/cloud-crowd"
|
@@ -57,6 +57,8 @@ examples/word_count_example.rb
|
|
57
57
|
lib/cloud-crowd.rb
|
58
58
|
lib/cloud_crowd/action.rb
|
59
59
|
lib/cloud_crowd/app.rb
|
60
|
+
lib/cloud_crowd/asset_store/filesystem_store.rb
|
61
|
+
lib/cloud_crowd/asset_store/s3_store.rb
|
60
62
|
lib/cloud_crowd/asset_store.rb
|
61
63
|
lib/cloud_crowd/command_line.rb
|
62
64
|
lib/cloud_crowd/daemon.rb
|
@@ -87,10 +89,11 @@ public/images/sidebar_top.png
|
|
87
89
|
public/images/worker_info.png
|
88
90
|
public/images/worker_info_loading.gif
|
89
91
|
public/js/admin_console.js
|
90
|
-
public/js/excanvas.
|
91
|
-
public/js/
|
92
|
-
public/js/jquery
|
92
|
+
public/js/excanvas.js
|
93
|
+
public/js/flot.js
|
94
|
+
public/js/jquery.js
|
93
95
|
README
|
96
|
+
test/acceptance/test_app.rb
|
94
97
|
test/acceptance/test_failing_work_units.rb
|
95
98
|
test/acceptance/test_word_count.rb
|
96
99
|
test/blueprints.rb
|
@@ -99,6 +102,8 @@ test/config/config.yml
|
|
99
102
|
test/config/database.yml
|
100
103
|
test/config/actions/failure_testing.rb
|
101
104
|
test/test_helper.rb
|
105
|
+
test/unit/test_action.rb
|
106
|
+
test/unit/test_configuration.rb
|
102
107
|
test/unit/test_job.rb
|
103
108
|
test/unit/test_work_unit.rb
|
104
109
|
views/index.erb
|
data/config/config.example.yml
CHANGED
@@ -35,13 +35,13 @@
|
|
35
35
|
# desired balance between latency and traffic.
|
36
36
|
|
37
37
|
# The number of workers that `crowd workers start` spins up.
|
38
|
-
:num_workers:
|
38
|
+
:num_workers: 3
|
39
39
|
|
40
40
|
# The minimum number of seconds a worker waits between checking the job queue.
|
41
41
|
:min_worker_wait: 1
|
42
42
|
|
43
43
|
# The maximum number of seconds a worker waits between checking the job queue.
|
44
|
-
:max_worker_wait:
|
44
|
+
:max_worker_wait: 5
|
45
45
|
|
46
46
|
# The number of separate attempts that will be made to process an individual
|
47
47
|
# work unit, before marking it as having failed.
|
data/lib/cloud-crowd.rb
CHANGED
@@ -45,7 +45,7 @@ module CloudCrowd
|
|
45
45
|
ROOT = File.expand_path(File.dirname(__FILE__) + '/..')
|
46
46
|
|
47
47
|
# Keep the version in sync with the gemspec.
|
48
|
-
VERSION = '0.0.6'
|
48
|
+
VERSION = '0.1.0'
|
49
49
|
|
50
50
|
# A Job is processing if its WorkUnits are in the queue to be handled by workers.
|
51
51
|
PROCESSING = 1
|
data/lib/cloud_crowd/action.rb
CHANGED
@@ -28,7 +28,6 @@ module CloudCrowd
|
|
28
28
|
@job_id, @work_unit_id = options['job_id'], options['work_unit_id']
|
29
29
|
@work_directory = File.expand_path(File.join(@store.temp_storage_path, storage_prefix))
|
30
30
|
FileUtils.mkdir_p(@work_directory) unless File.exists?(@work_directory)
|
31
|
-
Dir.chdir @work_directory
|
32
31
|
status == MERGING ? parse_input : download_input
|
33
32
|
end
|
34
33
|
|
@@ -53,13 +52,11 @@ module CloudCrowd
|
|
53
52
|
def save(file_path)
|
54
53
|
save_path = File.join(storage_prefix, File.basename(file_path))
|
55
54
|
@store.save(file_path, save_path)
|
56
|
-
return @store.url(save_path)
|
57
55
|
end
|
58
56
|
|
59
57
|
# After the Action has finished, we remove the work directory and return
|
60
58
|
# to the root directory (where daemons run by default).
|
61
59
|
def cleanup_work_directory
|
62
|
-
Dir.chdir '/'
|
63
60
|
FileUtils.rm_r(@work_directory) if File.exists?(@work_directory)
|
64
61
|
end
|
65
62
|
|
@@ -68,8 +65,8 @@ module CloudCrowd
|
|
68
65
|
|
69
66
|
# Convert an unsafe URL into a filesystem-friendly filename.
|
70
67
|
def safe_filename(url)
|
71
|
-
ext
|
72
|
-
name = File.basename(url)
|
68
|
+
ext = File.extname(url)
|
69
|
+
name = URI.unescape(File.basename(url)).gsub(/[^a-zA-Z0-9_\-.]/, '-').gsub(/-+/, '-')
|
73
70
|
File.basename(name, ext).gsub('.', '-') + ext
|
74
71
|
end
|
75
72
|
|
@@ -90,11 +87,13 @@ module CloudCrowd
|
|
90
87
|
|
91
88
|
# If the input is a URL, download the file before beginning processing.
|
92
89
|
def download_input
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
90
|
+
Dir.chdir(@work_directory) do
|
91
|
+
input_is_url = !!URI.parse(@input) rescue false
|
92
|
+
return unless input_is_url
|
93
|
+
@input_path = File.join(@work_directory, safe_filename(@input))
|
94
|
+
@file_name = File.basename(@input_path, File.extname(@input_path))
|
95
|
+
download(@input, @input_path)
|
96
|
+
end
|
98
97
|
end
|
99
98
|
|
100
99
|
end
|
data/lib/cloud_crowd/app.rb
CHANGED
@@ -10,19 +10,24 @@ module CloudCrowd
|
|
10
10
|
# and +save+ methods use it behind the scenes.
|
11
11
|
class AssetStore
|
12
12
|
|
13
|
+
autoload :S3Store, 'cloud_crowd/asset_store/s3_store'
|
14
|
+
autoload :FilesystemStore, 'cloud_crowd/asset_store/filesystem_store'
|
15
|
+
|
13
16
|
LOCAL_STORAGE_PATH = '/tmp/cloud_crowd_storage'
|
14
17
|
|
15
|
-
#
|
18
|
+
# Configure the AssetStore with the specific storage implementation
|
16
19
|
# specified by 'storage' in <tt>config.yml</tt>.
|
20
|
+
case CloudCrowd.config[:storage]
|
21
|
+
when 's3' then include S3Store
|
22
|
+
when 'filesystem' then include FilesystemStore
|
23
|
+
else raise Error::StorageNotFound, "#{CloudCrowd.config[:storage]} is not a valid storage back end"
|
24
|
+
end
|
25
|
+
|
26
|
+
# Creating the AssetStore ensures that its scratch directory exists.
|
17
27
|
def initialize
|
18
28
|
@use_auth = CloudCrowd.config[:use_s3_authentication]
|
19
|
-
@storage = CloudCrowd.config[:storage]
|
20
29
|
FileUtils.mkdir_p temp_storage_path unless File.exists? temp_storage_path
|
21
|
-
|
22
|
-
when 's3' then extend S3Store
|
23
|
-
when 'filesystem' then extend FilesystemStore
|
24
|
-
else raise StorageNotFound, "#{@storage} is not a valid storage back end"
|
25
|
-
end
|
30
|
+
raise Error::StorageNotWritable, "#{temp_storage_path} is not writable" unless File.writable?(temp_storage_path)
|
26
31
|
end
|
27
32
|
|
28
33
|
# Get the path to CloudCrowd's temporary local storage. All actions run
|
@@ -30,71 +35,6 @@ module CloudCrowd
|
|
30
35
|
def temp_storage_path
|
31
36
|
"#{Dir.tmpdir}/cloud_crowd_tmp"
|
32
37
|
end
|
33
|
-
|
34
|
-
|
35
|
-
# The S3Store is an implementation of an AssetStore that uses a bucket
|
36
|
-
# on S3 for all resulting files.
|
37
|
-
module S3Store
|
38
|
-
|
39
|
-
# Save a finished file from local storage to S3. Save it publicly unless
|
40
|
-
# we're configured to use S3 authentication.
|
41
|
-
def save(local_path, save_path)
|
42
|
-
ensure_s3_connection
|
43
|
-
permission = @use_auth ? 'private' : 'public-read'
|
44
|
-
@bucket.put(save_path, File.open(local_path), {}, permission)
|
45
|
-
end
|
46
|
-
|
47
|
-
# Return the S3 public URL for a finished file. Authenticated links expire
|
48
|
-
# after one day by default.
|
49
|
-
def url(save_path)
|
50
|
-
@use_auth ? @s3.interface.get_link(@bucket, save_path) :
|
51
|
-
@bucket.key(save_path).public_link
|
52
|
-
end
|
53
|
-
|
54
|
-
# Remove all of a Job's resulting files from S3, both intermediate and finished.
|
55
|
-
def cleanup_job(job)
|
56
|
-
ensure_s3_connection
|
57
|
-
@bucket.delete_folder("#{job.action}/job_#{job.id}")
|
58
|
-
end
|
59
|
-
|
60
|
-
# Workers, through the course of many WorkUnits, keep around an AssetStore.
|
61
|
-
# Ensure we have a persistent S3 connection after first use.
|
62
|
-
def ensure_s3_connection
|
63
|
-
unless @s3 && @bucket
|
64
|
-
params = {:port => 80, :protocol => 'http'}
|
65
|
-
@s3 = RightAws::S3.new(CloudCrowd.config[:aws_access_key], CloudCrowd.config[:aws_secret_key], params)
|
66
|
-
@bucket = @s3.bucket(CloudCrowd.config[:s3_bucket], true)
|
67
|
-
end
|
68
|
-
end
|
69
|
-
end
|
70
|
-
|
71
|
-
|
72
|
-
# The FilesystemStore is an implementation of the AssetStore, good only for
|
73
|
-
# use in development, testing, or if you're only running a single-machine
|
74
|
-
# installation.
|
75
|
-
module FilesystemStore
|
76
|
-
|
77
|
-
# Save a file to somewhere semi-persistent on the filesystem. Can be used
|
78
|
-
# in development, when offline, or if you happen to have a single-machine
|
79
|
-
# CloudCrowd installation. To use, configure :local_storage.
|
80
|
-
def save(local_path, save_path)
|
81
|
-
save_path = File.join(LOCAL_STORAGE_PATH, save_path)
|
82
|
-
save_dir = File.dirname(save_path)
|
83
|
-
FileUtils.mkdir_p save_dir unless File.exists? save_dir
|
84
|
-
FileUtils.cp(local_path, save_path)
|
85
|
-
end
|
86
|
-
|
87
|
-
# Return the URL for a file saved to the local filesystem.
|
88
|
-
def url(save_path)
|
89
|
-
"file://#{File.expand_path(File.join(LOCAL_STORAGE_PATH, save_path))}"
|
90
|
-
end
|
91
|
-
|
92
|
-
# Remove all of a Job's result files from the filesystem.
|
93
|
-
def cleanup_job(job)
|
94
|
-
path = "#{LOCAL_STORAGE_PATH}/#{job.action}/job_#{job.id}"
|
95
|
-
FileUtils.rm_r(path) if File.exists?(path)
|
96
|
-
end
|
97
|
-
end
|
98
38
|
|
99
39
|
end
|
100
40
|
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module CloudCrowd
|
2
|
+
class AssetStore
|
3
|
+
|
4
|
+
# The FilesystemStore is an implementation of the AssetStore, good only for
|
5
|
+
# use in development, testing, or if you're only running a single-machine
|
6
|
+
# installation.
|
7
|
+
module FilesystemStore
|
8
|
+
|
9
|
+
# Save a file to somewhere semi-persistent on the filesystem. Can be used
|
10
|
+
# in development, when offline, or if you happen to have a single-machine
|
11
|
+
# CloudCrowd installation. To use, configure <tt>:storage => 'filesystem'</tt>.
|
12
|
+
def save(local_path, save_path)
|
13
|
+
save_path = File.join(LOCAL_STORAGE_PATH, save_path)
|
14
|
+
save_dir = File.dirname(save_path)
|
15
|
+
FileUtils.mkdir_p save_dir unless File.exists? save_dir
|
16
|
+
FileUtils.cp(local_path, save_path)
|
17
|
+
"file://#{File.expand_path(save_path)}"
|
18
|
+
end
|
19
|
+
|
20
|
+
# Remove all of a Job's result files from the filesystem.
|
21
|
+
def cleanup(job)
|
22
|
+
path = "#{LOCAL_STORAGE_PATH}/#{job.action}/job_#{job.id}"
|
23
|
+
FileUtils.rm_r(path) if File.exists?(path)
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
end
|
28
|
+
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
module CloudCrowd
|
2
|
+
class AssetStore
|
3
|
+
|
4
|
+
# The S3Store is an implementation of an AssetStore that uses a bucket
|
5
|
+
# on S3 for all resulting files.
|
6
|
+
module S3Store
|
7
|
+
|
8
|
+
# Save a finished file from local storage to S3. Save it publicly unless
|
9
|
+
# we're configured to use S3 authentication. Authenticated links expire
|
10
|
+
# after one day by default.
|
11
|
+
def save(local_path, save_path)
|
12
|
+
ensure_s3_connection
|
13
|
+
if @use_auth
|
14
|
+
@bucket.put(save_path, File.open(local_path), {}, 'private')
|
15
|
+
@s3.interface.get_link(@bucket, save_path)
|
16
|
+
else
|
17
|
+
@bucket.put(save_path, File.open(local_path), {}, 'public-read')
|
18
|
+
@bucket.key(save_path).public_link
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
# Remove all of a Job's resulting files from S3, both intermediate and finished.
|
23
|
+
def cleanup(job)
|
24
|
+
ensure_s3_connection
|
25
|
+
@bucket.delete_folder("#{job.action}/job_#{job.id}")
|
26
|
+
end
|
27
|
+
|
28
|
+
# Workers, through the course of many WorkUnits, keep around an AssetStore.
|
29
|
+
# Ensure we have a persistent S3 connection after first use.
|
30
|
+
def ensure_s3_connection
|
31
|
+
unless @s3 && @bucket
|
32
|
+
params = {:port => 80, :protocol => 'http'}
|
33
|
+
@s3 = RightAws::S3.new(CloudCrowd.config[:aws_access_key], CloudCrowd.config[:aws_secret_key], params)
|
34
|
+
@bucket = @s3.bucket(CloudCrowd.config[:s3_bucket], true)
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
end
|
40
|
+
end
|
@@ -1,22 +1,28 @@
|
|
1
1
|
module CloudCrowd
|
2
2
|
|
3
3
|
# Base Error class which all custom CloudCrowd exceptions inherit from.
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
# StorageNotFound is raised when config.yml specifies a storage back end that
|
13
|
-
# doesn't exist.
|
14
|
-
class StorageNotFound < Error #:nodoc:
|
15
|
-
end
|
4
|
+
# Rescuing CloudCrowd::Error (or RuntimeError) will get all custom exceptions.
|
5
|
+
class Error < RuntimeError
|
6
|
+
|
7
|
+
# ActionNotFound is raised when a job is created for an action that doesn't
|
8
|
+
# exist.
|
9
|
+
class ActionNotFound < Error
|
10
|
+
end
|
16
11
|
|
17
|
-
|
18
|
-
|
19
|
-
|
12
|
+
# StorageNotFound is raised when config.yml specifies a storage back end that
|
13
|
+
# doesn't exist.
|
14
|
+
class StorageNotFound < Error
|
15
|
+
end
|
16
|
+
|
17
|
+
# If the AssetStore can't write to its scratch directory.
|
18
|
+
class StorageNotWritable < Error
|
19
|
+
end
|
20
|
+
|
21
|
+
# StatusUnspecified is raised when a WorkUnit returns without a valid
|
22
|
+
# status code.
|
23
|
+
class StatusUnspecified < Error
|
24
|
+
end
|
25
|
+
|
20
26
|
end
|
21
27
|
|
22
28
|
end
|
data/lib/cloud_crowd/helpers.rb
CHANGED
@@ -13,7 +13,7 @@ module CloudCrowd
|
|
13
13
|
|
14
14
|
before_validation_on_create :set_initial_status
|
15
15
|
after_create :queue_for_workers
|
16
|
-
before_destroy :
|
16
|
+
before_destroy :cleanup_assets
|
17
17
|
|
18
18
|
# Create a Job from an incoming JSON or XML request, and add it to the queue.
|
19
19
|
# TODO: Think about XML support.
|
@@ -61,9 +61,9 @@ module CloudCrowd
|
|
61
61
|
end
|
62
62
|
|
63
63
|
# Cleaning up after a job will remove all of its files from S3. Destroying
|
64
|
-
# a Job calls
|
65
|
-
def
|
66
|
-
AssetStore.new.
|
64
|
+
# a Job calls cleanup_assets first.
|
65
|
+
def cleanup_assets
|
66
|
+
AssetStore.new.cleanup(self)
|
67
67
|
end
|
68
68
|
|
69
69
|
# Have all of the WorkUnits finished?
|
@@ -94,7 +94,7 @@ module CloudCrowd
|
|
94
94
|
def action_class
|
95
95
|
klass = CloudCrowd.actions[self.action]
|
96
96
|
return klass if klass
|
97
|
-
raise ActionNotFound, "no action named: '#{self.action}' could be found"
|
97
|
+
raise Error::ActionNotFound, "no action named: '#{self.action}' could be found"
|
98
98
|
end
|
99
99
|
|
100
100
|
# How complete is this Job?
|
data/lib/cloud_crowd/worker.rb
CHANGED
@@ -113,12 +113,15 @@ module CloudCrowd
|
|
113
113
|
# Executes the current work unit, catching all exceptions as failures.
|
114
114
|
def run_work_unit
|
115
115
|
begin
|
116
|
+
result = nil
|
116
117
|
@action = CloudCrowd.actions[@action_name].new(@status, @input, @options, @store)
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
118
|
+
Dir.chdir(@action.work_directory) do
|
119
|
+
result = case @status
|
120
|
+
when PROCESSING then @action.process
|
121
|
+
when SPLITTING then @action.split
|
122
|
+
when MERGING then @action.merge
|
123
|
+
else raise Error::StatusUnspecified, "work units must specify their status"
|
124
|
+
end
|
122
125
|
end
|
123
126
|
complete_work_unit({'output' => result}.to_json)
|
124
127
|
rescue Exception => e
|
File without changes
|
File without changes
|
File without changes
|
@@ -0,0 +1,72 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
class AppTest < Test::Unit::TestCase
|
4
|
+
|
5
|
+
include Rack::Test::Methods
|
6
|
+
|
7
|
+
def app
|
8
|
+
CloudCrowd::App
|
9
|
+
end
|
10
|
+
|
11
|
+
context "The CloudCrowd::App (Sinatra)" do
|
12
|
+
|
13
|
+
setup do
|
14
|
+
CloudCrowd::Job.destroy_all
|
15
|
+
2.times { CloudCrowd::Job.make }
|
16
|
+
end
|
17
|
+
|
18
|
+
should "be able to render the Operations Center (GET /)" do
|
19
|
+
get '/'
|
20
|
+
assert last_response.body.include? '<div id="workers">'
|
21
|
+
assert last_response.body.include? '<div id="graphs">'
|
22
|
+
end
|
23
|
+
|
24
|
+
should "be able to get the current status for all jobs (GET /status)" do
|
25
|
+
resp = JSON.parse(get('/status').body)
|
26
|
+
assert resp['jobs'].length == 2
|
27
|
+
assert resp['jobs'][0]['status'] == 'processing'
|
28
|
+
assert resp['jobs'][0]['percent_complete'] == 0
|
29
|
+
assert resp['work_unit_count'] == 2
|
30
|
+
end
|
31
|
+
|
32
|
+
should "be able to check in a worker daemon, and then check out a work unit" do
|
33
|
+
put '/worker', :name => '101@localhost', :thread_status => 'sleeping'
|
34
|
+
assert last_response.successful? && last_response.empty?
|
35
|
+
post '/work', :worker_name => '101@localhost', :worker_actions => 'graphics_magick'
|
36
|
+
checked_out = JSON.parse(last_response.body)
|
37
|
+
assert checked_out['action'] == 'graphics_magick'
|
38
|
+
assert checked_out['attempts'] == 0
|
39
|
+
assert checked_out['status'] == CloudCrowd::PROCESSING
|
40
|
+
status_check = JSON.parse(get('/worker/101@localhost').body)
|
41
|
+
assert checked_out == status_check
|
42
|
+
end
|
43
|
+
|
44
|
+
should "have a heartbeat" do
|
45
|
+
assert get('/heartbeat').body == 'buh-bump'
|
46
|
+
end
|
47
|
+
|
48
|
+
should "be able to create a job" do
|
49
|
+
post('/jobs', :job => '{"action":"graphics_magick","inputs":["http://www.google.com/"]}')
|
50
|
+
assert last_response.ok?
|
51
|
+
job_info = JSON.parse(last_response.body)
|
52
|
+
assert job_info['percent_complete'] == 0
|
53
|
+
assert job_info['work_units'] == 1
|
54
|
+
assert CloudCrowd::Job.last.id == job_info['id']
|
55
|
+
end
|
56
|
+
|
57
|
+
should "be able to check in on the status of a job" do
|
58
|
+
get("/jobs/#{CloudCrowd::Job.last.id}")
|
59
|
+
assert last_response.ok?
|
60
|
+
assert JSON.parse(last_response.body)['percent_complete'] == 0
|
61
|
+
end
|
62
|
+
|
63
|
+
should "be able to clean up a job when we're done with it" do
|
64
|
+
id = CloudCrowd::Job.last.id
|
65
|
+
delete("/jobs/#{id}")
|
66
|
+
assert last_response.successful? && last_response.empty?
|
67
|
+
assert !CloudCrowd::Job.find_by_id(id)
|
68
|
+
end
|
69
|
+
|
70
|
+
end
|
71
|
+
|
72
|
+
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
class CloudCrowd::Action
|
4
|
+
public :safe_filename
|
5
|
+
end
|
6
|
+
|
7
|
+
class EmptyAction < CloudCrowd::Action
|
8
|
+
end
|
9
|
+
|
10
|
+
class ActionTest < Test::Unit::TestCase
|
11
|
+
|
12
|
+
context "A CloudCrowd Job" do
|
13
|
+
|
14
|
+
setup do
|
15
|
+
@store = CloudCrowd::AssetStore.new
|
16
|
+
@args = [CloudCrowd::PROCESSING, 'file://' + File.expand_path(__FILE__), {'job_id' => 1, 'work_unit_id' => 1}, @store]
|
17
|
+
@action = CloudCrowd.actions['word_count'].new(*@args)
|
18
|
+
end
|
19
|
+
|
20
|
+
should "throw an exception if the 'process' method isn't implemented" do
|
21
|
+
assert_raise(NotImplementedError) { EmptyAction.new(*@args).process }
|
22
|
+
end
|
23
|
+
|
24
|
+
should "have downloaded the input URL to local storage" do
|
25
|
+
assert @action.input_path
|
26
|
+
assert File.read(@action.input_path) == File.read(File.expand_path(__FILE__))
|
27
|
+
end
|
28
|
+
|
29
|
+
should "be able to save (to the filesystem while testing)" do
|
30
|
+
assert @action.save(@action.input_path) == "file://#{CloudCrowd::AssetStore::LOCAL_STORAGE_PATH}/word_count/job_1/unit_1/test_action.rb"
|
31
|
+
end
|
32
|
+
|
33
|
+
should "be able to clean up after itself" do
|
34
|
+
@action.cleanup_work_directory
|
35
|
+
assert !File.exists?(@action.work_directory)
|
36
|
+
end
|
37
|
+
|
38
|
+
should "be able to generate a safe filename for a URL to write to disk" do
|
39
|
+
name = @action.safe_filename("http://example.com/Some%20(Crazy'Kinda%7E)'Filename.txt")
|
40
|
+
assert name == 'Some-Crazy-Kinda-Filename.txt'
|
41
|
+
end
|
42
|
+
|
43
|
+
should "be able to count the number of words in this file" do
|
44
|
+
assert @action.process == 149
|
45
|
+
end
|
46
|
+
|
47
|
+
end
|
48
|
+
|
49
|
+
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
class ConfigurationTest < Test::Unit::TestCase
|
4
|
+
|
5
|
+
context "CloudCrowd Configuration" do
|
6
|
+
|
7
|
+
should "have read in config.yml" do
|
8
|
+
assert CloudCrowd.config[:num_workers] == 4
|
9
|
+
assert CloudCrowd.config[:storage] == 'filesystem'
|
10
|
+
end
|
11
|
+
|
12
|
+
should "allow config.yml to configure the implementation of AssetStore" do
|
13
|
+
assert CloudCrowd::AssetStore.ancestors.include?(CloudCrowd::AssetStore::FilesystemStore)
|
14
|
+
end
|
15
|
+
|
16
|
+
should "have properly configured the ActiveRecord database" do
|
17
|
+
assert ActiveRecord::Base.connection.active?
|
18
|
+
end
|
19
|
+
|
20
|
+
should "have loaded in the default set of actions" do
|
21
|
+
assert CloudCrowd.actions['word_count'] == WordCount
|
22
|
+
assert CloudCrowd.actions['process_pdfs'] == ProcessPdfs
|
23
|
+
assert CloudCrowd.actions['graphics_magick'] == GraphicsMagick
|
24
|
+
end
|
25
|
+
|
26
|
+
end
|
27
|
+
|
28
|
+
end
|
data/views/index.erb
CHANGED
@@ -5,9 +5,9 @@
|
|
5
5
|
<title>Operations Center | CloudCrowd</title>
|
6
6
|
<link href="/css/reset.css" media="screen" rel="stylesheet" type="text/css" />
|
7
7
|
<link href="/css/admin_console.css" media="screen" rel="stylesheet" type="text/css" />
|
8
|
-
<script src="/js/jquery
|
9
|
-
<!--[if IE]><script src="/js/excanvas.
|
10
|
-
<script src="/js/
|
8
|
+
<script src="/js/jquery.js" type="text/javascript"></script>
|
9
|
+
<!--[if IE]><script src="/js/excanvas.js" type="text/javascript"></script><![endif]-->
|
10
|
+
<script src="/js/flot.js" type="text/javascript"></script>
|
11
11
|
<script src="/js/admin_console.js" type="text/javascript"></script>
|
12
12
|
</head>
|
13
13
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: documentcloud-cloud-crowd
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.6
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jeremy Ashkenas
|
@@ -145,6 +145,8 @@ files:
|
|
145
145
|
- lib/cloud-crowd.rb
|
146
146
|
- lib/cloud_crowd/action.rb
|
147
147
|
- lib/cloud_crowd/app.rb
|
148
|
+
- lib/cloud_crowd/asset_store/filesystem_store.rb
|
149
|
+
- lib/cloud_crowd/asset_store/s3_store.rb
|
148
150
|
- lib/cloud_crowd/asset_store.rb
|
149
151
|
- lib/cloud_crowd/command_line.rb
|
150
152
|
- lib/cloud_crowd/daemon.rb
|
@@ -175,10 +177,11 @@ files:
|
|
175
177
|
- public/images/worker_info.png
|
176
178
|
- public/images/worker_info_loading.gif
|
177
179
|
- public/js/admin_console.js
|
178
|
-
- public/js/excanvas.
|
179
|
-
- public/js/
|
180
|
-
- public/js/jquery
|
180
|
+
- public/js/excanvas.js
|
181
|
+
- public/js/flot.js
|
182
|
+
- public/js/jquery.js
|
181
183
|
- README
|
184
|
+
- test/acceptance/test_app.rb
|
182
185
|
- test/acceptance/test_failing_work_units.rb
|
183
186
|
- test/acceptance/test_word_count.rb
|
184
187
|
- test/blueprints.rb
|
@@ -187,6 +190,8 @@ files:
|
|
187
190
|
- test/config/database.yml
|
188
191
|
- test/config/actions/failure_testing.rb
|
189
192
|
- test/test_helper.rb
|
193
|
+
- test/unit/test_action.rb
|
194
|
+
- test/unit/test_configuration.rb
|
190
195
|
- test/unit/test_job.rb
|
191
196
|
- test/unit/test_work_unit.rb
|
192
197
|
- views/index.erb
|