documentcloud-cloud-crowd 0.0.6 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +12 -3
- data/cloud-crowd.gemspec +9 -4
- data/config/config.example.yml +2 -2
- data/lib/cloud-crowd.rb +1 -1
- data/lib/cloud_crowd/action.rb +9 -10
- data/lib/cloud_crowd/app.rb +1 -1
- data/lib/cloud_crowd/asset_store.rb +12 -72
- data/lib/cloud_crowd/asset_store/filesystem_store.rb +28 -0
- data/lib/cloud_crowd/asset_store/s3_store.rb +40 -0
- data/lib/cloud_crowd/command_line.rb +1 -0
- data/lib/cloud_crowd/exceptions.rb +21 -15
- data/lib/cloud_crowd/helpers.rb +1 -1
- data/lib/cloud_crowd/inflector.rb +1 -1
- data/lib/cloud_crowd/models/job.rb +5 -5
- data/lib/cloud_crowd/worker.rb +8 -5
- data/public/js/{excanvas.pack.js → excanvas.js} +0 -0
- data/public/js/{jquery.flot.pack.js → flot.js} +0 -0
- data/public/js/{jquery-1.3.2.min.js → jquery.js} +0 -0
- data/test/acceptance/test_app.rb +72 -0
- data/test/unit/test_action.rb +49 -0
- data/test/unit/test_configuration.rb +28 -0
- data/views/index.erb +3 -3
- metadata +9 -4
data/README
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
|
1
|
+
=
|
2
2
|
_ _
|
3
3
|
( ` )_
|
4
4
|
( ) `)
|
@@ -29,11 +29,20 @@
|
|
29
29
|
* Built for Amazon EC2 and S3
|
30
30
|
* split -> process -> merge
|
31
31
|
* As easy as `gem install cloud-crowd`
|
32
|
+
|
33
|
+
Well-suited for:
|
34
|
+
|
35
|
+
* Generating or resizing images.
|
36
|
+
* Encoding video.
|
37
|
+
* Running text extraction or OCR on PDFs.
|
38
|
+
* Migrating a large file set or database.
|
39
|
+
* Web scraping.
|
32
40
|
|
33
41
|
|
34
|
-
~
|
42
|
+
~ Documentation ~
|
35
43
|
|
36
|
-
http://wiki.github.com/documentcloud/cloud-crowd
|
44
|
+
Wiki: http://wiki.github.com/documentcloud/cloud-crowd
|
45
|
+
Rdoc: http://rdoc.info/projects/documentcloud/cloud-crowd
|
37
46
|
|
38
47
|
|
39
48
|
~ Getting started ~
|
data/cloud-crowd.gemspec
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = 'cloud-crowd'
|
3
|
-
s.version = '0.0
|
3
|
+
s.version = '0.1.0' # Keep version in sync with cloud-crowd.rb
|
4
4
|
s.date = '2009-09-01'
|
5
5
|
|
6
6
|
s.homepage = "http://wiki.github.com/documentcloud/cloud-crowd"
|
@@ -57,6 +57,8 @@ examples/word_count_example.rb
|
|
57
57
|
lib/cloud-crowd.rb
|
58
58
|
lib/cloud_crowd/action.rb
|
59
59
|
lib/cloud_crowd/app.rb
|
60
|
+
lib/cloud_crowd/asset_store/filesystem_store.rb
|
61
|
+
lib/cloud_crowd/asset_store/s3_store.rb
|
60
62
|
lib/cloud_crowd/asset_store.rb
|
61
63
|
lib/cloud_crowd/command_line.rb
|
62
64
|
lib/cloud_crowd/daemon.rb
|
@@ -87,10 +89,11 @@ public/images/sidebar_top.png
|
|
87
89
|
public/images/worker_info.png
|
88
90
|
public/images/worker_info_loading.gif
|
89
91
|
public/js/admin_console.js
|
90
|
-
public/js/excanvas.
|
91
|
-
public/js/
|
92
|
-
public/js/jquery
|
92
|
+
public/js/excanvas.js
|
93
|
+
public/js/flot.js
|
94
|
+
public/js/jquery.js
|
93
95
|
README
|
96
|
+
test/acceptance/test_app.rb
|
94
97
|
test/acceptance/test_failing_work_units.rb
|
95
98
|
test/acceptance/test_word_count.rb
|
96
99
|
test/blueprints.rb
|
@@ -99,6 +102,8 @@ test/config/config.yml
|
|
99
102
|
test/config/database.yml
|
100
103
|
test/config/actions/failure_testing.rb
|
101
104
|
test/test_helper.rb
|
105
|
+
test/unit/test_action.rb
|
106
|
+
test/unit/test_configuration.rb
|
102
107
|
test/unit/test_job.rb
|
103
108
|
test/unit/test_work_unit.rb
|
104
109
|
views/index.erb
|
data/config/config.example.yml
CHANGED
@@ -35,13 +35,13 @@
|
|
35
35
|
# desired balance between latency and traffic.
|
36
36
|
|
37
37
|
# The number of workers that `crowd workers start` spins up.
|
38
|
-
:num_workers:
|
38
|
+
:num_workers: 3
|
39
39
|
|
40
40
|
# The minimum number of seconds a worker waits between checking the job queue.
|
41
41
|
:min_worker_wait: 1
|
42
42
|
|
43
43
|
# The maximum number of seconds a worker waits between checking the job queue.
|
44
|
-
:max_worker_wait:
|
44
|
+
:max_worker_wait: 5
|
45
45
|
|
46
46
|
# The number of separate attempts that will be made to process an individual
|
47
47
|
# work unit, before marking it as having failed.
|
data/lib/cloud-crowd.rb
CHANGED
@@ -45,7 +45,7 @@ module CloudCrowd
|
|
45
45
|
ROOT = File.expand_path(File.dirname(__FILE__) + '/..')
|
46
46
|
|
47
47
|
# Keep the version in sync with the gemspec.
|
48
|
-
VERSION = '0.0
|
48
|
+
VERSION = '0.1.0'
|
49
49
|
|
50
50
|
# A Job is processing if its WorkUnits are in the queue to be handled by workers.
|
51
51
|
PROCESSING = 1
|
data/lib/cloud_crowd/action.rb
CHANGED
@@ -28,7 +28,6 @@ module CloudCrowd
|
|
28
28
|
@job_id, @work_unit_id = options['job_id'], options['work_unit_id']
|
29
29
|
@work_directory = File.expand_path(File.join(@store.temp_storage_path, storage_prefix))
|
30
30
|
FileUtils.mkdir_p(@work_directory) unless File.exists?(@work_directory)
|
31
|
-
Dir.chdir @work_directory
|
32
31
|
status == MERGING ? parse_input : download_input
|
33
32
|
end
|
34
33
|
|
@@ -53,13 +52,11 @@ module CloudCrowd
|
|
53
52
|
def save(file_path)
|
54
53
|
save_path = File.join(storage_prefix, File.basename(file_path))
|
55
54
|
@store.save(file_path, save_path)
|
56
|
-
return @store.url(save_path)
|
57
55
|
end
|
58
56
|
|
59
57
|
# After the Action has finished, we remove the work directory and return
|
60
58
|
# to the root directory (where daemons run by default).
|
61
59
|
def cleanup_work_directory
|
62
|
-
Dir.chdir '/'
|
63
60
|
FileUtils.rm_r(@work_directory) if File.exists?(@work_directory)
|
64
61
|
end
|
65
62
|
|
@@ -68,8 +65,8 @@ module CloudCrowd
|
|
68
65
|
|
69
66
|
# Convert an unsafe URL into a filesystem-friendly filename.
|
70
67
|
def safe_filename(url)
|
71
|
-
ext
|
72
|
-
name = File.basename(url)
|
68
|
+
ext = File.extname(url)
|
69
|
+
name = URI.unescape(File.basename(url)).gsub(/[^a-zA-Z0-9_\-.]/, '-').gsub(/-+/, '-')
|
73
70
|
File.basename(name, ext).gsub('.', '-') + ext
|
74
71
|
end
|
75
72
|
|
@@ -90,11 +87,13 @@ module CloudCrowd
|
|
90
87
|
|
91
88
|
# If the input is a URL, download the file before beginning processing.
|
92
89
|
def download_input
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
90
|
+
Dir.chdir(@work_directory) do
|
91
|
+
input_is_url = !!URI.parse(@input) rescue false
|
92
|
+
return unless input_is_url
|
93
|
+
@input_path = File.join(@work_directory, safe_filename(@input))
|
94
|
+
@file_name = File.basename(@input_path, File.extname(@input_path))
|
95
|
+
download(@input, @input_path)
|
96
|
+
end
|
98
97
|
end
|
99
98
|
|
100
99
|
end
|
data/lib/cloud_crowd/app.rb
CHANGED
@@ -10,19 +10,24 @@ module CloudCrowd
|
|
10
10
|
# and +save+ methods use it behind the scenes.
|
11
11
|
class AssetStore
|
12
12
|
|
13
|
+
autoload :S3Store, 'cloud_crowd/asset_store/s3_store'
|
14
|
+
autoload :FilesystemStore, 'cloud_crowd/asset_store/filesystem_store'
|
15
|
+
|
13
16
|
LOCAL_STORAGE_PATH = '/tmp/cloud_crowd_storage'
|
14
17
|
|
15
|
-
#
|
18
|
+
# Configure the AssetStore with the specific storage implementation
|
16
19
|
# specified by 'storage' in <tt>config.yml</tt>.
|
20
|
+
case CloudCrowd.config[:storage]
|
21
|
+
when 's3' then include S3Store
|
22
|
+
when 'filesystem' then include FilesystemStore
|
23
|
+
else raise Error::StorageNotFound, "#{CloudCrowd.config[:storage]} is not a valid storage back end"
|
24
|
+
end
|
25
|
+
|
26
|
+
# Creating the AssetStore ensures that its scratch directory exists.
|
17
27
|
def initialize
|
18
28
|
@use_auth = CloudCrowd.config[:use_s3_authentication]
|
19
|
-
@storage = CloudCrowd.config[:storage]
|
20
29
|
FileUtils.mkdir_p temp_storage_path unless File.exists? temp_storage_path
|
21
|
-
|
22
|
-
when 's3' then extend S3Store
|
23
|
-
when 'filesystem' then extend FilesystemStore
|
24
|
-
else raise StorageNotFound, "#{@storage} is not a valid storage back end"
|
25
|
-
end
|
30
|
+
raise Error::StorageNotWritable, "#{temp_storage_path} is not writable" unless File.writable?(temp_storage_path)
|
26
31
|
end
|
27
32
|
|
28
33
|
# Get the path to CloudCrowd's temporary local storage. All actions run
|
@@ -30,71 +35,6 @@ module CloudCrowd
|
|
30
35
|
def temp_storage_path
|
31
36
|
"#{Dir.tmpdir}/cloud_crowd_tmp"
|
32
37
|
end
|
33
|
-
|
34
|
-
|
35
|
-
# The S3Store is an implementation of an AssetStore that uses a bucket
|
36
|
-
# on S3 for all resulting files.
|
37
|
-
module S3Store
|
38
|
-
|
39
|
-
# Save a finished file from local storage to S3. Save it publicly unless
|
40
|
-
# we're configured to use S3 authentication.
|
41
|
-
def save(local_path, save_path)
|
42
|
-
ensure_s3_connection
|
43
|
-
permission = @use_auth ? 'private' : 'public-read'
|
44
|
-
@bucket.put(save_path, File.open(local_path), {}, permission)
|
45
|
-
end
|
46
|
-
|
47
|
-
# Return the S3 public URL for a finished file. Authenticated links expire
|
48
|
-
# after one day by default.
|
49
|
-
def url(save_path)
|
50
|
-
@use_auth ? @s3.interface.get_link(@bucket, save_path) :
|
51
|
-
@bucket.key(save_path).public_link
|
52
|
-
end
|
53
|
-
|
54
|
-
# Remove all of a Job's resulting files from S3, both intermediate and finished.
|
55
|
-
def cleanup_job(job)
|
56
|
-
ensure_s3_connection
|
57
|
-
@bucket.delete_folder("#{job.action}/job_#{job.id}")
|
58
|
-
end
|
59
|
-
|
60
|
-
# Workers, through the course of many WorkUnits, keep around an AssetStore.
|
61
|
-
# Ensure we have a persistent S3 connection after first use.
|
62
|
-
def ensure_s3_connection
|
63
|
-
unless @s3 && @bucket
|
64
|
-
params = {:port => 80, :protocol => 'http'}
|
65
|
-
@s3 = RightAws::S3.new(CloudCrowd.config[:aws_access_key], CloudCrowd.config[:aws_secret_key], params)
|
66
|
-
@bucket = @s3.bucket(CloudCrowd.config[:s3_bucket], true)
|
67
|
-
end
|
68
|
-
end
|
69
|
-
end
|
70
|
-
|
71
|
-
|
72
|
-
# The FilesystemStore is an implementation of the AssetStore, good only for
|
73
|
-
# use in development, testing, or if you're only running a single-machine
|
74
|
-
# installation.
|
75
|
-
module FilesystemStore
|
76
|
-
|
77
|
-
# Save a file to somewhere semi-persistent on the filesystem. Can be used
|
78
|
-
# in development, when offline, or if you happen to have a single-machine
|
79
|
-
# CloudCrowd installation. To use, configure :local_storage.
|
80
|
-
def save(local_path, save_path)
|
81
|
-
save_path = File.join(LOCAL_STORAGE_PATH, save_path)
|
82
|
-
save_dir = File.dirname(save_path)
|
83
|
-
FileUtils.mkdir_p save_dir unless File.exists? save_dir
|
84
|
-
FileUtils.cp(local_path, save_path)
|
85
|
-
end
|
86
|
-
|
87
|
-
# Return the URL for a file saved to the local filesystem.
|
88
|
-
def url(save_path)
|
89
|
-
"file://#{File.expand_path(File.join(LOCAL_STORAGE_PATH, save_path))}"
|
90
|
-
end
|
91
|
-
|
92
|
-
# Remove all of a Job's result files from the filesystem.
|
93
|
-
def cleanup_job(job)
|
94
|
-
path = "#{LOCAL_STORAGE_PATH}/#{job.action}/job_#{job.id}"
|
95
|
-
FileUtils.rm_r(path) if File.exists?(path)
|
96
|
-
end
|
97
|
-
end
|
98
38
|
|
99
39
|
end
|
100
40
|
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module CloudCrowd
|
2
|
+
class AssetStore
|
3
|
+
|
4
|
+
# The FilesystemStore is an implementation of the AssetStore, good only for
|
5
|
+
# use in development, testing, or if you're only running a single-machine
|
6
|
+
# installation.
|
7
|
+
module FilesystemStore
|
8
|
+
|
9
|
+
# Save a file to somewhere semi-persistent on the filesystem. Can be used
|
10
|
+
# in development, when offline, or if you happen to have a single-machine
|
11
|
+
# CloudCrowd installation. To use, configure <tt>:storage => 'filesystem'</tt>.
|
12
|
+
def save(local_path, save_path)
|
13
|
+
save_path = File.join(LOCAL_STORAGE_PATH, save_path)
|
14
|
+
save_dir = File.dirname(save_path)
|
15
|
+
FileUtils.mkdir_p save_dir unless File.exists? save_dir
|
16
|
+
FileUtils.cp(local_path, save_path)
|
17
|
+
"file://#{File.expand_path(save_path)}"
|
18
|
+
end
|
19
|
+
|
20
|
+
# Remove all of a Job's result files from the filesystem.
|
21
|
+
def cleanup(job)
|
22
|
+
path = "#{LOCAL_STORAGE_PATH}/#{job.action}/job_#{job.id}"
|
23
|
+
FileUtils.rm_r(path) if File.exists?(path)
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
end
|
28
|
+
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
module CloudCrowd
|
2
|
+
class AssetStore
|
3
|
+
|
4
|
+
# The S3Store is an implementation of an AssetStore that uses a bucket
|
5
|
+
# on S3 for all resulting files.
|
6
|
+
module S3Store
|
7
|
+
|
8
|
+
# Save a finished file from local storage to S3. Save it publicly unless
|
9
|
+
# we're configured to use S3 authentication. Authenticated links expire
|
10
|
+
# after one day by default.
|
11
|
+
def save(local_path, save_path)
|
12
|
+
ensure_s3_connection
|
13
|
+
if @use_auth
|
14
|
+
@bucket.put(save_path, File.open(local_path), {}, 'private')
|
15
|
+
@s3.interface.get_link(@bucket, save_path)
|
16
|
+
else
|
17
|
+
@bucket.put(save_path, File.open(local_path), {}, 'public-read')
|
18
|
+
@bucket.key(save_path).public_link
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
# Remove all of a Job's resulting files from S3, both intermediate and finished.
|
23
|
+
def cleanup(job)
|
24
|
+
ensure_s3_connection
|
25
|
+
@bucket.delete_folder("#{job.action}/job_#{job.id}")
|
26
|
+
end
|
27
|
+
|
28
|
+
# Workers, through the course of many WorkUnits, keep around an AssetStore.
|
29
|
+
# Ensure we have a persistent S3 connection after first use.
|
30
|
+
def ensure_s3_connection
|
31
|
+
unless @s3 && @bucket
|
32
|
+
params = {:port => 80, :protocol => 'http'}
|
33
|
+
@s3 = RightAws::S3.new(CloudCrowd.config[:aws_access_key], CloudCrowd.config[:aws_secret_key], params)
|
34
|
+
@bucket = @s3.bucket(CloudCrowd.config[:s3_bucket], true)
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
end
|
40
|
+
end
|
@@ -1,22 +1,28 @@
|
|
1
1
|
module CloudCrowd
|
2
2
|
|
3
3
|
# Base Error class which all custom CloudCrowd exceptions inherit from.
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
# StorageNotFound is raised when config.yml specifies a storage back end that
|
13
|
-
# doesn't exist.
|
14
|
-
class StorageNotFound < Error #:nodoc:
|
15
|
-
end
|
4
|
+
# Rescuing CloudCrowd::Error (or RuntimeError) will get all custom exceptions.
|
5
|
+
class Error < RuntimeError
|
6
|
+
|
7
|
+
# ActionNotFound is raised when a job is created for an action that doesn't
|
8
|
+
# exist.
|
9
|
+
class ActionNotFound < Error
|
10
|
+
end
|
16
11
|
|
17
|
-
|
18
|
-
|
19
|
-
|
12
|
+
# StorageNotFound is raised when config.yml specifies a storage back end that
|
13
|
+
# doesn't exist.
|
14
|
+
class StorageNotFound < Error
|
15
|
+
end
|
16
|
+
|
17
|
+
# If the AssetStore can't write to its scratch directory.
|
18
|
+
class StorageNotWritable < Error
|
19
|
+
end
|
20
|
+
|
21
|
+
# StatusUnspecified is raised when a WorkUnit returns without a valid
|
22
|
+
# status code.
|
23
|
+
class StatusUnspecified < Error
|
24
|
+
end
|
25
|
+
|
20
26
|
end
|
21
27
|
|
22
28
|
end
|
data/lib/cloud_crowd/helpers.rb
CHANGED
@@ -13,7 +13,7 @@ module CloudCrowd
|
|
13
13
|
|
14
14
|
before_validation_on_create :set_initial_status
|
15
15
|
after_create :queue_for_workers
|
16
|
-
before_destroy :
|
16
|
+
before_destroy :cleanup_assets
|
17
17
|
|
18
18
|
# Create a Job from an incoming JSON or XML request, and add it to the queue.
|
19
19
|
# TODO: Think about XML support.
|
@@ -61,9 +61,9 @@ module CloudCrowd
|
|
61
61
|
end
|
62
62
|
|
63
63
|
# Cleaning up after a job will remove all of its files from S3. Destroying
|
64
|
-
# a Job calls
|
65
|
-
def
|
66
|
-
AssetStore.new.
|
64
|
+
# a Job calls cleanup_assets first.
|
65
|
+
def cleanup_assets
|
66
|
+
AssetStore.new.cleanup(self)
|
67
67
|
end
|
68
68
|
|
69
69
|
# Have all of the WorkUnits finished?
|
@@ -94,7 +94,7 @@ module CloudCrowd
|
|
94
94
|
def action_class
|
95
95
|
klass = CloudCrowd.actions[self.action]
|
96
96
|
return klass if klass
|
97
|
-
raise ActionNotFound, "no action named: '#{self.action}' could be found"
|
97
|
+
raise Error::ActionNotFound, "no action named: '#{self.action}' could be found"
|
98
98
|
end
|
99
99
|
|
100
100
|
# How complete is this Job?
|
data/lib/cloud_crowd/worker.rb
CHANGED
@@ -113,12 +113,15 @@ module CloudCrowd
|
|
113
113
|
# Executes the current work unit, catching all exceptions as failures.
|
114
114
|
def run_work_unit
|
115
115
|
begin
|
116
|
+
result = nil
|
116
117
|
@action = CloudCrowd.actions[@action_name].new(@status, @input, @options, @store)
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
118
|
+
Dir.chdir(@action.work_directory) do
|
119
|
+
result = case @status
|
120
|
+
when PROCESSING then @action.process
|
121
|
+
when SPLITTING then @action.split
|
122
|
+
when MERGING then @action.merge
|
123
|
+
else raise Error::StatusUnspecified, "work units must specify their status"
|
124
|
+
end
|
122
125
|
end
|
123
126
|
complete_work_unit({'output' => result}.to_json)
|
124
127
|
rescue Exception => e
|
File without changes
|
File without changes
|
File without changes
|
@@ -0,0 +1,72 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
class AppTest < Test::Unit::TestCase
|
4
|
+
|
5
|
+
include Rack::Test::Methods
|
6
|
+
|
7
|
+
def app
|
8
|
+
CloudCrowd::App
|
9
|
+
end
|
10
|
+
|
11
|
+
context "The CloudCrowd::App (Sinatra)" do
|
12
|
+
|
13
|
+
setup do
|
14
|
+
CloudCrowd::Job.destroy_all
|
15
|
+
2.times { CloudCrowd::Job.make }
|
16
|
+
end
|
17
|
+
|
18
|
+
should "be able to render the Operations Center (GET /)" do
|
19
|
+
get '/'
|
20
|
+
assert last_response.body.include? '<div id="workers">'
|
21
|
+
assert last_response.body.include? '<div id="graphs">'
|
22
|
+
end
|
23
|
+
|
24
|
+
should "be able to get the current status for all jobs (GET /status)" do
|
25
|
+
resp = JSON.parse(get('/status').body)
|
26
|
+
assert resp['jobs'].length == 2
|
27
|
+
assert resp['jobs'][0]['status'] == 'processing'
|
28
|
+
assert resp['jobs'][0]['percent_complete'] == 0
|
29
|
+
assert resp['work_unit_count'] == 2
|
30
|
+
end
|
31
|
+
|
32
|
+
should "be able to check in a worker daemon, and then check out a work unit" do
|
33
|
+
put '/worker', :name => '101@localhost', :thread_status => 'sleeping'
|
34
|
+
assert last_response.successful? && last_response.empty?
|
35
|
+
post '/work', :worker_name => '101@localhost', :worker_actions => 'graphics_magick'
|
36
|
+
checked_out = JSON.parse(last_response.body)
|
37
|
+
assert checked_out['action'] == 'graphics_magick'
|
38
|
+
assert checked_out['attempts'] == 0
|
39
|
+
assert checked_out['status'] == CloudCrowd::PROCESSING
|
40
|
+
status_check = JSON.parse(get('/worker/101@localhost').body)
|
41
|
+
assert checked_out == status_check
|
42
|
+
end
|
43
|
+
|
44
|
+
should "have a heartbeat" do
|
45
|
+
assert get('/heartbeat').body == 'buh-bump'
|
46
|
+
end
|
47
|
+
|
48
|
+
should "be able to create a job" do
|
49
|
+
post('/jobs', :job => '{"action":"graphics_magick","inputs":["http://www.google.com/"]}')
|
50
|
+
assert last_response.ok?
|
51
|
+
job_info = JSON.parse(last_response.body)
|
52
|
+
assert job_info['percent_complete'] == 0
|
53
|
+
assert job_info['work_units'] == 1
|
54
|
+
assert CloudCrowd::Job.last.id == job_info['id']
|
55
|
+
end
|
56
|
+
|
57
|
+
should "be able to check in on the status of a job" do
|
58
|
+
get("/jobs/#{CloudCrowd::Job.last.id}")
|
59
|
+
assert last_response.ok?
|
60
|
+
assert JSON.parse(last_response.body)['percent_complete'] == 0
|
61
|
+
end
|
62
|
+
|
63
|
+
should "be able to clean up a job when we're done with it" do
|
64
|
+
id = CloudCrowd::Job.last.id
|
65
|
+
delete("/jobs/#{id}")
|
66
|
+
assert last_response.successful? && last_response.empty?
|
67
|
+
assert !CloudCrowd::Job.find_by_id(id)
|
68
|
+
end
|
69
|
+
|
70
|
+
end
|
71
|
+
|
72
|
+
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
class CloudCrowd::Action
|
4
|
+
public :safe_filename
|
5
|
+
end
|
6
|
+
|
7
|
+
class EmptyAction < CloudCrowd::Action
|
8
|
+
end
|
9
|
+
|
10
|
+
class ActionTest < Test::Unit::TestCase
|
11
|
+
|
12
|
+
context "A CloudCrowd Job" do
|
13
|
+
|
14
|
+
setup do
|
15
|
+
@store = CloudCrowd::AssetStore.new
|
16
|
+
@args = [CloudCrowd::PROCESSING, 'file://' + File.expand_path(__FILE__), {'job_id' => 1, 'work_unit_id' => 1}, @store]
|
17
|
+
@action = CloudCrowd.actions['word_count'].new(*@args)
|
18
|
+
end
|
19
|
+
|
20
|
+
should "throw an exception if the 'process' method isn't implemented" do
|
21
|
+
assert_raise(NotImplementedError) { EmptyAction.new(*@args).process }
|
22
|
+
end
|
23
|
+
|
24
|
+
should "have downloaded the input URL to local storage" do
|
25
|
+
assert @action.input_path
|
26
|
+
assert File.read(@action.input_path) == File.read(File.expand_path(__FILE__))
|
27
|
+
end
|
28
|
+
|
29
|
+
should "be able to save (to the filesystem while testing)" do
|
30
|
+
assert @action.save(@action.input_path) == "file://#{CloudCrowd::AssetStore::LOCAL_STORAGE_PATH}/word_count/job_1/unit_1/test_action.rb"
|
31
|
+
end
|
32
|
+
|
33
|
+
should "be able to clean up after itself" do
|
34
|
+
@action.cleanup_work_directory
|
35
|
+
assert !File.exists?(@action.work_directory)
|
36
|
+
end
|
37
|
+
|
38
|
+
should "be able to generate a safe filename for a URL to write to disk" do
|
39
|
+
name = @action.safe_filename("http://example.com/Some%20(Crazy'Kinda%7E)'Filename.txt")
|
40
|
+
assert name == 'Some-Crazy-Kinda-Filename.txt'
|
41
|
+
end
|
42
|
+
|
43
|
+
should "be able to count the number of words in this file" do
|
44
|
+
assert @action.process == 149
|
45
|
+
end
|
46
|
+
|
47
|
+
end
|
48
|
+
|
49
|
+
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
class ConfigurationTest < Test::Unit::TestCase
|
4
|
+
|
5
|
+
context "CloudCrowd Configuration" do
|
6
|
+
|
7
|
+
should "have read in config.yml" do
|
8
|
+
assert CloudCrowd.config[:num_workers] == 4
|
9
|
+
assert CloudCrowd.config[:storage] == 'filesystem'
|
10
|
+
end
|
11
|
+
|
12
|
+
should "allow config.yml to configure the implementation of AssetStore" do
|
13
|
+
assert CloudCrowd::AssetStore.ancestors.include?(CloudCrowd::AssetStore::FilesystemStore)
|
14
|
+
end
|
15
|
+
|
16
|
+
should "have properly configured the ActiveRecord database" do
|
17
|
+
assert ActiveRecord::Base.connection.active?
|
18
|
+
end
|
19
|
+
|
20
|
+
should "have loaded in the default set of actions" do
|
21
|
+
assert CloudCrowd.actions['word_count'] == WordCount
|
22
|
+
assert CloudCrowd.actions['process_pdfs'] == ProcessPdfs
|
23
|
+
assert CloudCrowd.actions['graphics_magick'] == GraphicsMagick
|
24
|
+
end
|
25
|
+
|
26
|
+
end
|
27
|
+
|
28
|
+
end
|
data/views/index.erb
CHANGED
@@ -5,9 +5,9 @@
|
|
5
5
|
<title>Operations Center | CloudCrowd</title>
|
6
6
|
<link href="/css/reset.css" media="screen" rel="stylesheet" type="text/css" />
|
7
7
|
<link href="/css/admin_console.css" media="screen" rel="stylesheet" type="text/css" />
|
8
|
-
<script src="/js/jquery
|
9
|
-
<!--[if IE]><script src="/js/excanvas.
|
10
|
-
<script src="/js/
|
8
|
+
<script src="/js/jquery.js" type="text/javascript"></script>
|
9
|
+
<!--[if IE]><script src="/js/excanvas.js" type="text/javascript"></script><![endif]-->
|
10
|
+
<script src="/js/flot.js" type="text/javascript"></script>
|
11
11
|
<script src="/js/admin_console.js" type="text/javascript"></script>
|
12
12
|
</head>
|
13
13
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: documentcloud-cloud-crowd
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jeremy Ashkenas
|
@@ -145,6 +145,8 @@ files:
|
|
145
145
|
- lib/cloud-crowd.rb
|
146
146
|
- lib/cloud_crowd/action.rb
|
147
147
|
- lib/cloud_crowd/app.rb
|
148
|
+
- lib/cloud_crowd/asset_store/filesystem_store.rb
|
149
|
+
- lib/cloud_crowd/asset_store/s3_store.rb
|
148
150
|
- lib/cloud_crowd/asset_store.rb
|
149
151
|
- lib/cloud_crowd/command_line.rb
|
150
152
|
- lib/cloud_crowd/daemon.rb
|
@@ -175,10 +177,11 @@ files:
|
|
175
177
|
- public/images/worker_info.png
|
176
178
|
- public/images/worker_info_loading.gif
|
177
179
|
- public/js/admin_console.js
|
178
|
-
- public/js/excanvas.
|
179
|
-
- public/js/
|
180
|
-
- public/js/jquery
|
180
|
+
- public/js/excanvas.js
|
181
|
+
- public/js/flot.js
|
182
|
+
- public/js/jquery.js
|
181
183
|
- README
|
184
|
+
- test/acceptance/test_app.rb
|
182
185
|
- test/acceptance/test_failing_work_units.rb
|
183
186
|
- test/acceptance/test_word_count.rb
|
184
187
|
- test/blueprints.rb
|
@@ -187,6 +190,8 @@ files:
|
|
187
190
|
- test/config/database.yml
|
188
191
|
- test/config/actions/failure_testing.rb
|
189
192
|
- test/test_helper.rb
|
193
|
+
- test/unit/test_action.rb
|
194
|
+
- test/unit/test_configuration.rb
|
190
195
|
- test/unit/test_job.rb
|
191
196
|
- test/unit/test_work_unit.rb
|
192
197
|
- views/index.erb
|