cloud-crowd 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/EPIGRAPHS +17 -0
- data/LICENSE +22 -0
- data/README +93 -0
- data/actions/graphics_magick.rb +43 -0
- data/actions/process_pdfs.rb +92 -0
- data/actions/word_count.rb +14 -0
- data/bin/crowd +5 -0
- data/cloud-crowd.gemspec +111 -0
- data/config/config.example.ru +17 -0
- data/config/config.example.yml +48 -0
- data/config/database.example.yml +9 -0
- data/examples/graphics_magick_example.rb +44 -0
- data/examples/process_pdfs_example.rb +40 -0
- data/examples/word_count_example.rb +41 -0
- data/lib/cloud-crowd.rb +130 -0
- data/lib/cloud_crowd/action.rb +101 -0
- data/lib/cloud_crowd/app.rb +117 -0
- data/lib/cloud_crowd/asset_store.rb +41 -0
- data/lib/cloud_crowd/asset_store/filesystem_store.rb +28 -0
- data/lib/cloud_crowd/asset_store/s3_store.rb +40 -0
- data/lib/cloud_crowd/command_line.rb +209 -0
- data/lib/cloud_crowd/daemon.rb +95 -0
- data/lib/cloud_crowd/exceptions.rb +28 -0
- data/lib/cloud_crowd/helpers.rb +8 -0
- data/lib/cloud_crowd/helpers/authorization.rb +50 -0
- data/lib/cloud_crowd/helpers/resources.rb +45 -0
- data/lib/cloud_crowd/inflector.rb +19 -0
- data/lib/cloud_crowd/models.rb +40 -0
- data/lib/cloud_crowd/models/job.rb +176 -0
- data/lib/cloud_crowd/models/work_unit.rb +89 -0
- data/lib/cloud_crowd/models/worker_record.rb +61 -0
- data/lib/cloud_crowd/runner.rb +15 -0
- data/lib/cloud_crowd/schema.rb +45 -0
- data/lib/cloud_crowd/worker.rb +186 -0
- data/public/css/admin_console.css +221 -0
- data/public/css/reset.css +42 -0
- data/public/images/bullet_green.png +0 -0
- data/public/images/bullet_white.png +0 -0
- data/public/images/cloud_hand.png +0 -0
- data/public/images/header_back.png +0 -0
- data/public/images/logo.png +0 -0
- data/public/images/queue_fill.png +0 -0
- data/public/images/server_error.png +0 -0
- data/public/images/sidebar_bottom.png +0 -0
- data/public/images/sidebar_top.png +0 -0
- data/public/images/worker_info.png +0 -0
- data/public/images/worker_info_loading.gif +0 -0
- data/public/js/admin_console.js +168 -0
- data/public/js/excanvas.js +1 -0
- data/public/js/flot.js +1 -0
- data/public/js/jquery.js +19 -0
- data/test/acceptance/test_app.rb +72 -0
- data/test/acceptance/test_failing_work_units.rb +32 -0
- data/test/acceptance/test_word_count.rb +49 -0
- data/test/blueprints.rb +17 -0
- data/test/config/actions/failure_testing.rb +13 -0
- data/test/config/config.ru +17 -0
- data/test/config/config.yml +7 -0
- data/test/config/database.yml +6 -0
- data/test/test_helper.rb +19 -0
- data/test/unit/test_action.rb +49 -0
- data/test/unit/test_configuration.rb +28 -0
- data/test/unit/test_job.rb +78 -0
- data/test/unit/test_work_unit.rb +55 -0
- data/views/index.erb +77 -0
- metadata +233 -0
data/EPIGRAPHS
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
The crowd, suddenly there where there was nothing before, is a mysterious and
|
2
|
+
universal phenomenon. A few people may have been standing together -- five, ten
|
3
|
+
or twelve, not more; nothing has been announced, nothing is expected. Suddenly
|
4
|
+
everywhere is black with people and more come streaming from all sides as though
|
5
|
+
streets had only one direction. Most of them do not know what has happened and,
|
6
|
+
if questioned, have no answer; but they hurry to be there where most other
|
7
|
+
people are. There is a determination in their movement which is quite different
|
8
|
+
from the expression of ordinary curiosity. It seems as though the movement of
|
9
|
+
some of them transmits itself to all the others. But that is not all; they have
|
10
|
+
a goal which is there before they can find words for it. -p 16
|
11
|
+
|
12
|
+
Crowd crystals are the small, rigid groups of men, strictly delimited and of
|
13
|
+
great constancy, which serve to precipitate crowds. Their structure is such
|
14
|
+
that they can be comprehended and taken in at a glance. Their unity is more
|
15
|
+
important than their size. -p 73
|
16
|
+
|
17
|
+
From Elias Canetti's "Crowds and Power" (1962).
|
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2009 Jeremy Ashkenas, DocumentCloud
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person
|
4
|
+
obtaining a copy of this software and associated documentation
|
5
|
+
files (the "Software"), to deal in the Software without
|
6
|
+
restriction, including without limitation the rights to use,
|
7
|
+
copy, modify, merge, publish, distribute, sublicense, and/or sell
|
8
|
+
copies of the Software, and to permit persons to whom the
|
9
|
+
Software is furnished to do so, subject to the following
|
10
|
+
conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be
|
13
|
+
included in all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
17
|
+
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
18
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
19
|
+
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
20
|
+
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
21
|
+
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
22
|
+
OTHER DEALINGS IN THE SOFTWARE.
|
data/README
ADDED
@@ -0,0 +1,93 @@
|
|
1
|
+
=
|
2
|
+
_ _
|
3
|
+
( ` )_
|
4
|
+
( ) `)
|
5
|
+
(_ (_ . _) _)
|
6
|
+
_
|
7
|
+
( )
|
8
|
+
_ . ( ` ) . )
|
9
|
+
( _ )_ (_, _( ,_)_)
|
10
|
+
(_ _(_ ,)
|
11
|
+
|
12
|
+
_ _ ___ _ _ ___ _
|
13
|
+
( ` )_ / __| |___ _ _ __| |/ __|_ _ _____ __ ____| |
|
14
|
+
( ) `) | (__| / _ \ || / _` | (__| '_/ _ \ V V / _` |
|
15
|
+
(_ (_ . _) _) \___|_\___/\_,_\__,_|\___|_| \___/\_/\_/\__,_|
|
16
|
+
|
17
|
+
_
|
18
|
+
( )
|
19
|
+
_, _ . ( ` ) . )
|
20
|
+
( ( _ )_ (_, _( ,_)_)
|
21
|
+
(_(_ _(_ ,)
|
22
|
+
|
23
|
+
|
24
|
+
|
25
|
+
~ CloudCrowd ~
|
26
|
+
|
27
|
+
* Parallel processing for the rest of us
|
28
|
+
* Write your scripts in Ruby
|
29
|
+
* Built for Amazon EC2 and S3
|
30
|
+
* split -> process -> merge
|
31
|
+
* As easy as `gem install cloud-crowd`
|
32
|
+
|
33
|
+
Well-suited for:
|
34
|
+
|
35
|
+
* Generating or resizing images.
|
36
|
+
* Encoding video.
|
37
|
+
* Running text extraction or OCR on PDFs.
|
38
|
+
* Migrating a large file set or database.
|
39
|
+
* Web scraping.
|
40
|
+
|
41
|
+
|
42
|
+
~ Documentation ~
|
43
|
+
|
44
|
+
Wiki: http://wiki.github.com/documentcloud/cloud-crowd
|
45
|
+
Rdoc: http://rdoc.info/projects/documentcloud/cloud-crowd
|
46
|
+
|
47
|
+
|
48
|
+
~ Getting started ~
|
49
|
+
|
50
|
+
# Install the gem.
|
51
|
+
|
52
|
+
>> sudo gem install cloud-crowd
|
53
|
+
|
54
|
+
# Install the CloudCrowd configuration files to a location of your choosing.
|
55
|
+
|
56
|
+
>> crowd install ~/config/cloud-crowd
|
57
|
+
|
58
|
+
# Now, you can use the full complement of `crowd` commands from inside of
|
59
|
+
# this configuration directory. To see the available commands:
|
60
|
+
|
61
|
+
>> crowd --help
|
62
|
+
|
63
|
+
# Edit the configuration files to your satisfaction, add AWS credentials,
|
64
|
+
# and then load the CloudCrowd schema into your configured database.
|
65
|
+
|
66
|
+
>> mate ~/config/cloud-crowd/config.yml
|
67
|
+
>> mate ~/config/cloud-crowd/database.yml
|
68
|
+
>> crowd load_schema
|
69
|
+
|
70
|
+
# Write your actions, and install them into the 'actions' subdirectory.
|
71
|
+
# CloudCrowd comes with some default actions as an example.
|
72
|
+
|
73
|
+
# To launch the central server (make sure that you include its location
|
74
|
+
# in config.yml), either:
|
75
|
+
|
76
|
+
>> crowd server
|
77
|
+
|
78
|
+
# or:
|
79
|
+
|
80
|
+
>> thin -R config.ru --servers 3 -e production start
|
81
|
+
|
82
|
+
# Any server that supports Rack should work with the rackup file.
|
83
|
+
|
84
|
+
# Then, to spin up 10 workers:
|
85
|
+
|
86
|
+
>> crowd workers start -n 10
|
87
|
+
|
88
|
+
# To spin up workers remotely, install the 'cloud-crowd' gem, and copy over
|
89
|
+
# your configuration directory.
|
90
|
+
|
91
|
+
# At this point you can visit your server console at localhost:9173 to
|
92
|
+
# view all of your workers, ready for action.
|
93
|
+
|
@@ -0,0 +1,43 @@
|
|
1
|
+
require 'shellwords'

# The GraphicsMagick action, dependent on the `gm` command, is able to perform
# any number of GraphicsMagick conversions on an image passed in as an input.
# The options hash should specify the +name+ for the particular step (which is
# appended to the resulting image filename), the +command+ (eg. convert, mogrify),
# the +options+ (to the command, eg. -shadow -blur), and the +extension+ which
# will determine the resulting image type. Optionally, you may also specify
# +input+ as the name of a previous step; doing this will use the result of
# that step as the source image, otherwise each step uses the original image
# as its source.
class GraphicsMagick < CloudCrowd::Action

  # Run each of the specified GraphicsMagick steps against the input image.
  # Returns a hash that maps every step's +name+ to the value returned by
  # +run_step+ for that step.
  def process
    options['steps'].inject({}) {|h, step| h[step['name']] = run_step(step); h }
  end

  # Run an individual step (single GraphicsMagick command) in a shell-injection
  # safe way, passing the output file to +save+ and returning its result.
  #
  # The command is still executed through the shell (backticks), so every
  # piece of job-supplied text is escaped first:
  # * +command+ is escaped as a single word.
  # * +options+ is tokenised with shell quoting rules (so "-font 'My Font'"
  #   stays two arguments) and each token is escaped, which neutralises
  #   metacharacters such as ; | > $( ).
  # * the input and output paths are escaped as single words.
  #
  # Raises ArgumentError (from Shellwords.split) if +options+ contains an
  # unterminated quote.
  def run_step(step)
    cmd  = Shellwords.escape(step['command'].to_s)
    # +options+ is optional; nil is treated as "no extra arguments".
    opts = Shellwords.split(step['options'].to_s).map {|opt| Shellwords.escape(opt) }.join(' ')
    in_path, out_path = input_path_for(step), output_path_for(step)
    `gm #{cmd} #{opts} #{Shellwords.escape(in_path)} #{Shellwords.escape(out_path)}`
    save(out_path)
  end

  # Where should the starting image be located?
  # If the step names another step as its +input+ (and that step exists in
  # options['steps']), returns the path to that step's output so it can be
  # processed further; otherwise returns the original +input_path+.
  def input_path_for(step)
    in_step = step && step['input'] && options['steps'].detect {|s| s['name'] == step['input']}
    return input_path unless in_step
    return output_path_for(in_step)
  end

  # Where should resulting images be saved locally?
  # Built from the work directory, the input's file name, the step's +name+
  # and the step's +extension+.
  def output_path_for(step)
    "#{work_directory}/#{file_name}_#{step['name']}.#{step['extension']}"
  end

end
|
@@ -0,0 +1,92 @@
|
|
1
|
+
require 'shellwords'

# Depends on working pdftk, gm (GraphicsMagick), and pdftotext (Poppler) commands.
# Splits a pdf into batches of N pages, creates their thumbnails and icons,
# as specified in the Job options, gets the text for every page, and merges
# it all back into a tar archive for convenient download.
#
# All file names and image options that end up on a shell command line are
# escaped with Shellwords, because they originate from job input.
#
# See <tt>examples/process_pdfs_example.rb</tt> for more information.
class ProcessPdfs < CloudCrowd::Action

  # Split up a large pdf into single-page pdfs. Batch them into 'batch_size'
  # chunks for processing. The double pdftk shuffle fixes the document xrefs.
  # Returns the JSON-encoded list of values returned by +save+ for each
  # batch archive.
  def split
    `pdftk #{Shellwords.escape(input_path)} burst output #{Shellwords.escape("#{file_name}_%05d.pdf_temp")}`
    FileUtils.rm input_path
    Dir["*.pdf_temp"].each do |temp|
      page_pdf = "#{File.basename(temp, '.pdf_temp')}.pdf"
      `pdftk #{Shellwords.escape(temp)} output #{Shellwords.escape(page_pdf)}`
    end
    # Dir[] does not guarantee any ordering on every Ruby/platform; sort so
    # that each batch holds a contiguous run of pages.
    pdfs = Dir["*.pdf"].sort
    batch_size = options['batch_size']
    batches = (pdfs.length / batch_size.to_f).ceil
    batches.times do |batch_num|
      tar_path = "#{sprintf('%05d', batch_num)}.tar"
      batch_pdfs = pdfs[batch_num*batch_size...(batch_num + 1)*batch_size]
      `tar -czf #{tar_path} #{batch_pdfs.map {|pdf| Shellwords.escape(pdf) }.join(' ')}`
    end
    Dir["*.tar"].sort.map {|tar| save(tar) }.to_json
  end

  # Convert a pdf page into different-sized thumbnails. Grab the text.
  # Unpacks the batch archive, runs every image and text command as a single
  # '&&'-chained shell invocation, then re-archives everything that is left
  # in the current directory and returns the result of +save+.
  def process
    `tar -xzf #{Shellwords.escape(input_path)}`
    FileUtils.rm input_path
    cmds = []
    generate_images_commands(cmds)
    generate_text_commands(cmds)
    system cmds.join(' && ')
    FileUtils.rm Dir['*.pdf']
    # The trailing * is intentionally left for the shell to expand.
    `tar -czf #{Shellwords.escape("#{file_name}.tar")} *`
    save("#{file_name}.tar")
  end

  # Merge all of the resulting images, all of the resulting text files, and
  # the concatenated merge of the full-text into a single tar archive, ready
  # for download.
  #
  # Files are sorted into <name>/text/pages, <name>/text/full and
  # <name>/images/<image name>. The image directory is chosen from the text
  # after the last underscore in the file name, so image names that
  # themselves contain an underscore will not match the directories created
  # here.
  def merge
    input.each do |batch_url|
      batch_path = File.basename(batch_url)
      download(batch_url, batch_path)
      `tar -xzf #{Shellwords.escape(batch_path)}`
      FileUtils.rm batch_path
    end

    names = Dir['*.txt'].map {|fn| fn.sub(/_\d+(_\w+)?\.txt\Z/, '') }.uniq
    dirs = names.map {|n| ["#{n}/text/full", "#{n}/text/pages"] + options['images'].map {|i| "#{n}/images/#{i['name']}" } }.flatten
    FileUtils.mkdir_p(dirs)

    Dir['*.*'].each do |file|
      # A document name containing a dot makes its freshly created directory
      # match '*.*' too; only loose files should be moved.
      next if File.directory?(file)
      ext = File.extname(file)
      # Escape the extension so that its leading dot (or any other regex
      # metacharacter) is matched literally.
      ext_pattern = Regexp.escape(ext)
      name = file.sub(/_\d+(_\w+)?#{ext_pattern}\Z/, '')
      if ext == '.txt'
        FileUtils.mv(file, "#{name}/text/pages/#{file}")
      else
        suffix = file.match(/_([^_]+)#{ext_pattern}\Z/)[1]
        sans_suffix = file.sub(/_([^_]+)#{ext_pattern}\Z/, ext)
        FileUtils.mv(file, "#{name}/images/#{suffix}/#{sans_suffix}")
      end
    end

    names.each do |n|
      dir = Shellwords.escape(n)
      # The *.txt glob stays unescaped so the shell expands it.
      `cat #{dir}/text/pages/*.txt > #{dir}/text/full/#{dir}.txt`
    end

    `tar -czf processed_pdfs.tar *`
    save("processed_pdfs.tar")
  end


  private

  # Append one `gm convert` command per (pdf, image spec) pair to
  # +command_list+. Image +options+ are tokenised and escaped word by word;
  # an unterminated quote in them raises ArgumentError (Shellwords.split).
  def generate_images_commands(command_list)
    Dir["*.pdf"].sort.each do |pdf|
      name = File.basename(pdf, File.extname(pdf))
      options['images'].each do |i|
        image_opts = Shellwords.split(i['options'].to_s).map {|opt| Shellwords.escape(opt) }.join(' ')
        out_file   = "#{name}_#{i['name']}.#{i['extension']}"
        command_list << "gm convert #{image_opts} #{Shellwords.escape(pdf)} #{Shellwords.escape(out_file)}"
      end
    end
  end

  # Append one `pdftotext` command per pdf in the current directory to
  # +command_list+, writing <name>.txt next to each pdf.
  def generate_text_commands(command_list)
    Dir["*.pdf"].sort.each do |pdf|
      name = File.basename(pdf, File.extname(pdf))
      command_list << "pdftotext -enc UTF-8 -layout -q #{Shellwords.escape(pdf)} #{Shellwords.escape("#{name}.txt")}"
    end
  end

end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
require 'shellwords'

# A parallel WordCount. Depends on the 'wc' utility.
class WordCount < CloudCrowd::Action

  # Count the words in a single book.
  # Runs `wc -w` on the (shell-escaped) input path and returns the leading
  # number of its output as an Integer. Raises a RuntimeError with a
  # descriptive message when `wc` prints no count (for example because the
  # file is missing), instead of failing with a NoMethodError on nil.
  def process
    output = `wc -w #{Shellwords.escape(input_path)}`
    count  = output[/\A\s*(\d+)/, 1]
    raise "wc did not report a word count for #{input_path}" unless count
    count.to_i
  end

  # Sum the total word count.
  # +input+ is expected to be the list of per-book counts.
  def merge
    input.inject(0) {|sum, count| sum + count }
  end

end
|
data/bin/crowd
ADDED
data/cloud-crowd.gemspec
ADDED
@@ -0,0 +1,111 @@
|
|
1
|
+
Gem::Specification.new do |s|
  # Identity. Keep the version in sync with cloud-crowd.rb.
  s.name    = 'cloud-crowd'
  s.version = '0.1.0'
  s.date    = '2009-09-14'

  # Descriptive metadata shown by the gem index.
  s.homepage    = "http://wiki.github.com/documentcloud/cloud-crowd"
  s.summary     = "Parallel Processing for the Rest of Us"
  s.description = <<-EOS
The crowd, suddenly there where there was nothing before, is a mysterious and
universal phenomenon. A few people may have been standing together -- five, ten
or twelve, nor more; nothing has been announced, nothing is expected. Suddenly
everywhere is black with people and more come streaming from all sides as though
streets had only one direction.
  EOS

  s.authors           = ['Jeremy Ashkenas']
  s.email             = 'jeremy@documentcloud.org'
  s.rubyforge_project = 'cloud-crowd'

  # Load path and the command-line entry point.
  s.require_paths = ['lib']
  s.executables   = ['crowd']

  # RDoc generation settings.
  s.has_rdoc         = true
  s.extra_rdoc_files = ['README']
  s.rdoc_options.concat([
    '--title',   'CloudCrowd | Parallel Processing for the Rest of Us',
    '--exclude', 'test',
    '--main',    'README',
    '--all'
  ])

  # Runtime dependencies, declared in a fixed order.
  [
    ['sinatra',      '>= 0.9.4'],
    ['activerecord', '>= 2.3.3'],
    ['json',         '>= 1.1.7'],
    ['rest-client',  '>= 1.0.3'],
    ['right_aws',    '>= 1.10.0'],
    ['daemons',      '>= 1.0.10']
  ].each do |gem_name, requirement|
    s.add_dependency gem_name, [requirement]
  end

  # Development dependencies are only declared when this RubyGems version
  # knows about them.
  if s.respond_to?(:add_development_dependency)
    [
      ['faker',              '>= 0.3.1'],
      ['thoughtbot-shoulda', '>= 2.10.2'],
      ['notahat-machinist',  '>= 1.0.3'],
      ['rack-test',          '>= 0.4.1'],
      ['mocha',              '>= 0.9.7']
    ].each do |gem_name, requirement|
      s.add_development_dependency gem_name, [requirement]
    end
  end

  # Every file shipped in the gem.
  s.files = %w[
    actions/graphics_magick.rb
    actions/process_pdfs.rb
    actions/word_count.rb
    cloud-crowd.gemspec
    config/config.example.ru
    config/config.example.yml
    config/database.example.yml
    EPIGRAPHS
    examples/graphics_magick_example.rb
    examples/process_pdfs_example.rb
    examples/word_count_example.rb
    lib/cloud-crowd.rb
    lib/cloud_crowd/action.rb
    lib/cloud_crowd/app.rb
    lib/cloud_crowd/asset_store/filesystem_store.rb
    lib/cloud_crowd/asset_store/s3_store.rb
    lib/cloud_crowd/asset_store.rb
    lib/cloud_crowd/command_line.rb
    lib/cloud_crowd/daemon.rb
    lib/cloud_crowd/exceptions.rb
    lib/cloud_crowd/helpers/authorization.rb
    lib/cloud_crowd/helpers/resources.rb
    lib/cloud_crowd/helpers.rb
    lib/cloud_crowd/inflector.rb
    lib/cloud_crowd/models/job.rb
    lib/cloud_crowd/models/work_unit.rb
    lib/cloud_crowd/models/worker_record.rb
    lib/cloud_crowd/models.rb
    lib/cloud_crowd/runner.rb
    lib/cloud_crowd/schema.rb
    lib/cloud_crowd/worker.rb
    LICENSE
    public/css/admin_console.css
    public/css/reset.css
    public/images/bullet_green.png
    public/images/bullet_white.png
    public/images/cloud_hand.png
    public/images/header_back.png
    public/images/logo.png
    public/images/queue_fill.png
    public/images/server_error.png
    public/images/sidebar_bottom.png
    public/images/sidebar_top.png
    public/images/worker_info.png
    public/images/worker_info_loading.gif
    public/js/admin_console.js
    public/js/excanvas.js
    public/js/flot.js
    public/js/jquery.js
    README
    test/acceptance/test_app.rb
    test/acceptance/test_failing_work_units.rb
    test/acceptance/test_word_count.rb
    test/blueprints.rb
    test/config/config.ru
    test/config/config.yml
    test/config/database.yml
    test/config/actions/failure_testing.rb
    test/test_helper.rb
    test/unit/test_action.rb
    test/unit/test_configuration.rb
    test/unit/test_job.rb
    test/unit/test_work_unit.rb
    views/index.erb
  ]
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# This rackup script can be used to start the central CloudCrowd server
|
4
|
+
# using any Rack-compliant server handler. For example, start up three servers
|
5
|
+
# with a specified port number, using Thin:
|
6
|
+
#
|
7
|
+
# thin start -R config.ru -p 9173 --servers 3
|
8
|
+
|
9
|
+
require 'rubygems'
|
10
|
+
require 'cloud-crowd'
|
11
|
+
|
12
|
+
# Point CloudCrowd at the config.yml and database.yml that sit alongside this
# rackup file.
config_dir = File.dirname(__FILE__)
CloudCrowd.configure("#{config_dir}/config.yml")
CloudCrowd.configure_database("#{config_dir}/database.yml")

# Mount the CloudCrowd application at the root path.
map('/') { run CloudCrowd::App }
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# The URL where you're planning on running the central server/queue/database.
|
2
|
+
:central_server: http://localhost:9173
|
3
|
+
|
4
|
+
# The storage back-end that you'd like to use for intermediate and final results
|
5
|
+
# of processing. 's3' and 'filesystem' are supported. 'filesystem' should only
|
6
|
+
# be used in development, or on single-machine installations.
|
7
|
+
:storage: s3
|
8
|
+
|
9
|
+
# Please provide your AWS credentials for S3 storage of job output.
|
10
|
+
:aws_access_key: [your AWS access key]
|
11
|
+
:aws_secret_key: [your AWS secret access key]
|
12
|
+
|
13
|
+
# Choose an S3 bucket to store all CloudCrowd output, and decide if you'd like
|
14
|
+
# to keep all resulting files on S3 private. If so, you'll receive authenticated
|
15
|
+
# S3 URLs as job output, good for 24 hours. If left public, you'll get the
|
16
|
+
# straight URLs to the files on S3.
|
17
|
+
:s3_bucket: [your CloudCrowd bucket]
|
18
|
+
:use_s3_authentication: no
|
19
|
+
|
20
|
+
# Use HTTP Basic Auth for all requests? (Includes all internal worker requests
|
21
|
+
# to the central server). If yes, specify the login and password that all
|
22
|
+
# requests must provide for authentication.
|
23
|
+
:use_http_authentication: no
|
24
|
+
:login: [your login name]
|
25
|
+
:password: [your password]
|
26
|
+
|
27
|
+
# By default, CloudCrowd looks for installed actions inside the 'actions'
|
28
|
+
# subdirectory of this configuration folder. 'actions_path' allows you to load
|
29
|
+
# additional actions from a location of your choice.
|
30
|
+
# :actions_path: /path/to/actions
|
31
|
+
|
32
|
+
# Set the following numbers to tweak the configuration of your worker daemons.
|
33
|
+
# Optimum results will depend on the proportion of Memory/CPU/IO bottlenecks
|
34
|
+
# in your actions, the number of central servers you have running, and your
|
35
|
+
# desired balance between latency and traffic.
|
36
|
+
|
37
|
+
# The number of workers that `crowd workers start` spins up.
|
38
|
+
:num_workers: 3
|
39
|
+
|
40
|
+
# The minimum number of seconds a worker waits between checking the job queue.
|
41
|
+
:min_worker_wait: 1
|
42
|
+
|
43
|
+
# The maximum number of seconds a worker waits between checking the job queue.
|
44
|
+
:max_worker_wait: 5
|
45
|
+
|
46
|
+
# The number of separate attempts that will be made to process an individual
|
47
|
+
# work unit, before marking it as having failed.
|
48
|
+
:work_unit_retries: 3
|