cloud-crowd 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/EPIGRAPHS +17 -0
- data/LICENSE +22 -0
- data/README +93 -0
- data/actions/graphics_magick.rb +43 -0
- data/actions/process_pdfs.rb +92 -0
- data/actions/word_count.rb +14 -0
- data/bin/crowd +5 -0
- data/cloud-crowd.gemspec +111 -0
- data/config/config.example.ru +17 -0
- data/config/config.example.yml +48 -0
- data/config/database.example.yml +9 -0
- data/examples/graphics_magick_example.rb +44 -0
- data/examples/process_pdfs_example.rb +40 -0
- data/examples/word_count_example.rb +41 -0
- data/lib/cloud-crowd.rb +130 -0
- data/lib/cloud_crowd/action.rb +101 -0
- data/lib/cloud_crowd/app.rb +117 -0
- data/lib/cloud_crowd/asset_store.rb +41 -0
- data/lib/cloud_crowd/asset_store/filesystem_store.rb +28 -0
- data/lib/cloud_crowd/asset_store/s3_store.rb +40 -0
- data/lib/cloud_crowd/command_line.rb +209 -0
- data/lib/cloud_crowd/daemon.rb +95 -0
- data/lib/cloud_crowd/exceptions.rb +28 -0
- data/lib/cloud_crowd/helpers.rb +8 -0
- data/lib/cloud_crowd/helpers/authorization.rb +50 -0
- data/lib/cloud_crowd/helpers/resources.rb +45 -0
- data/lib/cloud_crowd/inflector.rb +19 -0
- data/lib/cloud_crowd/models.rb +40 -0
- data/lib/cloud_crowd/models/job.rb +176 -0
- data/lib/cloud_crowd/models/work_unit.rb +89 -0
- data/lib/cloud_crowd/models/worker_record.rb +61 -0
- data/lib/cloud_crowd/runner.rb +15 -0
- data/lib/cloud_crowd/schema.rb +45 -0
- data/lib/cloud_crowd/worker.rb +186 -0
- data/public/css/admin_console.css +221 -0
- data/public/css/reset.css +42 -0
- data/public/images/bullet_green.png +0 -0
- data/public/images/bullet_white.png +0 -0
- data/public/images/cloud_hand.png +0 -0
- data/public/images/header_back.png +0 -0
- data/public/images/logo.png +0 -0
- data/public/images/queue_fill.png +0 -0
- data/public/images/server_error.png +0 -0
- data/public/images/sidebar_bottom.png +0 -0
- data/public/images/sidebar_top.png +0 -0
- data/public/images/worker_info.png +0 -0
- data/public/images/worker_info_loading.gif +0 -0
- data/public/js/admin_console.js +168 -0
- data/public/js/excanvas.js +1 -0
- data/public/js/flot.js +1 -0
- data/public/js/jquery.js +19 -0
- data/test/acceptance/test_app.rb +72 -0
- data/test/acceptance/test_failing_work_units.rb +32 -0
- data/test/acceptance/test_word_count.rb +49 -0
- data/test/blueprints.rb +17 -0
- data/test/config/actions/failure_testing.rb +13 -0
- data/test/config/config.ru +17 -0
- data/test/config/config.yml +7 -0
- data/test/config/database.yml +6 -0
- data/test/test_helper.rb +19 -0
- data/test/unit/test_action.rb +49 -0
- data/test/unit/test_configuration.rb +28 -0
- data/test/unit/test_job.rb +78 -0
- data/test/unit/test_work_unit.rb +55 -0
- data/views/index.erb +77 -0
- metadata +233 -0
data/EPIGRAPHS
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
The crowd, suddenly there where there was nothing before, is a mysterious and
|
2
|
+
universal phenomenon. A few people may have been standing together -- five, ten
|
3
|
+
or twelve, not more; nothing has been announced, nothing is expected. Suddenly
|
4
|
+
everywhere is black with people and more come streaming from all sides as though
|
5
|
+
streets had only one direction. Most of them do not know what has happened and,
|
6
|
+
if questioned, have no answer; but they hurry to be there where most other
|
7
|
+
people are. There is a determination in their movement which is quite different
|
8
|
+
from the expression of ordinary curiosity. It seems as though the movement of
|
9
|
+
some of them transmits itself to all the others. But that is not all; they have
|
10
|
+
a goal which is there before they can find words for it. -p 16
|
11
|
+
|
12
|
+
Crowd crystals are the small, rigid groups of men, strictly delimited and of
|
13
|
+
great constancy, which serve to precipitate crowds. Their structure is such
|
14
|
+
that they can be comprehended and taken in at a glance. Their unity is more
|
15
|
+
important than their size. -p 73
|
16
|
+
|
17
|
+
From Elias Canetti's "Crowds and Power" (1962).
|
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2009 Jeremy Ashkenas, DocumentCloud
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person
|
4
|
+
obtaining a copy of this software and associated documentation
|
5
|
+
files (the "Software"), to deal in the Software without
|
6
|
+
restriction, including without limitation the rights to use,
|
7
|
+
copy, modify, merge, publish, distribute, sublicense, and/or sell
|
8
|
+
copies of the Software, and to permit persons to whom the
|
9
|
+
Software is furnished to do so, subject to the following
|
10
|
+
conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be
|
13
|
+
included in all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
17
|
+
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
18
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
19
|
+
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
20
|
+
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
21
|
+
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
22
|
+
OTHER DEALINGS IN THE SOFTWARE.
|
data/README
ADDED
@@ -0,0 +1,93 @@
|
|
1
|
+
=
|
2
|
+
_ _
|
3
|
+
( ` )_
|
4
|
+
( ) `)
|
5
|
+
(_ (_ . _) _)
|
6
|
+
_
|
7
|
+
( )
|
8
|
+
_ . ( ` ) . )
|
9
|
+
( _ )_ (_, _( ,_)_)
|
10
|
+
(_ _(_ ,)
|
11
|
+
|
12
|
+
_ _ ___ _ _ ___ _
|
13
|
+
( ` )_ / __| |___ _ _ __| |/ __|_ _ _____ __ ____| |
|
14
|
+
( ) `) | (__| / _ \ || / _` | (__| '_/ _ \ V V / _` |
|
15
|
+
(_ (_ . _) _) \___|_\___/\_,_\__,_|\___|_| \___/\_/\_/\__,_|
|
16
|
+
|
17
|
+
_
|
18
|
+
( )
|
19
|
+
_, _ . ( ` ) . )
|
20
|
+
( ( _ )_ (_, _( ,_)_)
|
21
|
+
(_(_ _(_ ,)
|
22
|
+
|
23
|
+
|
24
|
+
|
25
|
+
~ CloudCrowd ~
|
26
|
+
|
27
|
+
* Parallel processing for the rest of us
|
28
|
+
* Write your scripts in Ruby
|
29
|
+
* Built for Amazon EC2 and S3
|
30
|
+
* split -> process -> merge
|
31
|
+
* As easy as `gem install cloud-crowd`
|
32
|
+
|
33
|
+
Well-suited for:
|
34
|
+
|
35
|
+
* Generating or resizing images.
|
36
|
+
* Encoding video.
|
37
|
+
* Running text extraction or OCR on PDFs.
|
38
|
+
* Migrating a large file set or database.
|
39
|
+
* Web scraping.
|
40
|
+
|
41
|
+
|
42
|
+
~ Documentation ~
|
43
|
+
|
44
|
+
Wiki: http://wiki.github.com/documentcloud/cloud-crowd
|
45
|
+
Rdoc: http://rdoc.info/projects/documentcloud/cloud-crowd
|
46
|
+
|
47
|
+
|
48
|
+
~ Getting started ~
|
49
|
+
|
50
|
+
# Install the gem.
|
51
|
+
|
52
|
+
>> sudo gem install cloud-crowd
|
53
|
+
|
54
|
+
# Install the CloudCrowd configuration files to a location of your choosing.
|
55
|
+
|
56
|
+
>> crowd install ~/config/cloud-crowd
|
57
|
+
|
58
|
+
# Now, you can use the full complement of `crowd` commands from inside of
|
59
|
+
# this configuration directory. To see the available commands:
|
60
|
+
|
61
|
+
>> crowd --help
|
62
|
+
|
63
|
+
# Edit the configuration files to your satisfaction, add AWS credentials,
|
64
|
+
# and then load the CloudCrowd schema into your configured database.
|
65
|
+
|
66
|
+
>> mate ~/config/cloud-crowd/config.yml
|
67
|
+
>> mate ~/config/cloud-crowd/database.yml
|
68
|
+
>> crowd load_schema
|
69
|
+
|
70
|
+
# Write your actions, and install them into the 'actions' subdirectory.
|
71
|
+
# CloudCrowd comes with some default actions as an example.
|
72
|
+
|
73
|
+
# To launch the central server (make sure that you include its location
|
74
|
+
# in config.yml), either:
|
75
|
+
|
76
|
+
>> crowd server
|
77
|
+
|
78
|
+
# or:
|
79
|
+
|
80
|
+
>> thin -R config.ru --servers 3 -e production start
|
81
|
+
|
82
|
+
# Any server that supports Rack should work with the rackup file.
|
83
|
+
|
84
|
+
# Then, to spin up 10 workers:
|
85
|
+
|
86
|
+
>> crowd workers start -n 10
|
87
|
+
|
88
|
+
# To spin up workers remotely, install the 'cloud-crowd' gem, and copy over
|
89
|
+
# your configuration directory.
|
90
|
+
|
91
|
+
# At this point you can visit your server console at localhost:9173 to
|
92
|
+
# view all of your workers, ready for action.
|
93
|
+
|
@@ -0,0 +1,43 @@
|
|
1
|
+
# The GraphicsMagick action, dependent on the `gm` command, is able to perform
|
2
|
+
# any number of GraphicsMagick conversions on an image passed in as an input.
|
3
|
+
# The options hash should specify the +name+ for the particular step (which is
|
4
|
+
# appended to the resulting image filename), the +command+ (e.g. convert, mogrify),
|
5
|
+
# the +options+ (to the command, eg. -shadow -blur), and the +extension+ which
|
6
|
+
# will determine the resulting image type. Optionally, you may also specify
|
7
|
+
# +input+ as the name of a previous step; doing this will use the result of
|
8
|
+
# that step as the source image, otherwise each step uses the original image
|
9
|
+
# as its source.
|
10
|
+
require 'shellwords'

class GraphicsMagick < CloudCrowd::Action

  # Run each of the configured steps in order and return a hash that maps
  # every step's +name+ to the value returned by +run_step+ for that step.
  def process
    options['steps'].inject({}) {|h, step| h[step['name']] = run_step(step); h }
  end

  # Run an individual step (a single GraphicsMagick command), then hand the
  # output file to +save+ and return its result (per the original author's
  # note: the public URL of the file uploaded to the AssetStore).
  #
  # The command line is assembled from individually escaped words, so shell
  # metacharacters in the step's +command+, +options+, or in the file paths
  # reach +gm+ literally instead of being interpreted by the shell. Quoted
  # arguments inside +options+ (eg. -font "Helvetica Bold") are kept together
  # by Shellwords.split.
  #
  # Raises ArgumentError if +command+ or +options+ contain unbalanced quotes.
  def run_step(step)
    cmd, opts = step['command'], step['options']
    in_path, out_path = input_path_for(step), output_path_for(step)
    words  = ['gm']
    words += Shellwords.split(cmd.to_s)
    words += Shellwords.split(opts.to_s)
    words += [in_path.to_s, out_path.to_s]
    # Backticks are kept (rather than +system+) because the original author
    # noted that +system+ was not working here.
    `#{Shellwords.join(words)}`
    save(out_path)
  end

  # Where should the starting image be located?
  # If the step names another step as its +input+, returns the path to that
  # step's output so it can be used as the source for further processing;
  # otherwise returns +input_path+.
  def input_path_for(step)
    in_step = step && step['input'] && options['steps'].detect {|s| s['name'] == step['input']}
    return input_path unless in_step
    return output_path_for(in_step)
  end

  # Where should resulting images be saved locally? Built from the work
  # directory, the file name, the step's +name+ and the step's +extension+.
  def output_path_for(step)
    "#{work_directory}/#{file_name}_#{step['name']}.#{step['extension']}"
  end

end
|
@@ -0,0 +1,92 @@
|
|
1
|
+
# Depends on working pdftk, gm (GraphicsMagick), and pdftotext (Poppler) commands.
|
2
|
+
# Splits a pdf into batches of N pages, creates their thumbnails and icons,
|
3
|
+
# as specified in the Job options, gets the text for every page, and merges
|
4
|
+
# it all back into a tar archive for convenient download.
|
5
|
+
#
|
6
|
+
# See <tt>examples/process_pdfs_example.rb</tt> for more information.
|
7
|
+
class ProcessPdfs < CloudCrowd::Action

  # Split up a large pdf into single-page pdfs. Batch them into 'batch_size'
  # chunks for processing. The double pdftk shuffle fixes the document xrefs.
  # Returns a JSON array holding the result of +save+ for every batch archive.
  def split
    `pdftk #{input_path} burst output "#{file_name}_%05d.pdf_temp"`
    FileUtils.rm input_path
    Dir["*.pdf_temp"].each {|pdf| `pdftk #{pdf} output #{File.basename(pdf, '.pdf_temp')}.pdf`}
    # Dir[] makes no ordering promise on older Rubies; sort so that every
    # batch holds a contiguous, reproducible run of pages.
    pdfs = Dir["*.pdf"].sort
    # A missing or non-positive batch size used to blow up with a
    # FloatDomainError (Infinity.ceil); fall back to one page per batch.
    batch_size = [options['batch_size'].to_i, 1].max
    batch_num = 0
    pdfs.each_slice(batch_size) do |batch_pdfs|
      tar_path = "#{sprintf('%05d', batch_num)}.tar"
      `tar -czf #{tar_path} #{batch_pdfs.join(' ')}`
      batch_num += 1
    end
    Dir["*.tar"].sort.map {|tar| save(tar) }.to_json
  end

  # Convert a pdf page into different-sized thumbnails. Grab the text.
  # Unpacks the batch archive, runs all image and text commands as a single
  # '&&'-chained shell command, removes the pdfs, and saves a tar of the rest.
  def process
    `tar -xzf #{input_path}`
    FileUtils.rm input_path
    cmds = []
    generate_images_commands(cmds)
    generate_text_commands(cmds)
    system cmds.join(' && ')
    FileUtils.rm Dir['*.pdf']
    `tar -czf #{file_name}.tar *`
    save("#{file_name}.tar")
  end

  # Merge all of the resulting images, all of the resulting text files, and
  # the concatenated merge of the full-text into a single tar archive, ready
  # for download.
  def merge
    # Fetch and unpack every processed batch into the current directory.
    input.each do |batch_url|
      batch_path = File.basename(batch_url)
      download(batch_url, batch_path)
      `tar -xzf #{batch_path}`
      FileUtils.rm batch_path
    end

    # Document names are the text file names minus their page-number suffix.
    names = Dir['*.txt'].map {|fn| fn.sub(/_\d+(_\w+)?\.txt\Z/, '') }.uniq
    dirs = names.map {|n| ["#{n}/text/full", "#{n}/text/pages"] + options['images'].map {|i| "#{n}/images/#{i['name']}" } }.flatten
    FileUtils.mkdir_p(dirs)

    Dir['*.*'].each do |file|
      # '*.*' also matches the directories created above when a document
      # name contains a dot; only regular files should be sorted into them.
      next unless File.file?(file)
      ext = File.extname(file)
      ext_pattern = Regexp.escape(ext)
      name = file.sub(/_\d+(_\w+)?#{ext_pattern}\Z/, '')
      if ext == '.txt'
        FileUtils.mv(file, "#{name}/text/pages/#{file}")
      else
        # Images carry a trailing "_<image name>" that selects their folder.
        suffix = file.match(/_([^_]+)#{ext_pattern}\Z/)[1]
        sans_suffix = file.sub(/_([^_]+)#{ext_pattern}\Z/, ext)
        FileUtils.mv(file, "#{name}/images/#{suffix}/#{sans_suffix}")
      end
    end

    # Concatenate the per-page text into one full-text file per document.
    names.each {|n| `cat #{n}/text/pages/*.txt > #{n}/text/full/#{n}.txt` }

    `tar -czf processed_pdfs.tar *`
    save("processed_pdfs.tar")
  end


  private

  # Append one `gm convert` command per pdf and per configured image size
  # to +command_list+.
  def generate_images_commands(command_list)
    Dir["*.pdf"].each do |pdf|
      name = File.basename(pdf, File.extname(pdf))
      options['images'].each do |i|
        command_list << "gm convert #{i['options']} #{pdf} #{name}_#{i['name']}.#{i['extension']}"
      end
    end
  end

  # Append one `pdftotext` command per pdf to +command_list+.
  def generate_text_commands(command_list)
    Dir["*.pdf"].each do |pdf|
      name = File.basename(pdf, File.extname(pdf))
      command_list << "pdftotext -enc UTF-8 -layout -q #{pdf} #{name}.txt"
    end
  end

end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
# A parallel WordCount. Depends on the 'wc' utility.
|
2
|
+
class WordCount < CloudCrowd::Action

  # Shell out to `wc -w` for the input file and return the leading number of
  # its output as an Integer.
  def process
    wc_output = `wc -w #{input_path}`
    leading_number = wc_output.match(/\A\s*(\d+)/)
    leading_number[1].to_i
  end

  # Add up all of the individual counts, starting from zero.
  def merge
    total = 0
    input.each {|count| total += count }
    total
  end

end
|
data/bin/crowd
ADDED
data/cloud-crowd.gemspec
ADDED
@@ -0,0 +1,111 @@
|
|
1
|
+
Gem::Specification.new do |s|
|
2
|
+
s.name = 'cloud-crowd'
|
3
|
+
s.version = '0.1.0' # Keep version in sync with cloud-crowd.rb
|
4
|
+
s.date = '2009-09-14'
|
5
|
+
|
6
|
+
s.homepage = "http://wiki.github.com/documentcloud/cloud-crowd"
|
7
|
+
s.summary = "Parallel Processing for the Rest of Us"
|
8
|
+
s.description = <<-EOS
|
9
|
+
The crowd, suddenly there where there was nothing before, is a mysterious and
|
10
|
+
universal phenomenon. A few people may have been standing together -- five, ten
|
11
|
+
or twelve, not more; nothing has been announced, nothing is expected. Suddenly
|
12
|
+
everywhere is black with people and more come streaming from all sides as though
|
13
|
+
streets had only one direction.
|
14
|
+
EOS
|
15
|
+
|
16
|
+
s.authors = ['Jeremy Ashkenas']
|
17
|
+
s.email = 'jeremy@documentcloud.org'
|
18
|
+
s.rubyforge_project = 'cloud-crowd'
|
19
|
+
|
20
|
+
s.require_paths = ['lib']
|
21
|
+
s.executables = ['crowd']
|
22
|
+
|
23
|
+
s.has_rdoc = true
|
24
|
+
s.extra_rdoc_files = ['README']
|
25
|
+
s.rdoc_options << '--title' << 'CloudCrowd | Parallel Processing for the Rest of Us' <<
|
26
|
+
'--exclude' << 'test' <<
|
27
|
+
'--main' << 'README' <<
|
28
|
+
'--all'
|
29
|
+
|
30
|
+
s.add_dependency 'sinatra', ['>= 0.9.4']
|
31
|
+
s.add_dependency 'activerecord', ['>= 2.3.3']
|
32
|
+
s.add_dependency 'json', ['>= 1.1.7']
|
33
|
+
s.add_dependency 'rest-client', ['>= 1.0.3']
|
34
|
+
s.add_dependency 'right_aws', ['>= 1.10.0']
|
35
|
+
s.add_dependency 'daemons', ['>= 1.0.10']
|
36
|
+
|
37
|
+
if s.respond_to?(:add_development_dependency)
|
38
|
+
s.add_development_dependency 'faker', ['>= 0.3.1']
|
39
|
+
s.add_development_dependency 'thoughtbot-shoulda', ['>= 2.10.2']
|
40
|
+
s.add_development_dependency 'notahat-machinist', ['>= 1.0.3']
|
41
|
+
s.add_development_dependency 'rack-test', ['>= 0.4.1']
|
42
|
+
s.add_development_dependency 'mocha', ['>= 0.9.7']
|
43
|
+
end
|
44
|
+
|
45
|
+
s.files = %w(
|
46
|
+
actions/graphics_magick.rb
|
47
|
+
actions/process_pdfs.rb
|
48
|
+
actions/word_count.rb
|
49
|
+
cloud-crowd.gemspec
|
50
|
+
config/config.example.ru
|
51
|
+
config/config.example.yml
|
52
|
+
config/database.example.yml
|
53
|
+
EPIGRAPHS
|
54
|
+
examples/graphics_magick_example.rb
|
55
|
+
examples/process_pdfs_example.rb
|
56
|
+
examples/word_count_example.rb
|
57
|
+
lib/cloud-crowd.rb
|
58
|
+
lib/cloud_crowd/action.rb
|
59
|
+
lib/cloud_crowd/app.rb
|
60
|
+
lib/cloud_crowd/asset_store/filesystem_store.rb
|
61
|
+
lib/cloud_crowd/asset_store/s3_store.rb
|
62
|
+
lib/cloud_crowd/asset_store.rb
|
63
|
+
lib/cloud_crowd/command_line.rb
|
64
|
+
lib/cloud_crowd/daemon.rb
|
65
|
+
lib/cloud_crowd/exceptions.rb
|
66
|
+
lib/cloud_crowd/helpers/authorization.rb
|
67
|
+
lib/cloud_crowd/helpers/resources.rb
|
68
|
+
lib/cloud_crowd/helpers.rb
|
69
|
+
lib/cloud_crowd/inflector.rb
|
70
|
+
lib/cloud_crowd/models/job.rb
|
71
|
+
lib/cloud_crowd/models/work_unit.rb
|
72
|
+
lib/cloud_crowd/models/worker_record.rb
|
73
|
+
lib/cloud_crowd/models.rb
|
74
|
+
lib/cloud_crowd/runner.rb
|
75
|
+
lib/cloud_crowd/schema.rb
|
76
|
+
lib/cloud_crowd/worker.rb
|
77
|
+
LICENSE
|
78
|
+
public/css/admin_console.css
|
79
|
+
public/css/reset.css
|
80
|
+
public/images/bullet_green.png
|
81
|
+
public/images/bullet_white.png
|
82
|
+
public/images/cloud_hand.png
|
83
|
+
public/images/header_back.png
|
84
|
+
public/images/logo.png
|
85
|
+
public/images/queue_fill.png
|
86
|
+
public/images/server_error.png
|
87
|
+
public/images/sidebar_bottom.png
|
88
|
+
public/images/sidebar_top.png
|
89
|
+
public/images/worker_info.png
|
90
|
+
public/images/worker_info_loading.gif
|
91
|
+
public/js/admin_console.js
|
92
|
+
public/js/excanvas.js
|
93
|
+
public/js/flot.js
|
94
|
+
public/js/jquery.js
|
95
|
+
README
|
96
|
+
test/acceptance/test_app.rb
|
97
|
+
test/acceptance/test_failing_work_units.rb
|
98
|
+
test/acceptance/test_word_count.rb
|
99
|
+
test/blueprints.rb
|
100
|
+
test/config/config.ru
|
101
|
+
test/config/config.yml
|
102
|
+
test/config/database.yml
|
103
|
+
test/config/actions/failure_testing.rb
|
104
|
+
test/test_helper.rb
|
105
|
+
test/unit/test_action.rb
|
106
|
+
test/unit/test_configuration.rb
|
107
|
+
test/unit/test_job.rb
|
108
|
+
test/unit/test_work_unit.rb
|
109
|
+
views/index.erb
|
110
|
+
)
|
111
|
+
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# This rackup script can be used to start the central CloudCrowd server
|
4
|
+
# using any Rack-compliant server handler. For example, start up three servers
|
5
|
+
# with a specified port number, using Thin:
|
6
|
+
#
|
7
|
+
# thin start -R config.ru -p 9173 --servers 3
|
8
|
+
|
9
|
+
require 'rubygems'
require 'cloud-crowd'

# Both configuration files are looked up in the directory that holds this
# rackup file.
config_dir = File.dirname(__FILE__)
CloudCrowd.configure(config_dir + '/config.yml')
CloudCrowd.configure_database(config_dir + '/database.yml')

# Serve the CloudCrowd application from the root path.
map('/') { run CloudCrowd::App }
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# The URL where you're planning on running the central server/queue/database.
|
2
|
+
:central_server: http://localhost:9173
|
3
|
+
|
4
|
+
# The storage back-end that you'd like to use for intermediate and final results
|
5
|
+
# of processing. 's3' and 'filesystem' are supported. 'filesystem' should only
|
6
|
+
# be used in development, or on single-machine installations.
|
7
|
+
:storage: s3
|
8
|
+
|
9
|
+
# Please provide your AWS credentials for S3 storage of job output.
|
10
|
+
:aws_access_key: [your AWS access key]
|
11
|
+
:aws_secret_key: [your AWS secret access key]
|
12
|
+
|
13
|
+
# Choose an S3 bucket to store all CloudCrowd output, and decide if you'd like
|
14
|
+
# to keep all resulting files on S3 private. If so, you'll receive authenticated
|
15
|
+
# S3 URLs as job output, good for 24 hours. If left public, you'll get the
|
16
|
+
# straight URLs to the files on S3.
|
17
|
+
:s3_bucket: [your CloudCrowd bucket]
|
18
|
+
:use_s3_authentication: no
|
19
|
+
|
20
|
+
# Use HTTP Basic Auth for all requests? (Includes all internal worker requests
|
21
|
+
# to the central server). If yes, specify the login and password that all
|
22
|
+
# requests must provide for authentication.
|
23
|
+
:use_http_authentication: no
|
24
|
+
:login: [your login name]
|
25
|
+
:password: [your password]
|
26
|
+
|
27
|
+
# By default, CloudCrowd looks for installed actions inside the 'actions'
|
28
|
+
# subdirectory of this configuration folder. 'actions_path' allows you to load
|
29
|
+
# additional actions from a location of your choice.
|
30
|
+
# :actions_path: /path/to/actions
|
31
|
+
|
32
|
+
# Set the following numbers to tweak the configuration of your worker daemons.
|
33
|
+
# Optimum results will depend on proportion of the Memory/CPU/IO bottlenecks
|
34
|
+
# in your actions, the number of central servers you have running, and your
|
35
|
+
# desired balance between latency and traffic.
|
36
|
+
|
37
|
+
# The number of workers that `crowd workers start` spins up.
|
38
|
+
:num_workers: 3
|
39
|
+
|
40
|
+
# The minimum number of seconds a worker waits between checking the job queue.
|
41
|
+
:min_worker_wait: 1
|
42
|
+
|
43
|
+
# The maximum number of seconds a worker waits between checking the job queue.
|
44
|
+
:max_worker_wait: 5
|
45
|
+
|
46
|
+
# The number of separate attempts that will be made to process an individual
|
47
|
+
# work unit, before marking it as having failed.
|
48
|
+
:work_unit_retries: 3
|