mooktakim-cloud-crowd 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/EPIGRAPHS +17 -0
- data/LICENSE +22 -0
- data/README +93 -0
- data/actions/graphics_magick.rb +43 -0
- data/actions/process_pdfs.rb +92 -0
- data/actions/word_count.rb +16 -0
- data/bin/crowd +5 -0
- data/config/config.example.ru +23 -0
- data/config/config.example.yml +55 -0
- data/config/database.example.yml +16 -0
- data/examples/graphics_magick_example.rb +44 -0
- data/examples/process_pdfs_example.rb +40 -0
- data/examples/word_count_example.rb +42 -0
- data/lib/cloud-crowd.rb +188 -0
- data/lib/cloud_crowd/action.rb +125 -0
- data/lib/cloud_crowd/asset_store/filesystem_store.rb +39 -0
- data/lib/cloud_crowd/asset_store/s3_store.rb +43 -0
- data/lib/cloud_crowd/asset_store.rb +41 -0
- data/lib/cloud_crowd/command_line.rb +242 -0
- data/lib/cloud_crowd/exceptions.rb +46 -0
- data/lib/cloud_crowd/helpers/authorization.rb +52 -0
- data/lib/cloud_crowd/helpers/resources.rb +25 -0
- data/lib/cloud_crowd/helpers.rb +8 -0
- data/lib/cloud_crowd/inflector.rb +19 -0
- data/lib/cloud_crowd/models/job.rb +190 -0
- data/lib/cloud_crowd/models/node_record.rb +107 -0
- data/lib/cloud_crowd/models/work_unit.rb +170 -0
- data/lib/cloud_crowd/models.rb +40 -0
- data/lib/cloud_crowd/node.rb +199 -0
- data/lib/cloud_crowd/schema.rb +50 -0
- data/lib/cloud_crowd/server.rb +123 -0
- data/lib/cloud_crowd/worker.rb +149 -0
- data/mooktakim-cloud-crowd.gemspec +116 -0
- data/public/css/admin_console.css +243 -0
- data/public/css/reset.css +42 -0
- data/public/images/bullet_green.png +0 -0
- data/public/images/bullet_white.png +0 -0
- data/public/images/cloud_hand.png +0 -0
- data/public/images/header_back.png +0 -0
- data/public/images/logo.png +0 -0
- data/public/images/queue_fill.png +0 -0
- data/public/images/server.png +0 -0
- data/public/images/server_busy.png +0 -0
- data/public/images/server_error.png +0 -0
- data/public/images/sidebar_bottom.png +0 -0
- data/public/images/sidebar_top.png +0 -0
- data/public/images/worker_info.png +0 -0
- data/public/images/worker_info_loading.gif +0 -0
- data/public/js/admin_console.js +197 -0
- data/public/js/excanvas.js +1 -0
- data/public/js/flot.js +1 -0
- data/public/js/jquery.js +19 -0
- data/test/acceptance/test_failing_work_units.rb +33 -0
- data/test/acceptance/test_node.rb +20 -0
- data/test/acceptance/test_server.rb +66 -0
- data/test/acceptance/test_word_count.rb +40 -0
- data/test/blueprints.rb +25 -0
- data/test/config/actions/failure_testing.rb +13 -0
- data/test/config/config.ru +17 -0
- data/test/config/config.yml +6 -0
- data/test/config/database.yml +3 -0
- data/test/test_helper.rb +19 -0
- data/test/unit/test_action.rb +70 -0
- data/test/unit/test_configuration.rb +48 -0
- data/test/unit/test_job.rb +103 -0
- data/test/unit/test_node.rb +41 -0
- data/test/unit/test_node_record.rb +42 -0
- data/test/unit/test_work_unit.rb +53 -0
- data/test/unit/test_worker.rb +48 -0
- data/views/operations_center.erb +82 -0
- metadata +290 -0
data/EPIGRAPHS
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
The crowd, suddenly there where there was nothing before, is a mysterious and
|
2
|
+
universal phenomenon. A few people may have been standing together -- five, ten
|
3
|
+
or twelve, not more; nothing has been announced, nothing is expected. Suddenly
|
4
|
+
everywhere is black with people and more come streaming from all sides as though
|
5
|
+
streets had only one direction. Most of them do not know what has happened and,
|
6
|
+
if questioned, have no answer; but they hurry to be there where most other
|
7
|
+
people are. There is a determination in their movement which is quite different
|
8
|
+
from the expression of ordinary curiosity. It seems as though the movement of
|
9
|
+
some of them transmits itself to all the others. But that is not all; they have
|
10
|
+
a goal which is there before they can find words for it. -p 16
|
11
|
+
|
12
|
+
Crowd crystals are the small, rigid groups of men, strictly delimited and of
|
13
|
+
great constancy, which serve to precipitate crowds. Their structure is such
|
14
|
+
that they can be comprehended and taken in at a glance. Their unity is more
|
15
|
+
important than their size. -p 73
|
16
|
+
|
17
|
+
From Elias Canetti's "Crowds and Power" (1962).
|
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2009 Jeremy Ashkenas, DocumentCloud
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person
|
4
|
+
obtaining a copy of this software and associated documentation
|
5
|
+
files (the "Software"), to deal in the Software without
|
6
|
+
restriction, including without limitation the rights to use,
|
7
|
+
copy, modify, merge, publish, distribute, sublicense, and/or sell
|
8
|
+
copies of the Software, and to permit persons to whom the
|
9
|
+
Software is furnished to do so, subject to the following
|
10
|
+
conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be
|
13
|
+
included in all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
17
|
+
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
18
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
19
|
+
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
20
|
+
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
21
|
+
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
22
|
+
OTHER DEALINGS IN THE SOFTWARE.
|
data/README
ADDED
@@ -0,0 +1,93 @@
|
|
1
|
+
=
|
2
|
+
_ _
|
3
|
+
( ` )_
|
4
|
+
( ) `)
|
5
|
+
(_ (_ . _) _)
|
6
|
+
_
|
7
|
+
( )
|
8
|
+
_ . ( ` ) . )
|
9
|
+
( _ )_ (_, _( ,_)_)
|
10
|
+
(_ _(_ ,)
|
11
|
+
|
12
|
+
_ _ ___ _ _ ___ _
|
13
|
+
( ` )_ / __| |___ _ _ __| |/ __|_ _ _____ __ ____| |
|
14
|
+
( ) `) | (__| / _ \ || / _` | (__| '_/ _ \ V V / _` |
|
15
|
+
(_ (_ . _) _) \___|_\___/\_,_\__,_|\___|_| \___/\_/\_/\__,_|
|
16
|
+
|
17
|
+
_
|
18
|
+
( )
|
19
|
+
_, _ . ( ` ) . )
|
20
|
+
( ( _ )_ (_, _( ,_)_)
|
21
|
+
(_(_ _(_ ,)
|
22
|
+
|
23
|
+
|
24
|
+
|
25
|
+
~ CloudCrowd ~
|
26
|
+
|
27
|
+
* Parallel processing for the rest of us
|
28
|
+
* Write your scripts in Ruby
|
29
|
+
* Works with Amazon EC2 and S3
|
30
|
+
* split -> process -> merge
|
31
|
+
* As easy as `gem install cloud-crowd`
|
32
|
+
|
33
|
+
Well-suited for:
|
34
|
+
|
35
|
+
* Generating or resizing images.
|
36
|
+
* Encoding video.
|
37
|
+
* Running text extraction or OCR on PDFs.
|
38
|
+
* Migrating a large file set or database.
|
39
|
+
* Web scraping.
|
40
|
+
|
41
|
+
|
42
|
+
~ Documentation ~
|
43
|
+
|
44
|
+
Wiki: http://wiki.github.com/documentcloud/cloud-crowd
|
45
|
+
Rdoc: http://rdoc.info/projects/documentcloud/cloud-crowd
|
46
|
+
|
47
|
+
|
48
|
+
~ Getting started ~
|
49
|
+
|
50
|
+
# Install the gem.
|
51
|
+
|
52
|
+
>> sudo gem install cloud-crowd
|
53
|
+
|
54
|
+
# Install the CloudCrowd configuration files to a location of your choosing.
|
55
|
+
|
56
|
+
>> crowd install ~/config/cloud-crowd
|
57
|
+
|
58
|
+
# Now, you can use the full complement of `crowd` commands from inside of
|
59
|
+
# this configuration directory. To see the available commands:
|
60
|
+
|
61
|
+
>> crowd --help
|
62
|
+
|
63
|
+
# Edit the configuration files to your satisfaction, add AWS credentials,
|
64
|
+
# and then load the CloudCrowd schema into your configured database.
|
65
|
+
|
66
|
+
>> cd ~/config/cloud-crowd
|
67
|
+
>> mate config.yml
|
68
|
+
>> mate database.yml
|
69
|
+
>> [create the database you just configured...]
|
70
|
+
>> crowd load_schema
|
71
|
+
|
72
|
+
# Write your actions, and install them into the 'actions' subdirectory.
|
73
|
+
# CloudCrowd comes with a few default actions as an example.
|
74
|
+
|
75
|
+
# To launch the central server (make sure that you include its location
|
76
|
+
# in config.yml):
|
77
|
+
|
78
|
+
>> crowd server
|
79
|
+
|
80
|
+
# The configuration folder also includes 'config.ru', which can be used by
|
81
|
+
# any Rack-compliant webserver to run your central server.
|
82
|
+
|
83
|
+
# Then, to launch a node of workers:
|
84
|
+
|
85
|
+
>> crowd node
|
86
|
+
|
87
|
+
# To spin up remote nodes, install the 'cloud-crowd' gem and copy over
|
88
|
+
# your configuration directory. Run `crowd node`, and the remote machines
|
89
|
+
# will register with the central server, becoming available for processing.
|
90
|
+
|
91
|
+
# At this point you can visit your Operations Center at localhost:9173 to
|
92
|
+
# view all of your nodes, ready for action.
|
93
|
+
|
@@ -0,0 +1,43 @@
|
|
1
|
+
require 'shellwords'

# The GraphicsMagick action, dependent on the `gm` command, is able to perform
# any number of GraphicsMagick conversions on an image passed in as an input.
# The options hash should specify, for every step: the +name+ of the step
# (which is appended to the resulting image filename), the +command+ (e.g.
# convert, mogrify), the +options+ (to the command, e.g. -shadow -blur), and
# the +extension+, which determines the resulting image type. Optionally, you
# may also specify +input+ as the name of a previous step; doing this will use
# the result of that step as the source image, otherwise each step uses the
# original image as its source.
class GraphicsMagick < CloudCrowd::Action

  # Run each of the specified GraphicsMagick steps, in order, and return a
  # hash mapping every step's +name+ to the value returned by +run_step+.
  def process
    options['steps'].inject({}) {|results, step| results[step['name']] = run_step(step); results }
  end

  # Run an individual step (a single GraphicsMagick command) in a
  # shell-injection safe way, then hand the output file to +save+ and return
  # its result (per the original author's note: the public URL of the
  # uploaded file).
  #
  # The +options+ string is tokenised with Shellwords (so quoted arguments such
  # as <tt>-draw "text 75,75 Hi"</tt> stay intact) and every resulting word, as
  # well as the command name and both paths, is re-escaped before being handed
  # to the shell. Shell metacharacters in job-supplied values are therefore
  # passed to `gm` literally instead of being interpreted.
  #
  # Raises ArgumentError (from Shellwords.split) if +options+ contains an
  # unterminated quote.
  def run_step(step)
    cmd, opts = step['command'], step['options']
    in_path, out_path = input_path_for(step), output_path_for(step)
    words = ['gm', cmd.to_s] + Shellwords.split(opts.to_s) + [in_path, out_path]
    # Backticks are kept (rather than +system+) so that gm's stdout is still
    # captured and discarded, exactly as before.
    `#{Shellwords.join(words)}`
    save(out_path)
  end

  # Where should the starting image be located?
  # If the step names another step as its +input+ (and that step exists),
  # returns the path to that step's output, for further processing. Otherwise
  # returns the path of the original input image.
  def input_path_for(step)
    in_step = step && step['input'] && options['steps'].detect {|s| s['name'] == step['input'] }
    in_step ? output_path_for(in_step) : input_path
  end

  # Where should resulting images be saved locally?
  # Builds "<work_directory>/<file_name>_<step name>.<step extension>".
  def output_path_for(step)
    "#{work_directory}/#{file_name}_#{step['name']}.#{step['extension']}"
  end

end
|
@@ -0,0 +1,92 @@
|
|
1
|
+
require 'shellwords'

# Depends on working pdftk, gm (GraphicsMagick), and pdftotext (Poppler) commands.
# Splits a pdf into batches of N pages, creates their thumbnails and icons,
# as specified in the Job options, gets the text for every page, and merges
# it all back into a tar archive for convenient download.
#
# Every file name and path that is interpolated into a shell command is
# escaped with Shellwords, so names containing spaces or shell metacharacters
# are passed through literally.
#
# See <tt>examples/process_pdfs_example.rb</tt> for more information.
class ProcessPdfs < CloudCrowd::Action

  # Split up a large pdf into single-page pdfs. Batch them into 'batch_size'
  # chunks for processing. The double pdftk shuffle fixes the document xrefs.
  # Returns the list of +save+ results, one per batch archive.
  def split
    `pdftk #{Shellwords.escape(input_path)} burst output #{Shellwords.escape("#{file_name}_%05d.pdf_temp")}`
    FileUtils.rm input_path
    Dir["*.pdf_temp"].sort.each do |temp|
      page_pdf = "#{File.basename(temp, '.pdf_temp')}.pdf"
      `pdftk #{Shellwords.escape(temp)} output #{Shellwords.escape(page_pdf)}`
    end
    # Glob order is not guaranteed on every platform/Ruby version; sorting the
    # zero-padded names keeps consecutive pages together in each batch.
    pdfs = Dir["*.pdf"].sort
    # Accept numeric strings, and fall back to a single batch when the option
    # is missing or not positive (previously this raised).
    batch_size = options['batch_size'].to_i
    batch_size = [pdfs.length, 1].max if batch_size < 1
    batches = (pdfs.length / batch_size.to_f).ceil
    batches.times do |batch_num|
      tar_path = "#{sprintf('%05d', batch_num)}.tar"
      batch_pdfs = pdfs[batch_num*batch_size...(batch_num + 1)*batch_size]
      `tar -czf #{tar_path} #{Shellwords.join(batch_pdfs)}`
    end
    Dir["*.tar"].sort.map {|tar| save(tar) }
  end

  # Convert every pdf page of a batch into the different-sized images
  # described by options['images'], and grab the text of each page. Everything
  # produced is re-archived and handed to +save+, whose result is returned.
  def process
    `tar -xzf #{Shellwords.escape(input_path)}`
    FileUtils.rm input_path
    cmds = []
    generate_images_commands(cmds)
    generate_text_commands(cmds)
    # All commands run in one shell invocation; the chain stops at the first
    # failing command. The exit status is not inspected (as before).
    system cmds.join(' && ')
    FileUtils.rm Dir['*.pdf']
    archive = "#{file_name}.tar"
    `tar -czf #{Shellwords.escape(archive)} *`
    save(archive)
  end

  # Merge all of the resulting images, all of the resulting text files, and
  # the concatenated merge of the full-text into a single tar archive, ready
  # for download. Returns the result of saving "processed_pdfs.tar".
  def merge
    input.each do |batch_url|
      batch_path = File.basename(batch_url)
      download(batch_url, batch_path)
      `tar -xzf #{Shellwords.escape(batch_path)}`
      FileUtils.rm batch_path
    end

    # One output directory tree per original document name.
    names = Dir['*.txt'].map {|fn| fn.sub(/_\d+(_\w+)?\.txt\Z/, '') }.uniq
    dirs = names.map {|n| ["#{n}/text/full", "#{n}/text/pages"] + options['images'].map {|i| "#{n}/images/#{i['name']}" } }.flatten
    FileUtils.mkdir_p(dirs)

    Dir['*.*'].sort.each do |file|
      # Directories whose name contains a dot also match '*.*'; leave them be.
      next unless File.file?(file)
      ext = File.extname(file)
      # Escape the extension so that its leading '.' only matches a literal dot.
      ext_pattern = Regexp.escape(ext)
      name = file.sub(/_\d+(_\w+)?#{ext_pattern}\Z/, '')
      if ext == '.txt'
        FileUtils.mv(file, "#{name}/text/pages/#{file}")
      else
        suffix_match = file.match(/_([^_]+)#{ext_pattern}\Z/)
        # Files without a "_<suffix>" part cannot be sorted into an image
        # directory; they stay at the top level and still end up in the archive.
        next unless suffix_match
        suffix = suffix_match[1]
        sans_suffix = file.sub(/_([^_]+)#{ext_pattern}\Z/, ext)
        FileUtils.mv(file, "#{name}/images/#{suffix}/#{sans_suffix}")
      end
    end

    # The shell expands '*.txt' in sorted order, so pages are concatenated in
    # page order. Only the directory/file names are escaped, not the glob.
    names.each do |n|
      `cat #{Shellwords.escape(n)}/text/pages/*.txt > #{Shellwords.escape("#{n}/text/full/#{n}.txt")}`
    end

    `tar -czf processed_pdfs.tar *`
    save("processed_pdfs.tar")
  end


  private

  # Append one `gm convert` command per (pdf page, image spec) pair to
  # +command_list+. The job-supplied image options are tokenised and re-escaped
  # so they cannot smuggle extra shell commands into the chain. Raises
  # ArgumentError (from Shellwords.split) on an unterminated quote.
  def generate_images_commands(command_list)
    Dir["*.pdf"].sort.each do |pdf|
      name = File.basename(pdf, File.extname(pdf))
      options['images'].each do |i|
        image_opts = Shellwords.join(Shellwords.split(i['options'].to_s))
        image_path = "#{name}_#{i['name']}.#{i['extension']}"
        command_list << "gm convert #{image_opts} #{Shellwords.escape(pdf)} #{Shellwords.escape(image_path)}"
      end
    end
  end

  # Append one `pdftotext` command per pdf page to +command_list+, writing the
  # page's text to "<page name>.txt".
  def generate_text_commands(command_list)
    Dir["*.pdf"].sort.each do |pdf|
      name = File.basename(pdf, File.extname(pdf))
      command_list << "pdftotext -enc UTF-8 -layout -q #{Shellwords.escape(pdf)} #{Shellwords.escape("#{name}.txt")}"
    end
  end

end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'shellwords'

# A parallel WordCount. Depends on the 'wc' utility.
class WordCount < CloudCrowd::Action

  # Count the words in a single book, returning the count as an Integer.
  # Pretend that this takes longer than it really does, for demonstration purposes.
  #
  # The input path is shell-escaped before being handed to `wc`, so paths
  # containing spaces or shell metacharacters are counted correctly instead of
  # being split or interpreted by the shell.
  def process
    sleep 5
    wc_output = `wc -w #{Shellwords.escape(input_path)}`
    # `wc -w` prints "<count> <path>"; take the leading number. If wc printed
    # nothing (e.g. the file is missing), this raises, as it did before.
    wc_output.match(/\A\s*(\d+)/)[1].to_i
  end

  # Sum the total word count across all of the per-book counts.
  def merge
    input.inject(0) {|sum, count| sum + count }
  end

end
|
data/bin/crowd
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# This rackup script can be used to start the central CloudCrowd server
|
4
|
+
# using any Rack-compliant server handler. For example, start up three servers
|
5
|
+
# with a specified port number, using Thin:
|
6
|
+
#
|
7
|
+
# thin start -R config.ru --servers 3
|
8
|
+
#
|
9
|
+
# Or a single server with Unicorn:
|
10
|
+
#
|
11
|
+
# unicorn config.ru
|
12
|
+
#
|
13
|
+
|
14
|
+
|
15
|
+
require 'rubygems'
|
16
|
+
require 'cloud-crowd'
|
17
|
+
|
18
|
+
# Both configuration files are expected to sit next to this rackup file.
config_dir = ::File.dirname(__FILE__)

CloudCrowd.configure("#{config_dir}/config.yml")
CloudCrowd.configure_database("#{config_dir}/database.yml")

# Mount the central CloudCrowd server at the root path.
map('/') { run CloudCrowd::Server }
|
@@ -0,0 +1,55 @@
|
|
1
|
+
# The URL where you're planning on running the central server/queue/database.
|
2
|
+
:central_server: http://localhost:9173
|
3
|
+
|
4
|
+
# The following settings allow you to control the number of workers that can run
|
5
|
+
# on a given node, to prevent the node from becoming overloaded. 'max_workers'
|
6
|
+
# is a simple cap on the maximum number of workers a node is allowed to run
|
7
|
+
# concurrently. 'max_load' is the maximum (one-minute) load average, above which
|
8
|
+
# a node will refuse to take new work. 'min_free_memory' is the minimum amount
|
9
|
+
# of free RAM (in megabytes) a node is allowed to have, below which no new
|
10
|
+
# workers are run. These settings may be used in any combination.
|
11
|
+
:max_workers: 5
|
12
|
+
# :max_load: 5.0
|
13
|
+
# :min_free_memory: 150
|
14
|
+
|
15
|
+
# The storage back-end that you'd like to use for intermediate and final results
|
16
|
+
# of processing. 's3' and 'filesystem' are supported. 'filesystem' should only
|
17
|
+
# be used in development, on single-machine installations, or networked drives.
|
18
|
+
# If you *are* developing an action, filesystem is certainly faster and easier.
|
19
|
+
:storage: s3
|
20
|
+
|
21
|
+
# Please provide your AWS credentials for S3 storage of job output.
|
22
|
+
:aws_access_key: [your AWS access key]
|
23
|
+
:aws_secret_key: [your AWS secret access key]
|
24
|
+
|
25
|
+
# Choose an S3 bucket to store all CloudCrowd output, and decide if you'd like
|
26
|
+
# to keep all resulting files on S3 private. If so, you'll receive authenticated
|
27
|
+
# S3 URLs as job output, good for 24 hours. If left public, you'll get the
|
28
|
+
# straight URLs to the files on S3.
|
29
|
+
:s3_bucket: [your CloudCrowd bucket]
|
30
|
+
:s3_authentication: no
|
31
|
+
|
32
|
+
# The following settings configure local paths. 'local_storage_path' is the
|
33
|
+
# directory in which all files will be saved if you're using the 'filesystem'
|
34
|
+
# storage. 'log_path' and 'pid_path' are the directories in which daemonized
|
35
|
+
# servers and nodes will store their log files and process ids. The default
|
36
|
+
# values are listed.
|
37
|
+
# :local_storage_path: /tmp/cloud_crowd_storage
|
38
|
+
# :log_path: log
|
39
|
+
# :pid_path: tmp/pids
|
40
|
+
|
41
|
+
# Use HTTP Basic Auth for all requests? (Includes all internal worker requests
|
42
|
+
# to the central server). If yes, specify the login and password that all
|
43
|
+
# requests must provide for authentication.
|
44
|
+
:http_authentication: no
|
45
|
+
:login: [your login name]
|
46
|
+
:password: [your password]
|
47
|
+
|
48
|
+
# By default, CloudCrowd looks for installed actions inside the 'actions'
|
49
|
+
# subdirectory of this configuration folder. 'actions_path' allows you to load
|
50
|
+
# additional actions from a location of your choice.
|
51
|
+
# :actions_path: /path/to/actions
|
52
|
+
|
53
|
+
# The number of separate attempts that will be made to process an individual
|
54
|
+
# work unit, before marking it as having failed.
|
55
|
+
:work_unit_retries: 3
|
@@ -0,0 +1,16 @@
|
|
1
|
+
# This is a standard ActiveRecord database.yml file. You can configure it
|
2
|
+
# to use any database that ActiveRecord supports. Only the central server needs
|
3
|
+
# this file to be configured -- nodes never connect directly to the database.
|
4
|
+
|
5
|
+
:adapter: mysql
|
6
|
+
:encoding: utf8
|
7
|
+
:username: root
|
8
|
+
:password:
|
9
|
+
:socket: /tmp/mysql.sock
|
10
|
+
:database: cloud_crowd
|
11
|
+
|
12
|
+
# If you'd prefer to use an SQLite database instead, the following configuration
|
13
|
+
# will do nicely:
|
14
|
+
#
|
15
|
+
# :adapter: sqlite3
|
16
|
+
# :database: cloud_crowd.db
|
@@ -0,0 +1,44 @@
|
|
1
|
+
#!/usr/bin/env ruby -rubygems
|
2
|
+
|
3
|
+
require 'restclient'
|
4
|
+
require 'json'
|
5
|
+
|
6
|
+
# This example demonstrates the GraphicsMagick action: it submits a job with a
# list of five images, asking for an annotated, a blurred, and a black and
# white version of each one. See actions/graphics_magick.rb

image_urls = [
  'http://www.sci-fi-o-rama.com/wp-content/uploads/2008/10/dan_mcpharlin_the_land_of_sleeping_things.jpg',
  'http://www.sci-fi-o-rama.com/wp-content/uploads/2009/07/dan_mcpharlin_wired_spread01.jpg',
  'http://www.sci-fi-o-rama.com/wp-content/uploads/2009/07/dan_mcpharlin_wired_spread03.jpg',
  'http://www.sci-fi-o-rama.com/wp-content/uploads/2009/07/dan_mcpharlin_wired_spread02.jpg',
  'http://www.sci-fi-o-rama.com/wp-content/uploads/2009/02/dan_mcpharlin_untitled.jpg'
]

# Each step is one `gm` invocation. The 'bw' step names 'blurred' as its
# input, so it works on the blurred image rather than on the original.
annotated = {
  'name'      => 'annotated',
  'command'   => 'convert',
  'options'   => '-font helvetica -fill red -draw "font-size 35; text 75,75 CloudCrowd!"',
  'extension' => 'jpg'
}
blurred = {
  'name'      => 'blurred',
  'command'   => 'convert',
  'options'   => '-blur 10x5',
  'extension' => 'png'
}
black_and_white = {
  'name'      => 'bw',
  'input'     => 'blurred',
  'command'   => 'convert',
  'options'   => '-monochrome',
  'extension' => 'jpg'
}

job = {
  'action'  => 'graphics_magick',
  'inputs'  => image_urls,
  'options' => {'steps' => [annotated, blurred, black_and_white]}
}

# The job description travels as a JSON string in the 'job' form parameter.
RestClient.post('http://localhost:9173/jobs', {:job => job.to_json})
|
@@ -0,0 +1,40 @@
|
|
1
|
+
#!/usr/bin/env ruby -rubygems
|
2
|
+
|
3
|
+
require 'restclient'
|
4
|
+
require 'json'
|
5
|
+
|
6
|
+
# This example demonstrates a fairly complicated PDF-processing action, designed
# to extract the PDF's text, and produce GIF versions of each page. The action
# (actions/process_pdfs.rb) shows an example of using all three steps:
# split, process, and merge.

pdf_urls = [
  'http://tigger.uic.edu/~victor/personal/futurism.pdf',
  'http://www.jonasmekas.com/Catalog_excerpt/The%20Avant-Garde%20From%20Futurism%20to%20Fluxus.pdf',
  'http://www.dzignism.com/articles/Futurist.Manifesto.pdf',
  'http://www.pitt.edu/~slavic/sisc/SISC4/dadswell.pdf'
]

# One GIF rendition per entry is produced for every page.
image_specs = [
  {
    'name'      => '700',
    'options'   => '-resize 700x -density 220 -depth 4 -unsharp 0.5x0.5+0.5+0.03',
    'extension' => 'gif'
  },
  {
    'name'      => '1000',
    'options'   => '-resize 1000x -density 220 -depth 4 -unsharp 0.5x0.5+0.5+0.03',
    'extension' => 'gif'
  }
]

job = {
  'action'  => 'process_pdfs',
  'inputs'  => pdf_urls,
  'options' => {
    'batch_size' => 7,
    'images'     => image_specs
  }
}

# The job description travels as a JSON string in the 'job' form parameter.
RestClient.post('http://localhost:9173/jobs', {:job => job.to_json})
|
@@ -0,0 +1,42 @@
|
|
1
|
+
#!/usr/bin/env ruby -rubygems
|
2
|
+
|
3
|
+
require 'restclient'
|
4
|
+
require 'json'
|
5
|
+
|
6
|
+
# Let's count all the words in Shakespeare.

shakespeare_urls = [
  'http://www.gutenberg.org/dirs/etext97/1ws3010.txt',  # All's Well That Ends Well
  'http://www.gutenberg.org/dirs/etext99/1ws3511.txt',  # Antony and Cleopatra
  'http://www.gutenberg.org/dirs/etext97/1ws2510.txt',  # As You Like It
  'http://www.gutenberg.org/dirs/etext97/1ws0610.txt',  # The Comedy of Errors
  'http://www.gutenberg.org/dirs/etext99/1ws3911.txt',  # Cymbeline
  'http://www.gutenberg.org/dirs/etext00/0ws2610.txt',  # Hamlet
  'http://www.gutenberg.org/dirs/etext00/0ws1910.txt',  # Henry IV
  'http://www.gutenberg.org/dirs/etext99/1ws2411.txt',  # Julius Caesar
  'http://www.gutenberg.org/dirs/etext98/2ws3310.txt',  # King Lear
  'http://www.gutenberg.org/dirs/etext99/1ws1211j.txt', # Love's Labour's Lost
  'http://www.gutenberg.org/dirs/etext98/2ws3410.txt',  # Macbeth
  'http://www.gutenberg.org/dirs/etext98/2ws1810.txt',  # The Merchant of Venice
  'http://www.gutenberg.org/dirs/etext99/1ws1711.txt',  # Midsummer Night's Dream
  'http://www.gutenberg.org/dirs/etext98/3ws2210.txt',  # Much Ado About Nothing
  'http://www.gutenberg.org/dirs/etext00/0ws3210.txt',  # Othello
  'http://www.gutenberg.org/dirs/etext98/2ws1610.txt',  # Romeo and Juliet
  'http://www.gutenberg.org/dirs/etext98/2ws1010.txt',  # The Taming of the Shrew
  'http://www.gutenberg.org/dirs/etext99/1ws4111.txt',  # The Tempest
  'http://www.gutenberg.org/dirs/etext00/0ws0910.txt',  # Titus Andronicus
  'http://www.gutenberg.org/dirs/etext99/1ws2911.txt',  # Troilus and Cressida
  'http://www.gutenberg.org/dirs/etext98/3ws2810.txt',  # Twelfth Night
  'http://www.gutenberg.org/files/1539/1539.txt'        # The Winter's Tale
]

job = {
  'action' => 'word_count',
  'inputs' => shakespeare_urls
}

# The job description travels as a JSON string in the 'job' form parameter.
RestClient.post('http://localhost:9173/jobs', {:job => job.to_json})
|
40
|
+
|
41
|
+
# With 23 Workers running, and over Wifi, it counted all the words in 5.5 secs.
|
42
|
+
# On a fast internet connection, you may not even see this job show up.
|