mooktakim-cloud-crowd 0.3.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (71) hide show
  1. data/EPIGRAPHS +17 -0
  2. data/LICENSE +22 -0
  3. data/README +93 -0
  4. data/actions/graphics_magick.rb +43 -0
  5. data/actions/process_pdfs.rb +92 -0
  6. data/actions/word_count.rb +16 -0
  7. data/bin/crowd +5 -0
  8. data/config/config.example.ru +23 -0
  9. data/config/config.example.yml +55 -0
  10. data/config/database.example.yml +16 -0
  11. data/examples/graphics_magick_example.rb +44 -0
  12. data/examples/process_pdfs_example.rb +40 -0
  13. data/examples/word_count_example.rb +42 -0
  14. data/lib/cloud-crowd.rb +188 -0
  15. data/lib/cloud_crowd/action.rb +125 -0
  16. data/lib/cloud_crowd/asset_store/filesystem_store.rb +39 -0
  17. data/lib/cloud_crowd/asset_store/s3_store.rb +43 -0
  18. data/lib/cloud_crowd/asset_store.rb +41 -0
  19. data/lib/cloud_crowd/command_line.rb +242 -0
  20. data/lib/cloud_crowd/exceptions.rb +46 -0
  21. data/lib/cloud_crowd/helpers/authorization.rb +52 -0
  22. data/lib/cloud_crowd/helpers/resources.rb +25 -0
  23. data/lib/cloud_crowd/helpers.rb +8 -0
  24. data/lib/cloud_crowd/inflector.rb +19 -0
  25. data/lib/cloud_crowd/models/job.rb +190 -0
  26. data/lib/cloud_crowd/models/node_record.rb +107 -0
  27. data/lib/cloud_crowd/models/work_unit.rb +170 -0
  28. data/lib/cloud_crowd/models.rb +40 -0
  29. data/lib/cloud_crowd/node.rb +199 -0
  30. data/lib/cloud_crowd/schema.rb +50 -0
  31. data/lib/cloud_crowd/server.rb +123 -0
  32. data/lib/cloud_crowd/worker.rb +149 -0
  33. data/mooktakim-cloud-crowd.gemspec +116 -0
  34. data/public/css/admin_console.css +243 -0
  35. data/public/css/reset.css +42 -0
  36. data/public/images/bullet_green.png +0 -0
  37. data/public/images/bullet_white.png +0 -0
  38. data/public/images/cloud_hand.png +0 -0
  39. data/public/images/header_back.png +0 -0
  40. data/public/images/logo.png +0 -0
  41. data/public/images/queue_fill.png +0 -0
  42. data/public/images/server.png +0 -0
  43. data/public/images/server_busy.png +0 -0
  44. data/public/images/server_error.png +0 -0
  45. data/public/images/sidebar_bottom.png +0 -0
  46. data/public/images/sidebar_top.png +0 -0
  47. data/public/images/worker_info.png +0 -0
  48. data/public/images/worker_info_loading.gif +0 -0
  49. data/public/js/admin_console.js +197 -0
  50. data/public/js/excanvas.js +1 -0
  51. data/public/js/flot.js +1 -0
  52. data/public/js/jquery.js +19 -0
  53. data/test/acceptance/test_failing_work_units.rb +33 -0
  54. data/test/acceptance/test_node.rb +20 -0
  55. data/test/acceptance/test_server.rb +66 -0
  56. data/test/acceptance/test_word_count.rb +40 -0
  57. data/test/blueprints.rb +25 -0
  58. data/test/config/actions/failure_testing.rb +13 -0
  59. data/test/config/config.ru +17 -0
  60. data/test/config/config.yml +6 -0
  61. data/test/config/database.yml +3 -0
  62. data/test/test_helper.rb +19 -0
  63. data/test/unit/test_action.rb +70 -0
  64. data/test/unit/test_configuration.rb +48 -0
  65. data/test/unit/test_job.rb +103 -0
  66. data/test/unit/test_node.rb +41 -0
  67. data/test/unit/test_node_record.rb +42 -0
  68. data/test/unit/test_work_unit.rb +53 -0
  69. data/test/unit/test_worker.rb +48 -0
  70. data/views/operations_center.erb +82 -0
  71. metadata +290 -0
data/EPIGRAPHS ADDED
@@ -0,0 +1,17 @@
1
+ The crowd, suddenly there where there was nothing before, is a mysterious and
2
+ universal phenomenon. A few people may have been standing together -- five, ten
3
+ or twelve, not more; nothing has been announced, nothing is expected. Suddenly
4
+ everywhere is black with people and more come streaming from all sides as though
5
+ streets had only one direction. Most of them do not know what has happened and,
6
+ if questioned, have no answer; but they hurry to be there where most other
7
+ people are. There is a determination in their movement which is quite different
8
+ from the expression of ordinary curiosity. It seems as though the movement of
9
+ some of them transmits itself to all the others. But that is not all; they have
10
+ a goal which is there before they can find words for it. -p 16
11
+
12
+ Crowd crystals are the small, rigid groups of men, strictly delimited and of
13
+ great constancy, which serve to precipitate crowds. Their structure is such
14
+ that they can be comprehended and taken in at a glance. Their unity is more
15
+ important than their size. -p 73
16
+
17
+ From Elias Canetti's "Crowds and Power" (1962).
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2009 Jeremy Ashkenas, DocumentCloud
2
+
3
+ Permission is hereby granted, free of charge, to any person
4
+ obtaining a copy of this software and associated documentation
5
+ files (the "Software"), to deal in the Software without
6
+ restriction, including without limitation the rights to use,
7
+ copy, modify, merge, publish, distribute, sublicense, and/or sell
8
+ copies of the Software, and to permit persons to whom the
9
+ Software is furnished to do so, subject to the following
10
+ conditions:
11
+
12
+ The above copyright notice and this permission notice shall be
13
+ included in all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
17
+ OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
19
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
20
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22
+ OTHER DEALINGS IN THE SOFTWARE.
data/README ADDED
@@ -0,0 +1,93 @@
1
+ =
2
+ _ _
3
+ ( ` )_
4
+ ( ) `)
5
+ (_ (_ . _) _)
6
+ _
7
+ ( )
8
+ _ . ( ` ) . )
9
+ ( _ )_ (_, _( ,_)_)
10
+ (_ _(_ ,)
11
+
12
+ _ _ ___ _ _ ___ _
13
+ ( ` )_ / __| |___ _ _ __| |/ __|_ _ _____ __ ____| |
14
+ ( ) `) | (__| / _ \ || / _` | (__| '_/ _ \ V V / _` |
15
+ (_ (_ . _) _) \___|_\___/\_,_\__,_|\___|_| \___/\_/\_/\__,_|
16
+
17
+ _
18
+ ( )
19
+ _, _ . ( ` ) . )
20
+ ( ( _ )_ (_, _( ,_)_)
21
+ (_(_ _(_ ,)
22
+
23
+
24
+
25
+ ~ CloudCrowd ~
26
+
27
+ * Parallel processing for the rest of us
28
+ * Write your scripts in Ruby
29
+ * Works with Amazon EC2 and S3
30
+ * split -> process -> merge
31
+ * As easy as `gem install cloud-crowd`
32
+
33
+ Well-suited for:
34
+
35
+ * Generating or resizing images.
36
+ * Encoding video.
37
+ * Running text extraction or OCR on PDFs.
38
+ * Migrating a large file set or database.
39
+ * Web scraping.
40
+
41
+
42
+ ~ Documentation ~
43
+
44
+ Wiki: http://wiki.github.com/documentcloud/cloud-crowd
45
+ Rdoc: http://rdoc.info/projects/documentcloud/cloud-crowd
46
+
47
+
48
+ ~ Getting started ~
49
+
50
+ # Install the gem.
51
+
52
+ >> sudo gem install cloud-crowd
53
+
54
+ # Install the CloudCrowd configuration files to a location of your choosing.
55
+
56
+ >> crowd install ~/config/cloud-crowd
57
+
58
+ # Now, you can use the full complement of `crowd` commands from inside of
59
+ # this configuration directory. To see the available commands:
60
+
61
+ >> crowd --help
62
+
63
+ # Edit the configuration files to your satisfaction, add AWS credentials,
64
+ # and then load the CloudCrowd schema into your configured database.
65
+
66
+ >> cd ~/config/cloud-crowd
67
+ >> mate config.yml
68
+ >> mate database.yml
69
+ >> [create the database you just configured...]
70
+ >> crowd load_schema
71
+
72
+ # Write your actions, and install them into the 'actions' subdirectory.
73
+ # CloudCrowd comes with a few default actions as an example.
74
+
75
+ # To launch the central server (make sure that you include its location
76
+ # in config.yml):
77
+
78
+ >> crowd server
79
+
80
+ # The configuration folder also includes 'config.ru', which can be used by
81
+ # any Rack-compliant webserver to run your central server.
82
+
83
+ # Then, to launch a node of workers:
84
+
85
+ >> crowd node
86
+
87
+ # To spin up remote nodes, install the 'cloud-crowd' gem and copy over
88
+ # your configuration directory. Run `crowd node`, and the remote machines
89
+ # will register with the central server, becoming available for processing.
90
+
91
+ # At this point you can visit your Operations Center at localhost:9173 to
92
+ # view all of your nodes, ready for action.
93
+
@@ -0,0 +1,43 @@
1
require 'shellwords'

# The GraphicsMagick action, dependent on the `gm` command, is able to perform
# any number of GraphicsMagick conversions on an image passed in as an input.
# The options hash should specify the +name+ for the particular step (which is
# appended to the resulting image filename), the +command+ (eg. convert, mogrify),
# the +options+ (to the command, eg. -shadow -blur), and the +extension+ which
# will determine the resulting image type. Optionally, you may also specify
# +input+ as the name of a previous step; doing this will use the result of
# that step as the source image, otherwise each step uses the original image
# as its source.
class GraphicsMagick < CloudCrowd::Action

  # Run each of the GraphicsMagick steps listed in <tt>options['steps']</tt>.
  #
  # Returns a Hash that maps every step's +name+ to whatever +run_step+
  # returned for that step.
  def process
    options['steps'].inject({}) {|h, step| h[step['name']] = run_step(step); h }
  end

  # Run an individual step (a single GraphicsMagick command) and hand the
  # output file to +save+, returning +save+'s result.
  #
  # The step's +command+, +options+ and both file paths come from the job
  # description, so every piece is shell-escaped before it is interpolated
  # into the command line. The +options+ string is first tokenized using shell
  # quoting rules (so <tt>-draw "text 75,75 Hi"</tt> stays a single argument)
  # and then each token is escaped; shell metacharacters in the options are
  # therefore passed to +gm+ literally instead of being interpreted.
  def run_step(step)
    command  = Shellwords.escape(step['command'].to_s)
    in_path  = Shellwords.escape(input_path_for(step).to_s)
    out_path = output_path_for(step)
    `gm #{command} #{escaped_options(step['options'])} #{in_path} #{Shellwords.escape(out_path)}`
    save(out_path)
  end

  # Where should the starting image be located?
  # If the step names another step as its +input+, returns the path to that
  # step's output so that it can be processed further; otherwise returns the
  # action's +input_path+.
  def input_path_for(step)
    in_step = step && step['input'] && options['steps'].detect {|s| s['name'] == step['input'] }
    in_step ? output_path_for(in_step) : input_path
  end

  # Where should resulting images be saved locally?
  def output_path_for(step)
    "#{work_directory}/#{file_name}_#{step['name']}.#{step['extension']}"
  end


  private

  # Split a user-supplied option string into words using shell quoting rules,
  # then escape every word so the shell passes it through untouched.
  # A nil option string yields an empty string. Raises ArgumentError (from
  # Shellwords) when the option string contains an unmatched quote.
  def escaped_options(opts)
    Shellwords.split(opts.to_s).map {|word| Shellwords.escape(word) }.join(' ')
  end

end
@@ -0,0 +1,92 @@
1
require 'shellwords'

# Depends on working pdftk, gm (GraphicsMagick), and pdftotext (Poppler) commands.
# Splits a pdf into batches of N pages, creates their thumbnails and icons,
# as specified in the Job options, gets the text for every page, and merges
# it all back into a tar archive for convenient download.
#
# See <tt>examples/process_pdfs_example.rb</tt> for more information.
class ProcessPdfs < CloudCrowd::Action

  # Split up a large pdf into single-page pdfs. Batch them into 'batch_size'
  # chunks for processing. The double pdftk shuffle fixes the document xrefs.
  #
  # Pages are sorted by file name before batching so that each batch holds a
  # contiguous run of pages (Dir[] makes no ordering promise). Returns the
  # results of calling +save+ on every batch archive.
  #
  # Raises ArgumentError when <tt>options['batch_size']</tt> is missing or is
  # not a positive number.
  def split
    batch_size = options['batch_size'].to_i
    raise ArgumentError, "'batch_size' must be a positive integer" unless batch_size > 0

    `pdftk #{escape(input_path)} burst output #{escape("#{file_name}_%05d.pdf_temp")}`
    FileUtils.rm input_path
    Dir["*.pdf_temp"].each do |temp|
      page = "#{File.basename(temp, '.pdf_temp')}.pdf"
      `pdftk #{escape(temp)} output #{escape(page)}`
    end

    pdfs    = Dir["*.pdf"].sort
    batches = (pdfs.length / batch_size.to_f).ceil
    batches.times do |batch_num|
      tar_path   = "#{sprintf('%05d', batch_num)}.tar"
      batch_pdfs = pdfs[batch_num*batch_size...(batch_num + 1)*batch_size]
      `tar -czf #{tar_path} #{batch_pdfs.map {|pdf| escape(pdf) }.join(' ')}`
    end
    Dir["*.tar"].sort.map {|tar| save(tar) }
  end

  # Convert every pdf page of a batch into the requested image sizes and grab
  # its text, then pack everything that is left in the working directory
  # (the source pdfs are removed first) into a single archive and +save+ it.
  def process
    `tar -xzf #{escape(input_path)}`
    FileUtils.rm input_path
    cmds = []
    generate_images_commands(cmds)
    generate_text_commands(cmds)
    system cmds.join(' && ')
    FileUtils.rm Dir['*.pdf']
    `tar -czf #{escape(file_name)}.tar *`
    save("#{file_name}.tar")
  end

  # Merge all of the resulting images, all of the resulting text files, and
  # the concatenated merge of the full-text into a single tar archive, ready
  # for download.
  def merge
    input.each do |batch_url|
      batch_path = File.basename(batch_url)
      download(batch_url, batch_path)
      `tar -xzf #{escape(batch_path)}`
      FileUtils.rm batch_path
    end

    names = Dir['*.txt'].map {|fn| fn.sub(/_\d+(_\w+)?\.txt\Z/, '') }.uniq
    dirs = names.map {|n| ["#{n}/text/full", "#{n}/text/pages"] + options['images'].map {|i| "#{n}/images/#{i['name']}" } }.flatten
    FileUtils.mkdir_p(dirs)

    Dir['*.*'].each do |file|
      # The per-document directories created above also match '*.*' whenever
      # the document name contains a dot; only plain files are sorted away.
      next unless File.file?(file)
      ext     = File.extname(file)
      # Escape the extension so that its leading dot only matches a dot.
      ext_re  = Regexp.escape(ext)
      name    = file.sub(/_\d+(_\w+)?#{ext_re}\Z/, '')
      if ext == '.txt'
        FileUtils.mv(file, "#{name}/text/pages/#{file}")
      else
        suffix      = file.match(/_([^_]+)#{ext_re}\Z/)[1]
        sans_suffix = file.sub(/_([^_]+)#{ext_re}\Z/, ext)
        FileUtils.mv(file, "#{name}/images/#{suffix}/#{sans_suffix}")
      end
    end

    # The glob is left unescaped on purpose so the shell expands it.
    names.each do |n|
      doc = escape(n)
      `cat #{doc}/text/pages/*.txt > #{doc}/text/full/#{doc}.txt`
    end

    `tar -czf processed_pdfs.tar *`
    save("processed_pdfs.tar")
  end


  private

  # Append one `gm convert` command per pdf page and per entry of
  # <tt>options['images']</tt> to +command_list+. The image options come from
  # the job description, so they are tokenized and escaped before use.
  def generate_images_commands(command_list)
    Dir["*.pdf"].each do |pdf|
      name = File.basename(pdf, File.extname(pdf))
      options['images'].each do |i|
        image = "#{name}_#{i['name']}.#{i['extension']}"
        command_list << "gm convert #{escape_options(i['options'])} #{escape(pdf)} #{escape(image)}"
      end
    end
  end

  # Append one `pdftotext` command per pdf page to +command_list+.
  def generate_text_commands(command_list)
    Dir["*.pdf"].each do |pdf|
      name = File.basename(pdf, File.extname(pdf))
      command_list << "pdftotext -enc UTF-8 -layout -q #{escape(pdf)} #{escape("#{name}.txt")}"
    end
  end

  # Escape a single value for safe interpolation into a shell command line.
  def escape(value)
    Shellwords.escape(value.to_s)
  end

  # Split an option string into words using shell quoting rules and escape
  # every word, so that shell metacharacters reach the command literally.
  def escape_options(opts)
    Shellwords.split(opts.to_s).map {|word| Shellwords.escape(word) }.join(' ')
  end

end
@@ -0,0 +1,16 @@
1
require 'shellwords'

# A parallel WordCount. Depends on the 'wc' utility.
class WordCount < CloudCrowd::Action

  # Count the words in a single book.
  # Pretend that this takes longer than it really does, for demonstration purposes.
  #
  # Returns the word count reported by `wc -w` as an Integer. Raises a
  # RuntimeError when the output of `wc` does not start with a number (for
  # example because the input file is missing or `wc` is not installed).
  def process
    sleep 5
    # The path is escaped so that spaces or shell metacharacters in the file
    # name cannot break (or hijack) the command.
    output = `wc -w #{Shellwords.escape(input_path.to_s)}`
    count  = output[/\A\s*(\d+)/, 1]
    raise "wc could not count the words in #{input_path}: #{output.inspect}" unless count
    count.to_i
  end

  # Sum the total word count.
  def merge
    input.inject(0) {|sum, count| sum + count }
  end

end
data/bin/crowd ADDED
@@ -0,0 +1,5 @@
1
#!/usr/bin/env ruby

# Entry point of the `crowd` executable: load the command-line class from this
# gem's own lib directory (relative to this file) and instantiate it.
# NOTE(review): presumably CommandLine#initialize parses ARGV and dispatches
# the requested sub-command -- confirm in lib/cloud_crowd/command_line.rb.
lib_dir = File.join(File.dirname(__FILE__), '..', 'lib')
require File.join(lib_dir, 'cloud_crowd', 'command_line')

CloudCrowd::CommandLine.new
@@ -0,0 +1,23 @@
1
#!/usr/bin/env ruby

# Rackup script for the central CloudCrowd server. Any Rack-compliant server
# handler can run it. For example, to start three servers with Thin, beginning
# at a specified port number:
#
#   thin start -R config.ru -p 9173 --servers 3
#
# Or a single server with Unicorn:
#
#   unicorn config.ru
#

require 'rubygems'
require 'cloud-crowd'

# Both configuration files are expected to sit next to this script.
config_dir = ::File.dirname(__FILE__)
CloudCrowd.configure("#{config_dir}/config.yml")
CloudCrowd.configure_database("#{config_dir}/database.yml")

# Mount the server application at the root path.
map('/') { run CloudCrowd::Server }
@@ -0,0 +1,55 @@
1
+ # The URL where you're planning on running the central server/queue/database.
2
+ :central_server: http://localhost:9173
3
+
4
+ # The following settings allow you to control the number of workers that can run
5
+ # on a given node, to prevent the node from becoming overloaded. 'max_workers'
6
+ # is a simple cap on the maximum number of workers a node is allowed to run
7
+ # concurrently. 'max_load' is the maximum (one-minute) load average, above which
8
+ # a node will refuse to take new work. 'min_free_memory' is the minimum amount
9
+ # of free RAM (in megabytes) a node is allowed to have, below which no new
10
+ # workers are run. These settings may be used in any combination.
11
+ :max_workers: 5
12
+ # :max_load: 5.0
13
+ # :min_free_memory: 150
14
+
15
+ # The storage back-end that you'd like to use for intermediate and final results
16
+ # of processing. 's3' and 'filesystem' are supported. 'filesystem' should only
17
+ # be used in development, on single-machine installations, or networked drives.
18
+ # If you *are* developing an action, filesystem is certainly faster and easier.
19
+ :storage: s3
20
+
21
+ # Please provide your AWS credentials for S3 storage of job output.
22
+ :aws_access_key: [your AWS access key]
23
+ :aws_secret_key: [your AWS secret access key]
24
+
25
+ # Choose an S3 bucket to store all CloudCrowd output, and decide if you'd like
26
+ # to keep all resulting files on S3 private. If so, you'll receive authenticated
27
+ # S3 URLs as job output, good for 24 hours. If left public, you'll get the
28
+ # straight URLs to the files on S3.
29
+ :s3_bucket: [your CloudCrowd bucket]
30
+ :s3_authentication: no
31
+
32
+ # The following settings configure local paths. 'local_storage_path' is the
33
+ # directory in which all files will be saved if you're using the 'filesystem'
34
+ # storage. 'log_path' and 'pid_path' are the directories in which daemonized
35
+ # servers and nodes will store their process ids and log files. The default
36
+ # values are listed.
37
+ # :local_storage_path: /tmp/cloud_crowd_storage
38
+ # :log_path: log
39
+ # :pid_path: tmp/pids
40
+
41
+ # Use HTTP Basic Auth for all requests? (Includes all internal worker requests
42
+ # to the central server). If yes, specify the login and password that all
43
+ # requests must provide for authentication.
44
+ :http_authentication: no
45
+ :login: [your login name]
46
+ :password: [your password]
47
+
48
+ # By default, CloudCrowd looks for installed actions inside the 'actions'
49
+ # subdirectory of this configuration folder. 'actions_path' allows you to load
50
+ # additional actions from a location of your choice.
51
+ # :actions_path: /path/to/actions
52
+
53
+ # The number of separate attempts that will be made to process an individual
54
+ # work unit, before marking it as having failed.
55
+ :work_unit_retries: 3
@@ -0,0 +1,16 @@
1
+ # This is a standard ActiveRecord database.yml file. You can configure it
2
+ # to use any database that ActiveRecord supports. Only the central server needs
3
+ # this file to be configured -- nodes never connect directly to the database.
4
+
5
+ :adapter: mysql
6
+ :encoding: utf8
7
+ :username: root
8
+ :password:
9
+ :socket: /tmp/mysql.sock
10
+ :database: cloud_crowd
11
+
12
+ # If you'd prefer to use an SQLite database instead, the following configuration
13
+ # will do nicely:
14
+ #
15
+ # :adapter: sqlite3
16
+ # :database: cloud_crowd.db
@@ -0,0 +1,44 @@
1
#!/usr/bin/env ruby

# RubyGems is loaded explicitly instead of through the shebang line: on Linux
# everything after the interpreter path is passed to `env` as one single
# argument, so "ruby -rubygems" is not found there.
require 'rubygems'
require 'restclient'
require 'json'

# This example demonstrates the GraphicsMagick action by taking in a list of
# five images, and producing annotated, blurred, and black and white versions
# of each image. See actions/graphics_magick.rb

# The source images; each one becomes a separate input of the job.
inputs = [
  'http://www.sci-fi-o-rama.com/wp-content/uploads/2008/10/dan_mcpharlin_the_land_of_sleeping_things.jpg',
  'http://www.sci-fi-o-rama.com/wp-content/uploads/2009/07/dan_mcpharlin_wired_spread01.jpg',
  'http://www.sci-fi-o-rama.com/wp-content/uploads/2009/07/dan_mcpharlin_wired_spread03.jpg',
  'http://www.sci-fi-o-rama.com/wp-content/uploads/2009/07/dan_mcpharlin_wired_spread02.jpg',
  'http://www.sci-fi-o-rama.com/wp-content/uploads/2009/02/dan_mcpharlin_untitled.jpg'
]

# The GraphicsMagick steps to run against every image. The 'bw' step names
# 'blurred' as its input, so it works on the output of that step rather than
# on the original image.
steps = [{
  'name'      => 'annotated',
  'command'   => 'convert',
  'options'   => '-font helvetica -fill red -draw "font-size 35; text 75,75 CloudCrowd!"',
  'extension' => 'jpg'
},{
  'name'      => 'blurred',
  'command'   => 'convert',
  'options'   => '-blur 10x5',
  'extension' => 'png'
},{
  'name'      => 'bw',
  'input'     => 'blurred',
  'command'   => 'convert',
  'options'   => '-monochrome',
  'extension' => 'jpg'
}]

job = {
  'action'  => 'graphics_magick',
  'inputs'  => inputs,
  'options' => {'steps' => steps}
}

# The job description is sent as JSON in the 'job' form parameter.
RestClient.post('http://localhost:9173/jobs', {:job => job.to_json})
@@ -0,0 +1,40 @@
1
#!/usr/bin/env ruby

# RubyGems is loaded explicitly instead of through the shebang line: on Linux
# everything after the interpreter path is passed to `env` as one single
# argument, so "ruby -rubygems" is not found there.
require 'rubygems'
require 'restclient'
require 'json'

# This example demonstrates a fairly complicated PDF-processing action, designed
# to extract the PDF's text, and produce GIF versions of each page. The action
# (actions/process_pdfs.rb) shows an example of using all three steps,
# split, process, and merge.

# The pdfs to process; each one becomes a separate input of the job.
inputs = [
  'http://tigger.uic.edu/~victor/personal/futurism.pdf',
  'http://www.jonasmekas.com/Catalog_excerpt/The%20Avant-Garde%20From%20Futurism%20to%20Fluxus.pdf',
  'http://www.dzignism.com/articles/Futurist.Manifesto.pdf',
  'http://www.pitt.edu/~slavic/sisc/SISC4/dadswell.pdf'
]

# One image is rendered per page for every entry listed here.
images = [{
  'name'      => '700',
  'options'   => '-resize 700x -density 220 -depth 4 -unsharp 0.5x0.5+0.5+0.03',
  'extension' => 'gif'
},{
  'name'      => '1000',
  'options'   => '-resize 1000x -density 220 -depth 4 -unsharp 0.5x0.5+0.5+0.03',
  'extension' => 'gif'
}]

job = {
  'action'  => 'process_pdfs',
  'inputs'  => inputs,
  'options' => {
    # Number of pages that are handed to a worker in a single batch.
    'batch_size' => 7,
    'images'     => images
  }
}

# The job description is sent as JSON in the 'job' form parameter.
RestClient.post('http://localhost:9173/jobs', {:job => job.to_json})
@@ -0,0 +1,42 @@
1
#!/usr/bin/env ruby

# RubyGems is loaded explicitly instead of through the shebang line: on Linux
# everything after the interpreter path is passed to `env` as one single
# argument, so "ruby -rubygems" is not found there.
require 'rubygems'
require 'restclient'
require 'json'

# Let's count all the words in Shakespeare.

# One plain-text play per input; every play is counted as its own work unit
# input of the 'word_count' action.
plays = [
  'http://www.gutenberg.org/dirs/etext97/1ws3010.txt',  # All's Well That Ends Well
  'http://www.gutenberg.org/dirs/etext99/1ws3511.txt',  # Anthony and Cleopatra
  'http://www.gutenberg.org/dirs/etext97/1ws2510.txt',  # As You Like It
  'http://www.gutenberg.org/dirs/etext97/1ws0610.txt',  # The Comedy of Errors
  'http://www.gutenberg.org/dirs/etext99/1ws3911.txt',  # Cymbeline
  'http://www.gutenberg.org/dirs/etext00/0ws2610.txt',  # Hamlet
  'http://www.gutenberg.org/dirs/etext00/0ws1910.txt',  # Henry IV
  'http://www.gutenberg.org/dirs/etext99/1ws2411.txt',  # Julius Caesar
  'http://www.gutenberg.org/dirs/etext98/2ws3310.txt',  # King Lear
  'http://www.gutenberg.org/dirs/etext99/1ws1211j.txt', # Love's Labour's Lost
  'http://www.gutenberg.org/dirs/etext98/2ws3410.txt',  # Macbeth
  'http://www.gutenberg.org/dirs/etext98/2ws1810.txt',  # The Merchant of Venice
  'http://www.gutenberg.org/dirs/etext99/1ws1711.txt',  # Midsummer Night's Dream
  'http://www.gutenberg.org/dirs/etext98/3ws2210.txt',  # Much Ado About Nothing
  'http://www.gutenberg.org/dirs/etext00/0ws3210.txt',  # Othello
  'http://www.gutenberg.org/dirs/etext98/2ws1610.txt',  # Romeo and Juliet
  'http://www.gutenberg.org/dirs/etext98/2ws1010.txt',  # The Taming of the Shrew
  'http://www.gutenberg.org/dirs/etext99/1ws4111.txt',  # The Tempest
  'http://www.gutenberg.org/dirs/etext00/0ws0910.txt',  # Titus Andronicus
  'http://www.gutenberg.org/dirs/etext99/1ws2911.txt',  # Troilus and Cressida
  'http://www.gutenberg.org/dirs/etext98/3ws2810.txt',  # Twelfth Night
  'http://www.gutenberg.org/files/1539/1539.txt'        # The Winter's Tale
]

job = {
  'action' => 'word_count',
  'inputs' => plays
}

# The job description is sent as JSON in the 'job' form parameter.
RestClient.post('http://localhost:9173/jobs', {:job => job.to_json})

# With 23 Workers running, and over Wifi, it counted all the words in 5.5 secs.
# On a fast internet connection, you may not even see this job show up.