mooktakim-cloud-crowd 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. data/EPIGRAPHS +17 -0
  2. data/LICENSE +22 -0
  3. data/README +93 -0
  4. data/actions/graphics_magick.rb +43 -0
  5. data/actions/process_pdfs.rb +92 -0
  6. data/actions/word_count.rb +16 -0
  7. data/bin/crowd +5 -0
  8. data/config/config.example.ru +23 -0
  9. data/config/config.example.yml +55 -0
  10. data/config/database.example.yml +16 -0
  11. data/examples/graphics_magick_example.rb +44 -0
  12. data/examples/process_pdfs_example.rb +40 -0
  13. data/examples/word_count_example.rb +42 -0
  14. data/lib/cloud-crowd.rb +188 -0
  15. data/lib/cloud_crowd/action.rb +125 -0
  16. data/lib/cloud_crowd/asset_store/filesystem_store.rb +39 -0
  17. data/lib/cloud_crowd/asset_store/s3_store.rb +43 -0
  18. data/lib/cloud_crowd/asset_store.rb +41 -0
  19. data/lib/cloud_crowd/command_line.rb +242 -0
  20. data/lib/cloud_crowd/exceptions.rb +46 -0
  21. data/lib/cloud_crowd/helpers/authorization.rb +52 -0
  22. data/lib/cloud_crowd/helpers/resources.rb +25 -0
  23. data/lib/cloud_crowd/helpers.rb +8 -0
  24. data/lib/cloud_crowd/inflector.rb +19 -0
  25. data/lib/cloud_crowd/models/job.rb +190 -0
  26. data/lib/cloud_crowd/models/node_record.rb +107 -0
  27. data/lib/cloud_crowd/models/work_unit.rb +170 -0
  28. data/lib/cloud_crowd/models.rb +40 -0
  29. data/lib/cloud_crowd/node.rb +199 -0
  30. data/lib/cloud_crowd/schema.rb +50 -0
  31. data/lib/cloud_crowd/server.rb +123 -0
  32. data/lib/cloud_crowd/worker.rb +149 -0
  33. data/mooktakim-cloud-crowd.gemspec +116 -0
  34. data/public/css/admin_console.css +243 -0
  35. data/public/css/reset.css +42 -0
  36. data/public/images/bullet_green.png +0 -0
  37. data/public/images/bullet_white.png +0 -0
  38. data/public/images/cloud_hand.png +0 -0
  39. data/public/images/header_back.png +0 -0
  40. data/public/images/logo.png +0 -0
  41. data/public/images/queue_fill.png +0 -0
  42. data/public/images/server.png +0 -0
  43. data/public/images/server_busy.png +0 -0
  44. data/public/images/server_error.png +0 -0
  45. data/public/images/sidebar_bottom.png +0 -0
  46. data/public/images/sidebar_top.png +0 -0
  47. data/public/images/worker_info.png +0 -0
  48. data/public/images/worker_info_loading.gif +0 -0
  49. data/public/js/admin_console.js +197 -0
  50. data/public/js/excanvas.js +1 -0
  51. data/public/js/flot.js +1 -0
  52. data/public/js/jquery.js +19 -0
  53. data/test/acceptance/test_failing_work_units.rb +33 -0
  54. data/test/acceptance/test_node.rb +20 -0
  55. data/test/acceptance/test_server.rb +66 -0
  56. data/test/acceptance/test_word_count.rb +40 -0
  57. data/test/blueprints.rb +25 -0
  58. data/test/config/actions/failure_testing.rb +13 -0
  59. data/test/config/config.ru +17 -0
  60. data/test/config/config.yml +6 -0
  61. data/test/config/database.yml +3 -0
  62. data/test/test_helper.rb +19 -0
  63. data/test/unit/test_action.rb +70 -0
  64. data/test/unit/test_configuration.rb +48 -0
  65. data/test/unit/test_job.rb +103 -0
  66. data/test/unit/test_node.rb +41 -0
  67. data/test/unit/test_node_record.rb +42 -0
  68. data/test/unit/test_work_unit.rb +53 -0
  69. data/test/unit/test_worker.rb +48 -0
  70. data/views/operations_center.erb +82 -0
  71. metadata +290 -0
data/EPIGRAPHS ADDED
@@ -0,0 +1,17 @@
1
+ The crowd, suddenly there where there was nothing before, is a mysterious and
2
+ universal phenomenon. A few people may have been standing together -- five, ten
3
+ or twelve, not more; nothing has been announced, nothing is expected. Suddenly
4
+ everywhere is black with people and more come streaming from all sides as though
5
+ streets had only one direction. Most of them do not know what has happened and,
6
+ if questioned, have no answer; but they hurry to be there where most other
7
+ people are. There is a determination in their movement which is quite different
8
+ from the expression of ordinary curiosity. It seems as though the movement of
9
+ some of them transmits itself to all the others. But that is not all; they have
10
+ a goal which is there before they can find words for it. -p 16
11
+
12
+ Crowd crystals are the small, rigid groups of men, strictly delimited and of
13
+ great constancy, which serve to precipitate crowds. Their structure is such
14
+ that they can be comprehended and taken in at a glance. Their unity is more
15
+ important than their size. -p 73
16
+
17
+ From Elias Canetti's "Crowds and Power" (1962).
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2009 Jeremy Ashkenas, DocumentCloud
2
+
3
+ Permission is hereby granted, free of charge, to any person
4
+ obtaining a copy of this software and associated documentation
5
+ files (the "Software"), to deal in the Software without
6
+ restriction, including without limitation the rights to use,
7
+ copy, modify, merge, publish, distribute, sublicense, and/or sell
8
+ copies of the Software, and to permit persons to whom the
9
+ Software is furnished to do so, subject to the following
10
+ conditions:
11
+
12
+ The above copyright notice and this permission notice shall be
13
+ included in all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
17
+ OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
19
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
20
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22
+ OTHER DEALINGS IN THE SOFTWARE.
data/README ADDED
@@ -0,0 +1,93 @@
1
+ =
2
+ _ _
3
+ ( ` )_
4
+ ( ) `)
5
+ (_ (_ . _) _)
6
+ _
7
+ ( )
8
+ _ . ( ` ) . )
9
+ ( _ )_ (_, _( ,_)_)
10
+ (_ _(_ ,)
11
+
12
+ _ _ ___ _ _ ___ _
13
+ ( ` )_ / __| |___ _ _ __| |/ __|_ _ _____ __ ____| |
14
+ ( ) `) | (__| / _ \ || / _` | (__| '_/ _ \ V V / _` |
15
+ (_ (_ . _) _) \___|_\___/\_,_\__,_|\___|_| \___/\_/\_/\__,_|
16
+
17
+ _
18
+ ( )
19
+ _, _ . ( ` ) . )
20
+ ( ( _ )_ (_, _( ,_)_)
21
+ (_(_ _(_ ,)
22
+
23
+
24
+
25
+ ~ CloudCrowd ~
26
+
27
+ * Parallel processing for the rest of us
28
+ * Write your scripts in Ruby
29
+ * Works with Amazon EC2 and S3
30
+ * split -> process -> merge
31
+ * As easy as `gem install cloud-crowd`
32
+
33
+ Well-suited for:
34
+
35
+ * Generating or resizing images.
36
+ * Encoding video.
37
+ * Running text extraction or OCR on PDFs.
38
+ * Migrating a large file set or database.
39
+ * Web scraping.
40
+
41
+
42
+ ~ Documentation ~
43
+
44
+ Wiki: http://wiki.github.com/documentcloud/cloud-crowd
45
+ Rdoc: http://rdoc.info/projects/documentcloud/cloud-crowd
46
+
47
+
48
+ ~ Getting started ~
49
+
50
+ # Install the gem.
51
+
52
+ >> sudo gem install cloud-crowd
53
+
54
+ # Install the CloudCrowd configuration files to a location of your choosing.
55
+
56
+ >> crowd install ~/config/cloud-crowd
57
+
58
+ # Now, you can use the full complement of `crowd` commands from inside of
59
+ # this configuration directory. To see the available commands:
60
+
61
+ >> crowd --help
62
+
63
+ # Edit the configuration files to your satisfaction, add AWS credentials,
64
+ # and then load the CloudCrowd schema into your configured database.
65
+
66
+ >> cd ~/config/cloud-crowd
67
+ >> mate config.yml
68
+ >> mate database.yml
69
+ >> [create the database you just configured...]
70
+ >> crowd load_schema
71
+
72
+ # Write your actions, and install them into the 'actions' subdirectory.
73
+ # CloudCrowd comes with a few default actions as an example.
74
+
75
+ # To launch the central server (make sure that you include its location
76
+ # in config.yml):
77
+
78
+ >> crowd server
79
+
80
+ # The configuration folder also includes 'config.ru', which can be used by
81
+ # any Rack-compliant webserver to run your central server.
82
+
83
+ # Then, to launch a node of workers:
84
+
85
+ >> crowd node
86
+
87
+ # To spin up remote nodes, install the 'cloud-crowd' gem and copy over
88
+ # your configuration directory. Run `crowd node`, and the remote machines
89
+ # will register with the central server, becoming available for processing.
90
+
91
+ # At this point you can visit your Operations Center at localhost:9173 to
92
+ # view all of your nodes, ready for action.
93
+
@@ -0,0 +1,43 @@
1
require 'shellwords'

# The GraphicsMagick action, dependent on the `gm` command, is able to perform
# any number of GraphicsMagick conversions on an image passed in as an input.
# The options hash should specify the +name+ for the particular step (which is
# appended to the resulting image filename), the +command+ (eg. convert, mogrify),
# the +options+ (to the command, eg. -shadow -blur), and the +extension+ which
# will determine the resulting image type. Optionally, you may also specify
# +input+ as the name of a previous step; doing this will use the result of
# that step as the source image, otherwise each step uses the original image
# as its source.
class GraphicsMagick < CloudCrowd::Action

  # Run each of the configured steps against the input image and return the
  # aggregate output: a hash that maps every step's +name+ to the value that
  # +run_step+ returned for it.
  def process
    options['steps'].inject({}) {|h, step| h[step['name']] = run_step(step); h }
  end

  # Run an individual step (a single GraphicsMagick command) and hand the
  # resulting file to +save+, returning whatever +save+ returns (according to
  # the original author's notes, the public URL of the stored result).
  #
  # The step's +command+ and +options+ come from the job request, so nothing
  # is interpolated into the shell verbatim: the options string is tokenized
  # with shell quoting rules (so <tt>-draw "text 75,75 Hi"</tt> still arrives
  # as two arguments) and every token, including both paths, is re-quoted
  # before the command line is executed. Shell metacharacters in any of them
  # therefore reach `gm` as literal text. An options string with unbalanced
  # quotes raises ArgumentError from Shellwords.split.
  def run_step(step)
    in_path, out_path = input_path_for(step), output_path_for(step)
    argv = ['gm', step['command'].to_s] +
           Shellwords.split(step['options'].to_s) +
           [in_path, out_path]
    `#{Shellwords.join(argv)}`
    save(out_path)
  end

  # Where should the starting image be located?
  # If the step names a previous step as its +input+, returns the path to that
  # step's output so that it can be processed further; otherwise (no +input+,
  # or no step with that name) returns the path of the original image.
  def input_path_for(step)
    in_step = step && step['input'] && options['steps'].detect {|s| s['name'] == step['input']}
    return input_path unless in_step
    return output_path_for(in_step)
  end

  # Where should resulting images be saved locally? Inside the work directory,
  # named after the input file with the step's +name+ appended and the step's
  # +extension+ as the file type.
  def output_path_for(step)
    "#{work_directory}/#{file_name}_#{step['name']}.#{step['extension']}"
  end

end
@@ -0,0 +1,92 @@
1
require 'shellwords'

# Depends on working pdftk, gm (GraphicsMagick), and pdftotext (Poppler) commands.
# Splits a pdf into batches of N pages, creates their thumbnails and icons,
# as specified in the Job options, gets the text for every page, and merges
# it all back into a tar archive for convenient download.
#
# See <tt>examples/process_pdfs_example.rb</tt> for more information.
#
# Every file name and user-supplied option that ends up on a command line is
# quoted with Shellwords, so names containing spaces or shell metacharacters
# are passed to the external tools as literal arguments.
class ProcessPdfs < CloudCrowd::Action

  # Split up a large pdf into single-page pdfs. Batch them into 'batch_size'
  # chunks for processing. The double pdftk shuffle fixes the document xrefs.
  # Returns the result of +save+ for every batch archive that was written.
  def split
    burst_pattern = "#{file_name}_%05d.pdf_temp"
    `pdftk #{Shellwords.escape(input_path)} burst output #{Shellwords.escape(burst_pattern)}`
    FileUtils.rm input_path
    Dir["*.pdf_temp"].each do |temp|
      fixed = "#{File.basename(temp, '.pdf_temp')}.pdf"
      `pdftk #{Shellwords.escape(temp)} output #{Shellwords.escape(fixed)}`
    end
    # Sort explicitly: glob order is not guaranteed on every Ruby version, and
    # batches should hold consecutive pages.
    pdfs = Dir["*.pdf"].sort
    batch_size = options['batch_size']
    batches = (pdfs.length / batch_size.to_f).ceil
    batches.times do |batch_num|
      tar_path = "#{sprintf('%05d', batch_num)}.tar"
      batch_pdfs = pdfs[batch_num*batch_size...(batch_num + 1)*batch_size]
      `tar -czf #{tar_path} #{Shellwords.join(batch_pdfs)}`
    end
    Dir["*.tar"].map {|tar| save(tar) }
  end

  # Convert each pdf page of a batch into different-sized thumbnails and grab
  # its text, then archive everything that was produced and +save+ it.
  def process
    `tar -xzf #{Shellwords.escape(input_path)}`
    FileUtils.rm input_path
    cmds = []
    generate_images_commands(cmds)
    generate_text_commands(cmds)
    # One shell invocation for the whole batch; '&&' stops at the first failure.
    system cmds.join(' && ')
    FileUtils.rm Dir['*.pdf']
    archive = "#{file_name}.tar"
    `tar -czf #{Shellwords.escape(archive)} *`
    save(archive)
  end

  # Merge all of the resulting images, all of the resulting text files, and
  # the concatenated merge of the full-text into a single tar archive, ready
  # for download.
  def merge
    input.each do |batch_url|
      batch_path = File.basename(batch_url)
      download(batch_url, batch_path)
      `tar -xzf #{Shellwords.escape(batch_path)}`
      FileUtils.rm batch_path
    end

    # Document names are recovered from the page text files
    # (<name>_<page>.txt), then one directory tree is created per document.
    names = Dir['*.txt'].map {|fn| fn.sub(/_\d+(_\w+)?\.txt\Z/, '') }.uniq
    dirs = names.map {|n| ["#{n}/text/full", "#{n}/text/pages"] + options['images'].map {|i| "#{n}/images/#{i['name']}" } }.flatten
    FileUtils.mkdir_p(dirs)

    Dir['*.*'].each do |file|
      # The glob also matches the directories created above whenever a
      # document name contains a dot; only regular files are page output.
      next unless File.file?(file)
      ext = File.extname(file)
      ext_pattern = Regexp.escape(ext)
      name = file.sub(/_\d+(_\w+)?#{ext_pattern}\Z/, '')
      if ext == '.txt'
        FileUtils.mv(file, "#{name}/text/pages/#{file}")
      else
        # Images are named <name>_<page>_<image name><ext>; anything that does
        # not follow that scheme is left where it is instead of crashing.
        suffix_match = file.match(/_([^_]+)#{ext_pattern}\Z/)
        next unless suffix_match
        suffix = suffix_match[1]
        sans_suffix = file.sub(/_([^_]+)#{ext_pattern}\Z/, ext)
        FileUtils.mv(file, "#{name}/images/#{suffix}/#{sans_suffix}")
      end
    end

    names.each do |n|
      dir = Shellwords.escape(n)
      # The glob is appended outside of the quoted part so the shell still
      # expands it (in sorted order, which is page order for %05d names).
      `cat #{dir}/text/pages/*.txt > #{dir}/text/full/#{dir}.txt`
    end

    `tar -czf processed_pdfs.tar *`
    save("processed_pdfs.tar")
  end


  private

  # Append one `gm convert` command line to +command_list+ for every pdf in
  # the current directory and every entry of options['images']. The image's
  # +options+ string is tokenized with shell quoting rules and re-quoted, so
  # it can only ever contribute arguments to `gm`.
  def generate_images_commands(command_list)
    Dir["*.pdf"].each do |pdf|
      name = File.basename(pdf, File.extname(pdf))
      options['images'].each do |i|
        argv = ['gm', 'convert'] +
               Shellwords.split(i['options'].to_s) +
               [pdf, "#{name}_#{i['name']}.#{i['extension']}"]
        command_list << Shellwords.join(argv)
      end
    end
  end

  # Append one `pdftotext` command line to +command_list+ for every pdf in the
  # current directory, writing the text next to it as <name>.txt.
  def generate_text_commands(command_list)
    Dir["*.pdf"].each do |pdf|
      name = File.basename(pdf, File.extname(pdf))
      command_list << "pdftotext -enc UTF-8 -layout -q #{Shellwords.escape(pdf)} #{Shellwords.escape("#{name}.txt")}"
    end
  end

end
@@ -0,0 +1,16 @@
1
require 'shellwords'

# A parallel WordCount. Depends on the 'wc' utility.
class WordCount < CloudCrowd::Action

  # Count the words in a single book and return the count as an Integer.
  # Pretend that this takes longer than it really does, for demonstration purposes.
  #
  # The input path is shell-quoted so that names containing spaces or shell
  # metacharacters are counted rather than interpreted. Raises a RuntimeError
  # with the captured output when `wc` does not print a leading number (for
  # example because the file is missing).
  def process
    sleep 5
    output = `wc -w #{Shellwords.escape(input_path)}`
    count  = output[/\A\s*(\d+)/, 1]
    raise "Could not read a word count for #{input_path} from `wc` output: #{output.inspect}" unless count
    count.to_i
  end

  # Sum the total word count across all of the processed inputs.
  def merge
    input.inject(0) {|sum, count| sum + count }
  end

end
data/bin/crowd ADDED
@@ -0,0 +1,5 @@
1
#!/usr/bin/env ruby

# Executable entry point for the `crowd` command. Loads the command-line
# implementation from this gem's own lib directory (resolved relative to this
# script) and instantiates it.
command_line = "#{File.dirname(__FILE__)}/../lib/cloud_crowd/command_line"
require command_line

CloudCrowd::CommandLine.new
@@ -0,0 +1,23 @@
1
#!/usr/bin/env ruby

# Rackup script for the central CloudCrowd server. Any Rack-compliant server
# handler can run it. For example, to start three servers with Thin (add
# `-p PORT` to choose the first port):
#
# thin start -R config.ru --servers 3
#
# Or a single server with Unicorn:
#
# unicorn config.ru
#


require 'rubygems'
require 'cloud-crowd'

# Both configuration files are expected to sit next to this script.
config_dir = ::File.dirname(__FILE__)
CloudCrowd.configure(config_dir + '/config.yml')
CloudCrowd.configure_database(config_dir + '/database.yml')

map '/' do
  run CloudCrowd::Server
end
@@ -0,0 +1,55 @@
1
+ # The URL where you're planning on running the central server/queue/database.
2
+ :central_server: http://localhost:9173
3
+
4
+ # The following settings allow you to control the number of workers that can run
5
+ # on a given node, to prevent the node from becoming overloaded. 'max_workers'
6
+ # is a simple cap on the maximum number of workers a node is allowed to run
7
+ # concurrently. 'max_load' is the maximum (one-minute) load average, above which
8
+ # a node will refuse to take new work. 'min_free_memory' is the minimum amount
9
+ # of free RAM (in megabytes) a node is allowed to have, below which no new
10
+ # workers are run. These settings may be used in any combination.
11
+ :max_workers: 5
12
+ # :max_load: 5.0
13
+ # :min_free_memory: 150
14
+
15
+ # The storage back-end that you'd like to use for intermediate and final results
16
+ # of processing. 's3' and 'filesystem' are supported. 'filesystem' should only
17
+ # be used in development, on single-machine installations, or networked drives.
18
+ # If you *are* developing an action, filesystem is certainly faster and easier.
19
+ :storage: s3
20
+
21
+ # Please provide your AWS credentials for S3 storage of job output.
22
+ :aws_access_key: [your AWS access key]
23
+ :aws_secret_key: [your AWS secret access key]
24
+
25
+ # Choose an S3 bucket to store all CloudCrowd output, and decide if you'd like
26
+ # to keep all resulting files on S3 private. If so, you'll receive authenticated
27
+ # S3 URLs as job output, good for 24 hours. If left public, you'll get the
28
+ # straight URLs to the files on S3.
29
+ :s3_bucket: [your CloudCrowd bucket]
30
+ :s3_authentication: no
31
+
32
+ # The following settings configure local paths. 'local_storage_path' is the
33
+ # directory in which all files will be saved if you're using the 'filesystem'
34
+ # storage. 'log_path' and 'pid_path' are the directories in which daemonized
35
+ # servers and nodes will store their log files and process ids. The default
36
+ # values are listed.
37
+ # :local_storage_path: /tmp/cloud_crowd_storage
38
+ # :log_path: log
39
+ # :pid_path: tmp/pids
40
+
41
+ # Use HTTP Basic Auth for all requests? (Includes all internal worker requests
42
+ # to the central server). If yes, specify the login and password that all
43
+ # requests must provide for authentication.
44
+ :http_authentication: no
45
+ :login: [your login name]
46
+ :password: [your password]
47
+
48
+ # By default, CloudCrowd looks for installed actions inside the 'actions'
49
+ # subdirectory of this configuration folder. 'actions_path' allows you to load
50
+ # additional actions from a location of your choice.
51
+ # :actions_path: /path/to/actions
52
+
53
+ # The number of separate attempts that will be made to process an individual
54
+ # work unit, before marking it as having failed.
55
+ :work_unit_retries: 3
@@ -0,0 +1,16 @@
1
+ # This is a standard ActiveRecord database.yml file. You can configure it
2
+ # to use any database that ActiveRecord supports. Only the central server needs
3
+ # this file to be configured -- nodes never connect directly to the database.
4
+
5
+ :adapter: mysql
6
+ :encoding: utf8
7
+ :username: root
8
+ :password:
9
+ :socket: /tmp/mysql.sock
10
+ :database: cloud_crowd
11
+
12
+ # If you'd prefer to use an SQLite database instead, the following configuration
13
+ # will do nicely:
14
+ #
15
+ # :adapter: sqlite3
16
+ # :database: cloud_crowd.db
@@ -0,0 +1,44 @@
1
#!/usr/bin/env ruby

# RubyGems is required explicitly: passing `-rubygems` through `env` in the
# shebang line is not portable (Linux hands "ruby -rubygems" to env as a single
# argument) and the switch no longer exists in current Ruby versions.
require 'rubygems'
require 'restclient'
require 'json'

# This example demonstrates the GraphicsMagick action by taking in a list of
# five images, and producing annotated, blurred, and black and white versions
# of each image. See actions/graphics_magick.rb

RestClient.post('http://localhost:9173/jobs',
  {:job => {

    'action' => 'graphics_magick',

    'inputs' => [
      'http://www.sci-fi-o-rama.com/wp-content/uploads/2008/10/dan_mcpharlin_the_land_of_sleeping_things.jpg',
      'http://www.sci-fi-o-rama.com/wp-content/uploads/2009/07/dan_mcpharlin_wired_spread01.jpg',
      'http://www.sci-fi-o-rama.com/wp-content/uploads/2009/07/dan_mcpharlin_wired_spread03.jpg',
      'http://www.sci-fi-o-rama.com/wp-content/uploads/2009/07/dan_mcpharlin_wired_spread02.jpg',
      'http://www.sci-fi-o-rama.com/wp-content/uploads/2009/02/dan_mcpharlin_untitled.jpg'
    ],

    'options' => {
      'steps' => [{
        'name'      => 'annotated',
        'command'   => 'convert',
        'options'   => '-font helvetica -fill red -draw "font-size 35; text 75,75 CloudCrowd!"',
        'extension' => 'jpg'
      },{
        'name'      => 'blurred',
        'command'   => 'convert',
        'options'   => '-blur 10x5',
        'extension' => 'png'
      },{
        # This step works on the output of the 'blurred' step, not the original.
        'name'      => 'bw',
        'input'     => 'blurred',
        'command'   => 'convert',
        'options'   => '-monochrome',
        'extension' => 'jpg'
      }]
    }

  }.to_json}
)
@@ -0,0 +1,40 @@
1
#!/usr/bin/env ruby

# RubyGems is required explicitly: passing `-rubygems` through `env` in the
# shebang line is not portable (Linux hands "ruby -rubygems" to env as a single
# argument) and the switch no longer exists in current Ruby versions.
require 'rubygems'
require 'restclient'
require 'json'

# This example demonstrates a fairly complicated PDF-processing action, designed
# to extract the PDF's text, and produce GIF versions of each page. The action
# (actions/process_pdfs.rb) shows an example of using all three steps,
# split, process, and merge.

RestClient.post('http://localhost:9173/jobs',
  {:job => {

    'action' => 'process_pdfs',

    'inputs' => [
      'http://tigger.uic.edu/~victor/personal/futurism.pdf',
      'http://www.jonasmekas.com/Catalog_excerpt/The%20Avant-Garde%20From%20Futurism%20to%20Fluxus.pdf',
      'http://www.dzignism.com/articles/Futurist.Manifesto.pdf',
      'http://www.pitt.edu/~slavic/sisc/SISC4/dadswell.pdf'
    ],

    'options' => {

      # Number of single-page pdfs that are handed to each work unit.
      'batch_size' => 7,

      # One image is rendered per page for each of these entries.
      'images' => [{
        'name'      => '700',
        'options'   => '-resize 700x -density 220 -depth 4 -unsharp 0.5x0.5+0.5+0.03',
        'extension' => 'gif'
      },{
        'name'      => '1000',
        'options'   => '-resize 1000x -density 220 -depth 4 -unsharp 0.5x0.5+0.5+0.03',
        'extension' => 'gif'
      }]

    }

  }.to_json}
)
@@ -0,0 +1,42 @@
1
#!/usr/bin/env ruby

# RubyGems is required explicitly: passing `-rubygems` through `env` in the
# shebang line is not portable (Linux hands "ruby -rubygems" to env as a single
# argument) and the switch no longer exists in current Ruby versions.
require 'rubygems'
require 'restclient'
require 'json'

# Let's count all the words in Shakespeare.

RestClient.post('http://localhost:9173/jobs',
  {:job => {

    'action' => 'word_count',

    'inputs' => [
      'http://www.gutenberg.org/dirs/etext97/1ws3010.txt',  # All's Well That Ends Well
      'http://www.gutenberg.org/dirs/etext99/1ws3511.txt',  # Antony and Cleopatra
      'http://www.gutenberg.org/dirs/etext97/1ws2510.txt',  # As You Like It
      'http://www.gutenberg.org/dirs/etext97/1ws0610.txt',  # The Comedy of Errors
      'http://www.gutenberg.org/dirs/etext99/1ws3911.txt',  # Cymbeline
      'http://www.gutenberg.org/dirs/etext00/0ws2610.txt',  # Hamlet
      'http://www.gutenberg.org/dirs/etext00/0ws1910.txt',  # Henry IV
      'http://www.gutenberg.org/dirs/etext99/1ws2411.txt',  # Julius Caesar
      'http://www.gutenberg.org/dirs/etext98/2ws3310.txt',  # King Lear
      'http://www.gutenberg.org/dirs/etext99/1ws1211j.txt', # Love's Labour's Lost
      'http://www.gutenberg.org/dirs/etext98/2ws3410.txt',  # Macbeth
      'http://www.gutenberg.org/dirs/etext98/2ws1810.txt',  # The Merchant of Venice
      'http://www.gutenberg.org/dirs/etext99/1ws1711.txt',  # Midsummer Night's Dream
      'http://www.gutenberg.org/dirs/etext98/3ws2210.txt',  # Much Ado About Nothing
      'http://www.gutenberg.org/dirs/etext00/0ws3210.txt',  # Othello
      'http://www.gutenberg.org/dirs/etext98/2ws1610.txt',  # Romeo and Juliet
      'http://www.gutenberg.org/dirs/etext98/2ws1010.txt',  # The Taming of the Shrew
      'http://www.gutenberg.org/dirs/etext99/1ws4111.txt',  # The Tempest
      'http://www.gutenberg.org/dirs/etext00/0ws0910.txt',  # Titus Andronicus
      'http://www.gutenberg.org/dirs/etext99/1ws2911.txt',  # Troilus and Cressida
      'http://www.gutenberg.org/dirs/etext98/3ws2810.txt',  # Twelfth Night
      'http://www.gutenberg.org/files/1539/1539.txt'        # The Winter's Tale
    ]

  }.to_json}
)

# With 23 Workers running, and over Wifi, it counted all the words in 5.5 secs.
# On a fast internet connection, you may not even see this job show up.