documentcloud-cloud-crowd 0.0.5 → 0.0.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (47) hide show
  1. data/README +59 -50
  2. data/actions/process_pdfs.rb +3 -3
  3. data/actions/word_count.rb +14 -0
  4. data/cloud-crowd.gemspec +27 -13
  5. data/config/config.example.yml +8 -11
  6. data/examples/graphics_magick_example.rb +40 -44
  7. data/examples/process_pdfs_example.rb +39 -29
  8. data/examples/word_count_example.rb +41 -0
  9. data/lib/cloud-crowd.rb +20 -17
  10. data/lib/cloud_crowd/action.rb +26 -9
  11. data/lib/cloud_crowd/app.rb +26 -4
  12. data/lib/cloud_crowd/asset_store.rb +69 -40
  13. data/lib/cloud_crowd/command_line.rb +6 -4
  14. data/lib/cloud_crowd/daemon.rb +65 -25
  15. data/lib/cloud_crowd/exceptions.rb +5 -0
  16. data/lib/cloud_crowd/helpers/resources.rb +2 -2
  17. data/lib/cloud_crowd/models/job.rb +9 -13
  18. data/lib/cloud_crowd/models/work_unit.rb +23 -15
  19. data/lib/cloud_crowd/models/worker_record.rb +61 -0
  20. data/lib/cloud_crowd/models.rb +7 -1
  21. data/lib/cloud_crowd/schema.rb +12 -3
  22. data/lib/cloud_crowd/worker.rb +48 -10
  23. data/public/css/admin_console.css +174 -4
  24. data/public/css/reset.css +17 -27
  25. data/public/images/bullet_green.png +0 -0
  26. data/public/images/bullet_white.png +0 -0
  27. data/public/images/cloud_hand.png +0 -0
  28. data/public/images/header_back.png +0 -0
  29. data/public/images/logo.png +0 -0
  30. data/public/images/server_error.png +0 -0
  31. data/public/images/sidebar_bottom.png +0 -0
  32. data/public/images/sidebar_top.png +0 -0
  33. data/public/images/worker_info.png +0 -0
  34. data/public/images/worker_info_loading.gif +0 -0
  35. data/public/js/admin_console.js +127 -10
  36. data/public/js/excanvas.pack.js +1 -0
  37. data/public/js/jquery-1.3.2.min.js +19 -0
  38. data/public/js/jquery.flot.pack.js +1 -0
  39. data/test/acceptance/test_word_count.rb +49 -0
  40. data/test/blueprints.rb +6 -5
  41. data/test/config/config.yml +1 -4
  42. data/test/test_helper.rb +1 -0
  43. data/test/unit/test_job.rb +12 -4
  44. data/test/unit/test_work_unit.rb +2 -2
  45. data/views/index.erb +69 -14
  46. metadata +23 -6
  47. data/public/js/jquery-1.3.2.js +0 -4376
data/README CHANGED
@@ -22,54 +22,63 @@
22
22
 
23
23
 
24
24
 
25
- ~ CloudCrowd ~
25
+ ~ CloudCrowd ~
26
26
 
27
- * A batch-processing system, map-reduce style
28
- * Write your scripts in Ruby
29
- * Built for Amazon EC2 and S3
30
- * split -> process -> merge
31
- * As easy as `gem install cloud-crowd`
32
-
33
-
34
- ~ Getting started ~
35
-
36
- # Install the gem (documentcloud-cloud-crowd until the first official release).
37
-
38
- >> sudo gem install cloud-crowd
39
-
40
- # Install the CloudCrowd configuration files to a location of your choosing.
41
-
42
- >> crowd install ~/config/cloud-crowd
43
-
44
- # Now, you can use the full complement of `crowd` commands from inside of
45
- # this configuration directory. To see the available commands:
46
-
47
- >> crowd --help
48
-
49
- # Edit the configuration files to your satisfaction, and add AWS credentials.
50
-
51
- >> mate ~/config/cloud-crowd/config.yml
52
- >> mate ~/config/cloud-crowd/database.yml
53
-
54
- # Write your actions, and install them into the 'actions' subdirectory.
55
- # CloudCrowd comes with some default actions as an example.
56
-
57
- # To spin up the central server (make sure that you include its location
58
- # in config.yml), either:
59
-
60
- >> crowd server
61
-
62
- # or:
63
-
64
- >> thin -R config.ru --servers 3 -e production start
65
-
66
- # Any server that supports Rack should work with the rackup file.
67
-
68
- # Then, to spin up 10 workers:
69
-
70
- >> crowd workers start -n 10
71
-
72
- # To spin up workers remotely, install the 'cloud-crowd' gem, and copy over
73
- # your configuration directory.
74
-
75
-
27
+ * Parallel processing for the rest of us
28
+ * Write your scripts in Ruby
29
+ * Built for Amazon EC2 and S3
30
+ * split -> process -> merge
31
+ * As easy as `gem install cloud-crowd`
32
+
33
+
34
+ ~ Wiki ~
35
+
36
+ http://wiki.github.com/documentcloud/cloud-crowd
37
+
38
+
39
+ ~ Getting started ~
40
+
41
+ # Install the gem.
42
+
43
+ >> sudo gem install cloud-crowd
44
+
45
+ # Install the CloudCrowd configuration files to a location of your choosing.
46
+
47
+ >> crowd install ~/config/cloud-crowd
48
+
49
+ # Now, you can use the full complement of `crowd` commands from inside of
50
+ # this configuration directory. To see the available commands:
51
+
52
+ >> crowd --help
53
+
54
+ # Edit the configuration files to your satisfaction, add AWS credentials,
55
+ # and then load the CloudCrowd schema into your configured database.
56
+
57
+ >> mate ~/config/cloud-crowd/config.yml
58
+ >> mate ~/config/cloud-crowd/database.yml
59
+ >> crowd load_schema
60
+
61
+ # Write your actions, and install them into the 'actions' subdirectory.
62
+ # CloudCrowd comes with some default actions as an example.
63
+
64
+ # To launch the central server (make sure that you include its location
65
+ # in config.yml), either:
66
+
67
+ >> crowd server
68
+
69
+ # or:
70
+
71
+ >> thin -R config.ru --servers 3 -e production start
72
+
73
+ # Any server that supports Rack should work with the rackup file.
74
+
75
+ # Then, to spin up 10 workers:
76
+
77
+ >> crowd workers start -n 10
78
+
79
+ # To spin up workers remotely, install the 'cloud-crowd' gem, and copy over
80
+ # your configuration directory.
81
+
82
+ # At this point you can visit your server console at localhost:9173 to
83
+ # view all of your workers, ready for action.
84
+
@@ -6,8 +6,8 @@
6
6
  # See <tt>examples/process_pdfs_example.rb</tt> for more information.
7
7
  class ProcessPdfs < CloudCrowd::Action
8
8
 
9
- # Split up a large pdf into single-page pdfs.
10
- # The double pdftk shuffle fixes the document xrefs.
9
+ # Split up a large pdf into single-page pdfs. Batch them into 'batch_size'
10
+ # chunks for processing. The double pdftk shuffle fixes the document xrefs.
11
11
  def split
12
12
  `pdftk #{input_path} burst output "#{file_name}_%05d.pdf_temp"`
13
13
  FileUtils.rm input_path
@@ -41,7 +41,7 @@ class ProcessPdfs < CloudCrowd::Action
41
41
  # the concatenated merge of the full-text into a single tar archive, ready
42
42
  # for download.
43
43
  def merge
44
- JSON.parse(input).each do |batch_url|
44
+ input.each do |batch_url|
45
45
  batch_path = File.basename(batch_url)
46
46
  download(batch_url, batch_path)
47
47
  `tar -xzf #{batch_path}`
@@ -0,0 +1,14 @@
1
+ # A parallel WordCount. Depends on the 'wc' utility.
2
+ class WordCount < CloudCrowd::Action
3
+
4
+ # Count the words in a single book.
5
+ def process
6
+ (`wc -w #{input_path}`).match(/\A\s*(\d+)/)[1].to_i
7
+ end
8
+
9
+ # Sum the total word count.
10
+ def merge
11
+ input.inject(0) {|sum, count| sum + count }
12
+ end
13
+
14
+ end
data/cloud-crowd.gemspec CHANGED
@@ -1,10 +1,10 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'cloud-crowd'
3
- s.version = '0.0.5' # Keep version in sync with cloud-cloud.rb
3
+ s.version = '0.0.6' # Keep version in sync with cloud-crowd.rb
4
4
  s.date = '2009-09-01'
5
5
 
6
- s.homepage = "http://documentcloud.org" # wiki page on github?
7
- s.summary = "Better living through Map --> Ruby --> Reduce"
6
+ s.homepage = "http://wiki.github.com/documentcloud/cloud-crowd"
7
+ s.summary = "Parallel Processing for the Rest of Us"
8
8
  s.description = <<-EOS
9
9
  The crowd, suddenly there where there was nothing before, is a mysterious and
10
10
  universal phenomenon. A few people may have been standing together -- five, ten
@@ -13,18 +13,16 @@ Gem::Specification.new do |s|
13
13
  streets had only one direction.
14
14
  EOS
15
15
 
16
- s.authors = ['Jeremy Ashkenas']
17
- s.email = 'jeremy@documentcloud.org'
18
- s.rubyforge_project = 'cloud-crowd'
19
-
20
- s.require_paths = ['lib']
21
- s.executables = ['crowd']
22
-
23
- # s.post_install_message = "Run `crowd --help` for information on using CloudCrowd."
16
+ s.authors = ['Jeremy Ashkenas']
17
+ s.email = 'jeremy@documentcloud.org'
18
+ s.rubyforge_project = 'cloud-crowd'
24
19
 
20
+ s.require_paths = ['lib']
21
+ s.executables = ['crowd']
22
+
25
23
  s.has_rdoc = true
26
24
  s.extra_rdoc_files = ['README']
27
- s.rdoc_options << '--title' << 'CloudCrowd | Better Living through Map --> Ruby --> Reduce' <<
25
+ s.rdoc_options << '--title' << 'CloudCrowd | Parallel Processing for the Rest of Us' <<
28
26
  '--exclude' << 'test' <<
29
27
  '--main' << 'README' <<
30
28
  '--all'
@@ -47,6 +45,7 @@ Gem::Specification.new do |s|
47
45
  s.files = %w(
48
46
  actions/graphics_magick.rb
49
47
  actions/process_pdfs.rb
48
+ actions/word_count.rb
50
49
  cloud-crowd.gemspec
51
50
  config/config.example.ru
52
51
  config/config.example.yml
@@ -54,6 +53,7 @@ config/database.example.yml
54
53
  EPIGRAPHS
55
54
  examples/graphics_magick_example.rb
56
55
  examples/process_pdfs_example.rb
56
+ examples/word_count_example.rb
57
57
  lib/cloud-crowd.rb
58
58
  lib/cloud_crowd/action.rb
59
59
  lib/cloud_crowd/app.rb
@@ -67,6 +67,7 @@ lib/cloud_crowd/helpers.rb
67
67
  lib/cloud_crowd/inflector.rb
68
68
  lib/cloud_crowd/models/job.rb
69
69
  lib/cloud_crowd/models/work_unit.rb
70
+ lib/cloud_crowd/models/worker_record.rb
70
71
  lib/cloud_crowd/models.rb
71
72
  lib/cloud_crowd/runner.rb
72
73
  lib/cloud_crowd/schema.rb
@@ -74,11 +75,24 @@ lib/cloud_crowd/worker.rb
74
75
  LICENSE
75
76
  public/css/admin_console.css
76
77
  public/css/reset.css
78
+ public/images/bullet_green.png
79
+ public/images/bullet_white.png
80
+ public/images/cloud_hand.png
81
+ public/images/header_back.png
82
+ public/images/logo.png
77
83
  public/images/queue_fill.png
84
+ public/images/server_error.png
85
+ public/images/sidebar_bottom.png
86
+ public/images/sidebar_top.png
87
+ public/images/worker_info.png
88
+ public/images/worker_info_loading.gif
78
89
  public/js/admin_console.js
79
- public/js/jquery-1.3.2.js
90
+ public/js/excanvas.pack.js
91
+ public/js/jquery.flot.pack.js
92
+ public/js/jquery-1.3.2.min.js
80
93
  README
81
94
  test/acceptance/test_failing_work_units.rb
95
+ test/acceptance/test_word_count.rb
82
96
  test/blueprints.rb
83
97
  test/config/config.ru
84
98
  test/config/config.yml
@@ -1,6 +1,11 @@
1
- # The URL where you're planning on running the server/queue/database.
1
+ # The URL where you're planning on running the central server/queue/database.
2
2
  :central_server: http://localhost:9173
3
3
 
4
+ # The storage back-end that you'd like to use for intermediate and final results
5
+ # of processing. 's3' and 'filesystem' are supported. 'filesystem' should only
6
+ # be used in development, or on single-machine installations.
7
+ :storage: s3
8
+
4
9
  # Please provide your AWS credentials for S3 storage of job output.
5
10
  :aws_access_key: [your AWS access key]
6
11
  :aws_secret_key: [your AWS secret access key]
@@ -20,8 +25,8 @@
20
25
  :password: [your password]
21
26
 
22
27
  # By default, CloudCrowd looks for installed actions inside the 'actions'
23
- # subdirectory of this configuration folder. 'actions_path' allows you to install
24
- # them in a different location.
28
+ # subdirectory of this configuration folder. 'actions_path' allows you to load
29
+ # additional actions from a location of your choice.
25
30
  # :actions_path: /path/to/actions
26
31
 
27
32
  # Set the following numbers to tweak the configuration of your worker daemons.
@@ -38,14 +43,6 @@
38
43
  # The maximum number of seconds a worker waits between checking the job queue.
39
44
  :max_worker_wait: 20
40
45
 
41
- # The backoff multiplier the worker uses to slow down the check interval when
42
- # there's no work in the queue.
43
- :worker_wait_multiplier: 1.3
44
-
45
- # The number of seconds a worker waits to retry when there's some kind of
46
- # internal error (ie. the central server fails to respond)
47
- :worker_retry_wait: 5
48
-
49
46
  # The number of separate attempts that will be made to process an individual
50
47
  # work unit, before marking it as having failed.
51
48
  :work_unit_retries: 3
@@ -1,48 +1,44 @@
1
- # Inside of a restclient session:
2
- # This is a fancy example that produces black and white, annotated, and blurred
3
- # versions of a list of URLs downloaded from the web.
1
+ #!/usr/bin/env ruby -rubygems
4
2
 
3
+ require 'restclient'
5
4
  require 'json'
6
5
 
7
- RestClient.post(
8
- 'http://localhost:9173/jobs',
9
- {:job => {
10
-
11
- 'action' => 'graphics_magick',
12
-
13
- 'inputs' => [
14
- 'http://www.sci-fi-o-rama.com/wp-content/uploads/2008/10/dan_mcpharlin_the_land_of_sleeping_things.jpg',
15
- 'http://www.sci-fi-o-rama.com/wp-content/uploads/2009/07/dan_mcpharlin_wired_spread01.jpg',
16
- 'http://www.sci-fi-o-rama.com/wp-content/uploads/2009/07/dan_mcpharlin_wired_spread03.jpg',
17
- 'http://www.sci-fi-o-rama.com/wp-content/uploads/2009/07/dan_mcpharlin_wired_spread02.jpg',
18
- 'http://www.sci-fi-o-rama.com/wp-content/uploads/2009/02/dan_mcpharlin_untitled.jpg'
19
- ],
20
-
21
- 'options' => {
22
- 'steps' => [{
23
- 'name' => 'annotated',
24
- 'command' => 'convert',
25
- 'options' => '-font helvetica -fill red -draw "font-size 35; text 75,75 CloudCrowd!"',
26
- 'extension' => 'jpg'
27
- },{
28
- 'name' => 'blurred',
29
- 'command' => 'convert',
30
- 'options' => '-blur 10x5',
31
- 'extension' => 'png'
32
- },{
33
- 'name' => 'bw',
34
- 'input' => 'blurred',
35
- 'command' => 'convert',
36
- 'options' => '-monochrome',
37
- 'extension' => 'jpg'
38
- }]
39
- }
40
-
41
- }.to_json}
42
- )
6
+ # This example demonstrates the GraphicsMagick action by taking in a list of
7
+ # five images, and producing annotated, blurred, and black and white versions
8
+ # of each image. See actions/graphics_magick.rb
43
9
 
44
- # status = RestClient.get('http://localhost:9173/jobs/[job_id]')
45
-
46
- # puts JSON.parse(RestClient.get('http://localhost:9173/jobs/[job_id]'))['outputs'].values.map {|v|
47
- # JSON.parse(v).map {|v| v['url']}
48
- # }.flatten.join("\n")
10
+ RestClient.post('http://localhost:9173/jobs',
11
+ {:job => {
12
+
13
+ 'action' => 'graphics_magick',
14
+
15
+ 'inputs' => [
16
+ 'http://www.sci-fi-o-rama.com/wp-content/uploads/2008/10/dan_mcpharlin_the_land_of_sleeping_things.jpg',
17
+ 'http://www.sci-fi-o-rama.com/wp-content/uploads/2009/07/dan_mcpharlin_wired_spread01.jpg',
18
+ 'http://www.sci-fi-o-rama.com/wp-content/uploads/2009/07/dan_mcpharlin_wired_spread03.jpg',
19
+ 'http://www.sci-fi-o-rama.com/wp-content/uploads/2009/07/dan_mcpharlin_wired_spread02.jpg',
20
+ 'http://www.sci-fi-o-rama.com/wp-content/uploads/2009/02/dan_mcpharlin_untitled.jpg'
21
+ ],
22
+
23
+ 'options' => {
24
+ 'steps' => [{
25
+ 'name' => 'annotated',
26
+ 'command' => 'convert',
27
+ 'options' => '-font helvetica -fill red -draw "font-size 35; text 75,75 CloudCrowd!"',
28
+ 'extension' => 'jpg'
29
+ },{
30
+ 'name' => 'blurred',
31
+ 'command' => 'convert',
32
+ 'options' => '-blur 10x5',
33
+ 'extension' => 'png'
34
+ },{
35
+ 'name' => 'bw',
36
+ 'input' => 'blurred',
37
+ 'command' => 'convert',
38
+ 'options' => '-monochrome',
39
+ 'extension' => 'jpg'
40
+ }]
41
+ }
42
+
43
+ }.to_json}
44
+ )
@@ -1,30 +1,40 @@
1
- RestClient.post(
2
- 'http://localhost:9173/jobs',
3
- {:job => {
4
-
5
- 'action' => 'process_pdfs',
6
-
7
- 'inputs' => [
8
- 'http://tigger.uic.edu/~victor/personal/futurism.pdf',
9
- 'http://www.jonasmekas.com/Catalog_excerpt/The%20Avant-Garde%20From%20Futurism%20to%20Fluxus.pdf',
10
- 'http://www.dzignism.com/articles/Futurist.Manifesto.pdf'
11
- ],
12
-
13
- 'options' => {
14
-
15
- 'batch_size' => 7,
16
-
17
- 'images' => [{
18
- 'name' => '700',
19
- 'options' => '-resize 700x -density 220 -depth 4 -unsharp 0.5x0.5+0.5+0.03',
20
- 'extension' => 'gif'
21
- },{
22
- 'name' => '1000',
23
- 'options' => '-resize 1000x -density 220 -depth 4 -unsharp 0.5x0.5+0.5+0.03',
24
- 'extension' => 'gif'
25
- }]
26
-
27
- }
28
-
29
- }.to_json}
1
+ #!/usr/bin/env ruby -rubygems
2
+
3
+ require 'restclient'
4
+ require 'json'
5
+
6
+ # This example demonstrates a fairly complicated PDF-processing action, designed
7
+ # to extract the PDF's text, and produce GIF versions of each page. The action
8
+ # (actions/process_pdfs.rb) shows an example of using all three steps,
9
+ # split, process, and merge.
10
+
11
+ RestClient.post('http://localhost:9173/jobs',
12
+ {:job => {
13
+
14
+ 'action' => 'process_pdfs',
15
+
16
+ 'inputs' => [
17
+ 'http://tigger.uic.edu/~victor/personal/futurism.pdf',
18
+ 'http://www.jonasmekas.com/Catalog_excerpt/The%20Avant-Garde%20From%20Futurism%20to%20Fluxus.pdf',
19
+ 'http://www.dzignism.com/articles/Futurist.Manifesto.pdf',
20
+ 'http://benfry.com/phd/dissertation-050312b-acrobat.pdf'
21
+ ],
22
+
23
+ 'options' => {
24
+
25
+ 'batch_size' => 7,
26
+
27
+ 'images' => [{
28
+ 'name' => '700',
29
+ 'options' => '-resize 700x -density 220 -depth 4 -unsharp 0.5x0.5+0.5+0.03',
30
+ 'extension' => 'gif'
31
+ },{
32
+ 'name' => '1000',
33
+ 'options' => '-resize 1000x -density 220 -depth 4 -unsharp 0.5x0.5+0.5+0.03',
34
+ 'extension' => 'gif'
35
+ }]
36
+
37
+ }
38
+
39
+ }.to_json}
30
40
  )
@@ -0,0 +1,41 @@
1
+ #!/usr/bin/env ruby -rubygems
2
+
3
+ require 'restclient'
4
+ require 'json'
5
+
6
+ # Let's count all the words in Shakespeare.
7
+
8
+ RestClient.post('http://localhost:9173/jobs',
9
+ {:job => {
10
+
11
+ 'action' => 'word_count',
12
+
13
+ 'inputs' => [
14
+ 'http://www.gutenberg.org/dirs/etext97/1ws3010.txt', # All's Well That Ends Well
15
+ 'http://www.gutenberg.org/dirs/etext99/1ws3511.txt', # Antony and Cleopatra
16
+ 'http://www.gutenberg.org/dirs/etext97/1ws2510.txt', # As You Like It
17
+ 'http://www.gutenberg.org/dirs/etext97/1ws0610.txt', # The Comedy of Errors
18
+ 'http://www.gutenberg.org/dirs/etext99/1ws3911.txt', # Cymbeline
19
+ 'http://www.gutenberg.org/dirs/etext00/0ws2610.txt', # Hamlet
20
+ 'http://www.gutenberg.org/dirs/etext00/0ws1910.txt', # Henry IV
21
+ 'http://www.gutenberg.org/dirs/etext99/1ws2411.txt', # Julius Caesar
22
+ 'http://www.gutenberg.org/dirs/etext98/2ws3310.txt', # King Lear
23
+ 'http://www.gutenberg.org/dirs/etext99/1ws1211j.txt', # Love's Labour's Lost
24
+ 'http://www.gutenberg.org/dirs/etext98/2ws3410.txt', # Macbeth
25
+ 'http://www.gutenberg.org/dirs/etext98/2ws1810.txt', # The Merchant of Venice
26
+ 'http://www.gutenberg.org/dirs/etext99/1ws1711.txt', # Midsummer Night's Dream
27
+ 'http://www.gutenberg.org/dirs/etext98/3ws2210.txt', # Much Ado About Nothing
28
+ 'http://www.gutenberg.org/dirs/etext00/0ws3210.txt', # Othello
29
+ 'http://www.gutenberg.org/dirs/etext98/2ws1610.txt', # Romeo and Juliet
30
+ 'http://www.gutenberg.org/dirs/etext98/2ws1010.txt', # The Taming of the Shrew
31
+ 'http://www.gutenberg.org/dirs/etext99/1ws4111.txt', # The Tempest
32
+ 'http://www.gutenberg.org/dirs/etext00/0ws0910.txt', # Titus Andronicus
33
+ 'http://www.gutenberg.org/dirs/etext99/1ws2911.txt', # Troilus and Cressida
34
+ 'http://www.gutenberg.org/dirs/etext98/3ws2810.txt', # Twelfth Night
35
+ 'http://www.gutenberg.org/files/1539/1539.txt' # The Winter's Tale
36
+ ]
37
+
38
+ }.to_json}
39
+ )
40
+
41
+ # With 23 Workers running, and over Wifi, it counted all the words in 5.5 secs.
data/lib/cloud-crowd.rb CHANGED
@@ -19,28 +19,33 @@ autoload :Digest, 'digest'
19
19
  autoload :ERB, 'erb'
20
20
  autoload :FileUtils, 'fileutils'
21
21
  autoload :JSON, 'json'
22
- autoload :RestClient, 'rest_client'
22
+ autoload :RestClient, 'restclient'
23
23
  autoload :RightAws, 'right_aws'
24
24
  autoload :Sinatra, 'sinatra'
25
25
  autoload :Socket, 'socket'
26
26
  autoload :YAML, 'yaml'
27
27
 
28
+ # Common code which should really be required in every circumstance.
29
+ require 'cloud_crowd/exceptions'
30
+
28
31
  module CloudCrowd
29
32
 
30
33
  # Autoload all the CloudCrowd classes which may not be required.
31
- autoload :App, 'cloud_crowd/app'
32
- autoload :Action, 'cloud_crowd/action'
33
- autoload :AssetStore, 'cloud_crowd/asset_store'
34
- autoload :Helpers, 'cloud_crowd/helpers'
35
- autoload :Inflector, 'cloud_crowd/inflector'
36
- autoload :Job, 'cloud_crowd/models'
37
- autoload :WorkUnit, 'cloud_crowd/models'
34
+ autoload :App, 'cloud_crowd/app'
35
+ autoload :Action, 'cloud_crowd/action'
36
+ autoload :AssetStore, 'cloud_crowd/asset_store'
37
+ autoload :Helpers, 'cloud_crowd/helpers'
38
+ autoload :Inflector, 'cloud_crowd/inflector'
39
+ autoload :Job, 'cloud_crowd/models'
40
+ autoload :Worker, 'cloud_crowd/worker'
41
+ autoload :WorkUnit, 'cloud_crowd/models'
42
+ autoload :WorkerRecord, 'cloud_crowd/models'
38
43
 
39
44
  # Root directory of the CloudCrowd gem.
40
45
  ROOT = File.expand_path(File.dirname(__FILE__) + '/..')
41
46
 
42
47
  # Keep the version in sync with the gemspec.
43
- VERSION = '0.0.5'
48
+ VERSION = '0.0.6'
44
49
 
45
50
  # A Job is processing if its WorkUnits are in the queue to be handled by workers.
46
51
  PROCESSING = 1
@@ -68,9 +73,7 @@ module CloudCrowd
68
73
  INCOMPLETE = [PROCESSING, SPLITTING, MERGING]
69
74
 
70
75
  # Mapping of statuses to their display strings.
71
- DISPLAY_STATUS_MAP = {
72
- 1 => 'processing', 2 => 'succeeded', 3 => 'failed', 4 => 'splitting', 5 => 'merging'
73
- }
76
+ DISPLAY_STATUS_MAP = ['unknown', 'processing', 'succeeded', 'failed', 'splitting', 'merging']
74
77
 
75
78
  class << self
76
79
  attr_reader :config
@@ -101,7 +104,7 @@ module CloudCrowd
101
104
  # Return the displayable status name of an internal CloudCrowd status number.
102
105
  # (See the above constants).
103
106
  def display_status(status)
104
- DISPLAY_STATUS_MAP[status]
107
+ DISPLAY_STATUS_MAP[status] || 'unknown'
105
108
  end
106
109
 
107
110
  # CloudCrowd::Actions are requested dynamically by name. Access them through
@@ -112,10 +115,10 @@ module CloudCrowd
112
115
  def actions
113
116
  return @actions if @actions
114
117
  @actions = {}
115
- default_actions = Dir["#{ROOT}/actions/*.rb"]
116
- custom_actions = Dir["#{CloudCrowd.config[:actions_path]}/*.rb"] ||
117
- Dir["#{@config_path}/actions/*.rb"]
118
- (default_actions + custom_actions).each do |path|
118
+ default_actions = Dir["#{ROOT}/actions/*.rb"]
119
+ installed_actions = Dir["#{@config_path}/actions/*.rb"]
120
+ custom_actions = Dir["#{CloudCrowd.config[:actions_path]}/*.rb"]
121
+ (default_actions + installed_actions + custom_actions).each do |path|
119
122
  name = File.basename(path, File.extname(path))
120
123
  require path
121
124
  @actions[name] = Module.const_get(Inflector.camelize(name))
@@ -2,7 +2,7 @@ module CloudCrowd
2
2
 
3
3
  # As you write your custom actions, have them inherit from CloudCrowd::Action.
4
4
  # All actions must implement a +process+ method, which should return a
5
- # JSON-serializeable object that will be used as the output for the work unit.
5
+ # JSON-serializable object that will be used as the output for the work unit.
6
6
  # See the default actions for examples.
7
7
  #
8
8
  # Optionally, actions may define +split+ and +merge+ methods to do mapping
@@ -14,6 +14,8 @@ module CloudCrowd
14
14
  # and spend their duration inside of it, so relative paths work well.
15
15
  class Action
16
16
 
17
+ FILE_URL = /\Afile:\/\//
18
+
17
19
  attr_reader :input, :input_path, :file_name, :options, :work_directory
18
20
 
19
21
  # Initializing an Action sets up all of the read-only variables that
@@ -27,11 +29,7 @@ module CloudCrowd
27
29
  @work_directory = File.expand_path(File.join(@store.temp_storage_path, storage_prefix))
28
30
  FileUtils.mkdir_p(@work_directory) unless File.exists?(@work_directory)
29
31
  Dir.chdir @work_directory
30
- unless status == MERGING
31
- @input_path = File.join(@work_directory, safe_filename(@input))
32
- @file_name = File.basename(@input_path, File.extname(@input_path))
33
- download(@input, @input_path)
34
- end
32
+ status == MERGING ? parse_input : download_input
35
33
  end
36
34
 
37
35
  # Each Action subclass must implement a +process+ method, overriding this.
@@ -39,9 +37,14 @@ module CloudCrowd
39
37
  raise NotImplementedError.new("CloudCrowd::Actions must override 'process' with their own processing code.")
40
38
  end
41
39
 
42
- # Download a file to the specified path with *curl*.
40
+ # Download a file to the specified path.
43
41
  def download(url, path)
44
- `curl -s "#{url}" > "#{path}"`
42
+ if url.match(FILE_URL)
43
+ FileUtils.cp(url.sub(FILE_URL, ''), path)
44
+ else
45
+ resp = RestClient::Request.execute(:url => url, :method => :get, :raw_response => true)
46
+ FileUtils.mv resp.file.path, path
47
+ end
45
48
  path
46
49
  end
47
50
 
@@ -57,7 +60,7 @@ module CloudCrowd
57
60
  # to the root directory (where daemons run by default).
58
61
  def cleanup_work_directory
59
62
  Dir.chdir '/'
60
- FileUtils.rm_r(@work_directory)
63
+ FileUtils.rm_r(@work_directory) if File.exists?(@work_directory)
61
64
  end
62
65
 
63
66
 
@@ -80,6 +83,20 @@ module CloudCrowd
80
83
  @storage_prefix ||= File.join(path_parts)
81
84
  end
82
85
 
86
+ # If we know that the input is JSON, replace it with the parsed form.
87
+ def parse_input
88
+ @input = JSON.parse(@input)
89
+ end
90
+
91
+ # If the input is a URL, download the file before beginning processing.
92
+ def download_input
93
+ input_is_url = !!URI.parse(@input) rescue false
94
+ return unless input_is_url
95
+ @input_path = File.join(@work_directory, safe_filename(@input))
96
+ @file_name = File.basename(@input_path, File.extname(@input_path))
97
+ download(@input, @input_path)
98
+ end
99
+
83
100
  end
84
101
 
85
102
  end