documentcloud-cloud-crowd 0.0.5 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. data/README +59 -50
  2. data/actions/process_pdfs.rb +3 -3
  3. data/actions/word_count.rb +14 -0
  4. data/cloud-crowd.gemspec +27 -13
  5. data/config/config.example.yml +8 -11
  6. data/examples/graphics_magick_example.rb +40 -44
  7. data/examples/process_pdfs_example.rb +39 -29
  8. data/examples/word_count_example.rb +41 -0
  9. data/lib/cloud-crowd.rb +20 -17
  10. data/lib/cloud_crowd/action.rb +26 -9
  11. data/lib/cloud_crowd/app.rb +26 -4
  12. data/lib/cloud_crowd/asset_store.rb +69 -40
  13. data/lib/cloud_crowd/command_line.rb +6 -4
  14. data/lib/cloud_crowd/daemon.rb +65 -25
  15. data/lib/cloud_crowd/exceptions.rb +5 -0
  16. data/lib/cloud_crowd/helpers/resources.rb +2 -2
  17. data/lib/cloud_crowd/models/job.rb +9 -13
  18. data/lib/cloud_crowd/models/work_unit.rb +23 -15
  19. data/lib/cloud_crowd/models/worker_record.rb +61 -0
  20. data/lib/cloud_crowd/models.rb +7 -1
  21. data/lib/cloud_crowd/schema.rb +12 -3
  22. data/lib/cloud_crowd/worker.rb +48 -10
  23. data/public/css/admin_console.css +174 -4
  24. data/public/css/reset.css +17 -27
  25. data/public/images/bullet_green.png +0 -0
  26. data/public/images/bullet_white.png +0 -0
  27. data/public/images/cloud_hand.png +0 -0
  28. data/public/images/header_back.png +0 -0
  29. data/public/images/logo.png +0 -0
  30. data/public/images/server_error.png +0 -0
  31. data/public/images/sidebar_bottom.png +0 -0
  32. data/public/images/sidebar_top.png +0 -0
  33. data/public/images/worker_info.png +0 -0
  34. data/public/images/worker_info_loading.gif +0 -0
  35. data/public/js/admin_console.js +127 -10
  36. data/public/js/excanvas.pack.js +1 -0
  37. data/public/js/jquery-1.3.2.min.js +19 -0
  38. data/public/js/jquery.flot.pack.js +1 -0
  39. data/test/acceptance/test_word_count.rb +49 -0
  40. data/test/blueprints.rb +6 -5
  41. data/test/config/config.yml +1 -4
  42. data/test/test_helper.rb +1 -0
  43. data/test/unit/test_job.rb +12 -4
  44. data/test/unit/test_work_unit.rb +2 -2
  45. data/views/index.erb +69 -14
  46. metadata +23 -6
  47. data/public/js/jquery-1.3.2.js +0 -4376
data/README CHANGED
@@ -22,54 +22,63 @@
22
22
 
23
23
 
24
24
 
25
- ~ CloudCrowd ~
25
+ ~ CloudCrowd ~
26
26
 
27
- * A batch-processing system, map-reduce style
28
- * Write your scripts in Ruby
29
- * Built for Amazon EC2 and S3
30
- * split -> process -> merge
31
- * As easy as `gem install cloud-crowd`
32
-
33
-
34
- ~ Getting started ~
35
-
36
- # Install the gem (documentcloud-cloud-crowd until the first official release).
37
-
38
- >> sudo gem install cloud-crowd
39
-
40
- # Install the CloudCrowd configuration files to a location of your choosing.
41
-
42
- >> crowd install ~/config/cloud-crowd
43
-
44
- # Now, you can use the full complement of `crowd` commands from inside of
45
- # this configuration directory. To see the available commands:
46
-
47
- >> crowd --help
48
-
49
- # Edit the configuration files to your satisfaction, and add AWS credentials.
50
-
51
- >> mate ~/config/cloud-crowd/config.yml
52
- >> mate ~/config/cloud-crowd/database.yml
53
-
54
- # Write your actions, and install them into the 'actions' subdirectory.
55
- # CloudCrowd comes with some default actions as an example.
56
-
57
- # To spin up the central server (make sure that you include its location
58
- # in config.yml), either:
59
-
60
- >> crowd server
61
-
62
- # or:
63
-
64
- >> thin -R config.ru --servers 3 -e production start
65
-
66
- # Any server that supports Rack should work with the rackup file.
67
-
68
- # Then, to spin up 10 workers:
69
-
70
- >> crowd workers start -n 10
71
-
72
- # To spin up workers remotely, install the 'cloud-crowd' gem, and copy over
73
- # your configuration directory.
74
-
75
-
27
+ * Parallel processing for the rest of us
28
+ * Write your scripts in Ruby
29
+ * Built for Amazon EC2 and S3
30
+ * split -> process -> merge
31
+ * As easy as `gem install cloud-crowd`
32
+
33
+
34
+ ~ Wiki ~
35
+
36
+ http://wiki.github.com/documentcloud/cloud-crowd
37
+
38
+
39
+ ~ Getting started ~
40
+
41
+ # Install the gem.
42
+
43
+ >> sudo gem install cloud-crowd
44
+
45
+ # Install the CloudCrowd configuration files to a location of your choosing.
46
+
47
+ >> crowd install ~/config/cloud-crowd
48
+
49
+ # Now, you can use the full complement of `crowd` commands from inside of
50
+ # this configuration directory. To see the available commands:
51
+
52
+ >> crowd --help
53
+
54
+ # Edit the configuration files to your satisfaction, add AWS credentials,
55
+ # and then load the CloudCrowd schema into your configured database.
56
+
57
+ >> mate ~/config/cloud-crowd/config.yml
58
+ >> mate ~/config/cloud-crowd/database.yml
59
+ >> crowd load_schema
60
+
61
+ # Write your actions, and install them into the 'actions' subdirectory.
62
+ # CloudCrowd comes with some default actions as an example.
63
+
64
+ # To launch the central server (make sure that you include its location
65
+ # in config.yml), either:
66
+
67
+ >> crowd server
68
+
69
+ # or:
70
+
71
+ >> thin -R config.ru --servers 3 -e production start
72
+
73
+ # Any server that supports Rack should work with the rackup file.
74
+
75
+ # Then, to spin up 10 workers:
76
+
77
+ >> crowd workers start -n 10
78
+
79
+ # To spin up workers remotely, install the 'cloud-crowd' gem, and copy over
80
+ # your configuration directory.
81
+
82
+ # At this point you can visit your server console at localhost:9173 to
83
+ # view all of your workers, ready for action.
84
+
@@ -6,8 +6,8 @@
6
6
  # See <tt>examples/process_pdfs_example.rb</tt> for more information.
7
7
  class ProcessPdfs < CloudCrowd::Action
8
8
 
9
- # Split up a large pdf into single-page pdfs.
10
- # The double pdftk shuffle fixes the document xrefs.
9
+ # Split up a large pdf into single-page pdfs. Batch them into 'batch_size'
10
+ # chunks for processing. The double pdftk shuffle fixes the document xrefs.
11
11
  def split
12
12
  `pdftk #{input_path} burst output "#{file_name}_%05d.pdf_temp"`
13
13
  FileUtils.rm input_path
@@ -41,7 +41,7 @@ class ProcessPdfs < CloudCrowd::Action
41
41
  # the concatenated merge of the full-text into a single tar archive, ready
42
42
  # for download.
43
43
  def merge
44
- JSON.parse(input).each do |batch_url|
44
+ input.each do |batch_url|
45
45
  batch_path = File.basename(batch_url)
46
46
  download(batch_url, batch_path)
47
47
  `tar -xzf #{batch_path}`
@@ -0,0 +1,14 @@
1
+ # A parallel WordCount. Depends on the 'wc' utility.
2
+ class WordCount < CloudCrowd::Action
3
+
4
+ # Count the words in a single book.
5
+ def process
6
+ (`wc -w #{input_path}`).match(/\A\s*(\d+)/)[1].to_i
7
+ end
8
+
9
+ # Sum the total word count.
10
+ def merge
11
+ input.inject(0) {|sum, count| sum + count }
12
+ end
13
+
14
+ end
data/cloud-crowd.gemspec CHANGED
@@ -1,10 +1,10 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'cloud-crowd'
3
- s.version = '0.0.5' # Keep version in sync with cloud-cloud.rb
3
+ s.version = '0.0.6' # Keep version in sync with cloud-crowd.rb
4
4
  s.date = '2009-09-01'
5
5
 
6
- s.homepage = "http://documentcloud.org" # wiki page on github?
7
- s.summary = "Better living through Map --> Ruby --> Reduce"
6
+ s.homepage = "http://wiki.github.com/documentcloud/cloud-crowd"
7
+ s.summary = "Parallel Processing for the Rest of Us"
8
8
  s.description = <<-EOS
9
9
  The crowd, suddenly there where there was nothing before, is a mysterious and
10
10
  universal phenomenon. A few people may have been standing together -- five, ten
@@ -13,18 +13,16 @@ Gem::Specification.new do |s|
13
13
  streets had only one direction.
14
14
  EOS
15
15
 
16
- s.authors = ['Jeremy Ashkenas']
17
- s.email = 'jeremy@documentcloud.org'
18
- s.rubyforge_project = 'cloud-crowd'
19
-
20
- s.require_paths = ['lib']
21
- s.executables = ['crowd']
22
-
23
- # s.post_install_message = "Run `crowd --help` for information on using CloudCrowd."
16
+ s.authors = ['Jeremy Ashkenas']
17
+ s.email = 'jeremy@documentcloud.org'
18
+ s.rubyforge_project = 'cloud-crowd'
24
19
 
20
+ s.require_paths = ['lib']
21
+ s.executables = ['crowd']
22
+
25
23
  s.has_rdoc = true
26
24
  s.extra_rdoc_files = ['README']
27
- s.rdoc_options << '--title' << 'CloudCrowd | Better Living through Map --> Ruby --> Reduce' <<
25
+ s.rdoc_options << '--title' << 'CloudCrowd | Parallel Processing for the Rest of Us' <<
28
26
  '--exclude' << 'test' <<
29
27
  '--main' << 'README' <<
30
28
  '--all'
@@ -47,6 +45,7 @@ Gem::Specification.new do |s|
47
45
  s.files = %w(
48
46
  actions/graphics_magick.rb
49
47
  actions/process_pdfs.rb
48
+ actions/word_count.rb
50
49
  cloud-crowd.gemspec
51
50
  config/config.example.ru
52
51
  config/config.example.yml
@@ -54,6 +53,7 @@ config/database.example.yml
54
53
  EPIGRAPHS
55
54
  examples/graphics_magick_example.rb
56
55
  examples/process_pdfs_example.rb
56
+ examples/word_count_example.rb
57
57
  lib/cloud-crowd.rb
58
58
  lib/cloud_crowd/action.rb
59
59
  lib/cloud_crowd/app.rb
@@ -67,6 +67,7 @@ lib/cloud_crowd/helpers.rb
67
67
  lib/cloud_crowd/inflector.rb
68
68
  lib/cloud_crowd/models/job.rb
69
69
  lib/cloud_crowd/models/work_unit.rb
70
+ lib/cloud_crowd/models/worker_record.rb
70
71
  lib/cloud_crowd/models.rb
71
72
  lib/cloud_crowd/runner.rb
72
73
  lib/cloud_crowd/schema.rb
@@ -74,11 +75,24 @@ lib/cloud_crowd/worker.rb
74
75
  LICENSE
75
76
  public/css/admin_console.css
76
77
  public/css/reset.css
78
+ public/images/bullet_green.png
79
+ public/images/bullet_white.png
80
+ public/images/cloud_hand.png
81
+ public/images/header_back.png
82
+ public/images/logo.png
77
83
  public/images/queue_fill.png
84
+ public/images/server_error.png
85
+ public/images/sidebar_bottom.png
86
+ public/images/sidebar_top.png
87
+ public/images/worker_info.png
88
+ public/images/worker_info_loading.gif
78
89
  public/js/admin_console.js
79
- public/js/jquery-1.3.2.js
90
+ public/js/excanvas.pack.js
91
+ public/js/jquery.flot.pack.js
92
+ public/js/jquery-1.3.2.min.js
80
93
  README
81
94
  test/acceptance/test_failing_work_units.rb
95
+ test/acceptance/test_word_count.rb
82
96
  test/blueprints.rb
83
97
  test/config/config.ru
84
98
  test/config/config.yml
@@ -1,6 +1,11 @@
1
- # The URL where you're planning on running the server/queue/database.
1
+ # The URL where you're planning on running the central server/queue/database.
2
2
  :central_server: http://localhost:9173
3
3
 
4
+ # The storage back-end that you'd like to use for intermediate and final results
5
+ # of processing. 's3' and 'filesystem' are supported. 'filesystem' should only
6
+ # be used in development, or on single-machine installations.
7
+ :storage: s3
8
+
4
9
  # Please provide your AWS credentials for S3 storage of job output.
5
10
  :aws_access_key: [your AWS access key]
6
11
  :aws_secret_key: [your AWS secret access key]
@@ -20,8 +25,8 @@
20
25
  :password: [your password]
21
26
 
22
27
  # By default, CloudCrowd looks for installed actions inside the 'actions'
23
- # subdirectory of this configuration folder. 'actions_path' allows you to install
24
- # them in a different location.
28
+ # subdirectory of this configuration folder. 'actions_path' allows you to load
29
+ # additional actions from a location of your choice.
25
30
  # :actions_path: /path/to/actions
26
31
 
27
32
  # Set the following numbers to tweak the configuration of your worker daemons.
@@ -38,14 +43,6 @@
38
43
  # The maximum number of seconds a worker waits between checking the job queue.
39
44
  :max_worker_wait: 20
40
45
 
41
- # The backoff multiplier the worker uses to slow down the check interval when
42
- # there's no work in the queue.
43
- :worker_wait_multiplier: 1.3
44
-
45
- # The number of seconds a worker waits to retry when there's some kind of
46
- # internal error (ie. the central server fails to respond)
47
- :worker_retry_wait: 5
48
-
49
46
  # The number of separate attempts that will be made to process an individual
50
47
  # work unit, before marking it as having failed.
51
48
  :work_unit_retries: 3
@@ -1,48 +1,44 @@
1
- # Inside of a restclient session:
2
- # This is a fancy example that produces black and white, annotated, and blurred
3
- # versions of a list of URLs downloaded from the web.
1
+ #!/usr/bin/env ruby -rubygems
4
2
 
3
+ require 'restclient'
5
4
  require 'json'
6
5
 
7
- RestClient.post(
8
- 'http://localhost:9173/jobs',
9
- {:job => {
10
-
11
- 'action' => 'graphics_magick',
12
-
13
- 'inputs' => [
14
- 'http://www.sci-fi-o-rama.com/wp-content/uploads/2008/10/dan_mcpharlin_the_land_of_sleeping_things.jpg',
15
- 'http://www.sci-fi-o-rama.com/wp-content/uploads/2009/07/dan_mcpharlin_wired_spread01.jpg',
16
- 'http://www.sci-fi-o-rama.com/wp-content/uploads/2009/07/dan_mcpharlin_wired_spread03.jpg',
17
- 'http://www.sci-fi-o-rama.com/wp-content/uploads/2009/07/dan_mcpharlin_wired_spread02.jpg',
18
- 'http://www.sci-fi-o-rama.com/wp-content/uploads/2009/02/dan_mcpharlin_untitled.jpg'
19
- ],
20
-
21
- 'options' => {
22
- 'steps' => [{
23
- 'name' => 'annotated',
24
- 'command' => 'convert',
25
- 'options' => '-font helvetica -fill red -draw "font-size 35; text 75,75 CloudCrowd!"',
26
- 'extension' => 'jpg'
27
- },{
28
- 'name' => 'blurred',
29
- 'command' => 'convert',
30
- 'options' => '-blur 10x5',
31
- 'extension' => 'png'
32
- },{
33
- 'name' => 'bw',
34
- 'input' => 'blurred',
35
- 'command' => 'convert',
36
- 'options' => '-monochrome',
37
- 'extension' => 'jpg'
38
- }]
39
- }
40
-
41
- }.to_json}
42
- )
6
+ # This example demonstrates the GraphicsMagick action by taking in a list of
7
+ # five images, and producing annotated, blurred, and black and white versions
8
+ # of each image. See actions/graphics_magick.rb
43
9
 
44
- # status = RestClient.get('http://localhost:9173/jobs/[job_id]')
45
-
46
- # puts JSON.parse(RestClient.get('http://localhost:9173/jobs/[job_id]'))['outputs'].values.map {|v|
47
- # JSON.parse(v).map {|v| v['url']}
48
- # }.flatten.join("\n")
10
+ RestClient.post('http://localhost:9173/jobs',
11
+ {:job => {
12
+
13
+ 'action' => 'graphics_magick',
14
+
15
+ 'inputs' => [
16
+ 'http://www.sci-fi-o-rama.com/wp-content/uploads/2008/10/dan_mcpharlin_the_land_of_sleeping_things.jpg',
17
+ 'http://www.sci-fi-o-rama.com/wp-content/uploads/2009/07/dan_mcpharlin_wired_spread01.jpg',
18
+ 'http://www.sci-fi-o-rama.com/wp-content/uploads/2009/07/dan_mcpharlin_wired_spread03.jpg',
19
+ 'http://www.sci-fi-o-rama.com/wp-content/uploads/2009/07/dan_mcpharlin_wired_spread02.jpg',
20
+ 'http://www.sci-fi-o-rama.com/wp-content/uploads/2009/02/dan_mcpharlin_untitled.jpg'
21
+ ],
22
+
23
+ 'options' => {
24
+ 'steps' => [{
25
+ 'name' => 'annotated',
26
+ 'command' => 'convert',
27
+ 'options' => '-font helvetica -fill red -draw "font-size 35; text 75,75 CloudCrowd!"',
28
+ 'extension' => 'jpg'
29
+ },{
30
+ 'name' => 'blurred',
31
+ 'command' => 'convert',
32
+ 'options' => '-blur 10x5',
33
+ 'extension' => 'png'
34
+ },{
35
+ 'name' => 'bw',
36
+ 'input' => 'blurred',
37
+ 'command' => 'convert',
38
+ 'options' => '-monochrome',
39
+ 'extension' => 'jpg'
40
+ }]
41
+ }
42
+
43
+ }.to_json}
44
+ )
@@ -1,30 +1,40 @@
1
- RestClient.post(
2
- 'http://localhost:9173/jobs',
3
- {:job => {
4
-
5
- 'action' => 'process_pdfs',
6
-
7
- 'inputs' => [
8
- 'http://tigger.uic.edu/~victor/personal/futurism.pdf',
9
- 'http://www.jonasmekas.com/Catalog_excerpt/The%20Avant-Garde%20From%20Futurism%20to%20Fluxus.pdf',
10
- 'http://www.dzignism.com/articles/Futurist.Manifesto.pdf'
11
- ],
12
-
13
- 'options' => {
14
-
15
- 'batch_size' => 7,
16
-
17
- 'images' => [{
18
- 'name' => '700',
19
- 'options' => '-resize 700x -density 220 -depth 4 -unsharp 0.5x0.5+0.5+0.03',
20
- 'extension' => 'gif'
21
- },{
22
- 'name' => '1000',
23
- 'options' => '-resize 1000x -density 220 -depth 4 -unsharp 0.5x0.5+0.5+0.03',
24
- 'extension' => 'gif'
25
- }]
26
-
27
- }
28
-
29
- }.to_json}
1
+ #!/usr/bin/env ruby -rubygems
2
+
3
+ require 'restclient'
4
+ require 'json'
5
+
6
+ # This example demonstrates a fairly complicated PDF-processing action, designed
7
+ # to extract the PDF's text, and produce GIF versions of each page. The action
8
+ # (actions/process_pdfs.rb) shows an example of using all three steps,
9
+ # split, process, and merge.
10
+
11
+ RestClient.post('http://localhost:9173/jobs',
12
+ {:job => {
13
+
14
+ 'action' => 'process_pdfs',
15
+
16
+ 'inputs' => [
17
+ 'http://tigger.uic.edu/~victor/personal/futurism.pdf',
18
+ 'http://www.jonasmekas.com/Catalog_excerpt/The%20Avant-Garde%20From%20Futurism%20to%20Fluxus.pdf',
19
+ 'http://www.dzignism.com/articles/Futurist.Manifesto.pdf',
20
+ 'http://benfry.com/phd/dissertation-050312b-acrobat.pdf'
21
+ ],
22
+
23
+ 'options' => {
24
+
25
+ 'batch_size' => 7,
26
+
27
+ 'images' => [{
28
+ 'name' => '700',
29
+ 'options' => '-resize 700x -density 220 -depth 4 -unsharp 0.5x0.5+0.5+0.03',
30
+ 'extension' => 'gif'
31
+ },{
32
+ 'name' => '1000',
33
+ 'options' => '-resize 1000x -density 220 -depth 4 -unsharp 0.5x0.5+0.5+0.03',
34
+ 'extension' => 'gif'
35
+ }]
36
+
37
+ }
38
+
39
+ }.to_json}
30
40
  )
@@ -0,0 +1,41 @@
1
+ #!/usr/bin/env ruby -rubygems
2
+
3
+ require 'restclient'
4
+ require 'json'
5
+
6
+ # Let's count all the words in Shakespeare.
7
+
8
+ RestClient.post('http://localhost:9173/jobs',
9
+ {:job => {
10
+
11
+ 'action' => 'word_count',
12
+
13
+ 'inputs' => [
14
+ 'http://www.gutenberg.org/dirs/etext97/1ws3010.txt', # All's Well That Ends Well
15
+ 'http://www.gutenberg.org/dirs/etext99/1ws3511.txt', # Antony and Cleopatra
16
+ 'http://www.gutenberg.org/dirs/etext97/1ws2510.txt', # As You Like It
17
+ 'http://www.gutenberg.org/dirs/etext97/1ws0610.txt', # The Comedy of Errors
18
+ 'http://www.gutenberg.org/dirs/etext99/1ws3911.txt', # Cymbeline
19
+ 'http://www.gutenberg.org/dirs/etext00/0ws2610.txt', # Hamlet
20
+ 'http://www.gutenberg.org/dirs/etext00/0ws1910.txt', # Henry IV
21
+ 'http://www.gutenberg.org/dirs/etext99/1ws2411.txt', # Julius Caesar
22
+ 'http://www.gutenberg.org/dirs/etext98/2ws3310.txt', # King Lear
23
+ 'http://www.gutenberg.org/dirs/etext99/1ws1211j.txt', # Love's Labour's Lost
24
+ 'http://www.gutenberg.org/dirs/etext98/2ws3410.txt', # Macbeth
25
+ 'http://www.gutenberg.org/dirs/etext98/2ws1810.txt', # The Merchant of Venice
26
+ 'http://www.gutenberg.org/dirs/etext99/1ws1711.txt', # Midsummer Night's Dream
27
+ 'http://www.gutenberg.org/dirs/etext98/3ws2210.txt', # Much Ado About Nothing
28
+ 'http://www.gutenberg.org/dirs/etext00/0ws3210.txt', # Othello
29
+ 'http://www.gutenberg.org/dirs/etext98/2ws1610.txt', # Romeo and Juliet
30
+ 'http://www.gutenberg.org/dirs/etext98/2ws1010.txt', # The Taming of the Shrew
31
+ 'http://www.gutenberg.org/dirs/etext99/1ws4111.txt', # The Tempest
32
+ 'http://www.gutenberg.org/dirs/etext00/0ws0910.txt', # Titus Andronicus
33
+ 'http://www.gutenberg.org/dirs/etext99/1ws2911.txt', # Troilus and Cressida
34
+ 'http://www.gutenberg.org/dirs/etext98/3ws2810.txt', # Twelfth Night
35
+ 'http://www.gutenberg.org/files/1539/1539.txt' # The Winter's Tale
36
+ ]
37
+
38
+ }.to_json}
39
+ )
40
+
41
+ # With 23 Workers running, and over Wifi, it counted all the words in 5.5 secs.
data/lib/cloud-crowd.rb CHANGED
@@ -19,28 +19,33 @@ autoload :Digest, 'digest'
19
19
  autoload :ERB, 'erb'
20
20
  autoload :FileUtils, 'fileutils'
21
21
  autoload :JSON, 'json'
22
- autoload :RestClient, 'rest_client'
22
+ autoload :RestClient, 'restclient'
23
23
  autoload :RightAws, 'right_aws'
24
24
  autoload :Sinatra, 'sinatra'
25
25
  autoload :Socket, 'socket'
26
26
  autoload :YAML, 'yaml'
27
27
 
28
+ # Common code which should really be required in every circumstance.
29
+ require 'cloud_crowd/exceptions'
30
+
28
31
  module CloudCrowd
29
32
 
30
33
  # Autoload all the CloudCrowd classes which may not be required.
31
- autoload :App, 'cloud_crowd/app'
32
- autoload :Action, 'cloud_crowd/action'
33
- autoload :AssetStore, 'cloud_crowd/asset_store'
34
- autoload :Helpers, 'cloud_crowd/helpers'
35
- autoload :Inflector, 'cloud_crowd/inflector'
36
- autoload :Job, 'cloud_crowd/models'
37
- autoload :WorkUnit, 'cloud_crowd/models'
34
+ autoload :App, 'cloud_crowd/app'
35
+ autoload :Action, 'cloud_crowd/action'
36
+ autoload :AssetStore, 'cloud_crowd/asset_store'
37
+ autoload :Helpers, 'cloud_crowd/helpers'
38
+ autoload :Inflector, 'cloud_crowd/inflector'
39
+ autoload :Job, 'cloud_crowd/models'
40
+ autoload :Worker, 'cloud_crowd/worker'
41
+ autoload :WorkUnit, 'cloud_crowd/models'
42
+ autoload :WorkerRecord, 'cloud_crowd/models'
38
43
 
39
44
  # Root directory of the CloudCrowd gem.
40
45
  ROOT = File.expand_path(File.dirname(__FILE__) + '/..')
41
46
 
42
47
  # Keep the version in sync with the gemspec.
43
- VERSION = '0.0.5'
48
+ VERSION = '0.0.6'
44
49
 
45
50
  # A Job is processing if its WorkUnits are in the queue to be handled by workers.
46
51
  PROCESSING = 1
@@ -68,9 +73,7 @@ module CloudCrowd
68
73
  INCOMPLETE = [PROCESSING, SPLITTING, MERGING]
69
74
 
70
75
  # Mapping of statuses to their display strings.
71
- DISPLAY_STATUS_MAP = {
72
- 1 => 'processing', 2 => 'succeeded', 3 => 'failed', 4 => 'splitting', 5 => 'merging'
73
- }
76
+ DISPLAY_STATUS_MAP = ['unknown', 'processing', 'succeeded', 'failed', 'splitting', 'merging']
74
77
 
75
78
  class << self
76
79
  attr_reader :config
@@ -101,7 +104,7 @@ module CloudCrowd
101
104
  # Return the displayable status name of an internal CloudCrowd status number.
102
105
  # (See the above constants).
103
106
  def display_status(status)
104
- DISPLAY_STATUS_MAP[status]
107
+ DISPLAY_STATUS_MAP[status] || 'unknown'
105
108
  end
106
109
 
107
110
  # CloudCrowd::Actions are requested dynamically by name. Access them through
@@ -112,10 +115,10 @@ module CloudCrowd
112
115
  def actions
113
116
  return @actions if @actions
114
117
  @actions = {}
115
- default_actions = Dir["#{ROOT}/actions/*.rb"]
116
- custom_actions = Dir["#{CloudCrowd.config[:actions_path]}/*.rb"] ||
117
- Dir["#{@config_path}/actions/*.rb"]
118
- (default_actions + custom_actions).each do |path|
118
+ default_actions = Dir["#{ROOT}/actions/*.rb"]
119
+ installed_actions = Dir["#{@config_path}/actions/*.rb"]
120
+ custom_actions = Dir["#{CloudCrowd.config[:actions_path]}/*.rb"]
121
+ (default_actions + installed_actions + custom_actions).each do |path|
119
122
  name = File.basename(path, File.extname(path))
120
123
  require path
121
124
  @actions[name] = Module.const_get(Inflector.camelize(name))
@@ -2,7 +2,7 @@ module CloudCrowd
2
2
 
3
3
  # As you write your custom actions, have them inherit from CloudCrowd::Action.
4
4
  # All actions must implement a +process+ method, which should return a
5
- # JSON-serializeable object that will be used as the output for the work unit.
5
+ # JSON-serializable object that will be used as the output for the work unit.
6
6
  # See the default actions for examples.
7
7
  #
8
8
  # Optionally, actions may define +split+ and +merge+ methods to do mapping
@@ -14,6 +14,8 @@ module CloudCrowd
14
14
  # and spend their duration inside of it, so relative paths work well.
15
15
  class Action
16
16
 
17
+ FILE_URL = /\Afile:\/\//
18
+
17
19
  attr_reader :input, :input_path, :file_name, :options, :work_directory
18
20
 
19
21
  # Initializing an Action sets up all of the read-only variables that
@@ -27,11 +29,7 @@ module CloudCrowd
27
29
  @work_directory = File.expand_path(File.join(@store.temp_storage_path, storage_prefix))
28
30
  FileUtils.mkdir_p(@work_directory) unless File.exists?(@work_directory)
29
31
  Dir.chdir @work_directory
30
- unless status == MERGING
31
- @input_path = File.join(@work_directory, safe_filename(@input))
32
- @file_name = File.basename(@input_path, File.extname(@input_path))
33
- download(@input, @input_path)
34
- end
32
+ status == MERGING ? parse_input : download_input
35
33
  end
36
34
 
37
35
  # Each Action subclass must implement a +process+ method, overriding this.
@@ -39,9 +37,14 @@ module CloudCrowd
39
37
  raise NotImplementedError.new("CloudCrowd::Actions must override 'process' with their own processing code.")
40
38
  end
41
39
 
42
- # Download a file to the specified path with *curl*.
40
+ # Download a file to the specified path.
43
41
  def download(url, path)
44
- `curl -s "#{url}" > "#{path}"`
42
+ if url.match(FILE_URL)
43
+ FileUtils.cp(url.sub(FILE_URL, ''), path)
44
+ else
45
+ resp = RestClient::Request.execute(:url => url, :method => :get, :raw_response => true)
46
+ FileUtils.mv resp.file.path, path
47
+ end
45
48
  path
46
49
  end
47
50
 
@@ -57,7 +60,7 @@ module CloudCrowd
57
60
  # to the root directory (where daemons run by default).
58
61
  def cleanup_work_directory
59
62
  Dir.chdir '/'
60
- FileUtils.rm_r(@work_directory)
63
+ FileUtils.rm_r(@work_directory) if File.exists?(@work_directory)
61
64
  end
62
65
 
63
66
 
@@ -80,6 +83,20 @@ module CloudCrowd
80
83
  @storage_prefix ||= File.join(path_parts)
81
84
  end
82
85
 
86
+ # If we know that the input is JSON, replace it with the parsed form.
87
+ def parse_input
88
+ @input = JSON.parse(@input)
89
+ end
90
+
91
+ # If the input is a URL, download the file before beginning processing.
92
+ def download_input
93
+ input_is_url = !!URI.parse(@input) rescue false
94
+ return unless input_is_url
95
+ @input_path = File.join(@work_directory, safe_filename(@input))
96
+ @file_name = File.basename(@input_path, File.extname(@input_path))
97
+ download(@input, @input_path)
98
+ end
99
+
83
100
  end
84
101
 
85
102
  end