RubyGems - documentcloud-cloud-crowd - Versions diffs - 0.0.5 → 0.0.6 - Mend

documentcloud-cloud-crowd 0.0.5 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (47)

data/README +59 -50
data/actions/process_pdfs.rb +3 -3
data/actions/word_count.rb +14 -0
data/cloud-crowd.gemspec +27 -13
data/config/config.example.yml +8 -11
data/examples/graphics_magick_example.rb +40 -44
data/examples/process_pdfs_example.rb +39 -29
data/examples/word_count_example.rb +41 -0
data/lib/cloud-crowd.rb +20 -17
data/lib/cloud_crowd/action.rb +26 -9
data/lib/cloud_crowd/app.rb +26 -4
data/lib/cloud_crowd/asset_store.rb +69 -40
data/lib/cloud_crowd/command_line.rb +6 -4
data/lib/cloud_crowd/daemon.rb +65 -25
data/lib/cloud_crowd/exceptions.rb +5 -0
data/lib/cloud_crowd/helpers/resources.rb +2 -2
data/lib/cloud_crowd/models/job.rb +9 -13
data/lib/cloud_crowd/models/work_unit.rb +23 -15
data/lib/cloud_crowd/models/worker_record.rb +61 -0
data/lib/cloud_crowd/models.rb +7 -1
data/lib/cloud_crowd/schema.rb +12 -3
data/lib/cloud_crowd/worker.rb +48 -10
data/public/css/admin_console.css +174 -4
data/public/css/reset.css +17 -27
data/public/images/bullet_green.png +0 -0
data/public/images/bullet_white.png +0 -0
data/public/images/cloud_hand.png +0 -0
data/public/images/header_back.png +0 -0
data/public/images/logo.png +0 -0
data/public/images/server_error.png +0 -0
data/public/images/sidebar_bottom.png +0 -0
data/public/images/sidebar_top.png +0 -0
data/public/images/worker_info.png +0 -0
data/public/images/worker_info_loading.gif +0 -0
data/public/js/admin_console.js +127 -10
data/public/js/excanvas.pack.js +1 -0
data/public/js/jquery-1.3.2.min.js +19 -0
data/public/js/jquery.flot.pack.js +1 -0
data/test/acceptance/test_word_count.rb +49 -0
data/test/blueprints.rb +6 -5
data/test/config/config.yml +1 -4
data/test/test_helper.rb +1 -0
data/test/unit/test_job.rb +12 -4
data/test/unit/test_work_unit.rb +2 -2
data/views/index.erb +69 -14
metadata +23 -6
data/public/js/jquery-1.3.2.js +0 -4376

data/README CHANGED Viewed

@@ -22,54 +22,63 @@
-	~ CloudCrowd ~
+  ~ CloudCrowd ~
-		* A batch-processing system, map-reduce style
-		* Write your scripts in Ruby
-		* Built for Amazon EC2 and S3
-		* split -> process -> merge
-		* As easy as `gem install cloud-crowd`
-	~ Getting started ~
-		# Install the gem (documentcloud-cloud-crowd until the first official release).
-		>> sudo gem install cloud-crowd
-		# Install the CloudCrowd configuration files to a location of your choosing.
-		>> crowd install ~/config/cloud-crowd
-		# Now, you can use the full complement of `crowd` commands from inside of
-		# this configuration directory. To see the available commands:
-		>> crowd --help
-		# Edit the configuration files to your satisfaction, and add AWS credentials.
-		>> mate ~/config/cloud-crowd/config.yml
-		>> mate ~/config/cloud-crowd/database.yml
-		# Write your actions, and install them into the 'actions' subdirectory.
-		# CloudCrowd comes with some default actions as an example.
-		# To spin up the central server (make sure that you include its location
-		# in config.yml), either:
-		>> crowd server
-		# or:
-		>> thin -R config.ru --servers 3 -e production start
-		# Any server that supports Rack should work with the rackup file.
-		# Then, to spin up 10 workers:
-		>> crowd workers start -n 10
-		# To spin up workers remotely, install the 'cloud-crowd' gem, and copy over
-		# your configuration directory.
+    * Parallel processing for the rest of us
+    * Write your scripts in Ruby
+    * Built for Amazon EC2 and S3
+    * split -> process -> merge
+    * As easy as `gem install cloud-crowd`
+  ~ Wiki ~
+    http://wiki.github.com/documentcloud/cloud-crowd
+  ~ Getting started ~
+    # Install the gem.
+      >> sudo gem install cloud-crowd
+    # Install the CloudCrowd configuration files to a location of your choosing.
+      >> crowd install ~/config/cloud-crowd
+    # Now, you can use the full complement of `crowd` commands from inside of
+    # this configuration directory. To see the available commands:
+      >> crowd --help
+    # Edit the configuration files to your satisfaction, add AWS credentials,
+    # and then load the CloudCrowd schema into your configured database.
+      >> mate ~/config/cloud-crowd/config.yml
+      >> mate ~/config/cloud-crowd/database.yml
+      >> crowd load_schema
+    # Write your actions, and install them into the 'actions' subdirectory.
+    # CloudCrowd comes with some default actions as an example.
+    # To launch the central server (make sure that you include its location
+    # in config.yml), either:
+      >> crowd server
+    # or:
+      >> thin -R config.ru --servers 3 -e production start
+    # Any server that supports Rack should work with the rackup file.
+    # Then, to spin up 10 workers:
+      >> crowd workers start -n 10
+    # To spin up workers remotely, install the 'cloud-crowd' gem, and copy over
+    # your configuration directory.
+    # At this point you can visit your server console at localhost:9173 to
+    # view all of your workers, ready for action.

data/actions/process_pdfs.rb CHANGED Viewed

@@ -6,8 +6,8 @@
 # See <tt>examples/process_pdfs_example.rb</tt> for more information.
 class ProcessPdfs < CloudCrowd::Action
-  # Split up a large pdf into single-page pdfs.
-  # The double pdftk shuffle fixes the document xrefs.
+  # Split up a large pdf into single-page pdfs. Batch them into 'batch_size'
+  # chunks for processing. The double pdftk shuffle fixes the document xrefs.
   def split
     `pdftk #{input_path} burst output "#{file_name}_%05d.pdf_temp"`
     FileUtils.rm input_path
@@ -41,7 +41,7 @@ class ProcessPdfs < CloudCrowd::Action
   # the concatenated merge of the full-text into a single tar archive, ready
   # for download.
   def merge
-    JSON.parse(input).each do |batch_url|
+    input.each do |batch_url|
       batch_path = File.basename(batch_url)
       download(batch_url, batch_path)
       `tar -xzf #{batch_path}`

data/actions/word_count.rb ADDED Viewed

@@ -0,0 +1,14 @@
+# A parallel WordCount. Depends on the 'wc' utility.
+class WordCount < CloudCrowd::Action
+  # Count the words in a single book.
+  def process
+    (`wc -w #{input_path}`).match(/\A\s*(\d+)/)[1].to_i
+  end
+  # Sum the total word count.
+  def merge
+    input.inject(0) {|sum, count| sum + count }
+  end
+end

data/cloud-crowd.gemspec CHANGED Viewed

@@ -1,10 +1,10 @@
 Gem::Specification.new do |s|
   s.name      = 'cloud-crowd'
-  s.version   = '0.0.5'         # Keep version in sync with cloud-crowd.rb
+  s.version   = '0.0.6'         # Keep version in sync with cloud-crowd.rb
   s.date      = '2009-09-01'
-  s.homepage    = "http://documentcloud.org" # wiki page on github?
-  s.summary     = "Better living through Map --> Ruby --> Reduce"
+  s.homepage    = "http://wiki.github.com/documentcloud/cloud-crowd"
+  s.summary     = "Parallel Processing for the Rest of Us"
   s.description = <<-EOS
     The crowd, suddenly there where there was nothing before, is a mysterious and
     universal phenomenon. A few people may have been standing together -- five, ten
@@ -13,18 +13,16 @@ Gem::Specification.new do |s|
     streets had only one direction.
   EOS
-  s.authors     = ['Jeremy Ashkenas']
-  s.email       = 'jeremy@documentcloud.org'
-  s.rubyforge_project    = 'cloud-crowd'
-  s.require_paths = ['lib']
-  s.executables   = ['crowd']
-  # s.post_install_message = "Run `crowd --help` for information on using CloudCrowd."
+  s.authors           = ['Jeremy Ashkenas']
+  s.email             = 'jeremy@documentcloud.org'
+  s.rubyforge_project = 'cloud-crowd'
+  s.require_paths     = ['lib']
+  s.executables       = ['crowd']
   s.has_rdoc          = true
   s.extra_rdoc_files  = ['README']
-  s.rdoc_options      << '--title'    << 'CloudCrowd | Better Living through Map --> Ruby --> Reduce' <<
+  s.rdoc_options      << '--title'    << 'CloudCrowd | Parallel Processing for the Rest of Us' <<
                          '--exclude'  << 'test' <<
                          '--main'     << 'README' <<
                          '--all'
@@ -47,6 +45,7 @@ Gem::Specification.new do |s|
   s.files = %w(
 actions/graphics_magick.rb
 actions/process_pdfs.rb
+actions/word_count.rb
 cloud-crowd.gemspec
 config/config.example.ru
 config/config.example.yml
@@ -54,6 +53,7 @@ config/database.example.yml
 EPIGRAPHS
 examples/graphics_magick_example.rb
 examples/process_pdfs_example.rb
+examples/word_count_example.rb
 lib/cloud-crowd.rb
 lib/cloud_crowd/action.rb
 lib/cloud_crowd/app.rb
@@ -67,6 +67,7 @@ lib/cloud_crowd/helpers.rb
 lib/cloud_crowd/inflector.rb
 lib/cloud_crowd/models/job.rb
 lib/cloud_crowd/models/work_unit.rb
+lib/cloud_crowd/models/worker_record.rb
 lib/cloud_crowd/models.rb
 lib/cloud_crowd/runner.rb
 lib/cloud_crowd/schema.rb
@@ -74,11 +75,24 @@ lib/cloud_crowd/worker.rb
 LICENSE
 public/css/admin_console.css
 public/css/reset.css
+public/images/bullet_green.png
+public/images/bullet_white.png
+public/images/cloud_hand.png
+public/images/header_back.png
+public/images/logo.png
 public/images/queue_fill.png
+public/images/server_error.png
+public/images/sidebar_bottom.png
+public/images/sidebar_top.png
+public/images/worker_info.png
+public/images/worker_info_loading.gif
 public/js/admin_console.js
-public/js/jquery-1.3.2.js
+public/js/excanvas.pack.js
+public/js/jquery.flot.pack.js
+public/js/jquery-1.3.2.min.js
 README
 test/acceptance/test_failing_work_units.rb
+test/acceptance/test_word_count.rb
 test/blueprints.rb
 test/config/config.ru
 test/config/config.yml

data/config/config.example.yml CHANGED Viewed

@@ -1,6 +1,11 @@
-# The URL where you're planning on running the server/queue/database.
+# The URL where you're planning on running the central server/queue/database.
 :central_server:          http://localhost:9173
+# The storage back-end that you'd like to use for intermediate and final results
+# of processing. 's3' and 'filesystem' are supported. 'filesystem' should only
+# be used in development, or on single-machine installations.
+:storage:                 s3
 # Please provide your AWS credentials for S3 storage of job output.
 :aws_access_key:          [your AWS access key]
 :aws_secret_key:          [your AWS secret access key]
@@ -20,8 +25,8 @@
 :password:                [your password]
 # By default, CloudCrowd looks for installed actions inside the 'actions'
-# subdirectory of this configuration folder. 'actions_path' allows you to install
-# them in a different location.
+# subdirectory of this configuration folder. 'actions_path' allows you to load
+# additional actions from a location of your choice.
 # :actions_path: /path/to/actions
 # Set the following numbers to tweak the configuration of your worker daemons.
@@ -38,14 +43,6 @@
 # The maximum number of seconds a worker waits between checking the job queue.
 :max_worker_wait:         20
-# The backoff multiplier the worker uses to slow down the check interval when
-# there's no work in the queue.
-:worker_wait_multiplier:  1.3
-# The number of seconds a worker waits to retry when there's some kind of
-# internal error (ie. the central server fails to respond)
-:worker_retry_wait:       5
 # The number of separate attempts that will be made to process an individual
 # work unit, before marking it as having failed.
 :work_unit_retries:       3

data/examples/graphics_magick_example.rb CHANGED Viewed

@@ -1,48 +1,44 @@
-# Inside of a restclient session:
-# This is a fancy example that produces black and white, annotated, and blurred
-# versions of a list of URLs downloaded from the web.
+#!/usr/bin/env ruby -rubygems
+require 'restclient'
 require 'json'
-RestClient.post(
-	'http://localhost:9173/jobs',
-	{:job => {
-		'action' => 'graphics_magick',
-		'inputs' => [
-			'http://www.sci-fi-o-rama.com/wp-content/uploads/2008/10/dan_mcpharlin_the_land_of_sleeping_things.jpg',
-			'http://www.sci-fi-o-rama.com/wp-content/uploads/2009/07/dan_mcpharlin_wired_spread01.jpg',
-			'http://www.sci-fi-o-rama.com/wp-content/uploads/2009/07/dan_mcpharlin_wired_spread03.jpg',
-			'http://www.sci-fi-o-rama.com/wp-content/uploads/2009/07/dan_mcpharlin_wired_spread02.jpg',
-			'http://www.sci-fi-o-rama.com/wp-content/uploads/2009/02/dan_mcpharlin_untitled.jpg'
-		],
-		'options' => {
-			'steps' => [{
-				'name' 			=> 'annotated',
-				'command' 	=> 'convert',
-				'options'		=> '-font helvetica -fill red -draw "font-size 35; text 75,75 CloudCrowd!"',
-				'extension' => 'jpg'
-			},{
-				'name'			=> 'blurred',
-				'command' 	=> 'convert',
-				'options'		=> '-blur 10x5',
-				'extension' => 'png'
-			},{
-				'name' 			=> 'bw',
-				'input'			=> 'blurred',
-				'command' 	=> 'convert',
-				'options' 	=> '-monochrome',
-				'extension' => 'jpg'
-			}]
-		}
-	}.to_json}
-)
+# This example demonstrates the GraphicsMagick action by taking in a list of
+# five images, and producing annotated, blurred, and black and white versions
+# of each image. See actions/graphics_magick.rb
-# status = RestClient.get('http://localhost:9173/jobs/[job_id]')
-# puts JSON.parse(RestClient.get('http://localhost:9173/jobs/[job_id]'))['outputs'].values.map {|v|
-#		JSON.parse(v).map {|v| v['url']}
-#	}.flatten.join("\n")
+RestClient.post('http://localhost:9173/jobs',
+  {:job => {
+    'action' => 'graphics_magick',
+    'inputs' => [
+      'http://www.sci-fi-o-rama.com/wp-content/uploads/2008/10/dan_mcpharlin_the_land_of_sleeping_things.jpg',
+      'http://www.sci-fi-o-rama.com/wp-content/uploads/2009/07/dan_mcpharlin_wired_spread01.jpg',
+      'http://www.sci-fi-o-rama.com/wp-content/uploads/2009/07/dan_mcpharlin_wired_spread03.jpg',
+      'http://www.sci-fi-o-rama.com/wp-content/uploads/2009/07/dan_mcpharlin_wired_spread02.jpg',
+      'http://www.sci-fi-o-rama.com/wp-content/uploads/2009/02/dan_mcpharlin_untitled.jpg'
+    ],
+    'options' => {
+      'steps' => [{
+        'name'      => 'annotated',
+        'command'   => 'convert',
+        'options'   => '-font helvetica -fill red -draw "font-size 35; text 75,75 CloudCrowd!"',
+        'extension' => 'jpg'
+      },{
+        'name'      => 'blurred',
+        'command'   => 'convert',
+        'options'   => '-blur 10x5',
+        'extension' => 'png'
+      },{
+        'name'      => 'bw',
+        'input'     => 'blurred',
+        'command'   => 'convert',
+        'options'   => '-monochrome',
+        'extension' => 'jpg'
+      }]
+    }
+  }.to_json}
+)

data/examples/process_pdfs_example.rb CHANGED Viewed

@@ -1,30 +1,40 @@
-RestClient.post(
-	'http://localhost:9173/jobs',
-	{:job => {
-		'action' => 'process_pdfs',
-		'inputs' => [
-		  'http://tigger.uic.edu/~victor/personal/futurism.pdf',
-		  'http://www.jonasmekas.com/Catalog_excerpt/The%20Avant-Garde%20From%20Futurism%20to%20Fluxus.pdf',
-		  'http://www.dzignism.com/articles/Futurist.Manifesto.pdf'
-		],
-		'options' => {
-		  'batch_size' => 7,
-		  'images' => [{
-				'name' 			=> '700',
-				'options'		=> '-resize 700x -density 220 -depth 4 -unsharp 0.5x0.5+0.5+0.03',
-				'extension' => 'gif'
-			},{
-				'name' 			=> '1000',
-				'options'		=> '-resize 1000x -density 220 -depth 4 -unsharp 0.5x0.5+0.5+0.03',
-				'extension' => 'gif'
-			}]
-		}
-	}.to_json}
+#!/usr/bin/env ruby -rubygems
+require 'restclient'
+require 'json'
+# This example demonstrates a fairly complicated PDF-processing action, designed
+# to extract the PDF's text, and produce GIF versions of each page. The action
+# (actions/process_pdfs.rb) shows an example of using all three steps,
+# split, process, and merge.
+RestClient.post('http://localhost:9173/jobs',
+  {:job => {
+    'action' => 'process_pdfs',
+    'inputs' => [
+      'http://tigger.uic.edu/~victor/personal/futurism.pdf',
+      'http://www.jonasmekas.com/Catalog_excerpt/The%20Avant-Garde%20From%20Futurism%20to%20Fluxus.pdf',
+      'http://www.dzignism.com/articles/Futurist.Manifesto.pdf',
+      'http://benfry.com/phd/dissertation-050312b-acrobat.pdf'
+    ],
+    'options' => {
+      'batch_size' => 7,
+      'images' => [{
+        'name'      => '700',
+        'options'   => '-resize 700x -density 220 -depth 4 -unsharp 0.5x0.5+0.5+0.03',
+        'extension' => 'gif'
+      },{
+        'name'      => '1000',
+        'options'   => '-resize 1000x -density 220 -depth 4 -unsharp 0.5x0.5+0.5+0.03',
+        'extension' => 'gif'
+      }]
+    }
+  }.to_json}
 )

data/examples/word_count_example.rb ADDED Viewed

@@ -0,0 +1,41 @@
+#!/usr/bin/env ruby -rubygems
+require 'restclient'
+require 'json'
+# Let's count all the words in Shakespeare.
+RestClient.post('http://localhost:9173/jobs',
+  {:job => {
+    'action' => 'word_count',
+    'inputs' => [
+      'http://www.gutenberg.org/dirs/etext97/1ws3010.txt',  # All's Well That Ends Well
+      'http://www.gutenberg.org/dirs/etext99/1ws3511.txt',  # Antony and Cleopatra
+      'http://www.gutenberg.org/dirs/etext97/1ws2510.txt',  # As You Like It
+      'http://www.gutenberg.org/dirs/etext97/1ws0610.txt',  # The Comedy of Errors
+      'http://www.gutenberg.org/dirs/etext99/1ws3911.txt',  # Cymbeline
+      'http://www.gutenberg.org/dirs/etext00/0ws2610.txt',  # Hamlet
+      'http://www.gutenberg.org/dirs/etext00/0ws1910.txt',  # Henry IV
+      'http://www.gutenberg.org/dirs/etext99/1ws2411.txt',  # Julius Caesar
+      'http://www.gutenberg.org/dirs/etext98/2ws3310.txt',  # King Lear
+      'http://www.gutenberg.org/dirs/etext99/1ws1211j.txt', # Love's Labour's Lost
+      'http://www.gutenberg.org/dirs/etext98/2ws3410.txt',  # Macbeth
+      'http://www.gutenberg.org/dirs/etext98/2ws1810.txt',  # The Merchant of Venice
+      'http://www.gutenberg.org/dirs/etext99/1ws1711.txt',  # Midsummer Night's Dream
+      'http://www.gutenberg.org/dirs/etext98/3ws2210.txt',  # Much Ado About Nothing
+      'http://www.gutenberg.org/dirs/etext00/0ws3210.txt',  # Othello
+      'http://www.gutenberg.org/dirs/etext98/2ws1610.txt',  # Romeo and Juliet
+      'http://www.gutenberg.org/dirs/etext98/2ws1010.txt',  # The Taming of the Shrew
+      'http://www.gutenberg.org/dirs/etext99/1ws4111.txt',  # The Tempest
+      'http://www.gutenberg.org/dirs/etext00/0ws0910.txt',  # Titus Andronicus
+      'http://www.gutenberg.org/dirs/etext99/1ws2911.txt',  # Troilus and Cressida
+      'http://www.gutenberg.org/dirs/etext98/3ws2810.txt',  # Twelfth Night
+      'http://www.gutenberg.org/files/1539/1539.txt'        # The Winter's Tale
+    ]
+  }.to_json}
+)
+# With 23 Workers running, and over Wifi, it counted all the words in 5.5 secs.

data/lib/cloud-crowd.rb CHANGED Viewed

@@ -19,28 +19,33 @@ autoload :Digest,       'digest'
 autoload :ERB,          'erb'
 autoload :FileUtils,    'fileutils'
 autoload :JSON,         'json'
-autoload :RestClient,   'rest_client'
+autoload :RestClient,   'restclient'
 autoload :RightAws,     'right_aws'
 autoload :Sinatra,      'sinatra'
 autoload :Socket,       'socket'
 autoload :YAML,         'yaml'
+# Common code which should really be required in every circumstance.
+require 'cloud_crowd/exceptions'
 module CloudCrowd
   # Autoload all the CloudCrowd classes which may not be required.
-  autoload :App,        'cloud_crowd/app'
-  autoload :Action,     'cloud_crowd/action'
-  autoload :AssetStore, 'cloud_crowd/asset_store'
-  autoload :Helpers,    'cloud_crowd/helpers'
-  autoload :Inflector,  'cloud_crowd/inflector'
-  autoload :Job,        'cloud_crowd/models'
-  autoload :WorkUnit,   'cloud_crowd/models'
+  autoload :App,          'cloud_crowd/app'
+  autoload :Action,       'cloud_crowd/action'
+  autoload :AssetStore,   'cloud_crowd/asset_store'
+  autoload :Helpers,      'cloud_crowd/helpers'
+  autoload :Inflector,    'cloud_crowd/inflector'
+  autoload :Job,          'cloud_crowd/models'
+  autoload :Worker,       'cloud_crowd/worker'
+  autoload :WorkUnit,     'cloud_crowd/models'
+  autoload :WorkerRecord, 'cloud_crowd/models'
   # Root directory of the CloudCrowd gem.
   ROOT        = File.expand_path(File.dirname(__FILE__) + '/..')
   # Keep the version in sync with the gemspec.
-  VERSION     = '0.0.5'
+  VERSION     = '0.0.6'
   # A Job is processing if its WorkUnits are in the queue to be handled by workers.
   PROCESSING  = 1
@@ -68,9 +73,7 @@ module CloudCrowd
   INCOMPLETE  = [PROCESSING, SPLITTING, MERGING]
   # Mapping of statuses to their display strings.
-  DISPLAY_STATUS_MAP = {
-    1 => 'processing', 2 => 'succeeded', 3 => 'failed', 4 => 'splitting', 5 => 'merging'
-  }
+  DISPLAY_STATUS_MAP = ['unknown', 'processing', 'succeeded', 'failed', 'splitting', 'merging']
   class << self
     attr_reader :config
@@ -101,7 +104,7 @@ module CloudCrowd
     # Return the displayable status name of an internal CloudCrowd status number.
     # (See the above constants).
     def display_status(status)
-      DISPLAY_STATUS_MAP[status]
+      DISPLAY_STATUS_MAP[status] || 'unknown'
     end
     # CloudCrowd::Actions are requested dynamically by name. Access them through
@@ -112,10 +115,10 @@ module CloudCrowd
     def actions
       return @actions if @actions
       @actions = {}
-      default_actions = Dir["#{ROOT}/actions/*.rb"]
-      custom_actions  = Dir["#{CloudCrowd.config[:actions_path]}/*.rb"] ||
-                        Dir["#{@config_path}/actions/*.rb"]
-      (default_actions + custom_actions).each do |path|
+      default_actions   = Dir["#{ROOT}/actions/*.rb"]
+      installed_actions = Dir["#{@config_path}/actions/*.rb"]
+      custom_actions    = Dir["#{CloudCrowd.config[:actions_path]}/*.rb"]
+      (default_actions + installed_actions + custom_actions).each do |path|
         name = File.basename(path, File.extname(path))
         require path
         @actions[name] = Module.const_get(Inflector.camelize(name))

data/lib/cloud_crowd/action.rb CHANGED Viewed

@@ -2,7 +2,7 @@ module CloudCrowd
   # As you write your custom actions, have them inherit from CloudCrowd::Action.
   # All actions must implement a +process+ method, which should return a
-  # JSON-serializeable object that will be used as the output for the work unit.
+  # JSON-serializable object that will be used as the output for the work unit.
   # See the default actions for examples.
   #
   # Optionally, actions may define +split+ and +merge+ methods to do mapping
@@ -14,6 +14,8 @@ module CloudCrowd
   # and spend their duration inside of it, so relative paths work well.
   class Action
+    FILE_URL = /\Afile:\/\//
     attr_reader :input, :input_path, :file_name, :options, :work_directory
     # Initializing an Action sets up all of the read-only variables that
@@ -27,11 +29,7 @@ module CloudCrowd
       @work_directory = File.expand_path(File.join(@store.temp_storage_path, storage_prefix))
       FileUtils.mkdir_p(@work_directory) unless File.exists?(@work_directory)
       Dir.chdir @work_directory
-      unless status == MERGING
-        @input_path = File.join(@work_directory, safe_filename(@input))
-        @file_name = File.basename(@input_path, File.extname(@input_path))
-        download(@input, @input_path)
-      end
+      status == MERGING ? parse_input : download_input
     end
     # Each Action subclass must implement a +process+ method, overriding this.
@@ -39,9 +37,14 @@ module CloudCrowd
       raise NotImplementedError.new("CloudCrowd::Actions must override 'process' with their own processing code.")
     end
-    # Download a file to the specified path with *curl*.
+    # Download a file to the specified path.
     def download(url, path)
-      `curl -s "#{url}" > "#{path}"`
+      if url.match(FILE_URL)
+        FileUtils.cp(url.sub(FILE_URL, ''), path)
+      else
+        resp = RestClient::Request.execute(:url => url, :method => :get, :raw_response => true)
+        FileUtils.mv resp.file.path, path
+      end
       path
     end
@@ -57,7 +60,7 @@ module CloudCrowd
     # to the root directory (where daemons run by default).
     def cleanup_work_directory
       Dir.chdir '/'
-      FileUtils.rm_r(@work_directory)
+      FileUtils.rm_r(@work_directory) if File.exists?(@work_directory)
     end
@@ -80,6 +83,20 @@ module CloudCrowd
       @storage_prefix ||= File.join(path_parts)
     end
+    # If we know that the input is JSON, replace it with the parsed form.
+    def parse_input
+      @input = JSON.parse(@input)
+    end
+    # If the input is a URL, download the file before beginning processing.
+    def download_input
+      input_is_url = !!URI.parse(@input) rescue false
+      return unless input_is_url
+      @input_path = File.join(@work_directory, safe_filename(@input))
+      @file_name = File.basename(@input_path, File.extname(@input_path))
+      download(@input, @input_path)
+    end
   end
 end