documentcloud-cloud-crowd 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (40) hide show
  1. data/README +8 -8
  2. data/cloud-crowd.gemspec +8 -8
  3. data/config/config.example.ru +8 -2
  4. data/config/config.example.yml +6 -15
  5. data/examples/process_pdfs_example.rb +1 -1
  6. data/examples/word_count_example.rb +1 -0
  7. data/lib/cloud-crowd.rb +6 -5
  8. data/lib/cloud_crowd/action.rb +11 -7
  9. data/lib/cloud_crowd/asset_store/filesystem_store.rb +5 -0
  10. data/lib/cloud_crowd/asset_store/s3_store.rb +7 -3
  11. data/lib/cloud_crowd/asset_store.rb +1 -1
  12. data/lib/cloud_crowd/command_line.rb +14 -53
  13. data/lib/cloud_crowd/exceptions.rb +4 -0
  14. data/lib/cloud_crowd/helpers/authorization.rb +2 -2
  15. data/lib/cloud_crowd/helpers/resources.rb +0 -20
  16. data/lib/cloud_crowd/models/job.rb +25 -26
  17. data/lib/cloud_crowd/models/node_record.rb +81 -0
  18. data/lib/cloud_crowd/models/work_unit.rb +70 -30
  19. data/lib/cloud_crowd/models.rb +1 -1
  20. data/lib/cloud_crowd/node.rb +87 -0
  21. data/lib/cloud_crowd/schema.rb +19 -16
  22. data/lib/cloud_crowd/{app.rb → server.rb} +25 -30
  23. data/lib/cloud_crowd/worker.rb +50 -74
  24. data/public/css/admin_console.css +26 -14
  25. data/public/images/server.png +0 -0
  26. data/public/js/admin_console.js +45 -18
  27. data/test/acceptance/test_failing_work_units.rb +1 -1
  28. data/test/acceptance/{test_app.rb → test_server.rb} +15 -15
  29. data/test/acceptance/test_word_count.rb +3 -9
  30. data/test/blueprints.rb +0 -1
  31. data/test/config/config.ru +1 -1
  32. data/test/config/config.yml +1 -3
  33. data/test/unit/test_configuration.rb +1 -1
  34. data/test/unit/test_job.rb +1 -0
  35. data/test/unit/test_work_unit.rb +2 -4
  36. data/views/index.erb +13 -8
  37. metadata +9 -9
  38. data/lib/cloud_crowd/daemon.rb +0 -95
  39. data/lib/cloud_crowd/models/worker_record.rb +0 -61
  40. data/lib/cloud_crowd/runner.rb +0 -15
data/README CHANGED
@@ -30,19 +30,19 @@
30
30
  * split -> process -> merge
31
31
  * As easy as `gem install cloud-crowd`
32
32
 
33
- Well-suited for:
34
-
35
- * Generating or resizing images.
36
- * Encoding video.
37
- * Running text extraction or OCR on PDFs.
38
- * Migrating a large file set or database.
39
- * Web scraping.
33
+ Well-suited for:
34
+
35
+ * Generating or resizing images.
36
+ * Encoding video.
37
+ * Running text extraction or OCR on PDFs.
38
+ * Migrating a large file set or database.
39
+ * Web scraping.
40
40
 
41
41
 
42
42
  ~ Documentation ~
43
43
 
44
44
  Wiki: http://wiki.github.com/documentcloud/cloud-crowd
45
- Rdoc: http://rdoc.info/projects/documentcloud/cloud-crowd
45
+ Rdoc: http://rdoc.info/projects/documentcloud/cloud-crowd
46
46
 
47
47
 
48
48
  ~ Getting started ~
data/cloud-crowd.gemspec CHANGED
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'cloud-crowd'
3
- s.version = '0.1.0' # Keep version in sync with cloud-crowd.rb
4
- s.date = '2009-09-01'
3
+ s.version = '0.1.1' # Keep version in sync with cloud-crowd.rb
4
+ s.date = '2009-09-15'
5
5
 
6
6
  s.homepage = "http://wiki.github.com/documentcloud/cloud-crowd"
7
7
  s.summary = "Parallel Processing for the Rest of Us"
@@ -32,7 +32,7 @@ Gem::Specification.new do |s|
32
32
  s.add_dependency 'json', ['>= 1.1.7']
33
33
  s.add_dependency 'rest-client', ['>= 1.0.3']
34
34
  s.add_dependency 'right_aws', ['>= 1.10.0']
35
- s.add_dependency 'daemons', ['>= 1.0.10']
35
+ s.add_dependency 'thin', ['>= 1.2.4']
36
36
 
37
37
  if s.respond_to?(:add_development_dependency)
38
38
  s.add_development_dependency 'faker', ['>= 0.3.1']
@@ -56,23 +56,22 @@ examples/process_pdfs_example.rb
56
56
  examples/word_count_example.rb
57
57
  lib/cloud-crowd.rb
58
58
  lib/cloud_crowd/action.rb
59
- lib/cloud_crowd/app.rb
60
59
  lib/cloud_crowd/asset_store/filesystem_store.rb
61
60
  lib/cloud_crowd/asset_store/s3_store.rb
62
61
  lib/cloud_crowd/asset_store.rb
63
62
  lib/cloud_crowd/command_line.rb
64
- lib/cloud_crowd/daemon.rb
65
63
  lib/cloud_crowd/exceptions.rb
66
64
  lib/cloud_crowd/helpers/authorization.rb
67
65
  lib/cloud_crowd/helpers/resources.rb
68
66
  lib/cloud_crowd/helpers.rb
69
67
  lib/cloud_crowd/inflector.rb
70
68
  lib/cloud_crowd/models/job.rb
69
+ lib/cloud_crowd/models/node_record.rb
71
70
  lib/cloud_crowd/models/work_unit.rb
72
- lib/cloud_crowd/models/worker_record.rb
73
71
  lib/cloud_crowd/models.rb
74
- lib/cloud_crowd/runner.rb
72
+ lib/cloud_crowd/node.rb
75
73
  lib/cloud_crowd/schema.rb
74
+ lib/cloud_crowd/server.rb
76
75
  lib/cloud_crowd/worker.rb
77
76
  LICENSE
78
77
  public/css/admin_console.css
@@ -83,6 +82,7 @@ public/images/cloud_hand.png
83
82
  public/images/header_back.png
84
83
  public/images/logo.png
85
84
  public/images/queue_fill.png
85
+ public/images/server.png
86
86
  public/images/server_error.png
87
87
  public/images/sidebar_bottom.png
88
88
  public/images/sidebar_top.png
@@ -93,7 +93,7 @@ public/js/excanvas.js
93
93
  public/js/flot.js
94
94
  public/js/jquery.js
95
95
  README
96
- test/acceptance/test_app.rb
96
+ test/acceptance/test_server.rb
97
97
  test/acceptance/test_failing_work_units.rb
98
98
  test/acceptance/test_word_count.rb
99
99
  test/blueprints.rb
@@ -4,7 +4,13 @@
4
4
  # using any Rack-compliant server handler. For example, start up three servers
5
5
  # with a specified port number, using Thin:
6
6
  #
7
- # thin start -R config.ru -p 9173 --servers 3
7
+ # thin start -R config.ru --servers 3
8
+ #
9
+ # Or a single server with Unicorn:
10
+ #
11
+ # unicorn config.ru
12
+ #
13
+
8
14
 
9
15
  require 'rubygems'
10
16
  require 'cloud-crowd'
@@ -13,5 +19,5 @@ CloudCrowd.configure(File.dirname(__FILE__) + '/config.yml')
13
19
  CloudCrowd.configure_database(File.dirname(__FILE__) + '/database.yml')
14
20
 
15
21
  map '/' do
16
- run CloudCrowd::App
22
+ run CloudCrowd::Server
17
23
  end
@@ -1,6 +1,11 @@
1
1
  # The URL where you're planning on running the central server/queue/database.
2
2
  :central_server: http://localhost:9173
3
3
 
4
+ # Set the maximum number of workers allowed per-node. Workers only run while
5
+ # there's work to be done. It's best to set 'max_workers' below the point where
6
+ # you'd start to swap or peg your CPU (as determined by experiment).
7
+ :max_workers: 5
8
+
4
9
  # The storage back-end that you'd like to use for intermediate and final results
5
10
  # of processing. 's3' and 'filesystem' are supported. 'filesystem' should only
6
11
  # be used in development, or on single-machine installations.
@@ -29,20 +34,6 @@
29
34
  # additional actions from a location of your choice.
30
35
  # :actions_path: /path/to/actions
31
36
 
32
- # Set the following numbers to tweak the configuration of your worker daemons.
33
- # Optimum results will depend on proportion of the Memory/CPU/IO bottlenecks
34
- # in your actions, the number of central servers you have running, and your
35
- # desired balance between latency and traffic.
36
-
37
- # The number of workers that `crowd workers start` spins up.
38
- :num_workers: 3
39
-
40
- # The minimum number of seconds a worker waits between checking the job queue.
41
- :min_worker_wait: 1
42
-
43
- # The maximum number of seconds a worker waits between checking the job queue.
44
- :max_worker_wait: 5
45
-
46
37
  # The number of separate attempts that will be made to process an individual
47
38
  # work unit, before marking it as having failed.
48
- :work_unit_retries: 3
39
+ :work_unit_retries: 3
@@ -17,7 +17,7 @@ RestClient.post('http://localhost:9173/jobs',
17
17
  'http://tigger.uic.edu/~victor/personal/futurism.pdf',
18
18
  'http://www.jonasmekas.com/Catalog_excerpt/The%20Avant-Garde%20From%20Futurism%20to%20Fluxus.pdf',
19
19
  'http://www.dzignism.com/articles/Futurist.Manifesto.pdf',
20
- 'http://benfry.com/phd/dissertation-050312b-acrobat.pdf'
20
+ 'http://www.pitt.edu/~slavic/sisc/SISC4/dadswell.pdf'
21
21
  ],
22
22
 
23
23
  'options' => {
@@ -39,3 +39,4 @@ RestClient.post('http://localhost:9173/jobs',
39
39
  )
40
40
 
41
41
  # With 23 Workers running, and over Wifi, it counted all the words in 5.5 secs.
42
+ # On a fast internet connection, you may not even see this job show up.
data/lib/cloud-crowd.rb CHANGED
@@ -5,16 +5,15 @@ $LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__))
5
5
  # Common Gems:
6
6
  require 'rubygems'
7
7
  gem 'activerecord'
8
- gem 'daemons'
9
8
  gem 'json'
10
9
  gem 'rest-client'
11
10
  gem 'right_aws'
12
11
  gem 'sinatra'
12
+ gem 'thin'
13
13
 
14
14
  # Autoloading for all the pieces which may or may not be needed:
15
15
  autoload :ActiveRecord, 'activerecord'
16
16
  autoload :Benchmark, 'benchmark'
17
- autoload :Daemons, 'daemons'
18
17
  autoload :Digest, 'digest'
19
18
  autoload :ERB, 'erb'
20
19
  autoload :FileUtils, 'fileutils'
@@ -23,6 +22,7 @@ autoload :RestClient, 'restclient'
23
22
  autoload :RightAws, 'right_aws'
24
23
  autoload :Sinatra, 'sinatra'
25
24
  autoload :Socket, 'socket'
25
+ autoload :Thin, 'thin'
26
26
  autoload :YAML, 'yaml'
27
27
 
28
28
  # Common code which should really be required in every circumstance.
@@ -31,21 +31,22 @@ require 'cloud_crowd/exceptions'
31
31
  module CloudCrowd
32
32
 
33
33
  # Autoload all the CloudCrowd classes which may not be required.
34
- autoload :App, 'cloud_crowd/app'
35
34
  autoload :Action, 'cloud_crowd/action'
36
35
  autoload :AssetStore, 'cloud_crowd/asset_store'
37
36
  autoload :Helpers, 'cloud_crowd/helpers'
38
37
  autoload :Inflector, 'cloud_crowd/inflector'
39
38
  autoload :Job, 'cloud_crowd/models'
39
+ autoload :Node, 'cloud_crowd/node'
40
+ autoload :NodeRecord, 'cloud_crowd/models'
41
+ autoload :Server, 'cloud_crowd/server'
40
42
  autoload :Worker, 'cloud_crowd/worker'
41
43
  autoload :WorkUnit, 'cloud_crowd/models'
42
- autoload :WorkerRecord, 'cloud_crowd/models'
43
44
 
44
45
  # Root directory of the CloudCrowd gem.
45
46
  ROOT = File.expand_path(File.dirname(__FILE__) + '/..')
46
47
 
47
48
  # Keep the version in sync with the gemspec.
48
- VERSION = '0.1.0'
49
+ VERSION = '0.1.1'
49
50
 
50
51
  # A Job is processing if its WorkUnits are in the queue to be handled by workers.
51
52
  PROCESSING = 1
@@ -38,12 +38,16 @@ module CloudCrowd
38
38
 
39
39
  # Download a file to the specified path.
40
40
  def download(url, path)
41
- if url.match(FILE_URL)
42
- FileUtils.cp(url.sub(FILE_URL, ''), path)
43
- else
44
- resp = RestClient::Request.execute(:url => url, :method => :get, :raw_response => true)
45
- FileUtils.mv resp.file.path, path
46
- end
41
+ URI.parse(url) # Sanity check.
42
+ `curl -s "#{url}" > "#{path}"`
43
+ # if url.match(FILE_URL)
44
+ # FileUtils.cp(url.sub(FILE_URL, ''), path)
45
+ # else
46
+ # # An alternative would be shelling out: `curl -s "#{url}" > "#{path}"`
47
+ # puts url
48
+ # resp = RestClient::Request.execute(:url => url, :method => :get, :raw_response => true)
49
+ # FileUtils.mv resp.file.path, path
50
+ # end
47
51
  path
48
52
  end
49
53
 
@@ -55,7 +59,7 @@ module CloudCrowd
55
59
  end
56
60
 
57
61
  # After the Action has finished, we remove the work directory and return
58
- # to the root directory (where daemons run by default).
62
+ # to the root directory (where workers run by default).
59
63
  def cleanup_work_directory
60
64
  FileUtils.rm_r(@work_directory) if File.exists?(@work_directory)
61
65
  end
@@ -6,6 +6,11 @@ module CloudCrowd
6
6
  # installation.
7
7
  module FilesystemStore
8
8
 
9
+ # Make sure that local storage is writeable before starting.
10
+ def setup
11
+ raise Error::StorageNotWritable, "#{LOCAL_STORAGE_PATH} is not writable" unless File.writable?(LOCAL_STORAGE_PATH)
12
+ end
13
+
9
14
  # Save a file to somewhere semi-persistent on the filesystem. Can be used
10
15
  # in development, when offline, or if you happen to have a single-machine
11
16
  # CloudCrowd installation. To use, configure <tt>:storage => 'filesystem'</tt>.
@@ -5,11 +5,16 @@ module CloudCrowd
5
5
  # on S3 for all resulting files.
6
6
  module S3Store
7
7
 
8
+ # Configure authentication and establish a connection to S3, first thing.
9
+ def setup
10
+ @use_auth = CloudCrowd.config[:use_s3_authentication]
11
+ establish_s3_connection
12
+ end
13
+
8
14
  # Save a finished file from local storage to S3. Save it publicly unless
9
15
  # we're configured to use S3 authentication. Authenticated links expire
10
16
  # after one day by default.
11
17
  def save(local_path, save_path)
12
- ensure_s3_connection
13
18
  if @use_auth
14
19
  @bucket.put(save_path, File.open(local_path), {}, 'private')
15
20
  @s3.interface.get_link(@bucket, save_path)
@@ -21,13 +26,12 @@ module CloudCrowd
21
26
 
22
27
  # Remove all of a Job's resulting files from S3, both intermediate and finished.
23
28
  def cleanup(job)
24
- ensure_s3_connection
25
29
  @bucket.delete_folder("#{job.action}/job_#{job.id}")
26
30
  end
27
31
 
28
32
  # Workers, through the course of many WorkUnits, keep around an AssetStore.
29
33
  # Ensure we have a persistent S3 connection after first use.
30
- def ensure_s3_connection
34
+ def establish_s3_connection
31
35
  unless @s3 && @bucket
32
36
  params = {:port => 80, :protocol => 'http'}
33
37
  @s3 = RightAws::S3.new(CloudCrowd.config[:aws_access_key], CloudCrowd.config[:aws_secret_key], params)
@@ -25,9 +25,9 @@ module CloudCrowd
25
25
 
26
26
  # Creating the AssetStore ensures that its scratch directory exists.
27
27
  def initialize
28
- @use_auth = CloudCrowd.config[:use_s3_authentication]
29
28
  FileUtils.mkdir_p temp_storage_path unless File.exists? temp_storage_path
30
29
  raise Error::StorageNotWritable, "#{temp_storage_path} is not writable" unless File.writable?(temp_storage_path)
30
+ setup if respond_to? :setup
31
31
  end
32
32
 
33
33
  # Get the path to CloudCrowd's temporary local storage. All actions run
@@ -9,9 +9,6 @@ module CloudCrowd
9
9
  # Reference the absolute path to the root.
10
10
  CC_ROOT = File.expand_path(File.dirname(__FILE__) + '/../..')
11
11
 
12
- # Path to the Daemons gem script which launches workers.
13
- WORKER_RUNNER = File.expand_path("#{CC_ROOT}/lib/cloud_crowd/runner.rb")
14
-
15
12
  # Command-line banner for the usage message.
16
13
  BANNER = <<-EOS
17
14
  CloudCrowd is a MapReduce-inspired Parallel Processing System for Ruby.
@@ -24,7 +21,7 @@ Usage: crowd COMMAND OPTIONS
24
21
  Commands:
25
22
  install Install the CloudCrowd configuration files to the specified directory
26
23
  server Start up the central server (requires a database)
27
- workers Control worker daemons, use: (start | stop | restart | status | run)
24
+ node Start up a worker node (only one node per machine, please)
28
25
  console Launch a CloudCrowd console, connected to the central database
29
26
  load_schema Load the schema into the database specified by database.yml
30
27
 
@@ -38,7 +35,7 @@ Options:
38
35
  case command
39
36
  when 'console' then run_console
40
37
  when 'server' then run_server
41
- when 'workers' then run_workers_command
38
+ when 'node' then run_node
42
39
  when 'load_schema' then run_load_schema
43
40
  when 'install' then run_install
44
41
  else usage
@@ -63,6 +60,7 @@ Options:
63
60
  # (Mongrel, falling back to WEBrick). The equivalent of Rails' script/server.
64
61
  def run_server
65
62
  ensure_config
63
+ @options[:port] ||= 9173
66
64
  require 'rubygems'
67
65
  rackup_path = File.expand_path("#{@options[:config_path]}/config.ru")
68
66
  if Gem.available? 'thin'
@@ -72,6 +70,14 @@ Options:
72
70
  end
73
71
  end
74
72
 
73
+ # Launch a Node. Please only run a single node per machine. The Node process
74
+ # will be long-lived, although its workers will come and go.
75
+ def run_node
76
+ ENV['RACK_ENV'] = @options['environment']
77
+ load_code
78
+ Node.new(@options[:port])
79
+ end
80
+
75
81
  # Load in the database schema to the database specified in 'database.yml'.
76
82
  def run_load_schema
77
83
  load_code
@@ -86,51 +92,11 @@ Options:
86
92
  install_path = ARGV.shift || '.'
87
93
  FileUtils.mkdir_p install_path unless File.exists?(install_path)
88
94
  install_file "#{CC_ROOT}/config/config.example.yml", "#{install_path}/config.yml"
89
- install_file "#{CC_ROOT}/config/config.example.ru", "#{install_path}/config.ru"
90
95
  install_file "#{CC_ROOT}/config/database.example.yml", "#{install_path}/database.yml"
96
+ install_file "#{CC_ROOT}/config/config.example.ru", "#{install_path}/config.ru"
91
97
  install_file "#{CC_ROOT}/actions", "#{install_path}/actions", true
92
98
  end
93
99
 
94
- # Manipulate worker daemons -- handles all commands that the Daemons gem
95
- # provides: start, stop, restart, run, and status.
96
- def run_workers_command
97
- ensure_config
98
- command = ARGV.shift
99
- case command
100
- when 'start' then start_workers
101
- when 'stop' then stop_workers
102
- when 'restart' then stop_workers && start_workers
103
- when 'run' then run_worker
104
- when 'status' then show_worker_status
105
- else usage
106
- end
107
- end
108
-
109
- # Start up N workers, specified by argument or the number of workers in
110
- # config.yml.
111
- def start_workers
112
- load_code
113
- num_workers = @options[:num_workers] || CloudCrowd.config[:num_workers]
114
- num_workers.times do
115
- `CLOUD_CROWD_CONFIG='#{File.expand_path(@options[:config_path] + "/config.yml")}' ruby #{WORKER_RUNNER} start`
116
- end
117
- end
118
-
119
- # For debugging, run a single worker in the current process, showing output.
120
- def run_worker
121
- exec "CLOUD_CROWD_CONFIG='#{File.expand_path(@options[:config_path] + "/config.yml")}' ruby #{WORKER_RUNNER} run"
122
- end
123
-
124
- # Stop all active workers.
125
- def stop_workers
126
- `ruby #{WORKER_RUNNER} stop`
127
- end
128
-
129
- # Display the status of all active workers.
130
- def show_worker_status
131
- puts `ruby #{WORKER_RUNNER} status`
132
- end
133
-
134
100
  # Print `crowd` usage.
135
101
  def usage
136
102
  puts "\n#{@option_parser}\n"
@@ -150,7 +116,6 @@ Options:
150
116
  # Parse all options for all commands.
151
117
  def parse_options
152
118
  @options = {
153
- :port => 9173,
154
119
  :environment => 'production',
155
120
  :config_path => ENV['CLOUD_CROWD_CONFIG'] || '.'
156
121
  }
@@ -158,17 +123,14 @@ Options:
158
123
  opts.on('-c', '--config PATH', 'path to configuration directory') do |conf_path|
159
124
  @options[:config_path] = conf_path
160
125
  end
161
- opts.on('-n', '--num-workers NUM', OptionParser::DecimalInteger, 'number of worker processes') do |num|
162
- @options[:num_workers] = num
163
- end
164
- opts.on('-p', '--port PORT', 'central server port number') do |port_num|
126
+ opts.on('-p', '--port PORT', 'port number for server (central or node)') do |port_num|
165
127
  @options[:port] = port_num
166
128
  end
167
129
  opts.on('-e', '--environment ENV', 'server environment (sinatra)') do |env|
168
130
  @options[:environment] = env
169
131
  end
170
132
  opts.on_tail('-v', '--version', 'show version') do
171
- load_code
133
+ require "#{CC_ROOT}/lib/cloud-crowd"
172
134
  puts "CloudCrowd version #{VERSION}"
173
135
  exit
174
136
  end
@@ -181,7 +143,6 @@ Options:
181
143
  # Not all commands require this.
182
144
  def load_code
183
145
  ensure_config
184
- require 'rubygems'
185
146
  require "#{CC_ROOT}/lib/cloud-crowd"
186
147
  CloudCrowd.configure("#{@options[:config_path]}/config.yml")
187
148
  end
@@ -8,6 +8,10 @@ module CloudCrowd
8
8
  # exist.
9
9
  class ActionNotFound < Error
10
10
  end
11
+
12
+ # CentralServerUnavailable is raised when the central server can't be reached.
13
+ class CentralServerUnavailable < Error
14
+ end
11
15
 
12
16
  # StorageNotFound is raised when config.yml specifies a storage back end that
13
17
  # doesn't exist.
@@ -23,7 +23,7 @@ module CloudCrowd
23
23
  # A request is authorized if its login and password match those stored
24
24
  # in config.yml, or if authentication is disabled. If authentication is
25
25
  # turned on, then every request is authenticated, including between
26
- # the worker daemons and the central server.
26
+ # the nodes and the central server.
27
27
  def authorize(login, password)
28
28
  return true unless CloudCrowd.config[:use_http_authentication]
29
29
  return CloudCrowd.config[:login] == login &&
@@ -37,7 +37,7 @@ module CloudCrowd
37
37
  @auth ||= Rack::Auth::Basic::Request.new(request.env)
38
38
  end
39
39
 
40
- def unauthorized!(realm = App.authorization_realm)
40
+ def unauthorized!(realm = Server.authorization_realm)
41
41
  response['WWW-Authenticate'] = "Basic realm=\"#{realm}\""
42
42
  halt 401, 'Authorization Required'
43
43
  end
@@ -20,26 +20,6 @@ module CloudCrowd
20
20
  @work_unit ||= WorkUnit.find_by_id(params[:work_unit_id]) or raise Sinatra::NotFound
21
21
  end
22
22
 
23
- # Try to fetch a work unit from the queue. If none are pending, respond
24
- # with no content.
25
- def dequeue_work_unit(offset=0)
26
- handle_conflicts do
27
- worker, actions = params[:worker_name], params[:worker_actions].split(',')
28
- WorkUnit.dequeue(worker, actions, offset)
29
- end
30
- end
31
-
32
- # We're using ActiveRecord's optimistic locking, so stale work units
33
- # may sometimes arise. handle_conflicts responds with the HTTP status
34
- # code of your choosing if the update failed to be applied.
35
- def handle_conflicts(code=204)
36
- begin
37
- yield
38
- rescue ActiveRecord::StaleObjectError => e
39
- return status(code) && ''
40
- end
41
- end
42
-
43
23
  end
44
24
  end
45
25
  end
@@ -31,30 +31,39 @@ module CloudCrowd
31
31
  # finished, if so, continue on to the next phase of the job.
32
32
  def check_for_completion
33
33
  return unless all_work_units_complete?
34
- transition_to_next_phase
35
- output_list = gather_outputs_from_work_units
36
-
37
- if complete?
38
- self.outputs = output_list.to_json
39
- self.time = Time.now - self.created_at
40
- end
41
- self.save
34
+ set_next_status
35
+ outs = gather_outputs_from_work_units
36
+ update_attributes(:outputs => outs.to_json, :time => time_taken) if complete?
42
37
 
43
38
  case self.status
44
- when PROCESSING then queue_for_workers(output_list.map {|o| JSON.parse(o) }.flatten)
45
- when MERGING then queue_for_workers(output_list.to_json)
39
+ when PROCESSING then queue_for_workers(outs.map {|o| JSON.parse(o) }.flatten)
40
+ when MERGING then queue_for_workers(outs.to_json)
46
41
  else fire_callback
47
42
  end
48
43
  self
49
44
  end
50
45
 
46
+ # Transition this Job's status to the appropriate next status.
47
+ def set_next_status
48
+ update_attribute(:status,
49
+ any_work_units_failed? ? FAILED :
50
+ self.splitting? ? PROCESSING :
51
+ self.mergeable? ? MERGING :
52
+ SUCCEEDED
53
+ )
54
+ end
55
+
51
56
  # If a <tt>callback_url</tt> is defined, post the Job's JSON to it upon
52
57
  # completion. The <tt>callback_url</tt> may include HTTP basic authentication,
53
58
  # if you like:
54
59
  # http://user:password@example.com/job_complete
60
+ # If the callback_url is successfully pinged, we proceed to clean up the job.
61
+ # TODO: This should be moved into a Work Unit...
55
62
  def fire_callback
63
+ return unless callback_url
56
64
  begin
57
- RestClient.post(callback_url, {:job => self.to_json}) if callback_url
65
+ RestClient.post(callback_url, {:job => self.to_json})
66
+ self.destroy
58
67
  rescue RestClient::Exception => e
59
68
  puts "Failed to fire job callback. Hmmm, what should happen here?"
60
69
  end
@@ -62,15 +71,12 @@ module CloudCrowd
62
71
 
63
72
  # Cleaning up after a job will remove all of its files from S3. Destroying
64
73
  # a Job calls cleanup_assets first.
74
+ # TODO: Convert this into a 'cleanup' work unit that gets run by a worker.
65
75
  def cleanup_assets
66
76
  AssetStore.new.cleanup(self)
67
77
  end
68
78
 
69
79
  # Have all of the WorkUnits finished?
70
- #--
71
- # We could trade reads for writes here
72
- # by keeping a completed_count on the Job itself.
73
- #++
74
80
  def all_work_units_complete?
75
81
  self.work_units.incomplete.count <= 0
76
82
  end
@@ -98,10 +104,11 @@ module CloudCrowd
98
104
  end
99
105
 
100
106
  # How complete is this Job?
107
+ # Unfortunately, with the current processing sequence, the percent_complete
108
+ # can pull a fast one and go backwards.
101
109
  def percent_complete
102
- return 0 if splitting?
103
- return 100 if complete?
104
110
  return 99 if merging?
111
+ return 100 if complete?
105
112
  (work_units.complete.count / work_units.count.to_f * 100).round
106
113
  end
107
114
 
@@ -143,21 +150,13 @@ module CloudCrowd
143
150
  self.work_units.complete.destroy_all
144
151
  outs
145
152
  end
146
-
147
- # Transition this Job's status to the appropriate next status.
148
- def transition_to_next_phase
149
- self.status = any_work_units_failed? ? FAILED :
150
- self.splitting? ? PROCESSING :
151
- self.mergeable? ? MERGING :
152
- SUCCEEDED
153
- end
154
153
 
155
154
  # When starting a new job, or moving to a new stage, split up the inputs
156
155
  # into WorkUnits, and queue them. Workers will start picking them up right
157
156
  # away.
158
157
  def queue_for_workers(input=nil)
159
158
  input ||= JSON.parse(self.inputs)
160
- [input].flatten.each do |wu_input|
159
+ [input].flatten.map do |wu_input|
161
160
  WorkUnit.create(
162
161
  :job => self,
163
162
  :action => self.action,