documentcloud-cloud-crowd 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README +8 -8
- data/cloud-crowd.gemspec +8 -8
- data/config/config.example.ru +8 -2
- data/config/config.example.yml +6 -15
- data/examples/process_pdfs_example.rb +1 -1
- data/examples/word_count_example.rb +1 -0
- data/lib/cloud-crowd.rb +6 -5
- data/lib/cloud_crowd/action.rb +11 -7
- data/lib/cloud_crowd/asset_store/filesystem_store.rb +5 -0
- data/lib/cloud_crowd/asset_store/s3_store.rb +7 -3
- data/lib/cloud_crowd/asset_store.rb +1 -1
- data/lib/cloud_crowd/command_line.rb +14 -53
- data/lib/cloud_crowd/exceptions.rb +4 -0
- data/lib/cloud_crowd/helpers/authorization.rb +2 -2
- data/lib/cloud_crowd/helpers/resources.rb +0 -20
- data/lib/cloud_crowd/models/job.rb +25 -26
- data/lib/cloud_crowd/models/node_record.rb +81 -0
- data/lib/cloud_crowd/models/work_unit.rb +70 -30
- data/lib/cloud_crowd/models.rb +1 -1
- data/lib/cloud_crowd/node.rb +87 -0
- data/lib/cloud_crowd/schema.rb +19 -16
- data/lib/cloud_crowd/{app.rb → server.rb} +25 -30
- data/lib/cloud_crowd/worker.rb +50 -74
- data/public/css/admin_console.css +26 -14
- data/public/images/server.png +0 -0
- data/public/js/admin_console.js +45 -18
- data/test/acceptance/test_failing_work_units.rb +1 -1
- data/test/acceptance/{test_app.rb → test_server.rb} +15 -15
- data/test/acceptance/test_word_count.rb +3 -9
- data/test/blueprints.rb +0 -1
- data/test/config/config.ru +1 -1
- data/test/config/config.yml +1 -3
- data/test/unit/test_configuration.rb +1 -1
- data/test/unit/test_job.rb +1 -0
- data/test/unit/test_work_unit.rb +2 -4
- data/views/index.erb +13 -8
- metadata +9 -9
- data/lib/cloud_crowd/daemon.rb +0 -95
- data/lib/cloud_crowd/models/worker_record.rb +0 -61
- data/lib/cloud_crowd/runner.rb +0 -15
data/README
CHANGED
@@ -30,19 +30,19 @@
|
|
30
30
|
* split -> process -> merge
|
31
31
|
* As easy as `gem install cloud-crowd`
|
32
32
|
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
33
|
+
Well-suited for:
|
34
|
+
|
35
|
+
* Generating or resizing images.
|
36
|
+
* Encoding video.
|
37
|
+
* Running text extraction or OCR on PDFs.
|
38
|
+
* Migrating a large file set or database.
|
39
|
+
* Web scraping.
|
40
40
|
|
41
41
|
|
42
42
|
~ Documentation ~
|
43
43
|
|
44
44
|
Wiki: http://wiki.github.com/documentcloud/cloud-crowd
|
45
|
-
|
45
|
+
Rdoc: http://rdoc.info/projects/documentcloud/cloud-crowd
|
46
46
|
|
47
47
|
|
48
48
|
~ Getting started ~
|
data/cloud-crowd.gemspec
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = 'cloud-crowd'
|
3
|
-
s.version = '0.1.
|
4
|
-
s.date = '2009-09-
|
3
|
+
s.version = '0.1.1' # Keep version in sync with cloud-crowd.rb
|
4
|
+
s.date = '2009-09-15'
|
5
5
|
|
6
6
|
s.homepage = "http://wiki.github.com/documentcloud/cloud-crowd"
|
7
7
|
s.summary = "Parallel Processing for the Rest of Us"
|
@@ -32,7 +32,7 @@ Gem::Specification.new do |s|
|
|
32
32
|
s.add_dependency 'json', ['>= 1.1.7']
|
33
33
|
s.add_dependency 'rest-client', ['>= 1.0.3']
|
34
34
|
s.add_dependency 'right_aws', ['>= 1.10.0']
|
35
|
-
s.add_dependency '
|
35
|
+
s.add_dependency 'thin', ['>= 1.2.4']
|
36
36
|
|
37
37
|
if s.respond_to?(:add_development_dependency)
|
38
38
|
s.add_development_dependency 'faker', ['>= 0.3.1']
|
@@ -56,23 +56,22 @@ examples/process_pdfs_example.rb
|
|
56
56
|
examples/word_count_example.rb
|
57
57
|
lib/cloud-crowd.rb
|
58
58
|
lib/cloud_crowd/action.rb
|
59
|
-
lib/cloud_crowd/app.rb
|
60
59
|
lib/cloud_crowd/asset_store/filesystem_store.rb
|
61
60
|
lib/cloud_crowd/asset_store/s3_store.rb
|
62
61
|
lib/cloud_crowd/asset_store.rb
|
63
62
|
lib/cloud_crowd/command_line.rb
|
64
|
-
lib/cloud_crowd/daemon.rb
|
65
63
|
lib/cloud_crowd/exceptions.rb
|
66
64
|
lib/cloud_crowd/helpers/authorization.rb
|
67
65
|
lib/cloud_crowd/helpers/resources.rb
|
68
66
|
lib/cloud_crowd/helpers.rb
|
69
67
|
lib/cloud_crowd/inflector.rb
|
70
68
|
lib/cloud_crowd/models/job.rb
|
69
|
+
lib/cloud_crowd/models/node_record.rb
|
71
70
|
lib/cloud_crowd/models/work_unit.rb
|
72
|
-
lib/cloud_crowd/models/worker_record.rb
|
73
71
|
lib/cloud_crowd/models.rb
|
74
|
-
lib/cloud_crowd/
|
72
|
+
lib/cloud_crowd/node.rb
|
75
73
|
lib/cloud_crowd/schema.rb
|
74
|
+
lib/cloud_crowd/server.rb
|
76
75
|
lib/cloud_crowd/worker.rb
|
77
76
|
LICENSE
|
78
77
|
public/css/admin_console.css
|
@@ -83,6 +82,7 @@ public/images/cloud_hand.png
|
|
83
82
|
public/images/header_back.png
|
84
83
|
public/images/logo.png
|
85
84
|
public/images/queue_fill.png
|
85
|
+
public/images/server.png
|
86
86
|
public/images/server_error.png
|
87
87
|
public/images/sidebar_bottom.png
|
88
88
|
public/images/sidebar_top.png
|
@@ -93,7 +93,7 @@ public/js/excanvas.js
|
|
93
93
|
public/js/flot.js
|
94
94
|
public/js/jquery.js
|
95
95
|
README
|
96
|
-
test/acceptance/
|
96
|
+
test/acceptance/test_server.rb
|
97
97
|
test/acceptance/test_failing_work_units.rb
|
98
98
|
test/acceptance/test_word_count.rb
|
99
99
|
test/blueprints.rb
|
data/config/config.example.ru
CHANGED
@@ -4,7 +4,13 @@
|
|
4
4
|
# using any Rack-compliant server handler. For example, start up three servers
|
5
5
|
# with a specified port number, using Thin:
|
6
6
|
#
|
7
|
-
# thin start -R config.ru
|
7
|
+
# thin start -R config.ru --servers 3
|
8
|
+
#
|
9
|
+
# Or a single server with Unicorn:
|
10
|
+
#
|
11
|
+
# unicorn config.ru
|
12
|
+
#
|
13
|
+
|
8
14
|
|
9
15
|
require 'rubygems'
|
10
16
|
require 'cloud-crowd'
|
@@ -13,5 +19,5 @@ CloudCrowd.configure(File.dirname(__FILE__) + '/config.yml')
|
|
13
19
|
CloudCrowd.configure_database(File.dirname(__FILE__) + '/database.yml')
|
14
20
|
|
15
21
|
map '/' do
|
16
|
-
run CloudCrowd::
|
22
|
+
run CloudCrowd::Server
|
17
23
|
end
|
data/config/config.example.yml
CHANGED
@@ -1,6 +1,11 @@
|
|
1
1
|
# The URL where you're planning on running the central server/queue/database.
|
2
2
|
:central_server: http://localhost:9173
|
3
3
|
|
4
|
+
# Set the maximum number of workers allowed per-node. Workers only run while
|
5
|
+
# there's work to be done. It's best to set 'max_workers' below the point where
|
6
|
+
# you'd start to swap or peg your CPU (as determined by experiment).
|
7
|
+
:max_workers: 5
|
8
|
+
|
4
9
|
# The storage back-end that you'd like to use for intermediate and final results
|
5
10
|
# of processing. 's3' and 'filesystem' are supported. 'filesystem' should only
|
6
11
|
# be used in development, or on single-machine installations.
|
@@ -29,20 +34,6 @@
|
|
29
34
|
# additional actions from a location of your choice.
|
30
35
|
# :actions_path: /path/to/actions
|
31
36
|
|
32
|
-
# Set the following numbers to tweak the configuration of your worker daemons.
|
33
|
-
# Optimum results will depend on proportion of the Memory/CPU/IO bottlenecks
|
34
|
-
# in your actions, the number of central servers you have running, and your
|
35
|
-
# desired balance between latency and traffic.
|
36
|
-
|
37
|
-
# The number of workers that `crowd workers start` spins up.
|
38
|
-
:num_workers: 3
|
39
|
-
|
40
|
-
# The minimum number of seconds a worker waits between checking the job queue.
|
41
|
-
:min_worker_wait: 1
|
42
|
-
|
43
|
-
# The maximum number of seconds a worker waits between checking the job queue.
|
44
|
-
:max_worker_wait: 5
|
45
|
-
|
46
37
|
# The number of separate attempts that will be made to process an individual
|
47
38
|
# work unit, before marking it as having failed.
|
48
|
-
:work_unit_retries: 3
|
39
|
+
:work_unit_retries: 3
|
@@ -17,7 +17,7 @@ RestClient.post('http://localhost:9173/jobs',
|
|
17
17
|
'http://tigger.uic.edu/~victor/personal/futurism.pdf',
|
18
18
|
'http://www.jonasmekas.com/Catalog_excerpt/The%20Avant-Garde%20From%20Futurism%20to%20Fluxus.pdf',
|
19
19
|
'http://www.dzignism.com/articles/Futurist.Manifesto.pdf',
|
20
|
-
'http://
|
20
|
+
'http://www.pitt.edu/~slavic/sisc/SISC4/dadswell.pdf'
|
21
21
|
],
|
22
22
|
|
23
23
|
'options' => {
|
data/lib/cloud-crowd.rb
CHANGED
@@ -5,16 +5,15 @@ $LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__))
|
|
5
5
|
# Common Gems:
|
6
6
|
require 'rubygems'
|
7
7
|
gem 'activerecord'
|
8
|
-
gem 'daemons'
|
9
8
|
gem 'json'
|
10
9
|
gem 'rest-client'
|
11
10
|
gem 'right_aws'
|
12
11
|
gem 'sinatra'
|
12
|
+
gem 'thin'
|
13
13
|
|
14
14
|
# Autoloading for all the pieces which may or may not be needed:
|
15
15
|
autoload :ActiveRecord, 'activerecord'
|
16
16
|
autoload :Benchmark, 'benchmark'
|
17
|
-
autoload :Daemons, 'daemons'
|
18
17
|
autoload :Digest, 'digest'
|
19
18
|
autoload :ERB, 'erb'
|
20
19
|
autoload :FileUtils, 'fileutils'
|
@@ -23,6 +22,7 @@ autoload :RestClient, 'restclient'
|
|
23
22
|
autoload :RightAws, 'right_aws'
|
24
23
|
autoload :Sinatra, 'sinatra'
|
25
24
|
autoload :Socket, 'socket'
|
25
|
+
autoload :Thin, 'thin'
|
26
26
|
autoload :YAML, 'yaml'
|
27
27
|
|
28
28
|
# Common code which should really be required in every circumstance.
|
@@ -31,21 +31,22 @@ require 'cloud_crowd/exceptions'
|
|
31
31
|
module CloudCrowd
|
32
32
|
|
33
33
|
# Autoload all the CloudCrowd classes which may not be required.
|
34
|
-
autoload :App, 'cloud_crowd/app'
|
35
34
|
autoload :Action, 'cloud_crowd/action'
|
36
35
|
autoload :AssetStore, 'cloud_crowd/asset_store'
|
37
36
|
autoload :Helpers, 'cloud_crowd/helpers'
|
38
37
|
autoload :Inflector, 'cloud_crowd/inflector'
|
39
38
|
autoload :Job, 'cloud_crowd/models'
|
39
|
+
autoload :Node, 'cloud_crowd/node'
|
40
|
+
autoload :NodeRecord, 'cloud_crowd/models'
|
41
|
+
autoload :Server, 'cloud_crowd/server'
|
40
42
|
autoload :Worker, 'cloud_crowd/worker'
|
41
43
|
autoload :WorkUnit, 'cloud_crowd/models'
|
42
|
-
autoload :WorkerRecord, 'cloud_crowd/models'
|
43
44
|
|
44
45
|
# Root directory of the CloudCrowd gem.
|
45
46
|
ROOT = File.expand_path(File.dirname(__FILE__) + '/..')
|
46
47
|
|
47
48
|
# Keep the version in sync with the gemspec.
|
48
|
-
VERSION = '0.1.
|
49
|
+
VERSION = '0.1.1'
|
49
50
|
|
50
51
|
# A Job is processing if its WorkUnits are in the queue to be handled by workers.
|
51
52
|
PROCESSING = 1
|
data/lib/cloud_crowd/action.rb
CHANGED
@@ -38,12 +38,16 @@ module CloudCrowd
|
|
38
38
|
|
39
39
|
# Download a file to the specified path.
|
40
40
|
def download(url, path)
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
41
|
+
URI.parse(url) # Sanity check.
|
42
|
+
`curl -s "#{url}" > "#{path}"`
|
43
|
+
# if url.match(FILE_URL)
|
44
|
+
# FileUtils.cp(url.sub(FILE_URL, ''), path)
|
45
|
+
# else
|
46
|
+
# # An alternative would be shelling out: `curl -s "#{url}" > "#{path}"`
|
47
|
+
# puts url
|
48
|
+
# resp = RestClient::Request.execute(:url => url, :method => :get, :raw_response => true)
|
49
|
+
# FileUtils.mv resp.file.path, path
|
50
|
+
# end
|
47
51
|
path
|
48
52
|
end
|
49
53
|
|
@@ -55,7 +59,7 @@ module CloudCrowd
|
|
55
59
|
end
|
56
60
|
|
57
61
|
# After the Action has finished, we remove the work directory and return
|
58
|
-
# to the root directory (where
|
62
|
+
# to the root directory (where workers run by default).
|
59
63
|
def cleanup_work_directory
|
60
64
|
FileUtils.rm_r(@work_directory) if File.exists?(@work_directory)
|
61
65
|
end
|
@@ -6,6 +6,11 @@ module CloudCrowd
|
|
6
6
|
# installation.
|
7
7
|
module FilesystemStore
|
8
8
|
|
9
|
+
# Make sure that local storage is writeable before starting.
|
10
|
+
def setup
|
11
|
+
raise Error::StorageNotWritable, "#{LOCAL_STORAGE_PATH} is not writable" unless File.writable?(LOCAL_STORAGE_PATH)
|
12
|
+
end
|
13
|
+
|
9
14
|
# Save a file to somewhere semi-persistent on the filesystem. Can be used
|
10
15
|
# in development, when offline, or if you happen to have a single-machine
|
11
16
|
# CloudCrowd installation. To use, configure <tt>:storage => 'filesystem'</tt>.
|
@@ -5,11 +5,16 @@ module CloudCrowd
|
|
5
5
|
# on S3 for all resulting files.
|
6
6
|
module S3Store
|
7
7
|
|
8
|
+
# Configure authentication and establish a connection to S3, first thing.
|
9
|
+
def setup
|
10
|
+
@use_auth = CloudCrowd.config[:use_s3_authentication]
|
11
|
+
establish_s3_connection
|
12
|
+
end
|
13
|
+
|
8
14
|
# Save a finished file from local storage to S3. Save it publicly unless
|
9
15
|
# we're configured to use S3 authentication. Authenticated links expire
|
10
16
|
# after one day by default.
|
11
17
|
def save(local_path, save_path)
|
12
|
-
ensure_s3_connection
|
13
18
|
if @use_auth
|
14
19
|
@bucket.put(save_path, File.open(local_path), {}, 'private')
|
15
20
|
@s3.interface.get_link(@bucket, save_path)
|
@@ -21,13 +26,12 @@ module CloudCrowd
|
|
21
26
|
|
22
27
|
# Remove all of a Job's resulting files from S3, both intermediate and finished.
|
23
28
|
def cleanup(job)
|
24
|
-
ensure_s3_connection
|
25
29
|
@bucket.delete_folder("#{job.action}/job_#{job.id}")
|
26
30
|
end
|
27
31
|
|
28
32
|
# Workers, through the course of many WorkUnits, keep around an AssetStore.
|
29
33
|
# Ensure we have a persistent S3 connection after first use.
|
30
|
-
def
|
34
|
+
def establish_s3_connection
|
31
35
|
unless @s3 && @bucket
|
32
36
|
params = {:port => 80, :protocol => 'http'}
|
33
37
|
@s3 = RightAws::S3.new(CloudCrowd.config[:aws_access_key], CloudCrowd.config[:aws_secret_key], params)
|
@@ -25,9 +25,9 @@ module CloudCrowd
|
|
25
25
|
|
26
26
|
# Creating the AssetStore ensures that its scratch directory exists.
|
27
27
|
def initialize
|
28
|
-
@use_auth = CloudCrowd.config[:use_s3_authentication]
|
29
28
|
FileUtils.mkdir_p temp_storage_path unless File.exists? temp_storage_path
|
30
29
|
raise Error::StorageNotWritable, "#{temp_storage_path} is not writable" unless File.writable?(temp_storage_path)
|
30
|
+
setup if respond_to? :setup
|
31
31
|
end
|
32
32
|
|
33
33
|
# Get the path to CloudCrowd's temporary local storage. All actions run
|
@@ -9,9 +9,6 @@ module CloudCrowd
|
|
9
9
|
# Reference the absolute path to the root.
|
10
10
|
CC_ROOT = File.expand_path(File.dirname(__FILE__) + '/../..')
|
11
11
|
|
12
|
-
# Path to the Daemons gem script which launches workers.
|
13
|
-
WORKER_RUNNER = File.expand_path("#{CC_ROOT}/lib/cloud_crowd/runner.rb")
|
14
|
-
|
15
12
|
# Command-line banner for the usage message.
|
16
13
|
BANNER = <<-EOS
|
17
14
|
CloudCrowd is a MapReduce-inspired Parallel Processing System for Ruby.
|
@@ -24,7 +21,7 @@ Usage: crowd COMMAND OPTIONS
|
|
24
21
|
Commands:
|
25
22
|
install Install the CloudCrowd configuration files to the specified directory
|
26
23
|
server Start up the central server (requires a database)
|
27
|
-
|
24
|
+
node Start up a worker node (only one node per machine, please)
|
28
25
|
console Launch a CloudCrowd console, connected to the central database
|
29
26
|
load_schema Load the schema into the database specified by database.yml
|
30
27
|
|
@@ -38,7 +35,7 @@ Options:
|
|
38
35
|
case command
|
39
36
|
when 'console' then run_console
|
40
37
|
when 'server' then run_server
|
41
|
-
when '
|
38
|
+
when 'node' then run_node
|
42
39
|
when 'load_schema' then run_load_schema
|
43
40
|
when 'install' then run_install
|
44
41
|
else usage
|
@@ -63,6 +60,7 @@ Options:
|
|
63
60
|
# (Mongrel, falling back to WEBrick). The equivalent of Rails' script/server.
|
64
61
|
def run_server
|
65
62
|
ensure_config
|
63
|
+
@options[:port] ||= 9173
|
66
64
|
require 'rubygems'
|
67
65
|
rackup_path = File.expand_path("#{@options[:config_path]}/config.ru")
|
68
66
|
if Gem.available? 'thin'
|
@@ -72,6 +70,14 @@ Options:
|
|
72
70
|
end
|
73
71
|
end
|
74
72
|
|
73
|
+
# Launch a Node. Please only run a single node per machine. The Node process
|
74
|
+
# will be long-lived, although its workers will come and go.
|
75
|
+
def run_node
|
76
|
+
ENV['RACK_ENV'] = @options['environment']
|
77
|
+
load_code
|
78
|
+
Node.new(@options[:port])
|
79
|
+
end
|
80
|
+
|
75
81
|
# Load in the database schema to the database specified in 'database.yml'.
|
76
82
|
def run_load_schema
|
77
83
|
load_code
|
@@ -86,51 +92,11 @@ Options:
|
|
86
92
|
install_path = ARGV.shift || '.'
|
87
93
|
FileUtils.mkdir_p install_path unless File.exists?(install_path)
|
88
94
|
install_file "#{CC_ROOT}/config/config.example.yml", "#{install_path}/config.yml"
|
89
|
-
install_file "#{CC_ROOT}/config/config.example.ru", "#{install_path}/config.ru"
|
90
95
|
install_file "#{CC_ROOT}/config/database.example.yml", "#{install_path}/database.yml"
|
96
|
+
install_file "#{CC_ROOT}/config/config.example.ru", "#{install_path}/config.ru"
|
91
97
|
install_file "#{CC_ROOT}/actions", "#{install_path}/actions", true
|
92
98
|
end
|
93
99
|
|
94
|
-
# Manipulate worker daemons -- handles all commands that the Daemons gem
|
95
|
-
# provides: start, stop, restart, run, and status.
|
96
|
-
def run_workers_command
|
97
|
-
ensure_config
|
98
|
-
command = ARGV.shift
|
99
|
-
case command
|
100
|
-
when 'start' then start_workers
|
101
|
-
when 'stop' then stop_workers
|
102
|
-
when 'restart' then stop_workers && start_workers
|
103
|
-
when 'run' then run_worker
|
104
|
-
when 'status' then show_worker_status
|
105
|
-
else usage
|
106
|
-
end
|
107
|
-
end
|
108
|
-
|
109
|
-
# Start up N workers, specified by argument or the number of workers in
|
110
|
-
# config.yml.
|
111
|
-
def start_workers
|
112
|
-
load_code
|
113
|
-
num_workers = @options[:num_workers] || CloudCrowd.config[:num_workers]
|
114
|
-
num_workers.times do
|
115
|
-
`CLOUD_CROWD_CONFIG='#{File.expand_path(@options[:config_path] + "/config.yml")}' ruby #{WORKER_RUNNER} start`
|
116
|
-
end
|
117
|
-
end
|
118
|
-
|
119
|
-
# For debugging, run a single worker in the current process, showing output.
|
120
|
-
def run_worker
|
121
|
-
exec "CLOUD_CROWD_CONFIG='#{File.expand_path(@options[:config_path] + "/config.yml")}' ruby #{WORKER_RUNNER} run"
|
122
|
-
end
|
123
|
-
|
124
|
-
# Stop all active workers.
|
125
|
-
def stop_workers
|
126
|
-
`ruby #{WORKER_RUNNER} stop`
|
127
|
-
end
|
128
|
-
|
129
|
-
# Display the status of all active workers.
|
130
|
-
def show_worker_status
|
131
|
-
puts `ruby #{WORKER_RUNNER} status`
|
132
|
-
end
|
133
|
-
|
134
100
|
# Print `crowd` usage.
|
135
101
|
def usage
|
136
102
|
puts "\n#{@option_parser}\n"
|
@@ -150,7 +116,6 @@ Options:
|
|
150
116
|
# Parse all options for all commands.
|
151
117
|
def parse_options
|
152
118
|
@options = {
|
153
|
-
:port => 9173,
|
154
119
|
:environment => 'production',
|
155
120
|
:config_path => ENV['CLOUD_CROWD_CONFIG'] || '.'
|
156
121
|
}
|
@@ -158,17 +123,14 @@ Options:
|
|
158
123
|
opts.on('-c', '--config PATH', 'path to configuration directory') do |conf_path|
|
159
124
|
@options[:config_path] = conf_path
|
160
125
|
end
|
161
|
-
opts.on('-
|
162
|
-
@options[:num_workers] = num
|
163
|
-
end
|
164
|
-
opts.on('-p', '--port PORT', 'central server port number') do |port_num|
|
126
|
+
opts.on('-p', '--port PORT', 'port number for server (central or node)') do |port_num|
|
165
127
|
@options[:port] = port_num
|
166
128
|
end
|
167
129
|
opts.on('-e', '--environment ENV', 'server environment (sinatra)') do |env|
|
168
130
|
@options[:environment] = env
|
169
131
|
end
|
170
132
|
opts.on_tail('-v', '--version', 'show version') do
|
171
|
-
|
133
|
+
require "#{CC_ROOT}/lib/cloud-crowd"
|
172
134
|
puts "CloudCrowd version #{VERSION}"
|
173
135
|
exit
|
174
136
|
end
|
@@ -181,7 +143,6 @@ Options:
|
|
181
143
|
# Not all commands require this.
|
182
144
|
def load_code
|
183
145
|
ensure_config
|
184
|
-
require 'rubygems'
|
185
146
|
require "#{CC_ROOT}/lib/cloud-crowd"
|
186
147
|
CloudCrowd.configure("#{@options[:config_path]}/config.yml")
|
187
148
|
end
|
@@ -8,6 +8,10 @@ module CloudCrowd
|
|
8
8
|
# exist.
|
9
9
|
class ActionNotFound < Error
|
10
10
|
end
|
11
|
+
|
12
|
+
# CentralServerUnavailable is used when the central server can't be reached.
|
13
|
+
class CentralServerUnavailable < Error
|
14
|
+
end
|
11
15
|
|
12
16
|
# StorageNotFound is raised when config.yml specifies a storage back end that
|
13
17
|
# doesn't exist.
|
@@ -23,7 +23,7 @@ module CloudCrowd
|
|
23
23
|
# A request is authorized if its login and password match those stored
|
24
24
|
# in config.yml, or if authentication is disabled. If authentication is
|
25
25
|
# turned on, then every request is authenticated, including between
|
26
|
-
# the
|
26
|
+
# the nodes and the central server.
|
27
27
|
def authorize(login, password)
|
28
28
|
return true unless CloudCrowd.config[:use_http_authentication]
|
29
29
|
return CloudCrowd.config[:login] == login &&
|
@@ -37,7 +37,7 @@ module CloudCrowd
|
|
37
37
|
@auth ||= Rack::Auth::Basic::Request.new(request.env)
|
38
38
|
end
|
39
39
|
|
40
|
-
def unauthorized!(realm =
|
40
|
+
def unauthorized!(realm = Server.authorization_realm)
|
41
41
|
response['WWW-Authenticate'] = "Basic realm=\"#{realm}\""
|
42
42
|
halt 401, 'Authorization Required'
|
43
43
|
end
|
@@ -20,26 +20,6 @@ module CloudCrowd
|
|
20
20
|
@work_unit ||= WorkUnit.find_by_id(params[:work_unit_id]) or raise Sinatra::NotFound
|
21
21
|
end
|
22
22
|
|
23
|
-
# Try to fetch a work unit from the queue. If none are pending, respond
|
24
|
-
# with no content.
|
25
|
-
def dequeue_work_unit(offset=0)
|
26
|
-
handle_conflicts do
|
27
|
-
worker, actions = params[:worker_name], params[:worker_actions].split(',')
|
28
|
-
WorkUnit.dequeue(worker, actions, offset)
|
29
|
-
end
|
30
|
-
end
|
31
|
-
|
32
|
-
# We're using ActiveRecord's optimistic locking, so stale work units
|
33
|
-
# may sometimes arise. handle_conflicts responds with the HTTP status
|
34
|
-
# code of your choosing if the update failed to be applied.
|
35
|
-
def handle_conflicts(code=204)
|
36
|
-
begin
|
37
|
-
yield
|
38
|
-
rescue ActiveRecord::StaleObjectError => e
|
39
|
-
return status(code) && ''
|
40
|
-
end
|
41
|
-
end
|
42
|
-
|
43
23
|
end
|
44
24
|
end
|
45
25
|
end
|
@@ -31,30 +31,39 @@ module CloudCrowd
|
|
31
31
|
# finished, if so, continue on to the next phase of the job.
|
32
32
|
def check_for_completion
|
33
33
|
return unless all_work_units_complete?
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
if complete?
|
38
|
-
self.outputs = output_list.to_json
|
39
|
-
self.time = Time.now - self.created_at
|
40
|
-
end
|
41
|
-
self.save
|
34
|
+
set_next_status
|
35
|
+
outs = gather_outputs_from_work_units
|
36
|
+
update_attributes(:outputs => outs.to_json, :time => time_taken) if complete?
|
42
37
|
|
43
38
|
case self.status
|
44
|
-
when PROCESSING then queue_for_workers(
|
45
|
-
when MERGING then queue_for_workers(
|
39
|
+
when PROCESSING then queue_for_workers(outs.map {|o| JSON.parse(o) }.flatten)
|
40
|
+
when MERGING then queue_for_workers(outs.to_json)
|
46
41
|
else fire_callback
|
47
42
|
end
|
48
43
|
self
|
49
44
|
end
|
50
45
|
|
46
|
+
# Transition this Job's status to the appropriate next status.
|
47
|
+
def set_next_status
|
48
|
+
update_attribute(:status,
|
49
|
+
any_work_units_failed? ? FAILED :
|
50
|
+
self.splitting? ? PROCESSING :
|
51
|
+
self.mergeable? ? MERGING :
|
52
|
+
SUCCEEDED
|
53
|
+
)
|
54
|
+
end
|
55
|
+
|
51
56
|
# If a <tt>callback_url</tt> is defined, post the Job's JSON to it upon
|
52
57
|
# completion. The <tt>callback_url</tt> may include HTTP basic authentication,
|
53
58
|
# if you like:
|
54
59
|
# http://user:password@example.com/job_complete
|
60
|
+
# If the callback_url is successfully pinged, we proceed to clean up the job.
|
61
|
+
# TODO: This should be moved into a Work Unit...
|
55
62
|
def fire_callback
|
63
|
+
return unless callback_url
|
56
64
|
begin
|
57
|
-
RestClient.post(callback_url, {:job => self.to_json})
|
65
|
+
RestClient.post(callback_url, {:job => self.to_json})
|
66
|
+
self.destroy
|
58
67
|
rescue RestClient::Exception => e
|
59
68
|
puts "Failed to fire job callback. Hmmm, what should happen here?"
|
60
69
|
end
|
@@ -62,15 +71,12 @@ module CloudCrowd
|
|
62
71
|
|
63
72
|
# Cleaning up after a job will remove all of its files from S3. Destroying
|
64
73
|
# a Job calls cleanup_assets first.
|
74
|
+
# TODO: Convert this into a 'cleanup' work unit that gets run by a worker.
|
65
75
|
def cleanup_assets
|
66
76
|
AssetStore.new.cleanup(self)
|
67
77
|
end
|
68
78
|
|
69
79
|
# Have all of the WorkUnits finished?
|
70
|
-
#--
|
71
|
-
# We could trade reads for writes here
|
72
|
-
# by keeping a completed_count on the Job itself.
|
73
|
-
#++
|
74
80
|
def all_work_units_complete?
|
75
81
|
self.work_units.incomplete.count <= 0
|
76
82
|
end
|
@@ -98,10 +104,11 @@ module CloudCrowd
|
|
98
104
|
end
|
99
105
|
|
100
106
|
# How complete is this Job?
|
107
|
+
# Unfortunately, with the current processing sequence, the percent_complete
|
108
|
+
# can pull a fast one and go backwards.
|
101
109
|
def percent_complete
|
102
|
-
return 0 if splitting?
|
103
|
-
return 100 if complete?
|
104
110
|
return 99 if merging?
|
111
|
+
return 100 if complete?
|
105
112
|
(work_units.complete.count / work_units.count.to_f * 100).round
|
106
113
|
end
|
107
114
|
|
@@ -143,21 +150,13 @@ module CloudCrowd
|
|
143
150
|
self.work_units.complete.destroy_all
|
144
151
|
outs
|
145
152
|
end
|
146
|
-
|
147
|
-
# Transition this Job's status to the appropriate next status.
|
148
|
-
def transition_to_next_phase
|
149
|
-
self.status = any_work_units_failed? ? FAILED :
|
150
|
-
self.splitting? ? PROCESSING :
|
151
|
-
self.mergeable? ? MERGING :
|
152
|
-
SUCCEEDED
|
153
|
-
end
|
154
153
|
|
155
154
|
# When starting a new job, or moving to a new stage, split up the inputs
|
156
155
|
# into WorkUnits, and queue them. Workers will start picking them up right
|
157
156
|
# away.
|
158
157
|
def queue_for_workers(input=nil)
|
159
158
|
input ||= JSON.parse(self.inputs)
|
160
|
-
[input].flatten.
|
159
|
+
[input].flatten.map do |wu_input|
|
161
160
|
WorkUnit.create(
|
162
161
|
:job => self,
|
163
162
|
:action => self.action,
|