cloud-crowd 0.4.1 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/cloud-crowd.gemspec +2 -2
- data/config/config.example.yml +6 -67
- data/lib/cloud-crowd.rb +5 -4
- data/lib/cloud_crowd/action.rb +13 -14
- data/lib/cloud_crowd/asset_store/cloudfiles_store.rb +6 -6
- data/lib/cloud_crowd/command_line.rb +35 -32
- data/lib/cloud_crowd/models/job.rb +1 -1
- data/lib/cloud_crowd/models/node_record.rb +3 -1
- data/lib/cloud_crowd/models/work_unit.rb +6 -1
- data/lib/cloud_crowd/node.rb +6 -4
- data/lib/cloud_crowd/schema.rb +2 -1
- data/lib/cloud_crowd/server.rb +1 -1
- data/public/css/admin_console.css +2 -1
- data/public/js/admin_console.js +4 -3
- data/test/config/config.yml +1 -1
- data/test/unit/test_action.rb +15 -1
- data/test/unit/test_node.rb +10 -9
- metadata +4 -4
data/cloud-crowd.gemspec
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Gem::Specification.new do |s|
|
|
2
2
|
s.name = 'cloud-crowd'
|
|
3
|
-
s.version = '0.
|
|
4
|
-
s.date = '2010-
|
|
3
|
+
s.version = '0.5.0' # Keep version in sync with cloud-crowd.rb
|
|
4
|
+
s.date = '2010-06-22'
|
|
5
5
|
|
|
6
6
|
s.homepage = "http://wiki.github.com/documentcloud/cloud-crowd"
|
|
7
7
|
s.summary = "Parallel Processing for the Rest of Us"
|
data/config/config.example.yml
CHANGED
|
@@ -1,68 +1,7 @@
|
|
|
1
|
-
#
|
|
2
|
-
:
|
|
1
|
+
# This file configures your CloudCrowd installation, and should be consistent
|
|
2
|
+
# between your server and all of your nodes. For more information, see:
|
|
3
|
+
# http://wiki.github.com/documentcloud/cloud-crowd/the-configuration-folder
|
|
3
4
|
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
# concurrently. 'max_load' is the maximum (one-minute) load average, above which
|
|
8
|
-
# a node will refuse to take new work. 'min_free_memory' is the minimum amount
|
|
9
|
-
# of free RAM (in megabytes) a node is allowed to have, below which no new
|
|
10
|
-
# workers are run. These settings may be used in any combination.
|
|
11
|
-
:max_workers: 5
|
|
12
|
-
# :max_load: 5.0
|
|
13
|
-
# :min_free_memory: 150
|
|
14
|
-
|
|
15
|
-
# The storage back-end that you'd like to use for intermediate and final results
|
|
16
|
-
# of processing. 's3', 'filesystem', and 'cloudfiles' are supported.
|
|
17
|
-
# 'filesystem' should only be used in development, on single-machine installations,
|
|
18
|
-
# or networked drives. If you *are* developing an action, filesystem is certainly
|
|
19
|
-
# faster and easier.
|
|
20
|
-
:storage: s3
|
|
21
|
-
|
|
22
|
-
# Please provide your AWS credentials for S3 storage of job output.
|
|
23
|
-
:aws_access_key: [your AWS access key]
|
|
24
|
-
:aws_secret_key: [your AWS secret access key]
|
|
25
|
-
|
|
26
|
-
# Choose an S3 bucket to store all CloudCrowd output, and decide if you'd like
|
|
27
|
-
# to keep all resulting files on S3 private. If so, you'll receive authenticated
|
|
28
|
-
# S3 URLs as job output, good for 24 hours. If left public, you'll get the
|
|
29
|
-
# straight URLs to the files on S3.
|
|
30
|
-
:s3_bucket: [your CloudCrowd bucket]
|
|
31
|
-
:s3_authentication: no
|
|
32
|
-
|
|
33
|
-
# Cloudfiles
|
|
34
|
-
:cloudfiles_username: [your Rackspace Cloud Files username]
|
|
35
|
-
:cloudfiles_api_key: [your Rackspace Cloud Files API key]
|
|
36
|
-
:cloudfiles_container: [your Rackspace Cloud Files container]
|
|
37
|
-
|
|
38
|
-
# The following settings configure local paths. 'local_storage_path' is the
|
|
39
|
-
# directory in which all files will be saved if you're using the 'filesystem'
|
|
40
|
-
# storage. 'log_path' and 'pid_path' are the directories in which daemonized
|
|
41
|
-
# servers and nodes will store their process ids and log files. The default
|
|
42
|
-
# values are listed.
|
|
43
|
-
# :local_storage_path: /tmp/cloud_crowd_storage
|
|
44
|
-
# :log_path: log
|
|
45
|
-
# :pid_path: tmp/pids
|
|
46
|
-
|
|
47
|
-
# Use HTTP Basic Auth for all requests? (Includes all internal worker requests
|
|
48
|
-
# to the central server). If yes, specify the login and password that all
|
|
49
|
-
# requests must provide for authentication.
|
|
50
|
-
:http_authentication: no
|
|
51
|
-
:login: [your login name]
|
|
52
|
-
:password: [your password]
|
|
53
|
-
|
|
54
|
-
# Disable all the default built-in actions
|
|
55
|
-
# :disable_default_actions: true
|
|
56
|
-
|
|
57
|
-
# Disable specific actions for the node
|
|
58
|
-
# Use this if you want to disable a limited number of actions
|
|
59
|
-
# :disabled_actions: ['word_count']
|
|
60
|
-
|
|
61
|
-
# By default, CloudCrowd looks for installed actions inside the 'actions'
|
|
62
|
-
# subdirectory of this configuration folder. 'actions_path' allows you to load
|
|
63
|
-
# additional actions from a location of your choice.
|
|
64
|
-
# :actions_path: /path/to/actions
|
|
65
|
-
|
|
66
|
-
# The number of separate attempts that will be made to process an individual
|
|
67
|
-
# work unit, before marking it as having failed.
|
|
68
|
-
:work_unit_retries: 3
|
|
5
|
+
:central_server: http://localhost:9173
|
|
6
|
+
:max_workers: 5
|
|
7
|
+
:storage: filesystem
|
data/lib/cloud-crowd.rb
CHANGED
|
@@ -26,6 +26,7 @@ autoload :YAML, 'yaml'
|
|
|
26
26
|
|
|
27
27
|
# Common code which should really be required in every circumstance.
|
|
28
28
|
require 'socket'
|
|
29
|
+
require 'net/http'
|
|
29
30
|
require 'cloud_crowd/exceptions'
|
|
30
31
|
|
|
31
32
|
module CloudCrowd
|
|
@@ -44,10 +45,10 @@ module CloudCrowd
|
|
|
44
45
|
autoload :WorkUnit, 'cloud_crowd/models'
|
|
45
46
|
|
|
46
47
|
# Keep this version in sync with the gemspec.
|
|
47
|
-
VERSION = '0.4.1'
|
|
48
|
+
VERSION = '0.5.0'
|
|
48
49
|
|
|
49
50
|
# Increment the schema version when there's a backwards incompatible change.
|
|
50
|
-
SCHEMA_VERSION =
|
|
51
|
+
SCHEMA_VERSION = 4
|
|
51
52
|
|
|
52
53
|
# Root directory of the CloudCrowd gem.
|
|
53
54
|
ROOT = File.expand_path(File.dirname(__FILE__) + '/..')
|
|
@@ -92,14 +93,14 @@ module CloudCrowd
|
|
|
92
93
|
# Configure CloudCrowd by passing in the path to <tt>config.yml</tt>.
|
|
93
94
|
def configure(config_path)
|
|
94
95
|
@config_path = File.expand_path(File.dirname(config_path))
|
|
95
|
-
@config = YAML.
|
|
96
|
+
@config = YAML.load(ERB.new(File.read(config_path)).result)
|
|
96
97
|
end
|
|
97
98
|
|
|
98
99
|
# Configure the CloudCrowd central database (and connect to it), by passing
|
|
99
100
|
# in a path to <tt>database.yml</tt>. The file should use the standard
|
|
100
101
|
# ActiveRecord connection format.
|
|
101
102
|
def configure_database(config_path, validate_schema=true)
|
|
102
|
-
configuration = YAML.
|
|
103
|
+
configuration = YAML.load(ERB.new(File.read(config_path)).result)
|
|
103
104
|
ActiveRecord::Base.establish_connection(configuration)
|
|
104
105
|
if validate_schema
|
|
105
106
|
version = ActiveRecord::Base.connection.select_values('select max(version) from schema_migrations').first.to_i
|
data/lib/cloud_crowd/action.rb
CHANGED
|
@@ -42,19 +42,18 @@ module CloudCrowd
|
|
|
42
42
|
|
|
43
43
|
# Download a file to the specified path.
|
|
44
44
|
def download(url, path)
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
# end
|
|
45
|
+
if url.match(FILE_URL)
|
|
46
|
+
FileUtils.cp(url.sub(FILE_URL, ''), path)
|
|
47
|
+
else
|
|
48
|
+
File.open(path, 'w+') do |file|
|
|
49
|
+
Net::HTTP.get_response(URI(url)) do |response|
|
|
50
|
+
response.read_body do |chunk|
|
|
51
|
+
file.write chunk
|
|
52
|
+
end
|
|
53
|
+
end
|
|
54
|
+
end
|
|
55
|
+
end
|
|
56
|
+
path
|
|
58
57
|
end
|
|
59
58
|
|
|
60
59
|
# Takes a local filesystem path, saves the file to S3, and returns the
|
|
@@ -84,7 +83,7 @@ module CloudCrowd
|
|
|
84
83
|
|
|
85
84
|
# Convert an unsafe URL into a filesystem-friendly filename.
|
|
86
85
|
def safe_filename(url)
|
|
87
|
-
url.sub
|
|
86
|
+
url = url.sub(/\?.*\Z/, '')
|
|
88
87
|
ext = File.extname(url)
|
|
89
88
|
name = URI.unescape(File.basename(url)).gsub(/[^a-zA-Z0-9_\-.]/, '-').gsub(/-+/, '-')
|
|
90
89
|
File.basename(name, ext).gsub('.', '-') + ext
|
|
@@ -27,13 +27,13 @@ module CloudCrowd
|
|
|
27
27
|
|
|
28
28
|
# Remove all of a Job's resulting files from Cloud Files, both intermediate and finished.
|
|
29
29
|
def cleanup(job)
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
end
|
|
30
|
+
@container.objects(:prefix => "#{job.action}/job_#{job.id}").each do |object|
|
|
31
|
+
begin
|
|
32
|
+
@container.delete_object object
|
|
33
|
+
rescue
|
|
34
|
+
log "failed to delete #{job.action}/job_#{job.id}"
|
|
36
35
|
end
|
|
36
|
+
end
|
|
37
37
|
end
|
|
38
38
|
end
|
|
39
39
|
|
|
@@ -2,13 +2,13 @@ require 'optparse'
|
|
|
2
2
|
|
|
3
3
|
module CloudCrowd
|
|
4
4
|
class CommandLine
|
|
5
|
-
|
|
5
|
+
|
|
6
6
|
# Configuration files required for the `crowd` command to function.
|
|
7
7
|
CONFIG_FILES = ['config.yml', 'config.ru', 'database.yml']
|
|
8
|
-
|
|
8
|
+
|
|
9
9
|
# Reference the absolute path to the root.
|
|
10
10
|
CC_ROOT = File.expand_path(File.dirname(__FILE__) + '/../..')
|
|
11
|
-
|
|
11
|
+
|
|
12
12
|
# Command-line banner for the usage message.
|
|
13
13
|
BANNER = <<-EOS
|
|
14
14
|
CloudCrowd is a MapReduce-inspired Parallel Processing System for Ruby.
|
|
@@ -25,13 +25,13 @@ Commands:
|
|
|
25
25
|
console Launch a CloudCrowd console, connected to the central database
|
|
26
26
|
load_schema Load the schema into the database specified by database.yml
|
|
27
27
|
cleanup Removes jobs that were finished over --days (7 by default) ago
|
|
28
|
-
|
|
28
|
+
|
|
29
29
|
server -d [start | stop | restart] Servers and nodes can be launched as
|
|
30
30
|
node -d [start | stop | restart] daemons, then stopped or restarted.
|
|
31
31
|
|
|
32
32
|
Options:
|
|
33
33
|
EOS
|
|
34
|
-
|
|
34
|
+
|
|
35
35
|
# Creating a CloudCrowd::CommandLine runs from the contents of ARGV.
|
|
36
36
|
def initialize
|
|
37
37
|
parse_options
|
|
@@ -47,7 +47,7 @@ Options:
|
|
|
47
47
|
else usage
|
|
48
48
|
end
|
|
49
49
|
end
|
|
50
|
-
|
|
50
|
+
|
|
51
51
|
# Spin up an IRB session with the CloudCrowd code loaded in, and a database
|
|
52
52
|
# connection established. The equivalent of Rails' `script/console`.
|
|
53
53
|
def run_console
|
|
@@ -60,7 +60,7 @@ Options:
|
|
|
60
60
|
Object.send(:include, CloudCrowd)
|
|
61
61
|
IRB.start
|
|
62
62
|
end
|
|
63
|
-
|
|
63
|
+
|
|
64
64
|
# `crowd server` can either 'start', 'stop', or 'restart'.
|
|
65
65
|
def run_server(subcommand)
|
|
66
66
|
load_code
|
|
@@ -71,7 +71,7 @@ Options:
|
|
|
71
71
|
when 'restart' then restart_server
|
|
72
72
|
end
|
|
73
73
|
end
|
|
74
|
-
|
|
74
|
+
|
|
75
75
|
# Convenience command for quickly spinning up the central server. More
|
|
76
76
|
# sophisticated deployments, load-balancing across multiple app servers,
|
|
77
77
|
# should use the config.ru rackup file directly. This method will start
|
|
@@ -86,19 +86,19 @@ Options:
|
|
|
86
86
|
puts "Starting CloudCrowd Central Server on port #{port}..."
|
|
87
87
|
exec "thin -e #{@options[:environment]} -p #{port} #{daemonize} --tag cloud-crowd-server --log #{log_path} --pid #{pid_path} -R #{rackup_path} start"
|
|
88
88
|
end
|
|
89
|
-
|
|
89
|
+
|
|
90
90
|
# Stop the daemonized central server, if it exists.
|
|
91
91
|
def stop_server
|
|
92
92
|
Thin::Server.kill(CloudCrowd.pid_path('server.pid'), 0)
|
|
93
93
|
end
|
|
94
|
-
|
|
94
|
+
|
|
95
95
|
# Restart the daemonized central server.
|
|
96
96
|
def restart_server
|
|
97
97
|
stop_server
|
|
98
98
|
sleep 1
|
|
99
99
|
start_server
|
|
100
100
|
end
|
|
101
|
-
|
|
101
|
+
|
|
102
102
|
# `crowd node` can either 'start', 'stop', or 'restart'.
|
|
103
103
|
def run_node(subcommand)
|
|
104
104
|
load_code
|
|
@@ -109,34 +109,34 @@ Options:
|
|
|
109
109
|
when 'restart' then restart_node
|
|
110
110
|
end
|
|
111
111
|
end
|
|
112
|
-
|
|
112
|
+
|
|
113
113
|
# Launch a Node. Please only run a single node per machine. The Node process
|
|
114
114
|
# will be long-lived, although its workers will come and go.
|
|
115
115
|
def start_node
|
|
116
|
-
|
|
117
|
-
puts "Starting CloudCrowd Node on port #{port}..."
|
|
118
|
-
Node.new(
|
|
116
|
+
@options[:port] ||= Node::DEFAULT_PORT
|
|
117
|
+
puts "Starting CloudCrowd Node on port #{@options[:port]}..."
|
|
118
|
+
Node.new(@options)
|
|
119
119
|
end
|
|
120
|
-
|
|
120
|
+
|
|
121
121
|
# If the daemonized Node is running, stop it.
|
|
122
122
|
def stop_node
|
|
123
123
|
Thin::Server.kill CloudCrowd.pid_path('node.pid')
|
|
124
124
|
end
|
|
125
|
-
|
|
125
|
+
|
|
126
126
|
# Restart the daemonized Node, if it exists.
|
|
127
127
|
def restart_node
|
|
128
128
|
stop_node
|
|
129
129
|
sleep 1
|
|
130
130
|
start_node
|
|
131
131
|
end
|
|
132
|
-
|
|
132
|
+
|
|
133
133
|
# Load in the database schema to the database specified in 'database.yml'.
|
|
134
134
|
def run_load_schema
|
|
135
135
|
load_code
|
|
136
136
|
connect_to_database(false)
|
|
137
137
|
require 'cloud_crowd/schema.rb'
|
|
138
138
|
end
|
|
139
|
-
|
|
139
|
+
|
|
140
140
|
# Install the required CloudCrowd configuration files into the specified
|
|
141
141
|
# directory, or the current one.
|
|
142
142
|
def run_install(install_path)
|
|
@@ -148,22 +148,22 @@ Options:
|
|
|
148
148
|
install_file "#{CC_ROOT}/config/database.example.yml", "#{install_path}/database.yml"
|
|
149
149
|
install_file "#{CC_ROOT}/actions", "#{install_path}/actions", true
|
|
150
150
|
end
|
|
151
|
-
|
|
151
|
+
|
|
152
152
|
# Clean up all Jobs in the CloudCrowd database older than --days old.
|
|
153
153
|
def run_cleanup
|
|
154
154
|
load_code
|
|
155
155
|
connect_to_database(true)
|
|
156
156
|
Job.cleanup_all(:days => @options[:days])
|
|
157
157
|
end
|
|
158
|
-
|
|
158
|
+
|
|
159
159
|
# Print `crowd` usage.
|
|
160
160
|
def usage
|
|
161
161
|
puts "\n#{@option_parser}\n"
|
|
162
162
|
end
|
|
163
|
-
|
|
164
|
-
|
|
163
|
+
|
|
164
|
+
|
|
165
165
|
private
|
|
166
|
-
|
|
166
|
+
|
|
167
167
|
# Check for configuration files, either in the current directory, or in
|
|
168
168
|
# the CLOUD_CROWD_CONFIG environment variable. Exit if they're not found.
|
|
169
169
|
def ensure_config
|
|
@@ -171,9 +171,9 @@ Options:
|
|
|
171
171
|
found = CONFIG_FILES.all? {|f| File.exists? "#{@options[:config_path]}/#{f}" }
|
|
172
172
|
found ? @config_dir = true : config_not_found
|
|
173
173
|
end
|
|
174
|
-
|
|
174
|
+
|
|
175
175
|
# Parse all options for all commands.
|
|
176
|
-
# Valid options are: --config --port --environment --daemonize --days.
|
|
176
|
+
# Valid options are: --config --port --environment --tag --daemonize --days.
|
|
177
177
|
def parse_options
|
|
178
178
|
@options = {
|
|
179
179
|
:environment => 'production',
|
|
@@ -190,6 +190,9 @@ Options:
|
|
|
190
190
|
opts.on('-e', '--environment ENV', 'server environment (defaults to production)') do |env|
|
|
191
191
|
@options[:environment] = env
|
|
192
192
|
end
|
|
193
|
+
opts.on('-t', '--tag TAG', 'tag a node with a name') do |tag|
|
|
194
|
+
@options[:tag] = tag
|
|
195
|
+
end
|
|
193
196
|
opts.on('-d', '--daemonize', 'run as a background daemon') do |daemonize|
|
|
194
197
|
@options[:daemonize] = daemonize
|
|
195
198
|
end
|
|
@@ -205,7 +208,7 @@ Options:
|
|
|
205
208
|
@option_parser.banner = BANNER
|
|
206
209
|
@option_parser.parse!(ARGV)
|
|
207
210
|
end
|
|
208
|
-
|
|
211
|
+
|
|
209
212
|
# Load in the CloudCrowd module code, dependencies, lib files and models.
|
|
210
213
|
# Not all commands require this.
|
|
211
214
|
def load_code
|
|
@@ -213,21 +216,21 @@ Options:
|
|
|
213
216
|
require "#{CC_ROOT}/lib/cloud-crowd"
|
|
214
217
|
CloudCrowd.configure("#{@options[:config_path]}/config.yml")
|
|
215
218
|
end
|
|
216
|
-
|
|
219
|
+
|
|
217
220
|
# Establish a connection to the central server's database. Not all commands
|
|
218
221
|
# require this.
|
|
219
222
|
def connect_to_database(validate_schema)
|
|
220
223
|
require 'cloud_crowd/models'
|
|
221
224
|
CloudCrowd.configure_database("#{@options[:config_path]}/database.yml", validate_schema)
|
|
222
225
|
end
|
|
223
|
-
|
|
226
|
+
|
|
224
227
|
# Exit with an explanation if the configuration files couldn't be found.
|
|
225
228
|
def config_not_found
|
|
226
229
|
puts "`crowd` can't find the CloudCrowd configuration directory. Please use `crowd -c path/to/config`, or run `crowd` from inside of the configuration directory itself."
|
|
227
230
|
exit(1)
|
|
228
231
|
end
|
|
229
|
-
|
|
230
|
-
# Install a file and log the installation. If we're overwriting a file,
|
|
232
|
+
|
|
233
|
+
# Install a file and log the installation. If we're overwriting a file,
|
|
231
234
|
# offer a chance to back out.
|
|
232
235
|
def install_file(source, dest, is_dir=false)
|
|
233
236
|
if File.exists?(dest)
|
|
@@ -237,6 +240,6 @@ Options:
|
|
|
237
240
|
is_dir ? FileUtils.cp_r(source, dest) : FileUtils.cp(source, dest)
|
|
238
241
|
puts "installed #{dest}" unless ENV['RACK_ENV'] == 'test'
|
|
239
242
|
end
|
|
240
|
-
|
|
243
|
+
|
|
241
244
|
end
|
|
242
245
|
end
|
|
@@ -48,7 +48,7 @@ module CloudCrowd
|
|
|
48
48
|
return queue_for_workers([outs]) if merging?
|
|
49
49
|
if complete?
|
|
50
50
|
update_attributes(:outputs => outs, :time => time_taken)
|
|
51
|
-
puts "Job ##{id} (#{action}) #{display_status}."
|
|
51
|
+
puts "Job ##{id} (#{action}) #{display_status}." unless ENV['RACK_ENV'] == 'test'
|
|
52
52
|
Thread.new { fire_callback } if callback_url
|
|
53
53
|
end
|
|
54
54
|
self
|
|
@@ -28,6 +28,7 @@ module CloudCrowd
|
|
|
28
28
|
:ip_address => request.ip,
|
|
29
29
|
:port => params[:host].match(PORT)[1].to_i,
|
|
30
30
|
:busy => params[:busy],
|
|
31
|
+
:tag => params[:tag],
|
|
31
32
|
:max_workers => params[:max_workers],
|
|
32
33
|
:enabled_actions => params[:enabled_actions]
|
|
33
34
|
}
|
|
@@ -93,7 +94,8 @@ module CloudCrowd
|
|
|
93
94
|
def to_json(opts={})
|
|
94
95
|
{ 'host' => host,
|
|
95
96
|
'workers' => worker_pids,
|
|
96
|
-
'status' => display_status
|
|
97
|
+
'status' => display_status,
|
|
98
|
+
'tag' => tag
|
|
97
99
|
}.to_json
|
|
98
100
|
end
|
|
99
101
|
|
|
@@ -39,8 +39,9 @@ module CloudCrowd
|
|
|
39
39
|
# action in question disabled.
|
|
40
40
|
def self.distribute_to_nodes
|
|
41
41
|
reservation = nil
|
|
42
|
+
filter = {}
|
|
42
43
|
loop do
|
|
43
|
-
return unless reservation = WorkUnit.reserve_available(:limit => RESERVATION_LIMIT)
|
|
44
|
+
return unless reservation = WorkUnit.reserve_available(:limit => RESERVATION_LIMIT, :conditions => filter)
|
|
44
45
|
work_units = WorkUnit.reserved(reservation)
|
|
45
46
|
available_nodes = NodeRecord.available
|
|
46
47
|
while node = available_nodes.shift and unit = work_units.shift do
|
|
@@ -54,6 +55,10 @@ module CloudCrowd
|
|
|
54
55
|
end
|
|
55
56
|
work_units.push(unit)
|
|
56
57
|
end
|
|
58
|
+
if work_units.any? && available_nodes.any?
|
|
59
|
+
filter = {:action => available_nodes.map {|node| node.actions }.flatten.uniq }
|
|
60
|
+
next
|
|
61
|
+
end
|
|
57
62
|
return if work_units.any? || available_nodes.empty?
|
|
58
63
|
end
|
|
59
64
|
ensure
|
data/lib/cloud_crowd/node.rb
CHANGED
|
@@ -30,7 +30,7 @@ module CloudCrowd
|
|
|
30
30
|
# The response sent back when this node is overloaded.
|
|
31
31
|
OVERLOADED_MESSAGE = 'Node Overloaded'
|
|
32
32
|
|
|
33
|
-
attr_reader :enabled_actions, :host, :port, :central
|
|
33
|
+
attr_reader :enabled_actions, :host, :port, :tag, :central
|
|
34
34
|
|
|
35
35
|
set :root, ROOT
|
|
36
36
|
set :authorization_realm, "CloudCrowd"
|
|
@@ -63,15 +63,16 @@ module CloudCrowd
|
|
|
63
63
|
end
|
|
64
64
|
|
|
65
65
|
# When creating a node, pass an options hash: :port (the port it should run on), :daemonize, and :tag.
|
|
66
|
-
def initialize(
|
|
66
|
+
def initialize(options={})
|
|
67
67
|
require 'json'
|
|
68
68
|
CloudCrowd.identity = :node
|
|
69
69
|
@central = CloudCrowd.central_server
|
|
70
70
|
@host = Socket.gethostname
|
|
71
71
|
@enabled_actions = CloudCrowd.actions.keys - (CloudCrowd.config[:disabled_actions] || [])
|
|
72
|
-
@port = port || DEFAULT_PORT
|
|
72
|
+
@port = options[:port] || DEFAULT_PORT
|
|
73
73
|
@id = "#{@host}:#{@port}"
|
|
74
|
-
@daemon =
|
|
74
|
+
@daemon = !!options[:daemonize]
|
|
75
|
+
@tag = options[:tag]
|
|
75
76
|
@overloaded = false
|
|
76
77
|
@max_load = CloudCrowd.config[:max_load]
|
|
77
78
|
@min_memory = CloudCrowd.config[:min_free_memory]
|
|
@@ -102,6 +103,7 @@ module CloudCrowd
|
|
|
102
103
|
def check_in(critical=false)
|
|
103
104
|
@central["/node/#{@id}"].put(
|
|
104
105
|
:busy => @overloaded,
|
|
106
|
+
:tag => @tag,
|
|
105
107
|
:max_workers => CloudCrowd.config[:max_workers],
|
|
106
108
|
:enabled_actions => @enabled_actions.join(',')
|
|
107
109
|
)
|
data/lib/cloud_crowd/schema.rb
CHANGED
|
@@ -18,8 +18,9 @@ ActiveRecord::Schema.define(:version => CloudCrowd::SCHEMA_VERSION) do
|
|
|
18
18
|
t.string "host", :null => false
|
|
19
19
|
t.string "ip_address", :null => false
|
|
20
20
|
t.integer "port", :null => false
|
|
21
|
-
t.
|
|
21
|
+
t.text "enabled_actions", :default => '', :null => false
|
|
22
22
|
t.boolean "busy", :default => false, :null => false
|
|
23
|
+
t.string "tag"
|
|
23
24
|
t.integer "max_workers"
|
|
24
25
|
t.datetime "created_at"
|
|
25
26
|
t.datetime "updated_at"
|
data/lib/cloud_crowd/server.rb
CHANGED
|
@@ -71,7 +71,7 @@ module CloudCrowd
|
|
|
71
71
|
post '/jobs' do
|
|
72
72
|
job = Job.create_from_request(JSON.parse(params[:job]))
|
|
73
73
|
WorkUnit.distribute_to_nodes
|
|
74
|
-
puts "Job ##{job.id} (#{job.action}) started."
|
|
74
|
+
puts "Job ##{job.id} (#{job.action}) started." unless ENV['RACK_ENV'] == 'test'
|
|
75
75
|
json job
|
|
76
76
|
end
|
|
77
77
|
|
|
@@ -126,8 +126,9 @@ body {
|
|
|
126
126
|
}
|
|
127
127
|
#nodes .node {
|
|
128
128
|
font-size: 11px;
|
|
129
|
-
line-height: 22px;
|
|
129
|
+
line-height: 22px; height: 22px;
|
|
130
130
|
background-image: url(../images/server.png);
|
|
131
|
+
overflow: hidden;
|
|
131
132
|
}
|
|
132
133
|
#nodes .node.busy {
|
|
133
134
|
background-image: url(../images/server_busy.png);
|
data/public/js/admin_console.js
CHANGED
|
@@ -107,10 +107,11 @@ window.Console = {
|
|
|
107
107
|
$('.has_nodes', header).html(nc + " Node" + (nc != 1 ? 's' : '') + " / " + wc + " Worker" + (wc != 1 ? 's' : ''));
|
|
108
108
|
header.toggleClass('no_nodes', this._nodes.length <= 0);
|
|
109
109
|
$('#nodes').html($.map(this._nodes, function(node) {
|
|
110
|
-
var html
|
|
110
|
+
var html = "";
|
|
111
111
|
var extra = node.status == 'busy' ? ' <span class="busy">[busy]</span>' : '';
|
|
112
|
-
|
|
113
|
-
html
|
|
112
|
+
var tag = node.tag ? '[' + node.tag + '] ' : '';
|
|
113
|
+
html += '<div class="node ' + node.status + '">' + tag + node.host + extra + '</div>';
|
|
114
|
+
html += $.map(node.workers, function(pid) {
|
|
114
115
|
var name = pid + '@' + node.host;
|
|
115
116
|
return '<div class="worker" rel="' + name + '">' + name + '</div>';
|
|
116
117
|
}).join('');
|
data/test/config/config.yml
CHANGED
data/test/unit/test_action.rb
CHANGED
|
@@ -42,8 +42,15 @@ class ActionTest < Test::Unit::TestCase
|
|
|
42
42
|
assert name == 'file.pdf'
|
|
43
43
|
end
|
|
44
44
|
|
|
45
|
+
should "not change the original URL when generating a safe filename" do
|
|
46
|
+
url = "http://example.com/file.format?parameter=value"
|
|
47
|
+
path = @action.safe_filename url
|
|
48
|
+
assert url == "http://example.com/file.format?parameter=value"
|
|
49
|
+
assert path == "file.format"
|
|
50
|
+
end
|
|
51
|
+
|
|
45
52
|
should "be able to count the number of words in this file" do
|
|
46
|
-
assert @action.process ==
|
|
53
|
+
assert @action.process == 274
|
|
47
54
|
end
|
|
48
55
|
|
|
49
56
|
should "raise an exception when backticks fail" do
|
|
@@ -51,6 +58,13 @@ class ActionTest < Test::Unit::TestCase
|
|
|
51
58
|
assert_raise(CloudCrowd::Error::CommandFailed) { @action.process }
|
|
52
59
|
end
|
|
53
60
|
|
|
61
|
+
should "be able to download a remote file" do
|
|
62
|
+
path = "temp.txt"
|
|
63
|
+
@action.download('http://example.com', path)
|
|
64
|
+
assert File.read(path).match(/These domain names are reserved for use in documentation/)
|
|
65
|
+
FileUtils.rm path
|
|
66
|
+
end
|
|
67
|
+
|
|
54
68
|
end
|
|
55
69
|
|
|
56
70
|
|
data/test/unit/test_node.rb
CHANGED
|
@@ -1,31 +1,32 @@
|
|
|
1
1
|
require 'test_helper'
|
|
2
2
|
|
|
3
3
|
class NodeUnitTest < Test::Unit::TestCase
|
|
4
|
-
|
|
4
|
+
|
|
5
5
|
context "A Node" do
|
|
6
|
-
|
|
6
|
+
|
|
7
7
|
setup do
|
|
8
|
-
@node = Node.new(11011).instance_variable_get(:@app)
|
|
8
|
+
@node = Node.new(:port => 11011, :tag => "nodule").instance_variable_get(:@app)
|
|
9
9
|
end
|
|
10
|
-
|
|
10
|
+
|
|
11
11
|
should "set the identity of the Ruby instance" do
|
|
12
12
|
assert CloudCrowd.node?
|
|
13
13
|
end
|
|
14
|
-
|
|
14
|
+
|
|
15
15
|
should "instantiate correctly" do
|
|
16
16
|
assert @node.central.to_s == "http://localhost:9173"
|
|
17
17
|
assert @node.port == 11011
|
|
18
18
|
assert @node.host == Socket.gethostname
|
|
19
19
|
assert @node.enabled_actions.length > 2
|
|
20
20
|
assert @node.asset_store.is_a? AssetStore::FilesystemStore
|
|
21
|
+
assert @node.tag == "nodule"
|
|
21
22
|
end
|
|
22
|
-
|
|
23
|
+
|
|
23
24
|
should "trap signals and launch a server at start" do
|
|
24
25
|
Thin::Server.any_instance.expects(:start)
|
|
25
26
|
@node.expects(:check_in)
|
|
26
27
|
@node.start
|
|
27
28
|
end
|
|
28
|
-
|
|
29
|
+
|
|
29
30
|
should "be able to determine if the node is overloaded" do
|
|
30
31
|
assert !@node.overloaded?
|
|
31
32
|
@node.instance_variable_set :@max_load, 0.01
|
|
@@ -35,7 +36,7 @@ class NodeUnitTest < Test::Unit::TestCase
|
|
|
35
36
|
@node.instance_variable_set :@min_memory, 8000
|
|
36
37
|
assert @node.overloaded?
|
|
37
38
|
end
|
|
38
|
-
|
|
39
|
+
|
|
39
40
|
end
|
|
40
|
-
|
|
41
|
+
|
|
41
42
|
end
|
metadata
CHANGED
|
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
|
|
|
4
4
|
prerelease: false
|
|
5
5
|
segments:
|
|
6
6
|
- 0
|
|
7
|
-
-
|
|
8
|
-
-
|
|
9
|
-
version: 0.4.1
|
|
7
|
+
- 5
|
|
8
|
+
- 0
|
|
9
|
+
version: 0.5.0
|
|
10
10
|
platform: ruby
|
|
11
11
|
authors:
|
|
12
12
|
- Jeremy Ashkenas
|
|
@@ -14,7 +14,7 @@ autorequire:
|
|
|
14
14
|
bindir: bin
|
|
15
15
|
cert_chain: []
|
|
16
16
|
|
|
17
|
-
date: 2010-
|
|
17
|
+
date: 2010-06-22 00:00:00 -04:00
|
|
18
18
|
default_executable:
|
|
19
19
|
dependencies:
|
|
20
20
|
- !ruby/object:Gem::Dependency
|