mooktakim-cloud-crowd 0.3.4
Sign up to get free protection for your applications and to get access to all the features.
- data/EPIGRAPHS +17 -0
- data/LICENSE +22 -0
- data/README +93 -0
- data/actions/graphics_magick.rb +43 -0
- data/actions/process_pdfs.rb +92 -0
- data/actions/word_count.rb +16 -0
- data/bin/crowd +5 -0
- data/config/config.example.ru +23 -0
- data/config/config.example.yml +55 -0
- data/config/database.example.yml +16 -0
- data/examples/graphics_magick_example.rb +44 -0
- data/examples/process_pdfs_example.rb +40 -0
- data/examples/word_count_example.rb +42 -0
- data/lib/cloud-crowd.rb +188 -0
- data/lib/cloud_crowd/action.rb +125 -0
- data/lib/cloud_crowd/asset_store/filesystem_store.rb +39 -0
- data/lib/cloud_crowd/asset_store/s3_store.rb +43 -0
- data/lib/cloud_crowd/asset_store.rb +41 -0
- data/lib/cloud_crowd/command_line.rb +242 -0
- data/lib/cloud_crowd/exceptions.rb +46 -0
- data/lib/cloud_crowd/helpers/authorization.rb +52 -0
- data/lib/cloud_crowd/helpers/resources.rb +25 -0
- data/lib/cloud_crowd/helpers.rb +8 -0
- data/lib/cloud_crowd/inflector.rb +19 -0
- data/lib/cloud_crowd/models/job.rb +190 -0
- data/lib/cloud_crowd/models/node_record.rb +107 -0
- data/lib/cloud_crowd/models/work_unit.rb +170 -0
- data/lib/cloud_crowd/models.rb +40 -0
- data/lib/cloud_crowd/node.rb +199 -0
- data/lib/cloud_crowd/schema.rb +50 -0
- data/lib/cloud_crowd/server.rb +123 -0
- data/lib/cloud_crowd/worker.rb +149 -0
- data/mooktakim-cloud-crowd.gemspec +116 -0
- data/public/css/admin_console.css +243 -0
- data/public/css/reset.css +42 -0
- data/public/images/bullet_green.png +0 -0
- data/public/images/bullet_white.png +0 -0
- data/public/images/cloud_hand.png +0 -0
- data/public/images/header_back.png +0 -0
- data/public/images/logo.png +0 -0
- data/public/images/queue_fill.png +0 -0
- data/public/images/server.png +0 -0
- data/public/images/server_busy.png +0 -0
- data/public/images/server_error.png +0 -0
- data/public/images/sidebar_bottom.png +0 -0
- data/public/images/sidebar_top.png +0 -0
- data/public/images/worker_info.png +0 -0
- data/public/images/worker_info_loading.gif +0 -0
- data/public/js/admin_console.js +197 -0
- data/public/js/excanvas.js +1 -0
- data/public/js/flot.js +1 -0
- data/public/js/jquery.js +19 -0
- data/test/acceptance/test_failing_work_units.rb +33 -0
- data/test/acceptance/test_node.rb +20 -0
- data/test/acceptance/test_server.rb +66 -0
- data/test/acceptance/test_word_count.rb +40 -0
- data/test/blueprints.rb +25 -0
- data/test/config/actions/failure_testing.rb +13 -0
- data/test/config/config.ru +17 -0
- data/test/config/config.yml +6 -0
- data/test/config/database.yml +3 -0
- data/test/test_helper.rb +19 -0
- data/test/unit/test_action.rb +70 -0
- data/test/unit/test_configuration.rb +48 -0
- data/test/unit/test_job.rb +103 -0
- data/test/unit/test_node.rb +41 -0
- data/test/unit/test_node_record.rb +42 -0
- data/test/unit/test_work_unit.rb +53 -0
- data/test/unit/test_worker.rb +48 -0
- data/views/operations_center.erb +82 -0
- metadata +290 -0
data/lib/cloud-crowd.rb
ADDED
@@ -0,0 +1,188 @@
|
|
1
|
+
# The Grand Central of code loading...
|
2
|
+
|
3
|
+
$LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__))
|
4
|
+
|
5
|
+
# Common Gems:
|
6
|
+
require 'rubygems'
|
7
|
+
gem 'activerecord', '2.3.5'
|
8
|
+
gem 'json'
|
9
|
+
gem 'rest-client'
|
10
|
+
gem 'right_aws'
|
11
|
+
gem 'sinatra'
|
12
|
+
gem 'thin'
|
13
|
+
|
14
|
+
# Autoloading for all the pieces which may or may not be needed:
|
15
|
+
autoload :ActiveRecord, 'active_record'
|
16
|
+
autoload :Benchmark, 'benchmark'
|
17
|
+
autoload :Digest, 'digest'
|
18
|
+
autoload :ERB, 'erb'
|
19
|
+
autoload :FileUtils, 'fileutils'
|
20
|
+
autoload :JSON, 'json'
|
21
|
+
autoload :RestClient, 'rest_client'
|
22
|
+
autoload :RightAws, 'right_aws'
|
23
|
+
autoload :Sinatra, 'sinatra'
|
24
|
+
autoload :Thin, 'thin'
|
25
|
+
autoload :YAML, 'yaml'
|
26
|
+
|
27
|
+
# Common code which should really be required in every circumstance.
|
28
|
+
require 'socket'
|
29
|
+
require 'cloud_crowd/exceptions'
|
30
|
+
|
31
|
+
module CloudCrowd
|
32
|
+
|
33
|
+
# Autoload all the CloudCrowd internals.
|
34
|
+
autoload :Action, 'cloud_crowd/action'
|
35
|
+
autoload :AssetStore, 'cloud_crowd/asset_store'
|
36
|
+
autoload :CommandLine, 'cloud_crowd/command_line'
|
37
|
+
autoload :Helpers, 'cloud_crowd/helpers'
|
38
|
+
autoload :Inflector, 'cloud_crowd/inflector'
|
39
|
+
autoload :Job, 'cloud_crowd/models'
|
40
|
+
autoload :Node, 'cloud_crowd/node'
|
41
|
+
autoload :NodeRecord, 'cloud_crowd/models'
|
42
|
+
autoload :Server, 'cloud_crowd/server'
|
43
|
+
autoload :Worker, 'cloud_crowd/worker'
|
44
|
+
autoload :WorkUnit, 'cloud_crowd/models'
|
45
|
+
|
46
|
+
# Keep this version in sync with the gemspec.
|
47
|
+
VERSION = '0.3.4' # matches the version of the packaged gem (reported by `crowd -v`)
|
48
|
+
|
49
|
+
# Increment the schema version when there's a backwards incompatible change.
|
50
|
+
SCHEMA_VERSION = 3
|
51
|
+
|
52
|
+
# Root directory of the CloudCrowd gem.
|
53
|
+
ROOT = File.expand_path(File.dirname(__FILE__) + '/..')
|
54
|
+
|
55
|
+
# Default folder to log daemonized servers and nodes into.
|
56
|
+
LOG_PATH = 'log'
|
57
|
+
|
58
|
+
# Default folder to contain the pids of daemonized servers and nodes.
|
59
|
+
PID_PATH = 'tmp/pids'
|
60
|
+
|
61
|
+
# A Job is processing if its WorkUnits are in the queue to be handled by nodes.
|
62
|
+
PROCESSING = 1
|
63
|
+
|
64
|
+
# A Job has succeeded if all of its WorkUnits have finished successfully.
|
65
|
+
SUCCEEDED = 2
|
66
|
+
|
67
|
+
# A Job has failed if even a single one of its WorkUnits has failed (they may
|
68
|
+
# be attempted multiple times on failure, however).
|
69
|
+
FAILED = 3
|
70
|
+
|
71
|
+
# A Job is splitting if it's in the process of dividing its inputs up into
|
72
|
+
# multiple WorkUnits.
|
73
|
+
SPLITTING = 4
|
74
|
+
|
75
|
+
# A Job is merging if it's busy collecting all of its successful WorkUnits
|
76
|
+
# back together into the final result.
|
77
|
+
MERGING = 5
|
78
|
+
|
79
|
+
# A Job is considered to be complete if it succeeded or if it failed.
|
80
|
+
COMPLETE = [SUCCEEDED, FAILED]
|
81
|
+
|
82
|
+
# A Job is considered incomplete if it's being processed, split up or merged.
|
83
|
+
INCOMPLETE = [PROCESSING, SPLITTING, MERGING]
|
84
|
+
|
85
|
+
# Mapping of statuses to their display strings.
|
86
|
+
DISPLAY_STATUS_MAP = ['unknown', 'processing', 'succeeded', 'failed', 'splitting', 'merging']
|
87
|
+
|
88
|
+
class << self
|
89
|
+
attr_reader :config
|
90
|
+
attr_accessor :identity
|
91
|
+
|
92
|
+
# Configure CloudCrowd by passing in the path to <tt>config.yml</tt>.
|
93
|
+
def configure(config_path)
|
94
|
+
@config_path = File.expand_path(File.dirname(config_path))
|
95
|
+
@config = YAML.load_file(config_path)
|
96
|
+
end
|
97
|
+
|
98
|
+
# Configure the CloudCrowd central database (and connect to it), by passing
|
99
|
+
# in a path to <tt>database.yml</tt>. The file should use the standard
|
100
|
+
# ActiveRecord connection format.
|
101
|
+
# Connects ActiveRecord using the YAML settings found at +config_path+.
#
# When +validate_schema+ is true, the highest version recorded in
# schema_migrations must equal SCHEMA_VERSION: returns true on a match,
# otherwise prints upgrade instructions and terminates the process with a
# non-zero exit status (an out-of-date schema is a failure, so it must not
# look like a successful run to the calling shell or init script).
# Returns nil when validation is skipped.
def configure_database(config_path, validate_schema=true)
  configuration = YAML.load_file(config_path)
  ActiveRecord::Base.establish_connection(configuration)
  return unless validate_schema

  version = ActiveRecord::Base.connection.select_values('select max(version) from schema_migrations').first.to_i
  return true if version == SCHEMA_VERSION

  puts "Your database schema is out of date. Please use `crowd load_schema` to update it. This will wipe all the tables, so make sure that your jobs have a chance to finish first.\nexiting..."
  exit(1)
end
|
111
|
+
|
112
|
+
# Get a reference to the central server, including authentication if
|
113
|
+
# configured.
|
114
|
+
def central_server
|
115
|
+
@central_server ||= RestClient::Resource.new(CloudCrowd.config[:central_server], CloudCrowd.client_options)
|
116
|
+
end
|
117
|
+
|
118
|
+
# The path that daemonized servers and nodes will log to.
|
119
|
+
def log_path(log_file=nil)
|
120
|
+
@log_path ||= config[:log_path] || LOG_PATH
|
121
|
+
log_file ? File.join(@log_path, log_file) : @log_path
|
122
|
+
end
|
123
|
+
|
124
|
+
# The path in which daemonized servers and nodes will store their pids.
|
125
|
+
def pid_path(pid_file=nil)
|
126
|
+
@pid_path ||= config[:pid_path] || PID_PATH
|
127
|
+
pid_file ? File.join(@pid_path, pid_file) : @pid_path
|
128
|
+
end
|
129
|
+
|
130
|
+
# The standard RestClient options for the central server talking to nodes,
|
131
|
+
# as well as the other way around. There's a timeout of 5 seconds to open
|
132
|
+
# a connection, and a timeout of 30 to finish reading it.
|
133
|
+
def client_options
|
134
|
+
return @client_options if @client_options
|
135
|
+
@client_options = {:timeout => 30, :open_timeout => 5}
|
136
|
+
if CloudCrowd.config[:http_authentication]
|
137
|
+
@client_options[:user] = CloudCrowd.config[:login]
|
138
|
+
@client_options[:password] = CloudCrowd.config[:password]
|
139
|
+
end
|
140
|
+
@client_options
|
141
|
+
end
|
142
|
+
|
143
|
+
# Return the displayable status name of an internal CloudCrowd status number.
|
144
|
+
# (See the above constants).
|
145
|
+
# Looks up the display string for +status+ in DISPLAY_STATUS_MAP.
# Anything that is not a non-negative Integer index with an entry in the map
# (nil, negative numbers, out-of-range values, non-integers) yields 'unknown'.
# The explicit guard matters because Array#[] would otherwise count negative
# indexes from the end of the map and raise TypeError for nil.
def display_status(status)
  return 'unknown' unless status.is_a?(Integer) && status >= 0
  DISPLAY_STATUS_MAP[status] || 'unknown'
end
|
148
|
+
|
149
|
+
# CloudCrowd::Actions are requested dynamically by name. Access them through
|
150
|
+
# this actions property, which behaves like a hash. At load time, we
|
151
|
+
# load all installed Actions and CloudCrowd's default Actions into it.
|
152
|
+
# If you wish to have certain nodes be specialized to only handle certain
|
153
|
+
# Actions, then install only those into the actions directory.
|
154
|
+
def actions
|
155
|
+
return @actions if @actions
|
156
|
+
@actions = action_paths.inject({}) do |memo, path|
|
157
|
+
name = File.basename(path, File.extname(path))
|
158
|
+
require path
|
159
|
+
memo[name] = Module.const_get(Inflector.camelize(name))
|
160
|
+
memo
|
161
|
+
end
|
162
|
+
rescue NameError => e
|
163
|
+
adjusted_message = "One of your actions failed to load. Please ensure that the name of your action class can be deduced from the name of the file. ex: 'word_count.rb' => 'WordCount'\n#{e.message}"
|
164
|
+
raise NameError.new(adjusted_message, e.name)
|
165
|
+
end
|
166
|
+
|
167
|
+
# Retrieve the list of every installed Action for this node or server.
|
168
|
+
def action_paths
|
169
|
+
default_actions = Dir["#{ROOT}/actions/*.rb"]
|
170
|
+
installed_actions = Dir["#{@config_path}/actions/*.rb"]
|
171
|
+
custom_actions = CloudCrowd.config[:actions_path] ? Dir["#{CloudCrowd.config[:actions_path]}/*.rb"] : []
|
172
|
+
default_actions + installed_actions + custom_actions
|
173
|
+
end
|
174
|
+
|
175
|
+
# Is this CloudCrowd instance a server? Useful for avoiding loading unneeded
|
176
|
+
# code from actions.
|
177
|
+
def server?
|
178
|
+
@identity == :server
|
179
|
+
end
|
180
|
+
|
181
|
+
# Or is it a node?
|
182
|
+
def node?
|
183
|
+
@identity == :node
|
184
|
+
end
|
185
|
+
|
186
|
+
end
|
187
|
+
|
188
|
+
end
|
@@ -0,0 +1,125 @@
|
|
1
|
+
module CloudCrowd
|
2
|
+
|
3
|
+
# As you write your custom actions, have them inherit from CloudCrowd::Action.
|
4
|
+
# All actions must implement a +process+ method, which should return a
|
5
|
+
# JSON-serializable object that will be used as the output for the work unit.
|
6
|
+
# See the default actions for examples.
|
7
|
+
#
|
8
|
+
# Optionally, actions may define +split+ and +merge+ methods to do mapping
|
9
|
+
# and reducing around the +input+. +split+ should return an array of URLs --
|
10
|
+
# to be mapped into WorkUnits and processed in parallel. In the +merge+ step,
|
11
|
+
# +input+ will be an array of all the resulting outputs from calling process.
|
12
|
+
#
|
13
|
+
# All actions have use of an individual +work_directory+, for scratch files,
|
14
|
+
# and spend their duration inside of it, so relative paths work well.
|
15
|
+
#
|
16
|
+
# Note that Actions inherit a backticks (`) method that raises an Exception
|
17
|
+
# if the external command fails.
|
18
|
+
class Action
|
19
|
+
|
20
|
+
FILE_URL = /\Afile:\/\//
|
21
|
+
|
22
|
+
attr_reader :input, :input_path, :file_name, :options, :work_directory
|
23
|
+
|
24
|
+
# Initializing an Action sets up all of the read-only variables that
|
25
|
+
# form the bulk of the API for action subclasses. (Paths to read from and
|
26
|
+
# write to). It creates the +work_directory+ and moves into it.
|
27
|
+
# If we're not merging multiple results, it downloads the input file into
|
28
|
+
# the +work_directory+ before starting.
|
29
|
+
# Stores +input+, +options+ and +store+, reads the job and work unit ids out
# of +options+ ('job_id' / 'work_unit_id'), makes sure the work directory
# (the store's temp path joined with storage_prefix) exists, then parses the
# input and downloads it when it is a URL. +status+ is accepted but not used
# here.
def initialize(status, input, options, store)
  @input, @options, @store = input, options, store
  @job_id, @work_unit_id = options['job_id'], options['work_unit_id']
  @work_directory = File.expand_path(File.join(@store.temp_storage_path, storage_prefix))
  # File.exist? rather than the File.exists? alias, which Ruby 3.2 removed.
  FileUtils.mkdir_p(@work_directory) unless File.exist?(@work_directory)
  parse_input
  download_input
end
|
37
|
+
|
38
|
+
# Each Action subclass must implement a +process+ method, overriding this.
|
39
|
+
def process
|
40
|
+
raise NotImplementedError, "CloudCrowd::Actions must override 'process' with their own processing code."
|
41
|
+
end
|
42
|
+
|
43
|
+
# Download a file to the specified path.
|
44
|
+
# Fetches +url+ with curl, writing the response to +path+, and returns +path+.
#
# Both arguments are shell-escaped before being interpolated: the URL comes
# from job input, and an unescaped quote, `$(...)` or `;` in it would
# otherwise be executed by the shell.
def download(url, path)
  require 'shellwords' # stdlib; loaded here because this file has no require block
  `curl -s #{Shellwords.escape(url)} > #{Shellwords.escape(path)}`
  path
  # The previous implementation is below, and, although it would be
  # wonderful not to shell out, RestClient wasn't handling URLs with encoded
  # entities (%20, for example), and doesn't let you download to a given
  # location. Getting a RestClient patch in would be ideal.
  #
  # if url.match(FILE_URL)
  #   FileUtils.cp(url.sub(FILE_URL, ''), path)
  # else
  #   resp = RestClient::Request.execute(:url => url, :method => :get, :raw_response => true)
  #   FileUtils.mv resp.file.path, path
  # end
end
|
59
|
+
|
60
|
+
# Takes a local filesystem path, saves the file to S3, and returns the
|
61
|
+
# public (or authenticated) url on S3 where the file can be accessed.
|
62
|
+
def save(file_path)
|
63
|
+
save_path = File.join(storage_prefix, File.basename(file_path))
|
64
|
+
@store.save(file_path, save_path)
|
65
|
+
end
|
66
|
+
|
67
|
+
# After the Action has finished, we remove the work directory and return
|
68
|
+
# to the root directory (where workers run by default).
|
69
|
+
# Recursively deletes @work_directory when it is present; does nothing when
# it has already been removed.
def cleanup_work_directory
  # File.exist? rather than the File.exists? alias, which Ruby 3.2 removed.
  FileUtils.rm_r(@work_directory) if File.exist?(@work_directory)
end
|
72
|
+
|
73
|
+
# Actions have a backticks command that raises a CommandFailed exception
|
74
|
+
# on failure, so that processing doesn't just blithely continue.
|
75
|
+
# Runs +command+ through the regular backticks and returns its output.
# Raises Error::CommandFailed, carrying the output and the exit code, when
# the command does not finish successfully.
def `(command)
  result = super(command)
  status = $?
  return result if status.success?
  # Process::Status#to_i is the raw wait status (exit 3 => 768); report the
  # real exit code, falling back to the raw value when a signal ended it.
  exit_code = status.exited? ? status.exitstatus : status.to_i
  raise Error::CommandFailed.new(result, exit_code)
end
|
81
|
+
|
82
|
+
|
83
|
+
private
|
84
|
+
|
85
|
+
# Convert an unsafe URL into a filesystem-friendly filename.
|
86
|
+
# Builds a filename from the last path segment of +url+ containing only
# letters, digits, '_', '-' and '.': percent-escapes are decoded, every other
# character becomes '-', runs of '-' collapse, and dots before the extension
# become '-'.
#
# The extension is taken from the sanitized name rather than the raw URL, so
# a query string or escape sequence after the last dot cannot put unsafe
# characters back into the result.
def safe_filename(url)
  basename = File.basename(url)
  # URI.unescape was removed in Ruby 3.0; the parser object offers the same
  # decoding on every Ruby that still ships it.
  decoded = URI.respond_to?(:unescape) ? URI.unescape(basename) : URI::DEFAULT_PARSER.unescape(basename)
  name = decoded.gsub(/[^a-zA-Z0-9_\-.]/, '-').gsub(/-+/, '-')
  ext = File.extname(name)
  File.basename(name, ext).gsub('.', '-') + ext
end
|
91
|
+
|
92
|
+
# The directory prefix to use for both local and S3 storage.
|
93
|
+
# [action]/job_[job_id]/unit_[work_unit_id]
|
94
|
+
# Returns the memoized relative path made of the underscored class name,
# "job_<job id>" and, when a work unit id is set, "unit_<work unit id>".
# The parts are built only on the first call, not on every lookup.
def storage_prefix
  @storage_prefix ||= begin
    path_parts = [Inflector.underscore(self.class), "job_#{@job_id}"]
    path_parts << "unit_#{@work_unit_id}" if @work_unit_id
    File.join(path_parts)
  end
end
|
101
|
+
|
102
|
+
# If we think that the input is JSON, replace it with the parsed form.
|
103
|
+
# It would be great if the JSON module had an is_json? method.
|
104
|
+
# Replaces @input with its parsed JSON form when it starts with '[' or '{'.
# Input that fails to parse is left exactly as it was.
def parse_input
  first_char = @input[0..0]
  return unless first_char == '[' || first_char == '{'
  @input = begin
    JSON.parse(@input)
  rescue StandardError
    @input
  end
end
|
108
|
+
|
109
|
+
# True when @input parses as a URI that carries a scheme; false when it has
# no scheme or cannot be parsed at all.
def input_is_url?
  scheme = URI.parse(@input).scheme
  !scheme.nil?
rescue StandardError
  false
end
|
112
|
+
|
113
|
+
# If the input is a URL, download the file before beginning processing.
|
114
|
+
def download_input
|
115
|
+
return unless input_is_url?
|
116
|
+
Dir.chdir(@work_directory) do
|
117
|
+
@input_path = File.join(@work_directory, safe_filename(@input))
|
118
|
+
@file_name = File.basename(@input_path, File.extname(@input_path))
|
119
|
+
download(@input, @input_path)
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
123
|
+
end
|
124
|
+
|
125
|
+
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
module CloudCrowd
|
2
|
+
class AssetStore
|
3
|
+
|
4
|
+
# The FilesystemStore is an implementation of the AssetStore, good only for
|
5
|
+
# use in development, testing, if you're only running a single-machine
|
6
|
+
# installation, or are using a networked drive.
|
7
|
+
module FilesystemStore
|
8
|
+
|
9
|
+
DEFAULT_STORAGE_PATH = '/tmp/cloud_crowd_storage'
|
10
|
+
|
11
|
+
attr_reader :local_storage_path
|
12
|
+
|
13
|
+
# Make sure that local storage exists and is writeable before starting.
|
14
|
+
# Resolves the storage directory from CloudCrowd.config[:local_storage_path]
# (falling back to DEFAULT_STORAGE_PATH), creates it when missing, and raises
# Error::StorageNotWritable if it cannot be written to.
def setup
  @local_storage_path = CloudCrowd.config[:local_storage_path] || DEFAULT_STORAGE_PATH
  # File.exist? rather than the File.exists? alias, which Ruby 3.2 removed.
  FileUtils.mkdir_p(@local_storage_path) unless File.exist?(@local_storage_path)
  return if File.writable?(@local_storage_path)
  raise Error::StorageNotWritable, "#{@local_storage_path} is not writable"
end
|
19
|
+
|
20
|
+
# Save a file to somewhere semi-persistent on the filesystem. To use,
|
21
|
+
# configure <tt>:storage: 'filesystem'</tt> in *config.yml*, as well as
|
22
|
+
# <tt>:local_storage_path:</tt>.
|
23
|
+
# Copies the file at +local_path+ to +save_path+ underneath
# @local_storage_path, creating intermediate directories as needed, and
# returns a file:// URL for the absolute destination.
def save(local_path, save_path)
  save_path = File.join(@local_storage_path, save_path)
  save_dir = File.dirname(save_path)
  # File.exist? rather than the File.exists? alias, which Ruby 3.2 removed.
  FileUtils.mkdir_p(save_dir) unless File.exist?(save_dir)
  FileUtils.cp(local_path, save_path)
  "file://#{File.expand_path(save_path)}"
end
|
30
|
+
|
31
|
+
# Remove all of a Job's result files from the filesystem.
|
32
|
+
# Deletes the "<action>/job_<id>" directory of +job+ underneath
# @local_storage_path, if it is present.
def cleanup(job)
  path = "#{@local_storage_path}/#{job.action}/job_#{job.id}"
  # File.exist? rather than the File.exists? alias, which Ruby 3.2 removed.
  FileUtils.rm_r(path) if File.exist?(path)
end
|
36
|
+
end
|
37
|
+
|
38
|
+
end
|
39
|
+
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
module CloudCrowd
|
2
|
+
class AssetStore
|
3
|
+
|
4
|
+
# The S3Store is an implementation of an AssetStore that uses a bucket
|
5
|
+
# on S3 for all resulting files.
|
6
|
+
module S3Store
|
7
|
+
|
8
|
+
# Configure authentication and establish a connection to S3, first thing.
|
9
|
+
def setup
|
10
|
+
@use_auth = CloudCrowd.config[:s3_authentication]
|
11
|
+
bucket_name = CloudCrowd.config[:s3_bucket]
|
12
|
+
key, secret = CloudCrowd.config[:aws_access_key], CloudCrowd.config[:aws_secret_key]
|
13
|
+
valid_conf = [bucket_name, key, secret].all? {|s| s.is_a? String }
|
14
|
+
raise Error::MissingConfiguration, "An S3 account must be configured in 'config.yml' before 's3' storage can be used" unless valid_conf
|
15
|
+
protocol = @use_auth ? 'https' : 'http'
|
16
|
+
port = @use_auth ? 443 : 80
|
17
|
+
@s3 = RightAws::S3.new(key, secret, :protocol => protocol, :port => port)
|
18
|
+
@bucket = @s3.bucket(bucket_name)
|
19
|
+
@bucket = @s3.bucket(bucket_name, true) unless @bucket
|
20
|
+
end
|
21
|
+
|
22
|
+
# Save a finished file from local storage to S3. Save it publicly unless
|
23
|
+
# we're configured to use S3 authentication. Authenticated links expire
|
24
|
+
# after one day by default.
|
25
|
+
# Uploads the file at +local_path+ to the bucket under the key +save_path+.
# With @use_auth set the object is stored 'private' and the link comes from
# @s3.interface.get_link; otherwise it is stored 'public-read' and the key's
# public link is returned.
def save(local_path, save_path)
  permissions = @use_auth ? 'private' : 'public-read'
  # Block form closes the handle once the upload returns; the bare
  # File.open used previously left one descriptor open per saved file.
  File.open(local_path) { |file| @bucket.put(save_path, file, {}, permissions) }
  if @use_auth
    @s3.interface.get_link(@bucket, save_path)
  else
    @bucket.key(save_path).public_link
  end
end
|
34
|
+
|
35
|
+
# Remove all of a Job's resulting files from S3, both intermediate and finished.
|
36
|
+
def cleanup(job)
|
37
|
+
@bucket.delete_folder("#{job.action}/job_#{job.id}")
|
38
|
+
end
|
39
|
+
|
40
|
+
end
|
41
|
+
|
42
|
+
end
|
43
|
+
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
require 'tmpdir'
|
2
|
+
|
3
|
+
module CloudCrowd
|
4
|
+
|
5
|
+
# The AssetStore provides a common API for storing files and returning URLs
|
6
|
+
# that can access them. At the moment, the files can be saved to either S3, or
|
7
|
+
# the local filesystem. You shouldn't need to use the AssetStore directly --
|
8
|
+
# Action's +download+ and +save+ methods use it behind the scenes.
|
9
|
+
#
|
10
|
+
# To implement a new back-end for the AssetStore, you must provide
|
11
|
+
# <tt>save(local_path, save_path)</tt>, <tt>cleanup(job)</tt>, and optionally,
|
12
|
+
# a <tt>setup</tt> method that will be called once at initialization.
|
13
|
+
class AssetStore
|
14
|
+
|
15
|
+
autoload :S3Store, 'cloud_crowd/asset_store/s3_store'
|
16
|
+
autoload :FilesystemStore, 'cloud_crowd/asset_store/filesystem_store'
|
17
|
+
|
18
|
+
# Configure the AssetStore with the specific storage implementation
|
19
|
+
# specified by 'storage' in <tt>config.yml</tt>.
|
20
|
+
case CloudCrowd.config[:storage]
|
21
|
+
when 's3' then include S3Store
|
22
|
+
when 'filesystem' then include FilesystemStore
|
23
|
+
else raise Error::StorageNotFound, "#{CloudCrowd.config[:storage]} is not a valid storage back end"
|
24
|
+
end
|
25
|
+
|
26
|
+
# Creating the AssetStore ensures that its scratch directory exists.
|
27
|
+
# Creates the temp storage directory when it is missing, raises
# Error::StorageNotWritable if it cannot be written to, and then runs the
# storage back-end's +setup+ hook when one is defined.
def initialize
  # File.exist? rather than the File.exists? alias, which Ruby 3.2 removed.
  FileUtils.mkdir_p(temp_storage_path) unless File.exist?(temp_storage_path)
  raise Error::StorageNotWritable, "#{temp_storage_path} is not writable" unless File.writable?(temp_storage_path)
  setup if respond_to? :setup
end
|
32
|
+
|
33
|
+
# Get the path to CloudCrowd's temporary local storage. All actions run
|
34
|
+
# in subdirectories of this.
|
35
|
+
def temp_storage_path
|
36
|
+
@temp_storage_path ||= CloudCrowd.config[:temp_storage_path] || "#{Dir.tmpdir}/cloud_crowd_tmp"
|
37
|
+
end
|
38
|
+
|
39
|
+
end
|
40
|
+
|
41
|
+
end
|
@@ -0,0 +1,242 @@
|
|
1
|
+
require 'optparse'
|
2
|
+
|
3
|
+
module CloudCrowd
|
4
|
+
class CommandLine
|
5
|
+
|
6
|
+
# Configuration files required for the `crowd` command to function.
|
7
|
+
CONFIG_FILES = ['config.yml', 'database.yml']
|
8
|
+
|
9
|
+
# Reference the absolute path to the root.
|
10
|
+
CC_ROOT = File.expand_path(File.dirname(__FILE__) + '/../..')
|
11
|
+
|
12
|
+
# Command-line banner for the usage message.
|
13
|
+
BANNER = <<-EOS
|
14
|
+
CloudCrowd is a MapReduce-inspired Parallel Processing System for Ruby.
|
15
|
+
|
16
|
+
Wiki: http://wiki.github.com/documentcloud/cloud-crowd
|
17
|
+
Rdoc: http://rdoc.info/projects/documentcloud/cloud-crowd
|
18
|
+
|
19
|
+
Usage: crowd COMMAND OPTIONS
|
20
|
+
|
21
|
+
Commands:
|
22
|
+
install Install the CloudCrowd configuration files to the specified directory
|
23
|
+
server Start up the central server (requires a database)
|
24
|
+
node Start up a worker node (only one node per machine, please)
|
25
|
+
console Launch a CloudCrowd console, connected to the central database
|
26
|
+
load_schema Load the schema into the database specified by database.yml
|
27
|
+
cleanup Removes jobs that were finished over --days (7 by default) ago
|
28
|
+
|
29
|
+
server -d [start | stop | restart] Servers and nodes can be launched as
|
30
|
+
node -d [start | stop | restart] daemons, then stopped or restarted.
|
31
|
+
|
32
|
+
Options:
|
33
|
+
EOS
|
34
|
+
|
35
|
+
# Creating a CloudCrowd::CommandLine runs from the contents of ARGV.
|
36
|
+
def initialize
|
37
|
+
parse_options
|
38
|
+
command = ARGV.shift
|
39
|
+
subcommand = ARGV.shift
|
40
|
+
case command
|
41
|
+
when 'console' then run_console
|
42
|
+
when 'server' then run_server(subcommand)
|
43
|
+
when 'node' then run_node(subcommand)
|
44
|
+
when 'load_schema' then run_load_schema
|
45
|
+
when 'install' then run_install(subcommand)
|
46
|
+
when 'cleanup' then run_cleanup
|
47
|
+
else usage
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
# Spin up an IRB session with the CloudCrowd code loaded in, and a database
|
52
|
+
# connection established. The equivalent of Rails' `script/console`.
|
53
|
+
def run_console
|
54
|
+
require 'irb'
|
55
|
+
require 'irb/completion'
|
56
|
+
require 'pp'
|
57
|
+
load_code
|
58
|
+
connect_to_database true
|
59
|
+
CloudCrowd::Server # Preload server to autoload classes.
|
60
|
+
Object.send(:include, CloudCrowd)
|
61
|
+
IRB.start
|
62
|
+
end
|
63
|
+
|
64
|
+
# `crowd server` can either 'start', 'stop', or 'restart'.
|
65
|
+
# Loads the CloudCrowd code, then dispatches +subcommand+ ('start' when nil)
# to start_server, stop_server or restart_server. An unrecognised subcommand
# prints the usage message instead of being silently ignored, mirroring how
# unknown top-level commands are handled.
def run_server(subcommand)
  load_code
  case (subcommand || 'start')
  when 'start' then start_server
  when 'stop' then stop_server
  when 'restart' then restart_server
  else usage
  end
end
|
74
|
+
|
75
|
+
# Convenience command for quickly spinning up the central server. More
|
76
|
+
# sophisticated deployments, load-balancing across multiple app servers,
|
77
|
+
# should use the config.ru rackup file directly. This method will start
|
78
|
+
# a single Thin server.
|
79
|
+
# Replaces the current process with a Thin server running the config.ru
# found in the configured directory. Port defaults to 9173; when daemonizing,
# the log directory is created first if it does not exist yet.
def start_server
  port = @options[:port] || 9173
  daemonize = @options[:daemonize] ? '-d' : ''
  log_path = CloudCrowd.log_path('server.log')
  pid_path = CloudCrowd.pid_path('server.pid')
  rackup_path = File.expand_path("#{@options[:config_path]}/config.ru")
  # File.exist? rather than the File.exists? alias, which Ruby 3.2 removed.
  FileUtils.mkdir_p(CloudCrowd.log_path) if @options[:daemonize] && !File.exist?(CloudCrowd.log_path)
  puts "Starting CloudCrowd Central Server on port #{port}..."
  exec "thin -e #{@options[:environment]} -p #{port} #{daemonize} --tag cloud-crowd-server --log #{log_path} --pid #{pid_path} -R #{rackup_path} start"
end
|
89
|
+
|
90
|
+
# Stop the daemonized central server, if it exists.
|
91
|
+
def stop_server
|
92
|
+
Thin::Server.kill(CloudCrowd.pid_path('server.pid'), 0)
|
93
|
+
end
|
94
|
+
|
95
|
+
# Restart the daemonized central server.
|
96
|
+
def restart_server
|
97
|
+
stop_server
|
98
|
+
sleep 1
|
99
|
+
start_server
|
100
|
+
end
|
101
|
+
|
102
|
+
# `crowd node` can either 'start', 'stop', or 'restart'.
|
103
|
+
# Loads the CloudCrowd code, exports the chosen environment as RACK_ENV, then
# dispatches +subcommand+ ('start' when nil) to start_node, stop_node or
# restart_node. An unrecognised subcommand prints the usage message instead
# of being silently ignored.
def run_node(subcommand)
  load_code
  ENV['RACK_ENV'] = @options[:environment]
  case (subcommand || 'start')
  when 'start' then start_node
  when 'stop' then stop_node
  when 'restart' then restart_node
  else usage
  end
end
|
112
|
+
|
113
|
+
# Launch a Node. Please only run a single node per machine. The Node process
|
114
|
+
# will be long-lived, although its workers will come and go.
|
115
|
+
def start_node
|
116
|
+
port = @options[:port] || Node::DEFAULT_PORT
|
117
|
+
puts "Starting CloudCrowd Node on port #{port}..."
|
118
|
+
Node.new(port, @options[:daemonize])
|
119
|
+
end
|
120
|
+
|
121
|
+
# If the daemonized Node is running, stop it.
|
122
|
+
def stop_node
|
123
|
+
Thin::Server.kill CloudCrowd.pid_path('node.pid')
|
124
|
+
end
|
125
|
+
|
126
|
+
# Restart the daemonized Node, if it exists.
|
127
|
+
def restart_node
|
128
|
+
stop_node
|
129
|
+
sleep 1
|
130
|
+
start_node
|
131
|
+
end
|
132
|
+
|
133
|
+
# Load in the database schema to the database specified in 'database.yml'.
|
134
|
+
def run_load_schema
|
135
|
+
load_code
|
136
|
+
connect_to_database(false)
|
137
|
+
require 'cloud_crowd/schema.rb'
|
138
|
+
end
|
139
|
+
|
140
|
+
# Install the required CloudCrowd configuration files into the specified
|
141
|
+
# directory, or the current one.
|
142
|
+
# Copies the example config.yml, config.ru and database.yml plus the bundled
# actions directory from CC_ROOT into +install_path+ (the current directory
# when nil), creating the target directory first if necessary.
def run_install(install_path)
  require 'fileutils'
  install_path ||= '.'
  # File.exist? rather than the File.exists? alias, which Ruby 3.2 removed.
  FileUtils.mkdir_p(install_path) unless File.exist?(install_path)
  install_file "#{CC_ROOT}/config/config.example.yml", "#{install_path}/config.yml"
  install_file "#{CC_ROOT}/config/config.example.ru", "#{install_path}/config.ru"
  install_file "#{CC_ROOT}/config/database.example.yml", "#{install_path}/database.yml"
  install_file "#{CC_ROOT}/actions", "#{install_path}/actions", true
end
|
151
|
+
|
152
|
+
# Clean up all Jobs in the CloudCrowd database older than --days old.
|
153
|
+
def run_cleanup
|
154
|
+
load_code
|
155
|
+
connect_to_database(true)
|
156
|
+
Job.cleanup_all(:days => @options[:days])
|
157
|
+
end
|
158
|
+
|
159
|
+
# Print `crowd` usage.
|
160
|
+
def usage
|
161
|
+
puts "\n#{@option_parser}\n"
|
162
|
+
end
|
163
|
+
|
164
|
+
|
165
|
+
private
|
166
|
+
|
167
|
+
# Check for configuration files, either in the current directory, or in
|
168
|
+
# the CLOUD_CROWD_CONFIG environment variable. Exit if they're not found.
|
169
|
+
# Verifies that every file in CONFIG_FILES exists in the configured
# directory, calling config_not_found otherwise. A successful check is
# remembered in @config_found so later calls return immediately; the flag was
# previously stored under a different name, so the early return never fired.
def ensure_config
  return if @config_found
  # File.exist? rather than the File.exists? alias, which Ruby 3.2 removed.
  found = CONFIG_FILES.all? {|f| File.exist? "#{@options[:config_path]}/#{f}" }
  found ? @config_found = true : config_not_found
end
|
174
|
+
|
175
|
+
# Parse all options for all commands.
|
176
|
+
# Valid options are: --config --port --environment --daemonize --days.
|
177
|
+
def parse_options
|
178
|
+
@options = {
|
179
|
+
:environment => 'production',
|
180
|
+
:config_path => ENV['CLOUD_CROWD_CONFIG'] || '.',
|
181
|
+
:daemonize => false
|
182
|
+
}
|
183
|
+
@option_parser = OptionParser.new do |opts|
|
184
|
+
opts.on('-c', '--config PATH', 'path to configuration directory') do |conf_path|
|
185
|
+
@options[:config_path] = conf_path
|
186
|
+
end
|
187
|
+
opts.on('-p', '--port PORT', 'port number for server (central or node)') do |port_num|
|
188
|
+
@options[:port] = port_num
|
189
|
+
end
|
190
|
+
opts.on('-e', '--environment ENV', 'server environment (defaults to production)') do |env|
|
191
|
+
@options[:environment] = env
|
192
|
+
end
|
193
|
+
opts.on('-d', '--daemonize', 'run as a background daemon') do |daemonize|
|
194
|
+
@options[:daemonize] = daemonize
|
195
|
+
end
|
196
|
+
opts.on('--days NUM_DAYS', 'grace period before cleanup (7 by default)') do |days|
|
197
|
+
@options[:days] = days.to_i if days.match(/\A\d+\Z/)
|
198
|
+
end
|
199
|
+
opts.on_tail('-v', '--version', 'show version') do
|
200
|
+
require "#{CC_ROOT}/lib/cloud-crowd"
|
201
|
+
puts "CloudCrowd version #{VERSION}"
|
202
|
+
exit
|
203
|
+
end
|
204
|
+
end
|
205
|
+
@option_parser.banner = BANNER
|
206
|
+
@option_parser.parse!(ARGV)
|
207
|
+
end
|
208
|
+
|
209
|
+
# Load in the CloudCrowd module code, dependencies, lib files and models.
|
210
|
+
# Not all commands require this.
|
211
|
+
def load_code
|
212
|
+
ensure_config
|
213
|
+
require "#{CC_ROOT}/lib/cloud-crowd"
|
214
|
+
CloudCrowd.configure("#{@options[:config_path]}/config.yml")
|
215
|
+
end
|
216
|
+
|
217
|
+
# Establish a connection to the central server's database. Not all commands
|
218
|
+
# require this.
|
219
|
+
def connect_to_database(validate_schema)
|
220
|
+
require 'cloud_crowd/models'
|
221
|
+
CloudCrowd.configure_database("#{@options[:config_path]}/database.yml", validate_schema)
|
222
|
+
end
|
223
|
+
|
224
|
+
# Exit with an explanation if the configuration files couldn't be found.
|
225
|
+
def config_not_found
|
226
|
+
puts "`crowd` can't find the CloudCrowd configuration directory. Please use `crowd -c path/to/config`, or run `crowd` from inside of the configuration directory itself."
|
227
|
+
exit(1)
|
228
|
+
end
|
229
|
+
|
230
|
+
# Install a file and log the installation. If we're overwriting a file,
|
231
|
+
# offer a chance to back out.
|
232
|
+
# Copies +source+ to +dest+ (recursively when +is_dir+ is true) and reports
# the installation unless RACK_ENV is 'test'. When +dest+ already exists the
# user is asked to confirm; anything other than y/yes/ok, including end of
# input, leaves the existing file untouched.
def install_file(source, dest, is_dir=false)
  # File.exist? rather than the File.exists? alias, which Ruby 3.2 removed.
  if File.exist?(dest)
    print "#{dest} already exists. Overwrite it? (yes/no) "
    # gets returns nil at EOF (e.g. non-interactive runs): treat it as "no".
    answer = gets.to_s.strip.downcase
    return unless ['y', 'yes', 'ok'].include?(answer)
  end
  is_dir ? FileUtils.cp_r(source, dest) : FileUtils.cp(source, dest)
  puts "installed #{dest}" unless ENV['RACK_ENV'] == 'test'
end
|
240
|
+
|
241
|
+
end
|
242
|
+
end
|