mooktakim-cloud-crowd 0.3.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (71) hide show
  1. data/EPIGRAPHS +17 -0
  2. data/LICENSE +22 -0
  3. data/README +93 -0
  4. data/actions/graphics_magick.rb +43 -0
  5. data/actions/process_pdfs.rb +92 -0
  6. data/actions/word_count.rb +16 -0
  7. data/bin/crowd +5 -0
  8. data/config/config.example.ru +23 -0
  9. data/config/config.example.yml +55 -0
  10. data/config/database.example.yml +16 -0
  11. data/examples/graphics_magick_example.rb +44 -0
  12. data/examples/process_pdfs_example.rb +40 -0
  13. data/examples/word_count_example.rb +42 -0
  14. data/lib/cloud-crowd.rb +188 -0
  15. data/lib/cloud_crowd/action.rb +125 -0
  16. data/lib/cloud_crowd/asset_store/filesystem_store.rb +39 -0
  17. data/lib/cloud_crowd/asset_store/s3_store.rb +43 -0
  18. data/lib/cloud_crowd/asset_store.rb +41 -0
  19. data/lib/cloud_crowd/command_line.rb +242 -0
  20. data/lib/cloud_crowd/exceptions.rb +46 -0
  21. data/lib/cloud_crowd/helpers/authorization.rb +52 -0
  22. data/lib/cloud_crowd/helpers/resources.rb +25 -0
  23. data/lib/cloud_crowd/helpers.rb +8 -0
  24. data/lib/cloud_crowd/inflector.rb +19 -0
  25. data/lib/cloud_crowd/models/job.rb +190 -0
  26. data/lib/cloud_crowd/models/node_record.rb +107 -0
  27. data/lib/cloud_crowd/models/work_unit.rb +170 -0
  28. data/lib/cloud_crowd/models.rb +40 -0
  29. data/lib/cloud_crowd/node.rb +199 -0
  30. data/lib/cloud_crowd/schema.rb +50 -0
  31. data/lib/cloud_crowd/server.rb +123 -0
  32. data/lib/cloud_crowd/worker.rb +149 -0
  33. data/mooktakim-cloud-crowd.gemspec +116 -0
  34. data/public/css/admin_console.css +243 -0
  35. data/public/css/reset.css +42 -0
  36. data/public/images/bullet_green.png +0 -0
  37. data/public/images/bullet_white.png +0 -0
  38. data/public/images/cloud_hand.png +0 -0
  39. data/public/images/header_back.png +0 -0
  40. data/public/images/logo.png +0 -0
  41. data/public/images/queue_fill.png +0 -0
  42. data/public/images/server.png +0 -0
  43. data/public/images/server_busy.png +0 -0
  44. data/public/images/server_error.png +0 -0
  45. data/public/images/sidebar_bottom.png +0 -0
  46. data/public/images/sidebar_top.png +0 -0
  47. data/public/images/worker_info.png +0 -0
  48. data/public/images/worker_info_loading.gif +0 -0
  49. data/public/js/admin_console.js +197 -0
  50. data/public/js/excanvas.js +1 -0
  51. data/public/js/flot.js +1 -0
  52. data/public/js/jquery.js +19 -0
  53. data/test/acceptance/test_failing_work_units.rb +33 -0
  54. data/test/acceptance/test_node.rb +20 -0
  55. data/test/acceptance/test_server.rb +66 -0
  56. data/test/acceptance/test_word_count.rb +40 -0
  57. data/test/blueprints.rb +25 -0
  58. data/test/config/actions/failure_testing.rb +13 -0
  59. data/test/config/config.ru +17 -0
  60. data/test/config/config.yml +6 -0
  61. data/test/config/database.yml +3 -0
  62. data/test/test_helper.rb +19 -0
  63. data/test/unit/test_action.rb +70 -0
  64. data/test/unit/test_configuration.rb +48 -0
  65. data/test/unit/test_job.rb +103 -0
  66. data/test/unit/test_node.rb +41 -0
  67. data/test/unit/test_node_record.rb +42 -0
  68. data/test/unit/test_work_unit.rb +53 -0
  69. data/test/unit/test_worker.rb +48 -0
  70. data/views/operations_center.erb +82 -0
  71. metadata +290 -0
@@ -0,0 +1,188 @@
1
+ # The Grand Central of code loading...
2
+
3
+ $LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__))
4
+
5
+ # Common Gems:
6
+ require 'rubygems'
7
+ gem 'activerecord', '2.3.5'
8
+ gem 'json'
9
+ gem 'rest-client'
10
+ gem 'right_aws'
11
+ gem 'sinatra'
12
+ gem 'thin'
13
+
14
+ # Autoloading for all the pieces which may or may not be needed:
15
+ autoload :ActiveRecord, 'active_record'
16
+ autoload :Benchmark, 'benchmark'
17
+ autoload :Digest, 'digest'
18
+ autoload :ERB, 'erb'
19
+ autoload :FileUtils, 'fileutils'
20
+ autoload :JSON, 'json'
21
+ autoload :RestClient, 'rest_client'
22
+ autoload :RightAws, 'right_aws'
23
+ autoload :Sinatra, 'sinatra'
24
+ autoload :Thin, 'thin'
25
+ autoload :YAML, 'yaml'
26
+
27
+ # Common code which should really be required in every circumstance.
28
+ require 'socket'
29
+ require 'cloud_crowd/exceptions'
30
+
31
+ module CloudCrowd
32
+
33
+ # Autoload all the CloudCrowd internals.
34
+ autoload :Action, 'cloud_crowd/action'
35
+ autoload :AssetStore, 'cloud_crowd/asset_store'
36
+ autoload :CommandLine, 'cloud_crowd/command_line'
37
+ autoload :Helpers, 'cloud_crowd/helpers'
38
+ autoload :Inflector, 'cloud_crowd/inflector'
39
+ autoload :Job, 'cloud_crowd/models'
40
+ autoload :Node, 'cloud_crowd/node'
41
+ autoload :NodeRecord, 'cloud_crowd/models'
42
+ autoload :Server, 'cloud_crowd/server'
43
+ autoload :Worker, 'cloud_crowd/worker'
44
+ autoload :WorkUnit, 'cloud_crowd/models'
45
+
46
# Keep this version in sync with the gemspec (this package is released as 0.3.4).
VERSION = '0.3.4'
48
+
49
+ # Increment the schema version when there's a backwards incompatible change.
50
+ SCHEMA_VERSION = 3
51
+
52
+ # Root directory of the CloudCrowd gem.
53
+ ROOT = File.expand_path(File.dirname(__FILE__) + '/..')
54
+
55
+ # Default folder to log daemonized servers and nodes into.
56
+ LOG_PATH = 'log'
57
+
58
+ # Default folder to contain the pids of daemonized servers and nodes.
59
+ PID_PATH = 'tmp/pids'
60
+
61
+ # A Job is processing if its WorkUnits are in the queue to be handled by nodes.
62
+ PROCESSING = 1
63
+
64
+ # A Job has succeeded if all of its WorkUnits have finished successfully.
65
+ SUCCEEDED = 2
66
+
67
+ # A Job has failed if even a single one of its WorkUnits has failed (they may
68
+ # be attempted multiple times on failure, however).
69
+ FAILED = 3
70
+
71
+ # A Job is splitting if it's in the process of dividing its inputs up into
72
+ # multiple WorkUnits.
73
+ SPLITTING = 4
74
+
75
+ # A Job is merging if it's busy collecting all of its successful WorkUnits
76
+ # back together into the final result.
77
+ MERGING = 5
78
+
79
+ # A Job is considered to be complete if it succeeded or if it failed.
80
+ COMPLETE = [SUCCEEDED, FAILED]
81
+
82
+ # A Job is considered incomplete if it's being processed, split up or merged.
83
+ INCOMPLETE = [PROCESSING, SPLITTING, MERGING]
84
+
85
+ # Mapping of statuses to their display strings.
86
+ DISPLAY_STATUS_MAP = ['unknown', 'processing', 'succeeded', 'failed', 'splitting', 'merging']
87
+
88
+ class << self
89
+ attr_reader :config
90
+ attr_accessor :identity
91
+
92
# Point CloudCrowd at a <tt>config.yml</tt>. The absolute path of the directory
# holding the file is remembered in @config_path, and the parsed YAML settings
# are stored in +config+ (and returned).
def configure(config_path)
  containing_dir = File.dirname(config_path)
  @config_path = File.expand_path(containing_dir)
  @config = YAML.load_file(config_path)
end
97
+
98
# Configure the CloudCrowd central database (and connect to it), by passing
# in a path to <tt>database.yml</tt>. The file should use the standard
# ActiveRecord connection format.
#
# When +validate_schema+ is true, the highest version recorded in
# +schema_migrations+ must equal SCHEMA_VERSION: returns true on a match,
# otherwise prints upgrade instructions and exits with a non-zero status so
# that calling scripts can detect the failure. Returns nil when validation
# is skipped.
def configure_database(config_path, validate_schema=true)
  configuration = YAML.load_file(config_path)
  ActiveRecord::Base.establish_connection(configuration)
  return unless validate_schema
  version = ActiveRecord::Base.connection.select_values('select max(version) from schema_migrations').first.to_i
  return true if version == SCHEMA_VERSION
  puts "Your database schema is out of date. Please use `crowd load_schema` to update it. This will wipe all the tables, so make sure that your jobs have a chance to finish first.\nexiting..."
  # An out-of-date schema is an error condition; do not report success.
  exit(1)
end
111
+
112
# Memoized RestClient resource pointing at the configured central server,
# built with the shared client options (which carry authentication when it
# is configured).
def central_server
  return @central_server if @central_server
  server_url = CloudCrowd.config[:central_server]
  @central_server = RestClient::Resource.new(server_url, CloudCrowd.client_options)
end
117
+
118
# Directory that daemonized servers and nodes log to: the configured
# :log_path, falling back to LOG_PATH. When +log_file+ is given, the path of
# that file inside the directory is returned instead.
def log_path(log_file=nil)
  @log_path ||= config[:log_path] || LOG_PATH
  return @log_path unless log_file
  File.join(@log_path, log_file)
end
123
+
124
# Directory in which daemonized servers and nodes keep their pid files: the
# configured :pid_path, falling back to PID_PATH. When +pid_file+ is given,
# the path of that file inside the directory is returned instead.
def pid_path(pid_file=nil)
  @pid_path ||= config[:pid_path] || PID_PATH
  return @pid_path unless pid_file
  File.join(@pid_path, pid_file)
end
129
+
130
# The standard RestClient options used between the central server and the
# nodes, in both directions: 5 seconds to open a connection and 30 seconds
# to finish reading. The configured login and password are added when
# :http_authentication is switched on. The hash is built once and reused.
def client_options
  unless @client_options
    @client_options = {:timeout => 30, :open_timeout => 5}
    if CloudCrowd.config[:http_authentication]
      @client_options.update(:user => CloudCrowd.config[:login], :password => CloudCrowd.config[:password])
    end
  end
  @client_options
end
142
+
143
# Return the displayable status name of an internal CloudCrowd status number
# (see the status constants and DISPLAY_STATUS_MAP). Anything that is not a
# known status -- nil, a non-integer, a negative number, or a number past the
# end of the map -- is reported as 'unknown'.
def display_status(status)
  # A negative index would silently count from the end of the array
  # (-1 => 'merging'), and nil would raise a TypeError.
  return 'unknown' unless status.is_a?(Integer) && status >= 0
  DISPLAY_STATUS_MAP[status] || 'unknown'
end
148
+
149
# CloudCrowd::Actions are requested dynamically by name. This returns (and
# memoizes) a hash mapping each action's file name, minus its extension, to
# the class found by camelizing that name. Every file listed by
# +action_paths+ is required along the way, so installing only some actions
# on a node specializes that node to those actions.
#
# A NameError raised while loading is re-raised with an explanation of the
# file-name => class-name convention in front of the original message.
def actions
  return @actions if @actions
  registry = {}
  action_paths.each do |action_file|
    action_name = File.basename(action_file, File.extname(action_file))
    require action_file
    registry[action_name] = Module.const_get(Inflector.camelize(action_name))
  end
  @actions = registry
rescue NameError => e
  adjusted_message = "One of your actions failed to load. Please ensure that the name of your action class can be deduced from the name of the file. ex: 'word_count.rb' => 'WordCount'\n#{e.message}"
  raise NameError.new(adjusted_message, e.name)
end
166
+
167
# List the Ruby files of every installed Action for this node or server, in
# this order: the gem's bundled actions, the actions directory next to the
# loaded configuration, and finally the optional :actions_path from the
# configuration.
def action_paths
  search_dirs = ["#{ROOT}/actions", "#{@config_path}/actions"]
  custom_dir = CloudCrowd.config[:actions_path]
  search_dirs << custom_dir if custom_dir
  search_dirs.inject([]) { |found, dir| found + Dir["#{dir}/*.rb"] }
end
174
+
175
# True when this CloudCrowd instance has been identified as the central
# server. Useful for avoiding loading unneeded code from actions.
def server?
  :server == @identity
end
180
+
181
# True when this CloudCrowd instance has been identified as a worker node.
def node?
  :node == @identity
end
185
+
186
+ end
187
+
188
+ end
@@ -0,0 +1,125 @@
1
+ module CloudCrowd
2
+
3
+ # As you write your custom actions, have them inherit from CloudCrowd::Action.
4
+ # All actions must implement a +process+ method, which should return a
5
+ # JSON-serializable object that will be used as the output for the work unit.
6
+ # See the default actions for examples.
7
+ #
8
+ # Optionally, actions may define +split+ and +merge+ methods to do mapping
9
+ # and reducing around the +input+. +split+ should return an array of URLs --
10
+ # to be mapped into WorkUnits and processed in parallel. In the +merge+ step,
11
+ # +input+ will be an array of all the resulting outputs from calling process.
12
+ #
13
+ # All actions have use of an individual +work_directory+, for scratch files,
14
+ # and spend their duration inside of it, so relative paths work well.
15
+ #
16
+ # Note that Actions inherit a backticks (`) method that raises an Exception
17
+ # if the external command fails.
18
+ class Action
19
+
20
+ FILE_URL = /\Afile:\/\//
21
+
22
+ attr_reader :input, :input_path, :file_name, :options, :work_directory
23
+
24
# Initializing an Action sets up all of the read-only variables that form
# the bulk of the API for action subclasses (paths to read from and write
# to). It creates the +work_directory+ under the store's temporary storage
# path, replaces JSON-looking input with its parsed form, and -- when the
# input is a URL -- downloads the input file into the +work_directory+
# before starting. +status+ is accepted but not used here.
def initialize(status, input, options, store)
  @input, @options, @store = input, options, store
  @job_id, @work_unit_id = options['job_id'], options['work_unit_id']
  @work_directory = File.expand_path(File.join(@store.temp_storage_path, storage_prefix))
  # File.exist? is the supported spelling; File.exists? was removed in Ruby 3.2.
  FileUtils.mkdir_p(@work_directory) unless File.exist?(@work_directory)
  parse_input
  download_input
end
37
+
38
# Placeholder for the work an Action performs. Subclasses must override it;
# calling this base version always raises NotImplementedError.
def process
  message = "CloudCrowd::Actions must override 'process' with their own processing code."
  raise NotImplementedError, message
end
42
+
43
# Download +url+ to the local +path+ by shelling out to curl, and return
# +path+. Both values are shell-escaped, because the URL comes from job
# input and must not be able to inject shell syntax ($, backticks, quotes).
# The command goes through this class's backticks method.
#
# Shelling out is deliberate: the earlier RestClient implementation did not
# handle URLs with encoded entities (%20, for example) and could not
# download to a given location. Getting a RestClient patch in would be ideal.
def download(url, path)
  require 'shellwords'
  `curl -s #{Shellwords.escape(url)} > #{Shellwords.escape(path)}`
  path
end
59
+
60
# Hand a local file to the asset store, filed under this action's storage
# prefix with its original base name, and return whatever the store's +save+
# returns.
def save(file_path)
  remote_name = File.basename(file_path)
  @store.save(file_path, File.join(storage_prefix, remote_name))
end
66
+
67
# After the Action has finished, remove its work directory and everything
# in it. Does nothing when the directory is already gone.
def cleanup_work_directory
  # File.exist? is the supported spelling; File.exists? was removed in Ruby 3.2.
  FileUtils.rm_r(@work_directory) if File.exist?(@work_directory)
end
72
+
73
# Actions have a backticks command that raises a CommandFailed exception on
# failure, so that processing doesn't just blithely continue. Returns the
# command's output on success. The exception receives the output and the
# command's real exit code (or the raw wait status when the process did not
# exit normally, e.g. it was killed by a signal).
def `(command)
  result = super(command)
  status = $?
  unless status.success?
    # Process::Status#to_i is the raw wait status (256 for "exit 1"), not
    # the exit code, so report exitstatus instead.
    raise Error::CommandFailed.new(result, status.exitstatus || status.to_i)
  end
  result
end
81
+
82
+
83
+ private
84
+
85
# Convert an unsafe URL into a filesystem-friendly filename made only of
# letters, digits, '_', '-' and '.'. Percent-escapes in the last path segment
# are decoded first, every other character becomes '-', runs of '-' are
# collapsed, and dots are kept only in the extension.
def safe_filename(url)
  # Decode %XX by hand (URI.unescape was removed in Ruby 3.0). Non-ASCII
  # bytes would be replaced by '-' below anyway, so map them straight away.
  decoded = File.basename(url).gsub(/%([0-9a-fA-F]{2})/) do
    byte = $1.hex
    byte < 128 ? byte.chr : '-'
  end
  name = decoded.gsub(/[^a-zA-Z0-9_\-.]/, '-').gsub(/-+/, '-')
  # Take the extension from the sanitized name: taking it from the raw URL
  # let query strings and escapes leak unsafe characters into the result.
  ext = File.extname(name)
  File.basename(name, ext).gsub('.', '-') + ext
end
91
+
92
# The directory prefix to use for both local and S3 storage:
# [action]/job_[job_id]/unit_[work_unit_id]
# The unit segment is left out when there is no work unit id. The prefix is
# computed once and memoized.
def storage_prefix
  @storage_prefix ||= begin
    path_parts = [Inflector.underscore(self.class), "job_#{@job_id}"]
    path_parts << "unit_#{@work_unit_id}" if @work_unit_id
    File.join(path_parts)
  end
end
101
+
102
# Input that starts with '[' or '{' is taken to be JSON and is replaced by
# its parsed form; if parsing fails the input is left as it was. Any other
# input is not touched.
def parse_input
  first_char = @input[0..0]
  return unless first_char == '[' || first_char == '{'
  begin
    @input = JSON.parse(@input)
  rescue StandardError
    @input
  end
end
108
+
109
# True when the input parses as a URI that has a scheme. Anything that
# cannot be parsed counts as "not a URL".
def input_is_url?
  begin
    scheme = URI.parse(@input).scheme
    !scheme.nil?
  rescue StandardError
    false
  end
end
112
+
113
# When the input is a URL, fetch it into the work directory before
# processing begins. Sets +input_path+ (the local file) and +file_name+ (its
# base name without extension). The download runs with the work directory as
# the current directory.
def download_input
  if input_is_url?
    Dir.chdir(@work_directory) do
      local_name = safe_filename(@input)
      @input_path = File.join(@work_directory, local_name)
      extension = File.extname(@input_path)
      @file_name = File.basename(@input_path, extension)
      download(@input, @input_path)
    end
  end
end
122
+
123
+ end
124
+
125
+ end
@@ -0,0 +1,39 @@
1
+ module CloudCrowd
2
+ class AssetStore
3
+
4
+ # The FilesystemStore is an implementation of the AssetStore, good only for
5
+ # use in development, testing, if you're only running a single-machine
6
+ # installation, or are using a networked drive.
7
+ module FilesystemStore
8
+
9
+ DEFAULT_STORAGE_PATH = '/tmp/cloud_crowd_storage'
10
+
11
+ attr_reader :local_storage_path
12
+
13
# Make sure that local storage exists and is writeable before starting. The
# location is the configured :local_storage_path, falling back to
# DEFAULT_STORAGE_PATH. Raises Error::StorageNotWritable when the directory
# cannot be written to.
def setup
  lsp = @local_storage_path = CloudCrowd.config[:local_storage_path] || DEFAULT_STORAGE_PATH
  # File.exist? is the supported spelling; File.exists? was removed in Ruby 3.2.
  FileUtils.mkdir_p(lsp) unless File.exist?(lsp)
  raise Error::StorageNotWritable, "#{lsp} is not writable" unless File.writable?(lsp)
end
19
+
20
# Save a file to somewhere semi-persistent on the filesystem. To use,
# configure <tt>:storage: 'filesystem'</tt> in *config.yml*, as well as
# <tt>:local_storage_path:</tt>. The file is copied to +save_path+ beneath
# the local storage directory (missing directories are created), and a
# file:// URL for the copy is returned.
def save(local_path, save_path)
  save_path = File.join(@local_storage_path, save_path)
  save_dir = File.dirname(save_path)
  # File.exist? is the supported spelling; File.exists? was removed in Ruby 3.2.
  FileUtils.mkdir_p(save_dir) unless File.exist?(save_dir)
  FileUtils.cp(local_path, save_path)
  "file://#{File.expand_path(save_path)}"
end
30
+
31
# Remove all of a Job's result files from the filesystem, i.e. the
# [action]/job_[id] directory beneath local storage. Does nothing when that
# directory does not exist.
def cleanup(job)
  path = "#{@local_storage_path}/#{job.action}/job_#{job.id}"
  # File.exist? is the supported spelling; File.exists? was removed in Ruby 3.2.
  FileUtils.rm_r(path) if File.exist?(path)
end
36
+ end
37
+
38
+ end
39
+ end
@@ -0,0 +1,43 @@
1
+ module CloudCrowd
2
+ class AssetStore
3
+
4
+ # The S3Store is an implementation of an AssetStore that uses a bucket
5
+ # on S3 for all resulting files.
6
+ module S3Store
7
+
8
# Read the S3 settings from the configuration and connect, first thing. The
# bucket name, access key and secret key must all be strings, otherwise
# Error::MissingConfiguration is raised. With :s3_authentication the
# connection uses https on port 443, otherwise http on port 80. The bucket
# is looked up, and requested again with the create flag when the lookup
# returns nothing.
def setup
  settings = CloudCrowd.config
  @use_auth = settings[:s3_authentication]
  bucket_name = settings[:s3_bucket]
  key = settings[:aws_access_key]
  secret = settings[:aws_secret_key]
  unless [bucket_name, key, secret].all? { |value| value.is_a?(String) }
    raise Error::MissingConfiguration, "An S3 account must be configured in 'config.yml' before 's3' storage can be used"
  end
  protocol, port = @use_auth ? ['https', 443] : ['http', 80]
  @s3 = RightAws::S3.new(key, secret, :protocol => protocol, :port => port)
  @bucket = @s3.bucket(bucket_name)
  @bucket = @s3.bucket(bucket_name, true) unless @bucket
end
21
+
22
# Save a finished file from local storage to S3 and return its link. The
# object is stored with the 'public-read' ACL and its public link is
# returned, unless S3 authentication is configured, in which case it is
# stored as 'private' and a link from the S3 interface's +get_link+ is
# returned instead.
def save(local_path, save_path)
  acl = @use_auth ? 'private' : 'public-read'
  # The block form closes the file once the upload is done; a bare
  # File.open leaked one handle per saved file.
  File.open(local_path) { |file| @bucket.put(save_path, file, {}, acl) }
  if @use_auth
    @s3.interface.get_link(@bucket, save_path)
  else
    @bucket.key(save_path).public_link
  end
end
34
+
35
# Delete a Job's [action]/job_[id] folder from the bucket, which holds both
# its intermediate and its finished files.
def cleanup(job)
  job_folder = "#{job.action}/job_#{job.id}"
  @bucket.delete_folder(job_folder)
end
39
+
40
+ end
41
+
42
+ end
43
+ end
@@ -0,0 +1,41 @@
1
+ require 'tmpdir'
2
+
3
+ module CloudCrowd
4
+
5
+ # The AssetStore provides a common API for storing files and returning URLs
6
+ # that can access them. At the moment, the files can be saved to either S3, or
7
+ # the local filesystem. You shouldn't need to use the AssetStore directly --
8
+ # Action's +download+ and +save+ methods use it behind the scenes.
9
+ #
10
+ # To implement a new back-end for the AssetStore, you must provide
11
+ # <tt>save(local_path, save_path)</tt>, <tt>cleanup(job)</tt>, and optionally,
12
+ # a <tt>setup</tt> method that will be called once at initialization.
13
+ class AssetStore
14
+
15
+ autoload :S3Store, 'cloud_crowd/asset_store/s3_store'
16
+ autoload :FilesystemStore, 'cloud_crowd/asset_store/filesystem_store'
17
+
18
+ # Configure the AssetStore with the specific storage implementation
19
+ # specified by 'storage' in <tt>config.yml</tt>.
20
+ case CloudCrowd.config[:storage]
21
+ when 's3' then include S3Store
22
+ when 'filesystem' then include FilesystemStore
23
+ else raise Error::StorageNotFound, "#{CloudCrowd.config[:storage]} is not a valid storage back end"
24
+ end
25
+
26
# Creating the AssetStore ensures that its scratch directory exists and is
# writable (raising Error::StorageNotWritable otherwise), and then runs the
# storage back end's +setup+ hook when it defines one.
def initialize
  # File.exist? is the supported spelling; File.exists? was removed in Ruby 3.2.
  FileUtils.mkdir_p(temp_storage_path) unless File.exist?(temp_storage_path)
  raise Error::StorageNotWritable, "#{temp_storage_path} is not writable" unless File.writable?(temp_storage_path)
  setup if respond_to? :setup
end
32
+
33
# Path to CloudCrowd's temporary local storage; all actions run in
# subdirectories of it. Uses the configured :temp_storage_path, or a
# cloud_crowd_tmp folder inside the system temp directory. Memoized.
def temp_storage_path
  return @temp_storage_path if @temp_storage_path
  configured = CloudCrowd.config[:temp_storage_path]
  @temp_storage_path = configured || "#{Dir.tmpdir}/cloud_crowd_tmp"
end
38
+
39
+ end
40
+
41
+ end
@@ -0,0 +1,242 @@
1
+ require 'optparse'
2
+
3
+ module CloudCrowd
4
+ class CommandLine
5
+
6
+ # Configuration files required for the `crowd` command to function.
7
+ CONFIG_FILES = ['config.yml', 'database.yml']
8
+
9
+ # Reference the absolute path to the root.
10
+ CC_ROOT = File.expand_path(File.dirname(__FILE__) + '/../..')
11
+
12
+ # Command-line banner for the usage message.
13
+ BANNER = <<-EOS
14
+ CloudCrowd is a MapReduce-inspired Parallel Processing System for Ruby.
15
+
16
+ Wiki: http://wiki.github.com/documentcloud/cloud-crowd
17
+ Rdoc: http://rdoc.info/projects/documentcloud/cloud-crowd
18
+
19
+ Usage: crowd COMMAND OPTIONS
20
+
21
+ Commands:
22
+ install Install the CloudCrowd configuration files to the specified directory
23
+ server Start up the central server (requires a database)
24
+ node Start up a worker node (only one node per machine, please)
25
+ console Launch a CloudCrowd console, connected to the central database
26
+ load_schema Load the schema into the database specified by database.yml
27
+ cleanup Removes jobs that were finished over --days (7 by default) ago
28
+
29
+ server -d [start | stop | restart] Servers and nodes can be launched as
30
+ node -d [start | stop | restart] daemons, then stopped or restarted.
31
+
32
+ Options:
33
+ EOS
34
+
35
# Creating a CloudCrowd::CommandLine runs straight from the contents of
# ARGV: the options are parsed (and removed) first, then the next two
# arguments are taken as the command and its optional subcommand, and the
# matching runner is invoked. An unrecognized or missing command prints the
# usage message.
def initialize
  parse_options
  command, subcommand = ARGV.shift, ARGV.shift
  case command
  when 'console'
    run_console
  when 'server'
    run_server(subcommand)
  when 'node'
    run_node(subcommand)
  when 'load_schema'
    run_load_schema
  when 'install'
    run_install(subcommand)
  when 'cleanup'
    run_cleanup
  else
    usage
  end
end
50
+
51
# Start an IRB session with the CloudCrowd code loaded, a (schema-validated)
# database connection established, and the CloudCrowd module mixed into
# Object. The equivalent of Rails' `script/console`.
def run_console
  %w[irb irb/completion pp].each { |library| require library }
  load_code
  connect_to_database(true)
  CloudCrowd::Server # Preload server to autoload classes.
  Object.send(:include, CloudCrowd)
  IRB.start
end
63
+
64
# `crowd server` accepts 'start', 'stop' or 'restart' and defaults to
# 'start'. The CloudCrowd code is loaded first; any other subcommand does
# nothing further.
def run_server(subcommand)
  load_code
  case (subcommand || 'start')
  when 'start'
    start_server
  when 'stop'
    stop_server
  when 'restart'
    restart_server
  end
end
74
+
75
# Convenience command for quickly spinning up the central server. More
# sophisticated deployments, load-balancing across multiple app servers,
# should use the config.ru rackup file directly. This method replaces the
# current process with a single Thin server (via exec), on --port or 9173,
# using the config.ru found in the configuration directory. When
# daemonizing, the log directory is created first if it is missing.
def start_server
  port = @options[:port] || 9173
  daemonize = @options[:daemonize] ? '-d' : ''
  log_path = CloudCrowd.log_path('server.log')
  pid_path = CloudCrowd.pid_path('server.pid')
  rackup_path = File.expand_path("#{@options[:config_path]}/config.ru")
  # File.exist? is the supported spelling; File.exists? was removed in Ruby 3.2.
  FileUtils.mkdir_p(CloudCrowd.log_path) if @options[:daemonize] && !File.exist?(CloudCrowd.log_path)
  puts "Starting CloudCrowd Central Server on port #{port}..."
  exec "thin -e #{@options[:environment]} -p #{port} #{daemonize} --tag cloud-crowd-server --log #{log_path} --pid #{pid_path} -R #{rackup_path} start"
end
89
+
90
# Ask Thin to kill the daemonized central server recorded in server.pid,
# passing a timeout argument of 0.
def stop_server
  server_pid_file = CloudCrowd.pid_path('server.pid')
  Thin::Server.kill(server_pid_file, 0)
end
94
+
95
# Restart the daemonized central server: stop it, pause for one second, and
# start it again.
def restart_server
  stop_server
  sleep(1)
  start_server
end
101
+
102
# `crowd node` accepts 'start', 'stop' or 'restart' and defaults to 'start'.
# The CloudCrowd code is loaded and RACK_ENV is set from the --environment
# option before dispatching; any other subcommand does nothing further.
def run_node(subcommand)
  load_code
  ENV['RACK_ENV'] = @options[:environment]
  subcommand ||= 'start'
  case subcommand
  when 'start'
    start_node
  when 'stop'
    stop_node
  when 'restart'
    restart_node
  end
end
112
+
113
# Launch a Node on --port (or Node::DEFAULT_PORT), handing it the daemonize
# flag. Please only run a single node per machine. The Node process will be
# long-lived, although its workers will come and go.
def start_node
  listen_port = @options[:port] || Node::DEFAULT_PORT
  puts "Starting CloudCrowd Node on port #{listen_port}..."
  Node.new(listen_port, @options[:daemonize])
end
120
+
121
# Ask Thin to kill the daemonized Node recorded in node.pid.
def stop_node
  node_pid_file = CloudCrowd.pid_path('node.pid')
  Thin::Server.kill(node_pid_file)
end
125
+
126
# Restart the daemonized Node: stop it, pause for one second, and start it
# again.
def restart_node
  stop_node
  sleep(1)
  start_node
end
132
+
133
# Load the schema into the database named in 'database.yml'. The connection
# is made without schema validation, and requiring the schema file is what
# runs it.
def run_load_schema
  load_code
  validate_schema = false
  connect_to_database(validate_schema)
  require 'cloud_crowd/schema.rb'
end
139
+
140
# Install the required CloudCrowd configuration files (config.yml,
# config.ru, database.yml) and the bundled actions directory into
# +install_path+, or into the current directory when no path is given.
# The target directory is created when it does not exist yet.
def run_install(install_path)
  require 'fileutils'
  install_path ||= '.'
  # File.exist? is the supported spelling; File.exists? was removed in Ruby 3.2.
  FileUtils.mkdir_p(install_path) unless File.exist?(install_path)
  install_file "#{CC_ROOT}/config/config.example.yml", "#{install_path}/config.yml"
  install_file "#{CC_ROOT}/config/config.example.ru", "#{install_path}/config.ru"
  install_file "#{CC_ROOT}/config/database.example.yml", "#{install_path}/database.yml"
  install_file "#{CC_ROOT}/actions", "#{install_path}/actions", true
end
151
+
152
# Clean up the Jobs in the CloudCrowd database, passing the --days option on
# to Job.cleanup_all. Needs the code loaded and a schema-validated database
# connection.
def run_cleanup
  load_code
  connect_to_database(true)
  grace_period = @options[:days]
  Job.cleanup_all(:days => grace_period)
end
158
+
159
# Print the `crowd` usage message (the option parser's help text) with a
# blank line in front of it.
def usage
  $stdout.puts("\n#{@option_parser}\n")
end
163
+
164
+
165
+ private
166
+
167
# Check that every file in CONFIG_FILES exists in the configuration
# directory (--config, the CLOUD_CROWD_CONFIG environment variable, or the
# current directory). Exits through +config_not_found+ when one is missing.
# A successful check is remembered, so later calls return immediately.
def ensure_config
  return if @config_found
  # File.exist? is the supported spelling; File.exists? was removed in Ruby 3.2.
  found = CONFIG_FILES.all? { |f| File.exist?("#{@options[:config_path]}/#{f}") }
  return config_not_found unless found
  # Record the result in the same variable the guard above reads; it used to
  # be stored in @config_dir, so the check was repeated on every call.
  @config_found = true
end
174
+
175
# Parse the options shared by all commands out of ARGV, leaving the command
# words behind. Valid options are: --config --port --environment --daemonize
# --days, plus --version, which prints the version and exits.
#
# Defaults: the 'production' environment, the configuration directory from
# CLOUD_CROWD_CONFIG (or '.'), and no daemonizing. --port is kept as the
# string that was typed; --days is only recorded when it is all digits.
def parse_options
  @options = {
    :environment => 'production',
    :config_path => ENV['CLOUD_CROWD_CONFIG'] || '.',
    :daemonize   => false
  }
  @option_parser = OptionParser.new do |opts|
    opts.on('-c', '--config PATH', 'path to configuration directory') { |conf_path| @options[:config_path] = conf_path }
    opts.on('-p', '--port PORT', 'port number for server (central or node)') { |port_num| @options[:port] = port_num }
    opts.on('-e', '--environment ENV', 'server environment (defaults to production)') { |env| @options[:environment] = env }
    opts.on('-d', '--daemonize', 'run as a background daemon') { |daemonize| @options[:daemonize] = daemonize }
    opts.on('--days NUM_DAYS', 'grace period before cleanup (7 by default)') do |days|
      @options[:days] = days.to_i if days.match(/\A\d+\Z/)
    end
    opts.on_tail('-v', '--version', 'show version') do
      require "#{CC_ROOT}/lib/cloud-crowd"
      puts "CloudCrowd version #{VERSION}"
      exit
    end
  end
  @option_parser.banner = BANNER
  @option_parser.parse!(ARGV)
end
208
+
209
# Load the CloudCrowd library and configure it from the config.yml in the
# configuration directory, after checking that the configuration files are
# present. Not all commands require this.
def load_code
  ensure_config
  require "#{CC_ROOT}/lib/cloud-crowd"
  config_file = "#{@options[:config_path]}/config.yml"
  CloudCrowd.configure(config_file)
end
216
+
217
# Load the models and connect to the central server's database using the
# database.yml in the configuration directory. +validate_schema+ is passed
# on to CloudCrowd.configure_database. Not all commands require this.
def connect_to_database(validate_schema)
  require 'cloud_crowd/models'
  database_file = "#{@options[:config_path]}/database.yml"
  CloudCrowd.configure_database(database_file, validate_schema)
end
223
+
224
# Explain that the configuration directory could not be found, then exit
# with status 1.
def config_not_found
  explanation = "`crowd` can't find the CloudCrowd configuration directory. Please use `crowd -c path/to/config`, or run `crowd` from inside of the configuration directory itself."
  puts explanation
  exit 1
end
229
+
230
# Install a file (or, with +is_dir+, a whole directory) and log the
# installation. If +dest+ already exists, offer a chance to back out: only
# 'y', 'yes' or 'ok' (in any case) overwrites it, and end-of-input counts
# as "no". The log line is suppressed when RACK_ENV is 'test'.
def install_file(source, dest, is_dir=false)
  # File.exist? is the supported spelling; File.exists? was removed in Ruby 3.2.
  if File.exist?(dest)
    print "#{dest} already exists. Overwrite it? (yes/no) "
    # Read from $stdin explicitly: a bare `gets` reads from files named by any
    # arguments left in ARGV, and returns nil at end of input.
    answer = $stdin.gets.to_s.chomp.downcase
    return unless ['y', 'yes', 'ok'].include?(answer)
  end
  if is_dir
    # Copy the directory's contents: cp_r(source, dest) onto an existing
    # directory would nest the source inside it (dest/actions/actions).
    FileUtils.mkdir_p(dest)
    FileUtils.cp_r(File.join(source, '.'), dest)
  else
    FileUtils.cp(source, dest)
  end
  puts "installed #{dest}" unless ENV['RACK_ENV'] == 'test'
end
240
+
241
+ end
242
+ end