mooktakim-cloud-crowd 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. data/EPIGRAPHS +17 -0
  2. data/LICENSE +22 -0
  3. data/README +93 -0
  4. data/actions/graphics_magick.rb +43 -0
  5. data/actions/process_pdfs.rb +92 -0
  6. data/actions/word_count.rb +16 -0
  7. data/bin/crowd +5 -0
  8. data/config/config.example.ru +23 -0
  9. data/config/config.example.yml +55 -0
  10. data/config/database.example.yml +16 -0
  11. data/examples/graphics_magick_example.rb +44 -0
  12. data/examples/process_pdfs_example.rb +40 -0
  13. data/examples/word_count_example.rb +42 -0
  14. data/lib/cloud-crowd.rb +188 -0
  15. data/lib/cloud_crowd/action.rb +125 -0
  16. data/lib/cloud_crowd/asset_store/filesystem_store.rb +39 -0
  17. data/lib/cloud_crowd/asset_store/s3_store.rb +43 -0
  18. data/lib/cloud_crowd/asset_store.rb +41 -0
  19. data/lib/cloud_crowd/command_line.rb +242 -0
  20. data/lib/cloud_crowd/exceptions.rb +46 -0
  21. data/lib/cloud_crowd/helpers/authorization.rb +52 -0
  22. data/lib/cloud_crowd/helpers/resources.rb +25 -0
  23. data/lib/cloud_crowd/helpers.rb +8 -0
  24. data/lib/cloud_crowd/inflector.rb +19 -0
  25. data/lib/cloud_crowd/models/job.rb +190 -0
  26. data/lib/cloud_crowd/models/node_record.rb +107 -0
  27. data/lib/cloud_crowd/models/work_unit.rb +170 -0
  28. data/lib/cloud_crowd/models.rb +40 -0
  29. data/lib/cloud_crowd/node.rb +199 -0
  30. data/lib/cloud_crowd/schema.rb +50 -0
  31. data/lib/cloud_crowd/server.rb +123 -0
  32. data/lib/cloud_crowd/worker.rb +149 -0
  33. data/mooktakim-cloud-crowd.gemspec +116 -0
  34. data/public/css/admin_console.css +243 -0
  35. data/public/css/reset.css +42 -0
  36. data/public/images/bullet_green.png +0 -0
  37. data/public/images/bullet_white.png +0 -0
  38. data/public/images/cloud_hand.png +0 -0
  39. data/public/images/header_back.png +0 -0
  40. data/public/images/logo.png +0 -0
  41. data/public/images/queue_fill.png +0 -0
  42. data/public/images/server.png +0 -0
  43. data/public/images/server_busy.png +0 -0
  44. data/public/images/server_error.png +0 -0
  45. data/public/images/sidebar_bottom.png +0 -0
  46. data/public/images/sidebar_top.png +0 -0
  47. data/public/images/worker_info.png +0 -0
  48. data/public/images/worker_info_loading.gif +0 -0
  49. data/public/js/admin_console.js +197 -0
  50. data/public/js/excanvas.js +1 -0
  51. data/public/js/flot.js +1 -0
  52. data/public/js/jquery.js +19 -0
  53. data/test/acceptance/test_failing_work_units.rb +33 -0
  54. data/test/acceptance/test_node.rb +20 -0
  55. data/test/acceptance/test_server.rb +66 -0
  56. data/test/acceptance/test_word_count.rb +40 -0
  57. data/test/blueprints.rb +25 -0
  58. data/test/config/actions/failure_testing.rb +13 -0
  59. data/test/config/config.ru +17 -0
  60. data/test/config/config.yml +6 -0
  61. data/test/config/database.yml +3 -0
  62. data/test/test_helper.rb +19 -0
  63. data/test/unit/test_action.rb +70 -0
  64. data/test/unit/test_configuration.rb +48 -0
  65. data/test/unit/test_job.rb +103 -0
  66. data/test/unit/test_node.rb +41 -0
  67. data/test/unit/test_node_record.rb +42 -0
  68. data/test/unit/test_work_unit.rb +53 -0
  69. data/test/unit/test_worker.rb +48 -0
  70. data/views/operations_center.erb +82 -0
  71. metadata +290 -0
@@ -0,0 +1,188 @@
1
+ # The Grand Central of code loading...
2
+
3
+ $LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__))
4
+
5
+ # Common Gems:
6
+ require 'rubygems'
7
+ gem 'activerecord', '2.3.5'
8
+ gem 'json'
9
+ gem 'rest-client'
10
+ gem 'right_aws'
11
+ gem 'sinatra'
12
+ gem 'thin'
13
+
14
+ # Autoloading for all the pieces which may or may not be needed:
15
+ autoload :ActiveRecord, 'active_record'
16
+ autoload :Benchmark, 'benchmark'
17
+ autoload :Digest, 'digest'
18
+ autoload :ERB, 'erb'
19
+ autoload :FileUtils, 'fileutils'
20
+ autoload :JSON, 'json'
21
+ autoload :RestClient, 'rest_client'
22
+ autoload :RightAws, 'right_aws'
23
+ autoload :Sinatra, 'sinatra'
24
+ autoload :Thin, 'thin'
25
+ autoload :YAML, 'yaml'
26
+
27
+ # Common code which should really be required in every circumstance.
28
+ require 'socket'
29
+ require 'cloud_crowd/exceptions'
30
+
31
+ module CloudCrowd
32
+
33
+ # Autoload all the CloudCrowd internals.
34
+ autoload :Action, 'cloud_crowd/action'
35
+ autoload :AssetStore, 'cloud_crowd/asset_store'
36
+ autoload :CommandLine, 'cloud_crowd/command_line'
37
+ autoload :Helpers, 'cloud_crowd/helpers'
38
+ autoload :Inflector, 'cloud_crowd/inflector'
39
+ autoload :Job, 'cloud_crowd/models'
40
+ autoload :Node, 'cloud_crowd/node'
41
+ autoload :NodeRecord, 'cloud_crowd/models'
42
+ autoload :Server, 'cloud_crowd/server'
43
+ autoload :Worker, 'cloud_crowd/worker'
44
+ autoload :WorkUnit, 'cloud_crowd/models'
45
+
46
# Keep this version in sync with the gemspec. This package is published as
# 0.3.4, so the constant reported by `crowd --version` must say the same.
VERSION = '0.3.4'
48
+
49
+ # Increment the schema version when there's a backwards incompatible change.
50
+ SCHEMA_VERSION = 3
51
+
52
+ # Root directory of the CloudCrowd gem.
53
+ ROOT = File.expand_path(File.dirname(__FILE__) + '/..')
54
+
55
+ # Default folder to log daemonized servers and nodes into.
56
+ LOG_PATH = 'log'
57
+
58
+ # Default folder to contain the pids of daemonized servers and nodes.
59
+ PID_PATH = 'tmp/pids'
60
+
61
# A Job is processing if its WorkUnits are in the queue to be handled by nodes.
PROCESSING = 1

# A Job has succeeded if all of its WorkUnits have finished successfully.
SUCCEEDED = 2

# A Job has failed if even a single one of its WorkUnits has failed (they may
# be attempted multiple times on failure, however).
FAILED = 3

# A Job is splitting if it's in the process of dividing its inputs up into
# multiple WorkUnits.
SPLITTING = 4

# A Job is merging if it's busy collecting all of its successful WorkUnits
# back together into the final result.
MERGING = 5

# A Job is considered to be complete if it succeeded or if it failed.
# Frozen so that no caller can alter the shared list by accident.
COMPLETE = [SUCCEEDED, FAILED].freeze

# A Job is considered incomplete if it's being processed, split up or merged.
INCOMPLETE = [PROCESSING, SPLITTING, MERGING].freeze

# Mapping of statuses to their display strings. The array index is the status
# number, which is why index 0 (not a real status) reads 'unknown'.
DISPLAY_STATUS_MAP = ['unknown', 'processing', 'succeeded', 'failed', 'splitting', 'merging'].freeze
87
+
88
+ class << self
89
+ attr_reader :config
90
+ attr_accessor :identity
91
+
92
# Load CloudCrowd's settings from the <tt>config.yml</tt> found at
# +config_path+. The absolute directory holding that file is remembered in
# <tt>@config_path</tt>, and the parsed YAML becomes +config+ (it is also the
# return value).
def configure(config_path)
  settings_dir = File.dirname(config_path)
  @config_path = File.expand_path(settings_dir)
  @config      = YAML.load_file(config_path)
end
97
+
98
# Configure the CloudCrowd central database (and connect to it), by passing
# in a path to <tt>database.yml</tt>. The file should use the standard
# ActiveRecord connection format.
#
# When +validate_schema+ is true, the highest version recorded in
# +schema_migrations+ is compared with SCHEMA_VERSION. A match returns true;
# anything else prints upgrade instructions and terminates the process.
# When +validate_schema+ is false only the connection is established and
# nil is returned.
def configure_database(config_path, validate_schema=true)
  configuration = YAML.load_file(config_path)
  ActiveRecord::Base.establish_connection(configuration)
  return unless validate_schema
  version = ActiveRecord::Base.connection.select_values('select max(version) from schema_migrations').first.to_i
  return true if version == SCHEMA_VERSION
  puts "Your database schema is out of date. Please use `crowd load_schema` to update it. This will wipe all the tables, so make sure that your jobs have a chance to finish first.\nexiting..."
  # A stale schema is a failure: exit non-zero (as config_not_found does) so
  # that scripts and process supervisors can tell the start-up did not succeed.
  exit(1)
end
111
+
112
+ # Get a reference to the central server, including authentication if
113
+ # configured.
114
+ def central_server
115
+ @central_server ||= RestClient::Resource.new(CloudCrowd.config[:central_server], CloudCrowd.client_options)
116
+ end
117
+
118
# Where daemonized servers and nodes write their logs. The directory comes
# from <tt>config[:log_path]</tt>, falling back to LOG_PATH, and is memoized.
# Without an argument the directory itself is returned; with +log_file+ the
# file name is joined onto it.
def log_path(log_file=nil)
  @log_path ||= (config[:log_path] || LOG_PATH)
  return @log_path unless log_file
  File.join(@log_path, log_file)
end
123
+
124
# Where daemonized servers and nodes keep their pid files. The directory
# comes from <tt>config[:pid_path]</tt>, falling back to PID_PATH, and is
# memoized. Without an argument the directory itself is returned; with
# +pid_file+ the file name is joined onto it.
def pid_path(pid_file=nil)
  @pid_path ||= (config[:pid_path] || PID_PATH)
  return @pid_path unless pid_file
  File.join(@pid_path, pid_file)
end
129
+
130
# The RestClient options shared by the central server and the nodes when
# they talk to each other: 5 seconds to open a connection and 30 seconds to
# finish reading. When <tt>:http_authentication</tt> is switched on in the
# configuration, the configured <tt>:login</tt> and <tt>:password</tt> are
# added as +:user+ and +:password+. The hash is built once and then reused.
def client_options
  return @client_options if @client_options
  @client_options = {:timeout => 30, :open_timeout => 5}
  settings = CloudCrowd.config
  if settings[:http_authentication]
    @client_options.update(:user => settings[:login], :password => settings[:password])
  end
  @client_options
end
142
+
143
# Return the displayable status name of an internal CloudCrowd status number
# (see the status constants above). Anything that is not a known status --
# nil, a non-integer, a negative number or a number past the end of the map --
# is reported as 'unknown'.
def display_status(status)
  # Array#[] would count a negative index from the end of the map and raise
  # on nil, so only non-negative integers are looked up.
  return 'unknown' unless status.is_a?(Integer) && status >= 0
  DISPLAY_STATUS_MAP[status] || 'unknown'
end
148
+
149
# CloudCrowd::Actions are requested dynamically by name, through this
# hash-like +actions+ property. On first use every file returned by
# +action_paths+ is required, and the constant whose name is the camelized
# file name is stored under the file's base name. The result is memoized.
# If you wish to have certain nodes be specialized to only handle certain
# Actions, then install only those into the actions directory.
#
# A NameError raised while loading is re-raised with a hint about the
# file-name/class-name convention; nothing is memoized in that case.
def actions
  return @actions if @actions
  loaded = {}
  action_paths.each do |path|
    name = File.basename(path, File.extname(path))
    require path
    loaded[name] = Module.const_get(Inflector.camelize(name))
  end
  @actions = loaded
rescue NameError => e
  adjusted_message = "One of your actions failed to load. Please ensure that the name of your action class can be deduced from the name of the file. ex: 'word_count.rb' => 'WordCount'\n#{e.message}"
  raise NameError.new(adjusted_message, e.name)
end
166
+
167
# List the path of every installed Action file for this node or server, in
# this order: the actions bundled under ROOT, the ones in the +actions+
# folder next to the loaded configuration, and finally the ones in the
# optional <tt>:actions_path</tt> configuration directory.
def action_paths
  custom_dir = CloudCrowd.config[:actions_path]
  patterns = ["#{ROOT}/actions/*.rb", "#{@config_path}/actions/*.rb"]
  patterns << "#{custom_dir}/*.rb" if custom_dir
  patterns.map { |pattern| Dir[pattern] }.flatten
end
174
+
175
+ # Is this CloudCrowd instance a server? Useful for avoiding loading unneeded
176
+ # code from actions.
177
+ def server?
178
+ @identity == :server
179
+ end
180
+
181
+ # Or is it a node?
182
+ def node?
183
+ @identity == :node
184
+ end
185
+
186
+ end
187
+
188
+ end
@@ -0,0 +1,125 @@
1
+ module CloudCrowd
2
+
3
+ # As you write your custom actions, have them inherit from CloudCrowd::Action.
4
+ # All actions must implement a +process+ method, which should return a
5
+ # JSON-serializable object that will be used as the output for the work unit.
6
+ # See the default actions for examples.
7
+ #
8
+ # Optionally, actions may define +split+ and +merge+ methods to do mapping
9
+ # and reducing around the +input+. +split+ should return an array of URLs --
10
+ # to be mapped into WorkUnits and processed in parallel. In the +merge+ step,
11
+ # +input+ will be an array of all the resulting outputs from calling process.
12
+ #
13
+ # All actions have use of an individual +work_directory+, for scratch files,
14
+ # and spend their duration inside of it, so relative paths work well.
15
+ #
16
+ # Note that Actions inherit a backticks (`) method that raises an Exception
17
+ # if the external command fails.
18
+ class Action
19
+
20
+ FILE_URL = /\Afile:\/\//
21
+
22
+ attr_reader :input, :input_path, :file_name, :options, :work_directory
23
+
24
# Initializing an Action sets up all of the read-only variables that
# form the bulk of the API for action subclasses (paths to read from and
# write to). It creates the +work_directory+ under the store's temporary
# storage path, parses the input if it looks like JSON and, when the input
# is a URL, downloads it into the +work_directory+ before starting.
#
# +status+ is accepted but not used by this method. +options+ must provide
# 'job_id' and may provide 'work_unit_id'; +store+ must respond to
# +temp_storage_path+.
def initialize(status, input, options, store)
  @input, @options, @store = input, options, store
  @job_id, @work_unit_id = options['job_id'], options['work_unit_id']
  @work_directory = File.expand_path(File.join(@store.temp_storage_path, storage_prefix))
  # File.exist? works on every Ruby version; File.exists? was removed in 3.2.
  FileUtils.mkdir_p(@work_directory) unless File.exist?(@work_directory)
  parse_input
  download_input
end
37
+
38
# Placeholder for the real work: every Action subclass has to provide its
# own +process+. Calling this base version raises NotImplementedError.
def process
  message = "CloudCrowd::Actions must override 'process' with their own processing code."
  raise NotImplementedError, message
end
42
+
43
# Download the file at +url+ to the local +path+ by shelling out to
# <tt>curl -s</tt>, then return +path+. Both values are shell-escaped before
# they are interpolated, so quotes, <tt>$(...)</tt> or backticks inside a
# job's input URL reach curl literally instead of being run by the shell.
#
# The previous implementation is below, and, although it would be
# wonderful not to shell out, RestClient wasn't handling URLs with encoded
# entities (%20, for example), and doesn't let you download to a given
# location. Getting a RestClient patch in would be ideal.
#
#   if url.match(FILE_URL)
#     FileUtils.cp(url.sub(FILE_URL, ''), path)
#   else
#     resp = RestClient::Request.execute(:url => url, :method => :get, :raw_response => true)
#     FileUtils.mv resp.file.path, path
#   end
def download(url, path)
  require 'shellwords' # loaded on first use, like this library's other dependencies
  `curl -s #{Shellwords.escape(url.to_s)} > #{Shellwords.escape(path.to_s)}`
  path
end
59
+
60
# Hand the local file at +file_path+ to the configured store, filing it
# under this action's storage prefix with its original base name. Returns
# whatever the store's +save+ returns (for the bundled stores, the URL at
# which the saved file can be reached).
def save(file_path)
  remote_name = File.basename(file_path)
  @store.save(file_path, File.join(storage_prefix, remote_name))
end
66
+
67
# After the Action has finished, remove its work directory together with
# everything inside it. Does nothing when the directory was never set or no
# longer exists, so it is safe to call more than once.
def cleanup_work_directory
  # File.exist? works on every Ruby version; File.exists? was removed in 3.2.
  FileUtils.rm_r(@work_directory) if @work_directory && File.exist?(@work_directory)
end
72
+
73
# Actions have a backticks command that raises a CommandFailed exception
# on failure, so that processing doesn't just blithely continue. On success
# the command's standard output is returned, exactly like Kernel#`.
#
# The exception receives the captured output and the command's exit code.
def `(command)
  result = super(command)
  status = $?
  unless status.success?
    # Process::Status#to_i is the raw wait status (256 for `exit 1`), so report
    # the real exit code; it is nil when the command was killed by a signal,
    # in which case the raw status is the only number available.
    raise Error::CommandFailed.new(result, status.exitstatus || status.to_i)
  end
  result
end
81
+
82
+
83
+ private
84
+
85
# Convert an unsafe URL into a filesystem-friendly filename: the query
# string is dropped, percent-escapes in the last path segment are decoded,
# every character outside <tt>a-z A-Z 0-9 _ - .</tt> becomes a dash (runs of
# dashes collapse into one), and dots are kept only in front of the
# extension. Example: ".../some%20file.name.pdf?v=2" => "some-file-name.pdf".
def safe_filename(url)
  # Without this, "a.pdf?x=1" would yield the extension ".pdf?x=1".
  url = url.sub(/\?.*\z/m, '')
  ext = File.extname(url)
  # URI.unescape no longer exists (removed in Ruby 3.0). Decode ASCII escapes
  # by hand; anything else would be replaced by a dash below anyway.
  decoded = File.basename(url).gsub(/%([0-9a-fA-F]{2})/) do
    code = Regexp.last_match(1).hex
    code < 128 ? code.chr : '-'
  end
  name = decoded.gsub(/[^a-zA-Z0-9_\-.]/, '-').gsub(/-+/, '-')
  File.basename(name, ext).gsub('.', '-') + ext.gsub(/[^a-zA-Z0-9_\-.]/, '-')
end
91
+
92
# The directory prefix to use for both local and S3 storage:
# [action]/job_[job_id]/unit_[work_unit_id]
# The unit segment is left out when there is no work unit id. The prefix is
# computed once and then reused.
def storage_prefix
  # Return the memoized value first, instead of rebuilding the parts on
  # every call only to throw them away.
  return @storage_prefix if @storage_prefix
  path_parts = [Inflector.underscore(self.class), "job_#{@job_id}"]
  path_parts << "unit_#{@work_unit_id}" if @work_unit_id
  @storage_prefix = File.join(path_parts)
end
101
+
102
# When the input starts like a JSON document ('[' or '{'), try to swap it
# for its parsed form. Input that turns out not to be parseable is kept
# exactly as it was. (It would be great if the JSON module had an is_json?
# method.)
def parse_input
  return unless ['[', '{'].include?(@input[0..0])
  @input = begin
    JSON.parse(@input)
  rescue StandardError
    @input
  end
end
108
+
109
# True when the input parses as a URI that carries a scheme. Anything that
# cannot be parsed (including already-parsed JSON input) counts as "no".
def input_is_url?
  URI.parse(@input).scheme ? true : false
rescue StandardError
  false
end
112
+
113
# When the input is a URL, fetch it into the work directory before
# processing starts. This records +input_path+ (where the download lands)
# and +file_name+ (its base name without the extension). Nothing happens
# for any other kind of input.
def download_input
  if input_is_url?
    Dir.chdir(@work_directory) do
      target = File.join(@work_directory, safe_filename(@input))
      @input_path = target
      @file_name  = File.basename(target, File.extname(target))
      download(@input, target)
    end
  end
end
122
+
123
+ end
124
+
125
+ end
@@ -0,0 +1,39 @@
1
+ module CloudCrowd
2
+ class AssetStore
3
+
4
+ # The FilesystemStore is an implementation of the AssetStore, good only for
5
+ # use in development, testing, if you're only running a single-machine
6
+ # installation, or are using a networked drive.
7
+ module FilesystemStore
8
+
9
+ DEFAULT_STORAGE_PATH = '/tmp/cloud_crowd_storage'
10
+
11
+ attr_reader :local_storage_path
12
+
13
# Make sure that local storage exists and is writeable before starting.
# The location is <tt>:local_storage_path</tt> from the configuration, or
# DEFAULT_STORAGE_PATH when that is not set. Raises
# Error::StorageNotWritable if the directory cannot be written to.
def setup
  lsp = @local_storage_path = CloudCrowd.config[:local_storage_path] || DEFAULT_STORAGE_PATH
  # File.exist? works on every Ruby version; File.exists? was removed in 3.2.
  FileUtils.mkdir_p(lsp) unless File.exist?(lsp)
  raise Error::StorageNotWritable, "#{lsp} is not writable" unless File.writable?(lsp)
end
19
+
20
# Save a file to somewhere semi-persistent on the filesystem. To use,
# configure <tt>:storage: 'filesystem'</tt> in *config.yml*, as well as
# <tt>:local_storage_path:</tt>.
#
# +local_path+ is copied to +save_path+ underneath the local storage
# directory (missing parent directories are created), and the
# <tt>file://</tt> URL of the copy is returned.
def save(local_path, save_path)
  save_path = File.join(@local_storage_path, save_path)
  save_dir = File.dirname(save_path)
  # File.exist? works on every Ruby version; File.exists? was removed in 3.2.
  FileUtils.mkdir_p(save_dir) unless File.exist?(save_dir)
  FileUtils.cp(local_path, save_path)
  "file://#{File.expand_path(save_path)}"
end
30
+
31
# Remove all of a Job's result files from the filesystem, i.e. the
# <tt>[action]/job_[id]</tt> folder below the local storage directory.
# Does nothing if that folder does not exist.
def cleanup(job)
  path = "#{@local_storage_path}/#{job.action}/job_#{job.id}"
  # File.exist? works on every Ruby version; File.exists? was removed in 3.2.
  FileUtils.rm_r(path) if File.exist?(path)
end
36
+ end
37
+
38
+ end
39
+ end
@@ -0,0 +1,43 @@
1
+ module CloudCrowd
2
+ class AssetStore
3
+
4
+ # The S3Store is an implementation of an AssetStore that uses a bucket
5
+ # on S3 for all resulting files.
6
+ module S3Store
7
+
8
# Read the S3 settings from the configuration and connect, first thing.
# The bucket name and both AWS keys must be strings, otherwise
# Error::MissingConfiguration is raised. With <tt>:s3_authentication</tt>
# on, the connection uses https on port 443, otherwise http on port 80.
# The bucket is looked up first and requested again with the second
# argument set to true when the lookup returns nothing.
def setup
  settings    = CloudCrowd.config
  @use_auth   = settings[:s3_authentication]
  bucket_name = settings[:s3_bucket]
  key         = settings[:aws_access_key]
  secret      = settings[:aws_secret_key]
  unless [bucket_name, key, secret].all? { |value| value.is_a? String }
    raise Error::MissingConfiguration, "An S3 account must be configured in 'config.yml' before 's3' storage can be used"
  end
  protocol, port = @use_auth ? ['https', 443] : ['http', 80]
  @s3 = RightAws::S3.new(key, secret, :protocol => protocol, :port => port)
  @bucket = @s3.bucket(bucket_name)
  @bucket = @s3.bucket(bucket_name, true) unless @bucket
end
21
+
22
# Save a finished file from local storage to S3. Save it publicly unless
# we're configured to use S3 authentication. Authenticated links expire
# after one day by default.
#
# Uploads +local_path+ under the key +save_path+ and returns the link to
# it: a link obtained from the S3 interface when authentication is on, the
# key's public link otherwise.
def save(local_path, save_path)
  permissions = @use_auth ? 'private' : 'public-read'
  # The block form closes the file as soon as the upload returns (or
  # raises), instead of leaking one open handle per saved file.
  File.open(local_path) { |file| @bucket.put(save_path, file, {}, permissions) }
  if @use_auth
    @s3.interface.get_link(@bucket, save_path)
  else
    @bucket.key(save_path).public_link
  end
end
34
+
35
# Delete the <tt>[action]/job_[id]</tt> folder of +job+ from the bucket,
# which removes all of the Job's resulting files, both intermediate and
# finished.
def cleanup(job)
  job_folder = "#{job.action}/job_#{job.id}"
  @bucket.delete_folder(job_folder)
end
39
+
40
+ end
41
+
42
+ end
43
+ end
@@ -0,0 +1,41 @@
1
+ require 'tmpdir'
2
+
3
+ module CloudCrowd
4
+
5
+ # The AssetStore provides a common API for storing files and returning URLs
6
+ # that can access them. At the moment, the files can be saved to either S3, or
7
+ # the local filesystem. You shouldn't need to use the AssetStore directly --
8
+ # Action's +download+ and +save+ methods use it behind the scenes.
9
+ #
10
+ # To implement a new back-end for the AssetStore, you must provide
11
+ # <tt>save(local_path, save_path)</tt>, <tt>cleanup(job)</tt>, and optionally,
12
+ # a <tt>setup</tt> method that will be called once at initialization.
13
+ class AssetStore
14
+
15
+ autoload :S3Store, 'cloud_crowd/asset_store/s3_store'
16
+ autoload :FilesystemStore, 'cloud_crowd/asset_store/filesystem_store'
17
+
18
+ # Configure the AssetStore with the specific storage implementation
19
+ # specified by 'storage' in <tt>config.yml</tt>.
20
+ case CloudCrowd.config[:storage]
21
+ when 's3' then include S3Store
22
+ when 'filesystem' then include FilesystemStore
23
+ else raise Error::StorageNotFound, "#{CloudCrowd.config[:storage]} is not a valid storage back end"
24
+ end
25
+
26
# Creating the AssetStore ensures that its scratch directory exists and is
# writable (Error::StorageNotWritable is raised otherwise), then runs the
# storage back end's +setup+ hook if the back end defines one.
def initialize
  # File.exist? works on every Ruby version; File.exists? was removed in 3.2.
  FileUtils.mkdir_p(temp_storage_path) unless File.exist?(temp_storage_path)
  raise Error::StorageNotWritable, "#{temp_storage_path} is not writable" unless File.writable?(temp_storage_path)
  setup if respond_to? :setup
end
32
+
33
# The path of CloudCrowd's temporary local storage; all actions run in
# subdirectories of it. Taken from <tt>:temp_storage_path</tt> in the
# configuration, or else a +cloud_crowd_tmp+ folder inside the system's
# temporary directory. The answer is memoized.
def temp_storage_path
  return @temp_storage_path if @temp_storage_path
  @temp_storage_path = CloudCrowd.config[:temp_storage_path] || "#{Dir.tmpdir}/cloud_crowd_tmp"
end
38
+
39
+ end
40
+
41
+ end
@@ -0,0 +1,242 @@
1
+ require 'optparse'
2
+
3
+ module CloudCrowd
4
+ class CommandLine
5
+
6
+ # Configuration files required for the `crowd` command to function.
7
+ CONFIG_FILES = ['config.yml', 'database.yml']
8
+
9
+ # Reference the absolute path to the root.
10
+ CC_ROOT = File.expand_path(File.dirname(__FILE__) + '/../..')
11
+
12
+ # Command-line banner for the usage message.
13
+ BANNER = <<-EOS
14
+ CloudCrowd is a MapReduce-inspired Parallel Processing System for Ruby.
15
+
16
+ Wiki: http://wiki.github.com/documentcloud/cloud-crowd
17
+ Rdoc: http://rdoc.info/projects/documentcloud/cloud-crowd
18
+
19
+ Usage: crowd COMMAND OPTIONS
20
+
21
+ Commands:
22
+ install Install the CloudCrowd configuration files to the specified directory
23
+ server Start up the central server (requires a database)
24
+ node Start up a worker node (only one node per machine, please)
25
+ console Launch a CloudCrowd console, connected to the central database
26
+ load_schema Load the schema into the database specified by database.yml
27
+ cleanup Removes jobs that were finished over --days (7 by default) ago
28
+
29
+ server -d [start | stop | restart] Servers and nodes can be launched as
30
+ node -d [start | stop | restart] daemons, then stopped or restarted.
31
+
32
+ Options:
33
+ EOS
34
+
35
# Creating a CloudCrowd::CommandLine runs from the contents of ARGV: the
# options are parsed (and removed) first, then the next two arguments are
# taken as the command and its optional subcommand. An unrecognized or
# missing command prints the usage message.
def initialize
  parse_options
  command, subcommand = ARGV.shift, ARGV.shift
  if    command == 'console'     then run_console
  elsif command == 'server'      then run_server(subcommand)
  elsif command == 'node'        then run_node(subcommand)
  elsif command == 'load_schema' then run_load_schema
  elsif command == 'install'     then run_install(subcommand)
  elsif command == 'cleanup'     then run_cleanup
  else  usage
  end
end
50
+
51
# Start an IRB session that has the CloudCrowd code loaded, a validated
# database connection open, and the CloudCrowd module mixed into Object so
# that its constants can be used without a prefix. The equivalent of Rails'
# `script/console`.
def run_console
  %w(irb irb/completion pp).each { |library| require library }
  load_code
  connect_to_database(true)
  CloudCrowd::Server # Preload server to autoload classes.
  Object.send(:include, CloudCrowd)
  IRB.start
end
63
+
64
# `crowd server` can either 'start', 'stop', or 'restart'; with no
# subcommand it starts. The code is loaded first in every case, and any
# other subcommand does nothing further.
def run_server(subcommand)
  load_code
  handler = {
    'start'   => :start_server,
    'stop'    => :stop_server,
    'restart' => :restart_server
  }[subcommand || 'start']
  send(handler) if handler
end
74
+
75
# Convenience command for quickly spinning up the central server. More
# sophisticated deployments, load-balancing across multiple app servers,
# should use the config.ru rackup file directly. This method will start
# a single Thin server.
#
# The port defaults to 9173. When daemonizing, the log directory is created
# first if it is missing. The current process is then replaced by `thin`,
# so this method does not return.
def start_server
  port        = @options[:port] || 9173
  log_path    = CloudCrowd.log_path('server.log')
  pid_path    = CloudCrowd.pid_path('server.pid')
  rackup_path = File.expand_path("#{@options[:config_path]}/config.ru")
  # File.exist? works on every Ruby version; File.exists? was removed in 3.2.
  FileUtils.mkdir_p(CloudCrowd.log_path) if @options[:daemonize] && !File.exist?(CloudCrowd.log_path)
  puts "Starting CloudCrowd Central Server on port #{port}..."
  # Pass the arguments as a list so that no shell is involved: paths that
  # contain spaces or shell metacharacters reach thin unchanged.
  arguments = ['-e', @options[:environment].to_s, '-p', port.to_s]
  arguments << '-d' if @options[:daemonize]
  arguments += ['--tag', 'cloud-crowd-server', '--log', log_path, '--pid', pid_path, '-R', rackup_path, 'start']
  exec 'thin', *arguments
end
89
+
90
+ # Stop the daemonized central server, if it exists.
91
+ def stop_server
92
+ Thin::Server.kill(CloudCrowd.pid_path('server.pid'), 0)
93
+ end
94
+
95
+ # Restart the daemonized central server.
96
+ def restart_server
97
+ stop_server
98
+ sleep 1
99
+ start_server
100
+ end
101
+
102
# `crowd node` can either 'start', 'stop', or 'restart'; with no subcommand
# it starts. The code is loaded and RACK_ENV is set to the chosen
# environment first; any other subcommand does nothing further.
def run_node(subcommand)
  load_code
  ENV['RACK_ENV'] = @options[:environment]
  handler = {
    'start'   => :start_node,
    'stop'    => :stop_node,
    'restart' => :restart_node
  }[subcommand || 'start']
  send(handler) if handler
end
112
+
113
# Launch a Node on the requested port (Node::DEFAULT_PORT when none was
# given), passing along whether it should daemonize. Please only run a
# single node per machine. The Node process will be long-lived, although
# its workers will come and go.
def start_node
  node_port = @options[:port] || Node::DEFAULT_PORT
  puts "Starting CloudCrowd Node on port #{node_port}..."
  Node.new(node_port, @options[:daemonize])
end
120
+
121
+ # If the daemonized Node is running, stop it.
122
+ def stop_node
123
+ Thin::Server.kill CloudCrowd.pid_path('node.pid')
124
+ end
125
+
126
+ # Restart the daemonized Node, if it exists.
127
+ def restart_node
128
+ stop_node
129
+ sleep 1
130
+ start_node
131
+ end
132
+
133
+ # Load in the database schema to the database specified in 'database.yml'.
134
+ def run_load_schema
135
+ load_code
136
+ connect_to_database(false)
137
+ require 'cloud_crowd/schema.rb'
138
+ end
139
+
140
# Install the required CloudCrowd configuration files into the specified
# directory, or the current one. The directory is created when missing;
# then the example config.yml, config.ru and database.yml are installed
# under their real names, along with the bundled +actions+ folder.
def run_install(install_path)
  require 'fileutils'
  install_path ||= '.'
  # File.exist? works on every Ruby version; File.exists? was removed in 3.2.
  FileUtils.mkdir_p(install_path) unless File.exist?(install_path)
  install_file "#{CC_ROOT}/config/config.example.yml", "#{install_path}/config.yml"
  install_file "#{CC_ROOT}/config/config.example.ru", "#{install_path}/config.ru"
  install_file "#{CC_ROOT}/config/database.example.yml", "#{install_path}/database.yml"
  install_file "#{CC_ROOT}/actions", "#{install_path}/actions", true
end
151
+
152
+ # Clean up all Jobs in the CloudCrowd database older than --days old.
153
+ def run_cleanup
154
+ load_code
155
+ connect_to_database(true)
156
+ Job.cleanup_all(:days => @options[:days])
157
+ end
158
+
159
+ # Print `crowd` usage.
160
+ def usage
161
+ puts "\n#{@option_parser}\n"
162
+ end
163
+
164
+
165
+ private
166
+
167
# Check that every file in CONFIG_FILES is present in the configuration
# directory (<tt>@options[:config_path]</tt>, which defaults to the
# CLOUD_CROWD_CONFIG environment variable or the current directory). Exit
# with an explanation if they're not found. A successful check is
# remembered, so the files are only looked for once.
def ensure_config
  return if @config_found
  found = CONFIG_FILES.all? { |f| File.exist?("#{@options[:config_path]}/#{f}") }
  # Record success in the same variable the guard above reads; the result
  # used to be stored in @config_dir, which nothing ever consulted.
  found ? @config_found = true : config_not_found
end
174
+
175
# Parse the options shared by all commands and strip them from ARGV.
# Valid options are: --config --port --environment --daemonize --days
# (plus --version, which prints the version and exits).
#
# Defaults: the 'production' environment, no daemonizing, and a
# configuration directory taken from CLOUD_CROWD_CONFIG or else '.'. The
# port is stored as given (a string); --days is stored as an integer and
# ignored unless it consists of digits only. Returns what is left of ARGV.
def parse_options
  @options = {
    :environment => 'production',
    :config_path => ENV['CLOUD_CROWD_CONFIG'] || '.',
    :daemonize   => false
  }
  parser = OptionParser.new
  parser.on('-c', '--config PATH', 'path to configuration directory') { |conf_path| @options[:config_path] = conf_path }
  parser.on('-p', '--port PORT', 'port number for server (central or node)') { |port_num| @options[:port] = port_num }
  parser.on('-e', '--environment ENV', 'server environment (defaults to production)') { |env| @options[:environment] = env }
  parser.on('-d', '--daemonize', 'run as a background daemon') { |daemonize| @options[:daemonize] = daemonize }
  parser.on('--days NUM_DAYS', 'grace period before cleanup (7 by default)') do |days|
    @options[:days] = days.to_i if days.match(/\A\d+\Z/)
  end
  parser.on_tail('-v', '--version', 'show version') do
    require "#{CC_ROOT}/lib/cloud-crowd"
    puts "CloudCrowd version #{VERSION}"
    exit
  end
  @option_parser = parser
  @option_parser.banner = BANNER
  @option_parser.parse!(ARGV)
end
208
+
209
+ # Load in the CloudCrowd module code, dependencies, lib files and models.
210
+ # Not all commands require this.
211
+ def load_code
212
+ ensure_config
213
+ require "#{CC_ROOT}/lib/cloud-crowd"
214
+ CloudCrowd.configure("#{@options[:config_path]}/config.yml")
215
+ end
216
+
217
+ # Establish a connection to the central server's database. Not all commands
218
+ # require this.
219
+ def connect_to_database(validate_schema)
220
+ require 'cloud_crowd/models'
221
+ CloudCrowd.configure_database("#{@options[:config_path]}/database.yml", validate_schema)
222
+ end
223
+
224
+ # Exit with an explanation if the configuration files couldn't be found.
225
+ def config_not_found
226
+ puts "`crowd` can't find the CloudCrowd configuration directory. Please use `crowd -c path/to/config`, or run `crowd` from inside of the configuration directory itself."
227
+ exit(1)
228
+ end
229
+
230
# Install a file (or, with +is_dir+, a whole directory) at +dest+ and log
# the installation. If we're overwriting something, offer a chance to back
# out: only 'y', 'yes' or 'ok' continue, anything else -- including end of
# input -- leaves +dest+ alone. The log line is suppressed when RACK_ENV
# is 'test'.
def install_file(source, dest, is_dir=false)
  if File.exist?(dest)
    print "#{dest} already exists. Overwrite it? (yes/no) "
    # Read the answer from the terminal explicitly: a bare `gets` would read
    # from files named by leftover command-line arguments, and returns nil at
    # end of input.
    answer = $stdin.gets.to_s.chomp.downcase
    return unless ['y', 'yes', 'ok'].include?(answer)
  end
  if is_dir
    # Copying "source/." merges the contents into dest whether or not dest
    # exists; copying "source" itself would nest a copy inside an existing dest.
    FileUtils.cp_r(File.join(source, '.'), dest)
  else
    FileUtils.cp(source, dest)
  end
  puts "installed #{dest}" unless ENV['RACK_ENV'] == 'test'
end
240
+
241
+ end
242
+ end