cloud-crowd 0.1.0 → 0.2.0

Files changed (42)
  1. data/README +16 -16
  2. data/cloud-crowd.gemspec +10 -9
  3. data/config/config.example.ru +8 -2
  4. data/config/config.example.yml +21 -25
  5. data/examples/process_pdfs_example.rb +1 -1
  6. data/examples/word_count_example.rb +1 -0
  7. data/lib/cloud-crowd.rb +47 -28
  8. data/lib/cloud_crowd/action.rb +14 -8
  9. data/lib/cloud_crowd/asset_store.rb +8 -8
  10. data/lib/cloud_crowd/asset_store/filesystem_store.rb +18 -7
  11. data/lib/cloud_crowd/asset_store/s3_store.rb +14 -11
  12. data/lib/cloud_crowd/command_line.rb +24 -58
  13. data/lib/cloud_crowd/exceptions.rb +7 -0
  14. data/lib/cloud_crowd/helpers/authorization.rb +5 -3
  15. data/lib/cloud_crowd/helpers/resources.rb +0 -20
  16. data/lib/cloud_crowd/models.rb +1 -1
  17. data/lib/cloud_crowd/models/job.rb +37 -40
  18. data/lib/cloud_crowd/models/node_record.rb +95 -0
  19. data/lib/cloud_crowd/models/work_unit.rb +87 -33
  20. data/lib/cloud_crowd/node.rb +105 -0
  21. data/lib/cloud_crowd/schema.rb +22 -18
  22. data/lib/cloud_crowd/{app.rb → server.rb} +34 -34
  23. data/lib/cloud_crowd/worker.rb +68 -107
  24. data/public/css/admin_console.css +40 -18
  25. data/public/images/server.png +0 -0
  26. data/public/images/server_busy.png +0 -0
  27. data/public/js/admin_console.js +47 -18
  28. data/test/acceptance/test_failing_work_units.rb +1 -1
  29. data/test/acceptance/{test_app.rb → test_server.rb} +15 -15
  30. data/test/acceptance/test_word_count.rb +3 -9
  31. data/test/blueprints.rb +0 -1
  32. data/test/config/config.ru +1 -1
  33. data/test/config/config.yml +2 -4
  34. data/test/unit/test_action.rb +1 -1
  35. data/test/unit/test_configuration.rb +1 -1
  36. data/test/unit/test_job.rb +3 -0
  37. data/test/unit/test_work_unit.rb +2 -4
  38. data/views/{index.erb → operations_center.erb} +13 -8
  39. metadata +11 -10
  40. data/lib/cloud_crowd/daemon.rb +0 -95
  41. data/lib/cloud_crowd/models/worker_record.rb +0 -61
  42. data/lib/cloud_crowd/runner.rb +0 -15

data/lib/cloud_crowd/asset_store/filesystem_store.rb

@@ -2,15 +2,26 @@ module CloudCrowd
   class AssetStore
 
     # The FilesystemStore is an implementation of the AssetStore, good only for
-    # use in development, testing, or if you're only running a single-machine
-    # installation.
+    # use in development, testing, if you're only running a single-machine
+    # installation, or are using a networked drive.
     module FilesystemStore
 
-      # Save a file to somewhere semi-persistent on the filesystem. Can be used
-      # in development, when offline, or if you happen to have a single-machine
-      # CloudCrowd installation. To use, configure <tt>:storage => 'filesystem'</tt>.
+      DEFAULT_STORAGE_PATH = '/tmp/cloud_crowd_storage'
+
+      attr_reader :local_storage_path
+
+      # Make sure that local storage exists and is writeable before starting.
+      def setup
+        lsp = @local_storage_path = CloudCrowd.config[:local_storage_path] || DEFAULT_STORAGE_PATH
+        FileUtils.mkdir_p(lsp) unless File.exists?(lsp)
+        raise Error::StorageNotWritable, "#{lsp} is not writable" unless File.writable?(lsp)
+      end
+
+      # Save a file to somewhere semi-persistent on the filesystem. To use,
+      # configure <tt>:storage: 'filesystem'</tt> in *config.yml*, as well as
+      # <tt>:local_storage_path:</tt>.
       def save(local_path, save_path)
-        save_path = File.join(LOCAL_STORAGE_PATH, save_path)
+        save_path = File.join(@local_storage_path, save_path)
         save_dir = File.dirname(save_path)
         FileUtils.mkdir_p save_dir unless File.exists? save_dir
         FileUtils.cp(local_path, save_path)
@@ -19,7 +30,7 @@ module CloudCrowd
 
       # Remove all of a Job's result files from the filesystem.
       def cleanup(job)
-        path = "#{LOCAL_STORAGE_PATH}/#{job.action}/job_#{job.id}"
+        path = "#{@local_storage_path}/#{job.action}/job_#{job.id}"
        FileUtils.rm_r(path) if File.exists?(path)
      end
    end
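
Not part of the diff: the hunk above replaces the hard-coded LOCAL_STORAGE_PATH constant with a setup hook driven by config.yml. A minimal sketch of exercising the new module in isolation follows; the ScratchStore class and the file paths are made up for illustration, and the config keys are the ones setup reads.

    # Sketch only -- ScratchStore is a throwaway class for this example.
    require 'fileutils'
    require 'cloud-crowd'

    class ScratchStore
      include CloudCrowd::AssetStore::FilesystemStore
    end

    CloudCrowd.configure('config/config.yml')   # may set :local_storage_path

    store = ScratchStore.new
    store.setup   # mkdir_p's the storage root, or raises Error::StorageNotWritable

    File.open('/tmp/example_output.txt', 'w') {|f| f.write('42') }
    store.save('/tmp/example_output.txt', 'word_count/job_1/unit_3.txt')
    # The file now lives under store.local_storage_path
    # ('/tmp/cloud_crowd_storage' unless :local_storage_path says otherwise).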

data/lib/cloud_crowd/asset_store/s3_store.rb

@@ -5,11 +5,24 @@ module CloudCrowd
     # on S3 for all resulting files.
     module S3Store
 
+      # Configure authentication and establish a connection to S3, first thing.
+      def setup
+        @use_auth = CloudCrowd.config[:s3_authentication]
+        bucket_name = CloudCrowd.config[:s3_bucket]
+        key, secret = CloudCrowd.config[:aws_access_key], CloudCrowd.config[:aws_secret_key]
+        valid_conf = [bucket_name, key, secret].all? {|s| s.is_a? String }
+        raise Error::MissingConfiguration, "An S3 account must be configured in 'config.yml' before 's3' storage can be used" unless valid_conf
+        protocol = @use_auth ? 'https' : 'http'
+        port = @use_auth ? 443 : 80
+        @s3 = RightAws::S3.new(key, secret, :protocol => protocol, :port => port)
+        @bucket = @s3.bucket(bucket_name)
+        @bucket = @s3.bucket(bucket_name, true) unless @bucket
+      end
+
       # Save a finished file from local storage to S3. Save it publicly unless
       # we're configured to use S3 authentication. Authenticated links expire
       # after one day by default.
       def save(local_path, save_path)
-        ensure_s3_connection
         if @use_auth
           @bucket.put(save_path, File.open(local_path), {}, 'private')
           @s3.interface.get_link(@bucket, save_path)
@@ -21,19 +34,9 @@ module CloudCrowd
 
       # Remove all of a Job's resulting files from S3, both intermediate and finished.
       def cleanup(job)
-        ensure_s3_connection
         @bucket.delete_folder("#{job.action}/job_#{job.id}")
       end
 
-      # Workers, through the course of many WorkUnits, keep around an AssetStore.
-      # Ensure we have a persistent S3 connection after first use.
-      def ensure_s3_connection
-        unless @s3 && @bucket
-          params = {:port => 80, :protocol => 'http'}
-          @s3 = RightAws::S3.new(CloudCrowd.config[:aws_access_key], CloudCrowd.config[:aws_secret_key], params)
-          @bucket = @s3.bucket(CloudCrowd.config[:s3_bucket], true)
-        end
-      end
     end
 
   end
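
For reference, a small sketch (not part of the gem) of what the new S3Store#setup expects from config.yml; the key names are the ones read in the hunk above, and the validation mirrors its valid_conf check.

    # Sketch: verify an S3 configuration before relying on 's3' storage.
    require 'cloud-crowd'

    CloudCrowd.configure('config/config.yml')

    required = [:s3_bucket, :aws_access_key, :aws_secret_key]
    missing  = required.reject {|key| CloudCrowd.config[key].is_a?(String) }

    if missing.empty?
      mode = CloudCrowd.config[:s3_authentication] ? 'private uploads over https (port 443)' :
                                                     'public uploads over http (port 80)'
      puts "S3 storage ready: bucket '#{CloudCrowd.config[:s3_bucket]}', #{mode}"
    else
      puts "config.yml is missing #{missing.join(', ')} -- " +
           "S3Store#setup would raise Error::MissingConfiguration"
    end

Note that 0.1.0 always connected over plain HTTP on port 80; with :s3_authentication turned on, 0.2.0 switches to HTTPS on port 443 and uploads files privately.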

data/lib/cloud_crowd/command_line.rb

@@ -9,9 +9,6 @@ module CloudCrowd
     # Reference the absolute path to the root.
     CC_ROOT = File.expand_path(File.dirname(__FILE__) + '/../..')
 
-    # Path to the Daemons gem script which launches workers.
-    WORKER_RUNNER = File.expand_path("#{CC_ROOT}/lib/cloud_crowd/runner.rb")
-
     # Command-line banner for the usage message.
     BANNER = <<-EOS
 CloudCrowd is a MapReduce-inspired Parallel Processing System for Ruby.
@@ -24,7 +21,7 @@ Usage: crowd COMMAND OPTIONS
 Commands:
   install Install the CloudCrowd configuration files to the specified directory
   server Start up the central server (requires a database)
-  workers Control worker daemons, use: (start | stop | restart | status | run)
+  node Start up a worker node (only one node per machine, please)
   console Launch a CloudCrowd console, connected to the central database
   load_schema Load the schema into the database specified by database.yml
 
@@ -38,7 +35,7 @@ Options:
       case command
       when 'console' then run_console
       when 'server' then run_server
-      when 'workers' then run_workers_command
+      when 'node' then run_node
       when 'load_schema' then run_load_schema
       when 'install' then run_install
       else usage
@@ -52,7 +49,7 @@ Options:
       require 'irb/completion'
       require 'pp'
       load_code
-      connect_to_database
+      connect_to_database(true)
       IRB.start
     end
 
@@ -63,6 +60,7 @@ Options:
     # (Mongrel, falling back to WEBrick). The equivalent of Rails' script/server.
     def run_server
       ensure_config
+      @options[:port] ||= 9173
       require 'rubygems'
       rackup_path = File.expand_path("#{@options[:config_path]}/config.ru")
       if Gem.available? 'thin'
@@ -72,10 +70,18 @@ Options:
       end
     end
 
+    # Launch a Node. Please only run a single node per machine. The Node process
+    # will be long-lived, although its workers will come and go.
+    def run_node
+      ENV['RACK_ENV'] = @options['environment']
+      load_code
+      Node.new(@options[:port])
+    end
+
     # Load in the database schema to the database specified in 'database.yml'.
     def run_load_schema
       load_code
-      connect_to_database
+      connect_to_database(false)
       require 'cloud_crowd/schema.rb'
     end
 
@@ -91,46 +97,6 @@ Options:
       install_file "#{CC_ROOT}/actions", "#{install_path}/actions", true
     end
 
-    # Manipulate worker daemons -- handles all commands that the Daemons gem
-    # provides: start, stop, restart, run, and status.
-    def run_workers_command
-      ensure_config
-      command = ARGV.shift
-      case command
-      when 'start' then start_workers
-      when 'stop' then stop_workers
-      when 'restart' then stop_workers && start_workers
-      when 'run' then run_worker
-      when 'status' then show_worker_status
-      else usage
-      end
-    end
-
-    # Start up N workers, specified by argument or the number of workers in
-    # config.yml.
-    def start_workers
-      load_code
-      num_workers = @options[:num_workers] || CloudCrowd.config[:num_workers]
-      num_workers.times do
-        `CLOUD_CROWD_CONFIG='#{File.expand_path(@options[:config_path] + "/config.yml")}' ruby #{WORKER_RUNNER} start`
-      end
-    end
-
-    # For debugging, run a single worker in the current process, showing output.
-    def run_worker
-      exec "CLOUD_CROWD_CONFIG='#{File.expand_path(@options[:config_path] + "/config.yml")}' ruby #{WORKER_RUNNER} run"
-    end
-
-    # Stop all active workers.
-    def stop_workers
-      `ruby #{WORKER_RUNNER} stop`
-    end
-
-    # Display the status of all active workers.
-    def show_worker_status
-      puts `ruby #{WORKER_RUNNER} status`
-    end
-
     # Print `crowd` usage.
     def usage
       puts "\n#{@option_parser}\n"
@@ -150,7 +116,6 @@ Options:
     # Parse all options for all commands.
     def parse_options
       @options = {
-        :port => 9173,
         :environment => 'production',
        :config_path => ENV['CLOUD_CROWD_CONFIG'] || '.'
      }
@@ -158,17 +123,14 @@ Options:
       opts.on('-c', '--config PATH', 'path to configuration directory') do |conf_path|
         @options[:config_path] = conf_path
       end
-      opts.on('-n', '--num-workers NUM', OptionParser::DecimalInteger, 'number of worker processes') do |num|
-        @options[:num_workers] = num
-      end
-      opts.on('-p', '--port PORT', 'central server port number') do |port_num|
+      opts.on('-p', '--port PORT', 'port number for server (central or node)') do |port_num|
         @options[:port] = port_num
       end
       opts.on('-e', '--environment ENV', 'server environment (sinatra)') do |env|
         @options[:environment] = env
       end
       opts.on_tail('-v', '--version', 'show version') do
-        load_code
+        require "#{CC_ROOT}/lib/cloud-crowd"
        puts "CloudCrowd version #{VERSION}"
        exit
      end
@@ -181,26 +143,30 @@ Options:
     # Not all commands require this.
     def load_code
       ensure_config
-      require 'rubygems'
       require "#{CC_ROOT}/lib/cloud-crowd"
       CloudCrowd.configure("#{@options[:config_path]}/config.yml")
     end
 
     # Establish a connection to the central server's database. Not all commands
     # require this.
-    def connect_to_database
+    def connect_to_database(validate_schema)
       require 'cloud_crowd/models'
-      CloudCrowd.configure_database("#{@options[:config_path]}/database.yml")
+      CloudCrowd.configure_database("#{@options[:config_path]}/database.yml", validate_schema)
     end
 
     # Exit with an explanation if the configuration files couldn't be found.
     def config_not_found
-      puts "`crowd` can't find the CloudCrowd configuration directory. Please either run `crowd` from inside of the configuration directory, or use `crowd -c path/to/config`"
+      puts "`crowd` can't find the CloudCrowd configuration directory. Please use `crowd -c path/to/config`, or run `crowd` from inside of the configuration directory itself."
       exit(1)
     end
 
-    # Install a file and log the installation.
+    # Install a file and log the installation. If we're overwriting a file,
+    # offer a chance to back out.
     def install_file(source, dest, is_dir=false)
+      if File.exists?(dest)
+        print "#{dest} already exists. Overwrite it? (yes/no) "
+        return unless ['y', 'yes', 'ok'].include? gets.chomp.downcase
+      end
      is_dir ? FileUtils.cp_r(source, dest) : FileUtils.cp(source, dest)
      puts "installed #{dest}"
    end
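
Taken together, these hunks drop the Daemons-based `workers` subcommand (and its -n/--num-workers flag) in favor of a single `node` subcommand. Roughly, `crowd node -c path/to/config` now does the following; this is a sketch of run_node above with placeholder values, and Node itself is the new class added in data/lib/cloud_crowd/node.rb.

    # Sketch of the `crowd node` code path, per run_node above.
    ENV['RACK_ENV'] = 'production'                      # the -e/--environment option
    require 'cloud-crowd'
    CloudCrowd.configure('path/to/config/config.yml')   # what load_code does
    CloudCrowd::Node.new(9063)                          # the -p/--port value, if given; long-lived process

Also note that the default port of 9173 moved from parse_options into run_server, so it now applies only to `crowd server`, not to `crowd node`.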

data/lib/cloud_crowd/exceptions.rb

@@ -2,6 +2,8 @@ module CloudCrowd
 
   # Base Error class which all custom CloudCrowd exceptions inherit from.
   # Rescuing CloudCrowd::Error (or RuntimeError) will get all custom exceptions.
+  # If your cluster is correctly configured, you should never expect to see any
+  # of these.
   class Error < RuntimeError
 
     # ActionNotFound is raised when a job is created for an action that doesn't
@@ -23,6 +25,11 @@ module CloudCrowd
     class StatusUnspecified < Error
     end
 
+    # MissingConfiguration is raised when we're trying to run a method that
+    # needs configuration not present in config.yml.
+    class MissingConfiguration < Error
+    end
+
   end
 
 end
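
Since the new comment points out that rescuing the base class catches everything, here is a brief sketch; configure_storage is a hypothetical stand-in for any call that can raise one of these exceptions, such as S3Store#setup above.

    # Sketch: one rescue for all CloudCrowd-specific failures.
    begin
      configure_storage   # hypothetical method that may raise a CloudCrowd error
    rescue CloudCrowd::Error::MissingConfiguration => e
      warn "Fix config.yml first: #{e.message}"
    rescue CloudCrowd::Error => e
      warn "CloudCrowd error: #{e.message}"
    end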

data/lib/cloud_crowd/helpers/authorization.rb

@@ -23,9 +23,9 @@ module CloudCrowd
       # A request is authorized if its login and password match those stored
       # in config.yml, or if authentication is disabled. If authentication is
       # turned on, then every request is authenticated, including between
-      # the worker daemons and the central server.
+      # the nodes and the central server.
       def authorize(login, password)
-        return true unless CloudCrowd.config[:use_http_authentication]
+        return true unless CloudCrowd.config[:http_authentication]
         return CloudCrowd.config[:login] == login &&
                CloudCrowd.config[:password] == password
       end
@@ -33,11 +33,13 @@ module CloudCrowd
 
       private
 
+      # Provide a Rack Authorization object.
       def auth
         @auth ||= Rack::Auth::Basic::Request.new(request.env)
       end
 
-      def unauthorized!(realm = App.authorization_realm)
+      # Unauthorized requests will prompt the browser to provide credentials.
+      def unauthorized!(realm = Server.authorization_realm)
         response['WWW-Authenticate'] = "Basic realm=\"#{realm}\""
         halt 401, 'Authorization Required'
       end
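
Two renames to note when upgrading: the config key :use_http_authentication becomes :http_authentication, and the realm now comes from Server rather than App (matching the app.rb → server.rb rename elsewhere in this release). A minimal sketch of the rule authorize enforces, with placeholder credentials:

    # Sketch of the authorization rule, outside of the Sinatra helpers.
    def authorized?(login, password, config)
      return true unless config[:http_authentication]   # auth disabled: allow everyone
      config[:login] == login && config[:password] == password
    end

    authorized?('crowd', 'secret',
                :http_authentication => true, :login => 'crowd', :password => 'secret')
    # => true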

data/lib/cloud_crowd/helpers/resources.rb

@@ -20,26 +20,6 @@ module CloudCrowd
         @work_unit ||= WorkUnit.find_by_id(params[:work_unit_id]) or raise Sinatra::NotFound
       end
 
-      # Try to fetch a work unit from the queue. If none are pending, respond
-      # with no content.
-      def dequeue_work_unit(offset=0)
-        handle_conflicts do
-          worker, actions = params[:worker_name], params[:worker_actions].split(',')
-          WorkUnit.dequeue(worker, actions, offset)
-        end
-      end
-
-      # We're using ActiveRecords optimistic locking, so stale work units
-      # may sometimes arise. handle_conflicts responds with a the HTTP status
-      # code of your choosing if the update failed to be applied.
-      def handle_conflicts(code=204)
-        begin
-          yield
-        rescue ActiveRecord::StaleObjectError => e
-          return status(code) && ''
-        end
-      end
-
     end
   end
 end
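
The removed helpers let workers poll the central server to dequeue work, answering stale optimistic-locking updates with 204 No Content; both disappear along with the worker daemons. For context, the pattern handle_conflicts encoded looked roughly like this sketch:

    # Sketch of the retired conflict-handling pattern.
    def with_conflict_handling(code = 204)
      yield
    rescue ActiveRecord::StaleObjectError
      [code, '']   # another process already claimed or updated the record
    end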

data/lib/cloud_crowd/models.rb

@@ -36,5 +36,5 @@ module CloudCrowd
 end
 
 require 'cloud_crowd/models/job'
+require 'cloud_crowd/models/node_record'
 require 'cloud_crowd/models/work_unit'
-require 'cloud_crowd/models/worker_record'

data/lib/cloud_crowd/models/job.rb

@@ -31,30 +31,36 @@ module CloudCrowd
     # finished, if so, continue on to the next phase of the job.
     def check_for_completion
       return unless all_work_units_complete?
-      transition_to_next_phase
-      output_list = gather_outputs_from_work_units
-
+      set_next_status
+      outs = gather_outputs_from_work_units
+      return queue_for_workers(outs) if merging?
       if complete?
-        self.outputs = output_list.to_json
-        self.time = Time.now - self.created_at
-      end
-      self.save
-
-      case self.status
-      when PROCESSING then queue_for_workers(output_list.map {|o| JSON.parse(o) }.flatten)
-      when MERGING then queue_for_workers(output_list.to_json)
-      else fire_callback
+        update_attributes(:outputs => outs, :time => time_taken)
+        fire_callback if callback_url
       end
       self
     end
 
+    # Transition this Job's status to the appropriate next status.
+    def set_next_status
+      update_attribute(:status,
+        any_work_units_failed? ? FAILED :
+        self.splitting? ? PROCESSING :
+        self.mergeable? ? MERGING :
+        SUCCEEDED
+      )
+    end
+
     # If a <tt>callback_url</tt> is defined, post the Job's JSON to it upon
     # completion. The <tt>callback_url</tt> may include HTTP basic authentication,
     # if you like:
     # http://user:password@example.com/job_complete
+    # If the callback_url is successfully pinged, we proceed to cleanup the job.
+    # TODO: This should be moved into a Work Unit...
     def fire_callback
       begin
-        RestClient.post(callback_url, {:job => self.to_json}) if callback_url
+        RestClient.post(callback_url, {:job => self.to_json})
+        self.destroy
       rescue RestClient::Exception => e
         puts "Failed to fire job callback. Hmmm, what should happen here?"
       end
@@ -62,15 +68,12 @@ module CloudCrowd
 
     # Cleaning up after a job will remove all of its files from S3. Destroying
     # a Job calls cleanup_assets first.
+    # TODO: Convert this into a 'cleanup' work unit that gets run by a worker.
     def cleanup_assets
       AssetStore.new.cleanup(self)
     end
 
     # Have all of the WorkUnits finished?
-    #--
-    # We could trade reads for writes here
-    # by keeping a completed_count on the Job itself.
-    #++
     def all_work_units_complete?
       self.work_units.incomplete.count <= 0
     end
@@ -85,6 +88,11 @@ module CloudCrowd
       self.action_class.public_instance_methods.include? 'split'
     end
 
+    # This job is done splitting if it's finished with its splitting work units.
+    def done_splitting?
+      splittable? && work_units.splitting.count <= 0
+    end
+
     # This job is mergeable if its Action has a +merge+ method.
     def mergeable?
       self.processing? && self.action_class.public_instance_methods.include?('merge')
@@ -92,16 +100,19 @@ module CloudCrowd
 
     # Retrieve the class for this Job's Action.
     def action_class
-      klass = CloudCrowd.actions[self.action]
-      return klass if klass
+      @action_class ||= CloudCrowd.actions[self.action]
+      return @action_class if @action_class
       raise Error::ActionNotFound, "no action named: '#{self.action}' could be found"
     end
 
     # How complete is this Job?
+    # Unfortunately, with the current processing sequence, the percent_complete
+    # can pull a fast one and go backwards. This happens when there's a single
+    # large input that takes a long time to split, and when it finally does it
+    # creates a whole swarm of work units. This seems unavoidable.
     def percent_complete
-      return 0 if splitting?
-      return 100 if complete?
       return 99 if merging?
+      return 100 if complete?
       (work_units.complete.count / work_units.count.to_f * 100).round
     end
     end
@@ -136,20 +147,12 @@ module CloudCrowd
     private
 
     # When the WorkUnits are all finished, gather all their outputs together
-    # before removing them from the database entirely.
+    # before removing them from the database entirely. Returns their merged JSON.
     def gather_outputs_from_work_units
       units = self.work_units.complete
-      outs = self.work_units.complete.map {|u| JSON.parse(u.output)['output'] }
+      outs = self.work_units.complete.map {|u| u.parsed_output }
       self.work_units.complete.destroy_all
-      outs
-    end
-
-    # Transition this Job's status to the appropriate next status.
-    def transition_to_next_phase
-      self.status = any_work_units_failed? ? FAILED :
-        self.splitting? ? PROCESSING :
-        self.mergeable? ? MERGING :
-        SUCCEEDED
+      outs.to_json
     end
     end
 
     # When starting a new job, or moving to a new stage, split up the inputs
@@ -157,14 +160,8 @@ module CloudCrowd
     # away.
     def queue_for_workers(input=nil)
       input ||= JSON.parse(self.inputs)
-      [input].flatten.each do |wu_input|
-        WorkUnit.create(
-          :job => self,
-          :action => self.action,
-          :input => wu_input,
-          :status => self.status
-        )
-      end
+      [input].flatten.each {|i| WorkUnit.start(self, action, i, status) }
+      self
     end
     end
 
     # A Job starts out either splitting or processing, depending on its action.
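
One behavioral change in the hunks above is easy to miss: fire_callback now destroys the Job once the callback POST succeeds, so results should be taken from the callback payload itself rather than fetched from the server afterwards. A sketch of a receiving endpoint, as a hypothetical Sinatra app; the :job parameter is the one posted by RestClient above.

    # Sketch: a callback receiver for finished jobs.
    require 'sinatra'
    require 'json'

    post '/job_complete' do
      job = JSON.parse(params[:job])
      # Persist whatever you need here -- after this response returns
      # successfully, the central server destroys its Job record.
      status 200
    end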