cloud-crowd 0.2.6 → 0.2.7

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'cloud-crowd'
3
- s.version = '0.2.6' # Keep version in sync with cloud-crowd.rb
4
- s.date = '2009-10-05'
3
+ s.version = '0.2.7' # Keep version in sync with cloud-crowd.rb
4
+ s.date = '2009-10-19'
5
5
 
6
6
  s.homepage = "http://wiki.github.com/documentcloud/cloud-crowd"
7
7
  s.summary = "Parallel Processing for the Rest of Us"
@@ -13,4 +13,4 @@
13
13
  # will do nicely:
14
14
  #
15
15
  # :adapter: sqlite3
16
- # :database: cloud_crowd.db
16
+ # :database: cloud_crowd.db
@@ -44,7 +44,7 @@ module CloudCrowd
44
44
  autoload :WorkUnit, 'cloud_crowd/models'
45
45
 
46
46
  # Keep this version in sync with the gemspec.
47
- VERSION = '0.2.6'
47
+ VERSION = '0.2.7'
48
48
 
49
49
  # Increment the schema version when there's a backwards incompatible change.
50
50
  SCHEMA_VERSION = 3
@@ -24,6 +24,7 @@ Commands:
24
24
  node Start up a worker node (only one node per machine, please)
25
25
  console Launch a CloudCrowd console, connected to the central database
26
26
  load_schema Load the schema into the database specified by database.yml
27
+ cleanup Removes jobs that were finished over --days (7 by default) ago
27
28
 
28
29
  server -d [start | stop | restart] Servers and nodes can be launched as
29
30
  node -d [start | stop | restart] daemons, then stopped or restarted.
@@ -42,6 +43,7 @@ Options:
42
43
  when 'node' then run_node(subcommand)
43
44
  when 'load_schema' then run_load_schema
44
45
  when 'install' then run_install(subcommand)
46
+ when 'cleanup' then run_cleanup
45
47
  else usage
46
48
  end
47
49
  end
@@ -53,13 +55,14 @@ Options:
53
55
  require 'irb/completion'
54
56
  require 'pp'
55
57
  load_code
56
- connect_to_database(true)
58
+ connect_to_database true
59
+ CloudCrowd::Server # Preload server to autoload classes.
60
+ Object.send(:include, CloudCrowd)
57
61
  IRB.start
58
62
  end
59
63
 
60
64
  # `crowd server` can either 'start', 'stop', or 'restart'.
61
65
  def run_server(subcommand)
62
- ensure_config
63
66
  load_code
64
67
  subcommand ||= 'start'
65
68
  case subcommand
@@ -99,7 +102,7 @@ Options:
99
102
  # `crowd node` can either 'start', 'stop', or 'restart'.
100
103
  def run_node(subcommand)
101
104
  load_code
102
- ENV['RACK_ENV'] = @options['environment']
105
+ ENV['RACK_ENV'] = @options[:environment]
103
106
  case (subcommand || 'start')
104
107
  when 'start' then start_node
105
108
  when 'stop' then stop_node
@@ -146,6 +149,13 @@ Options:
146
149
  install_file "#{CC_ROOT}/actions", "#{install_path}/actions", true
147
150
  end
148
151
 
152
+ # Clean up all Jobs in the CloudCrowd database older than --days old.
153
+ def run_cleanup
154
+ load_code
155
+ connect_to_database(true)
156
+ Job.cleanup_all(:days => @options[:days])
157
+ end
158
+
149
159
  # Print `crowd` usage.
150
160
  def usage
151
161
  puts "\n#{@option_parser}\n"
@@ -163,6 +173,7 @@ Options:
163
173
  end
164
174
 
165
175
  # Parse all options for all commands.
176
+ # Valid options are: --config --port --environment --daemonize --days.
166
177
  def parse_options
167
178
  @options = {
168
179
  :environment => 'production',
@@ -176,12 +187,15 @@ Options:
176
187
  opts.on('-p', '--port PORT', 'port number for server (central or node)') do |port_num|
177
188
  @options[:port] = port_num
178
189
  end
179
- opts.on('-e', '--environment ENV', 'server environment (sinatra)') do |env|
190
+ opts.on('-e', '--environment ENV', 'server environment (defaults to production)') do |env|
180
191
  @options[:environment] = env
181
192
  end
182
193
  opts.on('-d', '--daemonize', 'run as a background daemon') do |daemonize|
183
194
  @options[:daemonize] = daemonize
184
195
  end
196
+ opts.on('--days NUM_DAYS', 'grace period before cleanup (7 by default)') do |days|
197
+ @options[:days] = days.to_i if days.match(/\A\d+\Z/)
198
+ end
185
199
  opts.on_tail('-v', '--version', 'show version') do
186
200
  require "#{CC_ROOT}/lib/cloud-crowd"
187
201
  puts "CloudCrowd version #{VERSION}"
@@ -7,6 +7,8 @@ module CloudCrowd
7
7
  class Job < ActiveRecord::Base
8
8
  include ModelStatus
9
9
 
10
+ CLEANUP_GRACE_PERIOD = 7 # That's a week.
11
+
10
12
  has_many :work_units, :dependent => :destroy
11
13
 
12
14
  validates_presence_of :status, :inputs, :action, :options
@@ -14,6 +16,9 @@ module CloudCrowd
14
16
  before_validation_on_create :set_initial_status
15
17
  after_create :queue_for_workers
16
18
  before_destroy :cleanup_assets
19
+
20
+ # Jobs that were last updated more than N days ago.
21
+ named_scope :older_than, lambda {|num| {:conditions => ['updated_at < ?', num.days.ago]} }
17
22
 
18
23
  # Create a Job from an incoming JSON request, and add it to the queue.
19
24
  def self.create_from_request(h)
@@ -26,6 +31,14 @@ module CloudCrowd
26
31
  )
27
32
  end
28
33
 
34
+ # Clean up all jobs beyond a certain age.
35
+ def self.cleanup_all(opts = {})
36
+ days = opts[:days] || CLEANUP_GRACE_PERIOD
37
+ self.complete.older_than(days).find_in_batches(:batch_size => 100) do |jobs|
38
+ jobs.each {|job| job.destroy }
39
+ end
40
+ end
41
+
29
42
  # After work units are marked successful, we check to see if all of them have
30
43
  # finished, if so, continue on to the next phase of the job.
31
44
  def check_for_completion
@@ -42,7 +42,7 @@ module CloudCrowd
42
42
  rescue RestClient::RequestFailed => e
43
43
  raise e unless e.http_code == 503 && e.http_body == Node::OVERLOADED_MESSAGE
44
44
  update_attribute(:busy, true) && false
45
- rescue RestClient::Exception, Errno::ECONNREFUSED
45
+ rescue RestClient::Exception, Errno::ECONNREFUSED, Timeout::Error
46
46
  # Couldn't post to node, assume it's gone away.
47
47
  destroy && false
48
48
  end
@@ -14,7 +14,7 @@ class FailingWorkUnitsTest < Test::Unit::TestCase
14
14
  }.to_json
15
15
  assert browser.last_response.ok?
16
16
 
17
- job = CloudCrowd::Job.last
17
+ job = Job.last
18
18
  (CloudCrowd.config[:work_unit_retries] - 1).times do
19
19
  job.work_units.each {|unit| unit.fail('failed', 10) }
20
20
  end
@@ -11,8 +11,8 @@ class ServerTest < Test::Unit::TestCase
11
11
  context "The CloudCrowd::Server (Sinatra)" do
12
12
 
13
13
  setup do
14
- CloudCrowd::Job.destroy_all
15
- 2.times { CloudCrowd::Job.make }
14
+ Job.destroy_all
15
+ 2.times { Job.make }
16
16
  end
17
17
 
18
18
  should "be able to render the Operations Center (GET /)" do
@@ -52,20 +52,20 @@ class ServerTest < Test::Unit::TestCase
52
52
  job_info = JSON.parse(last_response.body)
53
53
  assert job_info['percent_complete'] == 0
54
54
  assert job_info['work_units'] == 1
55
- assert CloudCrowd::Job.last.id == job_info['id']
55
+ assert Job.last.id == job_info['id']
56
56
  end
57
57
 
58
58
  should "be able to check in on the status of a job" do
59
- get("/jobs/#{CloudCrowd::Job.last.id}")
59
+ get("/jobs/#{Job.last.id}")
60
60
  assert last_response.ok?
61
61
  assert JSON.parse(last_response.body)['percent_complete'] == 0
62
62
  end
63
63
 
64
64
  should "be able to clean up a job when we're done with it" do
65
- id = CloudCrowd::Job.last.id
65
+ id = Job.last.id
66
66
  delete("/jobs/#{id}")
67
67
  assert last_response.successful? && last_response.empty?
68
- assert !CloudCrowd::Job.find_by_id(id)
68
+ assert !Job.find_by_id(id)
69
69
  end
70
70
 
71
71
  end
@@ -5,7 +5,7 @@ class JobTest < Test::Unit::TestCase
5
5
  context "A CloudCrowd Job" do
6
6
 
7
7
  setup do
8
- @job = CloudCrowd::Job.make
8
+ @job = Job.make
9
9
  @unit = @job.work_units.first
10
10
  end
11
11
 
@@ -25,7 +25,7 @@ class JobTest < Test::Unit::TestCase
25
25
 
26
26
  should "know its completion status" do
27
27
  assert !@job.all_work_units_complete?
28
- @unit.update_attributes(:status => CloudCrowd::SUCCEEDED, :output => '{"output":"hello"}')
28
+ @unit.update_attributes(:status => SUCCEEDED, :output => '{"output":"hello"}')
29
29
  @job.check_for_completion
30
30
  assert @job.reload.all_work_units_complete?
31
31
  assert @job.percent_complete == 100
@@ -38,7 +38,7 @@ class JobTest < Test::Unit::TestCase
38
38
  end
39
39
 
40
40
  should "be able to create a job from a JSON request" do
41
- job = CloudCrowd::Job.create_from_request(JSON.parse(<<-EOS
41
+ job = Job.create_from_request(JSON.parse(<<-EOS
42
42
  { "inputs" : ["one", "two", "three"],
43
43
  "action" : "graphics_magick",
44
44
  "email" : "bob@example.com",
@@ -60,26 +60,38 @@ class JobTest < Test::Unit::TestCase
60
60
  end
61
61
 
62
62
  should "create jobs with a SPLITTING status for actions that have a split method defined" do
63
- job = CloudCrowd::Job.create_from_request({'inputs' => ['1'], 'action' => 'process_pdfs'})
63
+ job = Job.create_from_request({'inputs' => ['1'], 'action' => 'process_pdfs'})
64
64
  assert job.splittable?
65
65
  assert job.splitting?
66
66
  end
67
67
 
68
68
  should "fire a callback when a job has finished, successfully or not" do
69
69
  @job.update_attribute(:callback_url, 'http://example.com/callback')
70
- CloudCrowd::Job.any_instance.stubs(:fire_callback).returns(true)
71
- CloudCrowd::Job.any_instance.expects(:fire_callback)
70
+ Job.any_instance.stubs(:fire_callback).returns(true)
71
+ Job.any_instance.expects(:fire_callback)
72
72
  @job.work_units.first.finish('{"output":"output"}', 10)
73
73
  assert @job.all_work_units_complete?
74
74
  end
75
75
 
76
76
  should "have a 'pretty' display of the Job's status" do
77
77
  assert @job.display_status == 'processing'
78
- @job.update_attribute(:status, CloudCrowd::FAILED)
78
+ @job.update_attribute(:status, FAILED)
79
79
  assert @job.display_status == 'failed'
80
- @job.update_attribute(:status, CloudCrowd::MERGING)
80
+ @job.update_attribute(:status, MERGING)
81
81
  assert @job.display_status == 'merging'
82
82
  end
83
+
84
+ should "be able to clean up jobs that have aged beyond their use" do
85
+ count = Job.count
86
+ Job.cleanup_all
87
+ assert count == Job.count
88
+ Job.record_timestamps = false
89
+ @job.update_attributes :status => SUCCEEDED, :updated_at => 10.days.ago
90
+ Job.record_timestamps = true
91
+ Job.cleanup_all
92
+ assert count > Job.count
93
+ assert !Job.find_by_id(@job.id)
94
+ end
83
95
 
84
96
  end
85
97
 
@@ -17,14 +17,14 @@ class WorkUnitTest < Test::Unit::TestCase
17
17
 
18
18
  should "know if its done" do
19
19
  assert !@unit.complete?
20
- @unit.status = CloudCrowd::SUCCEEDED
20
+ @unit.status = SUCCEEDED
21
21
  assert @unit.complete?
22
- @unit.status = CloudCrowd::FAILED
22
+ @unit.status = FAILED
23
23
  assert @unit.complete?
24
24
  end
25
25
 
26
26
  should "have JSON that includes job attributes" do
27
- job = CloudCrowd::Job.make
27
+ job = Job.make
28
28
  unit_data = JSON.parse(job.work_units.first.to_json)
29
29
  assert unit_data['job_id'] == job.id
30
30
  assert unit_data['action'] == job.action
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cloud-crowd
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.6
4
+ version: 0.2.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jeremy Ashkenas
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-10-05 00:00:00 -04:00
12
+ date: 2009-10-19 00:00:00 -04:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency