cloud-crowd 0.2.6 → 0.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'cloud-crowd'
3
- s.version = '0.2.6' # Keep version in sync with cloud-cloud.rb
4
- s.date = '2009-10-05'
3
+ s.version = '0.2.7' # Keep version in sync with cloud-crowd.rb
4
+ s.date = '2009-10-19'
5
5
 
6
6
  s.homepage = "http://wiki.github.com/documentcloud/cloud-crowd"
7
7
  s.summary = "Parallel Processing for the Rest of Us"
@@ -13,4 +13,4 @@
13
13
  # will do nicely:
14
14
  #
15
15
  # :adapter: sqlite3
16
- # :database: cloud_crowd.db
16
+ # :database: cloud_crowd.db
@@ -44,7 +44,7 @@ module CloudCrowd
44
44
  autoload :WorkUnit, 'cloud_crowd/models'
45
45
 
46
46
  # Keep this version in sync with the gemspec.
47
- VERSION = '0.2.6'
47
+ VERSION = '0.2.7'
48
48
 
49
49
  # Increment the schema version when there's a backwards incompatible change.
50
50
  SCHEMA_VERSION = 3
@@ -24,6 +24,7 @@ Commands:
24
24
  node Start up a worker node (only one node per machine, please)
25
25
  console Launch a CloudCrowd console, connected to the central database
26
26
  load_schema Load the schema into the database specified by database.yml
27
+ cleanup Removes jobs that were finished over --days (7 by default) ago
27
28
 
28
29
  server -d [start | stop | restart] Servers and nodes can be launched as
29
30
  node -d [start | stop | restart] daemons, then stopped or restarted.
@@ -42,6 +43,7 @@ Options:
42
43
  when 'node' then run_node(subcommand)
43
44
  when 'load_schema' then run_load_schema
44
45
  when 'install' then run_install(subcommand)
46
+ when 'cleanup' then run_cleanup
45
47
  else usage
46
48
  end
47
49
  end
@@ -53,13 +55,14 @@ Options:
53
55
  require 'irb/completion'
54
56
  require 'pp'
55
57
  load_code
56
- connect_to_database(true)
58
+ connect_to_database true
59
+ CloudCrowd::Server # Preload server to autoload classes.
60
+ Object.send(:include, CloudCrowd)
57
61
  IRB.start
58
62
  end
59
63
 
60
64
  # `crowd server` can either 'start', 'stop', or 'restart'.
61
65
  def run_server(subcommand)
62
- ensure_config
63
66
  load_code
64
67
  subcommand ||= 'start'
65
68
  case subcommand
@@ -99,7 +102,7 @@ Options:
99
102
  # `crowd node` can either 'start', 'stop', or 'restart'.
100
103
  def run_node(subcommand)
101
104
  load_code
102
- ENV['RACK_ENV'] = @options['environment']
105
+ ENV['RACK_ENV'] = @options[:environment]
103
106
  case (subcommand || 'start')
104
107
  when 'start' then start_node
105
108
  when 'stop' then stop_node
@@ -146,6 +149,13 @@ Options:
146
149
  install_file "#{CC_ROOT}/actions", "#{install_path}/actions", true
147
150
  end
148
151
 
152
+ # Clean up all complete Jobs in the CloudCrowd database last updated more than --days days ago.
153
+ def run_cleanup
154
+ load_code
155
+ connect_to_database(true)
156
+ Job.cleanup_all(:days => @options[:days])
157
+ end
158
+
149
159
  # Print `crowd` usage.
150
160
  def usage
151
161
  puts "\n#{@option_parser}\n"
@@ -163,6 +173,7 @@ Options:
163
173
  end
164
174
 
165
175
  # Parse all options for all commands.
176
+ # Valid options are: --config --port --environment --daemonize --days.
166
177
  def parse_options
167
178
  @options = {
168
179
  :environment => 'production',
@@ -176,12 +187,15 @@ Options:
176
187
  opts.on('-p', '--port PORT', 'port number for server (central or node)') do |port_num|
177
188
  @options[:port] = port_num
178
189
  end
179
- opts.on('-e', '--environment ENV', 'server environment (sinatra)') do |env|
190
+ opts.on('-e', '--environment ENV', 'server environment (defaults to production)') do |env|
180
191
  @options[:environment] = env
181
192
  end
182
193
  opts.on('-d', '--daemonize', 'run as a background daemon') do |daemonize|
183
194
  @options[:daemonize] = daemonize
184
195
  end
196
+ opts.on('--days NUM_DAYS', 'grace period before cleanup (7 by default)') do |days|
197
+ @options[:days] = days.to_i if days.match(/\A\d+\Z/)
198
+ end
185
199
  opts.on_tail('-v', '--version', 'show version') do
186
200
  require "#{CC_ROOT}/lib/cloud-crowd"
187
201
  puts "CloudCrowd version #{VERSION}"
@@ -7,6 +7,8 @@ module CloudCrowd
7
7
  class Job < ActiveRecord::Base
8
8
  include ModelStatus
9
9
 
10
+ CLEANUP_GRACE_PERIOD = 7 # That's a week.
11
+
10
12
  has_many :work_units, :dependent => :destroy
11
13
 
12
14
  validates_presence_of :status, :inputs, :action, :options
@@ -14,6 +16,9 @@ module CloudCrowd
14
16
  before_validation_on_create :set_initial_status
15
17
  after_create :queue_for_workers
16
18
  before_destroy :cleanup_assets
19
+
20
+ # Jobs that were last updated more than N days ago.
21
+ named_scope :older_than, lambda {|num| {:conditions => ['updated_at < ?', num.days.ago]} }
17
22
 
18
23
  # Create a Job from an incoming JSON request, and add it to the queue.
19
24
  def self.create_from_request(h)
@@ -26,6 +31,14 @@ module CloudCrowd
26
31
  )
27
32
  end
28
33
 
34
+ # Destroy all complete jobs last updated more than opts[:days] days ago (defaults to CLEANUP_GRACE_PERIOD).
35
+ def self.cleanup_all(opts = {})
36
+ days = opts[:days] || CLEANUP_GRACE_PERIOD
37
+ self.complete.older_than(days).find_in_batches(:batch_size => 100) do |jobs|
38
+ jobs.each {|job| job.destroy }
39
+ end
40
+ end
41
+
29
42
  # After work units are marked successful, we check to see if all of them have
30
43
  # finished, if so, continue on to the next phase of the job.
31
44
  def check_for_completion
@@ -42,7 +42,7 @@ module CloudCrowd
42
42
  rescue RestClient::RequestFailed => e
43
43
  raise e unless e.http_code == 503 && e.http_body == Node::OVERLOADED_MESSAGE
44
44
  update_attribute(:busy, true) && false
45
- rescue RestClient::Exception, Errno::ECONNREFUSED
45
+ rescue RestClient::Exception, Errno::ECONNREFUSED, Timeout::Error
46
46
  # Couldn't post to node, assume it's gone away.
47
47
  destroy && false
48
48
  end
@@ -14,7 +14,7 @@ class FailingWorkUnitsTest < Test::Unit::TestCase
14
14
  }.to_json
15
15
  assert browser.last_response.ok?
16
16
 
17
- job = CloudCrowd::Job.last
17
+ job = Job.last
18
18
  (CloudCrowd.config[:work_unit_retries] - 1).times do
19
19
  job.work_units.each {|unit| unit.fail('failed', 10) }
20
20
  end
@@ -11,8 +11,8 @@ class ServerTest < Test::Unit::TestCase
11
11
  context "The CloudCrowd::Server (Sinatra)" do
12
12
 
13
13
  setup do
14
- CloudCrowd::Job.destroy_all
15
- 2.times { CloudCrowd::Job.make }
14
+ Job.destroy_all
15
+ 2.times { Job.make }
16
16
  end
17
17
 
18
18
  should "be able to render the Operations Center (GET /)" do
@@ -52,20 +52,20 @@ class ServerTest < Test::Unit::TestCase
52
52
  job_info = JSON.parse(last_response.body)
53
53
  assert job_info['percent_complete'] == 0
54
54
  assert job_info['work_units'] == 1
55
- assert CloudCrowd::Job.last.id == job_info['id']
55
+ assert Job.last.id == job_info['id']
56
56
  end
57
57
 
58
58
  should "be able to check in on the status of a job" do
59
- get("/jobs/#{CloudCrowd::Job.last.id}")
59
+ get("/jobs/#{Job.last.id}")
60
60
  assert last_response.ok?
61
61
  assert JSON.parse(last_response.body)['percent_complete'] == 0
62
62
  end
63
63
 
64
64
  should "be able to clean up a job when we're done with it" do
65
- id = CloudCrowd::Job.last.id
65
+ id = Job.last.id
66
66
  delete("/jobs/#{id}")
67
67
  assert last_response.successful? && last_response.empty?
68
- assert !CloudCrowd::Job.find_by_id(id)
68
+ assert !Job.find_by_id(id)
69
69
  end
70
70
 
71
71
  end
@@ -5,7 +5,7 @@ class JobTest < Test::Unit::TestCase
5
5
  context "A CloudCrowd Job" do
6
6
 
7
7
  setup do
8
- @job = CloudCrowd::Job.make
8
+ @job = Job.make
9
9
  @unit = @job.work_units.first
10
10
  end
11
11
 
@@ -25,7 +25,7 @@ class JobTest < Test::Unit::TestCase
25
25
 
26
26
  should "know its completion status" do
27
27
  assert !@job.all_work_units_complete?
28
- @unit.update_attributes(:status => CloudCrowd::SUCCEEDED, :output => '{"output":"hello"}')
28
+ @unit.update_attributes(:status => SUCCEEDED, :output => '{"output":"hello"}')
29
29
  @job.check_for_completion
30
30
  assert @job.reload.all_work_units_complete?
31
31
  assert @job.percent_complete == 100
@@ -38,7 +38,7 @@ class JobTest < Test::Unit::TestCase
38
38
  end
39
39
 
40
40
  should "be able to create a job from a JSON request" do
41
- job = CloudCrowd::Job.create_from_request(JSON.parse(<<-EOS
41
+ job = Job.create_from_request(JSON.parse(<<-EOS
42
42
  { "inputs" : ["one", "two", "three"],
43
43
  "action" : "graphics_magick",
44
44
  "email" : "bob@example.com",
@@ -60,26 +60,38 @@ class JobTest < Test::Unit::TestCase
60
60
  end
61
61
 
62
62
  should "create jobs with a SPLITTING status for actions that have a split method defined" do
63
- job = CloudCrowd::Job.create_from_request({'inputs' => ['1'], 'action' => 'process_pdfs'})
63
+ job = Job.create_from_request({'inputs' => ['1'], 'action' => 'process_pdfs'})
64
64
  assert job.splittable?
65
65
  assert job.splitting?
66
66
  end
67
67
 
68
68
  should "fire a callback when a job has finished, successfully or not" do
69
69
  @job.update_attribute(:callback_url, 'http://example.com/callback')
70
- CloudCrowd::Job.any_instance.stubs(:fire_callback).returns(true)
71
- CloudCrowd::Job.any_instance.expects(:fire_callback)
70
+ Job.any_instance.stubs(:fire_callback).returns(true)
71
+ Job.any_instance.expects(:fire_callback)
72
72
  @job.work_units.first.finish('{"output":"output"}', 10)
73
73
  assert @job.all_work_units_complete?
74
74
  end
75
75
 
76
76
  should "have a 'pretty' display of the Job's status" do
77
77
  assert @job.display_status == 'processing'
78
- @job.update_attribute(:status, CloudCrowd::FAILED)
78
+ @job.update_attribute(:status, FAILED)
79
79
  assert @job.display_status == 'failed'
80
- @job.update_attribute(:status, CloudCrowd::MERGING)
80
+ @job.update_attribute(:status, MERGING)
81
81
  assert @job.display_status == 'merging'
82
82
  end
83
+
84
+ should "be able to clean up jobs that have aged beyond their use" do
85
+ count = Job.count
86
+ Job.cleanup_all
87
+ assert count == Job.count
88
+ Job.record_timestamps = false
89
+ @job.update_attributes :status => SUCCEEDED, :updated_at => 10.days.ago
90
+ Job.record_timestamps = true
91
+ Job.cleanup_all
92
+ assert count > Job.count
93
+ assert !Job.find_by_id(@job.id)
94
+ end
83
95
 
84
96
  end
85
97
 
@@ -17,14 +17,14 @@ class WorkUnitTest < Test::Unit::TestCase
17
17
 
18
18
  should "know if its done" do
19
19
  assert !@unit.complete?
20
- @unit.status = CloudCrowd::SUCCEEDED
20
+ @unit.status = SUCCEEDED
21
21
  assert @unit.complete?
22
- @unit.status = CloudCrowd::FAILED
22
+ @unit.status = FAILED
23
23
  assert @unit.complete?
24
24
  end
25
25
 
26
26
  should "have JSON that includes job attributes" do
27
- job = CloudCrowd::Job.make
27
+ job = Job.make
28
28
  unit_data = JSON.parse(job.work_units.first.to_json)
29
29
  assert unit_data['job_id'] == job.id
30
30
  assert unit_data['action'] == job.action
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cloud-crowd
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.6
4
+ version: 0.2.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jeremy Ashkenas
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-10-05 00:00:00 -04:00
12
+ date: 2009-10-19 00:00:00 -04:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency