cloud-crowd 0.3.1 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,14 +1,16 @@
1
- # A parallel WordCount. Depends on the 'wc' utility.
1
+ # A parallel WordCount. Depends on the 'wc' utility.
2
2
  class WordCount < CloudCrowd::Action
3
-
3
+
4
4
  # Count the words in a single book.
5
+ # Pretend that this takes longer than it really does, for demonstration purposes.
5
6
  def process
7
+ sleep 5
6
8
  (`wc -w #{input_path}`).match(/\A\s*(\d+)/)[1].to_i
7
9
  end
8
-
10
+
9
11
  # Sum the total word count.
10
12
  def merge
11
13
  input.inject(0) {|sum, count| sum + count }
12
14
  end
13
-
15
+
14
16
  end
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'cloud-crowd'
3
- s.version = '0.3.1' # Keep version in sync with cloud-cloud.rb
4
- s.date = '2009-11-19'
3
+ s.version = '0.3.2' # Keep version in sync with cloud-crowd.rb
4
+ s.date = '2010-01-08'
5
5
 
6
6
  s.homepage = "http://wiki.github.com/documentcloud/cloud-crowd"
7
7
  s.summary = "Parallel Processing for the Rest of Us"
@@ -12,13 +12,13 @@ gem 'sinatra'
12
12
  gem 'thin'
13
13
 
14
14
  # Autoloading for all the pieces which may or may not be needed:
15
- autoload :ActiveRecord, 'activerecord'
15
+ autoload :ActiveRecord, 'active_record'
16
16
  autoload :Benchmark, 'benchmark'
17
17
  autoload :Digest, 'digest'
18
18
  autoload :ERB, 'erb'
19
19
  autoload :FileUtils, 'fileutils'
20
20
  autoload :JSON, 'json'
21
- autoload :RestClient, 'restclient'
21
+ autoload :RestClient, 'rest_client'
22
22
  autoload :RightAws, 'right_aws'
23
23
  autoload :Sinatra, 'sinatra'
24
24
  autoload :Thin, 'thin'
@@ -44,7 +44,7 @@ module CloudCrowd
44
44
  autoload :WorkUnit, 'cloud_crowd/models'
45
45
 
46
46
  # Keep this version in sync with the gemspec.
47
- VERSION = '0.3.1'
47
+ VERSION = '0.3.2'
48
48
 
49
49
  # Increment the schema version when there's a backwards incompatible change.
50
50
  SCHEMA_VERSION = 3
@@ -1,25 +1,25 @@
1
1
  module CloudCrowd
2
-
2
+
3
3
  # A chunk of work that will be farmed out into many WorkUnits to be processed
4
4
  # in parallel by each active CloudCrowd::Worker. Jobs are defined by a list
5
- # of inputs (usually public urls to files), an action (the name of a script that
5
+ # of inputs (usually public urls to files), an action (the name of a script that
6
6
  # CloudCrowd knows how to run), and, eventually, a corresponding list of outputs.
7
7
  class Job < ActiveRecord::Base
8
8
  include ModelStatus
9
-
9
+
10
10
  CLEANUP_GRACE_PERIOD = 7 # That's a week.
11
-
11
+
12
12
  has_many :work_units, :dependent => :destroy
13
-
13
+
14
14
  validates_presence_of :status, :inputs, :action, :options
15
-
15
+
16
16
  before_validation_on_create :set_initial_status
17
17
  after_create :queue_for_workers
18
18
  before_destroy :cleanup_assets
19
-
19
+
20
20
  # Jobs that were last updated more than N days ago.
21
21
  named_scope :older_than, lambda {|num| {:conditions => ['updated_at < ?', num.days.ago]} }
22
-
22
+
23
23
  # Create a Job from an incoming JSON request, and add it to the queue.
24
24
  def self.create_from_request(h)
25
25
  self.create(
@@ -30,7 +30,7 @@ module CloudCrowd
30
30
  :callback_url => h['callback_url']
31
31
  )
32
32
  end
33
-
33
+
34
34
  # Clean up all jobs beyond a certain age.
35
35
  def self.cleanup_all(opts = {})
36
36
  days = opts[:days] || CLEANUP_GRACE_PERIOD
@@ -38,12 +38,12 @@ module CloudCrowd
38
38
  jobs.each {|job| job.destroy }
39
39
  end
40
40
  end
41
-
41
+
42
42
  # After work units are marked successful, we check to see if all of them have
43
- # finished, if so, continue on to the next phase of the job.
43
+ # finished. If so, continue on to the next phase of the job.
44
44
  def check_for_completion
45
45
  return unless all_work_units_complete?
46
- set_next_status
46
+ set_next_status
47
47
  outs = gather_outputs_from_work_units
48
48
  return queue_for_workers([outs]) if merging?
49
49
  if complete?
@@ -52,7 +52,7 @@ module CloudCrowd
52
52
  end
53
53
  self
54
54
  end
55
-
55
+
56
56
  # Transition this Job's current status to the appropriate next one, based
57
57
  # on the state of the WorkUnits and the nature of the Action.
58
58
  def set_next_status
@@ -63,12 +63,12 @@ module CloudCrowd
63
63
  SUCCEEDED
64
64
  )
65
65
  end
66
-
67
- # If a <tt>callback_url</tt> is defined, post the Job's JSON to it upon
66
+
67
+ # If a <tt>callback_url</tt> is defined, post the Job's JSON to it upon
68
68
  # completion. The <tt>callback_url</tt> may include HTTP basic authentication,
69
69
  # if you like:
70
70
  # http://user:password@example.com/job_complete
71
- # If the callback URL returns a '201 Created' HTTP status code, CloudCrowd
71
+ # If the callback URL returns a '201 Created' HTTP status code, CloudCrowd
72
72
  # will assume that the resource has been successfully created, and the Job
73
73
  # will be cleaned up.
74
74
  def fire_callback
@@ -76,54 +76,54 @@ module CloudCrowd
76
76
  response = RestClient.post(callback_url, {:job => self.to_json})
77
77
  Thread.new { self.destroy } if response && response.code == 201
78
78
  rescue RestClient::Exception => e
79
- puts "Failed to fire job callback. Hmmm, what should happen here?"
79
+ puts "Job ##{id} failed to fire callback: #{callback_url}"
80
80
  end
81
81
  end
82
-
82
+
83
83
  # Cleaning up after a job will remove all of its files from S3 or the
84
- # filesystem. Destroying a Job will cleanup_assets first. Run this in a
84
+ # filesystem. Destroying a Job will cleanup_assets first. Run this in a
85
85
  # separate thread to get out of the transaction's way.
86
86
  # TODO: Convert this into a 'cleanup' work unit that gets run by a worker.
87
87
  def cleanup_assets
88
88
  AssetStore.new.cleanup(self)
89
89
  end
90
-
91
- # Have all of the WorkUnits finished?
90
+
91
+ # Have all of the WorkUnits finished?
92
92
  def all_work_units_complete?
93
93
  self.work_units.incomplete.count <= 0
94
94
  end
95
-
95
+
96
96
  # Have any of the WorkUnits failed?
97
97
  def any_work_units_failed?
98
98
  self.work_units.failed.count > 0
99
99
  end
100
-
100
+
101
101
  # This job is splittable if its Action has a +split+ method.
102
102
  def splittable?
103
103
  self.action_class.public_instance_methods.map {|m| m.to_sym }.include? :split
104
104
  end
105
-
105
+
106
106
  # This job is done splitting if it's finished with its splitting work units.
107
107
  def done_splitting?
108
108
  splittable? && work_units.splitting.count <= 0
109
109
  end
110
-
110
+
111
111
  # This job is mergeable if its Action has a +merge+ method.
112
112
  def mergeable?
113
113
  self.processing? && self.action_class.public_instance_methods.map {|m| m.to_sym }.include?(:merge)
114
114
  end
115
-
115
+
116
116
  # Retrieve the class for this Job's Action.
117
117
  def action_class
118
118
  @action_class ||= CloudCrowd.actions[self.action]
119
119
  return @action_class if @action_class
120
120
  raise Error::ActionNotFound, "no action named: '#{self.action}' could be found"
121
121
  end
122
-
122
+
123
123
  # How complete is this Job?
124
124
  # Unfortunately, with the current processing sequence, the percent_complete
125
125
  # can pull a fast one and go backwards. This happens when there's a single
126
- # large input that takes a long time to split, and when it finally does it
126
+ # large input that takes a long time to split, and when it finally does it
127
127
  # creates a whole swarm of work units. This seems unavoidable.
128
128
  def percent_complete
129
129
  return 99 if merging?
@@ -132,25 +132,25 @@ module CloudCrowd
132
132
  return 100 if unit_count <= 0
133
133
  (work_units.complete.count / unit_count.to_f * 100).round
134
134
  end
135
-
135
+
136
136
  # How long has this Job taken?
137
137
  def time_taken
138
138
  return self.time if self.time
139
139
  Time.now - self.created_at
140
140
  end
141
-
141
+
142
142
  # Generate a stable 6-digit hex color code, based on the Job's id.
143
143
  def color
144
144
  @color ||= Digest::MD5.hexdigest(self.id.to_s)[-7...-1]
145
145
  end
146
-
146
+
147
147
  # A JSON representation of this job includes the statuses of its component
148
148
  # WorkUnits, as well as any completed outputs.
149
149
  def to_json(opts={})
150
150
  atts = {
151
151
  'id' => id,
152
152
  'color' => color,
153
- 'status' => display_status,
153
+ 'status' => display_status,
154
154
  'percent_complete' => percent_complete,
155
155
  'work_units' => work_units.count,
156
156
  'time_taken' => time_taken
@@ -159,10 +159,10 @@ module CloudCrowd
159
159
  atts['email'] = email if email
160
160
  atts.to_json
161
161
  end
162
-
163
-
162
+
163
+
164
164
  private
165
-
165
+
166
166
  # When the WorkUnits are all finished, gather all their outputs together
167
167
  # before removing them from the database entirely. Returns their merged JSON.
168
168
  def gather_outputs_from_work_units
@@ -171,20 +171,20 @@ module CloudCrowd
171
171
  self.work_units.complete.destroy_all
172
172
  outs.to_json
173
173
  end
174
-
175
- # When starting a new job, or moving to a new stage, split up the inputs
174
+
175
+ # When starting a new job, or moving to a new stage, split up the inputs
176
176
  # into WorkUnits, and queue them. Workers will start picking them up right
177
177
  # away.
178
178
  def queue_for_workers(input=nil)
179
179
  input ||= JSON.parse(self.inputs)
180
- input.each {|i| WorkUnit.start(self, action, i, status) }
180
+ input.each {|i| WorkUnit.start(self, action, i, status) }
181
181
  self
182
182
  end
183
-
183
+
184
184
  # A Job starts out either splitting or processing, depending on its action.
185
185
  def set_initial_status
186
186
  self.status = self.splittable? ? SPLITTING : PROCESSING
187
187
  end
188
-
188
+
189
189
  end
190
190
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cloud-crowd
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.3.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jeremy Ashkenas
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-11-19 00:00:00 -05:00
12
+ date: 2010-01-08 00:00:00 -05:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency