cloud-crowd 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'cloud-crowd'
3
- s.version = '0.3.0' # Keep version in sync with cloud-crowd.rb
4
- s.date = '2009-11-06'
3
+ s.version = '0.3.1' # Keep version in sync with cloud-crowd.rb
4
+ s.date = '2009-11-19'
5
5
 
6
6
  s.homepage = "http://wiki.github.com/documentcloud/cloud-crowd"
7
7
  s.summary = "Parallel Processing for the Rest of Us"
@@ -12,21 +12,21 @@ Gem::Specification.new do |s|
12
12
  everywhere is black with people and more come streaming from all sides as though
13
13
  streets had only one direction.
14
14
  EOS
15
-
15
+
16
16
  s.authors = ['Jeremy Ashkenas']
17
17
  s.email = 'jeremy@documentcloud.org'
18
18
  s.rubyforge_project = 'cloud-crowd'
19
-
19
+
20
20
  s.require_paths = ['lib']
21
21
  s.executables = ['crowd']
22
-
22
+
23
23
  s.has_rdoc = true
24
24
  s.extra_rdoc_files = ['README']
25
25
  s.rdoc_options << '--title' << 'CloudCrowd | Parallel Processing for the Rest of Us' <<
26
26
  '--exclude' << 'test' <<
27
27
  '--main' << 'README' <<
28
28
  '--all'
29
-
29
+
30
30
  s.add_dependency 'sinatra', ['>= 0.9.4']
31
31
  s.add_dependency 'activerecord', ['>= 2.3.3']
32
32
  s.add_dependency 'json', ['>= 1.1.7']
@@ -41,7 +41,7 @@ Gem::Specification.new do |s|
41
41
  s.add_development_dependency 'rack-test', ['>= 0.4.1']
42
42
  s.add_development_dependency 'mocha', ['>= 0.9.7']
43
43
  end
44
-
44
+
45
45
  s.files = %w(
46
46
  actions/graphics_magick.rb
47
47
  actions/process_pdfs.rb
@@ -15,8 +15,8 @@
15
15
  require 'rubygems'
16
16
  require 'cloud-crowd'
17
17
 
18
- CloudCrowd.configure(File.dirname(__FILE__) + '/config.yml')
19
- CloudCrowd.configure_database(File.dirname(__FILE__) + '/database.yml')
18
+ CloudCrowd.configure(::File.dirname(__FILE__) + '/config.yml')
19
+ CloudCrowd.configure_database(::File.dirname(__FILE__) + '/database.yml')
20
20
 
21
21
  map '/' do
22
22
  run CloudCrowd::Server
@@ -29,7 +29,7 @@ require 'socket'
29
29
  require 'cloud_crowd/exceptions'
30
30
 
31
31
  module CloudCrowd
32
-
32
+
33
33
  # Autoload all the CloudCrowd internals.
34
34
  autoload :Action, 'cloud_crowd/action'
35
35
  autoload :AssetStore, 'cloud_crowd/asset_store'
@@ -42,53 +42,53 @@ module CloudCrowd
42
42
  autoload :Server, 'cloud_crowd/server'
43
43
  autoload :Worker, 'cloud_crowd/worker'
44
44
  autoload :WorkUnit, 'cloud_crowd/models'
45
-
45
+
46
46
  # Keep this version in sync with the gemspec.
47
- VERSION = '0.3.0'
48
-
47
+ VERSION = '0.3.1'
48
+
49
49
  # Increment the schema version when there's a backwards incompatible change.
50
50
  SCHEMA_VERSION = 3
51
-
51
+
52
52
  # Root directory of the CloudCrowd gem.
53
53
  ROOT = File.expand_path(File.dirname(__FILE__) + '/..')
54
-
54
+
55
55
  # Default folder to log daemonized servers and nodes into.
56
56
  LOG_PATH = 'log'
57
-
57
+
58
58
  # Default folder to contain the pids of daemonized servers and nodes.
59
59
  PID_PATH = 'tmp/pids'
60
-
60
+
61
61
  # A Job is processing if its WorkUnits are in the queue to be handled by nodes.
62
62
  PROCESSING = 1
63
-
63
+
64
64
  # A Job has succeeded if all of its WorkUnits have finished successfully.
65
65
  SUCCEEDED = 2
66
-
66
+
67
67
  # A Job has failed if even a single one of its WorkUnits has failed (they may
68
68
  # be attempted multiple times on failure, however).
69
69
  FAILED = 3
70
-
70
+
71
71
  # A Job is splitting if it's in the process of dividing its inputs up into
72
72
  # multiple WorkUnits.
73
73
  SPLITTING = 4
74
-
74
+
75
75
  # A Job is merging if it's busy collecting all of its successful WorkUnits
76
76
  # back together into the final result.
77
77
  MERGING = 5
78
-
78
+
79
79
  # A Job is considered to be complete if it succeeded or if it failed.
80
80
  COMPLETE = [SUCCEEDED, FAILED]
81
-
81
+
82
82
  # A Job is considered incomplete if it's being processed, split up or merged.
83
83
  INCOMPLETE = [PROCESSING, SPLITTING, MERGING]
84
-
84
+
85
85
  # Mapping of statuses to their display strings.
86
86
  DISPLAY_STATUS_MAP = ['unknown', 'processing', 'succeeded', 'failed', 'splitting', 'merging']
87
-
87
+
88
88
  class << self
89
89
  attr_reader :config
90
90
  attr_accessor :identity
91
-
91
+
92
92
  # Configure CloudCrowd by passing in the path to <tt>config.yml</tt>.
93
93
  def configure(config_path)
94
94
  @config_path = File.expand_path(File.dirname(config_path))
@@ -96,7 +96,7 @@ module CloudCrowd
96
96
  end
97
97
 
98
98
  # Configure the CloudCrowd central database (and connect to it), by passing
99
- # in a path to <tt>database.yml</tt>. The file should use the standard
99
+ # in a path to <tt>database.yml</tt>. The file should use the standard
100
100
  # ActiveRecord connection format.
101
101
  def configure_database(config_path, validate_schema=true)
102
102
  configuration = YAML.load_file(config_path)
@@ -108,25 +108,25 @@ module CloudCrowd
108
108
  exit
109
109
  end
110
110
  end
111
-
112
- # Get a reference to the central server, including authentication if
111
+
112
+ # Get a reference to the central server, including authentication if
113
113
  # configured.
114
114
  def central_server
115
115
  @central_server ||= RestClient::Resource.new(CloudCrowd.config[:central_server], CloudCrowd.client_options)
116
116
  end
117
-
117
+
118
118
  # The path that daemonized servers and nodes will log to.
119
119
  def log_path(log_file=nil)
120
120
  @log_path ||= config[:log_path] || LOG_PATH
121
121
  log_file ? File.join(@log_path, log_file) : @log_path
122
122
  end
123
-
123
+
124
124
  # The path in which daemonized servers and nodes will store their pids.
125
125
  def pid_path(pid_file=nil)
126
126
  @pid_path ||= config[:pid_path] || PID_PATH
127
127
  pid_file ? File.join(@pid_path, pid_file) : @pid_path
128
128
  end
129
-
129
+
130
130
  # The standard RestClient options for the central server talking to nodes,
131
131
  # as well as the other way around. There's a timeout of 5 seconds to open
132
132
  # a connection, and a timeout of 30 to finish reading it.
@@ -145,11 +145,11 @@ module CloudCrowd
145
145
  def display_status(status)
146
146
  DISPLAY_STATUS_MAP[status] || 'unknown'
147
147
  end
148
-
148
+
149
149
  # CloudCrowd::Actions are requested dynamically by name. Access them through
150
150
  # this actions property, which behaves like a hash. At load time, we
151
151
  # load all installed Actions and CloudCrowd's default Actions into it.
152
- # If you wish to have certain nodes be specialized to only handle certain
152
+ # If you wish to have certain nodes be specialized to only handle certain
153
153
  # Actions, then install only those into the actions directory.
154
154
  def actions
155
155
  return @actions if @actions
@@ -160,10 +160,10 @@ module CloudCrowd
160
160
  memo
161
161
  end
162
162
  rescue NameError => e
163
- adjusted_message = "One of your actions failed to load. Please ensure that the name of your action class can be deduced from the name of the file. ex: 'word_count.rb' => 'WordCount'\n#{e.message}"
163
+ adjusted_message = "One of your actions failed to load. Please ensure that the name of your action class can be deduced from the name of the file. ex: 'word_count.rb' => 'WordCount'\n#{e.message}"
164
164
  raise NameError.new(adjusted_message, e.name)
165
165
  end
166
-
166
+
167
167
  # Retrieve the list of every installed Action for this node or server.
168
168
  def action_paths
169
169
  default_actions = Dir["#{ROOT}/actions/*.rb"]
@@ -171,18 +171,18 @@ module CloudCrowd
171
171
  custom_actions = CloudCrowd.config[:actions_path] ? Dir["#{CloudCrowd.config[:actions_path]}/*.rb"] : []
172
172
  default_actions + installed_actions + custom_actions
173
173
  end
174
-
174
+
175
175
  # Is this CloudCrowd instance a server? Useful for avoiding loading unneeded
176
176
  # code from actions.
177
177
  def server?
178
178
  @identity == :server
179
179
  end
180
-
180
+
181
181
  # Or is it a node?
182
182
  def node?
183
183
  @identity == :node
184
184
  end
185
-
185
+
186
186
  end
187
-
187
+
188
188
  end
@@ -1,21 +1,21 @@
1
1
  module CloudCrowd
2
-
3
- # The Worker, forked off from the Node when a new WorkUnit is received,
2
+
3
+ # The Worker, forked off from the Node when a new WorkUnit is received,
4
4
  # launches an Action for processing. Workers will only ever receive WorkUnits
5
- # that they are able to handle (for which they have a corresponding action in
6
- # their actions directory). If communication with the central server is
7
- # interrupted, the Worker will repeatedly attempt to complete its unit --
8
- # every Worker::RETRY_WAIT seconds. Any exceptions that take place during
9
- # the course of the Action will cause the Worker to mark the WorkUnit as
5
+ # that they are able to handle (for which they have a corresponding action in
6
+ # their actions directory). If communication with the central server is
7
+ # interrupted, the Worker will repeatedly attempt to complete its unit --
8
+ # every Worker::RETRY_WAIT seconds. Any exceptions that take place during
9
+ # the course of the Action will cause the Worker to mark the WorkUnit as
10
10
  # having failed. When finished, the Worker's process exits, minimizing the
11
11
  # potential for memory leaks.
12
12
  class Worker
13
-
13
+
14
14
  # Wait five seconds to retry, after internal communication errors.
15
15
  RETRY_WAIT = 5
16
-
16
+
17
17
  attr_reader :pid, :node, :unit, :status
18
-
18
+
19
19
  # A new Worker customizes itself to its WorkUnit at instantiation.
20
20
  def initialize(node, unit)
21
21
  @start_time = Time.now
@@ -25,7 +25,7 @@ module CloudCrowd
25
25
  @status = @unit['status']
26
26
  @retry_wait = RETRY_WAIT
27
27
  end
28
-
28
+
29
29
  # Return output to the central server, marking the WorkUnit done.
30
30
  def complete_work_unit(result)
31
31
  keep_trying_to "complete work unit" do
@@ -34,7 +34,7 @@ module CloudCrowd
34
34
  log "finished #{display_work_unit} in #{data[:time]} seconds"
35
35
  end
36
36
  end
37
-
37
+
38
38
  # Mark the WorkUnit failed, returning the exception to central.
39
39
  def fail_work_unit(exception)
40
40
  keep_trying_to "mark work unit as failed" do
@@ -43,9 +43,9 @@ module CloudCrowd
43
43
  log "failed #{display_work_unit} in #{data[:time]} seconds\n#{exception.message}\n#{exception.backtrace}"
44
44
  end
45
45
  end
46
-
46
+
47
47
  # We expect and require internal communication between the central server
48
- # and the workers to succeed. If it fails for any reason, log it, and then
48
+ # and the workers to succeed. If it fails for any reason, log it, and then
49
49
  # keep trying the same request.
50
50
  def keep_trying_to(title)
51
51
  begin
@@ -60,13 +60,13 @@ module CloudCrowd
60
60
  retry
61
61
  end
62
62
  end
63
-
63
+
64
64
  # Loggable details describing what the Worker is up to.
65
65
  def display_work_unit
66
66
  "unit ##{@unit['id']} (#{@unit['action']}/#{CloudCrowd.display_status(@status)})"
67
67
  end
68
-
69
- # Executes the WorkUnit by running the Action, catching all exceptions as
68
+
69
+ # Executes the WorkUnit by running the Action, catching all exceptions as
70
70
  # failures. We capture the thread so that we can kill it from the outside,
71
71
  # when exiting.
72
72
  def run_work_unit
@@ -82,14 +82,14 @@ module CloudCrowd
82
82
  else raise Error::StatusUnspecified, "work units must specify their status"
83
83
  end
84
84
  end
85
+ action.cleanup_work_directory if action
85
86
  complete_work_unit({'output' => result}.to_json)
86
87
  rescue Exception => e
87
- fail_work_unit(e)
88
- ensure
89
88
  action.cleanup_work_directory if action
89
+ fail_work_unit(e)
90
90
  end
91
91
  end
92
-
92
+
93
93
  # Run this worker inside of a fork. Attempts to exit cleanly.
94
94
  # Wraps run_work_unit to benchmark the execution time, if requested.
95
95
  def run
@@ -102,39 +102,39 @@ module CloudCrowd
102
102
  end
103
103
  Process.exit!
104
104
  end
105
-
106
- # There are some potentially important attributes of the WorkUnit that we'd
107
- # like to pass into the Action -- in case it needs to know them. They will
105
+
106
+ # There are some potentially important attributes of the WorkUnit that we'd
107
+ # like to pass into the Action -- in case it needs to know them. They will
108
108
  # always be made available in the options hash.
109
109
  def enhanced_unit_options
110
110
  @unit['options'].merge({
111
111
  'job_id' => @unit['job_id'],
112
112
  'work_unit_id' => @unit['id'],
113
- 'attempts' => @unit['attempts']
113
+ 'attempts' => @unit['attempts']
114
114
  })
115
115
  end
116
-
116
+
117
117
  # How long has this worker been running for?
118
118
  def time_taken
119
119
  Time.now - @start_time
120
120
  end
121
-
122
-
121
+
122
+
123
123
  private
124
-
125
- # Common parameters to send back to central upon unit completion,
124
+
125
+ # Common parameters to send back to central upon unit completion,
126
126
  # regardless of success or failure.
127
127
  def base_params
128
128
  { :pid => @pid,
129
- :id => @unit['id'],
129
+ :id => @unit['id'],
130
130
  :time => time_taken }
131
131
  end
132
-
132
+
133
133
  # Log a message to the daemon log. Includes PID for identification.
134
134
  def log(message)
135
135
  puts "Worker ##{@pid}: #{message}" unless ENV['RACK_ENV'] == 'test'
136
136
  end
137
-
137
+
138
138
  # When signaled to exit, make sure that the Worker shuts down without firing
139
139
  # the Node's at_exit callbacks.
140
140
  def trap_signals
@@ -143,7 +143,7 @@ module CloudCrowd
143
143
  Signal.trap('KILL') { Process.exit! }
144
144
  Signal.trap('TERM') { Process.exit! }
145
145
  end
146
-
146
+
147
147
  end
148
-
148
+
149
149
  end
@@ -9,8 +9,8 @@
9
9
  require 'rubygems'
10
10
  require 'cloud-crowd'
11
11
 
12
- CloudCrowd.configure(File.dirname(__FILE__) + '/config.yml')
13
- CloudCrowd.configure_database(File.dirname(__FILE__) + '/database.yml')
12
+ CloudCrowd.configure(::File.dirname(__FILE__) + '/config.yml')
13
+ CloudCrowd.configure_database(::File.dirname(__FILE__) + '/database.yml')
14
14
 
15
15
  map '/' do
16
16
  run CloudCrowd::Server
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cloud-crowd
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jeremy Ashkenas
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-11-06 00:00:00 -05:00
12
+ date: 2009-11-19 00:00:00 -05:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency