cloud-crowd 0.3.0 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'cloud-crowd'
3
- s.version = '0.3.0' # Keep version in sync with cloud-crowd.rb
4
- s.date = '2009-11-06'
3
+ s.version = '0.3.1' # Keep version in sync with cloud-crowd.rb
4
+ s.date = '2009-11-19'
5
5
 
6
6
  s.homepage = "http://wiki.github.com/documentcloud/cloud-crowd"
7
7
  s.summary = "Parallel Processing for the Rest of Us"
@@ -12,21 +12,21 @@ Gem::Specification.new do |s|
12
12
  everywhere is black with people and more come streaming from all sides as though
13
13
  streets had only one direction.
14
14
  EOS
15
-
15
+
16
16
  s.authors = ['Jeremy Ashkenas']
17
17
  s.email = 'jeremy@documentcloud.org'
18
18
  s.rubyforge_project = 'cloud-crowd'
19
-
19
+
20
20
  s.require_paths = ['lib']
21
21
  s.executables = ['crowd']
22
-
22
+
23
23
  s.has_rdoc = true
24
24
  s.extra_rdoc_files = ['README']
25
25
  s.rdoc_options << '--title' << 'CloudCrowd | Parallel Processing for the Rest of Us' <<
26
26
  '--exclude' << 'test' <<
27
27
  '--main' << 'README' <<
28
28
  '--all'
29
-
29
+
30
30
  s.add_dependency 'sinatra', ['>= 0.9.4']
31
31
  s.add_dependency 'activerecord', ['>= 2.3.3']
32
32
  s.add_dependency 'json', ['>= 1.1.7']
@@ -41,7 +41,7 @@ Gem::Specification.new do |s|
41
41
  s.add_development_dependency 'rack-test', ['>= 0.4.1']
42
42
  s.add_development_dependency 'mocha', ['>= 0.9.7']
43
43
  end
44
-
44
+
45
45
  s.files = %w(
46
46
  actions/graphics_magick.rb
47
47
  actions/process_pdfs.rb
@@ -15,8 +15,8 @@
15
15
  require 'rubygems'
16
16
  require 'cloud-crowd'
17
17
 
18
- CloudCrowd.configure(File.dirname(__FILE__) + '/config.yml')
19
- CloudCrowd.configure_database(File.dirname(__FILE__) + '/database.yml')
18
+ CloudCrowd.configure(::File.dirname(__FILE__) + '/config.yml')
19
+ CloudCrowd.configure_database(::File.dirname(__FILE__) + '/database.yml')
20
20
 
21
21
  map '/' do
22
22
  run CloudCrowd::Server
@@ -29,7 +29,7 @@ require 'socket'
29
29
  require 'cloud_crowd/exceptions'
30
30
 
31
31
  module CloudCrowd
32
-
32
+
33
33
  # Autoload all the CloudCrowd internals.
34
34
  autoload :Action, 'cloud_crowd/action'
35
35
  autoload :AssetStore, 'cloud_crowd/asset_store'
@@ -42,53 +42,53 @@ module CloudCrowd
42
42
  autoload :Server, 'cloud_crowd/server'
43
43
  autoload :Worker, 'cloud_crowd/worker'
44
44
  autoload :WorkUnit, 'cloud_crowd/models'
45
-
45
+
46
46
  # Keep this version in sync with the gemspec.
47
- VERSION = '0.3.0'
48
-
47
+ VERSION = '0.3.1'
48
+
49
49
  # Increment the schema version when there's a backwards incompatible change.
50
50
  SCHEMA_VERSION = 3
51
-
51
+
52
52
  # Root directory of the CloudCrowd gem.
53
53
  ROOT = File.expand_path(File.dirname(__FILE__) + '/..')
54
-
54
+
55
55
  # Default folder to log daemonized servers and nodes into.
56
56
  LOG_PATH = 'log'
57
-
57
+
58
58
  # Default folder to contain the pids of daemonized servers and nodes.
59
59
  PID_PATH = 'tmp/pids'
60
-
60
+
61
61
  # A Job is processing if its WorkUnits are in the queue to be handled by nodes.
62
62
  PROCESSING = 1
63
-
63
+
64
64
  # A Job has succeeded if all of its WorkUnits have finished successfully.
65
65
  SUCCEEDED = 2
66
-
66
+
67
67
  # A Job has failed if even a single one of its WorkUnits has failed (they may
68
68
  # be attempted multiple times on failure, however).
69
69
  FAILED = 3
70
-
70
+
71
71
  # A Job is splitting if it's in the process of dividing its inputs up into
72
72
  # multiple WorkUnits.
73
73
  SPLITTING = 4
74
-
74
+
75
75
  # A Job is merging if it's busy collecting all of its successful WorkUnits
76
76
  # back together into the final result.
77
77
  MERGING = 5
78
-
78
+
79
79
  # A Job is considered to be complete if it succeeded or if it failed.
80
80
  COMPLETE = [SUCCEEDED, FAILED]
81
-
81
+
82
82
  # A Job is considered incomplete if it's being processed, split up or merged.
83
83
  INCOMPLETE = [PROCESSING, SPLITTING, MERGING]
84
-
84
+
85
85
  # Mapping of statuses to their display strings.
86
86
  DISPLAY_STATUS_MAP = ['unknown', 'processing', 'succeeded', 'failed', 'splitting', 'merging']
87
-
87
+
88
88
  class << self
89
89
  attr_reader :config
90
90
  attr_accessor :identity
91
-
91
+
92
92
  # Configure CloudCrowd by passing in the path to <tt>config.yml</tt>.
93
93
  def configure(config_path)
94
94
  @config_path = File.expand_path(File.dirname(config_path))
@@ -96,7 +96,7 @@ module CloudCrowd
96
96
  end
97
97
 
98
98
  # Configure the CloudCrowd central database (and connect to it), by passing
99
- # in a path to <tt>database.yml</tt>. The file should use the standard
99
+ # in a path to <tt>database.yml</tt>. The file should use the standard
100
100
  # ActiveRecord connection format.
101
101
  def configure_database(config_path, validate_schema=true)
102
102
  configuration = YAML.load_file(config_path)
@@ -108,25 +108,25 @@ module CloudCrowd
108
108
  exit
109
109
  end
110
110
  end
111
-
112
- # Get a reference to the central server, including authentication if
111
+
112
+ # Get a reference to the central server, including authentication if
113
113
  # configured.
114
114
  def central_server
115
115
  @central_server ||= RestClient::Resource.new(CloudCrowd.config[:central_server], CloudCrowd.client_options)
116
116
  end
117
-
117
+
118
118
  # The path that daemonized servers and nodes will log to.
119
119
  def log_path(log_file=nil)
120
120
  @log_path ||= config[:log_path] || LOG_PATH
121
121
  log_file ? File.join(@log_path, log_file) : @log_path
122
122
  end
123
-
123
+
124
124
  # The path in which daemonized servers and nodes will store their pids.
125
125
  def pid_path(pid_file=nil)
126
126
  @pid_path ||= config[:pid_path] || PID_PATH
127
127
  pid_file ? File.join(@pid_path, pid_file) : @pid_path
128
128
  end
129
-
129
+
130
130
  # The standard RestClient options for the central server talking to nodes,
131
131
  # as well as the other way around. There's a timeout of 5 seconds to open
132
132
  # a connection, and a timeout of 30 to finish reading it.
@@ -145,11 +145,11 @@ module CloudCrowd
145
145
  def display_status(status)
146
146
  DISPLAY_STATUS_MAP[status] || 'unknown'
147
147
  end
148
-
148
+
149
149
  # CloudCrowd::Actions are requested dynamically by name. Access them through
150
150
  # this actions property, which behaves like a hash. At load time, we
151
151
  # load all installed Actions and CloudCrowd's default Actions into it.
152
- # If you wish to have certain nodes be specialized to only handle certain
152
+ # If you wish to have certain nodes be specialized to only handle certain
153
153
  # Actions, then install only those into the actions directory.
154
154
  def actions
155
155
  return @actions if @actions
@@ -160,10 +160,10 @@ module CloudCrowd
160
160
  memo
161
161
  end
162
162
  rescue NameError => e
163
- adjusted_message = "One of your actions failed to load. Please ensure that the name of your action class can be deduced from the name of the file. ex: 'word_count.rb' => 'WordCount'\n#{e.message}"
163
+ adjusted_message = "One of your actions failed to load. Please ensure that the name of your action class can be deduced from the name of the file. ex: 'word_count.rb' => 'WordCount'\n#{e.message}"
164
164
  raise NameError.new(adjusted_message, e.name)
165
165
  end
166
-
166
+
167
167
  # Retrieve the list of every installed Action for this node or server.
168
168
  def action_paths
169
169
  default_actions = Dir["#{ROOT}/actions/*.rb"]
@@ -171,18 +171,18 @@ module CloudCrowd
171
171
  custom_actions = CloudCrowd.config[:actions_path] ? Dir["#{CloudCrowd.config[:actions_path]}/*.rb"] : []
172
172
  default_actions + installed_actions + custom_actions
173
173
  end
174
-
174
+
175
175
  # Is this CloudCrowd instance a server? Useful for avoiding loading unneeded
176
176
  # code from actions.
177
177
  def server?
178
178
  @identity == :server
179
179
  end
180
-
180
+
181
181
  # Or is it a node?
182
182
  def node?
183
183
  @identity == :node
184
184
  end
185
-
185
+
186
186
  end
187
-
187
+
188
188
  end
@@ -1,21 +1,21 @@
1
1
  module CloudCrowd
2
-
3
- # The Worker, forked off from the Node when a new WorkUnit is received,
2
+
3
+ # The Worker, forked off from the Node when a new WorkUnit is received,
4
4
  # launches an Action for processing. Workers will only ever receive WorkUnits
5
- # that they are able to handle (for which they have a corresponding action in
6
- # their actions directory). If communication with the central server is
7
- # interrupted, the Worker will repeatedly attempt to complete its unit --
8
- # every Worker::RETRY_WAIT seconds. Any exceptions that take place during
9
- # the course of the Action will cause the Worker to mark the WorkUnit as
5
+ # that they are able to handle (for which they have a corresponding action in
6
+ # their actions directory). If communication with the central server is
7
+ # interrupted, the Worker will repeatedly attempt to complete its unit --
8
+ # every Worker::RETRY_WAIT seconds. Any exceptions that take place during
9
+ # the course of the Action will cause the Worker to mark the WorkUnit as
10
10
  # having failed. When finished, the Worker's process exits, minimizing the
11
11
  # potential for memory leaks.
12
12
  class Worker
13
-
13
+
14
14
  # Wait five seconds to retry, after internal communication errors.
15
15
  RETRY_WAIT = 5
16
-
16
+
17
17
  attr_reader :pid, :node, :unit, :status
18
-
18
+
19
19
  # A new Worker customizes itself to its WorkUnit at instantiation.
20
20
  def initialize(node, unit)
21
21
  @start_time = Time.now
@@ -25,7 +25,7 @@ module CloudCrowd
25
25
  @status = @unit['status']
26
26
  @retry_wait = RETRY_WAIT
27
27
  end
28
-
28
+
29
29
  # Return output to the central server, marking the WorkUnit done.
30
30
  def complete_work_unit(result)
31
31
  keep_trying_to "complete work unit" do
@@ -34,7 +34,7 @@ module CloudCrowd
34
34
  log "finished #{display_work_unit} in #{data[:time]} seconds"
35
35
  end
36
36
  end
37
-
37
+
38
38
  # Mark the WorkUnit failed, returning the exception to central.
39
39
  def fail_work_unit(exception)
40
40
  keep_trying_to "mark work unit as failed" do
@@ -43,9 +43,9 @@ module CloudCrowd
43
43
  log "failed #{display_work_unit} in #{data[:time]} seconds\n#{exception.message}\n#{exception.backtrace}"
44
44
  end
45
45
  end
46
-
46
+
47
47
  # We expect and require internal communication between the central server
48
- # and the workers to succeed. If it fails for any reason, log it, and then
48
+ # and the workers to succeed. If it fails for any reason, log it, and then
49
49
  # keep trying the same request.
50
50
  def keep_trying_to(title)
51
51
  begin
@@ -60,13 +60,13 @@ module CloudCrowd
60
60
  retry
61
61
  end
62
62
  end
63
-
63
+
64
64
  # Loggable details describing what the Worker is up to.
65
65
  def display_work_unit
66
66
  "unit ##{@unit['id']} (#{@unit['action']}/#{CloudCrowd.display_status(@status)})"
67
67
  end
68
-
69
- # Executes the WorkUnit by running the Action, catching all exceptions as
68
+
69
+ # Executes the WorkUnit by running the Action, catching all exceptions as
70
70
  # failures. We capture the thread so that we can kill it from the outside,
71
71
  # when exiting.
72
72
  def run_work_unit
@@ -82,14 +82,14 @@ module CloudCrowd
82
82
  else raise Error::StatusUnspecified, "work units must specify their status"
83
83
  end
84
84
  end
85
+ action.cleanup_work_directory if action
85
86
  complete_work_unit({'output' => result}.to_json)
86
87
  rescue Exception => e
87
- fail_work_unit(e)
88
- ensure
89
88
  action.cleanup_work_directory if action
89
+ fail_work_unit(e)
90
90
  end
91
91
  end
92
-
92
+
93
93
  # Run this worker inside of a fork. Attempts to exit cleanly.
94
94
  # Wraps run_work_unit to benchmark the execution time, if requested.
95
95
  def run
@@ -102,39 +102,39 @@ module CloudCrowd
102
102
  end
103
103
  Process.exit!
104
104
  end
105
-
106
- # There are some potentially important attributes of the WorkUnit that we'd
107
- # like to pass into the Action -- in case it needs to know them. They will
105
+
106
+ # There are some potentially important attributes of the WorkUnit that we'd
107
+ # like to pass into the Action -- in case it needs to know them. They will
108
108
  # always be made available in the options hash.
109
109
  def enhanced_unit_options
110
110
  @unit['options'].merge({
111
111
  'job_id' => @unit['job_id'],
112
112
  'work_unit_id' => @unit['id'],
113
- 'attempts' => @unit['attempts']
113
+ 'attempts' => @unit['attempts']
114
114
  })
115
115
  end
116
-
116
+
117
117
  # How long has this worker been running for?
118
118
  def time_taken
119
119
  Time.now - @start_time
120
120
  end
121
-
122
-
121
+
122
+
123
123
  private
124
-
125
- # Common parameters to send back to central upon unit completion,
124
+
125
+ # Common parameters to send back to central upon unit completion,
126
126
  # regardless of success or failure.
127
127
  def base_params
128
128
  { :pid => @pid,
129
- :id => @unit['id'],
129
+ :id => @unit['id'],
130
130
  :time => time_taken }
131
131
  end
132
-
132
+
133
133
  # Log a message to the daemon log. Includes PID for identification.
134
134
  def log(message)
135
135
  puts "Worker ##{@pid}: #{message}" unless ENV['RACK_ENV'] == 'test'
136
136
  end
137
-
137
+
138
138
  # When signaled to exit, make sure that the Worker shuts down without firing
139
139
  # the Node's at_exit callbacks.
140
140
  def trap_signals
@@ -143,7 +143,7 @@ module CloudCrowd
143
143
  Signal.trap('KILL') { Process.exit! }
144
144
  Signal.trap('TERM') { Process.exit! }
145
145
  end
146
-
146
+
147
147
  end
148
-
148
+
149
149
  end
@@ -9,8 +9,8 @@
9
9
  require 'rubygems'
10
10
  require 'cloud-crowd'
11
11
 
12
- CloudCrowd.configure(File.dirname(__FILE__) + '/config.yml')
13
- CloudCrowd.configure_database(File.dirname(__FILE__) + '/database.yml')
12
+ CloudCrowd.configure(::File.dirname(__FILE__) + '/config.yml')
13
+ CloudCrowd.configure_database(::File.dirname(__FILE__) + '/database.yml')
14
14
 
15
15
  map '/' do
16
16
  run CloudCrowd::Server
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cloud-crowd
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jeremy Ashkenas
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-11-06 00:00:00 -05:00
12
+ date: 2009-11-19 00:00:00 -05:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency