parallel-forkmanager 1.0.1 → 2.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,64 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # require 'rubygems'
4
+ require "net/http"
5
+ # require 'forkmanager'
6
+ require "lib/parallel/forkmanager.rb"
7
+
8
+ min_version = "1.2.0"
9
+
10
+ if Parallel::ForkManager::VERSION < min_version
11
+ warn <<-ETX
12
+ This script will only run under Parallel::ForkManager #{min_version} or newer!
13
+ Please update your version of Parallel::ForkManager and try again!
14
+ ETX
15
+ exit 1
16
+ end
17
+
18
+ my_urls = [
19
+ "http://www.fakesite.us/",
20
+ "http://www.cnn.com/",
21
+ "http://oreilly.com/",
22
+ "http://www.cakewalk.com/",
23
+ "http://www.asdfsemicolonl.kj/index.htm"
24
+ ]
25
+
26
+ max_proc = 20
27
+ my_timeout = 5 # seconds
28
+
29
+ pfm = Parallel::ForkManager.new(max_proc)
30
+
31
+ pfm.run_on_finish do |pid, exit_code, ident|
32
+ print "** PID (#{pid}) for #{ident} exited with code #{exit_code}!\n"
33
+ end
34
+
35
+ my_urls.each do |my_url|
36
+ begin
37
+ pfm.start(my_url) && next
38
+ url = URI.parse(my_url)
39
+
40
+ begin
41
+ http = Net::HTTP.new(url.host, url.port)
42
+ http.open_timeout = http.read_timeout = my_timeout
43
+ res = http.get(url.path)
44
+ status = res.code
45
+
46
+ # You may want to check some other code than 200 here!
47
+ if status.to_i == 200
48
+ pfm.finish(0)
49
+ else
50
+ pfm.finish(255)
51
+ end
52
+ rescue Timeout::Error => e
53
+ print "*** #{my_url}: #{e.message}!\n"
54
+ pfm.finish(255)
55
+ end # begin
56
+ rescue StandardError => e
57
+ print "Connection error: #{e.message}!\n"
58
+ pfm.finish(255)
59
+ end
60
+ end
61
+
62
+ pfm.wait_all_children
63
+
64
+ print "\n"
@@ -0,0 +1,30 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "rubygems"
4
+ require "parallel/forkmanager"
5
+
6
+ max_procs = 5
7
+ pfm = Parallel::ForkManager.new(max_procs)
8
+
9
+ items = (1..10).to_a
10
+
11
+ pfm.run_on_start do |pid, ident|
12
+ print "run on start ::: #{ident} (#{pid})\n"
13
+ end
14
+
15
+ pfm.run_on_finish do |pid, exit_code, ident|
16
+ print "run on finish ::: ** PID: #{pid} EXIT: #{exit_code} IDENT: #{ident}\n"
17
+ end
18
+
19
+ period = 1.0
20
+ pfm.run_on_wait(period) do
21
+ print "** Have to wait for one child ...\n"
22
+ end
23
+
24
+ items.each do |item|
25
+ my_item = "nate-" + item.to_s
26
+ pfm.start(my_item) && next
27
+ pfm.finish(23)
28
+ end
29
+
30
+ pfm.wait_all_children
@@ -1,460 +1,742 @@
1
- # Parallel::ForkManager -- A simple parallel processing fork manager.
2
- #
3
- #
4
- # Copyright (c) 2008 Nathan Patwardhan
5
- #
6
- # Author: Nathan Patwardhan <noopy.org@gmail.com>
7
- #
8
- # Documentation: Nathan Patwardhan <noopy.org@gmail.com>, based on Perl Parallel::ForkManager documentation by Noah Robin <sitz@onastick.net> and dlux <dlux@kapu.hu>.
9
- #
10
- # Credits (for original Perl implementation):
11
- # - Chuck Hirstius <chirstius@megapathdsl.net> (callback exit status, original Perl example)
12
- # - Grant Hopwood <hopwoodg@valero.com> (win32 port)
13
- # - Mark Southern <mark_southern@merck.com> (bugfix)
14
- #
15
- # Credits (Ruby port):
16
- # - Robert Klemme <shortcutter@googlemail.com> (clarification on Ruby lambda)
17
- # - David A. Black <dblack@rubypal.com> (clarification on Ruby lambda)
18
- # - Roger Pack <rogerdpack@gmail.com> (bugfix)
19
- #
20
- # == Overview
21
- #
22
- # Parallel::ForkManager is used for operations that you would like to do in parallel
23
- # (e.g. downloading a bunch of web content simultaneously) but would prefer to use
24
- # fork() instead of threads. Instead of managing child processes yourself Parallel::ForkManager
25
- # handles the cleanup for you. Parallel::ForkManager also provides some nifty callbacks
26
- # you can use at start and finish, or while you're waiting for child processes to complete.
27
- #
28
- # == Introduction
29
- #
30
- # If you've used fork() before, you're well aware that you need to be responsible
31
- # for managing (i.e. cleaning up) the processes that were created as a result.
32
- # Parallel::ForkManager handles this for you such that you start() and finish()
33
- # a process without having to worry about child processes along the way.
34
- #
35
- # For instance you can use the following code to grab a list of webpages in
36
- # parallel using Net::HTTP -- and store the output in files.
37
- #
38
- # == Example
39
- #
40
- # #!/usr/bin/env ruby
41
- #
42
- # require 'net/http'
43
- # require 'Parallel/ForkManager'
44
- #
45
- # save_dir = '/tmp'
46
- #
47
- # my_urls = [
48
- # 'http://www.cnn.com/index.html',
49
- # 'http://www.oreilly.com/index.html',
50
- # 'http://www.cakewalk.com/index.html',
51
- # 'http://www.asdfsemicolonl.kj/index.htm'
52
- # ]
53
- #
54
- # max_proc = 20
55
- # pfm = Parallel::ForkManager.new(max_proc)
56
- #
57
- # pfm.run_on_finish(
58
- # lambda {
59
- # |pid,exit_code,ident|
60
- # print "** PID (#{pid}) for #{ident} exited with code #{exit_code}!\n"
61
- # }
62
- # )
63
- #
64
- # for my_url in my_urls
65
- # pfm.start(my_url) and next
66
- #
67
- # url = URI.parse(my_url)
68
- #
69
- # begin
70
- # req = Net::HTTP::Get.new(url.path)
71
- # res = Net::HTTP.start(url.host, url.port) {|http|
72
- # http.request(req)
73
- # }
74
- # rescue
75
- # pfm.finish(255)
76
- # end
77
- #
78
- # status = res.code
79
- # out_file = save_dir + '/' + url.host + '.txt';
80
- #
81
- # if status.to_i == 200
82
- # f = File.open(out_file, 'w')
83
- # f.print res.body
84
- # f.close()
85
- # pfm.finish(0)
86
- # else
87
- # pfm.finish(255)
88
- # end
89
- # end
90
- #
91
- # pfm.wait_all_children()
92
- #
93
- # First you need to instantiate the ForkManager with the "new" constructor.
94
- # You must specify the maximum number of processes to be created. If you
95
- # specify 0, then NO fork will be done; this is good for debugging purposes.
96
- #
97
- # Next, use pfm.start() to do the fork. pfm returns 0 for the child process,
98
- # and child pid for the parent process. The "and next" skips the internal
99
- # loop in the parent process.
100
- #
101
- # - pm.start() dies if the fork fails.
102
- #
103
- # - pfm.finish() terminates the child process (assuming a fork was done in the "start").
104
- #
105
- # - You cannot use pfm.start() if you are already in the child process.
106
- # If you want to manage another set of subprocesses in the child process,
107
- # you must instantiate another Parallel::ForkManager object!
108
- #
109
- # == Bugs and Limitations
110
- #
111
- # Parallel::ForkManager is a Ruby-centric rebase of Perl Parallel::ForkManager 0.7.5.
112
- # While much of the original code was rewritten such that ForkManager worked in the "Ruby way",
113
- # you might find some "warts" due to inconsistencies between Ruby and the original Perl code.
114
- #
115
- # Do not use Parallel::ForkManager in an environment where other child
116
- # processes can affect the run of the main program, so using this module
117
- # is not recommended in an environment where fork() / wait() is already used.
118
- #
119
- # If you want to use more than one copy of the Parallel::ForkManager then
120
- # you have to make sure that all children processes are terminated -- before you
121
- # use the second object in the main program.
122
- #
123
- # You are free to use a new copy of Parallel::ForkManager in the child
124
- # processes, although I don't think it makes sense.
125
- #
1
+ require "English"
2
+ require "tmpdir"
3
+ require "yaml"
126
4
 
5
+ require_relative "forkmanager/version"
6
+ require_relative "forkmanager/process_interface"
7
+ require_relative "forkmanager/serializer"
8
+ require_relative "forkmanager/dummy_process_status"
9
+
10
+ ##
11
+ # This module provides a namespace.
127
12
  module Parallel
13
+ ##
14
+ # This class provides a higher level interface to +fork+, allowing you to
15
+ # limit the number of child processes spawned and it provides a mechanism for
16
+ # child processes to return data structures to the parent.
17
+ class ForkManager
18
+ include Parallel::ForkManager::ProcessInterface
128
19
 
129
- class ForkManager
130
- VERSION = '1.0.1' # $Revision: 1.2 $
20
+ ##
21
+ # Instantiate a Parallel::ForkManager object. You must specify the maximum
22
+ # number of children to fork off. If you specify 0 (zero), then no children
23
+ # will be forked. This is intended for debugging purposes.
24
+ #
25
+ # The optional second parameter, params, is only used if you want to customize
26
+ # the behavior that children will use to send back some data (see Retrieving
27
+ # Data Structures below) to the parent. The following values are currently
28
+ # accepted for params (and their meanings):
29
+ # - params['tempdir'] represents the location of the temporary directory where serialized data structures will be stored.
30
+ # - params['serialize_as'] represents how the data will be serialized.
31
+ #
32
+ # XXX: Not quite true at the moment, debug is set to 0 if no params are
33
+ # provided, and the serialization isn't set.
34
+ #
35
+ # If params has not been provided, the following values are set:
36
+ # - @debug is set to non-zero to provide debugging messages. Default is 0.
37
+ # - @tempdir is set to Dir.tmpdir() (likely defaults to /tmp).
38
+ #
39
+ # NOTE NOTE NOTE: If you set tempdir to a directory that does not exist,
40
+ # Parallel::ForkManager will <em>not</em> create this directory for you
41
+ # and new() will exit!
42
+ #
43
+ # @param max_procs[Integer] maximum number of concurrent child processes.
44
+ # @param params[Hash] configuration parameters.
45
+ def initialize(max_procs = 0, params = {})
46
+ check_ruby_version
47
+ setup_instance_variables(max_procs, params)
131
48
 
132
- # Set debug to 1 for debugging messages.
133
- attr_accessor :debug
134
- attr_accessor :max_proc, :processes, :in_child, :on_wait_period
135
- attr_accessor :do_on_start, :do_on_finish, :do_on_wait
49
+ # Always provide debug information if our max processes are zero!
50
+ if @max_procs.zero?
51
+ puts "Zero processes have been specified so we will not fork and will proceed in debug mode!"
52
+ puts "in initialize #{max_procs}!"
53
+ puts "Will use tempdir #{@tempdir}"
54
+ end
136
55
 
137
- def initialize(procs)
138
- @debug = 0
139
- @max_proc = procs
140
- @processes = {}
141
- @do_on_finish = {}
142
- @in_child = 0
56
+ # Appetite for Destruction.
57
+ ObjectSpace.define_finalizer(self, self.class._finalize)
58
+ end
143
59
 
144
- if self.debug == 1
145
- print "in initialize #{max_proc}!\n"
60
+ ##
61
+ # This finalizer is not meant to be called manually, it cleans up temporary
62
+ # files which were used to return serialized data from the children.
63
+ def self._finalize
64
+ proc do
65
+ Dir.foreach(tempdir) do |file_name|
66
+ prefix = "Parallel-ForkManager-#{parent_pid}-"
67
+ next unless file_name.start_with prefix
68
+ File.unlink("#{tempdir}/#{file_name}")
146
69
  end
70
+ end
147
71
  end
148
72
 
149
- #
150
- # start("string") -- "string" identification is optional.
151
- #
152
- # start("string") "puts the fork in Parallel::ForkManager" -- as start() does
153
- # the fork().
154
- #
155
- # start("string") takes an optional "string" argument to
156
- # use as a process identifier. It is used by
157
- # the "run_on_finish" callback for identifying the finished
158
- # process. See run_on_finish() for more information.
159
- #
160
- # Return: PID of child process if in parent, or 0 if in the
161
- # child process.
162
-
163
- def start(identification=nil)
164
- if self.in_child == 1
165
- puts "Cannot start another process while you are in the child process"
166
- exit 1
167
- end
73
+ attr_reader :max_procs
168
74
 
169
- while(self.processes.length() >= self.max_proc)
170
- self.on_wait()
171
- if defined? self.on_wait_period
172
- arg = Process::WNOHANG
173
- else
174
- arg = nil
175
- end
176
- self.wait_one_child(arg)
75
+ ##
76
+ # start("string") "puts the fork in Parallel::ForkManager" -- as start() does
77
+ # the fork(). start() returns the pid of the child process for the parent,
78
+ # and nil for the child process. If you set the 'processes' parameter for the
79
+ # constructor to 0, then, assuming you're in the child process, pm.start()
80
+ # simply returns nil.
81
+ #
82
+ # start("string") takes an optional "string" argument to use as a process
83
+ # identifier. It is used by the "run_on_finish" callback for identifying
84
+ # the finished process. See run_on_finish() for more information.
85
+ #
86
+ # For example:
87
+ #
88
+ # my_ident = "webwacker-1.0"
89
+ # pm.start(my_ident)
90
+ #
91
+ # start("string") { block } takes an optional block parameter
92
+ # that tells the ForkManager to follow Ruby fork() semantics for blocks.
93
+ # For example:
94
+ #
95
+ # my_ident = "webwacker-1.0"
96
+ # pm.start(my_ident) {
97
+ # print "As easy as "
98
+ # [1,2,3].each {
99
+ # |i|
100
+ # print i, "... "
101
+ # }
102
+ # }
103
+ #
104
+ # start("string", arg1, arg2, ... , argN) { block } requires a block parameter
105
+ # that tells the ForkManager to follow Ruby fork() semantics for blocks. Like
106
+ # start("string"), "string" is an optional argument to use as a process
107
+ # identifier and is used by the "run_on_finish" callback for identifying
108
+ # the finished process. For example:
109
+ #
110
+ # my_ident = "webwacker-1.0"
111
+ # pm.start(my_ident, 1, 2, 3) {
112
+ # |*my_args|
113
+ # unless my_args.empty?
114
+ # print "As easy as "
115
+ # my_args.each {
116
+ # |i|
117
+ # print i, "... "
118
+ # }
119
+ # end
120
+ # }
121
+ #
122
+ # <em>NOTE NOTE NOTE: when you use start("string") with an optional block
123
+ # parameter, the code in your block <em>must</em> explicitly exit non-zero
124
+ # if you are using callbacks with the ForkManager (e.g. run_on_finish).</em>
125
+ # This is because fork(), when run with a block parameter, terminates the
126
+ # subprocess with a status of 0 by default. If your block fails to exit
127
+ # non-zero, *all* of your exit_code(s) will be zero regardless of any value
128
+ # you might have passed to finish(...).
129
+ #
130
+ # To accommodate this behavior of fork and blocks, you can do
131
+ # something like the following:
132
+ #
133
+ # my_urls = [ ... some list of urls here ... ]
134
+ # my_ident = "webwacker-1.0"
135
+ #
136
+ # my_urls.each {
137
+ # |my_url|
138
+ # pm.start(my_ident) {
139
+ # my_status = get_some_url(my_url)
140
+ # if my_status.to_i == 200
141
+ # exit 0
142
+ # else
143
+ # exit 255
144
+ # }
145
+ # }
146
+ #
147
+ # ... etc ...
148
+ #
149
+
150
+ def start(identification = nil, *args, &run_block)
151
+ fail AttemptedStartInChildProcessError if in_child
152
+
153
+ while @max_procs.nonzero? && @processes.length >= @max_procs
154
+ on_wait
155
+ arg = (defined? @on_wait_period && !@on_wait_period.nil?) ? Process::WNOHANG : nil
156
+ kid = wait_one_child(arg)
157
+ if kid == 0 || kid == -1
158
+ sleep @waitpid_blocking_sleep
177
159
  end
178
-
179
- self.wait_children()
180
-
181
- if self.max_proc
182
- pid = fork()
183
- if ! defined? pid
184
- print "Cannot fork #{$!}\n"
185
- exit 1
186
- end
187
-
188
- if pid != nil
189
- self.processes[pid] = identification
190
- self.on_start(pid, identification)
191
- else
192
- if ! pid
193
- self.in_child = 1
194
- end
195
- end
196
- return pid
160
+ end
161
+
162
+ wait_children
163
+
164
+ if @max_procs.nonzero?
165
+ if block_given?
166
+ fail "start(...) wrong number of args" if run_block.arity >= 0 && args.size != run_block.arity
167
+ @has_block = true
168
+ pid = (!args.empty?) ?
169
+ fork { run_block.call(*args); } :
170
+ fork { run_block.call(); }
197
171
  else
198
- self.processes[$$] = identification
199
- self.on_start($$, identification)
200
- return 0
201
- end
202
- end
172
+ fail "start(...) args given but block is empty!" unless args.empty?
203
173
 
204
- #
205
- # finish(exit_code) -- exit_code is optional
206
- #
207
- # finish() loses the child process by exiting and accepts an optional exit code.
208
- # Default exit code is 0 and can be retrieved in the parent via callback.
209
- # If you're running the program in debug mode (max_proc == 0), this method
210
- # doesn't do anything.
211
- #
212
- def finish(exit_code = 0)
213
- if self.in_child == 1
214
- exit exit_code || 0
174
+ pid = fork
215
175
  end
176
+ fail "Cannot fork #{$ERROR_INFO}" unless defined? pid
216
177
 
217
- if self.max_proc == 0
218
- self.on_finish($$, exit_code, self.processes[$$], 0, 0)
219
- self.processes.delete($$)
178
+ if pid.nil?
179
+ self.in_child = true
180
+ else
181
+ @processes[pid] = identification
182
+ on_start(pid, identification)
220
183
  end
221
-
222
- return 0
184
+
185
+ return pid
186
+ else
187
+ @processes[$PID] = identification
188
+ on_start($PID, identification)
189
+
190
+ return nil
191
+ end
223
192
  end
224
-
225
- def wait_children()
226
- return if self.processes.empty?
227
-
228
- kid = nil # Should our default be nil?
229
- loop do
230
- kid = self.wait_one_child(Process::WNOHANG)
231
- break if kid > 0 || kid < -1
193
+
194
+ #
195
+ # finish(exit_code, [data_structure]) -- exit_code is optional
196
+ #
197
+ # finish() closes the child process by exiting and accepts an optional exit
198
+ # code (default exit code is 0) which can be retrieved in the parent via
199
+ # callback. If you're running the program in debug mode (max_procs == 0),
200
+ # this method just calls the callback.
201
+ #
202
+ # If <em>data_structure</em> is provided, then <em>data structure</em> is
203
+ # serialized and passed to the parent process. See <em>Retrieving Data
204
+ # Structures</em> in the next section for more info. For example:
205
+ #
206
+ # %w{Fred Wilma Ernie Bert Lucy Ethel Curly Moe Larry}.each {
207
+ # |person|
208
+ # # pm.start(...) here
209
+ #
210
+ # # ... etc ...
211
+ #
212
+ # # Pass along data structure to finish().
213
+ # pm.finish(0, {'person' => person})
214
+ # }
215
+ #
216
+ #
217
+ # === Retrieving Data Structures
218
+ #
219
+ # The ability for the parent to retrieve data structures from child processes
220
+ # was adapted to Parallel::ForkManager 1.5.0 (and newer) from Perl Parallel::ForkManager.
221
+ # This functionality was originally introduced in Perl Parallel::ForkManager
222
+ # 0.7.6.
223
+ #
224
+ # Each child process may optionally send 1 data structure back to the parent.
225
+ # By data structure, we mean a string, hash, or array. The contents of the
226
+ # data structure are written out to temporary files on disk using the Marshal
227
+ # dump() method. This data structure is then retrieved from within the code
228
+ # you send to the run_on_finish callback.
229
+ #
230
+ # NOTE NOTE NOTE: Only serialization with Marshal and yaml are supported at
231
+ # this time. Future versions of Parallel::ForkManager <em>may</em> support
232
+ # expanded functionality!
233
+ #
234
+ # There are 2 steps involved in retrieving data structures:
235
+ # 1. The data structure the child wishes to send back to the parent is provided as the second argument to the finish() call. It is up to the child to decide whether or not to send anything back to the parent.
236
+ # 2. The data structure is retrieved using the callback provided in the run_on_finish() method.
237
+ #
238
+ # Data structure retrieval is <em>not</em> the same as returning a data
239
+ # structure from a method call! The data structure referenced by a given
240
+ # child process is serialized and written out to a file in the type specified
241
+ # earlier in serialize_as. If serialize_as was not specified earlier, then
242
+ # no serialization will be done.
243
+ #
244
+ # The file is subsequently read back into memory and a new data structure that
245
+ # belongs to the parent process is created. Therefore it is recommended that
246
+ # you keep the returned structure small in size to mitigate any possible
247
+ # performance penalties.
248
+ #
249
+ def finish(exit_code = 0, data_structure = nil)
250
+ if @has_block
251
+ fail "Do not use finish(...) when using blocks. Use an explicit exit in your block instead!\n"
252
+ end
253
+
254
+ if in_child
255
+ exit_code ||= 0
256
+
257
+ unless data_structure.nil?
258
+ @data_structure = data_structure
259
+
260
+ the_tempfile = "#{@tempdir}Parallel-ForkManager-#{@parent_pid}-#{$PID}.txt"
261
+
262
+ begin
263
+ fail "Unable to serialize data!" unless _serialize_data(the_tempfile)
264
+ rescue => e
265
+ puts "Unable to store #{the_tempfile}: #{e.message}"
266
+ exit 1
267
+ end
232
268
  end
269
+
270
+ Kernel.exit!(exit_code)
271
+ end
272
+
273
+ if @max_procs == 0
274
+ on_finish($PID, exit_code, @processes[$PID], 0, 0)
275
+ @processes.delete($PID)
276
+ end
277
+ 0
278
+ end
279
+
280
+ # reap_finished_children() / wait_children()
281
+ #
282
+ # This is a non-blocking call to reap children and execute callbacks independent
283
+ # of calls to "start" or "wait_all_children". Use this in scenarios where
284
+ # "start" is called infrequently but you would like the callbacks executed quickly.
285
+
286
+ def wait_children
287
+ return if @processes.keys.empty?
288
+
289
+ kid = nil
290
+ begin
291
+ begin
292
+ kid = wait_one_child(Process::WNOHANG)
293
+ end while kid > 0 || kid < -1
294
+ rescue Errno::ECHILD
295
+ return
296
+ end
233
297
  end
234
-
235
- alias :wait_childs :wait_children # compatibility
236
-
237
- #
238
- # Probably won't want to call this directly. Just let wait_all_children(...)
239
- # make the call for you.
240
- #
241
- def wait_one_child(parent)
242
- kid = nil
243
- while true
244
- # Call _NT_waitpid(...) if we're using a Windows or Java variant.
245
- if(RUBY_PLATFORM =~ /mswin|mingw|bccwin|wince|emx|java/)
246
- kid = self._NT_waitpid(-1, parent ||= 0)
247
- else
248
- kid = self._waitpid(-1, parent ||= 0)
298
+
299
+ alias_method :wait_childs, :wait_children # compatibility
300
+ alias_method :reap_finished_children, :wait_children; # behavioral synonym for clarity
301
+
302
+ #
303
+ # Probably won't want to call this directly. Just let wait_all_children(...)
304
+ # make the call for you.
305
+ #
306
+ def wait_one_child(par)
307
+ params = par || 0
308
+
309
+ kid = nil
310
+ loop do
311
+ kid = _waitpid(-1, params)
312
+ break if kid.nil? || kid == 0 || kid == -1 # Win32 returns negative PIDs
313
+ redo unless @processes.key?(kid)
314
+ id = @processes.delete(kid)
315
+
316
+ # Retrieve child data structure, if any.
317
+ the_retr_data = nil
318
+ the_tempfile = "#{@tempdir}Parallel-ForkManager-#{$PID}-#{kid}.txt"
319
+
320
+ begin
321
+ if File.exist?(the_tempfile) && !File.zero?(the_tempfile)
322
+ unless _unserialize_data(the_tempfile)
323
+ fail "Unable to unserialize data!"
249
324
  end
250
- last if kid == 0 or kid == -1 # Win32 returns negative PIDs
251
- redo if ! self.processes.has_key?(kid)
252
- id = self.processes.delete(kid)
253
- self.on_finish(kid, $? >> 8, id, $? & 0x7f, $? & 0x80 ? 1 : 0)
254
- break
325
+
326
+ the_retr_data = @data_structure
327
+ end
328
+
329
+ File.unlink(the_tempfile) if File.exist?(the_tempfile)
330
+ rescue => e
331
+ print "wait_one_child failed to retrieve object: #{e.message}\n"
332
+ exit 1
255
333
  end
256
334
 
257
- kid
335
+ status = child_status
336
+ on_finish(kid, status.exitstatus, id, status.stopsig, status.coredump?, the_retr_data)
337
+ break
338
+ end
339
+
340
+ kid ||= 0
341
+ kid
258
342
  end
259
343
 
260
- #
261
- # wait_all_children() will wait for all the processes which have been
262
- # forked. This is a blocking wait.
263
- #
264
- def wait_all_children()
265
- while ! self.processes.empty?
266
- self.on_wait()
267
- if defined? self.on_wait_period
268
- arg = Process::WNOHANG
269
- else
270
- arg = nil
271
- end
272
- self.wait_one_child(arg)
344
+ #
345
+ # wait_all_children() will wait for all the processes which have been
346
+ # forked. This is a blocking wait.
347
+ #
348
+ def wait_all_children
349
+ until @processes.empty?
350
+ on_wait
351
+ arg = (defined? @on_wait_period and !@on_wait_period.nil?) ? Process::WNOHANG : nil
352
+ kid = wait_one_child(arg)
353
+ if kid == 0 || kid == -1
354
+ sleep @waitpid_blocking_sleep
273
355
  end
356
+ end
357
+ rescue Errno::ECHILD
358
+ # do nothing.
274
359
  end
275
-
276
- alias :wait_all_childs :wait_all_children # compatibility
277
-
278
- #
279
- # You can define run_on_finish(...) that is called when a child in the parent
280
- # process when a child is terminated.
281
- #
282
- # The parameters of run_on_finish(...) are:
283
- #
284
- # - pid of the process, which is terminated
285
- # - exit code of the program
286
- # - identification of the process (if provided in the "start" method)
287
- # - exit signal (0-127: signal name)
288
- # - core dump (1 if there was core dump at exit)
289
- #
290
- # Example:
291
- #
292
- # pfm.run_on_finish(
293
- # lambda {
294
- # |pid,exit_code,ident|
295
- # print "** PID (#{pid}) for #{ident} exited with code #{exit_code}!\n"
296
- # }
297
- # )
298
- #
299
- def run_on_finish(code, pid=0)
300
- begin
301
- self.do_on_finish[pid] = code
302
- rescue
303
- raise "couldn't run on finish!\n"
360
+
361
+ alias_method :wait_all_childs, :wait_all_children # compatibility
362
+
363
+ #
364
+ # max_procs() -- Returns the maximal number of processes the object will fork.
365
+ #
366
+ attr_reader :max_procs
367
+
368
+ #
369
+ # running_procs() -- Returns the pids of the forked processes currently
370
+ # monitored by the Parallel::ForkManager. Note that children are still
371
+ # reported as running until the fork manager harvests them, via the
372
+ # next call to start(...) or wait_all_children().
373
+ #
374
+ def running_procs
375
+ @processes.keys
376
+ end
377
+
378
+ #
379
+ # is_parent()
380
+ #
381
+ # Returns true if within the parent or false if within the child.
382
+ #
383
+ def is_parent()
384
+ !in_child
385
+ end
386
+
387
+ #
388
+ # is_child()
389
+ #
390
+ # Returns true if within the child or false if within the parent.
391
+ #
392
+ def is_child()
393
+ in_child
394
+ end
395
+
396
+ #
397
+ # wait_for_available_procs(nbr) -- Wait until 'nbr' process slots
398
+ # are available. If 'nbr' is nil, it defaults to 1.
399
+ #
400
+ def wait_for_available_procs(nbr)
401
+ nbr ||= 1
402
+
403
+ fail "Number processes '#{nbr}' higher than then max number of processes: #{@max_procs}" if nbr > max_procs
404
+
405
+ wait_one_child(0) until (max_procs - running_procs) >= nbr
406
+ end
407
+
408
+ #
409
+ # You can define a run_on_finish(...) callback that is called in the parent
410
+ # process when a child is terminated.
411
+ #
412
+ # The parameters of run_on_finish(...) are:
413
+ #
414
+ # - pid of the process, which is terminated
415
+ # - exit code of the program
416
+ # - identification of the process (if provided in the "start" method)
417
+ # - exit signal (0-127: signal name)
418
+ # - core dump (1 if there was core dump at exit)
419
+ # - data structure or nil (see Retrieving Data Structures)
420
+ #
421
+ # As of Parallel::ForkManager 1.2.0 run_on_finish supports a block argument.
422
+ #
423
+ # Example:
424
+ #
425
+ # pm.run_on_finish {
426
+ # |pid,exit_code,ident|
427
+ # print "** PID (#{pid}) for #{ident} exited with code #{exit_code}!\n"
428
+ # }
429
+ #
430
+ def run_on_finish(code = nil, pid = 0, &my_block)
431
+ if !code.nil? && !my_block.nil?
432
+ fail "run_on_finish: code and block are mutually exclusive options!"
433
+ end
434
+
435
+ if !code.nil?
436
+ if code.class.to_s == "Proc" && VERSION >= "1.5.0"
437
+ print "Passing Proc has been deprecated as of Parallel::ForkManager #{VERSION}!\nPlease refer to rdoc about how to change your code!\n"
304
438
  end
439
+ @do_on_finish[pid] = code
440
+ elsif !my_block.nil?
441
+ @do_on_finish[pid] = my_block
442
+ end
443
+ rescue TypeError => e
444
+ raise e.message
305
445
  end
306
446
 
447
+ #
448
+ # on_finish is a private method and should not be called directly.
449
+ #
307
450
  def on_finish(*params)
308
- pid = params[0]
309
- code = self.do_on_finish[pid] || self.do_on_finish[0] or return 0
310
- begin
311
- my_argc = code.arity - 1
312
- if my_argc > 0
313
- my_params = params[0 .. my_argc]
314
- else
315
- my_params = [params[0]]
316
- end
317
- params = my_params
318
- code.call(*params)
319
- rescue
320
- raise "on finish failed!\n"
451
+ pid = params[0]
452
+ code = @do_on_finish[pid] || @do_on_finish[0] or return 0
453
+ begin
454
+ my_argc = code.arity - 1
455
+ if my_argc > 0
456
+ my_params = params[0..my_argc]
457
+ else
458
+ my_params = [params[0]]
321
459
  end
460
+ params = my_params
461
+ code.call(*params)
462
+ rescue => e
463
+ raise "on finish failed: #{e.message}!\n"
464
+ end
322
465
  end
323
466
 
324
- #
325
- # You can define a subroutine which is called when the child process needs
326
- # to wait for the startup. If period is not defined, then one call is done per
327
- # child. If period is defined, then code is called periodically and the
328
- # method waits for "period" seconds betwen the two calls. Note, period can be
329
- # fractional number also. The exact "period seconds" is not guaranteed,
330
- # signals can shorten and the process scheduler can make it longer (i.e. on
331
- # busy systems).
332
- #
333
- # No parameters are passed to code on the call.
334
- #
335
- # Example:
336
- # timeout = 0.5
337
- # pfm.run_on_wait(
338
- # lambda {
339
- # print "** Have to wait for one child ...\n"
340
- # },
341
- # timeout
342
- # )
343
- #
344
- def run_on_wait(code, period)
345
- self.do_on_wait = code
346
- self.on_wait_period = period
347
- end
348
-
349
- def on_wait()
350
- begin
351
- if self.do_on_wait.class().name == 'Proc'
352
- self.do_on_wait.call()
353
- if defined? self.on_wait_period
354
- #
355
- # Unfortunately Ruby 1.8 has no concept of 'sigaction',
356
- # so we're unable to check if a signal handler has
357
- # already been installed for a given signal. In this
358
- # case it's no matter, since we define handler, but yikes.
359
- #
360
- Signal.trap("CHLD") do
361
- lambda{}.call()
362
- end
363
- IO.select(nil, nil, nil, self.on_wait_period)
364
- end
365
- end
467
+ #
468
+ # You can define a subroutine which is called when the child process needs
469
+ # to wait for the startup. If period is not defined, then one call is done per
470
+ # child. If period is defined, then code is called periodically and the
471
+ # method waits for "period" seconds between the two calls. Note, period can be
472
+ # fractional number also. The exact "period seconds" is not guaranteed,
473
+ # signals can shorten and the process scheduler can make it longer (i.e. on
474
+ # busy systems).
475
+ #
476
+ # No parameters are passed to code on the call.
477
+ #
478
+ # Example:
479
+ #
480
+ # As of Parallel::ForkManager 1.2.0 run_on_wait supports a block argument.
481
+ #
482
+ # Example:
483
+ # period = 0.5
484
+ # pm.run_on_wait(period) {
485
+ # print "** Have to wait for one child ...\n"
486
+ # }
487
+ #
488
+ #
489
+
490
def run_on_wait(*params, &block)
  # Accepted call forms:
  #   run_on_wait(period) { ... }  -- block form
  #   run_on_wait(code, period)    -- legacy Proc form (deprecated from 1.5.0)
  #
  # Stores the wait period in @on_wait_period and the callback in @do_on_wait.
  # Returns the stored callback, or nil when neither a Proc nor a block is given.
  # Raises RuntimeError on a missing/invalid argument list.

  # `unless params.length` could never fire: 0 is truthy in Ruby.
  fail "period is required by run_on_wait" if params.empty?

  code = nil
  case params.length
  when 1
    period = params[0]
    fail "period must be of type float!" unless period.is_a?(Float)
  when 2
    code, period = params
    fail "run_on_wait: Missing or invalid code block!" unless code.is_a?(Proc)
  else
    fail "run_on_wait: Invalid argument count!"
  end

  # Validate before storing so that a rejected period never ends up in
  # @on_wait_period. Negative values are rejected too, as the message states.
  # A nil period (legacy two-argument form) is left alone.
  if period.is_a?(Numeric) && period <= 0
    fail "Wait period must be greater than 0.0!\n"
  end
  @on_wait_period = period

  if !code.nil? && !block.nil?
    fail "run_on_wait: code and block are mutually exclusive arguments!"
  end

  if !code.nil?
    # Compare version components numerically; a plain String comparison
    # would sort "1.10.0" before "1.5.0".
    if (VERSION.split(".").map(&:to_i) <=> [1, 5, 0]) >= 0
      puts "Passing Proc has been deprecated as of Parallel::ForkManager #{VERSION}!\nPlease refer to rdoc about how to change your code!"
    end

    @do_on_wait = code
  elsif !block.nil?
    @do_on_wait = block
  end
rescue TypeError
  raise "run on wait failed!"
end
368
522
 
369
- #
370
- # You can define a subroutine which is called when a child is started. It is
371
- # called after a successful startup of a child in the parent process.
372
- #
373
- # The parameters of code are as follows:
374
- # - pid of the process which has been started
375
- # - identification of the process (if provided in the "start" method)
376
- #
377
- # Example:
378
- #
379
- # pfm.run_on_start(
380
- # lambda {
381
- # |pid,ident|
382
- # print "run on start ::: #{ident} (#{pid})\n"
383
- # }
384
- # )
385
- #
386
- def run_on_start(code)
387
- begin
388
- self.do_on_start = code
389
- rescue
390
- raise "run on start failed!\n"
391
- end
523
+ #
524
+ # on_wait is a private method as it should not be called directly.
525
+ #
526
def on_wait
  # Runs the callback registered through run_on_wait (if any) and then, when
  # a wait period is configured, pauses for up to that many seconds.
  # Returns nil when there is no callback or no period; otherwise returns
  # whatever IO.select returns.
  return unless @do_on_wait.is_a?(Proc)

  @do_on_wait.call

  # No period configured: the callback runs once and we are done.
  # (`defined? @on_wait_period && !@on_wait_period.nil?` parsed as
  # `defined?(expr)`, which is always truthy, so a nil period used to reach
  # IO.select with no timeout.)
  return if @on_wait_period.nil?

  #
  # Unfortunately Ruby 1.8 has no concept of 'sigaction',
  # so we're unable to check if a signal handler has
  # already been installed for a given signal. In this
  # case it's no matter, since we define handler, but yikes.
  #
  # Only install the no-op handler where the platform knows SIGCHLD;
  # Signal.trap raises ArgumentError for unsupported signals.
  if Signal.list.key?("CHLD")
    Signal.trap("CHLD") do
      # intentionally empty
    end
  end

  IO.select(nil, nil, nil, @on_wait_period)
end
393
542
 
394
- def on_start(*params)
395
- begin
396
- if self.do_on_start.class().name == 'Proc'
397
- my_argc = self.do_on_start.arity - 1
398
- if my_argc > 0
399
- my_params = params[0 .. my_argc]
400
- else
401
- my_params = params[0]
402
- end
403
- params = my_params
404
- self.do_on_start.call(*params)
405
- end
406
- rescue
407
- raise "on_start failed\n"
408
- end
543
+ #
544
+ # You can define a subroutine which is called when a child is started. It is
545
+ # called after a successful startup of a child in the parent process.
546
+ #
547
+ # The parameters of code are as follows:
548
+ # - pid of the process which has been started
549
+ # - identification of the process (if provided in the "start" method)
550
+ #
551
+ # You can pass a block to run_on_start.
552
+ #
553
+ # Example:
554
+ #
555
+ # pm.run_on_start() {
556
+ # |pid,ident|
557
+ # print "run on start ::: #{ident} (#{pid})\n"
558
+ # }
559
+ #
560
+ #
561
def run_on_start(&block)
  # Remember the block as the "child started" callback. Without a block the
  # previously stored callback is left untouched and nil is returned.
  return if block.nil?

  @do_on_start = block
rescue TypeError
  raise "run on start failed!\n"
end
410
566
 
411
- #
412
- # set_max_procs(mp) -- mp is an integer
413
- #
414
- # set_max_procs() allows you to set a new maximum number of children to maintain.
415
- #
416
- # Return: The previous setting of max_procs.
417
- #
418
- def set_max_procs(mp=nil)
419
- if mp == nil
420
- return self.max_proc
567
+ #
568
+ # on_start() is a private method as it should not be called directly.
569
+ #
570
def on_start(*params)
  # Invokes the callback stored by run_on_start, if there is one.
  # Any StandardError raised along the way is re-raised as a RuntimeError
  # with the message "on_start failed".
  return unless @do_on_start.class.name == "Proc"

  # A callback declaring more than one parameter receives that many leading
  # params; otherwise only the first param is passed on.
  last_index = @do_on_start.arity - 1
  args = last_index > 0 ? params[0..last_index] : params[0]

  @do_on_start.call(*args)
rescue
  raise "on_start failed"
end
425
584
 
426
- #
427
- # _waitpid(...) should not be called directly as it is called automatically by
428
- # wait_one_child(...).
429
- #
430
- def _waitpid(pid, flags)
431
- return Process.waitpid(pid, flags)
585
+ #
586
+ # set_max_procs() allows you to set a new maximum number of children
587
+ # to maintain.
588
+ #
589
def set_max_procs(mp = nil)
  # Sets a new maximum number of children and returns it.
  #
  # The parameter defaults to nil, so a call without an argument used to
  # overwrite the limit with nil. Such a call is now a no-op that returns
  # the current limit.
  return @max_procs if mp.nil?

  @max_procs = mp
end
433
592
 
434
- #
435
- # _NT_waitpid(...) is the Windows variant of _waitpid(...) and will be called
436
- # automatically by wait_one_child(...) depending on the value of RUBY_PLATFORM.
437
- # You should not call _NT_waitpid(...) directly.
438
- #
439
- def _NT_waitpid(pid, par)
440
- if par == Process::WNOHANG
441
- pids = self.processes.keys()
442
- if pids.length() == 0
443
- return -1
444
- end
445
-
446
- kid = 0
447
- for my_pid in pids
448
- kid = Process.waitpid(my_pid, par)
449
- if kid != 0
450
- return kid
451
- end
452
- return kid
453
- end
593
+ #
594
+ # set_waitpid_blocking_sleep(seconds) -- Sets the sleep period,
595
+ # in seconds, of the pseudo-blocking calls. Set to 0 to disable.
596
+ #
597
def set_waitpid_blocking_sleep(period)
  # Stored verbatim (no validation) and read back by the pseudo-blocking
  # wait loop. Returns the value that was stored.
  @waitpid_blocking_sleep = period
end
600
+
601
+ #
602
+ # waitpid_blocking_sleep() -- Returns the sleep period, in seconds, of the
603
+ # pseudo-blocking calls. Returns 0 if disabled.
604
+ #
605
def waitpid_blocking_sleep
  # Plain reader for the value stored by set_waitpid_blocking_sleep.
  @waitpid_blocking_sleep
end
608
+
609
+ #
610
+ # _waitpid(...) is a private method as it should not be called directly.
611
+ # It is called automatically by wait_one_child(...).
612
+ #
613
def _waitpid(_pid, flag)
  # The pid argument is ignored. A flag of 0 selects the pseudo-blocking
  # wait; any other flag (including nil) selects the non-blocking poll.
  return _waitpid_blocking if flag == 0

  _waitpid_non_blocking
end
616
+
617
+ #
618
+ # Private method used internally by _waitpid(...).
619
+ #
620
def _waitpid_non_blocking
  # Polls every pid from running_procs once with WNOHANG.
  # Returns the first pid whose child has been reaped, or 0 when none has.
  running_procs.each do |pid|
    reaped = waitpid(pid, Process::WNOHANG)

    # nil means the child is still running. 0 is skipped as well, since it
    # is never a valid child pid (and 0 is truthy in Ruby).
    next if reaped.nil? || reaped == 0

    return pid unless reaped == -1

    warn "Child process #{pid} disappeared. A call to 'waitpid' outside of Parallel::ForkManager might have reaped it."
    # It's gone. Let's clean the process entry.
    # (Was `@processes.delete[pid]`, which calls delete without arguments
    # and raises ArgumentError.)
    @processes.delete(pid)
  end

  0
end
634
+
635
+ #
636
+ # Private method used internally by _waitpid(...). Simulates a blocking
637
+ # waitpid(...) call.
638
+ #
639
def _waitpid_blocking
  # Simulates a blocking wait by polling _waitpid_non_blocking and sleeping
  # @waitpid_blocking_sleep seconds between polls.
  # Returns the pid of a finished child.
  sleep_period = @waitpid_blocking_sleep

  # A period of 0 (or nil) disables polling: fall back to a real blocking
  # wait. This call used to sit after the loop, where it was unreachable.
  return waitpid(-1, 0) if sleep_period.nil? || sleep_period == 0

  # pseudo-blocking
  loop do
    pid = _waitpid_non_blocking

    # _waitpid_non_blocking returns 0 for "nothing finished yet". 0 is truthy
    # in Ruby, so `return pid if pid` returned on the first poll every time.
    return pid if pid && pid != 0

    # Nothing left to wait for: hand back the poll result instead of
    # sleeping forever.
    # NOTE(review): assumes @processes is the pid-keyed Hash that
    # running_procs is derived from -- confirm.
    return pid if @processes.empty?

    sleep(sleep_period)
  end
end
651
+
652
+ #
653
+ # _serialize_data is a private method and should not be called directly.
654
+ #
655
+ # Currently supports Marshal.dump() and YAML to serialize data.
656
+ #
657
def _serialize_data(store_tempfile)
  # Writes @serializer.serialize(@data_structure) to store_tempfile in binary
  # mode. Does nothing when no serializer is configured. Returns 1 either way.
  # Any StandardError is re-raised as a RuntimeError naming the file.
  unless @serializer.nil?
    File.open(store_tempfile, "wb") do |io|
      io.write(@serializer.serialize(@data_structure))
    end
  end

  1
rescue => e
  raise "Error writing/serializing #{store_tempfile}: #{e.message}"
end
668
+
669
+ #
670
+ # _unserialize_data is a private method and should not be called directly.
671
+ #
672
+ # Currently only supports Marshal.load() to unserialize data.
673
+ #
674
def _unserialize_data(store_tempfile)
  # Reads store_tempfile in binary mode and stores the deserialized result in
  # @data_structure. Does nothing when no serializer is configured.
  # Returns 1 either way.
  unless @serializer.nil?
    raw = File.binread(store_tempfile)
    @data_structure = @serializer.deserialize(raw)
  end

  1
rescue => e
  # Clean up temp file if it exists.
  # Otherwise we'll have a bunch of 'em laying around.
  begin
    File.unlink(store_tempfile)
  rescue StandardError
    nil # XXX: suppress errors from unlink.
  end

  raise "Error reading/deserializing #{store_tempfile}: #{e.message}"
end
688
+
689
+ # private methods
690
# Internal callbacks, waitpid helpers and (de)serialization helpers.
private :on_start, :on_finish, :on_wait,
        :_waitpid, :_waitpid_non_blocking, :_waitpid_blocking,
        :_serialize_data, :_unserialize_data

# Everything defined below this marker is private as well.
private

attr_reader :parent_pid, :tempdir
attr_accessor :in_child
699
+
700
def setup_instance_variables(max_procs, params)
  # Initializes the instance state from max_procs and the string-keyed
  # params Hash. Raises MissingTempDirError when the temp dir is not an
  # existing directory.
  @max_procs = max_procs

  # TODO: remove this, it seems to be unused.
  @debug = params.fetch("debug", false)

  # Temp dir: taken from params or the system default, always stored with a
  # trailing slash.
  base_dir = params.fetch("tempdir", Dir.tmpdir)
  @tempdir = base_dir.end_with?("/") ? base_dir : base_dir + "/"
  unless File.directory?(@tempdir)
    fail(MissingTempDirError, "#{@tempdir} doesn't exist or is not a directory.")
  end

  default_interface = ProcessInterface::Instance.new
  @process_interface = params.fetch("process_interface", default_interface)

  # Bookkeeping defaults.
  @data_structure = nil
  @processes = {}
  @do_on_finish = {}
  @in_child = false
  @has_block = false
  @on_wait_period = nil
  @parent_pid = $PID
  @waitpid_blocking_sleep = 1

  # "serialize_as" wins over "serialize_type"; "marshal" is the fallback.
  serializer_kind = params["serialize_as"] || params["serialize_type"] || "marshal"
  @serializer = Parallel::ForkManager::Serializer.new(serializer_kind)
end
729
+
730
# We care about the Ruby version for a couple of reasons:
#
# * The new lambda syntax -> (1.9 and above)
# * Finalizers (1.8 and above)
#
# So we require Ruby 1.9 or newer. The previous prefix check ("1.9" / "2.")
# also rejected Ruby 3.x and later, although both features exist there.
#
# Returns nil when the running Ruby is supported; raises RuntimeError
# otherwise.
def check_ruby_version
  # Compare [major, minor] numerically rather than by string prefix.
  major_minor = RUBY_VERSION.split(".").first(2).map(&:to_i)
  return if (major_minor <=> [1, 9]) >= 0

  fail "Unsupported Ruby version #{RUBY_VERSION}!"
end
458
- end
459
-
460
- end
741
+ end # class
742
+ end # module