RubyGems - parallel-forkmanager - Versions diffs - 1.0.1 → 2.0.6 - Mend

parallel-forkmanager 1.0.1 → 2.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

checksums.yaml +7 -0
data/.gitignore +6 -0
data/.rubocop.yml +17 -0
data/.travis.yml +5 -0
data/CHANGELOG.md +59 -0
data/EXAMPLES.yard +40 -0
data/Gemfile +3 -0
data/README.md +136 -0
data/Rakefile +18 -0
data/examples/callbacks.rb +37 -0
data/examples/data_structures_advanced.rb +67 -0
data/examples/data_structures_string.rb +44 -0
data/examples/parallel_http_get.rb +64 -0
data/examples/use_pfm.rb +30 -0
data/lib/parallel/forkmanager.rb +693 -411
data/lib/parallel/forkmanager/dummy_process_status.rb +30 -0
data/lib/parallel/forkmanager/error.rb +20 -0
data/lib/parallel/forkmanager/process_interface.rb +51 -0
data/lib/parallel/forkmanager/serializer.rb +59 -0
data/lib/parallel/forkmanager/version.rb +8 -0
data/parallel-forkmanager.gemspec +32 -0
metadata +115 -36
data/parallel_http_get.rb +0 -53
data/use_pfm.rb +0 -40

data/examples/parallel_http_get.rb ADDED

@@ -0,0 +1,64 @@
+#!/usr/bin/env ruby
+# require 'rubygems'
+require "net/http"
+# require 'forkmanager'
+require "lib/parallel/forkmanager.rb"
+min_version = "1.2.0"
+if Parallel::ForkManager::VERSION < min_version
+  warn <<-ETX
+This script will only run under Parallel::ForkManager #{min_version} or newer!
+Please update your version of Parallel::ForkManager and try again!
+  ETX
+  exit 1
+end
+my_urls = [
+  "http://www.fakesite.us/",
+  "http://www.cnn.com/",
+  "http://oreilly.com/",
+  "http://www.cakewalk.com/",
+  "http://www.asdfsemicolonl.kj/index.htm"
+]
+max_proc = 20
+my_timeout = 5 # seconds
+pfm = Parallel::ForkManager.new(max_proc)
+pfm.run_on_finish do |pid, exit_code, ident|
+  print "** PID (#{pid}) for #{ident} exited with code #{exit_code}!\n"
+end
+my_urls.each do |my_url|
+  begin
+    pfm.start(my_url) && next
+    url = URI.parse(my_url)
+    begin
+      http = Net::HTTP.new(url.host, url.port)
+      http.open_timeout = http.read_timeout = my_timeout
+      res = http.get(url.path)
+      status = res.code
+      # You may want to check some other code than 200 here!
+      if status.to_i == 200
+        pfm.finish(0)
+      else
+        pfm.finish(255)
+      end
+  rescue Timeout::Error => e
+    print "*** #{my_url}: #{e.message}!\n"
+    pfm.finish(255)
+    end # begin
+rescue StandardError => e
+  print "Connection error: #{e.message}!\n"
+  pfm.finish(255)
+  end
+end
+pfm.wait_all_children
+print "\n"

data/examples/use_pfm.rb ADDED

@@ -0,0 +1,30 @@
+#!/usr/bin/env ruby
+require "rubygems"
+require "parallel/forkmanager"
+max_procs = 5
+pfm = Parallel::ForkManager.new(max_procs)
+items = (1..10).to_a
+pfm.run_on_start do |pid, ident|
+  print "run on start ::: #{ident} (#{pid})\n"
+end
+pfm.run_on_finish do |pid, exit_code, ident|
+  print "run on finish ::: ** PID: #{pid} EXIT: #{exit_code} IDENT: #{ident}\n"
+end
+period = 1.0
+pfm.run_on_wait(period) do
+  print "** Have to wait for one child ...\n"
+end
+items.each do |item|
+  my_item = "nate-" + item.to_s
+  pfm.start(my_item) && next
+  pfm.finish(23)
+end
+pfm.wait_all_children

data/lib/parallel/forkmanager.rb CHANGED

@@ -1,460 +1,742 @@
-# Parallel::ForkManager -- A simple parallel processing fork manager.
-#
-#
-# Copyright (c) 2008 Nathan Patwardhan
-#
-# Author: Nathan Patwardhan <noopy.org@gmail.com>
-#
-# Documentation: Nathan Patwardhan <noopy.org@gmail.com>, based on Perl Parallel::ForkManager documentation by Noah Robin <sitz@onastick.net> and dlux <dlux@kapu.hu>.
-#
-# Credits (for original Perl implementation):
-# - Chuck Hirstius <chirstius@megapathdsl.net> (callback exit status, original Perl example)
-# - Grant Hopwood <hopwoodg@valero.com> (win32 port)
-# - Mark Southern <mark_southern@merck.com> (bugfix)
-#
-# Credits (Ruby port):
-# - Robert Klemme <shortcutter@googlemail.com> (clarification on Ruby lambda)
-# - David A. Black <dblack@rubypal.com>        (clarification on Ruby lambda)
-# - Roger Pack <rogerdpack@gmail.com>          (bugfix)
-#
-# == Overview
-#
-# Parallel::ForkManager is used for operations that you would like to do in parallel
-# (e.g. downloading a bunch of web content simultaneously) but would prefer to use
-# fork() instead of threads.  Instead of managing child processes yourself Parallel::ForkManager
-# handles the cleanup for you.  Parallel::ForkManager also provides some nifty callbacks
-# you can use at start and finish, or while you're waiting for child processes to complete.
-#
-# == Introduction
-#
-# If you've used fork() before, you're well aware that you need to be responsible
-# for managing (i.e. cleaning up) the processes that were created as a result.
-# Parallel::ForkManager handles this for you such that you start() and finish()
-# a process without having to worry about child processes along the way.
-#
-# For instance you can use the following code to grab a list of webpages in
-# parallel using Net::HTTP -- and store the output in files.
-#
-# == Example
-#
-#  #!/usr/bin/env ruby
-#
-#  require 'net/http'
-#  require 'Parallel/ForkManager'
-#
-#  save_dir = '/tmp'
-#
-#  my_urls = [
-#      'http://www.cnn.com/index.html',
-#      'http://www.oreilly.com/index.html',
-#      'http://www.cakewalk.com/index.html',
-#      'http://www.asdfsemicolonl.kj/index.htm'
-#  ]
-#
-#  max_proc = 20
-#  pfm = Parallel::ForkManager.new(max_proc)
-#
-#  pfm.run_on_finish(
-#      lambda {
-#          |pid,exit_code,ident|
-#          print "** PID (#{pid}) for #{ident} exited with code #{exit_code}!\n"
-#      }
-#  )
-#
-#  for my_url in my_urls
-#      pfm.start(my_url) and next
-#
-#      url = URI.parse(my_url)
-#
-#      begin
-#          req = Net::HTTP::Get.new(url.path)
-#          res = Net::HTTP.start(url.host, url.port) {|http|
-#              http.request(req)
-#          }
-#      rescue
-#          pfm.finish(255)
-#      end
-#
-#      status = res.code
-#      out_file = save_dir + '/' + url.host + '.txt';
-#
-#      if status.to_i == 200
-#          f = File.open(out_file, 'w')
-#          f.print res.body
-#          f.close()
-#          pfm.finish(0)
-#      else
-#          pfm.finish(255)
-#      end
-#  end
-#
-#  pfm.wait_all_children()
-#
-# First you need to instantiate the ForkManager with the "new" constructor.
-# You must specify the maximum number of processes to be created. If you
-# specify 0, then NO fork will be done; this is good for debugging purposes.
-#
-# Next, use pfm.start() to do the fork. pfm returns 0 for the child process,
-# and child pid for the parent process.  The "and next" skips the internal
-# loop in the parent process.
-#
-# - pm.start() dies if the fork fails.
-#
-# - pfm.finish() terminates the child process (assuming a fork was done in the "start").
-#
-# - You cannot use pfm.start() if you are already in the child process.
-# If you want to manage another set of subprocesses in the child process,
-# you must instantiate another Parallel::ForkManager object!
-#
-# == Bugs and Limitations
-#
-# Parallel::ForkManager is a Ruby-centric rebase of Perl Parallel::ForkManager 0.7.5.
-# While much of the original code was rewritten such that ForkManager worked in the "Ruby way",
-# you might find some "warts" due to inconsistencies between Ruby and the original Perl code.
-#
-# Do not use Parallel::ForkManager in an environment where other child
-# processes can affect the run of the main program, so using this module
-# is not recommended in an environment where fork() / wait() is already used.
-#
-# If you want to use more than one copy of the Parallel::ForkManager then
-# you have to make sure that all children processes are terminated -- before you
-# use the second object in the main program.
-#
-# You are free to use a new copy of Parallel::ForkManager in the child
-# processes, although I don't think it makes sense.
-#
+require "English"
+require "tmpdir"
+require "yaml"
+require_relative "forkmanager/version"
+require_relative "forkmanager/process_interface"
+require_relative "forkmanager/serializer"
+require_relative "forkmanager/dummy_process_status"
+##
+# This module provides a namespace.
 module Parallel
+  ##
+  # This class provides a higher level interface to +fork+, allowing you to
+  # limit the number of child processes spawned and it provides a mechanism for
+  # child processes to return data structures to the parent.
+  class ForkManager
+    include Parallel::ForkManager::ProcessInterface
-class ForkManager
-    VERSION = '1.0.1' # $Revision: 1.2 $
+    ##
+    # Instantiate a Parallel::ForkManager object. You must specify the maximum
+    # number of children to fork off. If you specify 0 (zero), then no children
+    # will be forked.  This is intended for debugging purposes.
+    #
+    # The optional second parameter, params, is only used if you want to customize
+    # the behavior that children will use to send back some data (see Retrieving
+    # Data Structures below) to the parent.  The following values are currently
+    # accepted for params (and their meanings):
+    # - params['tempdir'] represents the location of the temporary directory where serialized data structures will be stored.
+    # - params['serialize_as'] represents how the data will be serialized.
+    #
+    # XXX: Not quite true at the moment, debug is set to 0 if no params are
+    # provided, and the serialization isn't set.
+    #
+    # If params has not been provided, the following values are set:
+    # - @debug is set to non-zero to provide debugging messages.  Default is 0.
+    # - @tempdir is set to Dir.tmpdir() (likely defaults to /tmp).
+    #
+    # NOTE NOTE NOTE: If you set tempdir to a directory that does not exist,
+    # Parallel::ForkManager will <em>not</em> create this directory for you
+    # and new() will exit!
+    #
+    # @param max_procs[Integer] maximum number of concurrent child processes.
+    # @param params[Hash] configuration parameters.
+    def initialize(max_procs = 0, params = {})
+      check_ruby_version
+      setup_instance_variables(max_procs, params)
-# Set debug to 1 for debugging messages.
-    attr_accessor :debug
-    attr_accessor :max_proc, :processes, :in_child, :on_wait_period
-    attr_accessor :do_on_start, :do_on_finish, :do_on_wait
+      # Always provide debug information if our max processes are zero!
+      if @max_procs.zero?
+        puts "Zero processes have been specified so we will not fork and will proceed in debug mode!"
+        puts "in initialize #{max_procs}!"
+        puts "Will use tempdir #{@tempdir}"
+      end
-    def initialize(procs)
-        @debug = 0
-        @max_proc = procs
-        @processes = {}
-        @do_on_finish = {}
-        @in_child = 0
+      # Appetite for Destruction.
+      ObjectSpace.define_finalizer(self, self.class._finalize)
+    end
-        if self.debug == 1
-            print "in initialize #{max_proc}!\n"
+    ##
+    # This finalizer is not meant to be called manually, it cleans up temporary
+    # files which were used to return serialized data from the children.
+    def self._finalize
+      proc do
+        Dir.foreach(tempdir) do |file_name|
+          prefix = "Parallel-ForkManager-#{parent_pid}-"
+          next unless file_name.start_with prefix
+          File.unlink("#{tempdir}/#{file_name}")
         end
+      end
     end
-#
-# start("string") -- "string" identification is optional.
-#
-# start("string") "puts the fork in Parallel::ForkManager" -- as start() does
-# the fork().
-#
-# start("string") takes an optional "string" argument to
-# use as a process identifier.  It is used by
-# the "run_on_finish" callback for identifying the finished
-# process.  See run_on_finish() for more information.
-#
-# Return: PID of child process if in parent, or 0 if in the
-# child process.
-    def start(identification=nil)
-        if self.in_child == 1
-            puts "Cannot start another process while you are in the child process"
-            exit 1
-        end
+    attr_reader :max_procs
-        while(self.processes.length() >= self.max_proc)
-            self.on_wait()
-            if defined? self.on_wait_period
-                arg = Process::WNOHANG
-            else
-                arg = nil
-            end
-            self.wait_one_child(arg)
+    ##
+    # start("string") "puts the fork in Parallel::ForkManager" -- as start() does
+    # the fork().  start() returns the pid of the child process for the parent,
+    # and nil for the child process.  If you set the max_procs parameter for the
+    # constructor to 0, then no fork is performed and pm.start() simply
+    # returns nil.
+    #
+    # start("string") takes an optional "string" argument to use as a process
+    # identifier.  It is used by the "run_on_finish" callback for identifying
+    # the finished process.  See run_on_finish() for more information.
+    #
+    # For example:
+    #
+    #   my_ident = "webwacker-1.0"
+    #   pm.start(my_ident)
+    #
+    # start("string") { block } takes an optional block parameter
+    # that tells the ForkManager to follow Ruby fork() semantics for blocks.
+    # For example:
+    #
+    #   my_ident = "webwacker-1.0"
+    #   pm.start(my_ident) {
+    #       print "As easy as "
+    #       [1,2,3].each {
+    #           |i|
+    #           print i, "... "
+    #       }
+    #   }
+    #
+    # start("string", arg1, arg2, ... , argN) { block } requires a block parameter
+    # that tells the ForkManager to follow Ruby fork() semantics for blocks.  Like
+    # start("string"), "string" is an optional argument to use as a process
+    # identifier and is used by the "run_on_finish" callback for identifying
+    # the finished process.  For example:
+    #
+    #   my_ident = "webwacker-1.0"
+    #   pm.start(my_ident, 1, 2, 3) {
+    #       |*my_args|
+    #       unless my_args.empty?
+    #           print "As easy as "
+    #           my_args.each {
+    #               |i|
+    #               print i, "... "
+    #           }
+    #       end
+    #   }
+    #
+    # <em>NOTE NOTE NOTE</em>: when you use start("string") with an optional block
+    # parameter, the code in your block <em>must</em> explicitly exit non-zero
+    # if you are using callbacks with the ForkManager (e.g. run_on_finish).
+    # This is because fork(), when run with a block parameter, terminates the
+    # subprocess with a status of 0 by default.  If your block fails to exit
+    # non-zero, *all* of your exit_code(s) will be zero regardless of any value
+    # you might have passed to finish(...).
+    #
+    # To accommodate this behavior of fork and blocks, you can do
+    # something like the following:
+    #
+    #   my_urls = [ ... some list of urls here ... ]
+    #   my_ident = "webwacker-1.0"
+    #
+    #   my_urls.each {
+    #       |my_url|
+    #       pm.start(my_ident) {
+    #           my_status = get_some_url(my_url)
+    #           if my_status.to_i == 200
+    #               exit 0
+    #           else
+    #               exit 255
+    #       }
+    #   }
+    #
+    #   ... etc ...
+    #
+    def start(identification = nil, *args, &run_block)
+      fail AttemptedStartInChildProcessError if in_child
+      while @max_procs.nonzero? && @processes.length >= @max_procs
+        on_wait
+        arg = (defined? @on_wait_period && !@on_wait_period.nil?) ? Process::WNOHANG : nil
+        kid = wait_one_child(arg)
+        if kid == 0 || kid == -1
+          sleep @waitpid_blocking_sleep
         end
-        self.wait_children()
-        if self.max_proc
-            pid = fork()
-            if ! defined? pid
-                print "Cannot fork #{$!}\n"
-                exit 1
-            end
-            if pid != nil
-                self.processes[pid] = identification
-                self.on_start(pid, identification)
-            else
-                if ! pid
-                    self.in_child = 1
-                end
-            end
-            return pid
+      end
+      wait_children
+      if @max_procs.nonzero?
+        if block_given?
+          fail "start(...) wrong number of args" if run_block.arity >= 0 && args.size != run_block.arity
+          @has_block = true
+          pid = (!args.empty?) ?
+            fork { run_block.call(*args); } :
+            fork { run_block.call(); }
         else
-            self.processes[$$] = identification
-            self.on_start($$, identification)
-            return 0
-        end
-    end
+          fail "start(...) args given but block is empty!" unless args.empty?
-#
-# finish(exit_code) -- exit_code is optional
-#
-# finish() loses the child process by exiting and accepts an optional exit code.
-# Default exit code is 0 and can be retrieved in the parent via callback.
-# If you're running the program in debug mode (max_proc == 0), this method
-# doesn't do anything.
-#
-    def finish(exit_code = 0)
-        if self.in_child == 1
-            exit exit_code || 0
+          pid = fork
         end
+        fail "Cannot fork #{$ERROR_INFO}" unless defined? pid
-        if self.max_proc == 0
-            self.on_finish($$, exit_code, self.processes[$$], 0, 0)
-            self.processes.delete($$)
+        if pid.nil?
+          self.in_child = true
+        else
+          @processes[pid] = identification
+          on_start(pid, identification)
         end
-        return 0
+        return pid
+      else
+        @processes[$PID] = identification
+        on_start($PID, identification)
+        return nil
+      end
     end
-    def wait_children()
-        return if self.processes.empty?
-        kid = nil # Should our default be nil?
-        loop do
-            kid = self.wait_one_child(Process::WNOHANG)
-            break if kid > 0 || kid < -1
+    #
+    # finish(exit_code, [data_structure]) -- exit_code is optional
+    #
+    # finish() closes the child process by exiting and accepts an optional exit
+    # code (default exit code is 0) which can be retrieved in the parent via
+    # callback.  If you're running the program in debug mode (max_proc == 0),
+    # this method just calls the callback.
+    #
+    # If <em>data_structure</em> is provided, then <em>data structure</em> is
+    # serialized and passed to the parent process. See <em>Retrieving Data
+    # Structures</em> in the next section for more info.  For example:
+    #
+    #    %w{Fred Wilma Ernie Bert Lucy Ethel Curly Moe Larry}.each {
+    #        |person|
+    #        # pm.start(...) here
+    #
+    #        # ... etc ...
+    #
+    #        # Pass along data structure to finish().
+    #        pm.finish(0, {'person' => person})
+    #    }
+    #
+    #
+    # === Retrieving Data Structures
+    #
+    # The ability for the parent to retrieve data structures from child processes
+    # was adapted to Parallel::ForkManager 1.5.0 (and newer) from Perl Parallel::ForkManager.
+    # This functionality was originally introduced in Perl Parallel::ForkManager
+    # 0.7.6.
+    #
+    # Each child process may optionally send 1 data structure back to the parent.
+    # By data structure, we mean a string, hash, or array. The contents of the
+    # data structure are written out to temporary files on disk using the Marshal
+    # dump() method.  This data structure is then retrieved from within the code
+    # you send to the run_on_finish callback.
+    #
+    # NOTE NOTE NOTE: Only serialization with Marshal and yaml are supported at
+    # this time.  Future versions of Parallel::ForkManager <em>may</em> support
+    # expanded functionality!
+    #
+    # There are 2 steps involved in retrieving data structures:
+    # 1. The data structure the child wishes to send back to the parent is provided as the second argument to the finish() call. It is up to the child to decide whether or not to send anything back to the parent.
+    # 2. The data structure is retrieved using the callback provided in the run_on_finish() method.
+    #
+    # Data structure retrieval is <em>not</em> the same as returning a data
+    # structure from a method call!  The data structure referenced by a given
+    # child process is serialized and written out to a file in the type specified
+    # earlier in serialize_as.  If serialize_as was not specified earlier, then
+    # no serialization will be done.
+    #
+    # The file is subsequently read back into memory and a new data structure that
+    # belongs to the parent process is created.  Therefore it is recommended that
+    # you keep the returned structure small in size to mitigate any possible
+    # performance penalties.
+    #
+    def finish(exit_code = 0, data_structure = nil)
+      if @has_block
+        fail "Do not use finish(...) when using blocks.  Use an explicit exit in your block instead!\n"
+      end
+      if in_child
+        exit_code ||= 0
+        unless data_structure.nil?
+          @data_structure = data_structure
+          the_tempfile = "#{@tempdir}Parallel-ForkManager-#{@parent_pid}-#{$PID}.txt"
+          begin
+            fail "Unable to serialize data!" unless _serialize_data(the_tempfile)
+          rescue => e
+            puts "Unable to store #{the_tempfile}: #{e.message}"
+            exit 1
+          end
         end
+        Kernel.exit!(exit_code)
+      end
+      if @max_procs == 0
+        on_finish($PID, exit_code, @processes[$PID], 0, 0)
+        @processes.delete($PID)
+      end
+      0
+    end
+    # reap_finished_children() / wait_children()
+    #
+    # This is a non-blocking call to reap children and execute callbacks independent
+    # of calls to "start" or "wait_all_children". Use this in scenarios where
+    # "start" is called infrequently but you would like the callbacks executed quickly.
+    def wait_children
+      return if @processes.keys.empty?
+      kid = nil
+      begin
+        begin
+          kid = wait_one_child(Process::WNOHANG)
+        end while kid > 0 || kid < -1
+      rescue Errno::ECHILD
+        return
+      end
     end
-    alias :wait_childs :wait_children # compatibility
-#
-# Probably won't want to call this directly.  Just let wait_all_children(...)
-# make the call for you.
-#
-    def wait_one_child(parent)
-        kid = nil
-        while true
-            # Call _NT_waitpid(...) if we're using a Windows or Java variant.
-            if(RUBY_PLATFORM =~ /mswin|mingw|bccwin|wince|emx|java/)
-                kid = self._NT_waitpid(-1, parent ||= 0)
-            else
-                kid = self._waitpid(-1, parent ||= 0)
+    alias_method :wait_childs, :wait_children # compatibility
+    alias_method :reap_finished_children, :wait_children; # behavioral synonym for clarity
+    #
+    # Probably won't want to call this directly.  Just let wait_all_children(...)
+    # make the call for you.
+    #
+    def wait_one_child(par)
+      params = par || 0
+      kid = nil
+      loop do
+        kid = _waitpid(-1, params)
+        break if kid.nil? || kid == 0 || kid == -1 # Win32 returns negative PIDs
+        redo unless @processes.key?(kid)
+        id = @processes.delete(kid)
+        # Retrieve child data structure, if any.
+        the_retr_data = nil
+        the_tempfile = "#{@tempdir}Parallel-ForkManager-#{$PID}-#{kid}.txt"
+        begin
+          if File.exist?(the_tempfile) && !File.zero?(the_tempfile)
+            unless _unserialize_data(the_tempfile)
+              fail "Unable to unserialize data!"
             end
-            last if kid == 0 or kid == -1 # Win32 returns negative PIDs
-            redo if ! self.processes.has_key?(kid)
-            id = self.processes.delete(kid)
-            self.on_finish(kid, $? >> 8, id, $? & 0x7f, $? & 0x80 ? 1 : 0)
-            break
+            the_retr_data = @data_structure
+          end
+          File.unlink(the_tempfile) if File.exist?(the_tempfile)
+        rescue => e
+          print "wait_one_child failed to retrieve object: #{e.message}\n"
+          exit 1
         end
-        kid
+        status = child_status
+        on_finish(kid, status.exitstatus, id, status.stopsig, status.coredump?, the_retr_data)
+        break
+      end
+      kid ||= 0
+      kid
     end
-#
-# wait_all_children() will wait for all the processes which have been
-# forked. This is a blocking wait.
-#
-    def wait_all_children()
-        while ! self.processes.empty?
-            self.on_wait()
-            if defined? self.on_wait_period
-                arg = Process::WNOHANG
-            else
-                arg = nil
-            end
-            self.wait_one_child(arg)
+    #
+    # wait_all_children() will wait for all the processes which have been
+    # forked. This is a blocking wait.
+    #
+    def wait_all_children
+      until @processes.empty?
+        on_wait
+        arg = (defined? @on_wait_period and !@on_wait_period.nil?) ? Process::WNOHANG : nil
+        kid = wait_one_child(arg)
+        if kid == 0 || kid == -1
+          sleep @waitpid_blocking_sleep
         end
+      end
+    rescue Errno::ECHILD
+      # do nothing.
     end
-    alias :wait_all_childs :wait_all_children # compatibility
-#
-# You can define run_on_finish(...) that is called when a child in the parent
-# process when a child is terminated.
-#
-# The parameters of run_on_finish(...) are:
-#
-# - pid of the process, which is terminated
-# - exit code of the program
-# - identification of the process (if provided in the "start" method)
-# - exit signal (0-127: signal name)
-# - core dump (1 if there was core dump at exit)
-#
-# Example:
-#
-#   pfm.run_on_finish(
-#       lambda {
-#           |pid,exit_code,ident|
-#           print "** PID (#{pid}) for #{ident} exited with code #{exit_code}!\n"
-#       }
-#   )
-#
-    def run_on_finish(code, pid=0)
-        begin
-            self.do_on_finish[pid] = code
-        rescue
-            raise "couldn't run on finish!\n"
+    alias_method :wait_all_childs, :wait_all_children # compatibility
+    #
+    # max_procs() -- Returns the maximal number of processes the object will fork.
+    #
+    attr_reader :max_procs
+    #
+    # running_procs() -- Returns the pids of the forked processes currently
+    # monitored by the Parallel::ForkManager.  Note that children are still
+    # reported as running until the fork manager harvests them, via the
+    # next call to start(...) or wait_all_children().
+    #
+    def running_procs
+      @processes.keys
+    end
+    #
+    # is_parent()
+    #
+    # Returns true if within the parent or false if within the child.
+    #
+    def is_parent()
+      !in_child
+    end
+    #
+    # is_child()
+    #
+    # Returns true if within the child or false if within the parent.
+    #
+    def is_child()
+      in_child
+    end
+    #
+    # wait_for_available_procs(nbr) -- Wait until 'nbr' process slots
+    # are available.  If 'nbr' is nil, it defaults to 1.
+    #
+    def wait_for_available_procs(nbr)
+      nbr ||= 1
+      fail "Number processes '#{nbr}' higher than then max number of processes: #{@max_procs}" if nbr > max_procs
+      wait_one_child(0) until (max_procs - running_procs) >= nbr
+    end
+    #
+    # You can define a run_on_finish(...) callback that is called in the parent
+    # process when a child is terminated.
+    #
+    # The parameters of run_on_finish(...) are:
+    #
+    # - pid of the process, which is terminated
+    # - exit code of the program
+    # - identification of the process (if provided in the "start" method)
+    # - exit signal (0-127: signal name)
+    # - core dump (1 if there was core dump at exit)
+    # - data structure or nil (see Retrieving Data Structures)
+    #
+    # As of Parallel::ForkManager 1.2.0 run_on_finish supports a block argument.
+    #
+    # Example:
+    #
+    #   pm.run_on_finish {
+    #           |pid,exit_code,ident|
+    #           print "** PID (#{pid}) for #{ident} exited with code #{exit_code}!\n"
+    #   }
+    #
+    def run_on_finish(code = nil, pid = 0, &my_block)
+      if !code.nil? && !my_block.nil?
+        fail "run_on_finish: code and block are mutually exclusive options!"
+      end
+      if !code.nil?
+        if code.class.to_s == "Proc" && VERSION >= "1.5.0"
+          print "Passing Proc has been deprecated as of Parallel::ForkManager #{VERSION}!\nPlease refer to rdoc about how to change your code!\n"
         end
+        @do_on_finish[pid] = code
+      elsif !my_block.nil?
+        @do_on_finish[pid] = my_block
+      end
+    rescue TypeError => e
+      raise e.message
     end
+    #
+    # on_finish is a private method and should not be called directly.
+    #
     def on_finish(*params)
-        pid = params[0]
-        code = self.do_on_finish[pid] || self.do_on_finish[0] or return 0
-        begin
-            my_argc = code.arity - 1
-            if my_argc > 0
-                my_params = params[0 .. my_argc]
-            else
-                my_params = [params[0]]
-            end
-            params = my_params
-            code.call(*params)
-        rescue
-            raise "on finish failed!\n"
+      pid = params[0]
+      code = @do_on_finish[pid] || @do_on_finish[0] or return 0
+      begin
+        my_argc = code.arity - 1
+        if my_argc > 0
+          my_params = params[0..my_argc]
+        else
+          my_params = [params[0]]
         end
+        params = my_params
+        code.call(*params)
+      rescue => e
+        raise "on finish failed: #{e.message}!\n"
+      end
     end
-#
-# You can define a subroutine which is called when the child process needs
-# to wait for the startup. If period is not defined, then one call is done per
-# child. If period is defined, then code is called periodically and the
-# method waits for "period" seconds betwen the two calls. Note, period can be
-# fractional number also. The exact "period seconds" is not guaranteed,
-# signals can shorten and the process scheduler can make it longer (i.e. on
-# busy systems).
-#
-# No parameters are passed to code on the call.
-#
-# Example:
-#   timeout = 0.5
-#   pfm.run_on_wait(
-#       lambda {
-#           print "** Have to wait for one child ...\n"
-#       },
-#       timeout
-#   )
-#
-    def run_on_wait(code, period)
-        self.do_on_wait = code
-        self.on_wait_period = period
-    end
-    def on_wait()
-        begin
-            if self.do_on_wait.class().name == 'Proc'
-                self.do_on_wait.call()
-                if defined? self.on_wait_period
-                    #
-                    # Unfortunately Ruby 1.8 has no concept of 'sigaction',
-                    # so we're unable to check if a signal handler has
-                    # already been installed for a given signal.  In this
-                    # case it's no matter, since we define handler, but yikes.
-                    #
-                    Signal.trap("CHLD") do
-                        lambda{}.call()
-                    end
-                    IO.select(nil, nil, nil, self.on_wait_period)
-                end
-            end
+    #
+    # You can define a subroutine which is called when the child process needs
+    # to wait for the startup. If period is not defined, then one call is done per
+    # child. If period is defined, then code is called periodically and the
+    # method waits for "period" seconds between the two calls. Note, period can be
+    # fractional number also. The exact "period seconds" is not guaranteed,
+    # signals can shorten and the process scheduler can make it longer (i.e. on
+    # busy systems).
+    #
+    # No parameters are passed to code on the call.
+    #
+    # Example:
+    #
+    # As of Parallel::ForkManager 1.2.0 run_on_wait supports a block argument.
+    #
+    # Example:
+    #   period = 0.5
+    #   pm.run_on_wait(period) {
+    #           print "** Have to wait for one child ...\n"
+    #   }
+    #
+    #
+    def run_on_wait(*params, &block)
+      fail "period is required by run_on_wait" unless params.length
+      if params.length == 1
+        period = params[0]
+        fail "period must be of type float!" if period.class.to_s.downcase != "float"
+      elsif params.length == 2
+        code, period = params
+        fail "run_on_wait: Missing or invalid code block!" if code.class.to_s.downcase != "proc"
+      else
+        fail "run_on_wait: Invalid argument count!"
+      end
+      @on_wait_period = period
+      fail "Wait period must be greater than 0.0!\n" if period == 0
+      if !code.nil? && !block.nil?
+        fail "run_on_wait: code and block are mutually exclusive arguments!"
+      end
+      if !code.nil?
+        if code.class.to_s == "Proc" && VERSION >= "1.5.0"
+          puts "Passing Proc has been deprecated as of Parallel::ForkManager #{VERSION}!\nPlease refer to rdoc about how to change your code!"
         end
+        @do_on_wait = code
+      elsif !block.nil?
+        @do_on_wait = block
+      end
+    rescue TypeError
+      raise "run on wait failed!"
     end
-#
-# You can define a subroutine which is called when a child is started. It is
-# called after a successful startup of a child in the parent process.
-#
-# The parameters of code are as follows:
-# - pid of the process which has been started
-# - identification of the process (if provided in the "start" method)
-#
-# Example:
-#
-#   pfm.run_on_start(
-#       lambda {
-#           |pid,ident|
-#           print "run on start ::: #{ident} (#{pid})\n"
-#       }
-#   )
-#
-    def run_on_start(code)
-        begin
-            self.do_on_start = code
-        rescue
-            raise "run on start failed!\n"
-        end
+    #
+    # on_wait is a private method as it should not be called directly.
+    #
+    def on_wait
+      return unless @do_on_wait.class.name == "Proc"
+      @do_on_wait.call
+      return unless defined? @on_wait_period && !@on_wait_period.nil?
+      #
+      # Unfortunately Ruby 1.8 has no concept of 'sigaction',
+      # so we're unable to check if a signal handler has
+      # already been installed for a given signal.  In this
+      # case it's no matter, since we define handler, but yikes.
+      #
+      Signal.trap("CHLD") do
+        -> {}.call if Signal.list["CHLD"].nil?
+      end
+      IO.select(nil, nil, nil, @on_wait_period)
     end
-    def on_start(*params)
-        begin
-            if self.do_on_start.class().name == 'Proc'
-                my_argc = self.do_on_start.arity - 1
-                if my_argc > 0
-                    my_params = params[0 .. my_argc]
-                else
-                    my_params = params[0]
-                end
-                params = my_params
-                self.do_on_start.call(*params)
-            end
-        rescue
-            raise "on_start failed\n"
-        end
+    #
+    # You can define a subroutine which is called when a child is started. It is
+    # called after a successful startup of a child in the parent process.
+    #
+    # The parameters of code are as follows:
+    # - pid of the process which has been started
+    # - identification of the process (if provided in the "start" method)
+    #
+    # You can pass a block to run_on_start.
+    #
+    # Example:
+    #
+    #   pm.run_on_start() {
+    #           |pid,ident|
+    #           print "run on start ::: #{ident} (#{pid})\n"
+    #       }
+    #
+    #
+    def run_on_start(&block)
+      @do_on_start = block unless block.nil?
+    rescue TypeError
+      raise "run on start failed!\n"
     end
-#
-# set_max_procs(mp) -- mp is an integer
-#
-# set_max_procs() allows you to set a new maximum number of children to maintain.
-#
-# Return: The previous setting of max_procs.
-#
-    def set_max_procs(mp=nil)
-        if mp == nil
-            return self.max_proc
+    #
+    # on_start() is a private method as it should not be called directly.
+    #
+    def on_start(*params) # invokes the run_on_start callback, if one is set
+      if @do_on_start.class.name == "Proc" # skip when no Proc was registered
+        my_argc = @do_on_start.arity - 1 # index of the last declared argument
+        if my_argc > 0
+          my_params = params[0..my_argc] # trim params to the callback's arity
         else
-            self.max_proc = mp
+          my_params = params[0] # arity <= 1 (incl. negative): first argument only
         end
+        params = my_params
+        @do_on_start.call(*params) # the splat also accepts the single value
+      end
+    rescue # bare rescue: any StandardError becomes a generic RuntimeError
+      raise "on_start failed"
     end
-#
-# _waitpid(...) should not be called directly as it is called automatically by
-# wait_one_child(...).
-#
-    def _waitpid(pid, flags)
-        return Process.waitpid(pid, flags)
+    #
+    # set_max_procs() allows you to set a new maximum number of children
+    # to maintain.
+    #
+    def set_max_procs(mp=nil)
+      @max_procs = mp
     end
-#
-# _NT_waitpid(...) is the Windows variant of _waitpid(...) and will be called
-# automatically by wait_one_child(...) depending on the value of RUBY_PLATFORM.
-# You should not call _NT_waitpid(...) directly.
-#
-    def _NT_waitpid(pid, par)
-        if par == Process::WNOHANG
-            pids = self.processes.keys()
-            if pids.length() == 0
-                return -1
-            end
-            kid = 0
-            for my_pid in pids
-                kid = Process.waitpid(my_pid, par)
-                if kid != 0
-                    return kid
-                end
-            return kid
-            end
+    #
+    # set_waitpid_blocking_sleep(seconds) -- Sets the sleep period,
+    # in seconds, of the pseudo-blocking calls.  Set to 0 to disable.
+    #
+    def set_waitpid_blocking_sleep(period) # stores the period exactly as given
+      @waitpid_blocking_sleep = period # read by _waitpid_blocking and waitpid_blocking_sleep
+    end
+    #
+    # waitpid_blocking_sleep() -- Returns the sleep period, in seconds, of the
+    # pseudo-blocking calls.  Returns 0 if disabled.
+    #
+    def waitpid_blocking_sleep
+      @waitpid_blocking_sleep
+    end
+    #
+    # _waitpid(...) is a private method as it should not be called directly.
+    # It is called automatically by wait_one_child(...).
+    #
+    def _waitpid(_pid, flag)
+      flag != 0 ? _waitpid_non_blocking : _waitpid_blocking
+    end
+    #
+    # Private method used internally by _waitpid(...).
+    #
+    def _waitpid_non_blocking
+      running_procs.each do |pid|
+        p = waitpid(pid, Process::WNOHANG) || next
+        if p == -1
+          warn "Child process #{pid} disappeared.  A call to 'waitpid' outside of Parallel::ForkManager might have reaped it."
+          # It's gone.  Let's clean the process entry.
+          @processes.delete[pid]
         else
-            return Process.waitpid(pid, par)
+          return pid
         end
+      end
+      0
+    end
+    #
+    # Private method used internally by _waitpid(...).  Simulates a blocking
+    # waitpid(...) call.
+    #
+    def _waitpid_blocking
+      # pseudo-blocking
+      sleep_period = @waitpid_blocking_sleep
+      loop do
+        pid = _waitpid_non_blocking
+        return pid if pid
+        sleep(sleep_period)
+      end
+      waitpid(-1, 0)
+    end
+    #
+    # _serialize_data is a private method and should not be called directly.
+    #
+    # Currently supports Marshal.dump() and YAML to serialize data.
+    #
+    def _serialize_data(store_tempfile)
+      # Without a serializer there is nothing to persist; 1 is returned
+      # in either case.
+      unless @serializer.nil?
+        File.open(store_tempfile, "wb") do |io|
+          io.write(@serializer.serialize(@data_structure))
+        end
+      end
+      1
+    rescue => e
+      # Any StandardError is re-raised as a RuntimeError naming the file.
+      raise "Error writing/serializing #{store_tempfile}: #{e.message}"
+    end
+    #
+    # _unserialize_data is a private method and should not be called directly.
+    #
+    # Uses the configured serializer (@serializer) to unserialize data.
+    #
+    def _unserialize_data(store_tempfile)
+      return 1 if @serializer.nil?
+      data = File.binread(store_tempfile)
+      @data_structure = @serializer.deserialize(data)
+      return 1
+    rescue => e
+      # Clean up temp file if it exists.
+      # Otherwise we'll have a bunch of 'em laying around.
+      #
+      File.unlink(store_tempfile) rescue nil # XXX: supress errors from unlink.
+      raise "Error reading/deserializing #{store_tempfile}: #{e.message}"
+    end
+    # private methods
+    private :on_start, :on_finish, :on_wait
+    private :_waitpid, :_waitpid_non_blocking, :_waitpid_blocking
+    private :_serialize_data, :_unserialize_data
+    private
+    attr_reader :parent_pid
+    attr_reader :tempdir
+    attr_accessor :in_child
+    def setup_instance_variables(max_procs, params)
+      @max_procs = max_procs
+      # TODO: remove this, it seems to be unused.
+      @debug = params.fetch("debug", false)
+      @tempdir = params.fetch("tempdir", Dir.tmpdir)
+      @tempdir += "/" unless @tempdir.end_with?("/")
+      unless File.directory? @tempdir
+        fail(MissingTempDirError,
+             "#{@tempdir} doesn't exist or is not a directory.")
+      end
+      @process_interface = params.fetch("process_interface",
+                                        ProcessInterface::Instance.new)
+      @data_structure = nil
+      @processes = {}
+      @do_on_finish = {}
+      @in_child = false
+      @has_block = false
+      @on_wait_period = nil
+      @parent_pid = $PID
+      @waitpid_blocking_sleep = 1
+      @serializer = Parallel::ForkManager::Serializer.new(
+        params["serialize_as"] || params["serialize_type"] || "marshal"
+      )
+    end
+    # We care about the Ruby version for a couple of reasons:
+    #
+    # * The new lanmbda syntax -> (1.9 and above)
+    # * Finalizers (1.8 and above)
+    #
+    # So we only allow Ruby 1.9.* and 2.*
+    def check_ruby_version
+      return if RUBY_VERSION.start_with?("1.9")
+      return if RUBY_VERSION.start_with?("2.")
+      fail "Unsupported Ruby version #{RUBY_VERSION}!"
     end
-end
-end
+  end # class
+end # module