RubyGems - parallel_enum - Versions diffs - 0.2.1 - Mend

parallel_enum 0.2.1

Files changed (2) hide show

data/lib/parallel_enum.rb +177 -0
metadata +45 -0

data/lib/parallel_enum.rb ADDED Viewed

@@ -0,0 +1,177 @@
+require 'thread'
+# Tested on Ubuntu and CentOS. Untested on Windows and OSX. The fork stuff probably won't work on Windows because Windows doesn't have Kernel#fork
+class Enumerator
+  # threaded is like each, but uses multiple threads to speed up processing when the executed code
+  # contains a lot of blocking or waiting. Try benchmarking these two pieces of code:
+  #
+  # (0...50).each{|x| sleep rand*3; puts x}
+  # (0...50).each.threaded{|x| sleep rand*3; puts x}
+  #
+  # If any thread raises an exception, Enumerator#threaded will catch it and bring it into the main thread.
+  # That said, if two different threads raise two different exceptions, one will be saved while the other
+  # will be lost to the aether. It is not possible to predict which will be saved, so it's probably best
+  # to put exception handling code within the block if you plan to catch errors.
+  #
+  # Note that even though Ruby 1.9 uses real system threads in its code, it still contains a Global
+  # Interpreter Lock that will not allow two threads to run concurrently. Benchmark these two:
+  #
+  # (0...50).each{|x| 32000000.times{}; puts x}
+  # (0...50).each.threaded{|x| 32000000.times{}; puts x}
+  #
+  # The threaded version may actually run slower than the non-threaded version because of the overhead
+  # invloved. If you want to speed up processing code by taking advantage of multiple cores, see
+  # Enumerator#forked
+  def threaded(num_threads=8,&block)
+    raise ArgumentError.new("It makes no sense to call Enumerator#threaded without a block") if block.nil?
+    raise ArgumentError.new("num_threads must be a positive integer") unless num_threads.kind_of? Fixnum and num_threads > 0
+    mutex = Mutex.new   # used to ensure only one thread is using the instruction and feedback pipes at a time
+    threads = []        # will hold the pool of threads so we can join them later
+    items = {}          # contains items returned by self.next indexed by their object_id - used to prevent garbage collection
+    exception = nil     # contains the exception raised by any thread
+    instruction_r, instruction_w = IO.pipe  # Used to assign items to the threads. 'stop' is sent to terminate the thread.
+    feedback_r, feedback_w = IO.pipe        # Used by threads to indicate completion of an item
+    # Alright. Let's make some threads!
+    num_threads.times do
+      threads << Thread.new do
+        instruction = nil       # Contains a string of the next instruction - either something like '63913' or 'stop'
+        while true              # Main evaluation loop - exited explicitly through break
+          mutex.synchronize{instruction = instruction_r.gets.chomp} # One thread at a time may read an instruction
+          break if instruction == 'stop'                            # Stop if told to stop
+          begin
+            block.call(ObjectSpace._id2ref(instruction.to_i))       # Call the block on the referenced item
+          rescue Exception => e
+            exception = e                                           # Any exceptions are caught and sent to the main thread
+          end
+          mutex.synchronize{feedback_w.puts instruction}            # Report completion to the main thread
+        end
+      end
+    end
+    # The threads are now armed and ready to evaluate
+    begin
+      # Start by sending as many items as there are threads
+      num_threads.times do
+        item = self.next                    # Grab the next item
+        items[item.object_id] = item        # Store it so it won't be garbage collected
+        instruction_w.puts item.object_id   # Send it to the thread pool
+      end
+      # Then send items as old ones come back (break out of loop when we reach the end)
+      while true
+        index = feedback_r.gets.to_i        # Wait for an item to be done
+        break if exception                  # Stop if a thread had an error
+        items.delete index                  # Delete the completed item from the item pool
+        item = self.next                    # Grab the next item
+        items[item.object_id] = item        # Store it so it won't be garbage collected
+        instruction_w.puts item.object_id   # Send it to the thread pool
+      end
+    rescue StopIteration                    # StopIteration will be raised by self.next when we reach the end of the iteration
+      nil
+    rescue Exception => e
+      mutex.synchronize{exception = e} if exception.nil?      # Any other error will be dealt with promptly
+    ensure
+      begin
+        (num_threads+1).times{instruction_w.puts 'stop'}      # Tell all the threads to stop
+        threads.each{|t| t.join}                              # and wait for them to stop
+      rescue Exception => e
+        mutex.synchronize{exception = e} if exception.nil?    # Any error at this stage will be dealt with promptly
+      ensure
+        threads.each{|t| t.kill}                              # Threads should have stopped by now, but if not, they die.
+        [instruction_r, instruction_w, feedback_r, feedback_w ].each{|io| io.close} # Close IO
+        raise exception unless exception.nil?                 # Re-raise any errors now that the thread pool is closed
+        return self
+      end
+    end
+  end
+  # forked is like threaded, but uses multiple process forks to speed up processing by taking advantage of
+  # multiple CPU cores. Note that, while this is an advantage over threaded, there are a few drawbacks:
+  # First, forked is not available on all platforms, though *nix systems are usually fine. Second, there
+  # is no inter-process mutex built into Ruby, although some libraries are available. Third, variables
+  # CANNOT be altered from within a fork, as forking the Ruby interpreter clones the environment.
+  def forked(num_forks=8,&block)
+    # Threading beind the scenes should create separate
+    # ActiveRecord connections and severely de-complicates things
+    self.threaded(num_forks) do |item|
+      xn_r, xn_w = IO.pipe      # xn pipe will be used to send a Marshal'd exception back to the main process
+      pid = Process.fork do     # Fork a new process from the thread
+        begin
+          block.call(item)      # Call block
+        rescue Exception => e   # Exceptions are caught to be sent back to the main process
+          xn = nil              # xn will hold the Marshal'd exception
+          begin
+            xn = Marshal.dump(e)# Try to dump the exception
+          rescue Exception => e # That might fail if this is a particularly exotic exception
+            xn = Marshal.dump(IOError.new("Failed to carry #{e.class} to main process"))
+          end
+          xn_w.print(xn)        # Send that Marshal'd string version of the exception back
+        ensure
+          exit!                 # Don't call any at_exit methods
+        end
+      end
+      Process.wait(pid)         # Wait for the subprocess to finish
+      xn_w.close                # Close the write pipe
+      xn = xn_r.read            # Read any exception
+      xn_r.close                # Close the read pipe
+      raise Marshal.load(xn) if xn != '' # Raise the passed exception if it exists
+    end
+  end
+end
+module Enumerable
+  # This function is identical to the map function, but uses multiple threads
+  # to speed up processing. See Enumerator#threaded for more information
+  def map_threaded(num_threads=8,&block)
+    result = {}
+    mtx = Mutex.new
+    self.each.with_index.threaded(num_threads) do |x, i|
+      r = block.call(x)
+      mtx.synchronize{result[i] = r}
+    end
+    return result.to_a.sort.map{|i, x| x}
+  end
+  # This function is identical to the map function, but uses multiple forks
+  # to speed up processing. See Enumerator#forked for more information. NOTE:
+  # Since most variables are not shared between processes, I had to rely on
+  # I/O to send the block's return values back. Because of this, the block's
+  # return value must be Marshal-able into a string.
+  def map_forked(num_forks=8,&block)
+    return self.map_threaded(num_forks) do |item|
+      xn_r, xn_w = IO.pipe      # xn pipe will be used to send a Marshal'd exception back to the main process
+      rz_r, rz_w = IO.pipe      # rz pipe will be used to send the block's return value back to the main process
+      pid = Process.fork do     # Fork a new process from the thread
+        begin
+          r = block.call(item)  # Call block
+          rz_w.print(Marshal.dump(r)) # Send the result back
+        rescue Exception => e   # Exceptions are caught to be sent back to the main process
+          xn = nil              # xn will hold the Marshal'd exception
+          begin
+            xn = Marshal.dump(e)# Try to dump the exception
+          rescue Exception => e # That might fail if this is a particularly exotic exception
+            xn = Marshal.dump(IOError.new("Failed to carry #{e.class} to main process"))
+          end
+          xn_w.print(xn)        # Send that Marshal'd string version of the exception back
+        ensure
+          exit!                 # Don't call any at_exit methods
+        end
+      end
+      Process.wait(pid)         # Wait for the subprocess to finish
+      xn_w.close                # Close the write pipe
+      xn = xn_r.read            # Read any exception
+      xn_r.close                # Close the read pipe
+      raise Marshal.load(xn) if xn != '' # Raise the passed exception if it exists
+      rz_w.close                # Close the write pipe
+      rz = rz_r.read            # Read the result of the block
+      rz_r.close                # Close the read pipe
+      Marshal.load(rz)          # Return the return value
+    end
+  end
+end

metadata ADDED Viewed

@@ -0,0 +1,45 @@
+--- !ruby/object:Gem::Specification
+name: parallel_enum
+version: !ruby/object:Gem::Version
+  version: 0.2.1
+  prerelease:
+platform: ruby
+authors:
+- Chris Dollard
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2011-07-01 00:00:00.000000000 Z
+dependencies: []
+description: A simple hello world gem
+email: cjd.d01071@gmail.com
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- lib/parallel_enum.rb
+homepage: http://rubygems.org/gems/parallel_enum
+licenses: []
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubyforge_project:
+rubygems_version: 1.8.11
+signing_key:
+specification_version: 3
+summary: Parallel Enum
+test_files: []