parallel_enum 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. data/lib/parallel_enum.rb +177 -0
  2. metadata +45 -0
@@ -0,0 +1,177 @@
1
+ require 'thread'
2
+
3
+ # Tested on Ubuntu and CentOS; untested on Windows and OS X. The fork-based methods (forked, map_forked) will not work on Windows, which does not implement Kernel#fork.
4
+
5
class Enumerator
  # threaded is like each, but uses multiple threads to speed up processing when
  # the executed code contains a lot of blocking or waiting. Try benchmarking
  # these two pieces of code:
  #
  #   (0...50).each{|x| sleep rand*3; puts x}
  #   (0...50).each.threaded{|x| sleep rand*3; puts x}
  #
  # Items are pulled from the enumerator with #next, on the calling thread, and
  # only as fast as the workers finish them, so at most +num_threads+ items are
  # outstanding at any time. The enumerator is not rewound afterwards.
  #
  # If the block raises in a worker, the exception is captured, no further items
  # are handed out, the items already in flight are allowed to finish, and the
  # exception is re-raised on the calling thread. If several workers raise, only
  # the first captured exception is kept; which one that is cannot be predicted,
  # so put exception handling inside the block if you need to see every error.
  #
  # Note that MRI has a Global Interpreter Lock that will not allow two threads
  # to run Ruby code concurrently. Benchmark these two:
  #
  #   (0...50).each{|x| 32000000.times{}; puts x}
  #   (0...50).each.threaded{|x| 32000000.times{}; puts x}
  #
  # The threaded version may actually run slower than the non-threaded version
  # because of the overhead involved. To take advantage of multiple cores, see
  # Enumerator#forked.
  #
  # @param num_threads [Integer] size of the worker pool; must be positive
  # @yield [item] called once per item, on one of the worker threads
  # @return [Enumerator] self
  # @raise [ArgumentError] if no block is given or num_threads is not a positive Integer
  # @raise [Exception] whatever the block (or the underlying enumeration) raised
  def threaded(num_threads = 8, &block)
    raise ArgumentError, "It makes no sense to call Enumerator#threaded without a block" if block.nil?
    # Integer rather than Fixnum: Fixnum was folded into Integer and later removed.
    unless num_threads.is_a?(Integer) && num_threads > 0
      raise ArgumentError, "num_threads must be a positive integer"
    end

    jobs = Queue.new          # main thread -> workers: [item] wrappers, or nil meaning "stop"
    finished = Queue.new      # workers -> main thread: one token per completed item
    failure = nil             # first exception captured from a worker
    failure_lock = Mutex.new  # guards +failure+

    workers = Array.new(num_threads) do
      Thread.new do
        # Items are wrapped in a one-element array so that nil and false can be
        # enumerated without being mistaken for the stop signal.
        while (job = jobs.pop)
          begin
            block.call(job.first)
          rescue Exception => e # deliberately broad: everything is carried to the caller
            failure_lock.synchronize { failure ||= e }
          ensure
            finished << true
          end
        end
      end
    end

    begin
      # Prime the pool with one item per worker...
      num_threads.times { jobs << [self.next] }
      # ...then hand out one new item for each one reported finished. Kernel#loop
      # ends quietly when self.next raises StopIteration.
      loop do
        finished.pop
        break if failure_lock.synchronize { failure }
        jobs << [self.next]
      end
    rescue StopIteration
      nil # fewer items than workers; everything has already been handed out
    ensure
      # The queue is FIFO, so the stop signals are seen only after every item
      # that was handed out. No lock is needed here, so this cannot hang the way
      # a shared read/write mutex could.
      num_threads.times { jobs << nil }
      workers.each(&:join)
    end

    raise failure if failure
    self
  end

  # forked is like threaded, but runs the block for each item in its own forked
  # child process, so CPU-bound work can use multiple cores. One thread per
  # slot (via #threaded) forks the child and waits for it, which keeps at most
  # +num_forks+ children alive at a time.
  #
  # Drawbacks compared with threaded: Kernel#fork is not available on every
  # platform (*nix systems are usually fine); Ruby has no built-in inter-process
  # mutex; and variables CANNOT be altered from within the block, because the
  # child works on a copy of the parent's memory.
  #
  # An exception raised by the block is marshalled in the child, sent back over
  # a pipe and re-raised in the parent. If the exception itself cannot be
  # marshalled, an IOError naming the original exception class is raised instead.
  #
  # @param num_forks [Integer] maximum number of concurrent child processes
  # @yield [item] called once per item, inside a forked child process
  # @return [Enumerator] self
  def forked(num_forks = 8, &block)
    threaded(num_forks) do |item|
      reader, writer = IO.pipe # carries a marshalled exception from child to parent
      begin
        reader.binmode
        writer.binmode
        pid = Process.fork do
          reader.close
          begin
            block.call(item)
          rescue Exception => e # carried back to the parent, whatever it is
            dumped = begin
              Marshal.dump(e)
            rescue Exception # no rebinding: +e+ must stay the block's own exception
              Marshal.dump(IOError.new("Failed to carry #{e.class} to main process"))
            end
            writer.print(dumped)
          ensure
            exit! # Don't call any at_exit methods
          end
        end
        writer.close
        # Drain the pipe BEFORE waiting: a child blocked on a full pipe never
        # exits, so waiting first would deadlock on large payloads.
        report = reader.read
        Process.wait(pid)
      ensure
        reader.close unless reader.closed?
        writer.close unless writer.closed?
      end
      # The bytes come from our own child process, so Marshal.load is acceptable here.
      raise Marshal.load(report) unless report.empty?
    end
  end
end
126
+
127
module Enumerable
  # Like map, but runs the block on multiple threads to speed up processing.
  # Results are returned in the order of the receiver's elements, regardless of
  # the order in which the threads finish. See Enumerator#threaded for the
  # threading and exception behaviour.
  #
  # @param num_threads [Integer] passed through to Enumerator#threaded
  # @yield [item] called once per element, on a worker thread
  # @return [Array] the block's return values, in element order
  def map_threaded(num_threads = 8, &block)
    results = []
    lock = Mutex.new # guards +results+, which is written from several threads
    each.with_index.threaded(num_threads) do |x, i|
      value = block.call(x)
      lock.synchronize { results[i] = value } # slot by index keeps element order
    end
    results
  end

  # Like map, but runs the block for each element in its own forked child
  # process to take advantage of multiple cores. See Enumerator#forked for the
  # platform caveats. NOTE: since variables are not shared between processes,
  # the block's return value is sent back over a pipe, so it must be
  # Marshal-able.
  #
  # The child reports exactly one marshalled pair on a single pipe:
  # [:ok, value] or [:error, exception]. An :error exception is re-raised in
  # the parent; if the value or exception cannot be marshalled, an IOError
  # naming the original exception class is raised instead.
  #
  # @param num_forks [Integer] maximum number of concurrent child processes
  # @yield [item] called once per element, inside a forked child process
  # @return [Array] the block's return values, in element order
  # @raise [IOError] if a child exits without reporting anything
  def map_forked(num_forks = 8, &block)
    map_threaded(num_forks) do |item|
      reader, writer = IO.pipe # carries the child's single marshalled report
      begin
        reader.binmode
        writer.binmode
        pid = Process.fork do
          reader.close
          begin
            dumped = begin
              Marshal.dump([:ok, block.call(item)])
            rescue Exception => e # carried back to the parent, whatever it is
              begin
                Marshal.dump([:error, e])
              rescue Exception # no rebinding: +e+ must stay the original exception
                Marshal.dump([:error, IOError.new("Failed to carry #{e.class} to main process")])
              end
            end
            writer.print(dumped)
          ensure
            exit! # Don't call any at_exit methods
          end
        end
        writer.close
        # Drain the pipe BEFORE waiting: a child blocked writing a result larger
        # than the pipe buffer never exits, so waiting first would deadlock.
        report = reader.read
        Process.wait(pid)
      ensure
        reader.close unless reader.closed?
        writer.close unless writer.closed?
      end

      raise IOError, "Forked process exited without reporting a result" if report.empty?

      # The bytes come from our own child process, so Marshal.load is acceptable here.
      status, value = Marshal.load(report)
      raise value if status == :error
      value
    end
  end
end
metadata ADDED
@@ -0,0 +1,45 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: parallel_enum
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Chris Dollard
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2011-07-01 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: A simple hello world gem
15
+ email: cjd.d01071@gmail.com
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - lib/parallel_enum.rb
21
+ homepage: http://rubygems.org/gems/parallel_enum
22
+ licenses: []
23
+ post_install_message:
24
+ rdoc_options: []
25
+ require_paths:
26
+ - lib
27
+ required_ruby_version: !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ required_rubygems_version: !ruby/object:Gem::Requirement
34
+ none: false
35
+ requirements:
36
+ - - ! '>='
37
+ - !ruby/object:Gem::Version
38
+ version: '0'
39
+ requirements: []
40
+ rubyforge_project:
41
+ rubygems_version: 1.8.11
42
+ signing_key:
43
+ specification_version: 3
44
+ summary: Parallel Enum
45
+ test_files: []