parallel_enum 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (2) hide show
  1. data/lib/parallel_enum.rb +177 -0
  2. metadata +45 -0
@@ -0,0 +1,177 @@
1
+ require 'thread'
2
+
3
+ # Tested on Ubuntu and CentOS. Untested on Windows and OSX. The fork stuff probably won't work on Windows because Windows doesn't have Kernel#fork
4
+
5
class Enumerator

  # threaded is like each, but runs the block on a pool of worker threads to speed up
  # processing when the executed code spends most of its time blocking or waiting.
  # Try benchmarking these two pieces of code:
  #
  #   (0...50).each{|x| sleep rand*3; puts x}
  #   (0...50).each.threaded{|x| sleep rand*3; puts x}
  #
  # Items are pulled from the enumerator with #next, one at a time, and only as workers
  # become free, so at most num_threads items are outstanding at any moment.
  #
  # If any thread raises an exception, it is captured, no further items are handed out,
  # and the exception is re-raised in the calling thread once the pool has shut down.
  # If several threads raise, only one exception is reported and the others are dropped;
  # which one is not predictable, so put exception handling inside the block if you need
  # to see every error.
  #
  # Note that MRI has a Global Interpreter Lock that does not allow two threads to run
  # Ruby code concurrently. Benchmark these two:
  #
  #   (0...50).each{|x| 32000000.times{}; puts x}
  #   (0...50).each.threaded{|x| 32000000.times{}; puts x}
  #
  # The threaded version may actually run slower than the non-threaded version because of
  # the overhead involved. To use multiple cores for CPU-bound work, see Enumerator#forked.
  #
  # num_threads - number of worker threads; must be a positive Integer (default 8).
  # block       - called once per item, from a worker thread.
  #
  # Returns self. Raises ArgumentError when no block is given or num_threads is invalid.
  def threaded(num_threads = 8, &block)
    raise ArgumentError, "It makes no sense to call Enumerator#threaded without a block" if block.nil?
    # Integer rather than Fixnum: Fixnum was removed in Ruby 3.2.
    unless num_threads.is_a?(Integer) && num_threads > 0
      raise ArgumentError, "num_threads must be a positive integer"
    end

    work         = Queue.new  # items waiting for a worker; carries the objects themselves
    done         = Queue.new  # one token per finished item, so the feeder knows a worker is free
    stop         = Object.new # unique sentinel telling a worker to exit
    failure      = nil        # the one exception that will be re-raised in the caller
    failure_lock = Mutex.new  # makes "first failure wins" atomic across threads

    workers = Array.new(num_threads) do
      Thread.new do
        loop do
          item = work.pop
          break if stop.equal?(item)
          begin
            block.call(item)
          rescue Exception => e # deliberately broad: everything is carried back to the caller
            failure_lock.synchronize { failure ||= e }
          end
          done << true # report completion whether or not the block raised
        end
      end
    end

    begin
      # Prime the pool with one item per worker ...
      num_threads.times { work << self.next }
      # ... then hand out one new item for every completed one.
      while true
        done.pop
        break if failure
        work << self.next
      end
    rescue StopIteration
      nil # the enumerator is exhausted; queued items are still processed before the stops
    rescue Exception => e
      failure_lock.synchronize { failure ||= e }
    ensure
      begin
        num_threads.times { work << stop }
        workers.each { |t| t.join }
      rescue Exception => e
        failure_lock.synchronize { failure ||= e }
      ensure
        workers.each { |t| t.kill } # no-op for threads that already finished
      end
    end

    raise failure if failure
    self
  end

  # forked is like threaded, but runs the block for each item in its own forked process,
  # so CPU-bound work can use multiple cores. Drawbacks compared to threaded:
  # First, fork is not available on all platforms (*nix systems are usually fine).
  # Second, there is no inter-process mutex built into Ruby. Third, variables CANNOT be
  # altered from within the block, because each fork works on a copy of the environment.
  #
  # Each fork is launched from a worker thread of Enumerator#threaded, which waits for
  # its child, so at most num_forks children run at a time. (The original author notes
  # that threading behind the scenes should give each worker its own ActiveRecord
  # connection; that is not verified here.)
  #
  # An exception raised by the block is marshalled in the child and re-raised in the
  # parent. If it cannot be marshalled, an IOError naming its class is raised instead.
  #
  # Returns self.
  def forked(num_forks = 8, &block)
    threaded(num_forks) do |item|
      reader, writer = IO.pipe # carries a Marshal'd exception from child to parent
      payload = nil
      begin
        pid = Process.fork do
          begin
            reader.close
            block.call(item)
          rescue Exception => e
            dumped = begin
              Marshal.dump(e)
            rescue Exception
              # Report the class of the block's exception, not of the Marshal failure.
              Marshal.dump(IOError.new("Failed to carry #{e.class} to main process"))
            end
            writer.print(dumped)
          ensure
            exit! # skip at_exit handlers inherited from the parent
          end
        end
        writer.close
        # Drain the pipe BEFORE waiting: a payload larger than the pipe buffer would
        # otherwise block the child on write while the parent blocks in wait.
        payload = reader.read
        Process.wait(pid)
      ensure
        writer.close unless writer.closed?
        reader.close unless reader.closed?
      end
      # The payload was written by our own child process, so loading it is safe.
      raise Marshal.load(payload) unless payload.empty?
    end
  end
end
126
+
127
module Enumerable
  # map_threaded behaves like map, but evaluates the block on multiple threads.
  # Results are returned in the order of the source collection, regardless of the
  # order in which the threads finish. See Enumerator#threaded for how the pool
  # works and how exceptions are propagated.
  #
  # num_threads - number of worker threads passed to Enumerator#threaded (default 8).
  # block       - called once per element; its return value becomes the mapped value.
  #
  # Returns an Array of the block's return values.
  def map_threaded(num_threads = 8, &block)
    results = []
    lock = Mutex.new
    each.with_index.threaded(num_threads) do |x, i|
      value = block.call(x)
      # Store by index so no sorting is needed afterwards.
      lock.synchronize { results[i] = value }
    end
    results
  end

  # map_forked behaves like map, but evaluates the block for each element in its own
  # forked process. See Enumerator#forked for the caveats of forking. NOTE: variables
  # are not shared between processes, so the block's return value travels back over a
  # pipe and therefore must be Marshal-able.
  #
  # An exception raised by the block (or by marshalling its return value) is re-raised
  # in the parent. If the exception itself cannot be marshalled, an IOError naming its
  # class is raised instead.
  #
  # num_forks - maximum number of concurrent child processes (default 8).
  #
  # Returns an Array of the block's return values, in source order.
  def map_forked(num_forks = 8, &block)
    map_threaded(num_forks) do |item|
      # A single pipe carries a tagged pair: [true, result] or [false, exception].
      reader, writer = IO.pipe
      payload = nil
      begin
        pid = Process.fork do
          begin
            reader.close
            dumped = begin
              Marshal.dump([true, block.call(item)])
            rescue Exception => e
              begin
                Marshal.dump([false, e])
              rescue Exception
                # Report the class of the block's exception, not of the Marshal failure.
                Marshal.dump([false, IOError.new("Failed to carry #{e.class} to main process")])
              end
            end
            writer.print(dumped)
          ensure
            exit! # skip at_exit handlers inherited from the parent
          end
        end
        writer.close
        # Drain the pipe BEFORE waiting: a result larger than the pipe buffer would
        # otherwise block the child on write while the parent blocks in wait.
        payload = reader.read
        Process.wait(pid)
      ensure
        writer.close unless writer.closed?
        reader.close unless reader.closed?
      end
      # The payload was written by our own child process, so loading it is safe.
      # An empty payload (child died before writing) raises ArgumentError here.
      ok, value = Marshal.load(payload)
      raise value unless ok
      value
    end
  end
end
metadata ADDED
@@ -0,0 +1,45 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: parallel_enum
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Chris Dollard
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2011-07-01 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: A simple hello world gem
15
+ email: cjd.d01071@gmail.com
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - lib/parallel_enum.rb
21
+ homepage: http://rubygems.org/gems/parallel_enum
22
+ licenses: []
23
+ post_install_message:
24
+ rdoc_options: []
25
+ require_paths:
26
+ - lib
27
+ required_ruby_version: !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ required_rubygems_version: !ruby/object:Gem::Requirement
34
+ none: false
35
+ requirements:
36
+ - - ! '>='
37
+ - !ruby/object:Gem::Version
38
+ version: '0'
39
+ requirements: []
40
+ rubyforge_project:
41
+ rubygems_version: 1.8.11
42
+ signing_key:
43
+ specification_version: 3
44
+ summary: Parallel Enum
45
+ test_files: []