parallel_enum 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/parallel_enum.rb +177 -0
- metadata +45 -0
@@ -0,0 +1,177 @@
|
|
1
|
+
require 'thread'
|
2
|
+
|
3
|
+
# Tested on Ubuntu and CentOS. Untested on Windows and OSX. The fork stuff probably won't work on Windows because Windows doesn't have Kernel#fork
|
4
|
+
|
5
|
+
class Enumerator

  # threaded is like each, but runs the block in a pool of worker threads,
  # which speeds up processing when the block spends most of its time blocking
  # or waiting. Try benchmarking these two pieces of code:
  #
  #   (0...50).each{|x| sleep rand*3; puts x}
  #   (0...50).each.threaded{|x| sleep rand*3; puts x}
  #
  # If any thread raises an exception, Enumerator#threaded stops handing out
  # new items, drains the pool, and re-raises the exception in the main thread.
  # If several threads raise, only one exception survives and it is not
  # predictable which, so put rescue logic inside the block if you need to
  # catch every error.
  #
  # Note that MRI's Global Interpreter Lock prevents two threads from running
  # Ruby code concurrently, so CPU-bound blocks may actually run slower than
  # the plain each because of the threading overhead involved:
  #
  #   (0...50).each{|x| 32000000.times{}; puts x}
  #   (0...50).each.threaded{|x| 32000000.times{}; puts x}
  #
  # To exploit multiple cores for CPU-bound work, see Enumerator#forked.
  #
  # num_threads:: size of the worker pool (positive Integer, default 8)
  #
  # Returns self. Raises ArgumentError when no block is given or num_threads
  # is not a positive Integer.
  def threaded(num_threads = 8, &block)
    raise ArgumentError.new("It makes no sense to call Enumerator#threaded without a block") if block.nil?
    # NOTE: the original checked `kind_of? Fixnum`; Fixnum was removed in
    # Ruby 3.2, so accept any positive Integer (a strict superset).
    raise ArgumentError.new("num_threads must be a positive integer") unless num_threads.kind_of?(Integer) && num_threads > 0

    # A SizedQueue bounds the number of in-flight items to num_threads, which
    # reproduces the original pipe-based flow control. It also replaces the
    # deprecated ObjectSpace._id2ref round-trip: items travel to the workers
    # directly, so nothing can be garbage-collected out from under us.
    work      = SizedQueue.new(num_threads)
    stop      = Object.new # unique shutdown sentinel; cannot collide with a real item
    mutex     = Mutex.new  # guards `exception`
    exception = nil        # first exception raised anywhere; re-raised at the end

    workers = Array.new(num_threads) do
      Thread.new do
        loop do
          item = work.pop
          break if stop.equal?(item) # told to shut down
          begin
            block.call(item)
          rescue Exception => e # deliberately broad: the contract is to carry anything back
            mutex.synchronize { exception ||= e }
          end
        end
      end
    end

    begin
      loop do
        break if exception   # a worker failed: stop feeding new items
        work.push(self.next) # blocks while the queue is full; self.next ends via StopIteration
      end
    rescue StopIteration
      nil # normal end of the iteration
    rescue Exception => e
      mutex.synchronize { exception ||= e } # e.g. the underlying enumerator raised
    ensure
      num_threads.times { work.push(stop) } # one shutdown marker per worker
      workers.each { |t| t.join }           # wait for the pool to drain
    end
    raise exception unless exception.nil? # surface any failure in the caller's thread
    self
  end

  # forked is like threaded, but evaluates the block in a forked child process
  # per item, taking advantage of multiple CPU cores. Drawbacks compared with
  # threaded: Kernel#fork is not available on all platforms (*nix systems are
  # usually fine); Ruby ships no inter-process mutex (some libraries provide
  # one); and variables CANNOT be altered from within the block, because each
  # fork works on a copy of the parent interpreter's environment.
  #
  # An exception raised inside a child is Marshal'd through a pipe and
  # re-raised in the parent; one that cannot be Marshal'd is replaced by an
  # IOError naming the original exception class.
  #
  # num_forks:: maximum number of concurrent child processes (default 8)
  #
  # Returns self.
  def forked(num_forks = 8, &block)
    raise ArgumentError.new("It makes no sense to call Enumerator#forked without a block") if block.nil?
    # Driving the forks from threaded keeps at most num_forks children alive at
    # a time and, behind the scenes, gives things like ActiveRecord a separate
    # connection per worker, which severely de-complicates things.
    self.threaded(num_forks) do |item|
      xn_r, xn_w = IO.pipe  # carries a Marshal'd exception back to this process
      pid = Process.fork do # child: evaluate the block for this one item
        begin
          block.call(item)
        rescue Exception => original # caught so it can be replayed in the parent
          payload = begin
            Marshal.dump(original)
          rescue Exception # particularly exotic exceptions may not be Marshal-able
            # BUGFIX: the original rescue shadowed the outer variable, so this
            # message reported the Marshal failure instead of the real error.
            Marshal.dump(IOError.new("Failed to carry #{original.class} to main process"))
          end
          xn_w.print(payload)
        ensure
          exit! # skip at_exit handlers in the child
        end
      end
      Process.wait(pid) # reap the child
      xn_w.close        # close our write end so read below can hit EOF
      xn = xn_r.read    # empty string means the child succeeded
      xn_r.close
      raise Marshal.load(xn) if xn != '' # safe: the data comes from our own child
    end
  end
end
|
126
|
+
|
127
|
+
module Enumerable
  # Identical to map, but evaluates the block in multiple threads. Results are
  # returned in the original input order regardless of completion order. See
  # Enumerator#threaded for details and caveats.
  def map_threaded(num_threads = 8, &block)
    result = {}     # index => block result, filled in completion order
    mtx = Mutex.new # result is shared between the worker threads
    self.each.with_index.threaded(num_threads) do |x, i|
      r = block.call(x)
      mtx.synchronize { result[i] = r }
    end
    result.sort.map { |_i, x| x } # reassemble in input order
  end

  # Identical to map, but evaluates the block in forked subprocesses. See
  # Enumerator#forked for details. NOTE: since forks share no memory with the
  # parent, each return value is Marshal'd through a pipe back to the parent,
  # so the block's return value must be Marshal-able.
  def map_forked(num_forks = 8, &block)
    self.map_threaded(num_forks) do |item|
      xn_r, xn_w = IO.pipe # carries a Marshal'd exception back from the child
      rz_r, rz_w = IO.pipe # carries the block's Marshal'd return value back
      begin
        pid = Process.fork do # child: evaluate the block for this one item
          begin
            r = block.call(item)
            rz_w.print(Marshal.dump(r)) # ship the result to the parent
          rescue Exception => original  # caught so it can be replayed in the parent
            payload = begin
              Marshal.dump(original)
            rescue Exception # particularly exotic exceptions may not be Marshal-able
              # BUGFIX: the original rescue shadowed the outer variable, so this
              # message reported the Marshal failure instead of the real error.
              Marshal.dump(IOError.new("Failed to carry #{original.class} to main process"))
            end
            xn_w.print(payload)
          ensure
            exit! # skip at_exit handlers in the child
          end
        end
        Process.wait(pid) # reap the child
        xn_w.close        # EOF for the exception pipe
        xn = xn_r.read    # empty string means the child succeeded
        raise Marshal.load(xn) if xn != '' # safe: the data comes from our own child
        rz_w.close
        Marshal.load(rz_r.read) # the block's return value, back in the parent
      ensure
        # BUGFIX: the original leaked all four pipe ends whenever the child
        # raised; close every end on every path.
        [xn_r, xn_w, rz_r, rz_w].each { |io| io.close unless io.closed? }
      end
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: parallel_enum
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.2.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Chris Dollard
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2011-07-01 00:00:00.000000000 Z
|
13
|
+
dependencies: []
|
14
|
+
description: Parallel iteration helpers for Enumerator and Enumerable using thread pools and process forks
|
15
|
+
email: cjd.d01071@gmail.com
|
16
|
+
executables: []
|
17
|
+
extensions: []
|
18
|
+
extra_rdoc_files: []
|
19
|
+
files:
|
20
|
+
- lib/parallel_enum.rb
|
21
|
+
homepage: http://rubygems.org/gems/parallel_enum
|
22
|
+
licenses: []
|
23
|
+
post_install_message:
|
24
|
+
rdoc_options: []
|
25
|
+
require_paths:
|
26
|
+
- lib
|
27
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ! '>='
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
33
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
34
|
+
none: false
|
35
|
+
requirements:
|
36
|
+
- - ! '>='
|
37
|
+
- !ruby/object:Gem::Version
|
38
|
+
version: '0'
|
39
|
+
requirements: []
|
40
|
+
rubyforge_project:
|
41
|
+
rubygems_version: 1.8.11
|
42
|
+
signing_key:
|
43
|
+
specification_version: 3
|
44
|
+
summary: Parallel Enum
|
45
|
+
test_files: []
|