pmap 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +6 -0
- data/Gemfile +4 -0
- data/LICENSE +13 -0
- data/README.md +51 -0
- data/Rakefile +2 -0
- data/lib/pmap.rb +47 -0
- data/lib/pmap/version.rb +3 -0
- data/pmap.gemspec +19 -0
- data/test/pmap_test.rb +60 -0
- metadata +65 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
Copyright 2011 Bruce Adams
|
2
|
+
|
3
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
you may not use this file except in compliance with the License.
|
5
|
+
You may obtain a copy of the License at
|
6
|
+
|
7
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
|
9
|
+
Unless required by applicable law or agreed to in writing, software
|
10
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
See the License for the specific language governing permissions and
|
13
|
+
limitations under the License.
|
data/README.md
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
pmap
|
2
|
+
====
|
3
|
+
|
4
|
+
This Ruby gem adds two methods to any Enumerable (notably including
|
5
|
+
any Array). The two added methods are:
|
6
|
+
|
7
|
+
* _pmap_ parallel map
|
8
|
+
* _peach_ parallel each
|
9
|
+
|
10
|
+
Threading in Ruby has limitations.
|
11
|
+
----------------------------------
|
12
|
+
|
13
|
+
Matz Ruby 1.8.* uses _green_ threads. All Ruby threads are run within
|
14
|
+
a single thread in a single process. A single Ruby program will never
|
15
|
+
use more than a single core of a multi-core machine.
|
16
|
+
|
17
|
+
Matz Ruby 1.9.* uses _native_ threads. Each Ruby thread maps directly
|
18
|
+
to a thread in the underlying operating system. In theory, a single
|
19
|
+
Ruby program can use multiple cores. Unfortunately, there is a global
|
20
|
+
interpreter lock _GIL_ that causes single-threaded behavior.
|
21
|
+
|
22
|
+
JRuby also uses _native_ threads. JRuby avoids the global interpreter
|
23
|
+
lock, allowing a single Ruby program to really use multiple CPU cores.
|
24
|
+
|
25
|
+
Threading is useful for remote IO, such as HTTP
|
26
|
+
--------------------------------------------
|
27
|
+
|
28
|
+
Despite the Matz Ruby threading limitations, IO bound actions can
|
29
|
+
greatly benefit from multi-threading. A very typical use is making
|
30
|
+
multiple HTTP requests in parallel. Issuing those requests in separate
|
31
|
+
Ruby threads means the requests will be issued very quickly, well
|
32
|
+
before the responses start coming back. As responses come back, they
|
33
|
+
will be processed as they arrive.
|
34
|
+
|
35
|
+
Example
|
36
|
+
-------
|
37
|
+
|
38
|
+
Suppose that we have a function get_quote that calls out to a stock
|
39
|
+
quote service to get a current stock price. The response time for
|
40
|
+
get_quote averages 0.5 seconds.
|
41
|
+
|
42
|
+
stock_symbols = [:ibm, :goog, :appl, :msft, :hp, :orcl]
|
43
|
+
|
44
|
+
# This will take about three seconds;
|
45
|
+
# an eternity if you want to render a web page.
|
46
|
+
stock_quotes = stock_symbols.map {|s| get_quote(s)}
|
47
|
+
|
48
|
+
# Replacing "map" with "pmap" speeds it up.
|
49
|
+
# This will take about half a second;
|
50
|
+
# however long the single slowest response took.
|
51
|
+
stock_quotes = stock_symbols.pmap {|s| get_quote(s)}
|
data/Rakefile
ADDED
data/lib/pmap.rb
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
|
2
|
+
# I'd prefer to create this as a module named "Pmap" and then poke
|
3
|
+
# "Pmap" into "Enumerable". I haven't figured out how to do it.
|
4
|
+
# So, I directly reopen "Enumerable" and add "p" methods...
|
5
|
+
|
6
|
+
require 'thread' unless defined?(Mutex)
|
7
|
+
|
8
|
+
# Global variable for the default thread pool size.
|
9
|
+
$pmap_default_thread_count ||= 64
|
10
|
+
|
11
|
+
module Enumerable
  # Parallel "map" for any Enumerable.
  #
  # Runs the given block once per item on a pool of worker threads and
  # returns an Array of the block results, in the same order as the items.
  #
  # [thread_count] Optional upper bound on the number of threads to create.
  #                Must be nil or a value >= 1. When nil, the global
  #                $pmap_default_thread_count is used. Never more threads
  #                than items are started.
  #
  # Returns an Enumerator when called without a block, as "map" does.
  # Raises ArgumentError if thread_count is given and is not >= 1.
  def pmap(thread_count=nil, &block)
    # Values that do not understand ">=" (e.g. an Array) are rejected here;
    # values whose ">=" cannot compare with 1 (e.g. a String) raise
    # ArgumentError from the comparison itself.
    valid_count = thread_count.nil? ||
      (thread_count.respond_to?(:>=) && thread_count >= 1)
    raise ArgumentError, "thread_count must be at least one." unless valid_count

    # Without a block, "yield" would fail inside every worker thread, so
    # hand back an Enumerator instead.
    return to_enum(:pmap, thread_count) unless block_given?

    in_array = self.to_a
    size = in_array.size
    thread_count = [thread_count || $pmap_default_thread_count, size].min
    out_array = Array.new(size)
    semaphore = Mutex.new
    index = -1 # Only read or written while holding semaphore
    threads = (0...thread_count).map {
      Thread.new {
        # Each worker repeatedly claims the next unprocessed index under
        # the lock, then runs the block outside the lock.
        while (i = semaphore.synchronize { index += 1 }) < size
          out_array[i] = yield(in_array[i])
        end
      }
    }
    # Thread#join re-raises an exception that terminated the worker.
    threads.each { |t| t.join }
    out_array
  end

  # Parallel "each" for any Enumerable.
  #
  # Runs the given block once per item on a pool of worker threads.
  #
  # [thread_count] Optional upper bound on the number of threads to create;
  #                same rules as for "pmap".
  #
  # Returns self when a block is given, otherwise an Enumerator.
  # Raises ArgumentError if thread_count is given and is not >= 1.
  def peach(thread_count=nil, &block)
    # Delegating to pmap validates thread_count and, when a block is
    # given, does the work. The result array it builds is discarded.
    pmap(thread_count, &block)
    block_given? ? self : to_enum(:peach, thread_count)
  end
end
|
data/lib/pmap/version.rb
ADDED
data/pmap.gemspec
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path('../lib', __FILE__)
|
3
|
+
require 'pmap/version'
|
4
|
+
|
5
|
+
# Gem specification for pmap. The version comes from Pmap::VERSION
# (required from 'pmap/version' before this block runs).
Gem::Specification.new do |s|
  s.name        = 'pmap'
  s.version     = Pmap::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ['Bruce Adams']
  s.email       = ['bruce.adams@acm.org']
  s.homepage    = ''
  # The packaged LICENSE file is the Apache License, Version 2.0.
  s.licenses    = ['Apache-2.0']
  s.summary     = %q{Add parallel methods into Enumerable: pmap and peach}
  s.description = %q{Add parallel methods into Enumerable: pmap and peach}

  # File lists are taken from git, so the gem must be built from inside
  # a git checkout.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ['lib']
end
|
data/test/pmap_test.rb
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'test/unit'
|
4
|
+
require 'pmap'
|
5
|
+
|
6
|
+
# Tests for Enumerable#pmap and Enumerable#peach.
#
# The timing tests sleep for one second per item and check how long the
# whole call took, so the suite needs several seconds to run.
class Pmap_Test < Test::Unit::TestCase

  # NOTE(review): the "bad_" prefix keeps this from matching the usual
  # "test_" naming, so it is presumably disabled on purpose. It calls
  # pmap without a block.
  def bad_test_noproc_range
    range = (1..10)
    assert_equal(range.map, range.pmap)
  end

  def test_basic_range
    proc = Proc.new {|x| x*x}
    range = (1..10)
    assert_equal(range.map(&proc), range.pmap(&proc))
  end

  # NOTE(review): presumably disabled on purpose, like
  # bad_test_noproc_range. It calls pmap without a block.
  def bad_test_noproc_array
    array = (1..10).to_a
    assert_equal(array.map, array.pmap)
  end

  def test_basic_array
    proc = Proc.new {|x| x*x*x}
    array = (1..10).to_a
    assert_equal(array.map(&proc), array.pmap(&proc))
  end

  # Ten one-second sleeps should overlap and finish in under two seconds.
  def test_time_savings
    elapsed = seconds_taken { (1..10).pmap{ sleep 1 } }
    assert(elapsed < 2, 'Parallel sleeps too slow: %.1f seconds' % elapsed)
  end

  # Thread counts below one, and non-numeric thread counts, are rejected.
  def test_bad_thread_limits
    assert_raise(ArgumentError) {(1..10).pmap(-1){ sleep 1 }}
    assert_raise(ArgumentError) {(1..10).peach(0){ sleep 1 }}
    assert_raise(ArgumentError) {(1..10).peach(0.99){ sleep 1 }}
    assert_raise(ArgumentError) {(1..10).pmap('a'){ sleep 1 }}
    assert_raise(ArgumentError) {(1..10).peach([1,2,3]){ sleep 1 }}
  end

  # Ten one-second sleeps on five threads need two rounds.
  def test_thread_limits
    elapsed = seconds_taken { (1..10).pmap(5){ sleep 1 } }
    assert(elapsed >= 2, 'Limited threads too fast: %.1f seconds' % elapsed)
    assert(elapsed < 3, 'Parallel sleeps too slow: %.1f seconds' % elapsed)
  end

  # 128 one-second sleeps with no explicit thread count are expected to
  # take two rounds, i.e. the default limit is expected to be 64.
  def test_default_thread_limit
    elapsed = seconds_taken { (1..128).pmap{ sleep 1 } }
    assert(elapsed >= 2, 'Limited threads too fast: %.1f seconds' % elapsed)
    assert(elapsed < 3, 'Parallel sleeps too slow: %.1f seconds' % elapsed)
  end

  private

  # Runs the block and returns the elapsed wall-clock time in seconds.
  def seconds_taken
    start = Time.now
    yield
    Time.now-start
  end
end
|
metadata
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: pmap
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease:
|
5
|
+
version: 0.0.2
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Bruce Adams
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
|
13
|
+
date: 2011-03-03 00:00:00 -05:00
|
14
|
+
default_executable:
|
15
|
+
dependencies: []
|
16
|
+
|
17
|
+
description: "Add parallel methods into Enumerable: pmap and peach"
|
18
|
+
email:
|
19
|
+
- bruce.adams@acm.org
|
20
|
+
executables: []
|
21
|
+
|
22
|
+
extensions: []
|
23
|
+
|
24
|
+
extra_rdoc_files: []
|
25
|
+
|
26
|
+
files:
|
27
|
+
- .gitignore
|
28
|
+
- Gemfile
|
29
|
+
- LICENSE
|
30
|
+
- README.md
|
31
|
+
- Rakefile
|
32
|
+
- lib/pmap.rb
|
33
|
+
- lib/pmap/version.rb
|
34
|
+
- pmap.gemspec
|
35
|
+
- test/pmap_test.rb
|
36
|
+
has_rdoc: true
|
37
|
+
homepage: ""
|
38
|
+
licenses: []
|
39
|
+
|
40
|
+
post_install_message:
|
41
|
+
rdoc_options: []
|
42
|
+
|
43
|
+
require_paths:
|
44
|
+
- lib
|
45
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
46
|
+
none: false
|
47
|
+
requirements:
|
48
|
+
- - ">="
|
49
|
+
- !ruby/object:Gem::Version
|
50
|
+
version: "0"
|
51
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
52
|
+
none: false
|
53
|
+
requirements:
|
54
|
+
- - ">="
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
version: "0"
|
57
|
+
requirements: []
|
58
|
+
|
59
|
+
rubyforge_project:
|
60
|
+
rubygems_version: 1.5.0
|
61
|
+
signing_key:
|
62
|
+
specification_version: 3
|
63
|
+
summary: "Add parallel methods into Enumerable: pmap and peach"
|
64
|
+
test_files: []
|
65
|
+
|