pmap 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +6 -0
- data/Gemfile +4 -0
- data/LICENSE +13 -0
- data/README.md +51 -0
- data/Rakefile +2 -0
- data/lib/pmap.rb +47 -0
- data/lib/pmap/version.rb +3 -0
- data/pmap.gemspec +19 -0
- data/test/pmap_test.rb +60 -0
- metadata +65 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
Copyright 2011 Bruce Adams
|
2
|
+
|
3
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
you may not use this file except in compliance with the License.
|
5
|
+
You may obtain a copy of the License at
|
6
|
+
|
7
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
|
9
|
+
Unless required by applicable law or agreed to in writing, software
|
10
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
See the License for the specific language governing permissions and
|
13
|
+
limitations under the License.
|
data/README.md
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
pmap
|
2
|
+
====
|
3
|
+
|
4
|
+
This Ruby gem adds two methods to any Enumerable (notably including
|
5
|
+
any Array). The two added methods are:
|
6
|
+
|
7
|
+
* _pmap_ parallel map
|
8
|
+
* _peach_ parallel each
|
9
|
+
|
10
|
+
Threading in Ruby has limitations.
|
11
|
+
----------------------------------
|
12
|
+
|
13
|
+
Matz Ruby 1.8.* uses _green_ threads. All Ruby threads are run within
|
14
|
+
a single thread in a single process. A single Ruby program will never
|
15
|
+
use more than a single core of a multi-core machine.
|
16
|
+
|
17
|
+
Matz Ruby 1.9.* uses _native_ threads. Each Ruby thread maps directly
|
18
|
+
to a thread in the underlying operating system. In theory, a single
|
19
|
+
Ruby program can use multiple cores. Unfortunately, there is a global
|
20
|
+
interpreter lock (_GIL_) that causes single-threaded behavior.
|
21
|
+
|
22
|
+
JRuby also uses _native_ threads. JRuby avoids the global interpreter
|
23
|
+
lock, allowing a single Ruby program to really use multiple CPU cores.
|
24
|
+
|
25
|
+
Threading useful for remote IO, such as HTTP
|
26
|
+
--------------------------------------------
|
27
|
+
|
28
|
+
Despite the Matz Ruby threading limitations, IO bound actions can
|
29
|
+
greatly benefit from multi-threading. A very typical use is making
|
30
|
+
multiple HTTP requests in parallel. Issuing those requests in separate
|
31
|
+
Ruby threads means the requests will be issued very quickly, well
|
32
|
+
before the responses start coming back. As responses come back, they
|
33
|
+
will be processed as they arrive.
|
34
|
+
|
35
|
+
Example
|
36
|
+
-------
|
37
|
+
|
38
|
+
Suppose that we have a function get_quote that calls out to a stock
|
39
|
+
quote service to get a current stock price. The response time for
|
40
|
+
get_quote averages 0.5 seconds.
|
41
|
+
|
42
|
+
stock_symbols = [:ibm, :goog, :appl, :msft, :hp, :orcl]
|
43
|
+
|
44
|
+
# This will take about three seconds;
|
45
|
+
# an eternity if you want to render a web page.
|
46
|
+
stock_quotes = stock_symbols.map {|s| get_quote(s)}
|
47
|
+
|
48
|
+
# Replacing "map" with "pmap" speeds it up.
|
49
|
+
# This will take about half a second;
|
50
|
+
# however long the single slowest response took.
|
51
|
+
stock_quotes = stock_symbols.pmap {|s| get_quote(s)}
|
data/Rakefile
ADDED
data/lib/pmap.rb
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
|
2
|
+
# I'd prefer to create this as a module named "Pmap" and then poke
|
3
|
+
# "Pmap" into "Enumerable". I haven't figured out how to do it.
|
4
|
+
# So, I directly reopen "Enumerable" and add "p" methods...
|
5
|
+
|
6
|
+
require 'thread' unless defined?(Mutex)
|
7
|
+
|
8
|
+
# Global variable for the default thread pool size.
|
9
|
+
$pmap_default_thread_count ||= 64
|
10
|
+
|
11
|
+
module Enumerable
  # Parallel "map" for any Enumerable.
  # Requires a block of code to run for each Enumerable item. The block is
  # called from worker threads, so it may run for several items at once.
  # Returns an Array of the block results, in the same order as the items.
  # [thread_count] is the maximum number of threads to create. Optional;
  # when omitted, $pmap_default_thread_count is used. Never more threads
  # than items are started.
  # Raises ArgumentError unless thread_count is nil or a number >= 1.
  def pmap(thread_count=nil, &proc)
    pmap_check_thread_count(thread_count)
    in_array = to_a # snapshot so that workers can fetch items by index
    # Preallocate the result so each worker only assigns to existing slots.
    out_array = Array.new(in_array.size)
    pmap_run_workers(in_array, thread_count) do |item, index|
      out_array[index] = yield(item)
    end
    out_array
  end

  # Parallel "each" for any Enumerable.
  # Requires a block of code to run for each Enumerable item. The block is
  # called from worker threads; its return values are discarded.
  # Returns self.
  # [thread_count] is the maximum number of threads to create. Optional;
  # when omitted, $pmap_default_thread_count is used.
  # Raises ArgumentError unless thread_count is nil or a number >= 1.
  def peach(thread_count=nil, &proc)
    pmap_check_thread_count(thread_count)
    # Shares the worker loop with "pmap" but builds no result array.
    pmap_run_workers(to_a, thread_count) { |item, _index| yield(item) }
    self
  end

  private

  # Raises ArgumentError unless thread_count is nil or a number >= 1.
  # Non-numeric values (strings, arrays, ...) are rejected here with the
  # same message, rather than by whatever error their comparison raises.
  def pmap_check_thread_count(thread_count)
    return if thread_count.nil?
    valid = thread_count.is_a?(Numeric) &&
            thread_count.respond_to?(:>=) &&
            thread_count >= 1
    raise ArgumentError, "thread_count must be at least one." unless valid
  end

  # Starts the worker threads and waits for all of them to finish.
  # Each worker repeatedly claims the next unprocessed index and yields
  # (item, index) to the given block until the items run out.
  # [items] is the Array of items to process.
  # [thread_count] is the already validated thread limit, or nil to use
  # $pmap_default_thread_count.
  def pmap_run_workers(items, thread_count)
    size = items.size
    # A fractional limit is rounded up, e.g. 1.5 allows two threads.
    worker_count = [thread_count || $pmap_default_thread_count, size].min.ceil
    semaphore = Mutex.new
    next_index = -1 # only read and written while holding semaphore
    workers = Array.new(worker_count) do
      Thread.new do
        while (i = semaphore.synchronize { next_index += 1 }) < size
          yield(items[i], i)
        end
      end
    end
    # join re-raises in the caller any exception that ended a worker.
    workers.each { |t| t.join }
  end
end
|
data/lib/pmap/version.rb
ADDED
data/pmap.gemspec
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path('../lib', __FILE__)
|
3
|
+
require 'pmap/version'
|
4
|
+
|
5
|
+
# Gem specification for pmap: identity, authorship, license, and the list
# of packaged files (taken from what git tracks).
Gem::Specification.new do |s|
  s.name        = 'pmap'
  s.version     = Pmap::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ['Bruce Adams']
  s.email       = ['bruce.adams@acm.org']
  s.homepage    = ''
  # The bundled LICENSE file is the Apache License, Version 2.0.
  s.licenses    = ['Apache-2.0']
  s.summary     = %q{Add parallel methods into Enumerable: pmap and peach}
  s.description = %q{Add parallel methods into Enumerable: pmap and peach}

  s.files         = `git ls-files`.split("\n")
  # Select the test files from s.files instead of passing a
  # "{test,spec,features}/*" brace pattern through the shell: brace
  # expansion is not available in every shell, and without it the pattern
  # matches nothing and test_files ends up empty.
  s.test_files    = s.files.grep(%r{\A(test|spec|features)/})
  s.executables   = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  s.require_paths = ['lib']
end
|
data/test/pmap_test.rb
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'test/unit'
|
4
|
+
require 'pmap'
|
5
|
+
|
6
|
+
# Unit tests for Enumerable#pmap and Enumerable#peach.
# The timing tests rely on each block call sleeping for one second, so the
# elapsed wall-clock time shows how many calls ran in parallel.
class Pmap_Test < Test::Unit::TestCase

  # Not named "test_...", so presumably kept out of the test run on
  # purpose: calling pmap without a block is not supported.
  def bad_test_noproc_range
    range = (1..10)
    assert_equal(range.map, range.pmap)
  end

  # pmap over a Range gives the same result as map.
  def test_basic_range
    proc = Proc.new {|x| x*x}
    range = (1..10)
    assert_equal(range.map(&proc), range.pmap(&proc))
  end

  # Not named "test_...", so presumably kept out of the test run on
  # purpose: calling pmap without a block is not supported.
  def bad_test_noproc_array
    array = (1..10).to_a
    assert_equal(array.map, array.pmap)
  end

  # pmap over an Array gives the same result as map.
  def test_basic_array
    proc = Proc.new {|x| x*x*x}
    array = (1..10).to_a
    assert_equal(array.map(&proc), array.pmap(&proc))
  end

  # Ten one-second sleeps run in parallel should take about one second.
  def test_time_savings
    elapsed = elapsed_seconds { (1..10).pmap{ sleep 1 } }
    assert(elapsed < 2, 'Parallel sleeps too slow: %.1f seconds' % elapsed)
  end

  # Thread counts below one, and non-numeric thread counts, are rejected.
  def test_bad_thread_limits
    assert_raise(ArgumentError) {(1..10).pmap(-1){ sleep 1 }}
    assert_raise(ArgumentError) {(1..10).peach(0){ sleep 1 }}
    assert_raise(ArgumentError) {(1..10).peach(0.99){ sleep 1 }}
    assert_raise(ArgumentError) {(1..10).pmap('a'){ sleep 1 }}
    assert_raise(ArgumentError) {(1..10).peach([1,2,3]){ sleep 1 }}
  end

  # Ten sleeps on five threads need two rounds: between 2 and 3 seconds.
  def test_thread_limits
    elapsed = elapsed_seconds { (1..10).pmap(5){ sleep 1 } }
    assert(elapsed >= 2, 'Limited threads too fast: %.1f seconds' % elapsed)
    assert(elapsed <  3, 'Parallel sleeps too slow: %.1f seconds' % elapsed)
  end

  # 128 sleeps with no explicit limit should take two rounds, which is
  # what a default limit of 64 threads gives.
  def test_default_thread_limit
    elapsed = elapsed_seconds { (1..128).pmap{ sleep 1 } }
    assert(elapsed >= 2, 'Limited threads too fast: %.1f seconds' % elapsed)
    assert(elapsed <  3, 'Parallel sleeps too slow: %.1f seconds' % elapsed)
  end

  private

  # Runs the given block and returns the wall-clock seconds it took.
  def elapsed_seconds
    start = Time.now
    yield
    Time.now - start
  end
end
|
metadata
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: pmap
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease:
|
5
|
+
version: 0.0.2
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Bruce Adams
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
|
13
|
+
date: 2011-03-03 00:00:00 -05:00
|
14
|
+
default_executable:
|
15
|
+
dependencies: []
|
16
|
+
|
17
|
+
description: "Add parallel methods into Enumerable: pmap and peach"
|
18
|
+
email:
|
19
|
+
- bruce.adams@acm.org
|
20
|
+
executables: []
|
21
|
+
|
22
|
+
extensions: []
|
23
|
+
|
24
|
+
extra_rdoc_files: []
|
25
|
+
|
26
|
+
files:
|
27
|
+
- .gitignore
|
28
|
+
- Gemfile
|
29
|
+
- LICENSE
|
30
|
+
- README.md
|
31
|
+
- Rakefile
|
32
|
+
- lib/pmap.rb
|
33
|
+
- lib/pmap/version.rb
|
34
|
+
- pmap.gemspec
|
35
|
+
- test/pmap_test.rb
|
36
|
+
has_rdoc: true
|
37
|
+
homepage: ""
|
38
|
+
licenses: []
|
39
|
+
|
40
|
+
post_install_message:
|
41
|
+
rdoc_options: []
|
42
|
+
|
43
|
+
require_paths:
|
44
|
+
- lib
|
45
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
46
|
+
none: false
|
47
|
+
requirements:
|
48
|
+
- - ">="
|
49
|
+
- !ruby/object:Gem::Version
|
50
|
+
version: "0"
|
51
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
52
|
+
none: false
|
53
|
+
requirements:
|
54
|
+
- - ">="
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
version: "0"
|
57
|
+
requirements: []
|
58
|
+
|
59
|
+
rubyforge_project:
|
60
|
+
rubygems_version: 1.5.0
|
61
|
+
signing_key:
|
62
|
+
specification_version: 3
|
63
|
+
summary: "Add parallel methods into Enumerable: pmap and peach"
|
64
|
+
test_files: []
|
65
|
+
|