pmap 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,6 @@
1
+ *~
2
+ *.gem
3
+ .bundle
4
+ .rvmrc
5
+ Gemfile.lock
6
+ pkg/*
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source "http://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in pmap.gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,13 @@
1
+ Copyright 2011 Bruce Adams
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
@@ -0,0 +1,51 @@
1
+ pmap
2
+ ====
3
+
4
+ This Ruby gem adds two methods to any Enumerable (notably including
5
+ any Array). The two added methods are:
6
+
7
+ * _pmap_ parallel map
8
+ * _peach_ parallel each
9
+
10
+ Threading in Ruby has limitations.
11
+ ----------------------------------
12
+
13
+ Matz Ruby 1.8.* uses _green_ threads. All Ruby threads are run within
14
+ a single thread in a single process. A single Ruby program will never
15
+ use more than a single core of a multi-core machine.
16
+
17
+ Matz Ruby 1.9.* uses _native_ threads. Each Ruby thread maps directly
18
+ to a thread in the underlying operating system. In theory, a single
19
+ Ruby program can use multiple cores. Unfortunately, there is a global
20
+ interpreter lock _GIL_ that causes single-threaded behavior.
21
+
22
+ JRuby also uses _native_ threads. JRuby avoids the global interpreter
23
+ lock, allowing a single Ruby program to really use multiple CPU cores.
24
+
25
+ Threading is useful for remote IO, such as HTTP
26
+ -----------------------------------------------
27
+
28
+ Despite the Matz Ruby threading limitations, IO bound actions can
29
+ greatly benefit from multi-threading. A very typical use is making
30
+ multiple HTTP requests in parallel. Issuing those requests in separate
31
+ Ruby threads means the requests will be issued very quickly, well
32
+ before the responses start coming back. As responses come back, they
33
+ will be processed as they arrive.
34
+
35
+ Example
36
+ -------
37
+
38
+ Suppose that we have a function get_quote that calls out to a stock
39
+ quote service to get a current stock price. The response time for
40
+ get_quote averages 0.5 seconds.
41
+
42
+ stock_symbols = [:ibm, :goog, :appl, :msft, :hp, :orcl]
43
+
44
+ # This will take about three seconds;
45
+ # an eternity if you want to render a web page.
46
+ stock_quotes = stock_symbols.map {|s| get_quote(s)}
47
+
48
+ # Replacing "map" with "pmap" speeds it up.
49
+ # This will take about half a second;
50
+ # however long the single slowest response took.
51
+ stock_quotes = stock_symbols.pmap {|s| get_quote(s)}
@@ -0,0 +1,2 @@
1
+ require 'bundler'
2
+ Bundler::GemHelper.install_tasks
@@ -0,0 +1,47 @@
1
+
2
+ # I'd prefer to create this as a module named "Pmap" and then poke
3
+ # "Pmap" into "Enumerable". I haven't figured out how to do it.
4
+ # So, I directly reopen "Enumerable" and add "p" methods...
5
+
6
+ require 'thread' unless defined?(Mutex)
7
+
8
+ # Global variable for the default thread pool size.
9
+ $pmap_default_thread_count ||= 64
10
+
11
module Enumerable
  # Parallel "map" for any Enumerable.
  #
  # Runs the given block once per item on a pool of worker threads and
  # returns an Array of the block's results, in the same order as the
  # items of the receiver.
  #
  # Requires a block of code to run for each Enumerable item.
  # [thread_count] is the maximum number of threads to create. Optional;
  #                when nil, $pmap_default_thread_count is used. Never
  #                more threads than there are items.
  #
  # Raises ArgumentError when thread_count is given but is not a value
  # that compares as >= 1.
  def pmap(thread_count=nil, &proc)
    unless thread_count.nil?
      usable = begin
        thread_count.respond_to?(:>=) && thread_count >= 1
      rescue ArgumentError
        # The value is comparable, just not with a number (e.g. 'a' >= 1).
        # Treat it like any other invalid count so the caller always sees
        # the same error message.
        false
      end
      raise ArgumentError, "thread_count must be at least one." unless usable
    end

    in_array = to_a # snapshot so workers can address items by index
    size = in_array.size
    thread_count = [thread_count || $pmap_default_thread_count, size].min
    out_array = Array.new(size)
    semaphore = Mutex.new
    index = -1 # Only read or written while holding semaphore
    threads = (0...thread_count).map do
      Thread.new do
        # Each worker claims the next unprocessed index under the lock and
        # stops once the indexes run out. Every slot of out_array is
        # written by exactly one worker.
        while (i = semaphore.synchronize { index += 1 }) < size
          out_array[i] = yield(in_array[i])
        end
      end
    end
    # join re-raises, in the calling thread, an exception that ended a worker.
    threads.each { |t| t.join }
    out_array
  end

  # Parallel "each" for any Enumerable.
  #
  # Runs the given block once per item using pmap, discards the block's
  # results and returns the receiver.
  #
  # Requires a block of code to run for each Enumerable item.
  # [thread_count] is the maximum number of threads to create. Optional.
  #
  # Raises ArgumentError under the same conditions as pmap.
  def peach(thread_count=nil, &proc)
    # pmap still builds a result array that is thrown away here.
    pmap(thread_count, &proc)
    self
  end
end
@@ -0,0 +1,3 @@
1
# Namespace for gem-level constants of the pmap gem.
module Pmap
  # Version string of the gem; read by pmap.gemspec.
  # Frozen so the shared constant cannot be mutated in place.
  VERSION = "0.0.2".freeze
end
@@ -0,0 +1,19 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path('../lib', __FILE__)
3
+ require 'pmap/version'
4
+
5
# Packaging metadata for the pmap gem.
Gem::Specification.new do |spec|
  # Identity
  spec.name     = 'pmap'
  spec.version  = Pmap::VERSION
  spec.platform = Gem::Platform::RUBY

  # Author and project information
  spec.authors     = ['Bruce Adams']
  spec.email       = ['bruce.adams@acm.org']
  spec.homepage    = ''
  spec.summary     = %q{Add parallel methods into Enumerable: pmap and peach}
  spec.description = %q{Add parallel methods into Enumerable: pmap and peach}

  # Contents are taken from the files tracked by git, one path per line.
  spec.files       = `git ls-files`.split("\n")
  spec.test_files  = `git ls-files -- {test,spec,features}/*`.split("\n")
  # Executables are listed by base name only.
  spec.executables = `git ls-files -- bin/*`.split("\n").map do |path|
    File.basename(path)
  end
  spec.require_paths = ['lib']
end
@@ -0,0 +1,60 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'test/unit'
4
+ require 'pmap'
5
+
6
# Unit tests for Enumerable#pmap and Enumerable#peach.
#
# Several tests are timing based: the block sleeps one second per item, so
# the elapsed wall-clock time shows how many items were run concurrently.
class Pmap_Test < Test::Unit::TestCase

  # Disabled (the name lacks the "test_" prefix, so Test::Unit skips it):
  # pmap without a block on a Range.
  def bad_test_noproc_range
    range = (1..10)
    assert_equal(range.map, range.pmap)
  end

  # pmap over a Range yields the same results, in the same order, as map.
  def test_basic_range
    square = Proc.new {|x| x*x}
    range = (1..10)
    assert_equal(range.map(&square), range.pmap(&square))
  end

  # Disabled (the name lacks the "test_" prefix, so Test::Unit skips it):
  # pmap without a block on an Array.
  def bad_test_noproc_array
    array = (1..10).to_a
    assert_equal(array.map, array.pmap)
  end

  # pmap over an Array yields the same results, in the same order, as map.
  def test_basic_array
    cube = Proc.new {|x| x*x*x}
    array = (1..10).to_a
    assert_equal(array.map(&cube), array.pmap(&cube))
  end

  # Ten one-second sleeps must overlap: well under ten seconds in total.
  def test_time_savings
    start = Time.now
    (1..10).pmap{ sleep 1 }
    elapsed = Time.now-start
    assert(elapsed < 2, 'Parallel sleeps too slow: %.1f seconds' % elapsed)
  end

  # Thread counts below one, and non-numeric values, are rejected.
  def test_bad_thread_limits
    assert_raise(ArgumentError) {(1..10).pmap(-1){ sleep 1 }}
    assert_raise(ArgumentError) {(1..10).peach(0){ sleep 1 }}
    assert_raise(ArgumentError) {(1..10).peach(0.99){ sleep 1 }}
    assert_raise(ArgumentError) {(1..10).pmap('a'){ sleep 1 }}
    assert_raise(ArgumentError) {(1..10).peach([1,2,3]){ sleep 1 }}
  end

  # Ten items on five threads need two rounds of one-second sleeps.
  def test_thread_limits
    start = Time.now
    (1..10).pmap(5){ sleep 1 }
    elapsed = Time.now-start
    assert(elapsed >= 2, 'Limited threads too fast: %.1f seconds' % elapsed)
    assert(elapsed < 3, 'Parallel sleeps too slow: %.1f seconds' % elapsed)
  end

  # With no explicit thread_count, 128 items take two rounds; this relies on
  # the default pool size being 64.
  def test_default_thread_limit
    start = Time.now
    (1..128).pmap{ sleep 1 }
    elapsed = Time.now-start
    assert(elapsed >= 2, 'Limited threads too fast: %.1f seconds' % elapsed)
    assert(elapsed < 3, 'Parallel sleeps too slow: %.1f seconds' % elapsed)
  end
end
metadata ADDED
@@ -0,0 +1,65 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pmap
3
+ version: !ruby/object:Gem::Version
4
+ prerelease:
5
+ version: 0.0.2
6
+ platform: ruby
7
+ authors:
8
+ - Bruce Adams
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+
13
+ date: 2011-03-03 00:00:00 -05:00
14
+ default_executable:
15
+ dependencies: []
16
+
17
+ description: "Add parallel methods into Enumerable: pmap and peach"
18
+ email:
19
+ - bruce.adams@acm.org
20
+ executables: []
21
+
22
+ extensions: []
23
+
24
+ extra_rdoc_files: []
25
+
26
+ files:
27
+ - .gitignore
28
+ - Gemfile
29
+ - LICENSE
30
+ - README.md
31
+ - Rakefile
32
+ - lib/pmap.rb
33
+ - lib/pmap/version.rb
34
+ - pmap.gemspec
35
+ - test/pmap_test.rb
36
+ has_rdoc: true
37
+ homepage: ""
38
+ licenses: []
39
+
40
+ post_install_message:
41
+ rdoc_options: []
42
+
43
+ require_paths:
44
+ - lib
45
+ required_ruby_version: !ruby/object:Gem::Requirement
46
+ none: false
47
+ requirements:
48
+ - - ">="
49
+ - !ruby/object:Gem::Version
50
+ version: "0"
51
+ required_rubygems_version: !ruby/object:Gem::Requirement
52
+ none: false
53
+ requirements:
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ version: "0"
57
+ requirements: []
58
+
59
+ rubyforge_project:
60
+ rubygems_version: 1.5.0
61
+ signing_key:
62
+ specification_version: 3
63
+ summary: "Add parallel methods into Enumerable: pmap and peach"
64
+ test_files: []
65
+