pezra-parallel_each 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.txt ADDED
@@ -0,0 +1,58 @@
1
+ = parallel_each
2
+
3
+ * http://github.com/pezra/parallel_each
4
+
5
+ == DESCRIPTION:
6
+
7
+ This is a set of Enumerable-like methods that execute their blocks in parallel.
8
+
9
+ == FEATURES/PROBLEMS:
10
+
11
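+ * Enumerable#p_each runs a block for every element using a bounded number of concurrent threads (20 by default); a failure in any block is reported to the caller as an AsyncTaskError.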
12
+
13
+ == SYNOPSIS:
14
+
15
+ require 'rubygems'
16
+ require 'parallel_each'
17
+
18
+ (1..1000).p_each(15) do |i|
19
+ # do something with i
20
+ end
21
+
22
+ This is much like Enumerable#each except that the block is
23
+ executed in up to 15 threads simultaneously. On the green
24
+ threaded MRI this can result in significant performance gains if the
25
+ block is IO bound. On native-thread implementations the gains might be
26
+ significant for CPU bound operations.
27
+
28
+ == REQUIREMENTS:
29
+
30
+
31
+ == INSTALL:
32
+
33
+ * sudo gem install parallel_each
34
+
35
+ == LICENSE:
36
+
37
+ (The MIT License)
38
+
39
+ Copyright (c) 2008 Peter Williams
40
+
41
+ Permission is hereby granted, free of charge, to any person obtaining
42
+ a copy of this software and associated documentation files (the
43
+ 'Software'), to deal in the Software without restriction, including
44
+ without limitation the rights to use, copy, modify, merge, publish,
45
+ distribute, sublicense, and/or sell copies of the Software, and to
46
+ permit persons to whom the Software is furnished to do so, subject to
47
+ the following conditions:
48
+
49
+ The above copyright notice and this permission notice shall be
50
+ included in all copies or substantial portions of the Software.
51
+
52
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
53
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
54
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
55
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
56
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
57
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
58
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,86 @@
1
# Raised in the calling thread to report that an asynchronous task failed.
#
# It wraps the exception that was raised inside the worker so the caller
# can still inspect it through #cause.
#
# Inherits from StandardError (rather than Exception) so that a plain
# `rescue` clause catches it like any other application error.
class AsyncTaskError < StandardError
  # Overrides the built-in Exception#cause so that it always returns the
  # wrapped worker exception.
  #
  # @return [Exception] the exception originally raised by the failed task.
  attr_reader :cause

  # Create a new AsyncTaskError.
  #
  # @param [Exception] exception the exception raised by the failed task.
  def initialize(exception)
    @cause = exception
    super("Worker error: #{exception.message} (#{exception.class.name})")
  end
end
9
+
10
+
11
# Runs blocks asynchronously, each in its own thread, while never tracking
# more than a fixed number of threads at once.
#
# A task that raises does not crash its thread; the exception is wrapped in
# an AsyncTaskError and re-raised in the caller the next time #do or
# #wait_for_all_to_finish inspects the finished threads.
class BoundedAsyncTaskRunner
  # @return [Integer] the maximum number of tasks executed simultaneously.
  attr_reader :max_workers

  # Create a new BoundedAsyncTaskRunner.
  #
  # @param [Integer] max_workers the maximum number of asynchronous
  #   tasks to simultaneously execute.
  #
  # @raise ArgumentError If max_workers is less than 1, because no task
  #   could ever be started with such a limit.
  def initialize(max_workers = 20)
    raise ArgumentError, "max_workers must be at least 1" unless max_workers >= 1

    @threads = []
    @max_workers = max_workers
  end

  # Execute the provided block asynchronously.
  #
  # Blocks the caller while the maximum number of tasks is still running.
  #
  # @param *args Objects to yield to the provided block when it is
  #   invoked.
  #
  # @yield The task to execute asynchronously.
  #
  # @yieldparam *args The arguments passed into this method.
  #
  # @raise AsyncTaskError If any previously executed task has failed
  #   (i.e. raised an error) an AsyncTaskError will be raised instead
  #   of executing the async task.
  def do(*args, &block)
    purge_dead
    while @threads.size >= max_workers
      # We still have the maximum number of active threads, so we need to
      # wait for one to finish. Join on the oldest thread because it will
      # probably finish first; the timeout lets us notice any other thread
      # that finishes earlier.
      @threads.first.join(0.1)
      purge_dead
    end

    # We can start a new thread without exceeding the threshold. The task and
    # its arguments are handed to the thread explicitly.
    @threads << Thread.start(block, args) do |task, task_args|
      begin
        task.call(*task_args)
      rescue Exception => e
        # Deliberately broad: whatever the task raised becomes the thread's
        # value so that purge_dead can re-raise it in the calling thread.
        AsyncTaskError.new(e)
      end
    end
  end
  alias :do_async :do

  # Blocks until all currently running tasks have completed.
  #
  # @raise AsyncTaskError If any previously executed task has failed
  #   (i.e. raised an error) an AsyncTaskError will be raised.
  def wait_for_all_to_finish
    until @threads.empty?
      @threads.first.join
      purge_dead
    end
  end

  private

  # Forget every thread that finished successfully and raise the error of
  # the oldest finished thread that failed, if there is one.
  #
  # Works on a snapshot of the finished threads instead of deleting from
  # @threads while iterating over it, which used to skip entries. A failed
  # thread is intentionally kept, so the failure keeps being reported on
  # later calls.
  def purge_dead
    finished = @threads.reject { |thread| thread.alive? }
    failed, succeeded = finished.partition { |thread| thread.value.kind_of?(AsyncTaskError) }

    @threads -= succeeded
    raise failed.first.value unless failed.empty?
  end
end
@@ -0,0 +1,14 @@
1
+ require 'pathname'
2
+ require Pathname(__FILE__).dirname + './bounded_async_task_runner'
3
+
4
module Enumerable
  # Like #each, but the block is executed asynchronously for every element,
  # with at most max_tasks invocations running at the same time. Does not
  # return until every invocation has finished.
  #
  # @param [Integer] max_tasks the maximum number of block invocations to
  #   run simultaneously.
  #
  # @yield The work to perform for each element.
  #
  # @yieldparam item The current element.
  #
  # @raise AsyncTaskError If the block raised for any element.
  def p_each(max_tasks = 20, &block)
    runner = BoundedAsyncTaskRunner.new(max_tasks)

    each { |item| runner.do(item, &block) }

    runner.wait_for_all_to_finish
  end
end
@@ -0,0 +1,19 @@
1
# Gem specification for parallel_each.
Gem::Specification.new do |s|
  s.name = "parallel_each"
  s.version = "0.1.1"
  s.date = "2008-11-05"
  s.summary = "Parallelized Enumerable methods"
  s.email = "pezra@barelyenough.org"
  s.homepage = "http://github.com/pezra/parallel_each"
  s.description = "Provides enumerable methods with a configurable level of parallelism. Suitable for parallelizing non-trivial operations on large Enumerables."
  # The license text itself lives in README.txt.
  s.license = "MIT"
  s.authors = ["Peter Williams"]
  s.files = ["README.txt",
             "parallel_each.gemspec",
             "lib/bounded_async_task_runner.rb",
             "lib/parallel_each.rb"]
  s.test_files = ["spec/bounded_async_task_runner_spec.rb",
                  "spec/spec_helper.rb"]
  # has_rdoc is no longer set: RubyGems deprecated that attribute with no
  # replacement. The rdoc options below are still honoured.
  s.rdoc_options = ["--main", "README.txt"]
  s.extra_rdoc_files = ["README.txt"]
end
@@ -0,0 +1,65 @@
1
+ require 'pathname'
2
+ require Pathname(__FILE__).dirname + 'spec_helper'
3
+
4
+ require 'bounded_async_task_runner'
5
+
6
# Behavioural examples for BoundedAsyncTaskRunner. Every example gets a
# fresh runner limited to five simultaneous tasks.
describe BoundedAsyncTaskRunner do
  before do
    @runner = BoundedAsyncTaskRunner.new(5)
  end

  describe "#do(&block)" do
    it "should run tasks" do
      out = Array.new

      @runner.do(out) { |results| results << :marker }
      @runner.wait_for_all_to_finish

      out.should == [:marker]
    end

    it "should run jobs as the same time" do
      out = Array.new

      # The first task sleeps before recording its marker, so :b can only
      # come first if both tasks really run concurrently.
      @runner.do(out) { |results| sleep(0.1); results << :a }
      @runner.do(out) { |results| results << :b }

      @runner.wait_for_all_to_finish
      out.should == [:b, :a]
    end

    it "should raise exception if any tasks fail" do
      lambda {
        @runner.do { raise ArgumentError, "testing" }
        @runner.wait_for_all_to_finish
      }.should raise_error(AsyncTaskError, "Worker error: testing (ArgumentError)")
    end

    it "should included exception raised by task AsyncTaskError exception" do
      exception = ArgumentError.new("testing")
      @runner.do(exception) { raise(exception) }

      # Capture the error explicitly so the example fails, instead of
      # passing vacuously, when nothing is raised at all.
      raised = nil
      begin
        @runner.wait_for_all_to_finish
      rescue AsyncTaskError => e
        raised = e
      end

      raised.should_not be_nil
      raised.cause.should == exception
    end

    it "should not run more than the max number of tasks at any one moment" do
      lock = Mutex.new
      running = 0
      peak = 0

      # Every task records how many tasks are inside the block at the same
      # time; the highest value seen must not exceed the runner's limit.
      20.times do
        @runner.do do
          lock.synchronize do
            running += 1
            peak = running if running > peak
          end
          sleep(0.01)
          lock.synchronize { running -= 1 }
        end
      end
      @runner.wait_for_all_to_finish

      peak.should <= 5
    end
  end

  describe "#wait_for_all_to_finish" do
    it "should not return until all tasks are finished" do
      out = Array.new

      3.times do
        @runner.do(out) { |results| sleep(0.1); results << :a }
      end
      @runner.wait_for_all_to_finish

      out.should == [:a, :a, :a]
    end
  end
end
@@ -0,0 +1,9 @@
1
+ require 'rubygems'
2
+ require 'pathname'
3
+ require 'spec'
4
+
5
+ $LOAD_PATH << Pathname(__FILE__).dirname + '../lib'
6
+
7
+ Spec::Runner.configure do |config|
8
+
9
+ end
metadata ADDED
@@ -0,0 +1,58 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pezra-parallel_each
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.1
5
+ platform: ruby
6
+ authors:
7
+ - Peter Williams
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2008-11-05 00:00:00 -08:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description: Provides enumerable methods with a configurable level of parallelism. Suitable for parallelizing non-trivial operations on large Enumerables.
17
+ email: pezra@barelyenough.org
18
+ executables: []
19
+
20
+ extensions: []
21
+
22
+ extra_rdoc_files:
23
+ - README.txt
24
+ files:
25
+ - README.txt
26
+ - parallel_each.gemspec
27
+ - lib/bounded_async_task_runner.rb
28
+ - lib/parallel_each.rb
29
+ has_rdoc: true
30
+ homepage: http://github.com/pezra/parallel_each
31
+ post_install_message:
32
+ rdoc_options:
33
+ - --main
34
+ - README.txt
35
+ require_paths:
36
+ - lib
37
+ required_ruby_version: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ version: "0"
42
+ version:
43
+ required_rubygems_version: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: "0"
48
+ version:
49
+ requirements: []
50
+
51
+ rubyforge_project:
52
+ rubygems_version: 1.2.0
53
+ signing_key:
54
+ specification_version: 2
55
+ summary: Parallelized Enumerable methods
56
+ test_files:
57
+ - spec/bounded_async_task_runner_spec.rb
58
+ - spec/spec_helper.rb