pezra-parallel_each 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.txt +58 -0
- data/lib/bounded_async_task_runner.rb +86 -0
- data/lib/parallel_each.rb +14 -0
- data/parallel_each.gemspec +19 -0
- data/spec/bounded_async_task_runner_spec.rb +65 -0
- data/spec/spec_helper.rb +9 -0
- metadata +58 -0
data/README.txt
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
= parallel_each
|
2
|
+
|
3
|
+
* http://github.com/pezra/parallel_each
|
4
|
+
|
5
|
+
== DESCRIPTION:
|
6
|
+
|
7
|
+
This is a set of Enumerable-like methods that execute their blocks in parallel.
|
8
|
+
|
9
|
+
== FEATURES/PROBLEMS:
|
10
|
+
|
11
|
+
* Adds Enumerable#p_each, which runs the block for each element on a bounded number of threads.
|
12
|
+
|
13
|
+
== SYNOPSIS:
|
14
|
+
|
15
|
+
require 'rubygems'
|
16
|
+
require 'parallel_each'
|
17
|
+
|
18
|
+
(1..1000).p_each(15) do |i|
|
19
|
+
# do something with i
|
20
|
+
end
|
21
|
+
|
22
|
+
This is much like Enumerable#each except that the block would be
|
23
|
+
executed on up to 15 threads simultaneously. On the green
|
24
|
+
threaded MRI this can result in significant performance gains if the
|
25
|
+
block is IO bound. On native-thread implementations the gains might be
|
26
|
+
significant for CPU bound operations.
|
27
|
+
|
28
|
+
== REQUIREMENTS:
|
29
|
+
|
30
|
+
|
31
|
+
== INSTALL:
|
32
|
+
|
33
|
+
* sudo gem install parallel_each
|
34
|
+
|
35
|
+
== LICENSE:
|
36
|
+
|
37
|
+
(The MIT License)
|
38
|
+
|
39
|
+
Copyright (c) 2008 Peter Williams
|
40
|
+
|
41
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
42
|
+
a copy of this software and associated documentation files (the
|
43
|
+
'Software'), to deal in the Software without restriction, including
|
44
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
45
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
46
|
+
permit persons to whom the Software is furnished to do so, subject to
|
47
|
+
the following conditions:
|
48
|
+
|
49
|
+
The above copyright notice and this permission notice shall be
|
50
|
+
included in all copies or substantial portions of the Software.
|
51
|
+
|
52
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
53
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
54
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
55
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
56
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
57
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
58
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
@@ -0,0 +1,86 @@
|
|
1
|
+
# Raised to report that an asynchronously executed task failed.
#
# The exception raised inside the task is wrapped so it can be inspected
# through #cause.
#
# Inherits from StandardError (rather than Exception) so that a plain
# `rescue` / `rescue StandardError` in calling code sees worker failures.
# `rescue AsyncTaskError` and `rescue Exception` keep working as before.
class AsyncTaskError < StandardError
  # @return [Exception] the exception originally raised by the task.
  attr_reader :cause

  # @param [Exception] exception the exception raised by the failed task.
  def initialize(exception)
    @cause = exception
    super("Worker error: #{exception.message} (#{exception.class.name})")
  end
end
|
9
|
+
|
10
|
+
|
11
|
+
# Runs blocks on background threads while limiting how many of them are
# alive at the same time.
#
# A task that raises has its exception wrapped in an AsyncTaskError; that
# error is raised in the calling thread by the next call to #do or
# #wait_for_all_to_finish.
class BoundedAsyncTaskRunner
  # @return [Integer] the maximum number of tasks executed simultaneously.
  attr_reader :max_workers

  # Create a new BoundedAsyncTaskRunner.
  #
  # @param [Integer] max_workers the maximum number of asynchronous
  #   tasks to simultaneously execute.
  def initialize(max_workers=20)
    @threads = []
    @max_workers = max_workers
  end

  # Execute the provided block asynchronously.  Blocks the caller while
  # the maximum number of tasks is already running.
  #
  # @param *args Objects to yield to the provided block when it is
  #   invoked.
  #
  # @yield The task to execute asynchronously.
  #
  # @yieldparam *args The arguments passed into this method.
  #
  # @raise AsyncTaskError If any previously executed task has failed
  #   (i.e. raised an error) an AsyncTaskError will be raised instead
  #   of executing the async task.
  def do(*args, &block)
    purge_dead

    until @threads.size < max_workers
      # Still at capacity: wait briefly on the oldest thread (it will
      # probably finish first), then reap whatever has finished.  The
      # timeout keeps a long-running oldest task from hiding younger
      # tasks that have already completed.
      @threads.first.join(0.1)
      purge_dead
    end

    # The task and its arguments are handed over as thread arguments so the
    # worker operates on its own references.
    @threads << Thread.start(block, *args) do |task, *task_args|
      begin
        task.call(*task_args)
      rescue Exception => e
        # Deliberately broad: whatever the task raised becomes the thread's
        # value and is re-raised later in the caller's thread.
        AsyncTaskError.new(e)
      end
    end
  end
  alias :do_async :do

  # Blocks until all currently running tasks have completed.
  #
  # @raise AsyncTaskError If any previously executed task has failed
  #   (i.e. raised an error) an AsyncTaskError will be raised.
  def wait_for_all_to_finish
    until @threads.empty?
      @threads.first.join
      purge_dead
    end
  end

  private

  # Drops every finished thread whose task succeeded and raises the first
  # failure found.
  #
  # The list is filtered in a single delete_if pass instead of deleting
  # entries while iterating with each, which skipped the element following
  # every deleted one.  Threads whose task failed are kept in the list, so
  # later calls raise the same error again.
  #
  # @raise AsyncTaskError if a finished task had raised.
  def purge_dead
    failure = nil

    @threads.delete_if do |thread|
      next false if thread.alive? # ignore the alive ones

      result = thread.value
      if result.kind_of?(AsyncTaskError)
        failure ||= result
        false
      else
        true
      end
    end

    raise(failure) if failure
  end
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
require 'pathname'
|
2
|
+
require Pathname(__FILE__).dirname + './bounded_async_task_runner'
|
3
|
+
|
4
|
+
module Enumerable
  # Like #each, but hands every element to a BoundedAsyncTaskRunner so the
  # block runs on background threads, at most +max_tasks+ at a time.
  # Returns only after every started task has finished.
  #
  # @param [Integer] max_tasks the maximum number of block invocations to
  #   run simultaneously.
  #
  # @yield [item] called once per element, from a worker thread.
  #
  # @return [Enumerable] self, mirroring #each.
  #
  # @raise AsyncTaskError if the block raised for any element.
  def p_each(max_tasks=20, &block)
    runner = BoundedAsyncTaskRunner.new(max_tasks)

    each { |item| runner.do(item, &block) }

    # Was misspelled "runnder", which made every call end in a NameError.
    runner.wait_for_all_to_finish
    self
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# Gem specification for parallel_each: package identity, the files shipped
# in the gem, and the RDoc configuration.
Gem::Specification.new do |s|
  s.name        = "parallel_each"
  s.version     = "0.1.1"
  s.date        = "2008-11-05"
  s.summary     = "Parallelized Enumerable methods"
  s.description = "Provides enumerable methods with a configurable level of parallelism. Suitable for parallelizing non-trivial operations on large Enumerables."
  s.authors     = ["Peter Williams"]
  s.email       = "pezra@barelyenough.org"
  s.homepage    = "http://github.com/pezra/parallel_each"

  s.files = ["README.txt",
             "parallel_each.gemspec",
             "lib/bounded_async_task_runner.rb",
             "lib/parallel_each.rb"]
  s.test_files = ["spec/bounded_async_task_runner_spec.rb",
                  "spec/spec_helper.rb"]

  # Only set has_rdoc where the setter exists, so the spec still loads on
  # RubyGems versions that do not define it.
  s.has_rdoc = true if s.respond_to?(:has_rdoc=)
  s.rdoc_options = ["--main", "README.txt"]
  s.extra_rdoc_files = ["README.txt"]
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
require 'pathname'
|
2
|
+
require Pathname(__FILE__).dirname + 'spec_helper'
|
3
|
+
|
4
|
+
require 'bounded_async_task_runner'
|
5
|
+
|
6
|
+
require 'thread' # provides Mutex on Ruby 1.8; harmless on newer Rubies

# Behavioural specification for BoundedAsyncTaskRunner: tasks run, run
# concurrently, report failures to the caller, and respect the worker limit.
describe BoundedAsyncTaskRunner do
  before do
    @runner = BoundedAsyncTaskRunner.new(5)
  end

  describe "#do(&block)" do
    it "should run tasks" do
      out = Array.new

      @runner.do(out) { |results| results << :marker }
      @runner.wait_for_all_to_finish

      out.should == [:marker]
    end

    it "should run jobs at the same time" do
      out = Array.new

      # The first task sleeps, so the second one only finishes first if both
      # are running concurrently.
      @runner.do(out) { |results| sleep(0.1); results << :a }
      @runner.do(out) { |results| results << :b }

      @runner.wait_for_all_to_finish
      out.should == [:b, :a]
    end

    it "should raise exception if any tasks fail" do
      lambda {
        @runner.do { raise ArgumentError, "testing" }
        @runner.wait_for_all_to_finish
      }.should raise_error(AsyncTaskError, "Worker error: testing (ArgumentError)")
    end

    it "should include the exception raised by the task in the AsyncTaskError" do
      exception = ArgumentError.new("testing")
      @runner.do { raise(exception) }

      raised = nil
      begin
        @runner.wait_for_all_to_finish
      rescue AsyncTaskError => e
        raised = e
      end

      # Fail explicitly when nothing was raised; previously the example
      # passed silently in that case.
      raised.should_not be_nil
      raised.cause.should == exception
    end

    it "should not run more than the max number of tasks at any one moment" do
      lock = Mutex.new
      running = 0
      peak = 0

      20.times do
        @runner.do do
          lock.synchronize do
            running += 1
            peak = running if running > peak
          end
          sleep(0.01)
          lock.synchronize { running -= 1 }
        end
      end
      @runner.wait_for_all_to_finish

      # The runner in this spec is created with a limit of 5 workers.
      peak.should <= 5
    end
  end

  describe "#wait_for_all_to_finish" do
    it "should not return until all tasks are finished" do
      out = Array.new

      3.times do
        @runner.do(out) { |results| sleep(0.1); results << :a }
      end
      @runner.wait_for_all_to_finish

      out.should == [:a, :a, :a]
    end
  end
end
|
data/spec/spec_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: pezra-parallel_each
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Peter Williams
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2008-11-05 00:00:00 -08:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description: Provides enumerable methods with a configurable level of parallelism. Suitable for parallelizing non-trivial operations on large Enumerables.
|
17
|
+
email: pezra@barelyenough.org
|
18
|
+
executables: []
|
19
|
+
|
20
|
+
extensions: []
|
21
|
+
|
22
|
+
extra_rdoc_files:
|
23
|
+
- README.txt
|
24
|
+
files:
|
25
|
+
- README.txt
|
26
|
+
- parallel_each.gemspec
|
27
|
+
- lib/bounded_async_task_runner.rb
|
28
|
+
- lib/parallel_each.rb
|
29
|
+
has_rdoc: true
|
30
|
+
homepage: http://github.com/pezra/parallel_each
|
31
|
+
post_install_message:
|
32
|
+
rdoc_options:
|
33
|
+
- --main
|
34
|
+
- README.txt
|
35
|
+
require_paths:
|
36
|
+
- lib
|
37
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
38
|
+
requirements:
|
39
|
+
- - ">="
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: "0"
|
42
|
+
version:
|
43
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: "0"
|
48
|
+
version:
|
49
|
+
requirements: []
|
50
|
+
|
51
|
+
rubyforge_project:
|
52
|
+
rubygems_version: 1.2.0
|
53
|
+
signing_key:
|
54
|
+
specification_version: 2
|
55
|
+
summary: Parallelized Enumerable methods
|
56
|
+
test_files:
|
57
|
+
- spec/bounded_async_task_runner_spec.rb
|
58
|
+
- spec/spec_helper.rb
|