threadlimiter 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/LICENSE ADDED
@@ -0,0 +1,15 @@
1
+ # Copyright Erik Veenstra <threadlimiter@erikveen.dds.nl>
2
+ #
3
+ # This program is free software; you can redistribute it and/or
4
+ # modify it under the terms of the GNU General Public License,
5
+ # version 2, as published by the Free Software Foundation.
6
+ #
7
+ # This program is distributed in the hope that it will be
8
+ # useful, but WITHOUT ANY WARRANTY; without even the implied
9
+ # warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
10
+ # PURPOSE. See the GNU General Public License for more details.
11
+ #
12
+ # You should have received a copy of the GNU General Public
13
+ # License along with this program; if not, write to the Free
14
+ # Software Foundation, Inc., 59 Temple Place, Suite 330,
15
+ # Boston, MA 02111-1307 USA.
data/README ADDED
@@ -0,0 +1,43 @@
1
+ ThreadLimiter forks threads like Thread.fork(), but limits the
2
+ number of concurrently running threads.
3
+
4
+ ThreadLimiter isn't a thread pool. Each fork really starts a
5
+ new thread.
6
+
7
+ Example: Get the titles of a large collection of URLs.
8
+
9
+ The traditional way, using Thread directly:
10
+
11
+ urls = [.....] # A lot of URLs. Maybe even thousands.
12
+
13
+ titles =
14
+ urls.collect do |url|
15
+ Thread.fork do
16
+ # ... get the title of the url...
17
+ end
18
+ end.collect do |thread|
19
+ thread.value
20
+ end
21
+
22
+ With ThreadLimiter#fork():
23
+
24
+ thread_limiter = ThreadLimiter.new(10) # Max. 10 concurrently running threads.
25
+ urls = [.....] # A lot of URLs. Maybe even thousands.
26
+
27
+ titles =
28
+ urls.collect do |url|
29
+ thread_limiter.fork do
30
+ # ... get the title of the url...
31
+ end
32
+ end.collect do |thread|
33
+ thread.value
34
+ end
35
+
36
+ With Enumerable#threaded_collect():
37
+
38
+ urls = [.....] # A lot of URLs. Maybe even thousands.
39
+
40
+ titles =
41
+ urls.threaded_collect(10) do |url| # Max. 10 concurrently running threads.
42
+ # ... get the title of the url...
43
+ end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
@@ -0,0 +1,120 @@
1
# Threaded counterparts of collect/select/reject/each, built on ThreadLimiter.
# Every method returns its results in the order of the original elements.
module Enumerable
  # Like Enumerable#collect(), but each block is run concurrently in a thread, using ThreadLimiter.new(<i>limit</i>) and its fork().
  # Set <i>limit</i> to 0 to use plain old collect() without any threading.
  # Returns an Array with the value of each block; thread.value re-raises any exception raised in a block.

  def threaded_collect(limit=-1, &block)
    return collect(&block) if limit == 0

    thread_limiter = ThreadLimiter.new(limit)

    threads =
      collect do |object|
        # Splat enumerable elements when the block takes several parameters,
        # so that {|x, y| ...} sees the same arguments as with collect().
        if block.arity > 1 && object.kind_of?(Enumerable)
          thread_limiter.fork(*object.to_a, &block)
        else
          thread_limiter.fork(object, &block)
        end
      end

    threads.collect { |thread| thread.value }
  end

  # Like Enumerable#collect(), but all blocks are clustered.
  # Each cluster is run concurrently in a thread, so exactly <i>number_of_clusters</i> threads are started (fewer if there are fewer elements).
  # Element i is handled by cluster i % <i>number_of_clusters</i>.
  # Set <i>number_of_clusters</i> to -1 to skip clustering (one thread per element), or to 0 to skip threading altogether.

  def clustered_threaded_collect(number_of_clusters=-1, &block)
    return threaded_collect(number_of_clusters, &block) if number_of_clusters <= 0

    clusters = [] # One cluster per thread.
    count = 0     # Number of elements seen.

    each_with_index do |object, pos|
      (clusters[pos % number_of_clusters] ||= []) << object

      count = pos + 1
    end

    # Nothing to distribute; also avoids slicing the result with a nil bound.
    return [] if clusters.empty?

    longest = clusters[0].length # The first cluster is always (one of) the longest.

    rows =
      clusters.threaded_collect(-1) do |cluster|
        results =
          cluster.collect do |object|
            if block.arity > 1 && object.kind_of?(Enumerable)
              yield(*object.to_a)
            else
              yield(object)
            end
          end

        # Add a padding nil to the shorter clusters, in order to be able to transpose.
        results << nil unless cluster.length == longest
        results
      end

    # Transposing interleaves the clusters back into the original element order.
    res = rows.transpose.inject([]) { |all, row| all.concat(row) }

    res[0, count] # Remove padding nils.
  end

  # Like Enumerable#select(), but each block is run concurrently in a thread, using ThreadLimiter.new(<i>limit</i>) and its fork().
  # Set <i>limit</i> to 0 to use plain old select() without any threading.

  def threaded_select(limit=-1, &block)
    return select(&block) if limit == 0

    # Forward the caller's limit, so the maximum number of threads is honoured.
    zip(threaded_collect(limit, &block)).inject([]) { |r, (o, b)| r << o if b ; r }
  end

  # Like Enumerable#reject(), but each block is run concurrently in a thread, using ThreadLimiter.new(<i>limit</i>) and its fork().
  # Set <i>limit</i> to 0 to use plain old reject() without any threading.

  def threaded_reject(limit=-1, &block)
    return reject(&block) if limit == 0

    # Forward the caller's limit, so the maximum number of threads is honoured.
    zip(threaded_collect(limit, &block)).inject([]) { |r, (o, b)| r << o unless b ; r }
  end

  # Like Enumerable#each(), but each block is run concurrently in a thread, using ThreadLimiter.new(<i>limit</i>) and its fork().
  # Set <i>limit</i> to 0 to use plain old each() without any threading.
  # Returns self, after all blocks have finished.

  def threaded_each(limit=-1, &block)
    return each(&block) if limit == 0

    threaded_collect(limit, &block)

    self
  end

  # Like Enumerable#select(), but all blocks are clustered.
  # Each cluster is run concurrently in a thread; see clustered_threaded_collect().
  # Set <i>number_of_clusters</i> to -1 to skip clustering.

  def clustered_threaded_select(number_of_clusters=-1, &block)
    zip(clustered_threaded_collect(number_of_clusters, &block)).inject([]) { |r, (o, b)| r << o if b ; r }
  end

  # Like Enumerable#reject(), but all blocks are clustered.
  # Each cluster is run concurrently in a thread; see clustered_threaded_collect().
  # Set <i>number_of_clusters</i> to -1 to skip clustering.

  def clustered_threaded_reject(number_of_clusters=-1, &block)
    zip(clustered_threaded_collect(number_of_clusters, &block)).inject([]) { |r, (o, b)| r << o unless b ; r }
  end

  # Like Enumerable#each(), but all blocks are clustered.
  # Each cluster is run concurrently in a thread; see clustered_threaded_collect().
  # Set <i>number_of_clusters</i> to -1 to skip clustering.
  # Returns self, after all blocks have finished.

  def clustered_threaded_each(number_of_clusters=-1, &block)
    clustered_threaded_collect(number_of_clusters, &block)

    self
  end

  alias threaded_map threaded_collect
  alias clustered_threaded_map clustered_threaded_collect
end
@@ -0,0 +1,52 @@
1
# Fork threads like Thread.fork, but limit the number of concurrently running
# threads.
#
# ThreadLimiter isn't a thread pool. Each fork really starts a new thread.

class ThreadLimiter
  # Initialize the ThreadLimiter.
  # The optional parameter <i>limit</i> is the maximum number of concurrently running threads.
  # Set <i>limit</i> to -1 or 0 to fork threads without limiting the number of concurrently running threads.

  def initialize(limit=-1)
    @limit = limit   # The maximum number of concurrently running threads.
    @running = 0     # The number of currently running threads.

    @mutex = Mutex.new
    @cv = ConditionVariable.new
  end

  # Fork a thread.
  # The given block is run within the thread.
  # It behaves like Thread.fork().
  # In fact, it invokes Thread.fork() and returns its result.
  # The list of arguments is passed on to the block.
  #
  # When the limit has been reached, the calling thread blocks until one of the running threads has finished.

  def fork(*args, &block)
    return Thread.fork(*args, &block) if @limit <= 0

    acquire_slot

    begin
      Thread.fork do
        begin
          yield(*args)
        ensure
          # Always free the slot, even when the block raises.
          release_slot
        end
      end
    rescue ThreadError
      # The thread could not be started, so its ensure clause will never run.
      # Give the slot back here; otherwise the limiter loses capacity for good.
      release_slot

      raise
    end
  end

  private

  # Block until fewer than @limit threads are running, then claim a slot.

  def acquire_slot
    @mutex.synchronize do
      while @running >= @limit
        @cv.wait(@mutex)
      end

      @running += 1
    end
  end

  # Give a slot back and wake up one thread waiting in acquire_slot.

  def release_slot
    @mutex.synchronize do
      @running -= 1

      @cv.signal
    end
  end
end
@@ -0,0 +1,4 @@
1
+ require "thread"
2
+
3
+ require "threadlimiter/threadlimiter"
4
+ require "threadlimiter/enumerable"
data/test/test.rb ADDED
@@ -0,0 +1,180 @@
1
+ require "test/unit"
2
+ require "threadlimiter"
3
+
4
# Exercises ThreadLimiter#fork with a positive limit, with -1 and with 0.
class ThreadLimiterTest < Test::Unit::TestCase
  # Forks 100 short-lived threads through a ThreadLimiter created with
  # +limit+ and checks that real Thread objects come back, that their values
  # arrive in input order, and that the limiter's counters end up consistent.
  def go(limit)
    values = (1..100).collect { rand }
    limiter = ThreadLimiter.new(limit)

    forked = values.collect do |value|
      limiter.fork(value) do |n|
        Kernel.sleep 0.01

        n
      end
    end

    assert_equal([Thread], forked.collect { |thread| thread.class }.uniq)

    results = forked.collect { |thread| thread.value }

    assert_equal(values.to_a, results)
    assert_equal(0, limiter.instance_eval { @running })
    assert_equal(limit, limiter.instance_eval { @limit })
  end

  def test_with_limit
    go(10)
  end

  def test_with_no_limit
    go(-1)
  end

  def test_with_zero_limit
    go(0)
  end
end
39
+
40
# Compares the threaded Enumerable extensions against their plain counterparts.
class ThreadLimiterEnumerableTest < Test::Unit::TestCase
  def test_with_empty_enumerables
    [10, -1, 0].each do |limit|
      assert_equal([], [].threaded_collect(limit) { 2 })
    end
  end

  def test_threaded_collect_with_no_arguments_with_no_limit
    input = pairs
    expected = input.collect { 2 }

    assert_equal(expected, input.threaded_collect(-1) { 2 })
  end

  def test_threaded_collect_with_no_arguments_with_limit
    input = pairs
    expected = input.collect { 2 }

    assert_equal(expected, input.threaded_collect(10) { 2 })
  end

  def test_threaded_collect_with_no_arguments_with_zero_limit
    input = pairs
    expected = input.collect { 2 }

    assert_equal(expected, input.threaded_collect(0) { 2 })
  end

  def test_threaded_collect_with_one_argument_with_no_limit
    input = pairs
    expected = input.collect { |x| x * 2 }

    assert_equal(expected, input.threaded_collect(-1) { |x| x * 2 })
  end

  def test_threaded_collect_with_one_argument_with_limit
    input = pairs
    expected = input.collect { |x| x * 2 }

    assert_equal(expected, input.threaded_collect(10) { |x| x * 2 })
  end

  def test_threaded_collect_with_one_argument_with_zero_limit
    input = pairs
    expected = input.collect { |x| x * 2 }

    assert_equal(expected, input.threaded_collect(0) { |x| x * 2 })
  end

  def test_threaded_collect_with_two_arguments_with_no_limit
    input = pairs
    expected = input.collect { |x, y| x * y }

    assert_equal(expected, input.threaded_collect(-1) { |x, y| x * y })
  end

  def test_threaded_collect_with_two_arguments_with_limit
    input = pairs
    expected = input.collect { |x, y| x * y }

    assert_equal(expected, input.threaded_collect(10) { |x, y| x * y })
  end

  def test_threaded_collect_with_two_arguments_with_zero_limit
    input = pairs
    expected = input.collect { |x, y| x * y }

    assert_equal(expected, input.threaded_collect(0) { |x, y| x * y })
  end

  def test_threaded_select
    input = pairs
    expected = input.select { |x, y| (x*y) % 2 == 0 }

    assert_equal(expected, input.threaded_select { |x, y| (x*y) % 2 == 0 })
  end

  def test_threaded_reject
    input = pairs
    expected = input.reject { |x, y| (x*y) % 2 == 0 }

    assert_equal(expected, input.threaded_reject { |x, y| (x*y) % 2 == 0 })
  end

  def test_threaded_each
    input = pairs

    # threaded_each is expected to hand back its receiver.
    assert_equal(input, input.threaded_each { |x, y| (x*y) % 2 == 0 })
  end

  def test_clustered_threaded_collect_with_one_argument
    input = pairs
    expected = input.collect { |x| x * 2 }

    assert_equal(expected, input.clustered_threaded_collect(10) { |x| x * 2 })
  end

  def test_clustered_threaded_collect_with_two_arguments
    input = pairs
    expected = input.collect { |x, y| x * y }

    assert_equal(expected, input.clustered_threaded_collect(10) { |x, y| x * y })
  end

  def test_clustered_threaded_select
    input = pairs
    expected = input.select { |x, y| (x*y) % 2 == 0 }

    assert_equal(expected, input.clustered_threaded_select(10) { |x, y| (x*y) % 2 == 0 })
  end

  def test_clustered_threaded_reject
    input = pairs
    expected = input.reject { |x, y| (x*y) % 2 == 0 }

    assert_equal(expected, input.clustered_threaded_reject(10) { |x, y| (x*y) % 2 == 0 })
  end

  def test_clustered_threaded_each
    input = pairs

    # clustered_threaded_each is expected to hand back its receiver.
    assert_equal(input, input.clustered_threaded_each(10) { |x, y| (x*y) % 2 == 0 })
  end

  private

  # The shared fixture: 100 two-element arrays [1, 101], [2, 102], ..., [100, 200].
  def pairs
    (1..100).zip(101..200)
  end
end
metadata ADDED
@@ -0,0 +1,65 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: threadlimiter
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Erik Veenstra
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2008-07-12 00:00:00 +02:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description: Fork threads like Thread.fork, but limit the number of concurrently running threads.
17
+ email: threadlimiter@erikveen.dds.nl
18
+ executables: []
19
+
20
+ extensions: []
21
+
22
+ extra_rdoc_files: []
23
+
24
+ files:
25
+ - lib/threadlimiter
26
+ - lib/threadlimiter/threadlimiter.rb
27
+ - lib/threadlimiter/enumerable.rb
28
+ - lib/threadlimiter.rb
29
+ - README
30
+ - LICENSE
31
+ - VERSION
32
+ has_rdoc: true
33
+ homepage: http://www.erikveen.dds.nl/threadlimiter/index.html
34
+ post_install_message:
35
+ rdoc_options:
36
+ - README
37
+ - LICENSE
38
+ - VERSION
39
+ - --title
40
+ - threadlimiter (0.1.0)
41
+ - --main
42
+ - README
43
+ require_paths:
44
+ - lib
45
+ required_ruby_version: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - ">="
48
+ - !ruby/object:Gem::Version
49
+ version: "0"
50
+ version:
51
+ required_rubygems_version: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ version: "0"
56
+ version:
57
+ requirements: []
58
+
59
+ rubyforge_project: threadlimiter
60
+ rubygems_version: 1.1.1
61
+ signing_key:
62
+ specification_version: 2
63
+ summary: Fork threads like Thread.fork, but limit the number of concurrently running threads.
64
+ test_files:
65
+ - test/test.rb