RubyGems - threadlimiter - Versions diffs - 0.1.0 - Mend

threadlimiter 0.1.0

Files changed (8) hide show

data/LICENSE +15 -0
data/README +43 -0
data/VERSION +1 -0
data/lib/threadlimiter/enumerable.rb +120 -0
data/lib/threadlimiter/threadlimiter.rb +52 -0
data/lib/threadlimiter.rb +4 -0
data/test/test.rb +180 -0
metadata +65 -0

data/LICENSE ADDED Viewed

@@ -0,0 +1,15 @@
+# Copyright Erik Veenstra <threadlimiter@erikveen.dds.nl>
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License,
+# version 2, as published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied
+# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+# PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public
+# License along with this program; if not, write to the Free
+# Software Foundation, Inc., 59 Temple Place, Suite 330,
+# Boston, MA 02111-1307 USA.

data/README ADDED Viewed

@@ -0,0 +1,43 @@
+ThreadLimiter forks threads like Thread.fork(), but limits the
+number of concurrently running threads.
+ThreadLimiter isn't a thread pool. Each fork really starts a
+new thread.
+Example: Get the titles of a large collection of URLs.
+The traditional way, using Thread directly:
+  urls           = [.....]                 # A lot of URLs. Maybe even thousands.
+  titles =
+  urls.collect do |url|
+    Thread.fork do
+      # ... get the title of the url...
+    end
+  end.collect do |thread|
+    thread.value
+  end
+With ThreadLimiter#fork():
+  thread_limiter = ThreadLimiter.new(10)   # Max. 10 concurrently running threads.
+  urls           = [.....]                 # A lot of URLs. Maybe even thousands.
+  titles =
+  urls.collect do |url|
+    thread_limiter.fork do
+      # ... get the title of the url...
+    end
+  end.collect do |thread|
+    thread.value
+  end
+With Enumerable#threaded_collect():
+  urls           = [.....]                 # A lot of URLs. Maybe even thousands.
+  titles =
+  urls.threaded_collect(10) do |url|       # Max. 10 concurrently running threads.
+    # ... get the title of the url...
+  end

data/VERSION ADDED Viewed

	@@ -0,0 +1 @@
1	+ 0.1.0

data/lib/threadlimiter/enumerable.rb ADDED Viewed

@@ -0,0 +1,120 @@
module Enumerable
  # Like Enumerable#collect(), but each block invocation runs in its own
  # thread, started through ThreadLimiter.new(<i>limit</i>) and its fork().
  # The results are returned in the order of the receiver's elements.
  # Set <i>limit</i> to 0 to use plain old collect() without any threading.
  # Any other <i>limit</i> is handed to ThreadLimiter.new unchanged.
  def threaded_collect(limit = -1, &block)
    return collect(&block) if limit == 0

    thread_limiter = ThreadLimiter.new(limit)
    threads =
      collect do |object|
        # Spread an enumerable element over the block's parameters when the
        # block declares more than one of them (e.g. {|x, y| ...}).
        if block.arity > 1 && object.kind_of?(Enumerable)
          thread_limiter.fork(*object.to_a, &block)
        else
          thread_limiter.fork(object, &block)
        end
      end

    # Thread#value joins the thread, so this also waits for completion.
    threads.collect { |thread| thread.value }
  end

  # Like Enumerable#collect(), but all blocks are clustered.
  # The elements are distributed round-robin over <i>number_of_clusters</i>
  # clusters; each cluster is processed sequentially in its own thread.
  # The results are returned in the order of the receiver's elements.
  # Set <i>number_of_clusters</i> to -1 to skip clustering; any value <= 0 is
  # passed on to threaded_collect() as its limit.
  def clustered_threaded_collect(number_of_clusters = -1, &block)
    return threaded_collect(number_of_clusters, &block) if number_of_clusters <= 0

    clusters = []	# One cluster per thread.
    size     = 0
    each_with_index do |object, pos|
      (clusters[pos % number_of_clusters] ||= []) << object
      size = pos + 1
    end

    # One unlimited thread per cluster; the clustering itself is the limit.
    results =
      clusters.threaded_collect(-1) do |cluster|
        cluster.collect do |object|
          if block.arity > 1 && object.kind_of?(Enumerable)
            yield(*object.to_a)
          else
            yield(object)
          end
        end
      end

    # Element pos was stored as entry pos / n of cluster pos % n; read the
    # results back in exactly that order to restore the original sequence.
    (0...size).collect do |pos|
      results[pos % number_of_clusters][pos / number_of_clusters]
    end
  end

  # Like Enumerable#select(), but each block is run concurrently in a thread,
  # using threaded_collect(<i>limit</i>). Returns an Array.
  # Set <i>limit</i> to 0 to use plain old select() without any threading.
  def threaded_select(limit = -1, &block)
    return select(&block) if limit == 0

    flags = threaded_collect(limit, &block)
    zip(flags).each_with_object([]) { |(object, keep), kept| kept << object if keep }
  end

  # Like Enumerable#reject(), but each block is run concurrently in a thread,
  # using threaded_collect(<i>limit</i>). Returns an Array.
  # Set <i>limit</i> to 0 to use plain old reject() without any threading.
  def threaded_reject(limit = -1, &block)
    return reject(&block) if limit == 0

    flags = threaded_collect(limit, &block)
    zip(flags).each_with_object([]) { |(object, drop), kept| kept << object unless drop }
  end

  # Like Enumerable#each(), but each block is run concurrently in a thread,
  # using threaded_collect(<i>limit</i>). Returns the receiver.
  # Set <i>limit</i> to 0 to use plain old each() without any threading.
  def threaded_each(limit = -1, &block)
    return each(&block) if limit == 0

    threaded_collect(limit, &block)
    self
  end

  # Like Enumerable#select(), but all blocks are clustered, using
  # clustered_threaded_collect(<i>number_of_clusters</i>). Returns an Array.
  # Set <i>number_of_clusters</i> to -1 to skip clustering.
  def clustered_threaded_select(number_of_clusters = -1, &block)
    flags = clustered_threaded_collect(number_of_clusters, &block)
    zip(flags).each_with_object([]) { |(object, keep), kept| kept << object if keep }
  end

  # Like Enumerable#reject(), but all blocks are clustered, using
  # clustered_threaded_collect(<i>number_of_clusters</i>). Returns an Array.
  # Set <i>number_of_clusters</i> to -1 to skip clustering.
  def clustered_threaded_reject(number_of_clusters = -1, &block)
    flags = clustered_threaded_collect(number_of_clusters, &block)
    zip(flags).each_with_object([]) { |(object, drop), kept| kept << object unless drop }
  end

  # Like Enumerable#each(), but all blocks are clustered, using
  # clustered_threaded_collect(<i>number_of_clusters</i>). Returns the receiver.
  # Set <i>number_of_clusters</i> to -1 to skip clustering.
  def clustered_threaded_each(number_of_clusters = -1, &block)
    clustered_threaded_collect(number_of_clusters, &block)
    self
  end

  alias threaded_map           threaded_collect
  alias clustered_threaded_map clustered_threaded_collect
end

data/lib/threadlimiter/threadlimiter.rb ADDED Viewed

@@ -0,0 +1,52 @@
+# Fork threads like Thread.fork, but limit the number of concurrently running
+# threads.
+#
+# ThreadLimiter isn't a thread pool. Each fork really starts a new thread.
class ThreadLimiter
  # Initialize the ThreadLimiter.
  # The optional parameter <i>limit</i> is the maximum number of concurrently running threads.
  # Set <i>limit</i> to -1 or 0 to fork threads without limiting the number of concurrently running threads.
  def initialize(limit = -1)
    @limit   = limit	# The maximum number of concurrently running threads.
    @running = 0	# The number of currently running threads.
    @mutex   = Mutex.new
    @cv      = ConditionVariable.new
  end

  # Fork a thread.
  # The given block is run within the thread and receives the list of
  # arguments; the new Thread is returned, like Thread.fork() does.
  # When a limit is set and that many forked threads are still running, the
  # caller blocks until one of them has finished.
  # Raises ThreadError when no block is given, with or without a limit.
  def fork(*args, &block)
    raise ThreadError, "must be called with a block" unless block_given?
    return Thread.fork(*args, &block) if @limit <= 0

    acquire_slot
    started = false
    begin
      thread =
        Thread.fork do
          begin
            yield(*args)
          ensure
            # Runs on normal completion as well as when the block raises.
            release_slot
          end
        end
      started = true
      thread
    ensure
      # If the thread could not be created, nobody else will hand the slot
      # back; without this, later forks would eventually block forever.
      release_slot unless started
    end
  end

  private

  # Block the calling thread until fewer than @limit threads are running,
  # then reserve one slot.
  def acquire_slot
    @mutex.synchronize do
      # Loop rather than a single check: re-test the condition after every wake-up.
      @cv.wait(@mutex) while @running >= @limit
      @running += 1
    end
  end

  # Hand a slot back and wake up one caller waiting in acquire_slot.
  def release_slot
    @mutex.synchronize do
      @running -= 1
      @cv.signal
    end
  end
end

data/lib/threadlimiter.rb ADDED Viewed

@@ -0,0 +1,4 @@
+require "thread"
+require "threadlimiter/threadlimiter"
+require "threadlimiter/enumerable"

data/test/test.rb ADDED Viewed

@@ -0,0 +1,180 @@
+require "test/unit"
+require "threadlimiter"
class ThreadLimiterTest < Test::Unit::TestCase
  # Forks 100 short-lived threads through a ThreadLimiter created with
  # +limit+ and checks that every fork returns a Thread, that the thread
  # values match the inputs in order, and that the limiter ends up with no
  # running threads and the limit it was given.
  def go(limit)
    input         = Array.new(100) { rand }
    threadlimiter = ThreadLimiter.new(limit)

    threads = input.map do |m|
      threadlimiter.fork(m) do |n|
        Kernel.sleep 0.01
        n
      end
    end

    thread_classes = threads.map { |t| t.class }.uniq
    assert_equal([Thread], thread_classes)

    output = threads.map { |t| t.value }
    assert_equal(input.to_a, output)

    assert_equal(0, threadlimiter.instance_variable_get(:@running))
    assert_equal(limit, threadlimiter.instance_variable_get(:@limit))
  end

  # At most 10 threads at a time.
  def test_with_limit
    go(10)
  end

  # -1 disables limiting.
  def test_with_no_limit
    go(-1)
  end

  # 0 disables limiting as well.
  def test_with_zero_limit
    go(0)
  end
end
class ThreadLimiterEnumerableTest < Test::Unit::TestCase
  # An empty receiver yields an empty result for every kind of limit.
  def test_with_empty_enumerables
    [10, -1, 0].each do |limit|
      assert_equal([], [].threaded_collect(limit) { 2 })
    end
  end

  # --- threaded_collect, block without parameters ---

  def test_threaded_collect_with_no_arguments_with_no_limit
    input = pairs
    assert_equal(input.collect { 2 }, input.threaded_collect(-1) { 2 })
  end

  def test_threaded_collect_with_no_arguments_with_limit
    input = pairs
    assert_equal(input.collect { 2 }, input.threaded_collect(10) { 2 })
  end

  def test_threaded_collect_with_no_arguments_with_zero_limit
    input = pairs
    assert_equal(input.collect { 2 }, input.threaded_collect(0) { 2 })
  end

  # --- threaded_collect, block with one parameter ---

  def test_threaded_collect_with_one_argument_with_no_limit
    input = pairs
    assert_equal(input.collect { |x| x * 2 }, input.threaded_collect(-1) { |x| x * 2 })
  end

  def test_threaded_collect_with_one_argument_with_limit
    input = pairs
    assert_equal(input.collect { |x| x * 2 }, input.threaded_collect(10) { |x| x * 2 })
  end

  def test_threaded_collect_with_one_argument_with_zero_limit
    input = pairs
    assert_equal(input.collect { |x| x * 2 }, input.threaded_collect(0) { |x| x * 2 })
  end

  # --- threaded_collect, block with two parameters ---

  def test_threaded_collect_with_two_arguments_with_no_limit
    input = pairs
    assert_equal(input.collect { |x, y| x * y }, input.threaded_collect(-1) { |x, y| x * y })
  end

  def test_threaded_collect_with_two_arguments_with_limit
    input = pairs
    assert_equal(input.collect { |x, y| x * y }, input.threaded_collect(10) { |x, y| x * y })
  end

  def test_threaded_collect_with_two_arguments_with_zero_limit
    input = pairs
    assert_equal(input.collect { |x, y| x * y }, input.threaded_collect(0) { |x, y| x * y })
  end

  # --- threaded select / reject / each ---

  def test_threaded_select
    input    = pairs
    expected = input.select { |x, y| (x*y) % 2 == 0 }
    assert_equal(expected, input.threaded_select { |x, y| (x*y) % 2 == 0 })
  end

  def test_threaded_reject
    input    = pairs
    expected = input.reject { |x, y| (x*y) % 2 == 0 }
    assert_equal(expected, input.threaded_reject { |x, y| (x*y) % 2 == 0 })
  end

  def test_threaded_each
    input = pairs
    assert_equal(input, input.threaded_each { |x, y| (x*y) % 2 == 0 })
  end

  # --- clustered variants, 10 clusters ---

  def test_clustered_threaded_collect_with_one_argument
    input = pairs
    assert_equal(input.collect { |x| x * 2 }, input.clustered_threaded_collect(10) { |x| x * 2 })
  end

  def test_clustered_threaded_collect_with_two_arguments
    input = pairs
    assert_equal(input.collect { |x, y| x * y }, input.clustered_threaded_collect(10) { |x, y| x * y })
  end

  def test_clustered_threaded_select
    input    = pairs
    expected = input.select { |x, y| (x*y) % 2 == 0 }
    assert_equal(expected, input.clustered_threaded_select(10) { |x, y| (x*y) % 2 == 0 })
  end

  def test_clustered_threaded_reject
    input    = pairs
    expected = input.reject { |x, y| (x*y) % 2 == 0 }
    assert_equal(expected, input.clustered_threaded_reject(10) { |x, y| (x*y) % 2 == 0 })
  end

  def test_clustered_threaded_each
    input = pairs
    assert_equal(input, input.clustered_threaded_each(10) { |x, y| (x*y) % 2 == 0 })
  end

  private

  # The shared fixture: 100 two-element arrays, [1, 101] up to [100, 200].
  def pairs
    (1..100).zip(101..200)
  end
end

metadata ADDED Viewed

@@ -0,0 +1,65 @@
+--- !ruby/object:Gem::Specification
+name: threadlimiter
+version: !ruby/object:Gem::Version
+  version: 0.1.0
+platform: ruby
+authors:
+- Erik Veenstra
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2008-07-12 00:00:00 +02:00
+default_executable:
+dependencies: []
+description: Fork threads like Thread.fork, but limit the number of concurrently running threads.
+email: threadlimiter@erikveen.dds.nl
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- lib/threadlimiter
+- lib/threadlimiter/threadlimiter.rb
+- lib/threadlimiter/enumerable.rb
+- lib/threadlimiter.rb
+- README
+- LICENSE
+- VERSION
+has_rdoc: true
+homepage: http://www.erikveen.dds.nl/threadlimiter/index.html
+post_install_message:
+rdoc_options:
+- README
+- LICENSE
+- VERSION
+- --title
+- threadlimiter (0.1.0)
+- --main
+- README
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: "0"
+  version:
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: "0"
+  version:
+requirements: []
+rubyforge_project: threadlimiter
+rubygems_version: 1.1.1
+signing_key:
+specification_version: 2
+summary: Fork threads like Thread.fork, but limit the number of concurrently running threads.
+test_files:
+- test/test.rb