RubyGems - threadlimiter - Versions diffs - 0.1.0 - Mend

threadlimiter 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

data/LICENSE +15 -0
data/README +43 -0
data/VERSION +1 -0
data/lib/threadlimiter/enumerable.rb +120 -0
data/lib/threadlimiter/threadlimiter.rb +52 -0
data/lib/threadlimiter.rb +4 -0
data/test/test.rb +180 -0
metadata +65 -0

data/LICENSE ADDED Viewed

@@ -0,0 +1,15 @@
+# Copyright Erik Veenstra <threadlimiter@erikveen.dds.nl>
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License,
+# version 2, as published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied
+# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+# PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public
+# License along with this program; if not, write to the Free
+# Software Foundation, Inc., 59 Temple Place, Suite 330,
+# Boston, MA 02111-1307 USA.

data/README ADDED Viewed

@@ -0,0 +1,43 @@
+ThreadLimiter forks threads like Thread.fork(), but limits the
+number of concurrently running threads.
+ThreadLimiter isn't a thread pool. Each fork really starts a
+new thread.
+Example: Get the titles of a large collection of URLs.
+The traditional way, using Thread directly:
+  urls           = [.....]                 # A lot of URLs. Maybe even thousands.
+  titles =
+  urls.collect do |url|
+    Thread.fork do
+      # ... get the title of the url...
+    end
+  end.collect do |thread|
+    thread.value
+  end
+With ThreadLimiter#fork():
+  thread_limiter = ThreadLimiter.new(10)   # Max. 10 concurrently running threads.
+  urls           = [.....]                 # A lot of URLs. Maybe even thousands.
+  titles =
+  urls.collect do |url|
+    thread_limiter.fork do
+      # ... get the title of the url...
+    end
+  end.collect do |thread|
+    thread.value
+  end
+With Enumerable#threaded_collect():
+  urls           = [.....]                 # A lot of URLs. Maybe even thousands.
+  titles =
+  urls.threaded_collect(10) do |url|       # Max. 10 concurrently running threads.
+    # ... get the title of the url...
+  end

data/VERSION ADDED Viewed

	@@ -0,0 +1 @@
1	+ 0.1.0

data/lib/threadlimiter/enumerable.rb ADDED Viewed

@@ -0,0 +1,120 @@
+module Enumerable
+  # Like Enumerable#collect(), but each block is run concurrently in a thread, using ThreadLimiter.new(<i>limit</i>) and its fork().
+  # Set <i>limit</i> to 0 to use plain old collect() without any threading.
+  def threaded_collect(limit=-1, &block)
+    if limit == 0
+      self.collect(&block)
+    else
+      thread_limiter	= ThreadLimiter.new(limit)
+      self.collect do |object|
+        if block.arity > 1 and object.kind_of?(Enumerable)
+          thread_limiter.fork(*object.to_a, &block)
+        else
+          thread_limiter.fork(object, &block)
+        end
+      end.collect do |thread|
+        thread.value
+      end
+    end
+  end
+  # Like Enumerable#collect(), but all blocks are clustered.
+  # Each cluster is run concurrently in a thread, using ThreadLimiter.new(<i>number_of_clusters</i>) and its fork().
+  # Set <i>number_of_clusters</i> to -1 to skip clustering.
+  def clustered_threaded_collect(number_of_clusters=-1, &block)
+    if number_of_clusters <= 0
+      threaded_collect(number_of_clusters, &block)
+    else
+      clusters	= []	# One cluster per thread.
+      last_pos	= nil
+      res	= []
+      self.each_with_index do |object, pos|
+        (clusters[pos%number_of_clusters] ||= []) << object
+        last_pos	= pos
+      end
+      clusters.threaded_collect(-1) do |cluster|
+        cluster.collect do |object|
+          if block.arity > 1 and object.kind_of?(Enumerable)
+            yield(*object.to_a)
+          else
+            yield(object)
+          end
+        end + (cluster.length == clusters[0].length ? [] : [nil])	# Add padding nil, in order to be able to transpose
+      end.transpose.each do |array|
+        res.concat(array)
+      end
+      res[0..last_pos]	# Remove padding nils.
+    end
+  end
+  # Like Enumerable#select(), but each block is run concurrently in a thread, using ThreadLimiter.new(<i>limit</i>) and its fork().
+  # Set <i>limit</i> to 0 to use plain old select() without any threading.
+  def threaded_select(limit=-1, &block)
+    if limit == 0
+      self.select(&block)
+    else
+      self.zip(self.threaded_collect(limit=-1, &block)).inject([]){|r, (o, b)| r << o if b ; r}
+    end
+  end
+  # Like Enumerable#reject(), but each block is run concurrently in a thread, using ThreadLimiter.new(<i>limit</i>) and its fork().
+  # Set <i>limit</i> to 0 to use plain old reject() without any threading.
+  def threaded_reject(limit=-1, &block)
+    if limit == 0
+      self.reject(&block)
+    else
+      self.zip(self.threaded_collect(limit=-1, &block)).inject([]){|r, (o, b)| r << o unless b ; r}
+    end
+  end
+  # Like Enumerable#each(), but each block is run concurrently in a thread, using ThreadLimiter.new(<i>limit</i>) and its fork().
+  # Set <i>limit</i> to 0 to use plain old each() without any threading.
+  def threaded_each(limit=-1, &block)
+    if limit == 0
+      self.each(&block)
+    else
+      threaded_collect(limit=-1, &block)
+      self
+    end
+  end
+  # Like Enumerable#select(), but all blocks are clustered.
+  # Each cluster is run concurrently in a thread, using ThreadLimiter.new(<i>number_of_clusters</i>) and its fork().
+  # Set <i>number_of_clusters</i> to -1 to skip clustering.
+  def clustered_threaded_select(number_of_clusters=-1, &block)
+    self.zip(self.clustered_threaded_collect(number_of_clusters=-1, &block)).inject([]){|r, (o, b)| r << o if b ; r}
+  end
+  # Like Enumerable#reject(), but all blocks are clustered.
+  # Each cluster is run concurrently in a thread, using ThreadLimiter.new(<i>number_of_clusters</i>) and its fork().
+  # Set <i>number_of_clusters</i> to -1 to skip clustering.
+  def clustered_threaded_reject(number_of_clusters=-1, &block)
+    self.zip(self.clustered_threaded_collect(number_of_clusters=-1, &block)).inject([]){|r, (o, b)| r << o unless b ; r}
+  end
+  # Like Enumerable#each(), but all blocks are clustered.
+  # Each cluster is run concurrently in a thread, using ThreadLimiter.new(<i>number_of_clusters</i>) and its fork().
+  # Set <i>number_of_clusters</i> to -1 to skip clustering.
+  def clustered_threaded_each(number_of_clusters=-1, &block)
+    clustered_threaded_collect(number_of_clusters=-1, &block)
+    self
+  end
+  alias threaded_map		threaded_collect
+  alias clustered_threaded_map	clustered_threaded_collect
+end

data/lib/threadlimiter/threadlimiter.rb ADDED Viewed

@@ -0,0 +1,52 @@
+# Forks threads like Thread.fork, but limits the number of concurrently running
+# threads: with a limit set, a call to fork() waits while the limit is reached.
+#
+# ThreadLimiter isn't a thread pool. Each fork really starts a new thread.
+class ThreadLimiter
+  # Initialize the ThreadLimiter: a counter of running threads, guarded by a Mutex and a ConditionVariable.
+  # The optional parameter <i>limit</i> is the maximum number of concurrently running threads.
+  # Set <i>limit</i> to -1 (the default) or 0 to fork threads without limiting the number of concurrently running threads.
+  def initialize(limit=-1)
+    @limit	= limit	# The maximum number of concurrently running threads.
+    @running	= 0	# The number of currently running threads.
+    @mutex	= Mutex.new
+    @cv		= ConditionVariable.new
+  end
+  # Fork a thread and return the Thread object; the given block is run within the thread.
+  # The list of arguments is passed to the block.
+  # Without a limit (<i>limit</i> <= 0), it simply invokes Thread.fork() and returns its result.
+  # With a limit, the caller first waits until fewer than <i>limit</i> forked threads are running.
+  # The value of the thread is the result of the block; the running counter is decremented even if the block raises.
+  def fork(*args, &block)
+    if @limit <= 0
+      Thread.fork(*args, &block)
+    else
+      @mutex.synchronize do # Reserve a slot before the thread is started.
+        while @running >= @limit
+          @cv.wait(@mutex) # Releases the mutex while waiting for a signal from a finished thread.
+        end
+        @running	+= 1
+      end
+      Thread.fork do
+        begin
+          res	= yield(*args)
+        ensure # Runs whether the block returned or raised.
+          @mutex.synchronize do
+            @running	-= 1
+          end
+          @cv.signal	if @limit > 0 # Wake up one caller waiting in fork().
+        end
+        res # The result of the block becomes the value of the thread.
+      end
+    end
+  end
+end

data/lib/threadlimiter.rb ADDED Viewed

@@ -0,0 +1,4 @@
+require "thread"
+require "threadlimiter/threadlimiter"
+require "threadlimiter/enumerable"

data/test/test.rb ADDED Viewed

@@ -0,0 +1,180 @@
+require "test/unit"
+require "threadlimiter"
+class ThreadLimiterTest < Test::Unit::TestCase
+  def go(limit)
+    input		= (1..100).collect{rand}
+    threadlimiter	= ThreadLimiter.new(limit)
+    threads =
+    input.collect do |m|
+      threadlimiter.fork(m) do |n|
+        Kernel.sleep 0.01
+        n
+      end
+    end
+    assert_equal([Thread], threads.collect{|t| t.class}.uniq)
+    output	= threads.collect{|t| t.value}
+    assert_equal(input.to_a	, output)
+    assert_equal(0		, threadlimiter.instance_eval{@running})
+    assert_equal(limit		, threadlimiter.instance_eval{@limit})
+  end
+  def test_with_limit
+    go(10)
+  end
+  def test_with_no_limit
+    go(-1)
+  end
+  def test_with_zero_limit
+    go(0)
+  end
+end
+class ThreadLimiterEnumerableTest < Test::Unit::TestCase
+  def test_with_empty_enumerables
+    assert_equal([], [].threaded_collect(10){2})
+    assert_equal([], [].threaded_collect(-1){2})
+    assert_equal([], [].threaded_collect(0){2})
+  end
+  def test_threaded_collect_with_no_arguments_with_no_limit
+    input	= (1..100).zip(101..200)
+    output	= input.threaded_collect(-1){2}
+    should_be	= input.collect{2}
+    assert_equal(should_be, output)
+  end
+  def test_threaded_collect_with_no_arguments_with_limit
+    input	= (1..100).zip(101..200)
+    output	= input.threaded_collect(10){2}
+    should_be	= input.collect{2}
+    assert_equal(should_be, output)
+  end
+  def test_threaded_collect_with_no_arguments_with_zero_limit
+    input	= (1..100).zip(101..200)
+    output	= input.threaded_collect(0){2}
+    should_be	= input.collect{2}
+    assert_equal(should_be, output)
+  end
+  def test_threaded_collect_with_one_argument_with_no_limit
+    input	= (1..100).zip(101..200)
+    output	= input.threaded_collect(-1){|x| x * 2}
+    should_be	= input.collect{|x| x * 2}
+    assert_equal(should_be, output)
+  end
+  def test_threaded_collect_with_one_argument_with_limit
+    input	= (1..100).zip(101..200)
+    output	= input.threaded_collect(10){|x| x * 2}
+    should_be	= input.collect{|x| x * 2}
+    assert_equal(should_be, output)
+  end
+  def test_threaded_collect_with_one_argument_with_zero_limit
+    input	= (1..100).zip(101..200)
+    output	= input.threaded_collect(0){|x| x * 2}
+    should_be	= input.collect{|x| x * 2}
+    assert_equal(should_be, output)
+  end
+  def test_threaded_collect_with_two_arguments_with_no_limit
+    input	= (1..100).zip(101..200)
+    output	= input.threaded_collect(-1){|x, y| x * y}
+    should_be	= input.collect{|x, y| x * y}
+    assert_equal(should_be, output)
+  end
+  def test_threaded_collect_with_two_arguments_with_limit
+    input	= (1..100).zip(101..200)
+    output	= input.threaded_collect(10){|x, y| x * y}
+    should_be	= input.collect{|x, y| x * y}
+    assert_equal(should_be, output)
+  end
+  def test_threaded_collect_with_two_arguments_with_zero_limit
+    input	= (1..100).zip(101..200)
+    output	= input.threaded_collect(0){|x, y| x * y}
+    should_be	= input.collect{|x, y| x * y}
+    assert_equal(should_be, output)
+  end
+  def test_threaded_select
+    input	= (1..100).zip(101..200)
+    output	= input.threaded_select{|x, y| (x*y) % 2 == 0}
+    should_be	= input.select{|x, y| (x*y) % 2 == 0}
+    assert_equal(should_be, output)
+  end
+  def test_threaded_reject
+    input	= (1..100).zip(101..200)
+    output	= input.threaded_reject{|x, y| (x*y) % 2 == 0}
+    should_be	= input.reject{|x, y| (x*y) % 2 == 0}
+    assert_equal(should_be, output)
+  end
+  def test_threaded_each
+    input	= (1..100).zip(101..200)
+    output	= input.threaded_each{|x, y| (x*y) % 2 == 0}
+    assert_equal(input, output)
+  end
+  def test_clustered_threaded_collect_with_one_argument
+    input	= (1..100).zip(101..200)
+    output	= input.clustered_threaded_collect(10){|x| x * 2}
+    should_be	= input.collect{|x| x * 2}
+    assert_equal(should_be, output)
+  end
+  def test_clustered_threaded_collect_with_two_arguments
+    input	= (1..100).zip(101..200)
+    output	= input.clustered_threaded_collect(10){|x, y| x * y}
+    should_be	= input.collect{|x, y| x * y}
+    assert_equal(should_be, output)
+  end
+  def test_clustered_threaded_select
+    input	= (1..100).zip(101..200)
+    output	= input.clustered_threaded_select(10){|x, y| (x*y) % 2 == 0}
+    should_be	= input.select{|x, y| (x*y) % 2 == 0}
+    assert_equal(should_be, output)
+  end
+  def test_clustered_threaded_reject
+    input	= (1..100).zip(101..200)
+    output	= input.clustered_threaded_reject(10){|x, y| (x*y) % 2 == 0}
+    should_be	= input.reject{|x, y| (x*y) % 2 == 0}
+    assert_equal(should_be, output)
+  end
+  def test_clustered_threaded_each
+    input	= (1..100).zip(101..200)
+    output	= input.clustered_threaded_each(10){|x, y| (x*y) % 2 == 0}
+    assert_equal(input, output)
+  end
+end

metadata ADDED Viewed

@@ -0,0 +1,65 @@
+--- !ruby/object:Gem::Specification
+name: threadlimiter
+version: !ruby/object:Gem::Version
+  version: 0.1.0
+platform: ruby
+authors:
+- Erik Veenstra
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2008-07-12 00:00:00 +02:00
+default_executable:
+dependencies: []
+description: Fork threads like Thread.fork, but limit the number of concurrently running threads.
+email: threadlimiter@erikveen.dds.nl
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- lib/threadlimiter
+- lib/threadlimiter/threadlimiter.rb
+- lib/threadlimiter/enumerable.rb
+- lib/threadlimiter.rb
+- README
+- LICENSE
+- VERSION
+has_rdoc: true
+homepage: http://www.erikveen.dds.nl/threadlimiter/index.html
+post_install_message:
+rdoc_options:
+- README
+- LICENSE
+- VERSION
+- --title
+- threadlimiter (0.1.0)
+- --main
+- README
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: "0"
+  version:
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: "0"
+  version:
+requirements: []
+rubyforge_project: threadlimiter
+rubygems_version: 1.1.1
+signing_key:
+specification_version: 2
+summary: Fork threads like Thread.fork, but limit the number of concurrently running threads.
+test_files:
+- test/test.rb