threadz 1.1.0.rc2 → 1.1.0.rc3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    threadz (1.1.0.rc1)
+    threadz (1.1.0.rc3)
 
 GEM
   remote: http://rubygems.org/
data/README.rdoc CHANGED
@@ -42,14 +42,20 @@ This is a thread pool library that you can do two main things with, which I'll d
   b.wait_until_done(:timeout => 0.1)
   puts b.completed? ? "finished!" : "didn't finish"
 
-  # Exception handling: well-supported, see the specs though. Much better examples.
+  # Error handling
+  b = T3.new_batch(:max_retries => 3)
+  b << lambda { raise }
+  b.wait_until_done
+  puts b.errors
 
-The thread pool is also smart -- depending on load, it can either spawn or cull additional threads (at a rate you can set).
+  b = T3.new_batch(:max_retries => 3, :error_handler => lambda { |error, control| puts "Error! #{error}" })
+  b << lambda { raise }
+  b.wait_until_done
 
-== Examples
+  # See the specs for more error handling stuff. Much better examples.
 
-For examples, please see the well-documented specs. They're all fairly simple and straightforward. Please message me if they're not.
+The thread pool is also smart -- depending on load, it can either spawn or cull additional threads (at a rate you can set).
 
-== Disclaimer
+== Examples
 
-Consider this product in late alpha. There are still some bugs to be worked out and the API may change.
+For examples, please see the well-documented specs. They're all fairly simple and straightforward. Please message me if you have issues that aren't answered by reading the spec.
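
A minimal sketch of the error-handling flow shown in the README above (illustrative, not taken from the package). It assumes T3 in the README is a Threadz::ThreadPool instance; the pool/batch names are placeholders, and job_errors is the Batch accessor that appears in batch.rb below:

  require 'threadz'

  pool = Threadz::ThreadPool.new   # plays the role of T3 in the README snippet

  # A failed job is retried up to :max_retries times (the new default is 0,
  # i.e. no retries); the :error_handler is invoked whenever a job raises.
  batch = pool.new_batch(:max_retries => 2,
                         :error_handler => lambda { |error, control| puts "job failed: #{error.class}" })
  batch << lambda { raise "boom" }
  batch << lambda { 1 + 1 }
  batch.wait_until_done

  puts batch.completed?          # => true once all jobs (and retries) have finished
  puts batch.job_errors.inspect  # errors raised by jobs, collected by the batch
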
data/lib/threadz/atomic_integer.rb CHANGED
@@ -5,7 +5,7 @@ module Threadz
   # Provides a thread-safe integer counter thing.
   # The code used in this file, while slightly verbose, is to optimize
   # performance. Avoiding additional method calls and blocks is preferred.
-  class AtomicInteger
+  class AtomicInteger # :nodoc:
     def initialize(value)
       @value = value
       @mutex = Mutex.new
data/lib/threadz/batch.rb CHANGED
@@ -1,28 +1,41 @@
 ['atomic_integer', 'sleeper', 'errors'].each { |lib| require File.join(File.dirname(__FILE__), lib) }
 
 module Threadz
-  # A batch is a collection of jobs you care about that gets pushed off to
+  # A batch is a (typically related) collection of jobs that execute together on
   # the attached thread pool. The calling thread can be signaled when the
   # batch has completed executing, or a block can be executed.
+  # The easiest way to create a batch is with the ThreadPool method ThreadPool#new_batch:
+  #   tp = Threadz::ThreadPool.new
+  #   tp.new_batch(args)
+  # The options to new_batch get passed to Batch#initialize.
   class Batch
     # Creates a new batch attached to the given threadpool. A number of options
     # are available:
-    # +:latent+:: If latent, none of the jobs in the batch will actually start
-    #   executing until the +start+ method is called.
+    # :latent [false]:: If latent, none of the jobs in the batch will actually start
+    #   executing until the #start method is called.
+    # :max_retries [0]:: Specifies the maximum number of times to automatically retry a failed
+    #   job. Defaults to 0.
+    # :error_handler [nil]:: Specifies the error handler to be invoked in the case of an error.
+    #   It will be called like so: handler.call(error, control) where +error+ is the underlying error and
+    #   +control+ is a Control for the job that had the error.
     def initialize(threadpool, opts={})
       @threadpool = threadpool
       @job_lock = Mutex.new
       @jobs_count = AtomicInteger.new(0)
-      @when_done_blocks = []
+      @when_done_callbacks = []
       @sleeper = ::Threadz::Sleeper.new
-      @error_lock = Mutex.new
+
+      @error_lock = Mutex.new # Locked whenever the list of errors is read or modified
       @job_errors = []
       @error_handler_errors = []
+
       @error_handler = opts[:error_handler]
       if @error_handler && !@error_handler.respond_to?(:call)
         raise ArgumentError.new("ErrorHandler must respond to #call")
       end
-      @max_retries = opts[:max_retries] || 3
+
+      @max_retries = opts[:max_retries] || 0
+
       @verbose = opts[:verbose]
 
       ## Options
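
The options documented in this hunk can be exercised roughly like this (an illustrative sketch, not taken from the package); a latent batch queues jobs without scheduling them until #start is called:

  require 'threadz'

  pool  = Threadz::ThreadPool.new
  batch = pool.new_batch(:latent => true)           # jobs are queued but not yet scheduled

  batch << lambda { sleep 0.01; :first }
  batch << [lambda { :second }, lambda { :third }]  # an array of callables is accepted too

  batch.start                                       # a latent batch only begins executing here
  batch.wait_until_done
  puts batch.completed?                             # => true
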
@@ -37,13 +50,13 @@ module Threadz
       @job_queue = Queue.new if @latent
     end
 
-    # Add a new job to the batch. If this is a latent batch, the job can't
+    # Add a new job to the batch. If this is a latent batch, the job won't
     # be scheduled until the batch is #start'ed; otherwise it may start
     # immediately. The job can be anything that responds to +call+ or an
     # array of objects that respond to +call+.
     def push(job)
       if job.is_a? Array
-        job.each {|j| self << j}
+        job.each { |j| self.push(j) }
       elsif job.respond_to? :call
         @jobs_count.increment
         if @latent && !@started
@@ -58,10 +71,9 @@ module Threadz
 
     alias << push
 
-    # Put the current thread to sleep until the batch is done processing.
-    # There are options available:
-    # +:timeout+:: If specified, will only wait for at least this many seconds
-    #   for the batch to finish. Typically used with #completed?
+    # Blocks until the batch is done processing.
+    # +:timeout+ [nil]:: If specified, will only wait for this many seconds
+    #   for the batch to finish. Typically used with #completed?
     def wait_until_done(opts={})
       raise "Threadz: thread deadlocked because batch job was never started" if @latent && !@started
 
@@ -80,46 +92,51 @@ module Threadz
 
     # Returns the list of errors that occurred in the jobs
     def job_errors
-      arr = nil
-      @error_lock.synchronize { arr = @job_errors.dup }
-      arr
+      @error_lock.synchronize { @job_errors.dup }
     end
 
     # Returns the list of errors that occurred in the error handler
     def error_handler_errors
-      arr = nil
-      @error_lock.synchronize { arr = @error_handler_errors.dup }
-      arr
+      @error_lock.synchronize { @error_handler_errors.dup }
     end
 
-    # If this is a latent batch, start processing all of the jobs in the queue.
+    # If this is a +latent+ batch, start processing all of the jobs in the queue.
     def start
-      @job_lock.synchronize { # in case another thread tries to push new jobs onto the queue while we're starting
-        if @latent
+      @job_lock.synchronize do # in case another thread tries to push new jobs onto the queue while we're starting
+        if @latent && !@started
           @started = true
           until @job_queue.empty?
-            send_to_threadpool(@job_queue.pop)
+            job = @job_queue.pop
+            send_to_threadpool(job)
           end
-          return true
-        else
-          return false
         end
-      }
+      end
     end
 
     # Execute a given block when the batch has finished processing. If the batch
     # has already finished executing, execute immediately.
     def when_done(&block)
-      @job_lock.synchronize { completed? ? block.call : @when_done_blocks << block }
+      call_block = false
+      @job_lock.synchronize do
+        if completed?
+          call_block = true
+        else
+          @when_done_callbacks << block
+        end
+      end
+      yield if call_block
     end
 
     private
     def handle_done
       @sleeper.broadcast
-      @when_done_blocks.each do |b|
-        b.call
+      callbacks = nil
+      @job_lock.synchronize do
+        callbacks = @when_done_callbacks.dup
+        @when_done_callbacks.clear
       end
-      @when_done_blocks = []
+
+      callbacks.each { |b| b.call }
     end
 
     def send_to_threadpool(job)
@@ -144,14 +161,12 @@ module Threadz
           retry unless retries >= @max_retries
         end
       end
-      # Lock in case we get two threads at the "fork in the road" at the same time
-      # Note: locking here actually creates undesirable behavior. Still investigating why,
-      # seems like it should be useful.
-      #@job_lock.lock
-      @jobs_count.decrement
-      # fork in the road
-      handle_done if completed?
-      #@job_lock.unlock
+      should_handle_done = false
+      @job_lock.synchronize do
+        @jobs_count.decrement
+        should_handle_done = completed?
+      end
+      handle_done if should_handle_done
     end
   end
 end
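
Tying the batch changes together, a usage sketch of the reworked #when_done and the :timeout option on #wait_until_done (illustrative only; the sleep durations are arbitrary):

  require 'threadz'

  pool  = Threadz::ThreadPool.new
  batch = pool.new_batch
  batch << lambda { sleep 0.05 }
  batch << lambda { sleep 0.05 }

  # Runs once every job has finished; if the batch is already done when the
  # block is registered, it runs immediately (per the reworked #when_done).
  batch.when_done { puts "all jobs finished" }

  # A bounded wait: returns after roughly 0.01s even if jobs are still running,
  # so pair it with #completed? to see whether the batch actually finished.
  batch.wait_until_done(:timeout => 0.01)
  puts batch.completed? ? "finished early" : "still running"

  batch.wait_until_done   # now block until everything is done
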
data/lib/threadz/directive.rb CHANGED
@@ -1,6 +1,6 @@
 module Threadz
   # Directives: Special instructions for threads that are communicated via the queue
-  class Directive
+  class Directive # :nodoc: all
     # The thread that consumes this directive immediately dies
     SUICIDE_PILL = "__THREADZ_SUICIDE_PILL"
   end
data/lib/threadz/errors.rb CHANGED
@@ -1,6 +1,8 @@
 module Threadz
+  # Generic class that all Threadz errors are a subclass of.
   class ThreadzError < StandardError; end
 
+  # Thrown when a Job is the origin of an error. The original set of errors are available in the #errors field.
   class JobError < ThreadzError
     attr_reader :errors
     def initialize(errors)
@@ -8,11 +10,4 @@ module Threadz
       @errors = errors
     end
   end
-  class ErrorHandlerError < ThreadzError
-    attr_reader :error
-    def initialize(error)
-      super("An error occurred in the error handler itself (see #error)")
-      @error = error
-    end
-  end
 end
data/lib/threadz/sleeper.rb CHANGED
@@ -2,7 +2,7 @@ require 'thread'
 require 'timeout'
 
 module Threadz
-  class Sleeper
+  class Sleeper # :nodoc: all
     def initialize
       @waiters = Queue.new
     end
data/lib/threadz/thread_pool.rb CHANGED
@@ -16,20 +16,25 @@
 
     # Creates a new thread pool into which you can queue jobs.
     # There are a number of options:
-    # :initial_size:: The number of threads you start out with initially. Also, the minimum number of threads.
-    #   By default, this is 10.
-    # :maximum_size:: The highest number of threads that can be allocated. By default, this is the minimum size x 5.
-    # :kill_threshold:: Constant that determines when new threads are needed or when threads can be killed off.
-    #   If the internally tracked kill score falls to positive kill_threshold, then a thread is killed off and the
-    #   kill score is reset. If the kill score rises to negative kill_threshold, then a new thread
-    #   is created and the kill score is reset. Every 0.1 seconds, the state of all threads in the
-    #   pool is checked. If there is more than one idle thread (and we're above minimum size), the
-    #   kill score is incremented by THREADS_IDLE_SCORE for each idle thread. If there are no idle threads
-    #   (and we're below maximum size) the kill score is decremented by THREADS_KILL_SCORE for each queued job.
-    #   If the thread pool is being perfectly utilized (no queued work or idle workers), the kill score will decay
-    #   and lose 10% of its value.
-    #   In the default case of kill_threshold=10, if the thread pool is overworked for 10 consecutive checks (that is,
-    #   1 second), a new thread will be created and the counter reset. Similarly, if the thread pool is underutilized
+    # :initial_size [10]:: The number of threads you start out with initially. Also, the minimum number of threads.
+    # :maximum_size [+initial_size+ * 5]:: The highest number of threads that can be allocated.
+    # :kill_threshold [10]::
+    #   Constant that determines when new threads are needed or when threads can be killed off.
+    #   To understand what this means, I'll briefly (ha) explain what's called the +killscore+, which is used to gauge
+    #   utilization over time of the threadpool. It's just a number, and it starts at 0. It has a special relationship
+    #   to the +kill_threshold+, which will now be explained.
+    #   If the +killscore+ rises to positive +kill_threshold+, this indicates that the threadpool is *underutilized*,
+    #   a thread is killed off (if we're over the minimum number of threads), and the +killscore+ is reset to 0.
+    #   If the +killscore+ falls to negative kill_threshold, this indicates that the threadpool is *overutilized*,
+    #   a new thread is created (if we're under the maximum number of threads), and the +killscore+ is reset to 0.
+    #
+    #   Every 0.1 seconds, the state of all threads in the pool is checked.
+    #   * If there is at least one idle thread (and we're above minimum size), the +killscore+ is incremented by THREADS_IDLE_SCORE for each idle thread.
+    #   * If there are no idle threads (and we're below maximum size) the +killscore+ is decremented by THREADS_KILL_SCORE for each queued job.
+    #   * If the thread pool is being perfectly utilized (no queued work or idle workers), the +killscore+ will decay by 10%.
+    #
+    #   In the default case of kill_threshold=10, if the thread pool is overworked by one job for 10 consecutive checks (that is,
+    #   1 second), a new thread will be created and the counter reset. Similarly, if the thread pool is underutilized by one thread
     # for 10 consecutive checks, an idle thread will be culled. If you want the thread pool to scale more quickly with
     # demand, try lowering the kill_threshold value.
     def initialize(opts={})
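
As a rough illustration of the options documented above (not taken from the package; the sizes and threshold are arbitrary example values), a pool can be tuned like so:

  require 'threadz'

  # Start small, allow growth under load, and let the watch thread react in
  # half the default time (kill_threshold 5 instead of 10).
  pool = Threadz::ThreadPool.new(:initial_size   => 2,
                                 :maximum_size   => 20,
                                 :kill_threshold => 5)

  batch = pool.new_batch
  100.times { batch << lambda { sleep 0.05 } }
  batch.wait_until_done

  puts pool.thread_count   # the pool may have grown beyond the initial 2 threads
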
@@ -46,23 +51,24 @@ module Threadz
       spawn_watch_thread
     end
 
+    # Returns the number of worker threads this pool is currently managing.
     def thread_count
       @worker_threads_count.value
     end
 
     # Push a process onto the job queue for the thread pool to pick up.
     # Note that using this method, you can't keep track of when the job
-    # finishes. If you care about when it finishes, use batches.
+    # finishes. If you care about when it finishes, use a Batch (using #new_batch).
    def process(callback = nil, &block)
       callback ||= block
       @queue << Control.new(callback)
       nil
     end
 
-    # Return a new batch that's attached into this thread pool. See Threadz::ThreadPool::Batch
-    # for documention on opts.
+    # Return a new batch that's attached into this thread pool. See Batch#new
+    # for documention on +opts+.
     def new_batch(opts={})
-      return Batch.new(self, opts)
+      Batch.new(self, opts)
     end
 
     private
@@ -88,6 +94,8 @@
     end
 
     # Kill a thread after it completes its current job
+    # NOTE: Currently this doesn't really work because it pushes a "suicide pill" on the END of the list of jobs,
+    # due to a technical limitation with Ruby's standard Queue.
     def kill_thread
       # TODO: ideally this would be unshift, but Queues don't have that. Come up with an alternative.
       @queue << Directive::SUICIDE_PILL
@@ -107,11 +115,11 @@
           @killscore -= THREADS_BUSY_SCORE * @queue.length
 
         else
-          # Decay,
-          if(@killscore != 0)
+          # Decay
+          if @killscore != 0 # documented
             @killscore *= 0.9
           end
-          if(@killscore.abs < 1)
+          if @killscore.abs < 1
             @killscore = 0
           end
         end
data/lib/threadz/version.rb CHANGED
@@ -1,4 +1,4 @@
 module Threadz
-  VERSION = "1.1.0.rc2"
+  VERSION = "1.1.0.rc3"
 end
 
data/spec/threadz_spec.rb CHANGED
@@ -233,12 +233,12 @@ describe Threadz do
       b.wait_until_done
       error.should_not be_nil
     end
-    it "should retry up to 3 times by default" do
+    it "should not retry by default" do
       count = 0
       b = @T.new_batch(:error_handler => lambda { |e, ctrl| count += 1 })
       b << lambda { raise }
       b.wait_until_done
-      count.should == 3
+      count.should == 1
     end
     it "should retry up to the designated number of times" do
       count = 0
@@ -260,8 +260,8 @@ describe Threadz do
       b = @T.new_batch(:error_handler => lambda { |e, ctrl| raise })
       b << lambda { raise }
       b.wait_until_done
-      b.job_errors.length.should == 3
-      b.error_handler_errors.length.should == 3
+      b.job_errors.length.should == 1
+      b.error_handler_errors.length.should == 1
     end
     it "should allow you to respond to errors on a per-job basis" do
       job1 = lambda { 1 + 2 }
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: threadz
 version: !ruby/object:Gem::Version
-  version: 1.1.0.rc2
+  version: 1.1.0.rc3
   prerelease: 6
 platform: ruby
 authors:
@@ -69,6 +69,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
   - - ! '>='
     - !ruby/object:Gem::Version
       version: '0'
+      segments:
+      - 0
+      hash: 2507925423504722291
 required_rubygems_version: !ruby/object:Gem::Requirement
   none: false
   requirements: