cabiri 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. data/lib/cabiri.rb +108 -11
  2. metadata +4 -4
data/lib/cabiri.rb CHANGED
@@ -3,14 +3,23 @@ require 'logger'
3
3
 
4
4
  module Cabiri
5
5
  class JobQueue
6
- # the only thing here that is not self evident is the use of self_pipe.
7
- # This will be used by the wait_until_finished method to implement a
8
- # blocking wait. More information can be found in the comments of that
9
- # method.
6
+ # - remaining_jobs: array that contains jobs that have yet to run
7
+ # - active_jobs_pids: array that contains the pids of jobs that are currently running
8
+ # - jobs_info: array that keeps track of the state of each job
9
+ # - pid_to_index: hash that maps the pid of a job to an index in the jobs_info array
10
+ # - uid_to_index: hash that maps the uid of a job to an index in the jobs_info array
11
+ # - self_pipe: a pipe that is used by the main process to implement a blocking wait for the
12
+ # wait_until_finished method. Both endpoints have sync set to true to prevent the
13
+ # kernel from buffering any messages.
14
+ # - logger: a logger to help log errors
10
15
  def initialize
11
16
  @remaining_jobs = []
12
17
  @active_jobs_pids = []
13
18
 
19
+ @jobs_info = []
20
+ @pid_to_index = {}
21
+ @uid_to_index = {}
22
+
14
23
  @self_pipe = IO.pipe()
15
24
  @self_pipe[0].sync = true
16
25
  @self_pipe[1].sync = true
@@ -18,10 +27,13 @@ module Cabiri
18
27
  @logger = Logger.new($stdout)
19
28
  end
20
29
 
30
+ # add a job to the remaining_jobs array
21
31
  def add(&block)
22
32
  @remaining_jobs << block
23
33
  end
24
34
 
35
+ # check if there is more work to be done. The work is finished if there are no jobs waiting to be run
36
+ # and there are no jobs currently being run.
25
37
  def finished?
26
38
  @remaining_jobs.empty? and @active_jobs_pids.empty?
27
39
  end
@@ -31,7 +43,7 @@ module Cabiri
31
43
  # the last job of the queue is finished, the start method will close the
32
44
  # write end of this pipe. This causes the kernel to notice that nothing can
33
45
  # write to the pipe anymore and thus the kernel sends an EOF down this pipe,
34
- # which in turn causes IO.select to return.
46
+ # which in turn causes the blocking IO.select to return.
35
47
  # When IO.select returns we close the read end of the pipe, such that any
36
48
  # future calls to the wait_until_finished method can return immediately.
37
49
  def wait_until_finished
@@ -41,7 +53,9 @@ module Cabiri
41
53
  end
42
54
  end
43
55
 
44
- # here we start by defining a signal handler that deals with SIGCHLD signals
56
+ # here we start by creating a uid to index mapping. We also add an entry for each
57
+ # job to the jobs_info array.
58
+ # Next we define a signal handler that deals with SIGCHLD signals
45
59
  # (a signal that indicates that a child process has terminated). When we receive
46
60
  # such a signal we get the pid and make sure that the child process was one of
47
61
  # the jobs belonging to the job queue.
@@ -54,13 +68,27 @@ module Cabiri
54
68
  # You might also notice that old_handler is being used to redirect this signal to
55
69
  # a possible other previously defined SIGCHLD signal handler.
56
70
  # Also note that we close the write end of the self_pipe when there are no jobs left.
57
- # See the comments on the wait_until_finished method for more information on this.
71
+ # See the comments on the wait_until_finished method for why this is important.
58
72
  def start(max_active_jobs)
73
+ # create job mappings and initialize job info
74
+ @remaining_jobs.each_with_index do |job, index|
75
+ uid = job.to_s
76
+ @uid_to_index[uid] = index
77
+
78
+ @jobs_info[index] = {}
79
+ @jobs_info[index][:pid] = nil
80
+ @jobs_info[index][:pipe] = nil
81
+ @jobs_info[index][:error] = nil
82
+ @jobs_info[index][:state] = :waiting
83
+ @jobs_info[index][:result] = nil
84
+ end
85
+
86
+ # define signal handler
59
87
  old_handler = trap(:CLD) do
60
88
  begin
61
89
  while pid = Process.wait(-1, Process::WNOHANG)
62
90
  if(@active_jobs_pids.include?(pid))
63
- @active_jobs_pids.delete(pid)
91
+ handle_finished_job(pid)
64
92
  fill_job_slots(max_active_jobs)
65
93
  @self_pipe[1].close if finished?
66
94
  end
@@ -70,6 +98,7 @@ module Cabiri
70
98
  end
71
99
  end
72
100
 
101
+ # start scheduling first batch of jobs
73
102
  fill_job_slots(max_active_jobs)
74
103
  end
75
104
 
@@ -80,13 +109,81 @@ module Cabiri
80
109
  def fill_job_slots(max_active_jobs)
81
110
  while(@active_jobs_pids.length < max_active_jobs and !@remaining_jobs.empty?)
82
111
  begin
83
- @active_jobs_pids << Adeona.spawn_child(:detach => false) { @remaining_jobs[0].call }
112
+ start_next_job
84
113
  rescue => ex
85
- @logger.warn(self.class.to_s) { "Exception thrown when trying to instantiate job. Job info: #{@remaining_jobs[0].to_s}. Exception info: #{ex.to_s}." }
114
+ handle_error(ex)
86
115
  ensure
87
- @remaining_jobs.delete_at(0)
116
+ @remaining_jobs.shift
88
117
  end
89
118
  end
90
119
  end
120
+
121
+ # when starting a new job we first create a pipe. This pipe will be our mechanism to pass any
122
+ # data returned by the job process to the main process. Next, we create a job process by using
123
+ # the Adeona gem. The spawn_child method acts like fork(), but adds some extra protection to
124
+ # prevent orphaned processes. Inside this job process we close the read endpoint of the pipe and
125
+ # set sync to true for the write endpoint in order to prevent the kernel from buffering any messages.
126
+ # We continue by letting the job do its work and storing the result in a var called 'result'. The
127
+ # next step looks a bit weird. We mentioned that we want to use pipes to communicate data, but pipes
128
+ # weren't designed to transport data structures like arrays and hashes, instead they are meant for text.
129
+ # So we use a trick. We use Marshal.dump to convert our result (which could be an array, a number,
130
+ # a hash - we don't know) into a byte stream, put this information inside an array, and then convert this
131
+ # array into a special string designed for transporting binary data as text. This text can now be sent
132
+ # through the write endpoint of the pipe. Back outside the job process we close the write endpoint of the
133
+ # pipe and set sync to true. The rest of the code here should require no comments.
134
+ def start_next_job
135
+ pipe = IO.pipe()
136
+ job = @remaining_jobs.first
137
+
138
+ pid = Adeona.spawn_child(:detach => false) do
139
+ pipe[0].close
140
+ pipe[1].sync = true
141
+ result = job.call
142
+ pipe[1].puts [Marshal.dump(result)].pack("m")
143
+ end
144
+ pipe[1].close
145
+ pipe[0].sync = true
146
+
147
+ index = @uid_to_index[job.to_s]
148
+ @active_jobs_pids << pid
149
+ @pid_to_index[pid] = index
150
+
151
+ @jobs_info[index][:pid] = pid
152
+ @jobs_info[index][:pipe] = pipe
153
+ @jobs_info[index][:state] = :running
154
+ end
155
+
156
+ # when a job finishes, we remove its pid from the array that keeps track of active processes.
157
+ # Next we read the result that we sent over the pipe and then close the pipe's read endpoint.
158
+ # We take the received text data, turn it into a byte stream and then load this information
159
+ # in order to obtain the resulting data from the job.
160
+ def handle_finished_job(pid)
161
+ index = @pid_to_index[pid]
162
+ @active_jobs_pids.delete(pid)
163
+
164
+ pipe = @jobs_info[index][:pipe]
165
+ result = pipe[0].read
166
+ pipe[0].close
167
+
168
+ @jobs_info[index][:result] = Marshal.load(result.unpack("m")[0])
169
+ @jobs_info[index][:state] = :finished
170
+ end
171
+
172
+ # when there is an exception, we log the error and set the relevant fields in the jobs_info data
173
+ def handle_error(ex)
174
+ job = @remaining_jobs.first
175
+ index = @uid_to_index[job.to_s]
176
+
177
+ error = "Exception thrown when trying to instantiate job. Job info: #{@remaining_jobs.first.to_s}. Exception info: #{ex.to_s}."
178
+ @logger.warn(self.class.to_s) { error }
179
+
180
+ @jobs_info[index][:error] = error
181
+ @jobs_info[index][:state] = :error
182
+ end
183
+
184
+ # this allows users to query the state of their jobs
185
+ def get_info(index)
186
+ @jobs_info[index]
187
+ end
91
188
  end
92
189
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cabiri
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-05-01 00:00:00.000000000Z
12
+ date: 2012-05-19 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: adeona
16
- requirement: &70273552307120 !ruby/object:Gem::Requirement
16
+ requirement: &70302667643740 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,7 +21,7 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *70273552307120
24
+ version_requirements: *70302667643740
25
25
  description: An easy and intuitive Ruby job queue.
26
26
  email: tomvaneyck@gmail.com
27
27
  executables: []