cabiri 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. data/lib/cabiri.rb +108 -11
  2. metadata +4 -4
data/lib/cabiri.rb CHANGED
@@ -3,14 +3,23 @@ require 'logger'
3
3
 
4
4
  module Cabiri
5
5
  class JobQueue
6
- # the only thing here that is not self evident is the use of self_pipe.
7
- # This will be used by the wait_until_finished method to implement a
8
- # blocking wait. More information can be found in the comments of that
9
- # method.
6
+ # - remaining_jobs: array that contains jobs that have yet to run
7
+ # - active_jobs_pids: array that contains the pids of jobs that are currently running
8
+ # - jobs_info: array that keeps track of the state of each job
9
+ # - pid_to_index: hash that maps the pid of a job to an index in the jobs_info array
10
+ # - uid_to_index: hash that maps the uid of a job to an index in the jobs_info array
11
+ # - self_pipe: a pipe that is used by the main process to implement a blocking wait for the
12
+ # wait_until_finished method. Both endpoints have sync set to true to prevent the
13
+ # kernel from buffering any messages.
14
+ # - logger: a logger to help log errors
10
15
  def initialize
11
16
  @remaining_jobs = []
12
17
  @active_jobs_pids = []
13
18
 
19
+ @jobs_info = []
20
+ @pid_to_index = {}
21
+ @uid_to_index = {}
22
+
14
23
  @self_pipe = IO.pipe()
15
24
  @self_pipe[0].sync = true
16
25
  @self_pipe[1].sync = true
@@ -18,10 +27,13 @@ module Cabiri
18
27
  @logger = Logger.new($stdout)
19
28
  end
20
29
 
30
+ # add a job to the remaining_jobs array
21
31
  def add(&block)
22
32
  @remaining_jobs << block
23
33
  end
24
34
 
35
+ # check if there is more work to be done. The work is finished if there are no jobs waiting to be run
36
+ # and there are no jobs currently being run.
25
37
  def finished?
26
38
  @remaining_jobs.empty? and @active_jobs_pids.empty?
27
39
  end
@@ -31,7 +43,7 @@ module Cabiri
31
43
  # the last job of the queue is finished, the start method will close the
32
44
  # write end of this pipe. This causes the kernel to notice that nothing can
33
45
  # write to the pipe anymore and thus the kernel sends an EOF down this pipe,
34
- # which in turn causes IO.select to return.
46
+ # which in turn causes the blocking IO.select to return.
35
47
  # When IO.select returns we close the read end of the pipe, such that any
36
48
  # future calls to the wait_until_finished method can return immediately.
37
49
  def wait_until_finished
@@ -41,7 +53,9 @@ module Cabiri
41
53
  end
42
54
  end
43
55
 
44
- # here we start by defining a signal handler that deals with SIGCHLD signals
56
+ # here we start by creating a uid to index mapping. We also add an entry for each
57
+ # job to the jobs_info array.
58
+ # Next we define a signal handler that deals with SIGCHLD signals
45
59
  # (a signal that indicates that a child process has terminated). When we receive
46
60
  # such a signal we get the pid and make sure that the child process was one of
47
61
  # the jobs belonging to the job queue.
@@ -54,13 +68,27 @@ module Cabiri
54
68
  # You might also notice that old_handler is being used to redirect this signal to
55
69
  # a possible other previously defined SIGCHLD signal handler.
56
70
  # Also note that we close the write end of the self_pipe when there are no jobs left.
57
- # See the comments on the wait_until_finished method for more information on this.
71
+ # See the comments on the wait_until_finished method for why this is important.
58
72
  def start(max_active_jobs)
73
+ # create job mappings and initialize job info
74
+ @remaining_jobs.each_with_index do |job, index|
75
+ uid = job.to_s
76
+ @uid_to_index[uid] = index
77
+
78
+ @jobs_info[index] = {}
79
+ @jobs_info[index][:pid] = nil
80
+ @jobs_info[index][:pipe] = nil
81
+ @jobs_info[index][:error] = nil
82
+ @jobs_info[index][:state] = :waiting
83
+ @jobs_info[index][:result] = nil
84
+ end
85
+
86
+ # define signal handler
59
87
  old_handler = trap(:CLD) do
60
88
  begin
61
89
  while pid = Process.wait(-1, Process::WNOHANG)
62
90
  if(@active_jobs_pids.include?(pid))
63
- @active_jobs_pids.delete(pid)
91
+ handle_finished_job(pid)
64
92
  fill_job_slots(max_active_jobs)
65
93
  @self_pipe[1].close if finished?
66
94
  end
@@ -70,6 +98,7 @@ module Cabiri
70
98
  end
71
99
  end
72
100
 
101
+ # start scheduling first batch of jobs
73
102
  fill_job_slots(max_active_jobs)
74
103
  end
75
104
 
@@ -80,13 +109,81 @@ module Cabiri
80
109
  def fill_job_slots(max_active_jobs)
81
110
  while(@active_jobs_pids.length < max_active_jobs and !@remaining_jobs.empty?)
82
111
  begin
83
- @active_jobs_pids << Adeona.spawn_child(:detach => false) { @remaining_jobs[0].call }
112
+ start_next_job
84
113
  rescue => ex
85
- @logger.warn(self.class.to_s) { "Exception thrown when trying to instantiate job. Job info: #{@remaining_jobs[0].to_s}. Exception info: #{ex.to_s}." }
114
+ handle_error(ex)
86
115
  ensure
87
- @remaining_jobs.delete_at(0)
116
+ @remaining_jobs.shift
88
117
  end
89
118
  end
90
119
  end
120
+
121
+ # when starting a new job we first create a pipe. This pipe will be our mechanism to pass any
122
+ # data returned by the job process to the main process. Next, we create a job process by using
123
+ # the Adeona gem. The spawn_child method acts like fork(), but adds some extra protection to
124
+ # prevent orphaned processes. Inside this job process we close the read endpoint of the pipe and
125
+ # set sync to true for the write endpoint in order to prevent the kernel from buffering any messages.
126
+ # We continue by letting the job do its work and storing the result in a var called 'result'. The
127
+ # next step looks a bit weird. We mentioned that we want to use pipes to communicate data, but pipes
128
+ # weren't designed to transport data structures like arrays and hashes, instead they are meant for text.
129
+ # So we use a trick. We use Marshal.dump to convert our result (which could be an array, a number,
130
+ # a hash - we don't know) into a byte stream, put this information inside an array, and then convert this
131
+ # array into a special string designed for transporting binary data as text. This text can now be sent
132
+ # through the write endpoint of the pipe. Back outside the job process we close the write endpoint of the
133
+ # pipe and set sync to true. The rest of the code here should require no comments.
134
+ def start_next_job
135
+ pipe = IO.pipe()
136
+ job = @remaining_jobs.first
137
+
138
+ pid = Adeona.spawn_child(:detach => false) do
139
+ pipe[0].close
140
+ pipe[1].sync = true
141
+ result = job.call
142
+ pipe[1].puts [Marshal.dump(result)].pack("m")
143
+ end
144
+ pipe[1].close
145
+ pipe[0].sync = true
146
+
147
+ index = @uid_to_index[job.to_s]
148
+ @active_jobs_pids << pid
149
+ @pid_to_index[pid] = index
150
+
151
+ @jobs_info[index][:pid] = pid
152
+ @jobs_info[index][:pipe] = pipe
153
+ @jobs_info[index][:state] = :running
154
+ end
155
+
156
+ # when a job finishes, we remove its pid from the array that keeps track of active processes.
157
+ # Next we read the result that we sent over the pipe and then close the pipe's read endpoint.
158
+ # We take the received text data, turn it into a byte stream and then load this information
159
+ # in order to obtain the resulting data from the job.
160
+ def handle_finished_job(pid)
161
+ index = @pid_to_index[pid]
162
+ @active_jobs_pids.delete(pid)
163
+
164
+ pipe = @jobs_info[index][:pipe]
165
+ result = pipe[0].read
166
+ pipe[0].close
167
+
168
+ @jobs_info[index][:result] = Marshal.load(result.unpack("m")[0])
169
+ @jobs_info[index][:state] = :finished
170
+ end
171
+
172
+ # when there is an exception, we log the error and set the relevant fields in the jobs_info data
173
+ def handle_error(ex)
174
+ job = @remaining_jobs.first
175
+ index = @uid_to_index[job.to_s]
176
+
177
+ error = "Exception thrown when trying to instantiate job. Job info: #{@remaining_jobs.first.to_s}. Exception info: #{ex.to_s}."
178
+ @logger.warn(self.class.to_s) { error }
179
+
180
+ @jobs_info[index][:error] = error
181
+ @jobs_info[index][:state] = :error
182
+ end
183
+
184
+ # this allows users to query the state of their jobs
185
+ def get_info(index)
186
+ @jobs_info[index]
187
+ end
91
188
  end
92
189
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cabiri
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-05-01 00:00:00.000000000Z
12
+ date: 2012-05-19 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: adeona
16
- requirement: &70273552307120 !ruby/object:Gem::Requirement
16
+ requirement: &70302667643740 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,7 +21,7 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *70273552307120
24
+ version_requirements: *70302667643740
25
25
  description: An easy and intuitive Ruby job queue.
26
26
  email: tomvaneyck@gmail.com
27
27
  executables: []