cabiri 0.0.1 → 0.0.2

Files changed (2)
  1. data/lib/cabiri.rb +108 -11
  2. metadata +4 -4
data/lib/cabiri.rb CHANGED
@@ -3,14 +3,23 @@ require 'logger'
 
 module Cabiri
   class JobQueue
-    # the only thing here that is not self evident is the use of self_pipe.
-    # This will be used by the wait_until_finished method to implement a
-    # blocking wait. More information can be found in the comments of that
-    # method.
+    # - remaining_jobs:   array that contains jobs that have yet to run
+    # - active_jobs_pids: array that contains the pids of jobs that are currently running
+    # - jobs_info:        array that keeps track of the state of each job
+    # - pid_to_index:     hash that maps the pid of a job to an index in the jobs_info array
+    # - uid_to_index:     hash that maps the uid of a job to an index in the jobs_info array
+    # - self_pipe:        a pipe that is used by the main process to implement a blocking wait for the
+    #                     wait_until_finished method. Both endpoints have sync set to true to prevent the
+    #                     kernel from buffering any messages.
+    # - logger:           a logger to help log errors
     def initialize
       @remaining_jobs = []
       @active_jobs_pids = []
 
+      @jobs_info = []
+      @pid_to_index = {}
+      @uid_to_index = {}
+
       @self_pipe = IO.pipe()
       @self_pipe[0].sync = true
       @self_pipe[1].sync = true
@@ -18,10 +27,13 @@ module Cabiri
       @logger = Logger.new($stdout)
     end
 
+    # add a job to the remaining_jobs array
     def add(&block)
       @remaining_jobs << block
     end
 
+    # check if there is more work to be done. The work is finished if there are no jobs waiting to be run
+    # and there are no jobs currently being run.
     def finished?
       @remaining_jobs.empty? and @active_jobs_pids.empty?
     end
@@ -31,7 +43,7 @@ module Cabiri
     # the last job of the queue is finished, the start method will close the
     # write end of this pipe. This causes the kernel to notice that nothing can
     # write to the pipe anymore and thus the kernel sends an EOF down this pipe,
-    # which in turn causes IO.select to return.
+    # which in turn causes the blocking IO.select to return.
     # When IO.select returns we close the read end of the pipe, such that any
     # future calls to the wait_until_finished method can return immediately.
     def wait_until_finished
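The self-pipe mechanism described in the comment above can be tried outside the gem. The following is a minimal standalone sketch (not part of this diff) showing how closing the write end of a pipe delivers EOF and wakes up a blocking IO.select:

# minimal self-pipe sketch: IO.select blocks until EOF arrives on the pipe
reader, writer = IO.pipe

Thread.new do
  sleep 1        # stand-in for "the last job has just finished"
  writer.close   # closing the only write end delivers EOF to the read end
end

IO.select([reader])  # blocks here until the pipe becomes readable (EOF counts)
reader.close         # any later wait can now return immediately
puts "queue is finished"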
@@ -41,7 +53,9 @@ module Cabiri
       end
     end
 
-    # here we start by defining a signal handler that deals with SIGCHLD signals
+    # here we start by creating a uid to index mapping. We also add an entry for each
+    # job to the jobs_info array.
+    # Next we define a signal handler that deals with SIGCHLD signals
     # (a signal that indicates that a child process has terminated). When we receive
     # such a signal we get the pid and make sure that the child process was one of
     # the jobs belonging to the job queue.
@@ -54,13 +68,27 @@ module Cabiri
     # You might also notice that old_handler is being used to redirect this signal to
     # any previously defined SIGCHLD signal handler.
     # Also note that we close the write end of the self_pipe when there are no jobs left.
-    # See the comments on the wait_until_finished method for more information on this.
+    # See the comments on the wait_until_finished method for why this is important.
     def start(max_active_jobs)
+      # create job mappings and initialize job info
+      @remaining_jobs.each_with_index do |job, index|
+        uid = job.to_s
+        @uid_to_index[uid] = index
+
+        @jobs_info[index] = {}
+        @jobs_info[index][:pid] = nil
+        @jobs_info[index][:pipe] = nil
+        @jobs_info[index][:error] = nil
+        @jobs_info[index][:state] = :waiting
+        @jobs_info[index][:result] = nil
+      end
+
+      # define signal handler
       old_handler = trap(:CLD) do
         begin
           while pid = Process.wait(-1, Process::WNOHANG)
             if(@active_jobs_pids.include?(pid))
-              @active_jobs_pids.delete(pid)
+              handle_finished_job(pid)
               fill_job_slots(max_active_jobs)
               @self_pipe[1].close if finished?
             end
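As a standalone illustration (not part of this diff) of the reaping pattern used above, a SIGCHLD handler can drain every already-exited child without blocking and still hand the signal on to any previously installed handler:

# sketch of a non-blocking SIGCHLD reaping loop with handler chaining
reaped = []

old_handler = trap(:CLD) do
  begin
    # WNOHANG makes Process.wait return nil instead of blocking when
    # no more children have exited yet
    while pid = Process.wait(-1, Process::WNOHANG)
      reaped << pid
    end
  rescue Errno::ECHILD
    # raised when there are no child processes left at all
  end
  old_handler.call if old_handler.respond_to?(:call)  # chain any previous handler
end

3.times { fork { sleep rand } }
sleep 2
puts "reaped children: #{reaped.inspect}"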
@@ -70,6 +98,7 @@ module Cabiri
         end
       end
 
+      # start scheduling first batch of jobs
       fill_job_slots(max_active_jobs)
     end
 
@@ -80,13 +109,81 @@ module Cabiri
     def fill_job_slots(max_active_jobs)
       while(@active_jobs_pids.length < max_active_jobs and !@remaining_jobs.empty?)
         begin
-          @active_jobs_pids << Adeona.spawn_child(:detach => false) { @remaining_jobs[0].call }
+          start_next_job
         rescue => ex
-          @logger.warn(self.class.to_s) { "Exception thrown when trying to instantiate job. Job info: #{@remaining_jobs[0].to_s}. Exception info: #{ex.to_s}." }
+          handle_error(ex)
         ensure
-          @remaining_jobs.delete_at(0)
+          @remaining_jobs.shift
         end
       end
     end
+
+    # when starting a new job we first create a pipe. This pipe will be our mechanism to pass any
+    # data returned by the job process to the main process. Next, we create a job process by using
+    # the Adeona gem. The spawn_child method acts like fork(), but adds some extra protection to
+    # prevent orphaned processes. Inside this job process we close the read endpoint of the pipe and
+    # set sync to true for the write endpoint in order to prevent the kernel from buffering any messages.
+    # We continue by letting the job do its work and storing the result in a var called 'result'. The
+    # next step looks a bit weird. We mentioned that we want to use pipes to communicate data, but pipes
+    # weren't designed to transport data structures like arrays and hashes, instead they are meant for text.
+    # So we use a trick. We use Marshal.dump to convert our result (which could be an array, a number,
+    # a hash - we don't know) into a byte stream, put this information inside an array, and then convert this
+    # array into a special string designed for transporting binary data as text. This text can now be sent
+    # through the write endpoint of the pipe. Back outside the job process we close the write endpoint of the
+    # pipe and set sync to true on the read endpoint. The rest of the code here should require no comments.
+    def start_next_job
+      pipe = IO.pipe()
+      job = @remaining_jobs.first
+
+      pid = Adeona.spawn_child(:detach => false) do
+        pipe[0].close
+        pipe[1].sync = true
+        result = job.call
+        pipe[1].puts [Marshal.dump(result)].pack("m")
+      end
+      pipe[1].close
+      pipe[0].sync = true
+
+      index = @uid_to_index[job.to_s]
+      @active_jobs_pids << pid
+      @pid_to_index[pid] = index
+
+      @jobs_info[index][:pid] = pid
+      @jobs_info[index][:pipe] = pipe
+      @jobs_info[index][:state] = :running
+    end
+
+    # when a job finishes, we remove its pid from the array that keeps track of active processes.
+    # Next we read the result that was sent over the pipe and then close the pipe's read endpoint.
+    # We take the received text data, turn it into a byte stream and then load this information
+    # in order to obtain the resulting data from the job.
+    def handle_finished_job(pid)
+      index = @pid_to_index[pid]
+      @active_jobs_pids.delete(pid)
+
+      pipe = @jobs_info[index][:pipe]
+      result = pipe[0].read
+      pipe[0].close
+
+      @jobs_info[index][:result] = Marshal.load(result.unpack("m")[0])
+      @jobs_info[index][:state] = :finished
+    end
+
+    # when there is an exception, we log the error and set the relevant fields in the jobs_info data
+    def handle_error(ex)
+      job = @remaining_jobs.first
+      index = @uid_to_index[job.to_s]
+
+      error = "Exception thrown when trying to instantiate job. Job info: #{@remaining_jobs.first.to_s}. Exception info: #{ex.to_s}."
+      @logger.warn(self.class.to_s) { error }
+
+      @jobs_info[index][:error] = error
+      @jobs_info[index][:state] = :error
+    end
+
+    # this allows users to query the state of their jobs
+    def get_info(index)
+      @jobs_info[index]
+    end
   end
 end
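The Marshal/pack trick that start_next_job and handle_finished_job use above can also be tried in isolation. Below is a minimal standalone sketch (not part of this diff) that uses plain fork() as a stand-in for Adeona.spawn_child:

# round-tripping an arbitrary Ruby object from a child process over a pipe
pipe = IO.pipe

pid = fork do
  pipe[0].close
  pipe[1].sync = true
  result = { :answer => 42, :list => [1, 2, 3] }   # whatever the job returns
  pipe[1].puts [Marshal.dump(result)].pack("m")    # object -> byte stream -> base64 text
  pipe[1].close
end

pipe[1].close
pipe[0].sync = true
Process.wait(pid)

encoded = pipe[0].read
pipe[0].close
result = Marshal.load(encoded.unpack("m")[0])      # base64 text -> byte stream -> object
p result  # => {:answer=>42, :list=>[1, 2, 3]}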
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: cabiri
 version: !ruby/object:Gem::Version
-  version: 0.0.1
+  version: 0.0.2
 prerelease:
 platform: ruby
 authors:
@@ -9,11 +9,11 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2012-05-01 00:00:00.000000000Z
+date: 2012-05-19 00:00:00.000000000Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: adeona
-  requirement: &70273552307120 !ruby/object:Gem::Requirement
+  requirement: &70302667643740 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ! '>='
@@ -21,7 +21,7 @@ dependencies:
         version: '0'
   type: :runtime
   prerelease: false
-  version_requirements: *70273552307120
+  version_requirements: *70302667643740
 description: An easy and intuitive Ruby job queue.
 email: tomvaneyck@gmail.com
 executables: []
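Taken together, the library changes above add per-job bookkeeping and result collection on top of the existing queue. A hedged end-to-end usage sketch (method names taken from the diff; exact return values assumed):

require 'cabiri'

queue = Cabiri::JobQueue.new
queue.add { 2 + 2 }
queue.add { sleep 1; "slow job done" }

queue.start(2)             # run at most two jobs concurrently
queue.wait_until_finished  # blocks on the self_pipe until the queue is drained

info = queue.get_info(0)   # jobs are indexed in the order they were added
p info[:state]             # => :finished (assuming the job ran without errors)
p info[:result]            # => 4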