cabiri 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/cabiri.rb +108 -11
- metadata +4 -4
data/lib/cabiri.rb
CHANGED
@@ -3,14 +3,23 @@ require 'logger'
|
|
3
3
|
|
4
4
|
module Cabiri
|
5
5
|
class JobQueue
|
6
|
-
#
|
7
|
-
#
|
8
|
-
#
|
9
|
-
#
|
6
|
+
# - remaining_jobs: array that contains jobs that have yet to run
|
7
|
+
# - active_jobs_pids: array that contains the pids of jobs that are currently running
|
8
|
+
# - jobs_info: array that keeps track of the state of each job
|
9
|
+
# - pid_to_index: hash that maps the pid of a job to an index in the jobs_info array
|
10
|
+
# - uid_to_index: hash that maps the uid of a job to an index in the jobs_info array
|
11
|
+
# - self_pipe: a pipe that is used by the main process to implement a blocking wait for the
|
12
|
+
# wait_until_finished method. Both endpoints have sync set to true to prevent the
|
13
|
+
# kernel from buffering any messages.
|
14
|
+
# - logger: a logger to help log errors
|
10
15
|
def initialize
|
11
16
|
@remaining_jobs = []
|
12
17
|
@active_jobs_pids = []
|
13
18
|
|
19
|
+
@jobs_info = []
|
20
|
+
@pid_to_index = {}
|
21
|
+
@uid_to_index = {}
|
22
|
+
|
14
23
|
@self_pipe = IO.pipe()
|
15
24
|
@self_pipe[0].sync = true
|
16
25
|
@self_pipe[1].sync = true
|
@@ -18,10 +27,13 @@ module Cabiri
|
|
18
27
|
@logger = Logger.new($stdout)
|
19
28
|
end
|
20
29
|
|
30
|
+
# add a job to the remaining_jobs array
|
21
31
|
# Queues a job for later execution.
#
# The block is only appended to the remaining_jobs array; it is not run here.
#
# @yield the work the job has to perform
# @return [Array] the remaining_jobs array (the result of the append)
# @raise [ArgumentError] if no block is given
def add(&block)
  # Without this guard a nil entry would be queued and would only fail
  # later, at the point where the queued job is called.
  raise ArgumentError, 'a block is required to add a job' unless block

  @remaining_jobs << block
end
|
24
34
|
|
35
|
+
# check if there is more work to be done. The work is finished if there are no jobs waiting to be run
|
36
|
+
# and there are no jobs currently being run.
|
25
37
|
# Reports whether all work is done.
#
# @return [Boolean] true when no job is waiting to be started and no job
#   pid is currently tracked as active
def finished?
  # `&&` instead of `and`: identical result here, but without the
  # low-precedence surprises `and` brings once an expression is extended.
  @remaining_jobs.empty? && @active_jobs_pids.empty?
end
|
@@ -31,7 +43,7 @@ module Cabiri
|
|
31
43
|
# the last job of the queue is finished, the start method will close the
|
32
44
|
# write end of this pipe. This causes the kernel to notice that nothing can
|
33
45
|
# write to the pipe anymore and thus the kernel sends an EOF down this pipe,
|
34
|
-
# which in turn causes IO.select to return.
|
46
|
+
# which in turn causes the blocking IO.select to return.
|
35
47
|
# When IO.select returns we close the read end of the pipe, such that any
|
36
48
|
# future calls to the wait_until_finished method can return immediately.
|
37
49
|
def wait_until_finished
|
@@ -41,7 +53,9 @@ module Cabiri
|
|
41
53
|
end
|
42
54
|
end
|
43
55
|
|
44
|
-
# here we start by
|
56
|
+
# here we start by creating a uid to index mapping. We also add an entry for each
|
57
|
+
# job to the jobs_info array.
|
58
|
+
# Next we define a signal handler that deals with SIGCHLD signals
|
45
59
|
# (a signal that indicates that a child process has terminated). When we receive
|
46
60
|
# such a signal we get the pid and make sure that the child process was one of
|
47
61
|
# the jobs belonging to the job queue.
|
@@ -54,13 +68,27 @@ module Cabiri
|
|
54
68
|
# You might also notice that old_handler is being used to redirect this signal to
|
55
69
|
# a possible other previously defined SIGCHLD signal handler.
|
56
70
|
# Also note that we close the write end of the self_pipe when there are no jobs left.
|
57
|
-
# See the comments on the wait_until_finished method for
|
71
|
+
# See the comments on the wait_until_finished method for why this is important.
|
58
72
|
def start(max_active_jobs)
|
73
|
+
# create job mappings and initialize job info
|
74
|
+
@remaining_jobs.each_with_index do |job, index|
|
75
|
+
uid = job.to_s
|
76
|
+
@uid_to_index[uid] = index
|
77
|
+
|
78
|
+
@jobs_info[index] = {}
|
79
|
+
@jobs_info[index][:pid] = nil
|
80
|
+
@jobs_info[index][:pipe] = nil
|
81
|
+
@jobs_info[index][:error] = nil
|
82
|
+
@jobs_info[index][:state] = :waiting
|
83
|
+
@jobs_info[index][:result] = nil
|
84
|
+
end
|
85
|
+
|
86
|
+
# define signal handler
|
59
87
|
old_handler = trap(:CLD) do
|
60
88
|
begin
|
61
89
|
while pid = Process.wait(-1, Process::WNOHANG)
|
62
90
|
if(@active_jobs_pids.include?(pid))
|
63
|
-
|
91
|
+
handle_finished_job(pid)
|
64
92
|
fill_job_slots(max_active_jobs)
|
65
93
|
@self_pipe[1].close if finished?
|
66
94
|
end
|
@@ -70,6 +98,7 @@ module Cabiri
|
|
70
98
|
end
|
71
99
|
end
|
72
100
|
|
101
|
+
# start scheduling first batch of jobs
|
73
102
|
fill_job_slots(max_active_jobs)
|
74
103
|
end
|
75
104
|
|
@@ -80,13 +109,81 @@ module Cabiri
|
|
80
109
|
# Starts waiting jobs until either the concurrency limit is reached or no
# jobs remain.
#
# @param max_active_jobs [Integer] maximum number of job pids allowed in
#   active_jobs_pids at the same time
# @return [nil]
def fill_job_slots(max_active_jobs)
  while @active_jobs_pids.length < max_active_jobs && !@remaining_jobs.empty?
    begin
      start_next_job
    rescue => ex
      # A job that cannot be started is recorded as failed; it must not stop
      # the jobs queued behind it.
      handle_error(ex)
    ensure
      # start_next_job and handle_error both look at the head of the queue,
      # so the job is only removed once they are done with it.
      @remaining_jobs.shift
    end
  end
end
|
120
|
+
|
121
|
+
# when starting a new job we first create a pipe. This pipe will be our mechanism to pass any
|
122
|
+
# data returned by the job process to the main process. Next, we create a job process by using
|
123
|
+
# the Adeona gem. The spawn_child method acts like fork(), but adds some extra protection to
|
124
|
+
# prevent orphaned processes. Inside this job process we close the read endpoint of the pipe and
|
125
|
+
# set sync to true for the write endpoint in order to prevent the kernel from buffering any messages.
|
126
|
+
# We continue by letting the job do its work and storing the result in a var called 'result'. The
|
127
|
+
# next step looks a bit weird. We mentioned that we want to use pipes to communicate data, but pipes
|
128
|
+
# weren't designed to transport data structures like arrays and hashes, instead they are meant for text.
|
129
|
+
# So we use a trick. We use Marshal.dump to convert our result (which could be an array, a number,
|
130
|
+
# a hash - we don't know) into a byte stream, put this information inside an array, and then convert this
|
131
|
+
# array into a special string designed for transporting binary data as text. This text can now be sent
|
132
|
+
# through the write endpoint of the pipe. Back outside the job process we close the write endpoint of the
|
133
|
+
# pipe and set sync to true. The rest of the code here should require no comments.
|
134
|
+
# Spawns a child process for the job at the head of remaining_jobs and
# records it as running.
#
# The child sends its result back over a dedicated pipe as a Base64-encoded
# Marshal dump. The job is not removed from remaining_jobs here.
#
# NOTE(review): the parent only reads this pipe in handle_finished_job. If
# that is invoked only after the child has exited, a result larger than the
# OS pipe buffer would presumably leave the child blocked in its write -
# confirm, and consider draining the pipe while the child is still running.
#
# @return [Symbol] :running
# @raise [StandardError] whatever Adeona.spawn_child raises; both pipe ends
#   are closed before the exception is passed on
def start_next_job
  job = @remaining_jobs.first
  uid = job.to_s

  # Resolve the bookkeeping slot before spawning anything, so that a missing
  # entry can never leave a child process running untracked. A job that has
  # no slot yet (queued after the mappings were built) gets one here.
  index = @uid_to_index[uid]
  if index.nil?
    index = @jobs_info.length
    @uid_to_index[uid] = index
    @jobs_info[index] = {
      :pid    => nil,
      :pipe   => nil,
      :error  => nil,
      :state  => :waiting,
      :result => nil
    }
  end

  pipe = IO.pipe
  begin
    pid = Adeona.spawn_child(:detach => false) do
      # child side: only the write end is needed
      pipe[0].close
      pipe[1].sync = true
      result = job.call
      # pipes carry text, so the marshalled bytes are Base64-encoded ("m")
      pipe[1].puts [Marshal.dump(result)].pack("m")
    end
  rescue
    # no child was created; do not leak the two descriptors
    pipe.each { |io| io.close unless io.closed? }
    raise
  end

  # parent side: only the read end is needed
  pipe[1].close
  pipe[0].sync = true

  @active_jobs_pids << pid
  @pid_to_index[pid] = index

  info = @jobs_info[index]
  info[:pid] = pid
  info[:pipe] = pipe
  info[:state] = :running
end
|
155
|
+
|
156
|
+
# when a job finishes, we remove its pid from the array that keeps track of active processes.
|
157
|
+
# Next we read the result that we sent over the pipe and then close the pipe's read endpoint.
|
158
|
+
# We take the received text data, turn it into a byte stream and then load this information
|
159
|
+
# in order to obtain the resulting data from the job.
|
160
|
+
# Collects the outcome of a job process that has terminated.
#
# Removes the pid from active_jobs_pids, drains and closes the read end of
# the job's pipe, and stores either the decoded result (state :finished) or
# an error message (state :error) in the job's jobs_info entry.
#
# @param pid [Integer] pid of the terminated job process
# @return [Symbol, nil] the state recorded for the job, or nil when the pid
#   has no entry in pid_to_index
def handle_finished_job(pid)
  @active_jobs_pids.delete(pid)

  index = @pid_to_index[pid]
  return if index.nil?

  info = @jobs_info[index]
  reader = info[:pipe][0]
  begin
    payload = reader.read
  ensure
    # close the read end even when reading fails
    reader.close
  end

  # A child that terminated before writing its result leaves the pipe empty,
  # and Marshal.load raises on empty or malformed input. Record that as a
  # failed job instead of letting the exception escape to the caller.
  begin
    info[:result] = Marshal.load(payload.unpack("m")[0])
    info[:state] = :finished
  rescue ArgumentError, TypeError => ex
    info[:error] = "Job process #{pid} did not return a readable result. Exception info: #{ex}."
    info[:state] = :error
  end
end
|
171
|
+
|
172
|
+
# when there is an exception, we log the error and set the relevant fields in the jobs_info data
|
173
|
+
# Records a job that could not be started.
#
# Logs a warning and, when the job at the head of remaining_jobs has a
# jobs_info entry, stores the message there and marks the job as :error.
#
# @param ex [Exception] the exception raised while starting the job
# @return [Symbol, nil] :error, or nil when the job has no jobs_info entry
def handle_error(ex)
  job = @remaining_jobs.first
  error = "Exception thrown when trying to instantiate job. Job info: #{job.to_s}. Exception info: #{ex.to_s}."
  @logger.warn(self.class.to_s) { error }

  # This is called from a rescue clause; a job without bookkeeping must not
  # turn the original failure into a second, unrelated exception.
  index = @uid_to_index[job.to_s]
  return if index.nil?

  @jobs_info[index][:error] = error
  @jobs_info[index][:state] = :error
end
|
183
|
+
|
184
|
+
# this allows users to query the state of their jobs
|
185
|
+
# Looks up the bookkeeping entry of a job so callers can inspect its state.
#
# @param index [Integer] position of the job in the jobs_info array
# @return [Hash, nil] the entry stored at that position, or nil if there is
#   none
def get_info(index)
  job_entry = @jobs_info[index]
  job_entry
end
|
91
188
|
end
|
92
189
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cabiri
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-05-
|
12
|
+
date: 2012-05-19 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: adeona
|
16
|
-
requirement: &
|
16
|
+
requirement: &70302667643740 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,7 +21,7 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70302667643740
|
25
25
|
description: An easy and intuitive Ruby job queue.
|
26
26
|
email: tomvaneyck@gmail.com
|
27
27
|
executables: []
|