cabiri 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/cabiri.rb +108 -11
- metadata +4 -4
data/lib/cabiri.rb
CHANGED
@@ -3,14 +3,23 @@ require 'logger'
 
 module Cabiri
   class JobQueue
-    #
-    #
-    #
-    #
+    # - remaining_jobs: array that contains jobs that have yet to run
+    # - active_job_pids: array that contains the pids of jobs that are currently running
+    # - jobs_info: array that keeps track of the state of each job
+    # - pid_to_index: hash that maps the pid of a job to an index in the jobs_info array
+    # - uid_to_index: hash that maps the uid of a job to an index in the jobs_info array
+    # - self_pipe: a pipe that is used by the main process to implement a blocking wait for the
+    #              wait_until_finished method. Both endpoints have sync set to true to prevent the
+    #              kernel from buffering any messages.
+    # - logger: a logger to help log errors
     def initialize
       @remaining_jobs = []
       @active_jobs_pids = []
 
+      @jobs_info = []
+      @pid_to_index = {}
+      @uid_to_index = {}
+
       @self_pipe = IO.pipe()
       @self_pipe[0].sync = true
       @self_pipe[1].sync = true
@@ -18,10 +27,13 @@ module Cabiri
       @logger = Logger.new($stdout)
     end
 
+    # add a job to the remaining_jobs array
     def add(&block)
       @remaining_jobs << block
     end
 
+    # check if there is more work to be done. The work is finished if there are no jobs waiting to be run
+    # and there are no jobs currently being run.
     def finished?
       @remaining_jobs.empty? and @active_jobs_pids.empty?
     end
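For orientation, a hedged usage sketch of the queue as this diff leaves it. The method names come from the hunks in this file (add, start, wait_until_finished, get_info); the concrete jobs and the concurrency limit of 2 are made up for illustration, and jobs need to return Marshal-able values:

require 'cabiri'

# hypothetical example jobs; each job is just a block handed to add
queue = Cabiri::JobQueue.new
queue.add { 21 * 2 }                # a job that returns a number
queue.add { { :status => "ok" } }   # a job that returns a hash

# run at most 2 jobs at a time and block until everything has finished
queue.start(2)
queue.wait_until_finished

# inspect the recorded state and result of the first job (index 0)
info = queue.get_info(0)
puts info[:state]     # :finished on success, :error if the job could not be started
puts info[:result].inspect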
@@ -31,7 +43,7 @@ module Cabiri
     # the last job of the queue is finished, the start method will close the
     # write end of this pipe. This causes the kernel to notice that nothing can
     # write to the pipe anymore and thus the kernel sends an EOF down this pipe,
-    # which in turn causes IO.select to return.
+    # which in turn causes the blocking IO.select to return.
     # When IO.select returns we close the read end of the pipe, such that any
     # future calls to the wait_until_finished method can return immediately.
     def wait_until_finished
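The self-pipe wait described in this hunk is a general pattern. A minimal stand-alone sketch of the idea follows; it is simplified, is not the gem's actual wait_until_finished body, and simulates "the last job has finished" with a throwaway thread:

reader, writer = IO.pipe
reader.sync = true
writer.sync = true

# simulate the last job finishing by closing the write end after some work
Thread.new do
  sleep 1        # pretend jobs are running
  writer.close   # the kernel now sends EOF down the pipe
end

# the blocking wait: IO.select returns once EOF arrives on the read end,
# after which the read end is closed so any later wait returns immediately
unless reader.closed?
  IO.select([reader])
  reader.close
end
puts "queue finished"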
@@ -41,7 +53,9 @@ module Cabiri
       end
     end
 
-    # here we start by
+    # here we start by creating a uid to index mapping. We also add an entry for each
+    # job to the jobs_info array.
+    # Next we define a signal handler that deals with SIGCHLD signals
     # (a signal that indicates that a child process has terminated). When we receive
     # such a signal we get the pid and make sure that the child process was one of
     # the jobs belonging to the job queue.
@@ -54,13 +68,27 @@ module Cabiri
     # You might also notice that old_handler is being used to redirect this signal to
     # a possible other previously defined SIGCHLD signal handler.
     # Also note that we close the write end of the self_pipe when there are no jobs left.
-    # See the comments on the wait_until_finished method for
+    # See the comments on the wait_until_finished method for why this is important.
     def start(max_active_jobs)
+      # create job mappings and initialize job info
+      @remaining_jobs.each_with_index do |job, index|
+        uid = job.to_s
+        @uid_to_index[uid] = index
+
+        @jobs_info[index] = {}
+        @jobs_info[index][:pid] = nil
+        @jobs_info[index][:pipe] = nil
+        @jobs_info[index][:error] = nil
+        @jobs_info[index][:state] = :waiting
+        @jobs_info[index][:result] = nil
+      end
+
+      # define signal handler
       old_handler = trap(:CLD) do
         begin
           while pid = Process.wait(-1, Process::WNOHANG)
             if(@active_jobs_pids.include?(pid))
-
+              handle_finished_job(pid)
               fill_job_slots(max_active_jobs)
               @self_pipe[1].close if finished?
             end
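The trap/reap loop added in this hunk can be illustrated on its own. Below is a small sketch of the same pattern with throwaway child processes; unlike the real handler it does not chain old_handler or refill job slots:

active_pids = []

trap(:CLD) do
  begin
    # WNOHANG: return nil immediately instead of blocking when no child has exited yet
    while pid = Process.wait(-1, Process::WNOHANG)
      active_pids.delete(pid) if active_pids.include?(pid)
    end
  rescue Errno::ECHILD
    # raised once there are no child processes left at all
  end
end

# spawn a few short-lived children and let the handler reap them
3.times { active_pids << fork { sleep(rand) } }
sleep 0.1 until active_pids.empty?
puts "all children reaped"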
@@ -70,6 +98,7 @@ module Cabiri
         end
       end
 
+      # start scheduling first batch of jobs
       fill_job_slots(max_active_jobs)
     end
 
@@ -80,13 +109,81 @@ module Cabiri
     def fill_job_slots(max_active_jobs)
       while(@active_jobs_pids.length < max_active_jobs and !@remaining_jobs.empty?)
         begin
-
+          start_next_job
         rescue => ex
-
+          handle_error(ex)
         ensure
-          @remaining_jobs.
+          @remaining_jobs.shift
         end
       end
     end
+
+    # when starting a new job we first create a pipe. This pipe will be our mechanism to pass any
+    # data returned by the job process to the main process. Next, we create a job process by using
+    # the Adeona gem. The spawn_child method acts like fork(), but adds some extra protection to
+    # prevent orphaned processes. Inside this job process we close the read endpoint of the pipe and
+    # set sync to true for the write endpoint in order to prevent the kernel from buffering any messages.
+    # We continue by letting the job do its work and storing the result in a var called 'result'. The
+    # next step looks a bit weird. We mentioned that we want to use pipes to communicate data, but pipes
+    # weren't designed to transport data structures like arrays and hashes, instead they are meant for text.
+    # So we use a trick. We use Marshal.dump to convert our result (which could be an array, a number,
+    # a hash - we don't know) into a byte stream, put this information inside an array, and then convert this
+    # array into a special string designed for transporting binary data as text. This text can now be send
+    # through the write endpoint of the pipe. Back outside the job process we close the write endpoint of the
+    # pipe and set sync to true. The rest of the code here should require no comments.
+    def start_next_job
+      pipe = IO.pipe()
+      job = @remaining_jobs.first
+
+      pid = Adeona.spawn_child(:detach => false) do
+        pipe[0].close
+        pipe[1].sync = true
+        result = job.call
+        pipe[1].puts [Marshal.dump(result)].pack("m")
+      end
+      pipe[1].close
+      pipe[0].sync = true
+
+      index = @uid_to_index[job.to_s]
+      @active_jobs_pids << pid
+      @pid_to_index[pid] = index
+
+      @jobs_info[index][:pid] = pid
+      @jobs_info[index][:pipe] = pipe
+      @jobs_info[index][:state] = :running
+    end
+
+    # when a job finishes, we remove its pid from the array that keeps track of active processes.
+    # Next we read the result that we sent over the pipe and then close the pipe's read endpoint.
+    # We take the received text data, turn it into a byte stream and then load this information
+    # in order to obtain the resulting data from the job.
+    def handle_finished_job(pid)
+      index = @pid_to_index[pid]
+      @active_jobs_pids.delete(pid)
+
+      pipe = @jobs_info[index][:pipe]
+      result = pipe[0].read
+      pipe[0].close
+
+      @jobs_info[index][:result] = Marshal.load(result.unpack("m")[0])
+      @jobs_info[index][:state] = :finished
+    end
+
+    # when there is an exception, we log the error and set the relevant fields in the jobs_info data
+    def handle_error(ex)
+      job = @remaining_jobs.first
+      index = @uid_to_index[job.to_s]
+
+      error = "Exception thrown when trying to instantiate job. Job info: #{@remaining_jobs.first.to_s}. Exception info: #{ex.to_s}."
+      @logger.warn(self.class.to_s) { error }
+
+      @jobs_info[index][:error] = error
+      @jobs_info[index][:state] = :error
+    end
+
+    # this allows users to query the state of their jobs
+    def get_info(index)
+      @jobs_info[index]
+    end
   end
 end
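The pipe-plus-Marshal trick used by start_next_job and handle_finished_job also works outside the queue. Here is a self-contained sketch of the same round trip, using a plain fork instead of Adeona.spawn_child and a made-up result hash:

# send an arbitrary Ruby object from a child process back to its parent through a pipe
reader, writer = IO.pipe

pid = fork do
  reader.close
  writer.sync = true
  result = { :answer => 42, :values => [1, 2, 3] }   # any Marshal-able object
  # Marshal.dump produces a byte stream; pack("m") base64-encodes it into plain text
  writer.puts [Marshal.dump(result)].pack("m")
  writer.close
end

writer.close
reader.sync = true
encoded = reader.read   # read everything the child wrote, up to EOF
reader.close
Process.wait(pid)

# reverse the encoding: unpack("m") restores the byte stream, Marshal.load rebuilds the object
p Marshal.load(encoded.unpack("m")[0])   # => {:answer=>42, :values=>[1, 2, 3]}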
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: cabiri
 version: !ruby/object:Gem::Version
-  version: 0.0.1
+  version: 0.0.2
 prerelease:
 platform: ruby
 authors:
@@ -9,11 +9,11 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2012-05-
+date: 2012-05-19 00:00:00.000000000Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: adeona
-  requirement: &
+  requirement: &70302667643740 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ! '>='
@@ -21,7 +21,7 @@ dependencies:
       version: '0'
   type: :runtime
   prerelease: false
-  version_requirements: *
+  version_requirements: *70302667643740
 description: An easy and intuitive Ruby job queue.
 email: tomvaneyck@gmail.com
 executables: []