cabiri 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/cabiri.rb +28 -35
- metadata +7 -7
data/lib/cabiri.rb
CHANGED
@@ -11,6 +11,8 @@ module Cabiri
|
|
11
11
|
# - self_pipe: a pipe that is used by the main process to implement a blocking wait for the
|
12
12
|
# wait_until_finished method. Both endpoints have sync set to true to prevent the
|
13
13
|
# kernel from buffering any messages.
|
14
|
+
# - mutex: a mutex that is used to treat the code that deals with extracting results from
|
15
|
+
# finished processes and spawning new processes as a critical section
|
14
16
|
# - logger: a logger to help log errors
|
15
17
|
def initialize
|
16
18
|
@remaining_jobs = []
|
@@ -24,6 +26,7 @@ module Cabiri
|
|
24
26
|
@self_pipe[0].sync = true
|
25
27
|
@self_pipe[1].sync = true
|
26
28
|
|
29
|
+
@mutex = Mutex.new
|
27
30
|
@logger = Logger.new($stdout)
|
28
31
|
end
|
29
32
|
|
@@ -53,22 +56,8 @@ module Cabiri
|
|
53
56
|
end
|
54
57
|
end
|
55
58
|
|
56
|
-
# here we start by creating a uid to index mapping
|
57
|
-
# job to the jobs_info array.
|
58
|
-
# Next we define a signal handler that deals with SIGCHLD signals
|
59
|
-
# (a signal that indicates that a child process has terminated). When we receive
|
60
|
-
# such a signal we get the pid and make sure that the child process was one of
|
61
|
-
# the jobs belonging to the job queue.
|
62
|
-
# This needs to be done inside a while loop as two or more child processes exiting
|
63
|
-
# in quick succession might only generate one signal. For example, the first dead
|
64
|
-
# child process will generate a SIGCHLD. However, when a second process dies quickly
|
65
|
-
# afterwards and the previous SIGCHLD signal has not yet been handled, this second
|
66
|
-
# process won't send a second SIGCHLD signal, but will instead assume that the
|
67
|
-
# SIGCHLD handler knows to look for multiple dead processes.
|
68
|
-
# You might also notice that old_handler is being used to redirect this signal to
|
69
|
-
# a possible other previously defined SIGCHLD signal handler.
|
70
|
-
# Also note that we close the write end of the self_pipe when there are no jobs left.
|
71
|
-
# See the comments on the wait_until_finished method for why this is important.
|
59
|
+
# here we start by creating a uid to index mapping and adding an entry for each
|
60
|
+
# job to the jobs_info array. We then schedule the first batch of jobs.
|
72
61
|
def start(max_active_jobs)
|
73
62
|
# create job mappings and initialize job info
|
74
63
|
@remaining_jobs.each_with_index do |job, index|
|
@@ -79,23 +68,8 @@ module Cabiri
|
|
79
68
|
@jobs_info[index][:pid] = nil
|
80
69
|
@jobs_info[index][:pipe] = nil
|
81
70
|
@jobs_info[index][:error] = nil
|
82
|
-
@jobs_info[index][:state] = :waiting
|
83
71
|
@jobs_info[index][:result] = nil
|
84
|
-
|
85
|
-
|
86
|
-
# define signal handler
|
87
|
-
old_handler = trap(:CLD) do
|
88
|
-
begin
|
89
|
-
while pid = Process.wait(-1, Process::WNOHANG)
|
90
|
-
if(@active_jobs_pids.include?(pid))
|
91
|
-
handle_finished_job(pid)
|
92
|
-
fill_job_slots(max_active_jobs)
|
93
|
-
@self_pipe[1].close if finished?
|
94
|
-
end
|
95
|
-
old_handler.call if old_handler.respond_to?(:call)
|
96
|
-
end
|
97
|
-
rescue Errno::ECHILD
|
98
|
-
end
|
72
|
+
@jobs_info[index][:state] = :waiting
|
99
73
|
end
|
100
74
|
|
101
75
|
# start scheduling first batch of jobs
|
@@ -109,7 +83,7 @@ module Cabiri
|
|
109
83
|
def fill_job_slots(max_active_jobs)
|
110
84
|
while(@active_jobs_pids.length < max_active_jobs and !@remaining_jobs.empty?)
|
111
85
|
begin
|
112
|
-
start_next_job
|
86
|
+
start_next_job(max_active_jobs)
|
113
87
|
rescue => ex
|
114
88
|
handle_error(ex)
|
115
89
|
ensure
|
@@ -130,8 +104,18 @@ module Cabiri
|
|
130
104
|
# a hash - we don't know) into a byte stream, put this information inside an array, and then convert this
|
131
105
|
# array into a special string designed for transporting binary data as text. This text can now be sent
|
132
106
|
# through the write endpoint of the pipe. Back outside the job process we close the write endpoint of the
|
133
|
-
# pipe and set sync to true. The
|
134
|
-
|
107
|
+
# pipe and set sync to true. The next few lines should require no comment.
|
108
|
+
# We finish by creating a thread that waits for the newly created job to end. This thread is responsible
|
109
|
+
# for extracting information from the finished job and spawning new jobs. Also note that we close the
|
110
|
+
# write end of the self_pipe when there are no jobs left. See the comments on the wait_until_finished
|
111
|
+
# method for why this is important.
|
112
|
+
# Notice how the inside of the thread is wrapped inside a mutex. This is required to prevent a race
|
113
|
+
# condition from occurring when two or more jobs return in quick succession. When the first job
|
114
|
+
# returns, its thread will start scheduling new processes, but this can take some time. If a second
|
115
|
+
# job returns before the thread of the first job is done scheduling, it will start doing scheduling
|
116
|
+
# work as well. So now you have two threads simultaneously doing scheduling work, and the end result
|
117
|
+
# will be unpredictable.
|
118
|
+
def start_next_job(max_active_jobs)
|
135
119
|
pipe = IO.pipe()
|
136
120
|
job = @remaining_jobs.first
|
137
121
|
|
@@ -151,6 +135,15 @@ module Cabiri
|
|
151
135
|
@jobs_info[index][:pid] = pid
|
152
136
|
@jobs_info[index][:pipe] = pipe
|
153
137
|
@jobs_info[index][:state] = :running
|
138
|
+
|
139
|
+
Thread.new(pid) do |my_pid|
|
140
|
+
Process.waitpid(my_pid)
|
141
|
+
@mutex.synchronize do
|
142
|
+
handle_finished_job(my_pid)
|
143
|
+
fill_job_slots(max_active_jobs)
|
144
|
+
@self_pipe[1].close if finished?
|
145
|
+
end
|
146
|
+
end
|
154
147
|
end
|
155
148
|
|
156
149
|
# when a job finishes, we remove its pid from the array that keeps track of active processes.
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cabiri
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-05-19 00:00:00.
|
12
|
+
date: 2012-05-19 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: adeona
|
16
|
-
requirement: &
|
16
|
+
requirement: &70339103396180 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,8 +21,8 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
25
|
-
description: An easy and intuitive Ruby job queue.
|
24
|
+
version_requirements: *70339103396180
|
25
|
+
description: An easy and intuitive Ruby job queue for working with parallel processes.
|
26
26
|
email: tomvaneyck@gmail.com
|
27
27
|
executables: []
|
28
28
|
extensions: []
|
@@ -49,8 +49,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
49
49
|
version: '0'
|
50
50
|
requirements: []
|
51
51
|
rubyforge_project:
|
52
|
-
rubygems_version: 1.8.
|
52
|
+
rubygems_version: 1.8.10
|
53
53
|
signing_key:
|
54
54
|
specification_version: 3
|
55
|
-
summary: An easy and intuitive Ruby job queue.
|
55
|
+
summary: An easy and intuitive Ruby job queue for working with parallel processes.
|
56
56
|
test_files: []
|