cabiri 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/cabiri.rb +28 -35
- metadata +7 -7
data/lib/cabiri.rb
CHANGED
@@ -11,6 +11,8 @@ module Cabiri
|
|
11
11
|
# - self_pipe: a pipe that is used by the main process to implement a blocking wait for the
|
12
12
|
# wait_until_finished method. Both endpoints have sync set to true to prevent the
|
13
13
|
# kernel from buffering any messages.
|
14
|
+
# - mutex: a mutex that is used to treat the code that deals with extracting results from
|
15
|
+
# finished processes and spawning new processes as a critical section
|
14
16
|
# - logger: a logger to help log errors
|
15
17
|
def initialize
|
16
18
|
@remaining_jobs = []
|
@@ -24,6 +26,7 @@ module Cabiri
|
|
24
26
|
@self_pipe[0].sync = true
|
25
27
|
@self_pipe[1].sync = true
|
26
28
|
|
29
|
+
@mutex = Mutex.new
|
27
30
|
@logger = Logger.new($stdout)
|
28
31
|
end
|
29
32
|
|
@@ -53,22 +56,8 @@ module Cabiri
|
|
53
56
|
end
|
54
57
|
end
|
55
58
|
|
56
|
-
# here we start by creating a uid to index mapping
|
57
|
-
# job to the jobs_info array.
|
58
|
-
# Next we define a signal handler that deals with SIGCHLD signals
|
59
|
-
# (a signal that indicates that a child process has terminated). When we receive
|
60
|
-
# such a signal we get the pid and make sure that the child process was one of
|
61
|
-
# the jobs belonging to the job queue.
|
62
|
-
# This needs to be done inside a while loop as two or more child processes exiting
|
63
|
-
# in quick succession might only generate one signal. For example, the first dead
|
64
|
-
# child process will generate a SIGCHLD. However, when a second process dies quickly
|
65
|
-
# afterwards and the previous SIGCHLD signal has not yet been handled, this second
|
66
|
-
# process won't send a second SIGCHLD signal, but will instead assume that the
|
67
|
-
# SIGCHLD handler knows to look for multiple dead processes.
|
68
|
-
# You might also notice that old_handler is being used to redirect this signal to
|
69
|
-
# a possible other previously defined SIGCHLD signal handler.
|
70
|
-
# Also note that we close the write end of the self_pipe when there are no jobs left.
|
71
|
-
# See the comments on the wait_until_finished method for why this is important.
|
59
|
+
# here we start by creating a uid-to-index mapping and adding an entry for each
|
60
|
+
# job to the jobs_info array. We then schedule the first batch of jobs.
|
72
61
|
def start(max_active_jobs)
|
73
62
|
# create job mappings and initialize job info
|
74
63
|
@remaining_jobs.each_with_index do |job, index|
|
@@ -79,23 +68,8 @@ module Cabiri
|
|
79
68
|
@jobs_info[index][:pid] = nil
|
80
69
|
@jobs_info[index][:pipe] = nil
|
81
70
|
@jobs_info[index][:error] = nil
|
82
|
-
@jobs_info[index][:state] = :waiting
|
83
71
|
@jobs_info[index][:result] = nil
|
84
|
-
|
85
|
-
|
86
|
-
# define signal handler
|
87
|
-
old_handler = trap(:CLD) do
|
88
|
-
begin
|
89
|
-
while pid = Process.wait(-1, Process::WNOHANG)
|
90
|
-
if(@active_jobs_pids.include?(pid))
|
91
|
-
handle_finished_job(pid)
|
92
|
-
fill_job_slots(max_active_jobs)
|
93
|
-
@self_pipe[1].close if finished?
|
94
|
-
end
|
95
|
-
old_handler.call if old_handler.respond_to?(:call)
|
96
|
-
end
|
97
|
-
rescue Errno::ECHILD
|
98
|
-
end
|
72
|
+
@jobs_info[index][:state] = :waiting
|
99
73
|
end
|
100
74
|
|
101
75
|
# start scheduling first batch of jobs
|
@@ -109,7 +83,7 @@ module Cabiri
|
|
109
83
|
def fill_job_slots(max_active_jobs)
|
110
84
|
while(@active_jobs_pids.length < max_active_jobs and !@remaining_jobs.empty?)
|
111
85
|
begin
|
112
|
-
start_next_job
|
86
|
+
start_next_job(max_active_jobs)
|
113
87
|
rescue => ex
|
114
88
|
handle_error(ex)
|
115
89
|
ensure
|
@@ -130,8 +104,18 @@ module Cabiri
|
|
130
104
|
# a hash - we don't know) into a byte stream, put this information inside an array, and then convert this
|
131
105
|
# array into a special string designed for transporting binary data as text. This text can now be sent
|
132
106
|
# through the write endpoint of the pipe. Back outside the job process we close the write endpoint of the
|
133
|
-
# pipe and set sync to true. The
|
134
|
-
|
107
|
+
# pipe and set sync to true. The next few lines should require no comment.
|
108
|
+
# We finish by creating a thread that waits for the newly created job to end. This thread is responsible
|
109
|
+
# for extracting information from the finished job and spawning new jobs. Also note that we close the
|
110
|
+
# write end of the self_pipe when there are no jobs left. See the comments on the wait_until_finished
|
111
|
+
# method for why this is important.
|
112
|
+
# Notice how the inside of the thread is wrapped inside a mutex. This is required to prevent a race
|
113
|
+
# condition from occurring when two or more jobs return in quick succession. When the first job
|
114
|
+
# returns, its thread will start scheduling new processes, but this can take some time. If a second
|
115
|
+
# job returns before the thread of the first job is done scheduling, it will start doing scheduling
|
116
|
+
# work as well. So now you have two threads simultaneously doing scheduling work, and the end result
|
117
|
+
# will be unpredictable.
|
118
|
+
def start_next_job(max_active_jobs)
|
135
119
|
pipe = IO.pipe()
|
136
120
|
job = @remaining_jobs.first
|
137
121
|
|
@@ -151,6 +135,15 @@ module Cabiri
|
|
151
135
|
@jobs_info[index][:pid] = pid
|
152
136
|
@jobs_info[index][:pipe] = pipe
|
153
137
|
@jobs_info[index][:state] = :running
|
138
|
+
|
139
|
+
Thread.new(pid) do |my_pid|
|
140
|
+
Process.waitpid(my_pid)
|
141
|
+
@mutex.synchronize do
|
142
|
+
handle_finished_job(my_pid)
|
143
|
+
fill_job_slots(max_active_jobs)
|
144
|
+
@self_pipe[1].close if finished?
|
145
|
+
end
|
146
|
+
end
|
154
147
|
end
|
155
148
|
|
156
149
|
# when a job finishes, we remove its pid from the array that keeps track of active processes.
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cabiri
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-05-19 00:00:00.
|
12
|
+
date: 2012-05-19 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: adeona
|
16
|
-
requirement: &
|
16
|
+
requirement: &70339103396180 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,8 +21,8 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
25
|
-
description: An easy and intuitive Ruby job queue.
|
24
|
+
version_requirements: *70339103396180
|
25
|
+
description: An easy and intuitive Ruby job queue for working with parallel processes.
|
26
26
|
email: tomvaneyck@gmail.com
|
27
27
|
executables: []
|
28
28
|
extensions: []
|
@@ -49,8 +49,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
49
49
|
version: '0'
|
50
50
|
requirements: []
|
51
51
|
rubyforge_project:
|
52
|
-
rubygems_version: 1.8.
|
52
|
+
rubygems_version: 1.8.10
|
53
53
|
signing_key:
|
54
54
|
specification_version: 3
|
55
|
-
summary: An easy and intuitive Ruby job queue.
|
55
|
+
summary: An easy and intuitive Ruby job queue for working with parallel processes.
|
56
56
|
test_files: []
|