cabiri 0.0.4 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/cabiri.rb +108 -151
- metadata +4 -15
data/lib/cabiri.rb
CHANGED
@@ -1,182 +1,139 @@
|
|
1
|
-
require 'adeona'
|
2
|
-
require 'logger'
|
3
|
-
|
4
1
|
module Cabiri
|
5
|
-
class
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
@
|
19
|
-
@
|
2
|
+
class Job
|
3
|
+
attr_accessor :id
|
4
|
+
attr_accessor :pid
|
5
|
+
attr_accessor :block
|
6
|
+
attr_accessor :result
|
7
|
+
attr_accessor :pipe
|
8
|
+
attr_accessor :lifeline
|
9
|
+
|
10
|
+
def initialize(id, &block)
|
11
|
+
@id = id
|
12
|
+
@pid = nil
|
13
|
+
@block = block
|
14
|
+
@result = nil
|
15
|
+
@pipe = nil
|
16
|
+
@lifeline = nil
|
17
|
+
end
|
18
|
+
|
19
|
+
def activate!
|
20
|
+
@pipe = IO.pipe
|
21
|
+
@lifeline = IO.pipe
|
20
22
|
|
21
|
-
@
|
22
|
-
|
23
|
-
|
23
|
+
@pid = fork do
|
24
|
+
@pipe[0].close
|
25
|
+
@pipe[1].sync = true
|
24
26
|
|
25
|
-
|
26
|
-
|
27
|
-
|
27
|
+
@lifeline[1].close
|
28
|
+
@lifeline[0].sync = true
|
29
|
+
|
30
|
+
begin
|
31
|
+
lifeline_thread = Thread.new(Thread.current) do |main_thread|
|
32
|
+
result = IO.select([@lifeline[0]], nil, nil, nil)
|
33
|
+
main_thread.raise "Killing job '#{@id}' as connection with parent process was lost."
|
34
|
+
end
|
35
|
+
result = @block.call
|
36
|
+
@pipe[1].puts [Marshal.dump(result)].pack("m")
|
37
|
+
rescue => e
|
38
|
+
puts "Exception (#{e}) in block: #{@block.inspect}"
|
39
|
+
end
|
40
|
+
end
|
28
41
|
|
29
|
-
@
|
30
|
-
@
|
42
|
+
@pipe[1].close
|
43
|
+
@pipe[0].sync = true
|
44
|
+
|
45
|
+
@lifeline[0].close
|
46
|
+
@lifeline[1].sync = true
|
31
47
|
end
|
32
48
|
|
33
|
-
|
34
|
-
|
35
|
-
@
|
49
|
+
def finish!
|
50
|
+
@result = Marshal.load(@pipe[0].read.unpack("m")[0])
|
51
|
+
@pipe[0].close
|
52
|
+
@lifeline[1].close
|
53
|
+
Process.waitpid(@pid)
|
36
54
|
end
|
55
|
+
end
|
37
56
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
57
|
+
class JobQueue
|
58
|
+
attr_accessor :pending_jobs
|
59
|
+
attr_accessor :active_jobs
|
60
|
+
attr_accessor :finished_jobs
|
61
|
+
|
62
|
+
def initialize
|
63
|
+
@pending_jobs = []
|
64
|
+
@active_jobs = []
|
65
|
+
@finished_jobs = {}
|
42
66
|
end
|
43
67
|
|
44
|
-
|
45
|
-
|
46
|
-
# the last job of the queue is finished, the start method will close the
|
47
|
-
# write end of this pipe. This causes the kernel to notice that nothing can
|
48
|
-
# write to the pipe anymore and thus the kernel sends an EOF down this pipe,
|
49
|
-
# which in turn causes the blocking IO.select to return.
|
50
|
-
# When IO.select returns we close the read end of the pipe, such that any
|
51
|
-
# future calls to the wait_until_finished method can return immediately.
|
52
|
-
def wait_until_finished
|
53
|
-
if(!@self_pipe[0].closed?)
|
54
|
-
IO.select([@self_pipe[0]])
|
55
|
-
@self_pipe[0].close
|
56
|
-
end
|
68
|
+
def add(id, &block)
|
69
|
+
@pending_jobs << Job.new(id, &block)
|
57
70
|
end
|
58
71
|
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
# create job mappings and initialize job info
|
63
|
-
@remaining_jobs.each_with_index do |job, index|
|
64
|
-
uid = job.to_s
|
65
|
-
@uid_to_index[uid] = index
|
66
|
-
|
67
|
-
@jobs_info[index] = {}
|
68
|
-
@jobs_info[index][:pid] = nil
|
69
|
-
@jobs_info[index][:pipe] = nil
|
70
|
-
@jobs_info[index][:error] = nil
|
71
|
-
@jobs_info[index][:result] = nil
|
72
|
-
@jobs_info[index][:state] = :waiting
|
73
|
-
end
|
72
|
+
def pending_jobs_available?
|
73
|
+
@pending_jobs.length >= 1
|
74
|
+
end
|
74
75
|
|
75
|
-
|
76
|
-
|
76
|
+
def active_jobs_available?
|
77
|
+
@active_jobs.length >= 1
|
77
78
|
end
|
78
79
|
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
def
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
rescue => ex
|
88
|
-
handle_error(ex)
|
89
|
-
ensure
|
90
|
-
@remaining_jobs.shift
|
91
|
-
end
|
80
|
+
def finished?
|
81
|
+
!pending_jobs_available? && !active_jobs_available?
|
82
|
+
end
|
83
|
+
|
84
|
+
def get_read_end_points_of_active_jobs
|
85
|
+
read_end_points = []
|
86
|
+
@active_jobs.each do |active_job|
|
87
|
+
read_end_points << active_job.pipe[0]
|
92
88
|
end
|
89
|
+
read_end_points
|
93
90
|
end
|
94
91
|
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
# prevent orphaned processes. Inside this job process we close the read endpoint of the pipe and
|
99
|
-
# set sync to true for the write endpoint in order to prevent the kernel from buffering any messages.
|
100
|
-
# We continue by letting the job do its work and storing the result in a var called 'result'. The
|
101
|
-
# next step looks a bit weird. We mentioned that we want to use pipes to communicate data, but pipes
|
102
|
-
# weren't designed to transport data structures like arrays and hashes, instead they are meant for text.
|
103
|
-
# So we use a trick. We use Marshal.dump to convert our result (which could be an array, a number,
|
104
|
-
# a hash - we don't know) into a byte stream, put this information inside an array, and then convert this
|
105
|
-
# array into a special string designed for transporting binary data as text. This text can now be sent
|
106
|
-
# through the write endpoint of the pipe. Back outside the job process we close the write endpoint of the
|
107
|
-
# pipe and set sync to true. The next few lines hould require no comment.
|
108
|
-
# We finish by creating a thread that waits for the newly created job to end. This thread is responsible
|
109
|
-
# for extracting information from the finished job and spawning new jobs. Also note that we close the
|
110
|
-
# write end of the self_pipe when there are no jobs left. See the comments on the wait_until_finished
|
111
|
-
# method for why this is important.
|
112
|
-
# Notice how the inside of the thread is wrapped inside a mutex. This is required to prevent a race
|
113
|
-
# condition from occurring when two or more jobs return in quick succession. When the first job
|
114
|
-
# returns, its thread will start scheduling new processes, but this can take some time. If a second
|
115
|
-
# job returns before the thread of the first job is done scheduling, it will start doing scheduling
|
116
|
-
# work as well. So now you have two threads simultaneously doing scheduling work, and the end result
|
117
|
-
# will be unpredictable.
|
118
|
-
def start_next_job(max_active_jobs)
|
119
|
-
pipe = IO.pipe()
|
120
|
-
job = @remaining_jobs.first
|
121
|
-
|
122
|
-
pid = Adeona.spawn_child(:detach => false) do
|
123
|
-
pipe[0].close
|
124
|
-
pipe[1].sync = true
|
125
|
-
result = job.call
|
126
|
-
pipe[1].puts [Marshal.dump(result)].pack("m")
|
92
|
+
def get_active_job_by_read_end_point(read_end_point)
|
93
|
+
@active_jobs.each do |active_job|
|
94
|
+
return active_job if (active_job.pipe[0] == read_end_point)
|
127
95
|
end
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
@jobs_info[index][:pid] = pid
|
136
|
-
@jobs_info[index][:pipe] = pipe
|
137
|
-
@jobs_info[index][:state] = :running
|
138
|
-
|
139
|
-
Thread.new(pid) do |my_pid|
|
140
|
-
Process.waitpid(my_pid)
|
141
|
-
@mutex.synchronize do
|
142
|
-
handle_finished_job(my_pid)
|
143
|
-
fill_job_slots(max_active_jobs)
|
144
|
-
@self_pipe[1].close if finished?
|
96
|
+
end
|
97
|
+
|
98
|
+
def start(max_active_jobs)
|
99
|
+
# start by activating as many jobs as allowed
|
100
|
+
max_active_jobs.times do
|
101
|
+
if pending_jobs_available?
|
102
|
+
activate_next_available_job
|
145
103
|
end
|
146
104
|
end
|
147
|
-
end
|
148
105
|
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
def handle_finished_job(pid)
|
154
|
-
index = @pid_to_index[pid]
|
155
|
-
@active_jobs_pids.delete(pid)
|
106
|
+
while active_jobs_available?
|
107
|
+
# every time IO.select gets called, we need to do something
|
108
|
+
read_end_points = get_read_end_points_of_active_jobs
|
109
|
+
read_end_points_array, _, _ = IO.select(read_end_points, nil, nil, nil)
|
156
110
|
|
157
|
-
|
158
|
-
|
159
|
-
|
111
|
+
# finish all jobs that we got returned data for
|
112
|
+
read_end_points_array.each do |read_end_point|
|
113
|
+
active_job = get_active_job_by_read_end_point(read_end_point)
|
114
|
+
finish_job(active_job)
|
115
|
+
end
|
160
116
|
|
161
|
-
|
162
|
-
|
117
|
+
# schedule as many new jobs as the number of jobs that just finished
|
118
|
+
nb_of_just_finished_jobs = read_end_points_array.length
|
119
|
+
nb_of_just_finished_jobs.times do
|
120
|
+
if pending_jobs_available?
|
121
|
+
activate_next_available_job
|
122
|
+
end
|
123
|
+
end
|
124
|
+
end
|
163
125
|
end
|
164
126
|
|
165
|
-
|
166
|
-
|
167
|
-
job
|
168
|
-
|
169
|
-
|
170
|
-
error = "Exception thrown when trying to instantiate job. Job info: #{@remaining_jobs.first.to_s}. Exception info: #{ex.to_s}."
|
171
|
-
@logger.warn(self.class.to_s) { error }
|
172
|
-
|
173
|
-
@jobs_info[index][:error] = error
|
174
|
-
@jobs_info[index][:state] = :error
|
127
|
+
def activate_next_available_job
|
128
|
+
job = @pending_jobs.shift
|
129
|
+
job.activate!
|
130
|
+
@active_jobs << job
|
175
131
|
end
|
176
132
|
|
177
|
-
|
178
|
-
|
179
|
-
|
133
|
+
def finish_job(job)
|
134
|
+
job = @active_jobs.delete(job)
|
135
|
+
job.finish!
|
136
|
+
@finished_jobs[job.id] = job
|
180
137
|
end
|
181
138
|
end
|
182
139
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cabiri
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.7
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,19 +9,8 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
13
|
-
dependencies:
|
14
|
-
- !ruby/object:Gem::Dependency
|
15
|
-
name: adeona
|
16
|
-
requirement: &70111815020260 !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
|
-
requirements:
|
19
|
-
- - ! '>='
|
20
|
-
- !ruby/object:Gem::Version
|
21
|
-
version: '0'
|
22
|
-
type: :runtime
|
23
|
-
prerelease: false
|
24
|
-
version_requirements: *70111815020260
|
12
|
+
date: 2012-12-29 00:00:00.000000000 Z
|
13
|
+
dependencies: []
|
25
14
|
description: An easy and intuitive Ruby job queue for working with parallel processes.
|
26
15
|
email: tomvaneyck@gmail.com
|
27
16
|
executables: []
|
@@ -49,7 +38,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
49
38
|
version: '0'
|
50
39
|
requirements: []
|
51
40
|
rubyforge_project:
|
52
|
-
rubygems_version: 1.8.
|
41
|
+
rubygems_version: 1.8.24
|
53
42
|
signing_key:
|
54
43
|
specification_version: 3
|
55
44
|
summary: An easy and intuitive Ruby job queue for working with parallel processes.
|