cabiri 0.0.4 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. data/lib/cabiri.rb +108 -151
  2. metadata +4 -15
@@ -1,182 +1,139 @@
1
- require 'adeona'
2
- require 'logger'
3
-
4
1
  module Cabiri
5
- class JobQueue
6
- # - remaining_jobs: array that contains jobs that have yet to run
7
- # - active_job_pids: array that contains the pids of jobs that are currently running
8
- # - jobs_info: array that keeps track of the state of each job
9
- # - pid_to_index: hash that maps the pid of a job to an index in the jobs_info array
10
- # - uid_to_index: hash that maps the uid of a job to an index in the jobs_info array
11
- # - self_pipe: a pipe that is used by the main process to implement a blocking wait for the
12
- # wait_until_finished method. Both endpoints have sync set to true to prevent the
13
- # kernel from buffering any messages.
14
- # - mutex: a mutex that is used to treat the code that deals with extracting results from
15
- # finished processes and spawning new processes as a critical section
16
- # - logger: a logger to help log errors
17
- def initialize
18
- @remaining_jobs = []
19
- @active_jobs_pids = []
2
+ class Job
3
+ attr_accessor :id
4
+ attr_accessor :pid
5
+ attr_accessor :block
6
+ attr_accessor :result
7
+ attr_accessor :pipe
8
+ attr_accessor :lifeline
9
+
10
+ def initialize(id, &block)
11
+ @id = id
12
+ @pid = nil
13
+ @block = block
14
+ @result = nil
15
+ @pipe = nil
16
+ @lifeline = nil
17
+ end
18
+
19
+ def activate!
20
+ @pipe = IO.pipe
21
+ @lifeline = IO.pipe
20
22
 
21
- @jobs_info = []
22
- @pid_to_index = {}
23
- @uid_to_index = {}
23
+ @pid = fork do
24
+ @pipe[0].close
25
+ @pipe[1].sync = true
24
26
 
25
- @self_pipe = IO.pipe()
26
- @self_pipe[0].sync = true
27
- @self_pipe[1].sync = true
27
+ @lifeline[1].close
28
+ @lifeline[0].sync = true
29
+
30
+ begin
31
+ lifeline_thread = Thread.new(Thread.current) do |main_thread|
32
+ result = IO.select([@lifeline[0]], nil, nil, nil)
33
+ main_thread.raise "Killing job '#{@id}' as connection with parent process was lost."
34
+ end
35
+ result = @block.call
36
+ @pipe[1].puts [Marshal.dump(result)].pack("m")
37
+ rescue => e
38
+ puts "Exception (#{e}) in block: #{@block.inspect}"
39
+ end
40
+ end
28
41
 
29
- @mutex = Mutex.new
30
- @logger = Logger.new($stdout)
42
+ @pipe[1].close
43
+ @pipe[0].sync = true
44
+
45
+ @lifeline[0].close
46
+ @lifeline[1].sync = true
31
47
  end
32
48
 
33
- # add a job to the remaining_jobs array
34
- def add(&block)
35
- @remaining_jobs << block
49
+ def finish!
50
+ @result = Marshal.load(@pipe[0].read.unpack("m")[0])
51
+ @pipe[0].close
52
+ @lifeline[1].close
53
+ Process.waitpid(@pid)
36
54
  end
55
+ end
37
56
 
38
- # check if there is more work to be done. The work is finished if there are no jobs waiting to be run
39
- # and there are no jobs currently being run.
40
- def finished?
41
- @remaining_jobs.empty? and @active_jobs_pids.empty?
57
+ class JobQueue
58
+ attr_accessor :pending_jobs
59
+ attr_accessor :active_jobs
60
+ attr_accessor :finished_jobs
61
+
62
+ def initialize
63
+ @pending_jobs = []
64
+ @active_jobs = []
65
+ @finished_jobs = {}
42
66
  end
43
67
 
44
- # this is a blocking wait that won't return until after all jobs in the
45
- # queue are finished. The initialize method has set up a self_pipe. When
46
- # the last job of the queue is finished, the start method will close the
47
- # write end of this pipe. This causes the kernel to notice that nothing can
48
- # write to the pipe anymore and thus the kernel sends an EOF down this pipe,
49
- # which in turn causes the blocking IO.select to return.
50
- # When IO.select returns we close the read end of the pipe, such that any
51
- # future calls to the wait_until_finished method can return immediately.
52
- def wait_until_finished
53
- if(!@self_pipe[0].closed?)
54
- IO.select([@self_pipe[0]])
55
- @self_pipe[0].close
56
- end
68
+ def add(id, &block)
69
+ @pending_jobs << Job.new(id, &block)
57
70
  end
58
71
 
59
- # here we start by creating a uid to index mapping and add an entry for each
60
- # job to the jobs_info array. We then schedule the first batch of jobs.
61
- def start(max_active_jobs)
62
- # create job mappings and initialize job info
63
- @remaining_jobs.each_with_index do |job, index|
64
- uid = job.to_s
65
- @uid_to_index[uid] = index
66
-
67
- @jobs_info[index] = {}
68
- @jobs_info[index][:pid] = nil
69
- @jobs_info[index][:pipe] = nil
70
- @jobs_info[index][:error] = nil
71
- @jobs_info[index][:result] = nil
72
- @jobs_info[index][:state] = :waiting
73
- end
72
+ def pending_jobs_available?
73
+ @pending_jobs.length >= 1
74
+ end
74
75
 
75
- # start scheduling first batch of jobs
76
- fill_job_slots(max_active_jobs)
76
+ def active_jobs_available?
77
+ @active_jobs.length >= 1
77
78
  end
78
79
 
79
- # here we fill all the empty job slots. When we take a new job two things can happen.
80
- # Either we manage to successfully spawn a new process or something goes wrong and we log
81
- # it. In either case we assume that we are done with the job and remove it from the
82
- # remaining_jobs array.
83
- def fill_job_slots(max_active_jobs)
84
- while(@active_jobs_pids.length < max_active_jobs and !@remaining_jobs.empty?)
85
- begin
86
- start_next_job(max_active_jobs)
87
- rescue => ex
88
- handle_error(ex)
89
- ensure
90
- @remaining_jobs.shift
91
- end
80
+ def finished?
81
+ !pending_jobs_available? && !active_jobs_available?
82
+ end
83
+
84
+ def get_read_end_points_of_active_jobs
85
+ read_end_points = []
86
+ @active_jobs.each do |active_job|
87
+ read_end_points << active_job.pipe[0]
92
88
  end
89
+ read_end_points
93
90
  end
94
91
 
95
- # when starting a new job we first create a pipe. This pipe will be our mechanism to pass any
96
- # data returned by the job process to the main process. Next, we create a job process by using
97
- # the Adeona gem. The spawn_child method acts like fork(), but adds some extra protection to
98
- # prevent orphaned processes. Inside this job process we close the read endpoint of the pipe and
99
- # set sync to true for the write endpoint in order to prevent the kernel from buffering any messages.
100
- # We continue by letting the job do its work and storing the result in a var called 'result'. The
101
- # next step looks a bit weird. We mentioned that we want to use pipes to communicate data, but pipes
102
- # weren't designed to transport data structures like arrays and hashes; instead they are meant for text.
103
- # So we use a trick. We use Marshal.dump to convert our result (which could be an array, a number,
104
- # a hash - we don't know) into a byte stream, put this information inside an array, and then convert this
105
- # array into a special string designed for transporting binary data as text. This text can now be sent
106
- # through the write endpoint of the pipe. Back outside the job process we close the write endpoint of the
107
- # pipe and set sync to true. The next few lines should require no comment.
108
- # We finish by creating a thread that waits for the newly created job to end. This thread is responsible
109
- # for extracting information from the finished job and spawning new jobs. Also note that we close the
110
- # write end of the self_pipe when there are no jobs left. See the comments on the wait_until_finished
111
- # method for why this is important.
112
- # Notice how the inside of the thread is wrapped inside a mutex. This is required to prevent a race
113
- # condition from occurring when two or more jobs return in quick succession. When the first job
114
- # returns, its thread will start scheduling new processes, but this can take some time. If a second
115
- # job returns before the thread of the first job is done scheduling, it will start doing scheduling
116
- # work as well. So now you have two threads simultaneously doing scheduling work, and the end result
117
- # will be unpredictable.
118
- def start_next_job(max_active_jobs)
119
- pipe = IO.pipe()
120
- job = @remaining_jobs.first
121
-
122
- pid = Adeona.spawn_child(:detach => false) do
123
- pipe[0].close
124
- pipe[1].sync = true
125
- result = job.call
126
- pipe[1].puts [Marshal.dump(result)].pack("m")
92
+ def get_active_job_by_read_end_point(read_end_point)
93
+ @active_jobs.each do |active_job|
94
+ return active_job if (active_job.pipe[0] == read_end_point)
127
95
  end
128
- pipe[1].close
129
- pipe[0].sync = true
130
-
131
- index = @uid_to_index[job.to_s]
132
- @active_jobs_pids << pid
133
- @pid_to_index[pid] = index
134
-
135
- @jobs_info[index][:pid] = pid
136
- @jobs_info[index][:pipe] = pipe
137
- @jobs_info[index][:state] = :running
138
-
139
- Thread.new(pid) do |my_pid|
140
- Process.waitpid(my_pid)
141
- @mutex.synchronize do
142
- handle_finished_job(my_pid)
143
- fill_job_slots(max_active_jobs)
144
- @self_pipe[1].close if finished?
96
+ end
97
+
98
+ def start(max_active_jobs)
99
+ # start by activating as many jobs as allowed
100
+ max_active_jobs.times do
101
+ if pending_jobs_available?
102
+ activate_next_available_job
145
103
  end
146
104
  end
147
- end
148
105
 
149
- # when a job finishes, we remove its pid from the array that keeps track of active processes.
150
- # Next we read the result that we sent over the pipe and then close the pipe's read endpoint.
151
- # We take the received text data, turn it into a byte stream and then load this information
152
- # in order to obtain the resulting data from the job.
153
- def handle_finished_job(pid)
154
- index = @pid_to_index[pid]
155
- @active_jobs_pids.delete(pid)
106
+ while active_jobs_available?
107
+ # every time IO.select gets called, we need to do something
108
+ read_end_points = get_read_end_points_of_active_jobs
109
+ read_end_points_array, _, _ = IO.select(read_end_points, nil, nil, nil)
156
110
 
157
- pipe = @jobs_info[index][:pipe]
158
- result = pipe[0].read
159
- pipe[0].close
111
+ # finish all jobs that we got returned data for
112
+ read_end_points_array.each do |read_end_point|
113
+ active_job = get_active_job_by_read_end_point(read_end_point)
114
+ finish_job(active_job)
115
+ end
160
116
 
161
- @jobs_info[index][:result] = Marshal.load(result.unpack("m")[0])
162
- @jobs_info[index][:state] = :finished
117
+ # schedule as many new jobs as the number of jobs that just finished
118
+ nb_of_just_finished_jobs = read_end_points_array.length
119
+ nb_of_just_finished_jobs.times do
120
+ if pending_jobs_available?
121
+ activate_next_available_job
122
+ end
123
+ end
124
+ end
163
125
  end
164
126
 
165
- # when there is an exception, we log the error and set the relevant fields in the jobs_info data
166
- def handle_error(ex)
167
- job = @remaining_jobs.first
168
- index = @uid_to_index[job.to_s]
169
-
170
- error = "Exception thrown when trying to instantiate job. Job info: #{@remaining_jobs.first.to_s}. Exception info: #{ex.to_s}."
171
- @logger.warn(self.class.to_s) { error }
172
-
173
- @jobs_info[index][:error] = error
174
- @jobs_info[index][:state] = :error
127
+ def activate_next_available_job
128
+ job = @pending_jobs.shift
129
+ job.activate!
130
+ @active_jobs << job
175
131
  end
176
132
 
177
- # this allows users to query the state of their jobs
178
- def get_info(index)
179
- @jobs_info[index]
133
+ def finish_job(job)
134
+ job = @active_jobs.delete(job)
135
+ job.finish!
136
+ @finished_jobs[job.id] = job
180
137
  end
181
138
  end
182
139
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cabiri
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.7
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,19 +9,8 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-05-19 00:00:00.000000000 Z
13
- dependencies:
14
- - !ruby/object:Gem::Dependency
15
- name: adeona
16
- requirement: &70111815020260 !ruby/object:Gem::Requirement
17
- none: false
18
- requirements:
19
- - - ! '>='
20
- - !ruby/object:Gem::Version
21
- version: '0'
22
- type: :runtime
23
- prerelease: false
24
- version_requirements: *70111815020260
12
+ date: 2012-12-29 00:00:00.000000000 Z
13
+ dependencies: []
25
14
  description: An easy and intuitive Ruby job queue for working with parallel processes.
26
15
  email: tomvaneyck@gmail.com
27
16
  executables: []
@@ -49,7 +38,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
49
38
  version: '0'
50
39
  requirements: []
51
40
  rubyforge_project:
52
- rubygems_version: 1.8.10
41
+ rubygems_version: 1.8.24
53
42
  signing_key:
54
43
  specification_version: 3
55
44
  summary: An easy and intuitive Ruby job queue for working with parallel processes.