cabiri 0.0.4 → 0.0.7

Files changed (2)
  1. data/lib/cabiri.rb +108 -151
  2. metadata +4 -15
@@ -1,182 +1,139 @@
-require 'adeona'
-require 'logger'
-
 module Cabiri
-  class JobQueue
-    # - remaining_jobs: array that contains jobs that have yet to run
-    # - active_jobs_pids: array that contains the pids of jobs that are currently running
-    # - jobs_info: array that keeps track of the state of each job
-    # - pid_to_index: hash that maps the pid of a job to an index in the jobs_info array
-    # - uid_to_index: hash that maps the uid of a job to an index in the jobs_info array
-    # - self_pipe: a pipe that is used by the main process to implement a blocking wait for the
-    #              wait_until_finished method. Both endpoints have sync set to true to prevent the
-    #              kernel from buffering any messages.
-    # - mutex: a mutex that is used to treat the code that deals with extracting results from
-    #          finished processes and spawning new processes as a critical section
-    # - logger: a logger to help log errors
-    def initialize
-      @remaining_jobs = []
-      @active_jobs_pids = []
+  class Job
+    attr_accessor :id
+    attr_accessor :pid
+    attr_accessor :block
+    attr_accessor :result
+    attr_accessor :pipe
+    attr_accessor :lifeline
+
+    def initialize(id, &block)
+      @id = id
+      @pid = nil
+      @block = block
+      @result = nil
+      @pipe = nil
+      @lifeline = nil
+    end
+
+    def activate!
+      @pipe = IO.pipe
+      @lifeline = IO.pipe
 
-      @jobs_info = []
-      @pid_to_index = {}
-      @uid_to_index = {}
+      @pid = fork do
+        @pipe[0].close
+        @pipe[1].sync = true
 
-      @self_pipe = IO.pipe()
-      @self_pipe[0].sync = true
-      @self_pipe[1].sync = true
+        @lifeline[1].close
+        @lifeline[0].sync = true
+
+        begin
+          lifeline_thread = Thread.new(Thread.current) do |main_thread|
+            result = IO.select([@lifeline[0]], nil, nil, nil)
+            main_thread.raise "Killing job '#{@id}' as connection with parent process was lost."
+          end
+          result = @block.call
+          @pipe[1].puts [Marshal.dump(result)].pack("m")
+        rescue => e
+          puts "Exception (#{e}) in block: #{@block.inspect}"
+        end
+      end
 
-      @mutex = Mutex.new
-      @logger = Logger.new($stdout)
+      @pipe[1].close
+      @pipe[0].sync = true
+
+      @lifeline[0].close
+      @lifeline[1].sync = true
     end
 
-    # add a job to the remaining_jobs array
-    def add(&block)
-      @remaining_jobs << block
+    def finish!
+      @result = Marshal.load(@pipe[0].read.unpack("m")[0])
+      @pipe[0].close
+      @lifeline[1].close
+      Process.waitpid(@pid)
     end
+  end
 
-    # check if there is more work to be done. The work is finished if there are no jobs waiting to be run
-    # and there are no jobs currently being run.
-    def finished?
-      @remaining_jobs.empty? and @active_jobs_pids.empty?
+  class JobQueue
+    attr_accessor :pending_jobs
+    attr_accessor :active_jobs
+    attr_accessor :finished_jobs
+
+    def initialize
+      @pending_jobs = []
+      @active_jobs = []
+      @finished_jobs = {}
     end
 
-    # this is a blocking wait that won't return until after all jobs in the
-    # queue are finished. The initialize method has set up a self_pipe. When
-    # the last job of the queue is finished, the start method will close the
-    # write end of this pipe. This causes the kernel to notice that nothing can
-    # write to the pipe anymore and thus the kernel sends an EOF down this pipe,
-    # which in turn causes the blocking IO.select to return.
-    # When IO.select returns we close the read end of the pipe, such that any
-    # future calls to the wait_until_finished method can return immediately.
-    def wait_until_finished
-      if(!@self_pipe[0].closed?)
-        IO.select([@self_pipe[0]])
-        @self_pipe[0].close
-      end
+    def add(id, &block)
+      @pending_jobs << Job.new(id, &block)
     end
 
-    # here we start by creating a uid to index mapping and adding an entry for each
-    # job to the jobs_info array. We then schedule the first batch of jobs.
-    def start(max_active_jobs)
-      # create job mappings and initialize job info
-      @remaining_jobs.each_with_index do |job, index|
-        uid = job.to_s
-        @uid_to_index[uid] = index
-
-        @jobs_info[index] = {}
-        @jobs_info[index][:pid] = nil
-        @jobs_info[index][:pipe] = nil
-        @jobs_info[index][:error] = nil
-        @jobs_info[index][:result] = nil
-        @jobs_info[index][:state] = :waiting
-      end
+    def pending_jobs_available?
+      @pending_jobs.length >= 1
+    end
 
-      # start scheduling the first batch of jobs
-      fill_job_slots(max_active_jobs)
+    def active_jobs_available?
+      @active_jobs.length >= 1
     end
 
-    # here we fill all the empty job slots. When we take a new job, two things can happen:
-    # either we manage to successfully spawn a new process, or something goes wrong and we log
-    # it. In either case we assume that we are done with the job and remove it from the
-    # remaining_jobs array.
-    def fill_job_slots(max_active_jobs)
-      while(@active_jobs_pids.length < max_active_jobs and !@remaining_jobs.empty?)
-        begin
-          start_next_job(max_active_jobs)
-        rescue => ex
-          handle_error(ex)
-        ensure
-          @remaining_jobs.shift
-        end
+    def finished?
+      !pending_jobs_available? && !active_jobs_available?
+    end
+
+    def get_read_end_points_of_active_jobs
+      read_end_points = []
+      @active_jobs.each do |active_job|
+        read_end_points << active_job.pipe[0]
       end
+      read_end_points
     end
 
-    # when starting a new job we first create a pipe. This pipe will be our mechanism to pass any
-    # data returned by the job process to the main process. Next, we create a job process by using
-    # the Adeona gem. The spawn_child method acts like fork(), but adds some extra protection to
-    # prevent orphaned processes. Inside this job process we close the read endpoint of the pipe and
-    # set sync to true for the write endpoint in order to prevent the kernel from buffering any messages.
-    # We continue by letting the job do its work and storing the result in a var called 'result'. The
-    # next step looks a bit weird. We mentioned that we want to use pipes to communicate data, but pipes
-    # weren't designed to transport data structures like arrays and hashes; instead they are meant for text.
-    # So we use a trick. We use Marshal.dump to convert our result (which could be an array, a number,
-    # a hash - we don't know) into a byte stream, put this information inside an array, and then convert this
-    # array into a special string designed for transporting binary data as text. This text can now be sent
-    # through the write endpoint of the pipe. Back outside the job process we close the write endpoint of the
-    # pipe and set sync to true. The next few lines should require no comment.
-    # We finish by creating a thread that waits for the newly created job to end. This thread is responsible
-    # for extracting information from the finished job and spawning new jobs. Also note that we close the
-    # write end of the self_pipe when there are no jobs left. See the comments on the wait_until_finished
-    # method for why this is important.
-    # Notice how the inside of the thread is wrapped inside a mutex. This is required to prevent a race
-    # condition from occurring when two or more jobs return in quick succession. When the first job
-    # returns, its thread will start scheduling new processes, but this can take some time. If a second
-    # job returns before the thread of the first job is done scheduling, it will start doing scheduling
-    # work as well. So now you have two threads simultaneously doing scheduling work, and the end result
-    # will be unpredictable.
-    def start_next_job(max_active_jobs)
-      pipe = IO.pipe()
-      job = @remaining_jobs.first
-
-      pid = Adeona.spawn_child(:detach => false) do
-        pipe[0].close
-        pipe[1].sync = true
-        result = job.call
-        pipe[1].puts [Marshal.dump(result)].pack("m")
+    def get_active_job_by_read_end_point(read_end_point)
+      @active_jobs.each do |active_job|
+        return active_job if (active_job.pipe[0] == read_end_point)
       end
-      pipe[1].close
-      pipe[0].sync = true
-
-      index = @uid_to_index[job.to_s]
-      @active_jobs_pids << pid
-      @pid_to_index[pid] = index
-
-      @jobs_info[index][:pid] = pid
-      @jobs_info[index][:pipe] = pipe
-      @jobs_info[index][:state] = :running
-
-      Thread.new(pid) do |my_pid|
-        Process.waitpid(my_pid)
-        @mutex.synchronize do
-          handle_finished_job(my_pid)
-          fill_job_slots(max_active_jobs)
-          @self_pipe[1].close if finished?
+    end
+
+    def start(max_active_jobs)
+      # start by activating as many jobs as allowed
+      max_active_jobs.times do
+        if pending_jobs_available?
+          activate_next_available_job
         end
       end
-    end
 
-    # when a job finishes, we remove its pid from the array that keeps track of active processes.
-    # Next we read the result that was sent over the pipe and then close the pipe's read endpoint.
-    # We take the received text data, turn it into a byte stream and then load this information
-    # in order to obtain the resulting data from the job.
-    def handle_finished_job(pid)
-      index = @pid_to_index[pid]
-      @active_jobs_pids.delete(pid)
+      while active_jobs_available?
+        # block until at least one active job has written back its result
+        read_end_points = get_read_end_points_of_active_jobs
+        read_end_points_array, _, _ = IO.select(read_end_points, nil, nil, nil)
 
-      pipe = @jobs_info[index][:pipe]
-      result = pipe[0].read
-      pipe[0].close
+        # finish all jobs that we got returned data for
+        read_end_points_array.each do |read_end_point|
+          active_job = get_active_job_by_read_end_point(read_end_point)
+          finish_job(active_job)
+        end
 
-      @jobs_info[index][:result] = Marshal.load(result.unpack("m")[0])
-      @jobs_info[index][:state] = :finished
+        # schedule as many new jobs as the number of jobs that just finished
+        nb_of_just_finished_jobs = read_end_points_array.length
+        nb_of_just_finished_jobs.times do
+          if pending_jobs_available?
+            activate_next_available_job
+          end
+        end
+      end
     end
 
-    # when there is an exception, we log the error and set the relevant fields in the jobs_info data
-    def handle_error(ex)
-      job = @remaining_jobs.first
-      index = @uid_to_index[job.to_s]
-
-      error = "Exception thrown when trying to instantiate job. Job info: #{@remaining_jobs.first.to_s}. Exception info: #{ex.to_s}."
-      @logger.warn(self.class.to_s) { error }
-
-      @jobs_info[index][:error] = error
-      @jobs_info[index][:state] = :error
+    def activate_next_available_job
+      job = @pending_jobs.shift
+      job.activate!
+      @active_jobs << job
     end
 
-    # this allows users to query the state of their jobs
-    def get_info(index)
-      @jobs_info[index]
+    def finish_job(job)
+      job = @active_jobs.delete(job)
+      job.finish!
+      @finished_jobs[job.id] = job
     end
   end
 end
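
The rewrite above replaces 0.0.4's pid-indexed bookkeeping (`pid_to_index`, `uid_to_index`, `jobs_info`) with explicit `Job` objects keyed by a caller-supplied id, and swaps the Adeona child plus thread-per-job scheduler for a single `IO.select` loop. A minimal usage sketch of the resulting 0.0.7 API; the job ids and blocks here are illustrative, not from the gem:

```ruby
require 'cabiri'

queue = Cabiri::JobQueue.new

# Each job is a caller-chosen id plus a block; the block runs in a
# forked child and its return value must be Marshal-able.
queue.add(:square)   { 7 * 7 }
queue.add(:greeting) { "hello from pid #{Process.pid}" }

# Run at most 2 jobs in parallel; start blocks until the queue drains.
queue.start(2)

# Finished jobs are collected in a hash keyed by job id.
puts queue.finished_jobs[:square].result    # => 49
puts queue.finished_jobs[:greeting].result  # => "hello from pid ..."
```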
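
Both versions ship results from child to parent the same way: `Marshal.dump` turns the block's return value into a byte stream, `Array#pack("m")` Base64-encodes it so it can travel through the pipe as plain text, and the parent reverses both steps. A standalone sketch of that round trip, independent of the gem:

```ruby
# Round-trip an arbitrary Ruby object through a pipe, mirroring what
# Job#activate! (writer side) and Job#finish! (reader side) do above.
reader, writer = IO.pipe

pid = fork do
  reader.close
  writer.sync = true
  result = { answer: 42, primes: [2, 3, 5] }
  writer.puts [Marshal.dump(result)].pack("m") # byte stream -> Base64 text
end

writer.close
payload = reader.read          # EOF arrives once the child exits
reader.close
Process.waitpid(pid)

p Marshal.load(payload.unpack("m")[0]) # => {:answer=>42, :primes=>[2, 3, 5]}
```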
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: cabiri
 version: !ruby/object:Gem::Version
-  version: 0.0.4
+  version: 0.0.7
 prerelease:
 platform: ruby
 authors:
@@ -9,19 +9,8 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2012-05-19 00:00:00.000000000 Z
-dependencies:
-- !ruby/object:Gem::Dependency
-  name: adeona
-  requirement: &70111815020260 !ruby/object:Gem::Requirement
-    none: false
-    requirements:
-    - - ! '>='
-      - !ruby/object:Gem::Version
-        version: '0'
-  type: :runtime
-  prerelease: false
-  version_requirements: *70111815020260
+date: 2012-12-29 00:00:00.000000000 Z
+dependencies: []
 description: An easy and intuitive Ruby job queue for working with parallel processes.
 email: tomvaneyck@gmail.com
 executables: []
@@ -49,7 +38,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 1.8.10
+rubygems_version: 1.8.24
 signing_key:
 specification_version: 3
 summary: An easy and intuitive Ruby job queue for working with parallel processes.