cabiri 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (2) hide show
  1. data/lib/cabiri.rb +28 -35
  2. metadata +7 -7
data/lib/cabiri.rb CHANGED
@@ -11,6 +11,8 @@ module Cabiri
11
11
  # - self_pipe: a pipe that is used by the main process to implement a blocking wait for the
12
12
  # wait_until_finished method. Both endpoints have sync set to true to prevent the
13
13
  # kernel from buffering any messages.
14
+ # - mutex: a mutex that is used to treat the code that deals with extracting results from
15
+ # finished processes and spawning new processes as a critical section
14
16
  # - logger: a logger to help log errors
15
17
  def initialize
16
18
  @remaining_jobs = []
@@ -24,6 +26,7 @@ module Cabiri
24
26
  @self_pipe[0].sync = true
25
27
  @self_pipe[1].sync = true
26
28
 
29
+ @mutex = Mutex.new
27
30
  @logger = Logger.new($stdout)
28
31
  end
29
32
 
@@ -53,22 +56,8 @@ module Cabiri
53
56
  end
54
57
  end
55
58
 
56
- # here we start by creating a uid to index mapping. We also add an entry for each
57
- # job to the jobs_info array.
58
- # Next we define a signal handler that deals with SIGCHLD signals
59
- # (a signal that indicates that a child process has terminated). When we receive
60
- # such a signal we get the pid and make sure that the child process was one of
61
- # the jobs belonging to the job queue.
62
- # This needs to be done inside a while loop as two or more child processes exiting
63
- # in quick succession might only generate one signal. For example, the first dead
64
- # child process will generate a SIGCHLD. However, when a second process dies quickly
65
- # afterwards and the previous SIGCHLD signal has not yet been handled, this second
66
- # process won't send a second SIGCHLD signal, but will instead assume that the
67
- # SIGCHLD handler knows to look for multiple dead processes.
68
- # You might also notice that old_handler is being used to redirect this signal to
69
- # a possible other previously defined SIGCHLD signal handler.
70
- # Also note that we close the write end of the self_pipe when there are no jobs left.
71
- # See the comments on the wait_until_finished method for why this is important.
59
+ # here we start by creating a uid to index mapping and add an entry for each
60
+ # job to the jobs_info array. We then schedule the first batch of jobs.
72
61
  def start(max_active_jobs)
73
62
  # create job mappings and initialize job info
74
63
  @remaining_jobs.each_with_index do |job, index|
@@ -79,23 +68,8 @@ module Cabiri
79
68
  @jobs_info[index][:pid] = nil
80
69
  @jobs_info[index][:pipe] = nil
81
70
  @jobs_info[index][:error] = nil
82
- @jobs_info[index][:state] = :waiting
83
71
  @jobs_info[index][:result] = nil
84
- end
85
-
86
- # define signal handler
87
- old_handler = trap(:CLD) do
88
- begin
89
- while pid = Process.wait(-1, Process::WNOHANG)
90
- if(@active_jobs_pids.include?(pid))
91
- handle_finished_job(pid)
92
- fill_job_slots(max_active_jobs)
93
- @self_pipe[1].close if finished?
94
- end
95
- old_handler.call if old_handler.respond_to?(:call)
96
- end
97
- rescue Errno::ECHILD
98
- end
72
+ @jobs_info[index][:state] = :waiting
99
73
  end
100
74
 
101
75
  # start scheduling first batch of jobs
@@ -109,7 +83,7 @@ module Cabiri
109
83
  def fill_job_slots(max_active_jobs)
110
84
  while(@active_jobs_pids.length < max_active_jobs and !@remaining_jobs.empty?)
111
85
  begin
112
- start_next_job
86
+ start_next_job(max_active_jobs)
113
87
  rescue => ex
114
88
  handle_error(ex)
115
89
  ensure
@@ -130,8 +104,18 @@ module Cabiri
130
104
  # a hash - we don't know) into a byte stream, put this information inside an array, and then convert this
131
105
  # array into a special string designed for transporting binary data as text. This text can now be sent
132
106
  # through the write endpoint of the pipe. Back outside the job process we close the write endpoint of the
133
- # pipe and set sync to true. The rest of the code here should require no comments.
134
- def start_next_job
107
+ # pipe and set sync to true. The next few lines should require no comments.
108
+ # We finish by creating a thread that waits for the newly created job to end. This thread is responsible
109
+ # for extracting information from the finished job and spawning new jobs. Also note that we close the
110
+ # write end of the self_pipe when there are no jobs left. See the comments on the wait_until_finished
111
+ # method for why this is important.
112
+ # Notice how the inside of the thread is wrapped inside a mutex. This is required to prevent a race
113
+ # condition from occurring when two or more jobs return in quick succession. When the first job
114
+ # returns, its thread will start scheduling new processes, but this can take some time. If a second
115
+ # job returns before the thread of the first job is done scheduling, it will start doing scheduling
116
+ # work as well. So now you have two threads simultaneously doing scheduling work, and the end result
117
+ # will be unpredictable.
118
+ def start_next_job(max_active_jobs)
135
119
  pipe = IO.pipe()
136
120
  job = @remaining_jobs.first
137
121
 
@@ -151,6 +135,15 @@ module Cabiri
151
135
  @jobs_info[index][:pid] = pid
152
136
  @jobs_info[index][:pipe] = pipe
153
137
  @jobs_info[index][:state] = :running
138
+
139
+ Thread.new(pid) do |my_pid|
140
+ Process.waitpid(my_pid)
141
+ @mutex.synchronize do
142
+ handle_finished_job(my_pid)
143
+ fill_job_slots(max_active_jobs)
144
+ @self_pipe[1].close if finished?
145
+ end
146
+ end
154
147
  end
155
148
 
156
149
  # when a job finishes, we remove its pid from the array that keeps track of active processes.
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cabiri
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-05-19 00:00:00.000000000Z
12
+ date: 2012-05-19 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: adeona
16
- requirement: &70302667643740 !ruby/object:Gem::Requirement
16
+ requirement: &70339103396180 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,8 +21,8 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *70302667643740
25
- description: An easy and intuitive Ruby job queue.
24
+ version_requirements: *70339103396180
25
+ description: An easy and intuitive Ruby job queue for working with parallel processes.
26
26
  email: tomvaneyck@gmail.com
27
27
  executables: []
28
28
  extensions: []
@@ -49,8 +49,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
49
49
  version: '0'
50
50
  requirements: []
51
51
  rubyforge_project:
52
- rubygems_version: 1.8.17
52
+ rubygems_version: 1.8.10
53
53
  signing_key:
54
54
  specification_version: 3
55
- summary: An easy and intuitive Ruby job queue.
55
+ summary: An easy and intuitive Ruby job queue for working with parallel processes.
56
56
  test_files: []