cabiri 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. data/lib/cabiri.rb +28 -35
  2. metadata +7 -7
data/lib/cabiri.rb CHANGED
@@ -11,6 +11,8 @@ module Cabiri
11
11
  # - self_pipe: a pipe that is used by the main process to implement a blocking wait for the
12
12
  # wait_until_finished method. Both endpoints have sync set to true to prevent the
13
13
  # kernel from buffering any messages.
14
+ # - mutex: a mutex that is used to treat the code that deals with extracting results from
15
+ # finished processes and spawning new processes as a critical section
14
16
  # - logger: a logger to help log errors
15
17
  def initialize
16
18
  @remaining_jobs = []
@@ -24,6 +26,7 @@ module Cabiri
24
26
  @self_pipe[0].sync = true
25
27
  @self_pipe[1].sync = true
26
28
 
29
+ @mutex = Mutex.new
27
30
  @logger = Logger.new($stdout)
28
31
  end
29
32
 
@@ -53,22 +56,8 @@ module Cabiri
53
56
  end
54
57
  end
55
58
 
56
- # here we start by creating a uid to index mapping. We also add an entry for each
57
- # job to the jobs_info array.
58
- # Next we define a signal handler that deals with SIGCHLD signals
59
- # (a signal that indicates that a child process has terminated). When we receive
60
- # such a signal we get the pid and make sure that the child process was one of
61
- # the jobs belonging to the job queue.
62
- # This needs to be done inside a while loop as two or more child processes exiting
63
- # in quick succession might only generate one signal. For example, the first dead
64
- # child process will generate a SIGCHLD. However, when a second process dies quickly
65
- # afterwards and the previous SIGCHLD signal has not yet been handled, this second
66
- # process won't send a second SIGCHLD signal, but will instead assume that the
67
- # SIGCHLD handler knows to look for multiple dead processes.
68
- # You might also notice that old_handler is being used to redirect this signal to
69
- # a possible other previously defined SIGCHLD signal handler.
70
- # Also note that we close the write end of the self_pipe when there are no jobs left.
71
- # See the comments on the wait_until_finished method for why this is important.
59
+ # here we start by creating a uid to index mapping and adding an entry for each
60
+ # job to the jobs_info array. We then schedule the first batch of jobs.
72
61
  def start(max_active_jobs)
73
62
  # create job mappings and initialize job info
74
63
  @remaining_jobs.each_with_index do |job, index|
@@ -79,23 +68,8 @@ module Cabiri
79
68
  @jobs_info[index][:pid] = nil
80
69
  @jobs_info[index][:pipe] = nil
81
70
  @jobs_info[index][:error] = nil
82
- @jobs_info[index][:state] = :waiting
83
71
  @jobs_info[index][:result] = nil
84
- end
85
-
86
- # define signal handler
87
- old_handler = trap(:CLD) do
88
- begin
89
- while pid = Process.wait(-1, Process::WNOHANG)
90
- if(@active_jobs_pids.include?(pid))
91
- handle_finished_job(pid)
92
- fill_job_slots(max_active_jobs)
93
- @self_pipe[1].close if finished?
94
- end
95
- old_handler.call if old_handler.respond_to?(:call)
96
- end
97
- rescue Errno::ECHILD
98
- end
72
+ @jobs_info[index][:state] = :waiting
99
73
  end
100
74
 
101
75
  # start scheduling first batch of jobs
@@ -109,7 +83,7 @@ module Cabiri
109
83
  def fill_job_slots(max_active_jobs)
110
84
  while(@active_jobs_pids.length < max_active_jobs and !@remaining_jobs.empty?)
111
85
  begin
112
- start_next_job
86
+ start_next_job(max_active_jobs)
113
87
  rescue => ex
114
88
  handle_error(ex)
115
89
  ensure
@@ -130,8 +104,18 @@ module Cabiri
130
104
  # a hash - we don't know) into a byte stream, put this information inside an array, and then convert this
131
105
  # array into a special string designed for transporting binary data as text. This text can now be sent
132
106
  # through the write endpoint of the pipe. Back outside the job process we close the write endpoint of the
133
- # pipe and set sync to true. The rest of the code here should require no comments.
134
- def start_next_job
107
+ # pipe and set sync to true. The next few lines should require no comments.
108
+ # We finish by creating a thread that waits for the newly created job to end. This thread is responsible
109
+ # for extracting information from the finished job and spawning new jobs. Also note that we close the
110
+ # write end of the self_pipe when there are no jobs left. See the comments on the wait_until_finished
111
+ # method for why this is important.
112
+ # Notice how the inside of the thread is wrapped inside a mutex. This is required to prevent a race
113
+ # condition from occurring when two or more jobs return in quick succession. When the first job
114
+ # returns, its thread will start scheduling new processes, but this can take some time. If a second
115
+ # job returns before the thread of the first job is done scheduling, it will start doing scheduling
116
+ # work as well. So now you have two threads simultaneously doing scheduling work, and the end result
117
+ # will be unpredictable.
118
+ def start_next_job(max_active_jobs)
135
119
  pipe = IO.pipe()
136
120
  job = @remaining_jobs.first
137
121
 
@@ -151,6 +135,15 @@ module Cabiri
151
135
  @jobs_info[index][:pid] = pid
152
136
  @jobs_info[index][:pipe] = pipe
153
137
  @jobs_info[index][:state] = :running
138
+
139
+ Thread.new(pid) do |my_pid|
140
+ Process.waitpid(my_pid)
141
+ @mutex.synchronize do
142
+ handle_finished_job(my_pid)
143
+ fill_job_slots(max_active_jobs)
144
+ @self_pipe[1].close if finished?
145
+ end
146
+ end
154
147
  end
155
148
 
156
149
  # when a job finishes, we remove its pid from the array that keeps track of active processes.
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cabiri
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-05-19 00:00:00.000000000Z
12
+ date: 2012-05-19 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: adeona
16
- requirement: &70302667643740 !ruby/object:Gem::Requirement
16
+ requirement: &70339103396180 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,8 +21,8 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *70302667643740
25
- description: An easy and intuitive Ruby job queue.
24
+ version_requirements: *70339103396180
25
+ description: An easy and intuitive Ruby job queue for working with parallel processes.
26
26
  email: tomvaneyck@gmail.com
27
27
  executables: []
28
28
  extensions: []
@@ -49,8 +49,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
49
49
  version: '0'
50
50
  requirements: []
51
51
  rubyforge_project:
52
- rubygems_version: 1.8.17
52
+ rubygems_version: 1.8.10
53
53
  signing_key:
54
54
  specification_version: 3
55
- summary: An easy and intuitive Ruby job queue.
55
+ summary: An easy and intuitive Ruby job queue for working with parallel processes.
56
56
  test_files: []