async-container 0.25.0 → 0.26.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e2613c963c3efb08cf02c6a6299668b2b9ed83b583bbe84ebc973fa099b62e4a
4
- data.tar.gz: 9cc9cd1ac55d85be9684e9a042be6ff3e93dd44e69c2ce212ec5f7441cbf7b1d
3
+ metadata.gz: 3b5ef8adc5ee828c6c044454180bf3a0de60eca666c4fa00a9c3f6b65c3ddff6
4
+ data.tar.gz: 778afef1c04f76a74dd03feecc0af119890875bb3dcfcfb4526556615ee789aa
5
5
  SHA512:
6
- metadata.gz: ca31eb9ecbdd3c343ec77621778bcf7cfba210b5a1b7ba05a02d0e590f3574c8e0e25a4bcb85be5a6af05a601874084a2fab94dd2c422073c588816c899c7f46
7
- data.tar.gz: 6ade195ca0f67619dbffee9b6f4dd6d567ccd05b2dd069075a10403c897806c1f33c3f44b06ef91a5ea5212b85c4cbf4c3a8f1ca4e02988a9559781c991e884f
6
+ metadata.gz: 0736e1e1e2cdbed60648aac53ecf2ab258436f08f44427cc1cbc516a24a584cc9cd82449129faaf6d71a1486a3a781240de2725f6fe57a0d369ec4b6e46df4c3
7
+ data.tar.gz: 330e8827656fdae8a1943768519a9642f8dccd598e7c036b7b22ecf0043757ddccbb2b5fd1592c26b6112ce43e4aa29541876fe494beca186cadde096e071381
checksums.yaml.gz.sig CHANGED
Binary file
@@ -21,6 +21,16 @@ module Async
21
21
  end
22
22
  end
23
23
 
24
+ # Similar to {Terminate}, but represents `SIGKILL`.
25
+ class Kill < SignalException
26
+ SIGKILL = Signal.list["KILL"]
27
+
28
+ # Create a new kill error.
29
+ def initialize
30
+ super(SIGKILL)
31
+ end
32
+ end
33
+
24
34
  # Similar to {Interrupt}, but represents `SIGHUP`.
25
35
  class Restart < SignalException
26
36
  SIGHUP = Signal.list["HUP"]
@@ -231,20 +231,25 @@ module Async
231
231
  # Wait for the child process to exit.
232
232
  # @asynchronous This method may block.
233
233
  #
234
+ # @parameter timeout [Numeric | Nil] Maximum time to wait before forceful termination.
234
235
  # @returns [::Process::Status] The process exit status.
235
- def wait
236
+ def wait(timeout = 0.1)
236
237
  if @pid && @status.nil?
237
238
  Console.debug(self, "Waiting for process to exit...", pid: @pid)
238
239
 
239
240
  _, @status = ::Process.wait2(@pid, ::Process::WNOHANG)
240
241
 
241
- while @status.nil?
242
- sleep(0.1)
242
+ if @status.nil?
243
+ sleep(timeout) if timeout
243
244
 
244
245
  _, @status = ::Process.wait2(@pid, ::Process::WNOHANG)
245
246
 
246
247
  if @status.nil?
247
- Console.warn(self) {"Process #{@pid} is blocking, has it exited?"}
248
+ Console.warn(self) {"Process #{@pid} is blocking, sending kill signal..."}
249
+ self.kill!
250
+
251
+ # Wait for the process to exit:
252
+ _, @status = ::Process.wait2(@pid)
248
253
  end
249
254
  end
250
255
  end
@@ -119,36 +119,78 @@ module Async
119
119
  end
120
120
  end
121
121
 
122
- # Stop all child processes using {#terminate}.
123
- # @parameter timeout [Boolean | Numeric | Nil] If specified, invoke a graceful shutdown using {#interrupt} first.
124
- def stop(timeout = 1)
125
- Console.debug(self, "Stopping all processes...", timeout: timeout)
126
- # Use a default timeout if not specified:
127
- timeout = 1 if timeout == true
122
+ # Kill all running processes.
123
+ # This resumes each controlling fiber with the {Kill} class itself (not an instance).
124
+ def kill
125
+ Console.info(self, "Sending kill to #{@running.size} running processes...")
126
+ @running.each_value do |fiber|
127
+ fiber.resume(Kill)
128
+ end
129
+ end
130
+
131
+ private def wait_for_exit(clock, timeout)
132
+ while self.any?
133
+ duration = timeout - clock.total
134
+
135
+ if duration >= 0
136
+ self.wait_for_children(duration)
137
+ else
138
+ self.wait_for_children(0)
139
+ break
140
+ end
141
+ end
142
+ end
143
+
144
+ # Stop all child processes with a multi-phase shutdown sequence.
145
+ #
146
+ # A graceful shutdown performs the following sequence:
147
+ # 1. Send SIGINT and wait up to `interrupt_timeout` seconds
148
+ # 2. Send SIGTERM and wait up to `terminate_timeout` seconds
149
+ # 3. Send SIGKILL and wait indefinitely for process cleanup
150
+ #
151
+ # If `graceful` is false, skips the SIGINT phase and goes directly to SIGTERM → SIGKILL.
152
+ #
153
+ # @parameter graceful [Boolean | Numeric] Whether to send SIGINT first or skip directly to SIGTERM. A numeric value is used as both the interrupt and terminate timeout.
154
+ # @parameter interrupt_timeout [Numeric | Nil] Time to wait after SIGINT before escalating to SIGTERM.
155
+ # @parameter terminate_timeout [Numeric | Nil] Time to wait after SIGTERM before escalating to SIGKILL.
156
+ def stop(graceful = true, interrupt_timeout: 1, terminate_timeout: 1)
157
+ case graceful
158
+ when true
159
+ # Use defaults.
160
+ when false
161
+ interrupt_timeout = nil
162
+ when Numeric
163
+ interrupt_timeout = graceful
164
+ terminate_timeout = graceful
165
+ end
128
166
 
129
- if timeout
130
- start_time = Async::Clock.now
167
+ Console.debug(self, "Stopping all processes...", interrupt_timeout: interrupt_timeout, terminate_timeout: terminate_timeout)
168
+
169
+ # If a timeout is specified, interrupt the children first:
170
+ if interrupt_timeout
171
+ clock = Async::Clock.start
131
172
 
173
+ # Interrupt the children:
132
174
  self.interrupt
133
175
 
134
- while self.any?
135
- duration = Async::Clock.now - start_time
136
- remaining = timeout - duration
137
-
138
- if remaining >= 0
139
- self.wait_for_children(duration)
140
- else
141
- self.wait_for_children(0)
142
- break
143
- end
144
- end
176
+ # Wait for the children to exit:
177
+ self.wait_for_exit(clock, interrupt_timeout)
145
178
  end
146
179
 
147
- # Terminate all children:
148
- self.terminate if any?
180
+ if terminate_timeout
181
+ clock = Async::Clock.start
182
+
183
+ # If the children are still running, terminate them:
184
+ self.terminate
185
+
186
+ # Wait for the children to exit:
187
+ self.wait_for_exit(clock, terminate_timeout)
188
+ end
149
189
 
150
- # Wait for all children to exit:
151
- self.wait
190
+ if any?
191
+ self.kill
192
+ self.wait
193
+ end
152
194
  end
153
195
 
154
196
  # Wait for a message in the specified {Channel}.
@@ -165,6 +207,8 @@ module Async
165
207
  channel.interrupt!
166
208
  elsif result == Terminate
167
209
  channel.terminate!
210
+ elsif result == Kill
211
+ channel.kill!
168
212
  elsif result
169
213
  yield result
170
214
  elsif message = channel.receive
@@ -184,7 +228,7 @@ module Async
184
228
  # This log is a bit noisy and doesn't really provide a lot of useful information.
185
229
  # Console.debug(self, "Waiting for children...", duration: duration, running: @running)
186
230
 
187
- if !@running.empty?
231
+ unless @running.empty?
188
232
  # Maybe consider using a proper event loop here:
189
233
  if ready = self.select(duration)
190
234
  ready.each do |io|
@@ -216,10 +216,20 @@ module Async
216
216
  end
217
217
 
218
218
  # Wait for the thread to exit and return the exit status.
219
+ # @asynchronous This method may block.
220
+ #
221
+ # @parameter timeout [Numeric | Nil] Maximum time to wait before forceful termination.
219
222
  # @returns [Status]
220
- def wait
223
+ def wait(timeout = 0.1)
221
224
  if @waiter
222
- @waiter.join
225
+ Console.debug(self, "Waiting for thread to exit...", timeout: timeout)
226
+
227
+ unless @waiter.join(timeout)
228
+ Console.warn(self) {"Thread #{@thread} is blocking, sending kill signal..."}
229
+ self.kill!
230
+ @waiter.join
231
+ end
232
+
223
233
  @waiter = nil
224
234
  end
225
235
 
@@ -5,6 +5,6 @@
5
5
 
6
6
  module Async
7
7
  module Container
8
- VERSION = "0.25.0"
8
+ VERSION = "0.26.0"
9
9
  end
10
10
  end
data/readme.md CHANGED
@@ -26,6 +26,10 @@ Please see the [project documentation](https://socketry.github.io/async-containe
26
26
 
27
27
  Please see the [project releases](https://socketry.github.io/async-container/releases/index) for all releases.
28
28
 
29
+ ### v0.26.0
30
+
31
+ - [Production Reliability Improvements](https://socketry.github.io/async-container/releases/index#production-reliability-improvements)
32
+
29
33
  ### v0.25.0
30
34
 
31
35
  - Introduce `async:container:notify:log:ready?` task for detecting process readiness.
data/releases.md CHANGED
@@ -1,5 +1,17 @@
1
1
  # Releases
2
2
 
3
+ ## v0.26.0
4
+
5
+ ### Production Reliability Improvements
6
+
7
+ This release significantly improves container reliability by eliminating production hangs caused by unresponsive child processes.
8
+
9
+ **SIGKILL Fallback Support**: Containers now automatically escalate to SIGKILL when child processes ignore SIGINT and SIGTERM signals. This prevents the critical production issue where containers would hang indefinitely waiting for uncooperative processes to exit.
10
+
11
+ **Hang Prevention**: Individual child processes now have timeout-based hang prevention. If a process closes its notification pipe but doesn't actually exit, the container will detect this and escalate to SIGKILL after a reasonable timeout instead of hanging forever.
12
+
13
+ **Improved Three-Phase Shutdown**: The `Group#stop()` method now uses a cleaner interrupt → terminate → kill escalation sequence with configurable timeouts for each phase, giving well-behaved processes multiple opportunities to shut down gracefully while ensuring unresponsive processes are eventually terminated.
14
+
3
15
  ## v0.25.0
4
16
 
5
17
  - Introduce `async:container:notify:log:ready?` task for detecting process readiness.
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: async-container
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.25.0
4
+ version: 0.26.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Samuel Williams
@@ -105,7 +105,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
105
105
  - !ruby/object:Gem::Version
106
106
  version: '0'
107
107
  requirements: []
108
- rubygems_version: 3.6.7
108
+ rubygems_version: 3.6.9
109
109
  specification_version: 4
110
110
  summary: Abstract container-based parallelism using threads and processes where appropriate.
111
111
  test_files: []
metadata.gz.sig CHANGED
Binary file