async-container 0.25.0 → 0.27.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e2613c963c3efb08cf02c6a6299668b2b9ed83b583bbe84ebc973fa099b62e4a
4
- data.tar.gz: 9cc9cd1ac55d85be9684e9a042be6ff3e93dd44e69c2ce212ec5f7441cbf7b1d
3
+ metadata.gz: 7beb84adfd9648242dc7850eab94f4d080fea57d2ff218a02950c18008b0d2c5
4
+ data.tar.gz: c558b447f4f4273350e5be255a6916020592a747243a6f5a3a937bea114b905c
5
5
  SHA512:
6
- metadata.gz: ca31eb9ecbdd3c343ec77621778bcf7cfba210b5a1b7ba05a02d0e590f3574c8e0e25a4bcb85be5a6af05a601874084a2fab94dd2c422073c588816c899c7f46
7
- data.tar.gz: 6ade195ca0f67619dbffee9b6f4dd6d567ccd05b2dd069075a10403c897806c1f33c3f44b06ef91a5ea5212b85c4cbf4c3a8f1ca4e02988a9559781c991e884f
6
+ metadata.gz: 8b6bdc0e7268ef3c39f0591c9d3d4d5609b6a0e0cec112a69d6d0563e891af0caee0af78f98afa5dcf74f4cb0b8167e69f6afeb4c7ef7437a0f8717c0d1423d3
7
+ data.tar.gz: df8b60f1e3d9b4efb9b677605cc1dad130f8be806b282c4ef6522e809d62edc985432b49d1d2458e47d142e0a489a4c4d6db00cd0194cca5af386a2addcbd5c5
checksums.yaml.gz.sig CHANGED
Binary file
@@ -21,6 +21,16 @@ module Async
21
21
  end
22
22
  end
23
23
 
24
+ # Similar to {Terminate}, but represents `SIGKILL`.
25
+ class Kill < SignalException
26
+ SIGKILL = Signal.list["KILL"]
27
+
28
+ # Create a new kill error.
29
+ def initialize
30
+ super(SIGKILL)
31
+ end
32
+ end
33
+
24
34
  # Similar to {Interrupt}, but represents `SIGHUP`.
25
35
  class Restart < SignalException
26
36
  SIGHUP = Signal.list["HUP"]
@@ -231,20 +231,25 @@ module Async
231
231
  # Wait for the child process to exit.
232
232
  # @asynchronous This method may block.
233
233
  #
234
+ # @parameter timeout [Numeric | Nil] Maximum time to wait before forceful termination.
234
235
  # @returns [::Process::Status] The process exit status.
235
- def wait
236
+ def wait(timeout = 0.1)
236
237
  if @pid && @status.nil?
237
238
  Console.debug(self, "Waiting for process to exit...", pid: @pid)
238
239
 
239
240
  _, @status = ::Process.wait2(@pid, ::Process::WNOHANG)
240
241
 
241
- while @status.nil?
242
- sleep(0.1)
242
+ if @status.nil?
243
+ sleep(timeout) if timeout
243
244
 
244
245
  _, @status = ::Process.wait2(@pid, ::Process::WNOHANG)
245
246
 
246
247
  if @status.nil?
247
- Console.warn(self) {"Process #{@pid} is blocking, has it exited?"}
248
+ Console.warn(self) {"Process #{@pid} is blocking, sending kill signal..."}
249
+ self.kill!
250
+
251
+ # Wait for the process to exit:
252
+ _, @status = ::Process.wait2(@pid)
248
253
  end
249
254
  end
250
255
  end
@@ -10,6 +10,12 @@ require_relative "error"
10
10
 
11
11
  module Async
12
12
  module Container
13
+ # The default timeout for interrupting processes, before escalating to terminating.
14
+ INTERRUPT_TIMEOUT = ENV.fetch("ASYNC_CONTAINER_INTERRUPT_TIMEOUT", 10).to_f
15
+
16
+ # The default timeout for terminating processes, before escalating to killing.
17
+ TERMINATE_TIMEOUT = ENV.fetch("ASYNC_CONTAINER_TERMINATE_TIMEOUT", 10).to_f
18
+
13
19
  # Manages a group of running processes.
14
20
  class Group
15
21
  # Initialize an empty group.
@@ -119,36 +125,78 @@ module Async
119
125
  end
120
126
  end
121
127
 
122
- # Stop all child processes using {#terminate}.
123
- # @parameter timeout [Boolean | Numeric | Nil] If specified, invoke a graceful shutdown using {#interrupt} first.
124
- def stop(timeout = 1)
125
- Console.debug(self, "Stopping all processes...", timeout: timeout)
126
- # Use a default timeout if not specified:
127
- timeout = 1 if timeout == true
128
+ # Kill all running processes.
129
+ # This resumes the controlling fiber with an instance of {Kill}.
130
+ def kill
131
+ Console.info(self, "Sending kill to #{@running.size} running processes...")
132
+ @running.each_value do |fiber|
133
+ fiber.resume(Kill)
134
+ end
135
+ end
136
+
137
+ private def wait_for_exit(clock, timeout)
138
+ while self.any?
139
+ duration = timeout - clock.total
140
+
141
+ if duration >= 0
142
+ self.wait_for_children(duration)
143
+ else
144
+ self.wait_for_children(0)
145
+ break
146
+ end
147
+ end
148
+ end
149
+
150
+ # Stop all child processes with a multi-phase shutdown sequence.
151
+ #
152
+ # A graceful shutdown performs the following sequence:
153
+ # 1. Send SIGINT and wait up to `interrupt_timeout` seconds
154
+ # 2. Send SIGTERM and wait up to `terminate_timeout` seconds
155
+ # 3. Send SIGKILL and wait indefinitely for process cleanup
156
+ #
157
+ # If `graceful` is false, skips the SIGINT phase and goes directly to SIGTERM → SIGKILL.
158
+ #
159
+ # @parameter graceful [Boolean] Whether to send SIGINT first or skip directly to SIGTERM.
160
+ # @parameter interrupt_timeout [Numeric | Nil] Time to wait after SIGINT before escalating to SIGTERM.
161
+ # @parameter terminate_timeout [Numeric | Nil] Time to wait after SIGTERM before escalating to SIGKILL.
162
+ def stop(graceful = true, interrupt_timeout: INTERRUPT_TIMEOUT, terminate_timeout: TERMINATE_TIMEOUT)
163
+ case graceful
164
+ when true
165
+ # Use defaults.
166
+ when false
167
+ interrupt_timeout = nil
168
+ when Numeric
169
+ interrupt_timeout = graceful
170
+ terminate_timeout = graceful
171
+ end
128
172
 
129
- if timeout
130
- start_time = Async::Clock.now
173
+ Console.debug(self, "Stopping all processes...", interrupt_timeout: interrupt_timeout, terminate_timeout: terminate_timeout)
174
+
175
+ # If a timeout is specified, interrupt the children first:
176
+ if interrupt_timeout
177
+ clock = Async::Clock.start
131
178
 
179
+ # Interrupt the children:
132
180
  self.interrupt
133
181
 
134
- while self.any?
135
- duration = Async::Clock.now - start_time
136
- remaining = timeout - duration
137
-
138
- if remaining >= 0
139
- self.wait_for_children(duration)
140
- else
141
- self.wait_for_children(0)
142
- break
143
- end
144
- end
182
+ # Wait for the children to exit:
183
+ self.wait_for_exit(clock, interrupt_timeout)
145
184
  end
146
185
 
147
- # Terminate all children:
148
- self.terminate if any?
186
+ if terminate_timeout
187
+ clock = Async::Clock.start
188
+
189
+ # If the children are still running, terminate them:
190
+ self.terminate
191
+
192
+ # Wait for the children to exit:
193
+ self.wait_for_exit(clock, terminate_timeout)
194
+ end
149
195
 
150
- # Wait for all children to exit:
151
- self.wait
196
+ if any?
197
+ self.kill
198
+ self.wait
199
+ end
152
200
  end
153
201
 
154
202
  # Wait for a message in the specified {Channel}.
@@ -165,6 +213,8 @@ module Async
165
213
  channel.interrupt!
166
214
  elsif result == Terminate
167
215
  channel.terminate!
216
+ elsif result == Kill
217
+ channel.kill!
168
218
  elsif result
169
219
  yield result
170
220
  elsif message = channel.receive
@@ -184,7 +234,7 @@ module Async
184
234
  # This log is a bit noisy and doesn't really provide a lot of useful information.
185
235
  # Console.debug(self, "Waiting for children...", duration: duration, running: @running)
186
236
 
187
- if !@running.empty?
237
+ unless @running.empty?
188
238
  # Maybe consider using a proper event loop here:
189
239
  if ready = self.select(duration)
190
240
  ready.each do |io|
@@ -216,10 +216,20 @@ module Async
216
216
  end
217
217
 
218
218
  # Wait for the thread to exit and return the exit status.
219
+ # @asynchronous This method may block.
220
+ #
221
+ # @parameter timeout [Numeric | Nil] Maximum time to wait before forceful termination.
219
222
  # @returns [Status]
220
- def wait
223
+ def wait(timeout = 0.1)
221
224
  if @waiter
222
- @waiter.join
225
+ Console.debug(self, "Waiting for thread to exit...", timeout: timeout)
226
+
227
+ unless @waiter.join(timeout)
228
+ Console.warn(self) {"Thread #{@thread} is blocking, sending kill signal..."}
229
+ self.kill!
230
+ @waiter.join
231
+ end
232
+
223
233
  @waiter = nil
224
234
  end
225
235
 
@@ -5,6 +5,6 @@
5
5
 
6
6
  module Async
7
7
  module Container
8
- VERSION = "0.25.0"
8
+ VERSION = "0.27.0"
9
9
  end
10
10
  end
data/readme.md CHANGED
@@ -26,6 +26,15 @@ Please see the [project documentation](https://socketry.github.io/async-containe
26
26
 
27
27
  Please see the [project releases](https://socketry.github.io/async-container/releases/index) for all releases.
28
28
 
29
+ ### v0.27.0
30
+
31
+ - Increased default interrupt timeout and terminate timeout to 10 seconds each.
32
+ - Expose `ASYNC_CONTAINER_INTERRUPT_TIMEOUT` and `ASYNC_CONTAINER_TERMINATE_TIMEOUT` environment variables for configuring default timeouts.
33
+
34
+ ### v0.26.0
35
+
36
+ - [Production Reliability Improvements](https://socketry.github.io/async-container/releases/index#production-reliability-improvements)
37
+
29
38
  ### v0.25.0
30
39
 
31
40
  - Introduce `async:container:notify:log:ready?` task for detecting process readiness.
data/releases.md CHANGED
@@ -1,5 +1,22 @@
1
1
  # Releases
2
2
 
3
+ ## v0.27.0
4
+
5
+ - Increased default interrupt timeout and terminate timeout to 10 seconds each.
6
+ - Expose `ASYNC_CONTAINER_INTERRUPT_TIMEOUT` and `ASYNC_CONTAINER_TERMINATE_TIMEOUT` environment variables for configuring default timeouts.
7
+
8
+ ## v0.26.0
9
+
10
+ ### Production Reliability Improvements
11
+
12
+ This release significantly improves container reliability by eliminating production hangs caused by unresponsive child processes.
13
+
14
+ **SIGKILL Fallback Support**: Containers now automatically escalate to SIGKILL when child processes ignore SIGINT and SIGTERM signals. This prevents the critical production issue where containers would hang indefinitely waiting for uncooperative processes to exit.
15
+
16
+ **Hang Prevention**: Individual child processes now have timeout-based hang prevention. If a process closes its notification pipe but doesn't actually exit, the container will detect this and escalate to SIGKILL after a reasonable timeout instead of hanging forever.
17
+
18
+ **Improved Three-Phase Shutdown**: The `Group#stop()` method now uses a cleaner interrupt → terminate → kill escalation sequence with configurable timeouts for each phase, giving well-behaved processes multiple opportunities to shut down gracefully while ensuring unresponsive processes are eventually terminated.
19
+
3
20
  ## v0.25.0
4
21
 
5
22
  - Introduce `async:container:notify:log:ready?` task for detecting process readiness.
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: async-container
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.25.0
4
+ version: 0.27.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Samuel Williams
@@ -105,7 +105,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
105
105
  - !ruby/object:Gem::Version
106
106
  version: '0'
107
107
  requirements: []
108
- rubygems_version: 3.6.7
108
+ rubygems_version: 3.6.9
109
109
  specification_version: 4
110
110
  summary: Abstract container-based parallelism using threads and processes where appropriate.
111
111
  test_files: []
metadata.gz.sig CHANGED
Binary file