async-container 0.29.1 → 0.31.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/lib/async/container/controller.rb +19 -8
- data/lib/async/container/forked.rb +2 -2
- data/lib/async/container/generic.rb +48 -16
- data/lib/async/container/group.rb +34 -38
- data/lib/async/container/notify/pipe.rb +1 -1
- data/lib/async/container/notify/server.rb +9 -0
- data/lib/async/container/policy.rb +95 -0
- data/lib/async/container/statistics.rb +77 -1
- data/lib/async/container/threaded.rb +1 -1
- data/lib/async/container/version.rb +1 -1
- data/readme.md +13 -8
- data/releases.md +11 -0
- data.tar.gz.sig +0 -0
- metadata +2 -1
- metadata.gz.sig +0 -0
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: e15ecc1c913a50b101dae8f01ec2086eee565be48c684eaee110d80f5ffa5001
|
|
4
|
+
data.tar.gz: a30a269c56373cb3ab073e1facd9c375f41f5169fdf6a1570e0748165af26e87
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: b75e11606c5a2c9878f41564cc1523dcad05b30589281736060a58d2ad0ffc04bb4523d87fcba8183d48427100886bff1e20256a9b9d94729b80744dacbb8784
|
|
7
|
+
data.tar.gz: 9d07035b6912a654d38d9a0b3810d01451384c19f5c1f3ff967d41a47b99c06ca718e2f6a25e79a3cd4a6ff580d1484a97c9e4692b8b708a8eeb8aa09e2a9d98
|
checksums.yaml.gz.sig
CHANGED
|
Binary file
|
|
@@ -8,6 +8,7 @@ require_relative "best"
|
|
|
8
8
|
|
|
9
9
|
require_relative "statistics"
|
|
10
10
|
require_relative "notify"
|
|
11
|
+
require_relative "policy"
|
|
11
12
|
|
|
12
13
|
module Async
|
|
13
14
|
module Container
|
|
@@ -62,11 +63,18 @@ module Async
|
|
|
62
63
|
# The current container being managed by the controller.
|
|
63
64
|
attr :container
|
|
64
65
|
|
|
66
|
+
# Create a policy for managing child lifecycle events.
|
|
67
|
+
# Can be overridden by a sub-class to provide a custom policy.
|
|
68
|
+
# @returns [Policy] The policy to use for the container.
|
|
69
|
+
def make_policy
|
|
70
|
+
Policy::DEFAULT
|
|
71
|
+
end
|
|
72
|
+
|
|
65
73
|
# Create a container for the controller.
|
|
66
74
|
# Can be overridden by a sub-class.
|
|
67
75
|
# @returns [Generic] A specific container instance to use.
|
|
68
76
|
def create_container
|
|
69
|
-
@container_class.new
|
|
77
|
+
@container_class.new(policy: self.make_policy)
|
|
70
78
|
end
|
|
71
79
|
|
|
72
80
|
# Whether the controller has a running container.
|
|
@@ -134,9 +142,6 @@ module Async
|
|
|
134
142
|
if container.failed?
|
|
135
143
|
@notify&.error!("Container failed to start!")
|
|
136
144
|
|
|
137
|
-
Console.info(self, "Stopping failed container...")
|
|
138
|
-
container.stop(false)
|
|
139
|
-
|
|
140
145
|
raise SetupError, container
|
|
141
146
|
end
|
|
142
147
|
|
|
@@ -151,9 +156,14 @@ module Async
|
|
|
151
156
|
end
|
|
152
157
|
|
|
153
158
|
@notify&.ready!(size: @container.size)
|
|
159
|
+
rescue => error
|
|
160
|
+
raise
|
|
154
161
|
ensure
|
|
155
|
-
# If we are leaving this function with an exception,
|
|
156
|
-
container
|
|
162
|
+
# If we are leaving this function with an exception, kill the container:
|
|
163
|
+
if container
|
|
164
|
+
Console.warn(self, "Stopping failed container...", exception: error)
|
|
165
|
+
container.stop(false)
|
|
166
|
+
end
|
|
157
167
|
end
|
|
158
168
|
|
|
159
169
|
# Reload the existing container. Children instances will be reloaded using `SIGHUP`.
|
|
@@ -222,9 +232,10 @@ module Async
|
|
|
222
232
|
::Thread.current.raise(Interrupt)
|
|
223
233
|
end
|
|
224
234
|
|
|
235
|
+
# SIGTERM behaves the same as SIGINT by default.
|
|
225
236
|
terminate_action = Signal.trap(:TERM) do
|
|
226
|
-
# $stderr.puts "Received TERM signal,
|
|
227
|
-
::Thread.current.raise(
|
|
237
|
+
# $stderr.puts "Received TERM signal, interrupting...", caller
|
|
238
|
+
::Thread.current.raise(Interrupt) # Same as SIGINT
|
|
228
239
|
end
|
|
229
240
|
|
|
230
241
|
hangup_action = Signal.trap(:HUP) do
|
|
@@ -102,7 +102,7 @@ module Async
|
|
|
102
102
|
::Process.fork do
|
|
103
103
|
# We use `Thread.current.raise(...)` so that exceptions are filtered through `Thread.handle_interrupt` correctly.
|
|
104
104
|
Signal.trap(:INT){::Thread.current.raise(Interrupt)}
|
|
105
|
-
Signal.trap(:TERM){::Thread.current.raise(
|
|
105
|
+
Signal.trap(:TERM){::Thread.current.raise(Interrupt)} # Same as SIGINT.
|
|
106
106
|
Signal.trap(:HUP){::Thread.current.raise(Restart)}
|
|
107
107
|
|
|
108
108
|
# This could be a configuration option:
|
|
@@ -245,7 +245,7 @@ module Async
|
|
|
245
245
|
_, @status = ::Process.wait2(@pid, ::Process::WNOHANG)
|
|
246
246
|
|
|
247
247
|
if @status.nil?
|
|
248
|
-
Console.warn(self, "Process is blocking, sending kill signal...", child: {process_id: @pid},
|
|
248
|
+
Console.warn(self, "Process is blocking, sending kill signal...", child: {process_id: @pid}, timeout: timeout)
|
|
249
249
|
self.kill!
|
|
250
250
|
|
|
251
251
|
# Wait for the process to exit:
|
|
@@ -10,6 +10,7 @@ require "async/clock"
|
|
|
10
10
|
require_relative "group"
|
|
11
11
|
require_relative "keyed"
|
|
12
12
|
require_relative "statistics"
|
|
13
|
+
require_relative "policy"
|
|
13
14
|
|
|
14
15
|
module Async
|
|
15
16
|
module Container
|
|
@@ -42,8 +43,9 @@ module Async
|
|
|
42
43
|
|
|
43
44
|
# Initialize the container.
|
|
44
45
|
#
|
|
46
|
+
# @parameter policy [Policy] The policy to use for managing child lifecycle events.
|
|
45
47
|
# @parameter options [Hash] Options passed to the {Group} instance.
|
|
46
|
-
def initialize(**options)
|
|
48
|
+
def initialize(policy: Policy::DEFAULT, **options)
|
|
47
49
|
@group = Group.new(**options)
|
|
48
50
|
@running = true
|
|
49
51
|
|
|
@@ -51,6 +53,7 @@ module Async
|
|
|
51
53
|
|
|
52
54
|
@statistics = Statistics.new
|
|
53
55
|
@keyed = {}
|
|
56
|
+
@policy = policy
|
|
54
57
|
end
|
|
55
58
|
|
|
56
59
|
# @attribute [Group] The group of running children instances.
|
|
@@ -64,6 +67,9 @@ module Async
|
|
|
64
67
|
# @attribute [Hash(Child, Hash)] The state of each child instance.
|
|
65
68
|
attr :state
|
|
66
69
|
|
|
70
|
+
# @attribute [Policy] The policy for managing child lifecycle events.
|
|
71
|
+
attr_accessor :policy
|
|
72
|
+
|
|
67
73
|
# A human readable representation of the container.
|
|
68
74
|
# @returns [String]
|
|
69
75
|
def to_s
|
|
@@ -126,7 +132,7 @@ module Async
|
|
|
126
132
|
self.sleep
|
|
127
133
|
|
|
128
134
|
if self.status?(:ready)
|
|
129
|
-
Console.
|
|
135
|
+
Console.debug(self) do |buffer|
|
|
130
136
|
buffer.puts "All ready:"
|
|
131
137
|
@state.each do |child, state|
|
|
132
138
|
buffer.puts "\t#{child.inspect}: #{state}"
|
|
@@ -141,7 +147,7 @@ module Async
|
|
|
141
147
|
# Stop the children instances.
|
|
142
148
|
# @parameter timeout [Boolean | Numeric] Whether to stop gracefully, or a specific timeout.
|
|
143
149
|
def stop(timeout = true)
|
|
144
|
-
Console.
|
|
150
|
+
Console.info(self, "Stopping container...", timeout: timeout)
|
|
145
151
|
@running = false
|
|
146
152
|
@group.stop(timeout)
|
|
147
153
|
|
|
@@ -157,18 +163,30 @@ module Async
|
|
|
157
163
|
@running = true
|
|
158
164
|
end
|
|
159
165
|
|
|
160
|
-
protected def health_check_failed
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
166
|
+
protected def health_check_failed(child, age_clock, health_check_timeout)
|
|
167
|
+
begin
|
|
168
|
+
@policy.health_check_failed(
|
|
169
|
+
self, child,
|
|
170
|
+
age: age_clock.total,
|
|
171
|
+
timeout: health_check_timeout
|
|
172
|
+
)
|
|
173
|
+
rescue => error
|
|
174
|
+
Console.error(self, "Policy error in health_check_failed!", exception: error)
|
|
175
|
+
child.kill!
|
|
176
|
+
end
|
|
165
177
|
end
|
|
166
178
|
|
|
167
|
-
protected def startup_failed
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
179
|
+
protected def startup_failed(child, age_clock, startup_timeout)
|
|
180
|
+
begin
|
|
181
|
+
@policy.startup_failed(
|
|
182
|
+
self, child,
|
|
183
|
+
age: age_clock.total,
|
|
184
|
+
timeout: startup_timeout
|
|
185
|
+
)
|
|
186
|
+
rescue => error
|
|
187
|
+
Console.error(self, "Policy error in startup_failed!", exception: error)
|
|
188
|
+
child.kill!
|
|
189
|
+
end
|
|
172
190
|
end
|
|
173
191
|
|
|
174
192
|
# Spawn a child instance into the container.
|
|
@@ -194,6 +212,13 @@ module Async
|
|
|
194
212
|
child = self.start(name, &block)
|
|
195
213
|
state = insert(key, child)
|
|
196
214
|
|
|
215
|
+
# Notify policy of spawn
|
|
216
|
+
begin
|
|
217
|
+
@policy.child_spawn(self, child, name: name, key: key)
|
|
218
|
+
rescue => error
|
|
219
|
+
Console.error(self, "Policy error in child_spawn!", exception: error)
|
|
220
|
+
end
|
|
221
|
+
|
|
197
222
|
Console.debug(self, "Started child.", child: child, spawn: {key: key, restart: restart, health_check_timeout: health_check_timeout}, statistics: @statistics)
|
|
198
223
|
|
|
199
224
|
# If a health check or startup timeout is specified, we will monitor the child process and terminate it if it does not update its state within the specified time.
|
|
@@ -211,14 +236,14 @@ module Async
|
|
|
211
236
|
# If a health check timeout is specified, we will monitor the child process and terminate it if it does not update its state within the specified time.
|
|
212
237
|
if health_check_timeout
|
|
213
238
|
if health_check_timeout < age_clock.total
|
|
214
|
-
health_check_failed
|
|
239
|
+
health_check_failed(child, age_clock, health_check_timeout)
|
|
215
240
|
end
|
|
216
241
|
end
|
|
217
242
|
else
|
|
218
243
|
# If a startup timeout is specified, we will monitor the child process and terminate it if it does not become ready within the specified time.
|
|
219
244
|
if startup_timeout
|
|
220
245
|
if startup_timeout < age_clock.total
|
|
221
|
-
startup_failed
|
|
246
|
+
startup_failed(child, age_clock, startup_timeout)
|
|
222
247
|
end
|
|
223
248
|
end
|
|
224
249
|
end
|
|
@@ -237,6 +262,13 @@ module Async
|
|
|
237
262
|
delete(key, child)
|
|
238
263
|
end
|
|
239
264
|
|
|
265
|
+
# Notify policy of exit
|
|
266
|
+
begin
|
|
267
|
+
@policy.child_exit(self, child, status: status, name: name, key: key)
|
|
268
|
+
rescue => error
|
|
269
|
+
Console.error(self, "Policy error in child_exit!", exception: error)
|
|
270
|
+
end
|
|
271
|
+
|
|
240
272
|
if status&.success?
|
|
241
273
|
Console.debug(self, "Child exited successfully.", status: status, running: @running)
|
|
242
274
|
else
|
|
@@ -244,7 +276,7 @@ module Async
|
|
|
244
276
|
Console.error(self, "Child exited with error!", status: status, running: @running)
|
|
245
277
|
end
|
|
246
278
|
|
|
247
|
-
if restart
|
|
279
|
+
if restart && @running
|
|
248
280
|
@statistics.restart!
|
|
249
281
|
else
|
|
250
282
|
break
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
# Released under the MIT License.
|
|
4
|
-
# Copyright, 2018-
|
|
4
|
+
# Copyright, 2018-2026, by Samuel Williams.
|
|
5
5
|
|
|
6
6
|
require "fiber"
|
|
7
7
|
require "async/clock"
|
|
@@ -10,11 +10,20 @@ require_relative "error"
|
|
|
10
10
|
|
|
11
11
|
module Async
|
|
12
12
|
module Container
|
|
13
|
-
# The default timeout for interrupting processes, before escalating to terminating.
|
|
14
|
-
INTERRUPT_TIMEOUT = ENV.fetch("ASYNC_CONTAINER_INTERRUPT_TIMEOUT", 10).to_f
|
|
15
|
-
|
|
16
13
|
# The default timeout for terminating processes, before escalating to killing.
|
|
17
|
-
|
|
14
|
+
GRACEFUL_TIMEOUT = ENV.fetch("ASYNC_CONTAINER_GRACEFUL_TIMEOUT", "true").then do |value|
|
|
15
|
+
case value
|
|
16
|
+
when "true"
|
|
17
|
+
true # Default timeout for graceful termination.
|
|
18
|
+
when "false"
|
|
19
|
+
false # Immediately kill the processes.
|
|
20
|
+
else
|
|
21
|
+
value.to_f
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
# The default timeout for graceful termination.
|
|
26
|
+
DEFAULT_GRACEFUL_TIMEOUT = 10.0
|
|
18
27
|
|
|
19
28
|
# Manages a group of running processes.
|
|
20
29
|
class Group
|
|
@@ -155,50 +164,37 @@ module Async
|
|
|
155
164
|
# Stop all child processes with a multi-phase shutdown sequence.
|
|
156
165
|
#
|
|
157
166
|
# A graceful shutdown performs the following sequence:
|
|
158
|
-
# 1. Send SIGINT and wait up to `
|
|
159
|
-
# 2. Send
|
|
160
|
-
# 3. Send SIGKILL and wait indefinitely for process cleanup
|
|
167
|
+
# 1. Send SIGINT and wait up to `graceful` seconds if specified.
|
|
168
|
+
# 2. Send SIGKILL and wait indefinitely for process cleanup.
|
|
161
169
|
#
|
|
162
|
-
# If `graceful` is
|
|
170
|
+
# If `graceful` is true, default to `DEFAULT_GRACEFUL_TIMEOUT` (10 seconds).
|
|
171
|
+
# If `graceful` is false, skip the SIGINT phase and go directly to SIGKILL.
|
|
163
172
|
#
|
|
164
|
-
# @parameter graceful [Boolean] Whether to send SIGINT first or skip directly to
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
def stop(graceful = true, interrupt_timeout: INTERRUPT_TIMEOUT, terminate_timeout: TERMINATE_TIMEOUT)
|
|
168
|
-
case graceful
|
|
169
|
-
when true
|
|
170
|
-
# Use defaults.
|
|
171
|
-
when false
|
|
172
|
-
interrupt_timeout = nil
|
|
173
|
-
when Numeric
|
|
174
|
-
interrupt_timeout = graceful
|
|
175
|
-
terminate_timeout = graceful
|
|
176
|
-
end
|
|
177
|
-
|
|
178
|
-
Console.debug(self, "Stopping all processes...", interrupt_timeout: interrupt_timeout, terminate_timeout: terminate_timeout)
|
|
173
|
+
# @parameter graceful [Boolean | Numeric] Whether to send SIGINT first or skip directly to SIGKILL.
|
|
174
|
+
def stop(graceful = GRACEFUL_TIMEOUT)
|
|
175
|
+
Console.debug(self, "Stopping all processes...", graceful: graceful)
|
|
179
176
|
|
|
180
177
|
# If a timeout is specified, interrupt the children first:
|
|
181
|
-
if
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
# Interrupt the children:
|
|
178
|
+
if graceful
|
|
179
|
+
# Send SIGINT to the children:
|
|
185
180
|
self.interrupt
|
|
186
181
|
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
if terminate_timeout and self.any?
|
|
192
|
-
clock = Async::Clock.start
|
|
182
|
+
if graceful == true
|
|
183
|
+
graceful = DEFAULT_GRACEFUL_TIMEOUT
|
|
184
|
+
end
|
|
193
185
|
|
|
194
|
-
|
|
195
|
-
self.terminate
|
|
186
|
+
clock = Clock.start
|
|
196
187
|
|
|
197
188
|
# Wait for the children to exit:
|
|
198
|
-
self.wait_for_exit(clock,
|
|
189
|
+
self.wait_for_exit(clock, graceful)
|
|
199
190
|
end
|
|
200
|
-
|
|
191
|
+
ensure
|
|
192
|
+
# Do our best to clean up the children:
|
|
201
193
|
if any?
|
|
194
|
+
if graceful
|
|
195
|
+
Console.warn(self, "Killing processes after graceful shutdown failed...", size: self.size, clock: clock)
|
|
196
|
+
end
|
|
197
|
+
|
|
202
198
|
self.kill
|
|
203
199
|
self.wait
|
|
204
200
|
end
|
|
@@ -129,6 +129,15 @@ module Async
|
|
|
129
129
|
end
|
|
130
130
|
end
|
|
131
131
|
end
|
|
132
|
+
|
|
133
|
+
# Wait until a "ready" message is received from the child process.
|
|
134
|
+
def wait_until_ready
|
|
135
|
+
while message = receive
|
|
136
|
+
if message[:ready] == true
|
|
137
|
+
return
|
|
138
|
+
end
|
|
139
|
+
end
|
|
140
|
+
end
|
|
132
141
|
end
|
|
133
142
|
end
|
|
134
143
|
end
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Released under the MIT License.
|
|
4
|
+
# Copyright, 2026, by Samuel Williams.
|
|
5
|
+
|
|
6
|
+
module Async
|
|
7
|
+
module Container
|
|
8
|
+
# A policy for managing container behavior and responding to child process lifecycle events.
|
|
9
|
+
class Policy
|
|
10
|
+
# Called when a child is spawned.
|
|
11
|
+
# @parameter container [Generic] The container.
|
|
12
|
+
# @parameter child [Child] The child process.
|
|
13
|
+
# @parameter name [String] The name of the child.
|
|
14
|
+
# @parameter key [Symbol] An optional key for the child.
|
|
15
|
+
def child_spawn(container, child, name:, key:)
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
# Called when a child exits.
|
|
19
|
+
# @parameter container [Generic] The container.
|
|
20
|
+
# @parameter child [Child] The child process.
|
|
21
|
+
# @parameter status [Process::Status] The exit status.
|
|
22
|
+
# @parameter name [String] The name of the child.
|
|
23
|
+
# @parameter key [Symbol] An optional key for the child.
|
|
24
|
+
def child_exit(container, child, status:, name:, key:)
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
# Called when a health check fails.
|
|
28
|
+
# Subclasses can override to implement custom behavior (e.g., alerting before killing).
|
|
29
|
+
# @parameter container [Generic] The container.
|
|
30
|
+
# @parameter child [Child] The child process.
|
|
31
|
+
# @parameter age [Numeric] How long the child has been running.
|
|
32
|
+
# @parameter timeout [Numeric] The health check timeout that was exceeded.
|
|
33
|
+
def health_check_failed(container, child, age:, timeout:)
|
|
34
|
+
Console.warn(container, "Health check failed!", child: child, age: age, timeout: timeout)
|
|
35
|
+
child.kill!
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
# Called when startup fails (child doesn't become ready in time).
|
|
39
|
+
# Subclasses can override to implement custom behavior (e.g., alerting before killing).
|
|
40
|
+
# @parameter container [Generic] The container.
|
|
41
|
+
# @parameter child [Child] The child process.
|
|
42
|
+
# @parameter age [Numeric] How long the child has been running.
|
|
43
|
+
# @parameter timeout [Numeric] The startup timeout that was exceeded.
|
|
44
|
+
def startup_failed(container, child, age:, timeout:)
|
|
45
|
+
Console.warn(container, "Startup failed!", child: child, age: age, timeout: timeout)
|
|
46
|
+
child.kill!
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
# Helper method to check if a status indicates a segfault.
|
|
50
|
+
# @parameter status [Process::Status] The exit status.
|
|
51
|
+
# @returns [Boolean] Whether the process was terminated by SIGSEGV.
|
|
52
|
+
def segfault?(status)
|
|
53
|
+
status&.termsig == Signal.list["SEGV"]
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
# Helper method to check if a status indicates an abort.
|
|
57
|
+
# @parameter status [Process::Status] The exit status.
|
|
58
|
+
# @returns [Boolean] Whether the process was terminated by SIGABRT.
|
|
59
|
+
def abort?(status)
|
|
60
|
+
status&.termsig == Signal.list["ABRT"]
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
# Helper method to check if a status indicates the process was killed.
|
|
64
|
+
# @parameter status [Process::Status] The exit status.
|
|
65
|
+
# @returns [Boolean] Whether the process was terminated by SIGKILL.
|
|
66
|
+
def killed?(status)
|
|
67
|
+
status&.termsig == Signal.list["KILL"]
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
# Helper method to check if a status indicates success.
|
|
71
|
+
# @parameter status [Process::Status] The exit status.
|
|
72
|
+
# @returns [Boolean] Whether the process exited successfully.
|
|
73
|
+
def success?(status)
|
|
74
|
+
status&.success?
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
# Helper method to get the signal that terminated the process.
|
|
78
|
+
# @parameter status [Process::Status] The exit status.
|
|
79
|
+
# @returns [Integer, nil] The signal number, or nil if not terminated by signal.
|
|
80
|
+
def signal(status)
|
|
81
|
+
status&.termsig
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
# Helper method to get the exit code.
|
|
85
|
+
# @parameter status [Process::Status] The exit status.
|
|
86
|
+
# @returns [Integer, nil] The exit code, or nil if terminated by signal.
|
|
87
|
+
def exit_code(status)
|
|
88
|
+
status&.exitstatus
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
# The default policy instance.
|
|
92
|
+
DEFAULT = self.new.freeze
|
|
93
|
+
end
|
|
94
|
+
end
|
|
95
|
+
end
|
|
@@ -9,11 +9,75 @@ module Async
|
|
|
9
9
|
module Container
|
|
10
10
|
# Tracks various statistics relating to child instances in a container.
|
|
11
11
|
class Statistics
|
|
12
|
+
# Tracks rate information over a sliding time window using a circular buffer.
|
|
13
|
+
class Rate
|
|
14
|
+
# Initialize the event rate counter.
|
|
15
|
+
#
|
|
16
|
+
# @parameter window [Integer] The time window in seconds for rate calculations.
|
|
17
|
+
def initialize(window: 60)
|
|
18
|
+
@window = window
|
|
19
|
+
@samples = [0] * @window
|
|
20
|
+
@last_update = Array.new(@window, 0)
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
# Get the current time in seconds.
|
|
24
|
+
# @returns [Integer] The current monotonic time in seconds.
|
|
25
|
+
def now
|
|
26
|
+
::Process.clock_gettime(::Process::CLOCK_MONOTONIC).to_i
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
# Add a value to the current time slot.
|
|
30
|
+
# @parameter value [Numeric] The value to add (default: 1)
|
|
31
|
+
# @parameter time [Integer] The current time in seconds (default: monotonic time)
|
|
32
|
+
def add(value = 1, time: self.now)
|
|
33
|
+
index = time % @samples.size
|
|
34
|
+
|
|
35
|
+
# If this slot hasn't been updated in a full window cycle, reset it
|
|
36
|
+
if (time - @last_update[index]) >= @window
|
|
37
|
+
@samples[index] = 0
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
@samples[index] += value
|
|
41
|
+
@last_update[index] = time
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
# Get the total count in the current window.
|
|
45
|
+
# @parameter time [Integer] The current time in seconds (default: monotonic time)
|
|
46
|
+
# @returns [Numeric] The sum of all samples in the window.
|
|
47
|
+
def total(time: self.now)
|
|
48
|
+
@samples.each_with_index.sum do |value, index|
|
|
49
|
+
# Only count samples that are within the window (inclusive of window boundary)
|
|
50
|
+
if (time - @last_update[index]) <= @window
|
|
51
|
+
value
|
|
52
|
+
else
|
|
53
|
+
0
|
|
54
|
+
end
|
|
55
|
+
end
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
# Get the rate per second over the window.
|
|
59
|
+
# @parameter time [Integer] The current time in seconds (default: monotonic time)
|
|
60
|
+
# @returns [Float] The average rate per second.
|
|
61
|
+
def per_second(time: self.now)
|
|
62
|
+
total(time: time).to_f / @window
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
# Get the rate per minute over the window.
|
|
66
|
+
# @parameter time [Integer] The current time in seconds (default: monotonic time)
|
|
67
|
+
# @returns [Float] The average rate per minute.
|
|
68
|
+
def per_minute(time: self.now)
|
|
69
|
+
per_second(time: time) * 60
|
|
70
|
+
end
|
|
71
|
+
end
|
|
12
72
|
# Initialize the statistics all to 0.
|
|
13
|
-
|
|
73
|
+
# @parameter window [Integer] The time window in seconds for rate calculations.
|
|
74
|
+
def initialize(window: 60)
|
|
14
75
|
@spawns = 0
|
|
15
76
|
@restarts = 0
|
|
16
77
|
@failures = 0
|
|
78
|
+
|
|
79
|
+
@restart_rate = Rate.new(window: window)
|
|
80
|
+
@failure_rate = Rate.new(window: window)
|
|
17
81
|
end
|
|
18
82
|
|
|
19
83
|
# How many child instances have been spawned.
|
|
@@ -36,13 +100,23 @@ module Async
|
|
|
36
100
|
# Increment the number of restarts by 1.
|
|
37
101
|
def restart!
|
|
38
102
|
@restarts += 1
|
|
103
|
+
@restart_rate.add(1)
|
|
39
104
|
end
|
|
40
105
|
|
|
41
106
|
# Increment the number of failures by 1.
|
|
42
107
|
def failure!
|
|
43
108
|
@failures += 1
|
|
109
|
+
@failure_rate.add(1)
|
|
44
110
|
end
|
|
45
111
|
|
|
112
|
+
# Get the restart rate tracker.
|
|
113
|
+
# @attribute [Rate]
|
|
114
|
+
attr :restart_rate
|
|
115
|
+
|
|
116
|
+
# Get the failure rate tracker.
|
|
117
|
+
# @attribute [Rate]
|
|
118
|
+
attr :failure_rate
|
|
119
|
+
|
|
46
120
|
# Whether there have been any failures.
|
|
47
121
|
# @returns [Boolean] If the failure count is greater than 0.
|
|
48
122
|
def failed?
|
|
@@ -65,6 +139,8 @@ module Async
|
|
|
65
139
|
spawns: @spawns,
|
|
66
140
|
restarts: @restarts,
|
|
67
141
|
failures: @failures,
|
|
142
|
+
restart_rate: @restart_rate.per_second,
|
|
143
|
+
failure_rate: @failure_rate.per_second,
|
|
68
144
|
}
|
|
69
145
|
end
|
|
70
146
|
|
|
@@ -225,7 +225,7 @@ module Async
|
|
|
225
225
|
Console.debug(self, "Waiting for thread to exit...", child: {thread_id: @thread.object_id}, timeout: timeout)
|
|
226
226
|
|
|
227
227
|
unless @waiter.join(timeout)
|
|
228
|
-
Console.warn(self, "Thread is blocking, sending kill signal...", child: {thread_id: @thread.object_id},
|
|
228
|
+
Console.warn(self, "Thread is blocking, sending kill signal...", child: {thread_id: @thread.object_id}, timeout: timeout)
|
|
229
229
|
self.kill!
|
|
230
230
|
@waiter.join
|
|
231
231
|
end
|
data/readme.md
CHANGED
|
@@ -18,6 +18,8 @@ Please see the [project documentation](https://socketry.github.io/async-containe
|
|
|
18
18
|
|
|
19
19
|
- [Getting Started](https://socketry.github.io/async-container/guides/getting-started/index) - This guide explains how to use `async-container` to build basic scalable systems.
|
|
20
20
|
|
|
21
|
+
- [Container Policies](https://socketry.github.io/async-container/guides/policies/index) - This guide explains how to use policies to monitor container health and implement custom failure handling strategies.
|
|
22
|
+
|
|
21
23
|
- [Systemd Integration](https://socketry.github.io/async-container/guides/systemd-integration/index) - This guide explains how to use `async-container` with systemd to manage your application as a service.
|
|
22
24
|
|
|
23
25
|
- [Kubernetes Integration](https://socketry.github.io/async-container/guides/kubernetes-integration/index) - This guide explains how to use `async-container` with Kubernetes to manage your application as a containerized service.
|
|
@@ -26,6 +28,17 @@ Please see the [project documentation](https://socketry.github.io/async-containe
|
|
|
26
28
|
|
|
27
29
|
Please see the [project releases](https://socketry.github.io/async-container/releases/index) for all releases.
|
|
28
30
|
|
|
31
|
+
### v0.31.0
|
|
32
|
+
|
|
33
|
+
- Introduce `Async::Container::Policy` for managing child lifecycle events and implementing custom failure handling strategies.
|
|
34
|
+
- Add `Async::Container::Statistics::Rate` for tracking failure and restart rates over sliding time windows.
|
|
35
|
+
- Fix restart counter to only increment when actually restarting (check `@running` flag).
|
|
36
|
+
|
|
37
|
+
### v0.30.0
|
|
38
|
+
|
|
39
|
+
- `SIGTERM` is now graceful, the same as `SIGINT`, for better compatibility with Kubernetes and systemd.
|
|
40
|
+
- `ASYNC_CONTAINER_INTERRUPT_TIMEOUT` and `ASYNC_CONTAINER_TERMINATE_TIMEOUT` are removed and replaced by `ASYNC_CONTAINER_GRACEFUL_TIMEOUT`.
|
|
41
|
+
|
|
29
42
|
### v0.29.0
|
|
30
43
|
|
|
31
44
|
- Introduce `Client#healthy!` for sending health check messages.
|
|
@@ -61,14 +74,6 @@ Please see the [project releases](https://socketry.github.io/async-container/rel
|
|
|
61
74
|
- Increased default interrupt timeout and terminate timeout to 10 seconds each.
|
|
62
75
|
- Expose `ASYNC_CONTAINER_INTERRUPT_TIMEOUT` and `ASYNC_CONTAINER_TERMINATE_TIMEOUT` environment variables for configuring default timeouts.
|
|
63
76
|
|
|
64
|
-
### v0.26.0
|
|
65
|
-
|
|
66
|
-
- [Production Reliability Improvements](https://socketry.github.io/async-container/releases/index#production-reliability-improvements)
|
|
67
|
-
|
|
68
|
-
### v0.25.0
|
|
69
|
-
|
|
70
|
-
- Introduce `async:container:notify:log:ready?` task for detecting process readiness.
|
|
71
|
-
|
|
72
77
|
## Contributing
|
|
73
78
|
|
|
74
79
|
We welcome contributions to this project.
|
data/releases.md
CHANGED
|
@@ -1,5 +1,16 @@
|
|
|
1
1
|
# Releases
|
|
2
2
|
|
|
3
|
+
## v0.31.0
|
|
4
|
+
|
|
5
|
+
- Introduce `Async::Container::Policy` for managing child lifecycle events and implementing custom failure handling strategies.
|
|
6
|
+
- Add `Async::Container::Statistics::Rate` for tracking failure and restart rates over sliding time windows.
|
|
7
|
+
- Fix restart counter to only increment when actually restarting (check `@running` flag).
|
|
8
|
+
|
|
9
|
+
## v0.30.0
|
|
10
|
+
|
|
11
|
+
- `SIGTERM` is now graceful, the same as `SIGINT`, for better compatibility with Kubernetes and systemd.
|
|
12
|
+
- `ASYNC_CONTAINER_INTERRUPT_TIMEOUT` and `ASYNC_CONTAINER_TERMINATE_TIMEOUT` are removed and replaced by `ASYNC_CONTAINER_GRACEFUL_TIMEOUT`.
|
|
13
|
+
|
|
3
14
|
## v0.29.0
|
|
4
15
|
|
|
5
16
|
- Introduce `Client#healthy!` for sending health check messages.
|
data.tar.gz.sig
CHANGED
|
Binary file
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: async-container
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.29.1
|
|
4
|
+
version: 0.31.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Samuel Williams
|
|
@@ -78,6 +78,7 @@ files:
|
|
|
78
78
|
- lib/async/container/notify/pipe.rb
|
|
79
79
|
- lib/async/container/notify/server.rb
|
|
80
80
|
- lib/async/container/notify/socket.rb
|
|
81
|
+
- lib/async/container/policy.rb
|
|
81
82
|
- lib/async/container/statistics.rb
|
|
82
83
|
- lib/async/container/threaded.rb
|
|
83
84
|
- lib/async/container/version.rb
|
metadata.gz.sig
CHANGED
|
Binary file
|