async-container 0.30.0 → 0.31.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/lib/async/container/controller.rb +9 -1
- data/lib/async/container/generic.rb +46 -14
- data/lib/async/container/policy.rb +95 -0
- data/lib/async/container/statistics.rb +77 -1
- data/lib/async/container/version.rb +1 -1
- data/readme.md +8 -4
- data/releases.md +6 -0
- data.tar.gz.sig +0 -0
- metadata +2 -1
- metadata.gz.sig +0 -0
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: e15ecc1c913a50b101dae8f01ec2086eee565be48c684eaee110d80f5ffa5001
|
|
4
|
+
data.tar.gz: a30a269c56373cb3ab073e1facd9c375f41f5169fdf6a1570e0748165af26e87
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: b75e11606c5a2c9878f41564cc1523dcad05b30589281736060a58d2ad0ffc04bb4523d87fcba8183d48427100886bff1e20256a9b9d94729b80744dacbb8784
|
|
7
|
+
data.tar.gz: 9d07035b6912a654d38d9a0b3810d01451384c19f5c1f3ff967d41a47b99c06ca718e2f6a25e79a3cd4a6ff580d1484a97c9e4692b8b708a8eeb8aa09e2a9d98
|
checksums.yaml.gz.sig
CHANGED
|
Binary file
|
data/lib/async/container/controller.rb
CHANGED
|
@@ -8,6 +8,7 @@ require_relative "best"
|
|
|
8
8
|
|
|
9
9
|
require_relative "statistics"
|
|
10
10
|
require_relative "notify"
|
|
11
|
+
require_relative "policy"
|
|
11
12
|
|
|
12
13
|
module Async
|
|
13
14
|
module Container
|
|
@@ -62,11 +63,18 @@ module Async
|
|
|
62
63
|
# The current container being managed by the controller.
|
|
63
64
|
attr :container
|
|
64
65
|
|
|
66
|
+
# Create a policy for managing child lifecycle events.
|
|
67
|
+
# Can be overridden by a sub-class to provide a custom policy.
|
|
68
|
+
# @returns [Policy] The policy to use for the container.
|
|
69
|
+
def make_policy
|
|
70
|
+
Policy::DEFAULT
|
|
71
|
+
end
|
|
72
|
+
|
|
65
73
|
# Create a container for the controller.
|
|
66
74
|
# Can be overridden by a sub-class.
|
|
67
75
|
# @returns [Generic] A specific container instance to use.
|
|
68
76
|
def create_container
|
|
69
|
-
@container_class.new
|
|
77
|
+
@container_class.new(policy: self.make_policy)
|
|
70
78
|
end
|
|
71
79
|
|
|
72
80
|
# Whether the controller has a running container.
|
data/lib/async/container/generic.rb
CHANGED
|
@@ -10,6 +10,7 @@ require "async/clock"
|
|
|
10
10
|
require_relative "group"
|
|
11
11
|
require_relative "keyed"
|
|
12
12
|
require_relative "statistics"
|
|
13
|
+
require_relative "policy"
|
|
13
14
|
|
|
14
15
|
module Async
|
|
15
16
|
module Container
|
|
@@ -42,8 +43,9 @@ module Async
|
|
|
42
43
|
|
|
43
44
|
# Initialize the container.
|
|
44
45
|
#
|
|
46
|
+
# @parameter policy [Policy] The policy to use for managing child lifecycle events.
|
|
45
47
|
# @parameter options [Hash] Options passed to the {Group} instance.
|
|
46
|
-
def initialize(**options)
|
|
48
|
+
def initialize(policy: Policy::DEFAULT, **options)
|
|
47
49
|
@group = Group.new(**options)
|
|
48
50
|
@running = true
|
|
49
51
|
|
|
@@ -51,6 +53,7 @@ module Async
|
|
|
51
53
|
|
|
52
54
|
@statistics = Statistics.new
|
|
53
55
|
@keyed = {}
|
|
56
|
+
@policy = policy
|
|
54
57
|
end
|
|
55
58
|
|
|
56
59
|
# @attribute [Group] The group of running children instances.
|
|
@@ -64,6 +67,9 @@ module Async
|
|
|
64
67
|
# @attribute [Hash(Child, Hash)] The state of each child instance.
|
|
65
68
|
attr :state
|
|
66
69
|
|
|
70
|
+
# @attribute [Policy] The policy for managing child lifecycle events.
|
|
71
|
+
attr_accessor :policy
|
|
72
|
+
|
|
67
73
|
# A human readable representation of the container.
|
|
68
74
|
# @returns [String]
|
|
69
75
|
def to_s
|
|
@@ -157,18 +163,30 @@ module Async
|
|
|
157
163
|
@running = true
|
|
158
164
|
end
|
|
159
165
|
|
|
160
|
-
protected def health_check_failed
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
166
|
+
protected def health_check_failed(child, age_clock, health_check_timeout)
|
|
167
|
+
begin
|
|
168
|
+
@policy.health_check_failed(
|
|
169
|
+
self, child,
|
|
170
|
+
age: age_clock.total,
|
|
171
|
+
timeout: health_check_timeout
|
|
172
|
+
)
|
|
173
|
+
rescue => error
|
|
174
|
+
Console.error(self, "Policy error in health_check_failed!", exception: error)
|
|
175
|
+
child.kill!
|
|
176
|
+
end
|
|
165
177
|
end
|
|
166
178
|
|
|
167
|
-
protected def startup_failed
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
179
|
+
protected def startup_failed(child, age_clock, startup_timeout)
|
|
180
|
+
begin
|
|
181
|
+
@policy.startup_failed(
|
|
182
|
+
self, child,
|
|
183
|
+
age: age_clock.total,
|
|
184
|
+
timeout: startup_timeout
|
|
185
|
+
)
|
|
186
|
+
rescue => error
|
|
187
|
+
Console.error(self, "Policy error in startup_failed!", exception: error)
|
|
188
|
+
child.kill!
|
|
189
|
+
end
|
|
172
190
|
end
|
|
173
191
|
|
|
174
192
|
# Spawn a child instance into the container.
|
|
@@ -194,6 +212,13 @@ module Async
|
|
|
194
212
|
child = self.start(name, &block)
|
|
195
213
|
state = insert(key, child)
|
|
196
214
|
|
|
215
|
+
# Notify policy of spawn
|
|
216
|
+
begin
|
|
217
|
+
@policy.child_spawn(self, child, name: name, key: key)
|
|
218
|
+
rescue => error
|
|
219
|
+
Console.error(self, "Policy error in child_spawn!", exception: error)
|
|
220
|
+
end
|
|
221
|
+
|
|
197
222
|
Console.debug(self, "Started child.", child: child, spawn: {key: key, restart: restart, health_check_timeout: health_check_timeout}, statistics: @statistics)
|
|
198
223
|
|
|
199
224
|
# If a health check or startup timeout is specified, we will monitor the child process and terminate it if it does not update its state within the specified time.
|
|
@@ -211,14 +236,14 @@ module Async
|
|
|
211
236
|
# If a health check timeout is specified, we will monitor the child process and terminate it if it does not update its state within the specified time.
|
|
212
237
|
if health_check_timeout
|
|
213
238
|
if health_check_timeout < age_clock.total
|
|
214
|
-
health_check_failed
|
|
239
|
+
health_check_failed(child, age_clock, health_check_timeout)
|
|
215
240
|
end
|
|
216
241
|
end
|
|
217
242
|
else
|
|
218
243
|
# If a startup timeout is specified, we will monitor the child process and terminate it if it does not become ready within the specified time.
|
|
219
244
|
if startup_timeout
|
|
220
245
|
if startup_timeout < age_clock.total
|
|
221
|
-
startup_failed
|
|
246
|
+
startup_failed(child, age_clock, startup_timeout)
|
|
222
247
|
end
|
|
223
248
|
end
|
|
224
249
|
end
|
|
@@ -237,6 +262,13 @@ module Async
|
|
|
237
262
|
delete(key, child)
|
|
238
263
|
end
|
|
239
264
|
|
|
265
|
+
# Notify policy of exit
|
|
266
|
+
begin
|
|
267
|
+
@policy.child_exit(self, child, status: status, name: name, key: key)
|
|
268
|
+
rescue => error
|
|
269
|
+
Console.error(self, "Policy error in child_exit!", exception: error)
|
|
270
|
+
end
|
|
271
|
+
|
|
240
272
|
if status&.success?
|
|
241
273
|
Console.debug(self, "Child exited successfully.", status: status, running: @running)
|
|
242
274
|
else
|
|
@@ -244,7 +276,7 @@ module Async
|
|
|
244
276
|
Console.error(self, "Child exited with error!", status: status, running: @running)
|
|
245
277
|
end
|
|
246
278
|
|
|
247
|
-
if restart
|
|
279
|
+
if restart && @running
|
|
248
280
|
@statistics.restart!
|
|
249
281
|
else
|
|
250
282
|
break
|
data/lib/async/container/policy.rb
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Released under the MIT License.
|
|
4
|
+
# Copyright, 2026, by Samuel Williams.
|
|
5
|
+
|
|
6
|
+
module Async
|
|
7
|
+
module Container
|
|
8
|
+
# A policy for managing container behavior and responding to child process lifecycle events.
|
|
9
|
+
class Policy
|
|
10
|
+
# Called when a child is spawned.
|
|
11
|
+
# @parameter container [Generic] The container.
|
|
12
|
+
# @parameter child [Child] The child process.
|
|
13
|
+
# @parameter name [String] The name of the child.
|
|
14
|
+
# @parameter key [Symbol] An optional key for the child.
|
|
15
|
+
def child_spawn(container, child, name:, key:)
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
# Called when a child exits.
|
|
19
|
+
# @parameter container [Generic] The container.
|
|
20
|
+
# @parameter child [Child] The child process.
|
|
21
|
+
# @parameter status [Process::Status] The exit status.
|
|
22
|
+
# @parameter name [String] The name of the child.
|
|
23
|
+
# @parameter key [Symbol] An optional key for the child.
|
|
24
|
+
def child_exit(container, child, status:, name:, key:)
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
# Called when a health check fails.
|
|
28
|
+
# Subclasses can override to implement custom behavior (e.g., alerting before killing).
|
|
29
|
+
# @parameter container [Generic] The container.
|
|
30
|
+
# @parameter child [Child] The child process.
|
|
31
|
+
# @parameter age [Numeric] How long the child has been running.
|
|
32
|
+
# @parameter timeout [Numeric] The health check timeout that was exceeded.
|
|
33
|
+
def health_check_failed(container, child, age:, timeout:)
|
|
34
|
+
Console.warn(container, "Health check failed!", child: child, age: age, timeout: timeout)
|
|
35
|
+
child.kill!
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
# Called when startup fails (child doesn't become ready in time).
|
|
39
|
+
# Subclasses can override to implement custom behavior (e.g., alerting before killing).
|
|
40
|
+
# @parameter container [Generic] The container.
|
|
41
|
+
# @parameter child [Child] The child process.
|
|
42
|
+
# @parameter age [Numeric] How long the child has been running.
|
|
43
|
+
# @parameter timeout [Numeric] The startup timeout that was exceeded.
|
|
44
|
+
def startup_failed(container, child, age:, timeout:)
|
|
45
|
+
Console.warn(container, "Startup failed!", child: child, age: age, timeout: timeout)
|
|
46
|
+
child.kill!
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
# Helper method to check if a status indicates a segfault.
|
|
50
|
+
# @parameter status [Process::Status] The exit status.
|
|
51
|
+
# @returns [Boolean] Whether the process was terminated by SIGSEGV.
|
|
52
|
+
def segfault?(status)
|
|
53
|
+
status&.termsig == Signal.list["SEGV"]
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
# Helper method to check if a status indicates an abort.
|
|
57
|
+
# @parameter status [Process::Status] The exit status.
|
|
58
|
+
# @returns [Boolean] Whether the process was terminated by SIGABRT.
|
|
59
|
+
def abort?(status)
|
|
60
|
+
status&.termsig == Signal.list["ABRT"]
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
# Helper method to check if a status indicates the process was killed.
|
|
64
|
+
# @parameter status [Process::Status] The exit status.
|
|
65
|
+
# @returns [Boolean] Whether the process was terminated by SIGKILL.
|
|
66
|
+
def killed?(status)
|
|
67
|
+
status&.termsig == Signal.list["KILL"]
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
# Helper method to check if a status indicates success.
|
|
71
|
+
# @parameter status [Process::Status] The exit status.
|
|
72
|
+
# @returns [Boolean] Whether the process exited successfully.
|
|
73
|
+
def success?(status)
|
|
74
|
+
status&.success?
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
# Helper method to get the signal that terminated the process.
|
|
78
|
+
# @parameter status [Process::Status] The exit status.
|
|
79
|
+
# @returns [Integer, nil] The signal number, or nil if not terminated by signal.
|
|
80
|
+
def signal(status)
|
|
81
|
+
status&.termsig
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
# Helper method to get the exit code.
|
|
85
|
+
# @parameter status [Process::Status] The exit status.
|
|
86
|
+
# @returns [Integer, nil] The exit code, or nil if terminated by signal.
|
|
87
|
+
def exit_code(status)
|
|
88
|
+
status&.exitstatus
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
# The default policy instance.
|
|
92
|
+
DEFAULT = self.new.freeze
|
|
93
|
+
end
|
|
94
|
+
end
|
|
95
|
+
end
|
data/lib/async/container/statistics.rb
CHANGED
|
@@ -9,11 +9,75 @@ module Async
|
|
|
9
9
|
module Container
|
|
10
10
|
# Tracks various statistics relating to child instances in a container.
|
|
11
11
|
class Statistics
|
|
12
|
+
# Tracks rate information over a sliding time window using a circular buffer.
|
|
13
|
+
class Rate
|
|
14
|
+
# Initialize the event rate counter.
|
|
15
|
+
#
|
|
16
|
+
# @parameter window [Integer] The time window in seconds for rate calculations.
|
|
17
|
+
def initialize(window: 60)
|
|
18
|
+
@window = window
|
|
19
|
+
@samples = [0] * @window
|
|
20
|
+
@last_update = Array.new(@window, 0)
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
# Get the current time in seconds.
|
|
24
|
+
# @returns [Integer] The current monotonic time in seconds.
|
|
25
|
+
def now
|
|
26
|
+
::Process.clock_gettime(::Process::CLOCK_MONOTONIC).to_i
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
# Add a value to the current time slot.
|
|
30
|
+
# @parameter value [Numeric] The value to add (default: 1)
|
|
31
|
+
# @parameter time [Integer] The current time in seconds (default: monotonic time)
|
|
32
|
+
def add(value = 1, time: self.now)
|
|
33
|
+
index = time % @samples.size
|
|
34
|
+
|
|
35
|
+
# If this slot hasn't been updated in a full window cycle, reset it
|
|
36
|
+
if (time - @last_update[index]) >= @window
|
|
37
|
+
@samples[index] = 0
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
@samples[index] += value
|
|
41
|
+
@last_update[index] = time
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
# Get the total count in the current window.
|
|
45
|
+
# @parameter time [Integer] The current time in seconds (default: monotonic time)
|
|
46
|
+
# @returns [Numeric] The sum of all samples in the window.
|
|
47
|
+
def total(time: self.now)
|
|
48
|
+
@samples.each_with_index.sum do |value, index|
|
|
49
|
+
# Only count samples that are within the window (inclusive of window boundary)
|
|
50
|
+
if (time - @last_update[index]) <= @window
|
|
51
|
+
value
|
|
52
|
+
else
|
|
53
|
+
0
|
|
54
|
+
end
|
|
55
|
+
end
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
# Get the rate per second over the window.
|
|
59
|
+
# @parameter time [Integer] The current time in seconds (default: monotonic time)
|
|
60
|
+
# @returns [Float] The average rate per second.
|
|
61
|
+
def per_second(time: self.now)
|
|
62
|
+
total(time: time).to_f / @window
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
# Get the rate per minute over the window.
|
|
66
|
+
# @parameter time [Integer] The current time in seconds (default: monotonic time)
|
|
67
|
+
# @returns [Float] The average rate per minute.
|
|
68
|
+
def per_minute(time: self.now)
|
|
69
|
+
per_second(time: time) * 60
|
|
70
|
+
end
|
|
71
|
+
end
|
|
12
72
|
# Initialize the statistics all to 0.
|
|
13
|
-
|
|
73
|
+
# @parameter window [Integer] The time window in seconds for rate calculations.
|
|
74
|
+
def initialize(window: 60)
|
|
14
75
|
@spawns = 0
|
|
15
76
|
@restarts = 0
|
|
16
77
|
@failures = 0
|
|
78
|
+
|
|
79
|
+
@restart_rate = Rate.new(window: window)
|
|
80
|
+
@failure_rate = Rate.new(window: window)
|
|
17
81
|
end
|
|
18
82
|
|
|
19
83
|
# How many child instances have been spawned.
|
|
@@ -36,13 +100,23 @@ module Async
|
|
|
36
100
|
# Increment the number of restarts by 1.
|
|
37
101
|
def restart!
|
|
38
102
|
@restarts += 1
|
|
103
|
+
@restart_rate.add(1)
|
|
39
104
|
end
|
|
40
105
|
|
|
41
106
|
# Increment the number of failures by 1.
|
|
42
107
|
def failure!
|
|
43
108
|
@failures += 1
|
|
109
|
+
@failure_rate.add(1)
|
|
44
110
|
end
|
|
45
111
|
|
|
112
|
+
# Get the restart rate tracker.
|
|
113
|
+
# @attribute [Rate]
|
|
114
|
+
attr :restart_rate
|
|
115
|
+
|
|
116
|
+
# Get the failure rate tracker.
|
|
117
|
+
# @attribute [Rate]
|
|
118
|
+
attr :failure_rate
|
|
119
|
+
|
|
46
120
|
# Whether there have been any failures.
|
|
47
121
|
# @returns [Boolean] If the failure count is greater than 0.
|
|
48
122
|
def failed?
|
|
@@ -65,6 +139,8 @@ module Async
|
|
|
65
139
|
spawns: @spawns,
|
|
66
140
|
restarts: @restarts,
|
|
67
141
|
failures: @failures,
|
|
142
|
+
restart_rate: @restart_rate.per_second,
|
|
143
|
+
failure_rate: @failure_rate.per_second,
|
|
68
144
|
}
|
|
69
145
|
end
|
|
70
146
|
|
data/readme.md
CHANGED
|
@@ -18,6 +18,8 @@ Please see the [project documentation](https://socketry.github.io/async-containe
|
|
|
18
18
|
|
|
19
19
|
- [Getting Started](https://socketry.github.io/async-container/guides/getting-started/index) - This guide explains how to use `async-container` to build basic scalable systems.
|
|
20
20
|
|
|
21
|
+
- [Container Policies](https://socketry.github.io/async-container/guides/policies/index) - This guide explains how to use policies to monitor container health and implement custom failure handling strategies.
|
|
22
|
+
|
|
21
23
|
- [Systemd Integration](https://socketry.github.io/async-container/guides/systemd-integration/index) - This guide explains how to use `async-container` with systemd to manage your application as a service.
|
|
22
24
|
|
|
23
25
|
- [Kubernetes Integration](https://socketry.github.io/async-container/guides/kubernetes-integration/index) - This guide explains how to use `async-container` with Kubernetes to manage your application as a containerized service.
|
|
@@ -26,6 +28,12 @@ Please see the [project documentation](https://socketry.github.io/async-containe
|
|
|
26
28
|
|
|
27
29
|
Please see the [project releases](https://socketry.github.io/async-container/releases/index) for all releases.
|
|
28
30
|
|
|
31
|
+
### v0.31.0
|
|
32
|
+
|
|
33
|
+
- Introduce `Async::Container::Policy` for managing child lifecycle events and implementing custom failure handling strategies.
|
|
34
|
+
- Add `Async::Container::Statistics::Rate` for tracking failure and restart rates over sliding time windows.
|
|
35
|
+
- Fix restart counter to only increment when actually restarting (check `@running` flag).
|
|
36
|
+
|
|
29
37
|
### v0.30.0
|
|
30
38
|
|
|
31
39
|
- `SIGTERM` is now graceful, the same as `SIGINT`, for better compatibility with Kubernetes and systemd.
|
|
@@ -66,10 +74,6 @@ Please see the [project releases](https://socketry.github.io/async-container/rel
|
|
|
66
74
|
- Increased default interrupt timeout and terminate timeout to 10 seconds each.
|
|
67
75
|
- Expose `ASYNC_CONTAINER_INTERRUPT_TIMEOUT` and `ASYNC_CONTAINER_TERMINATE_TIMEOUT` environment variables for configuring default timeouts.
|
|
68
76
|
|
|
69
|
-
### v0.26.0
|
|
70
|
-
|
|
71
|
-
- [Production Reliability Improvements](https://socketry.github.io/async-container/releases/index#production-reliability-improvements)
|
|
72
|
-
|
|
73
77
|
## Contributing
|
|
74
78
|
|
|
75
79
|
We welcome contributions to this project.
|
data/releases.md
CHANGED
|
@@ -1,5 +1,11 @@
|
|
|
1
1
|
# Releases
|
|
2
2
|
|
|
3
|
+
## v0.31.0
|
|
4
|
+
|
|
5
|
+
- Introduce `Async::Container::Policy` for managing child lifecycle events and implementing custom failure handling strategies.
|
|
6
|
+
- Add `Async::Container::Statistics::Rate` for tracking failure and restart rates over sliding time windows.
|
|
7
|
+
- Fix restart counter to only increment when actually restarting (check `@running` flag).
|
|
8
|
+
|
|
3
9
|
## v0.30.0
|
|
4
10
|
|
|
5
11
|
- `SIGTERM` is now graceful, the same as `SIGINT`, for better compatibility with Kubernetes and systemd.
|
data.tar.gz.sig
CHANGED
|
Binary file
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: async-container
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.30.0
|
|
4
|
+
version: 0.31.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Samuel Williams
|
|
@@ -78,6 +78,7 @@ files:
|
|
|
78
78
|
- lib/async/container/notify/pipe.rb
|
|
79
79
|
- lib/async/container/notify/server.rb
|
|
80
80
|
- lib/async/container/notify/socket.rb
|
|
81
|
+
- lib/async/container/policy.rb
|
|
81
82
|
- lib/async/container/statistics.rb
|
|
82
83
|
- lib/async/container/threaded.rb
|
|
83
84
|
- lib/async/container/version.rb
|
metadata.gz.sig
CHANGED
|
Binary file
|