async-container 0.30.0 → 0.31.0

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 84ca0883f2ae0742253c1f2bdb08c98152770c41609ed179a81f2249d9cc46f7
4
- data.tar.gz: 5d551d20be155c60557783ecf4064d0d90a3748ba18d9c66324a515208671bf0
3
+ metadata.gz: e15ecc1c913a50b101dae8f01ec2086eee565be48c684eaee110d80f5ffa5001
4
+ data.tar.gz: a30a269c56373cb3ab073e1facd9c375f41f5169fdf6a1570e0748165af26e87
5
5
  SHA512:
6
- metadata.gz: fddd4b0f2ea6f459a0ad39a3e806b776abda60cb97fbface314f6f93b0690fac817b0f8b09a486aa387c1dd06dc6f6ae3c9620d146ef4e2f50318532d36ac20d
7
- data.tar.gz: cd14547b2e6be8a9139bcadfbf5593e777d04d6f26f02cff4c808d9b2ba3c205e36acfc19af646dca95e2f1bd52edf9020d79461a6c5987304c28eb5bc271516
6
+ metadata.gz: b75e11606c5a2c9878f41564cc1523dcad05b30589281736060a58d2ad0ffc04bb4523d87fcba8183d48427100886bff1e20256a9b9d94729b80744dacbb8784
7
+ data.tar.gz: 9d07035b6912a654d38d9a0b3810d01451384c19f5c1f3ff967d41a47b99c06ca718e2f6a25e79a3cd4a6ff580d1484a97c9e4692b8b708a8eeb8aa09e2a9d98
checksums.yaml.gz.sig CHANGED
Binary file
data/lib/async/container/controller.rb CHANGED
@@ -8,6 +8,7 @@ require_relative "best"
8
8
 
9
9
  require_relative "statistics"
10
10
  require_relative "notify"
11
+ require_relative "policy"
11
12
 
12
13
  module Async
13
14
  module Container
@@ -62,11 +63,18 @@ module Async
62
63
  # The current container being managed by the controller.
63
64
  attr :container
64
65
 
66
+ # Create a policy for managing child lifecycle events.
67
+ # Can be overridden by a sub-class to provide a custom policy.
68
+ # @returns [Policy] The policy to use for the container.
69
+ def make_policy
70
+ Policy::DEFAULT
71
+ end
72
+
65
73
  # Create a container for the controller.
66
74
  # Can be overridden by a sub-class.
67
75
  # @returns [Generic] A specific container instance to use.
68
76
  def create_container
69
- @container_class.new
77
+ @container_class.new(policy: self.make_policy)
70
78
  end
71
79
 
72
80
  # Whether the controller has a running container.
data/lib/async/container/generic.rb CHANGED
@@ -10,6 +10,7 @@ require "async/clock"
10
10
  require_relative "group"
11
11
  require_relative "keyed"
12
12
  require_relative "statistics"
13
+ require_relative "policy"
13
14
 
14
15
  module Async
15
16
  module Container
@@ -42,8 +43,9 @@ module Async
42
43
 
43
44
  # Initialize the container.
44
45
  #
46
+ # @parameter policy [Policy] The policy to use for managing child lifecycle events.
45
47
  # @parameter options [Hash] Options passed to the {Group} instance.
46
- def initialize(**options)
48
+ def initialize(policy: Policy::DEFAULT, **options)
47
49
  @group = Group.new(**options)
48
50
  @running = true
49
51
 
@@ -51,6 +53,7 @@ module Async
51
53
 
52
54
  @statistics = Statistics.new
53
55
  @keyed = {}
56
+ @policy = policy
54
57
  end
55
58
 
56
59
  # @attribute [Group] The group of running children instances.
@@ -64,6 +67,9 @@ module Async
64
67
  # @attribute [Hash(Child, Hash)] The state of each child instance.
65
68
  attr :state
66
69
 
70
+ # @attribute [Policy] The policy for managing child lifecycle events.
71
+ attr_accessor :policy
72
+
67
73
  # A human readable representation of the container.
68
74
  # @returns [String]
69
75
  def to_s
@@ -157,18 +163,30 @@ module Async
157
163
  @running = true
158
164
  end
159
165
 
160
- protected def health_check_failed!(child, age_clock, health_check_timeout)
161
- Console.warn(self, "Child failed health check!", child: child, age: age_clock.total, health_check_timeout: health_check_timeout)
162
-
163
- # If the child has failed the health check, we assume the worst and kill it immediately:
164
- child.kill!
166
+ protected def health_check_failed(child, age_clock, health_check_timeout)
167
+ begin
168
+ @policy.health_check_failed(
169
+ self, child,
170
+ age: age_clock.total,
171
+ timeout: health_check_timeout
172
+ )
173
+ rescue => error
174
+ Console.error(self, "Policy error in health_check_failed!", exception: error)
175
+ child.kill!
176
+ end
165
177
  end
166
178
 
167
- protected def startup_failed!(child, age_clock, startup_timeout)
168
- Console.warn(self, "Child failed startup!", child: child, age: age_clock.total, startup_timeout: startup_timeout)
169
-
170
- # If the child has failed the startup, we assume the worst and kill it immediately:
171
- child.kill!
179
+ protected def startup_failed(child, age_clock, startup_timeout)
180
+ begin
181
+ @policy.startup_failed(
182
+ self, child,
183
+ age: age_clock.total,
184
+ timeout: startup_timeout
185
+ )
186
+ rescue => error
187
+ Console.error(self, "Policy error in startup_failed!", exception: error)
188
+ child.kill!
189
+ end
172
190
  end
173
191
 
174
192
  # Spawn a child instance into the container.
@@ -194,6 +212,13 @@ module Async
194
212
  child = self.start(name, &block)
195
213
  state = insert(key, child)
196
214
 
215
+ # Notify policy of spawn
216
+ begin
217
+ @policy.child_spawn(self, child, name: name, key: key)
218
+ rescue => error
219
+ Console.error(self, "Policy error in child_spawn!", exception: error)
220
+ end
221
+
197
222
  Console.debug(self, "Started child.", child: child, spawn: {key: key, restart: restart, health_check_timeout: health_check_timeout}, statistics: @statistics)
198
223
 
199
224
  # If a health check or startup timeout is specified, we will monitor the child process and terminate it if it does not update its state within the specified time.
@@ -211,14 +236,14 @@ module Async
211
236
  # If a health check timeout is specified, we will monitor the child process and terminate it if it does not update its state within the specified time.
212
237
  if health_check_timeout
213
238
  if health_check_timeout < age_clock.total
214
- health_check_failed!(child, age_clock, health_check_timeout)
239
+ health_check_failed(child, age_clock, health_check_timeout)
215
240
  end
216
241
  end
217
242
  else
218
243
  # If a startup timeout is specified, we will monitor the child process and terminate it if it does not become ready within the specified time.
219
244
  if startup_timeout
220
245
  if startup_timeout < age_clock.total
221
- startup_failed!(child, age_clock, startup_timeout)
246
+ startup_failed(child, age_clock, startup_timeout)
222
247
  end
223
248
  end
224
249
  end
@@ -237,6 +262,13 @@ module Async
237
262
  delete(key, child)
238
263
  end
239
264
 
265
+ # Notify policy of exit
266
+ begin
267
+ @policy.child_exit(self, child, status: status, name: name, key: key)
268
+ rescue => error
269
+ Console.error(self, "Policy error in child_exit!", exception: error)
270
+ end
271
+
240
272
  if status&.success?
241
273
  Console.debug(self, "Child exited successfully.", status: status, running: @running)
242
274
  else
@@ -244,7 +276,7 @@ module Async
244
276
  Console.error(self, "Child exited with error!", status: status, running: @running)
245
277
  end
246
278
 
247
- if restart
279
+ if restart && @running
248
280
  @statistics.restart!
249
281
  else
250
282
  break
data/lib/async/container/policy.rb ADDED
@@ -0,0 +1,95 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Released under the MIT License.
4
+ # Copyright, 2026, by Samuel Williams.
5
+
6
+ module Async
7
+ module Container
8
+ # A policy for managing container behavior and responding to child process lifecycle events.
9
+ class Policy
10
+ # Called when a child is spawned.
11
+ # @parameter container [Generic] The container.
12
+ # @parameter child [Child] The child process.
13
+ # @parameter name [String] The name of the child.
14
+ # @parameter key [Symbol] An optional key for the child.
15
+ def child_spawn(container, child, name:, key:)
16
+ end
17
+
18
+ # Called when a child exits.
19
+ # @parameter container [Generic] The container.
20
+ # @parameter child [Child] The child process.
21
+ # @parameter status [Process::Status] The exit status.
22
+ # @parameter name [String] The name of the child.
23
+ # @parameter key [Symbol] An optional key for the child.
24
+ def child_exit(container, child, status:, name:, key:)
25
+ end
26
+
27
+ # Called when a health check fails.
28
+ # Subclasses can override to implement custom behavior (e.g., alerting before killing).
29
+ # @parameter container [Generic] The container.
30
+ # @parameter child [Child] The child process.
31
+ # @parameter age [Numeric] How long the child has been running.
32
+ # @parameter timeout [Numeric] The health check timeout that was exceeded.
33
+ def health_check_failed(container, child, age:, timeout:)
34
+ Console.warn(container, "Health check failed!", child: child, age: age, timeout: timeout)
35
+ child.kill!
36
+ end
37
+
38
+ # Called when startup fails (child doesn't become ready in time).
39
+ # Subclasses can override to implement custom behavior (e.g., alerting before killing).
40
+ # @parameter container [Generic] The container.
41
+ # @parameter child [Child] The child process.
42
+ # @parameter age [Numeric] How long the child has been running.
43
+ # @parameter timeout [Numeric] The startup timeout that was exceeded.
44
+ def startup_failed(container, child, age:, timeout:)
45
+ Console.warn(container, "Startup failed!", child: child, age: age, timeout: timeout)
46
+ child.kill!
47
+ end
48
+
49
+ # Helper method to check if a status indicates a segfault.
50
+ # @parameter status [Process::Status] The exit status.
51
+ # @returns [Boolean] Whether the process was terminated by SIGSEGV.
52
+ def segfault?(status)
53
+ status&.termsig == Signal.list["SEGV"]
54
+ end
55
+
56
+ # Helper method to check if a status indicates an abort.
57
+ # @parameter status [Process::Status] The exit status.
58
+ # @returns [Boolean] Whether the process was terminated by SIGABRT.
59
+ def abort?(status)
60
+ status&.termsig == Signal.list["ABRT"]
61
+ end
62
+
63
+ # Helper method to check if a status indicates the process was killed.
64
+ # @parameter status [Process::Status] The exit status.
65
+ # @returns [Boolean] Whether the process was terminated by SIGKILL.
66
+ def killed?(status)
67
+ status&.termsig == Signal.list["KILL"]
68
+ end
69
+
70
+ # Helper method to check if a status indicates success.
71
+ # @parameter status [Process::Status] The exit status.
72
+ # @returns [Boolean] Whether the process exited successfully.
73
+ def success?(status)
74
+ status&.success?
75
+ end
76
+
77
+ # Helper method to get the signal that terminated the process.
78
+ # @parameter status [Process::Status] The exit status.
79
+ # @returns [Integer, nil] The signal number, or nil if not terminated by signal.
80
+ def signal(status)
81
+ status&.termsig
82
+ end
83
+
84
+ # Helper method to get the exit code.
85
+ # @parameter status [Process::Status] The exit status.
86
+ # @returns [Integer, nil] The exit code, or nil if terminated by signal.
87
+ def exit_code(status)
88
+ status&.exitstatus
89
+ end
90
+
91
+ # The default policy instance.
92
+ DEFAULT = self.new.freeze
93
+ end
94
+ end
95
+ end
data/lib/async/container/statistics.rb CHANGED
@@ -9,11 +9,75 @@ module Async
9
9
  module Container
10
10
  # Tracks various statistics relating to child instances in a container.
11
11
  class Statistics
12
+ # Tracks rate information over a sliding time window using a circular buffer.
13
+ class Rate
14
+ # Initialize the event rate counter.
15
+ #
16
+ # @parameter window [Integer] The time window in seconds for rate calculations.
17
+ def initialize(window: 60)
18
+ @window = window
19
+ @samples = [0] * @window
20
+ @last_update = Array.new(@window, 0)
21
+ end
22
+
23
+ # Get the current time in seconds.
24
+ # @returns [Integer] The current monotonic time in seconds.
25
+ def now
26
+ ::Process.clock_gettime(::Process::CLOCK_MONOTONIC).to_i
27
+ end
28
+
29
+ # Add a value to the current time slot.
30
+ # @parameter value [Numeric] The value to add (default: 1)
31
+ # @parameter time [Integer] The current time in seconds (default: monotonic time)
32
+ def add(value = 1, time: self.now)
33
+ index = time % @samples.size
34
+
35
+ # If this slot hasn't been updated in a full window cycle, reset it
36
+ if (time - @last_update[index]) >= @window
37
+ @samples[index] = 0
38
+ end
39
+
40
+ @samples[index] += value
41
+ @last_update[index] = time
42
+ end
43
+
44
+ # Get the total count in the current window.
45
+ # @parameter time [Integer] The current time in seconds (default: monotonic time)
46
+ # @returns [Numeric] The sum of all samples in the window.
47
+ def total(time: self.now)
48
+ @samples.each_with_index.sum do |value, index|
49
+ # Only count samples that are within the window (inclusive of window boundary)
50
+ if (time - @last_update[index]) <= @window
51
+ value
52
+ else
53
+ 0
54
+ end
55
+ end
56
+ end
57
+
58
+ # Get the rate per second over the window.
59
+ # @parameter time [Integer] The current time in seconds (default: monotonic time)
60
+ # @returns [Float] The average rate per second.
61
+ def per_second(time: self.now)
62
+ total(time: time).to_f / @window
63
+ end
64
+
65
+ # Get the rate per minute over the window.
66
+ # @parameter time [Integer] The current time in seconds (default: monotonic time)
67
+ # @returns [Float] The average rate per minute.
68
+ def per_minute(time: self.now)
69
+ per_second(time: time) * 60
70
+ end
71
+ end
12
72
  # Initialize the statistics all to 0.
13
- def initialize
73
+ # @parameter window [Integer] The time window in seconds for rate calculations.
74
+ def initialize(window: 60)
14
75
  @spawns = 0
15
76
  @restarts = 0
16
77
  @failures = 0
78
+
79
+ @restart_rate = Rate.new(window: window)
80
+ @failure_rate = Rate.new(window: window)
17
81
  end
18
82
 
19
83
  # How many child instances have been spawned.
@@ -36,13 +100,23 @@ module Async
36
100
  # Increment the number of restarts by 1.
37
101
  def restart!
38
102
  @restarts += 1
103
+ @restart_rate.add(1)
39
104
  end
40
105
 
41
106
  # Increment the number of failures by 1.
42
107
  def failure!
43
108
  @failures += 1
109
+ @failure_rate.add(1)
44
110
  end
45
111
 
112
+ # Get the restart rate tracker.
113
+ # @attribute [Rate]
114
+ attr :restart_rate
115
+
116
+ # Get the failure rate tracker.
117
+ # @attribute [Rate]
118
+ attr :failure_rate
119
+
46
120
  # Whether there have been any failures.
47
121
  # @returns [Boolean] If the failure count is greater than 0.
48
122
  def failed?
@@ -65,6 +139,8 @@ module Async
65
139
  spawns: @spawns,
66
140
  restarts: @restarts,
67
141
  failures: @failures,
142
+ restart_rate: @restart_rate.per_second,
143
+ failure_rate: @failure_rate.per_second,
68
144
  }
69
145
  end
70
146
 
data/lib/async/container/version.rb CHANGED
@@ -5,6 +5,6 @@
5
5
 
6
6
  module Async
7
7
  module Container
8
- VERSION = "0.30.0"
8
+ VERSION = "0.31.0"
9
9
  end
10
10
  end
data/readme.md CHANGED
@@ -18,6 +18,8 @@ Please see the [project documentation](https://socketry.github.io/async-containe
18
18
 
19
19
  - [Getting Started](https://socketry.github.io/async-container/guides/getting-started/index) - This guide explains how to use `async-container` to build basic scalable systems.
20
20
 
21
+ - [Container Policies](https://socketry.github.io/async-container/guides/policies/index) - This guide explains how to use policies to monitor container health and implement custom failure handling strategies.
22
+
21
23
  - [Systemd Integration](https://socketry.github.io/async-container/guides/systemd-integration/index) - This guide explains how to use `async-container` with systemd to manage your application as a service.
22
24
 
23
25
  - [Kubernetes Integration](https://socketry.github.io/async-container/guides/kubernetes-integration/index) - This guide explains how to use `async-container` with Kubernetes to manage your application as a containerized service.
@@ -26,6 +28,12 @@ Please see the [project documentation](https://socketry.github.io/async-containe
26
28
 
27
29
  Please see the [project releases](https://socketry.github.io/async-container/releases/index) for all releases.
28
30
 
31
+ ### v0.31.0
32
+
33
+ - Introduce `Async::Container::Policy` for managing child lifecycle events and implementing custom failure handling strategies.
34
+ - Add `Async::Container::Statistics::Rate` for tracking failure and restart rates over sliding time windows.
35
+ - Fix restart counter to only increment when actually restarting (check `@running` flag).
36
+
29
37
  ### v0.30.0
30
38
 
31
39
  - `SIGTERM` is now graceful, the same as `SIGINT`, for better compatibility with Kubernetes and systemd.
@@ -66,10 +74,6 @@ Please see the [project releases](https://socketry.github.io/async-container/rel
66
74
  - Increased default interrupt timeout and terminate timeout to 10 seconds each.
67
75
  - Expose `ASYNC_CONTAINER_INTERRUPT_TIMEOUT` and `ASYNC_CONTAINER_TERMINATE_TIMEOUT` environment variables for configuring default timeouts.
68
76
 
69
- ### v0.26.0
70
-
71
- - [Production Reliability Improvements](https://socketry.github.io/async-container/releases/index#production-reliability-improvements)
72
-
73
77
  ## Contributing
74
78
 
75
79
  We welcome contributions to this project.
data/releases.md CHANGED
@@ -1,5 +1,11 @@
1
1
  # Releases
2
2
 
3
+ ## v0.31.0
4
+
5
+ - Introduce `Async::Container::Policy` for managing child lifecycle events and implementing custom failure handling strategies.
6
+ - Add `Async::Container::Statistics::Rate` for tracking failure and restart rates over sliding time windows.
7
+ - Fix restart counter to only increment when actually restarting (check `@running` flag).
8
+
3
9
  ## v0.30.0
4
10
 
5
11
  - `SIGTERM` is now graceful, the same as `SIGINT`, for better compatibility with Kubernetes and systemd.
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: async-container
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.30.0
4
+ version: 0.31.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Samuel Williams
@@ -78,6 +78,7 @@ files:
78
78
  - lib/async/container/notify/pipe.rb
79
79
  - lib/async/container/notify/server.rb
80
80
  - lib/async/container/notify/socket.rb
81
+ - lib/async/container/policy.rb
81
82
  - lib/async/container/statistics.rb
82
83
  - lib/async/container/threaded.rb
83
84
  - lib/async/container/version.rb
metadata.gz.sig CHANGED
Binary file