async-service-supervisor 0.14.0 → 0.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/lib/async/service/supervisor/memory_monitor.rb +12 -35
- data/lib/async/service/supervisor/monitor.rb +55 -0
- data/lib/async/service/supervisor/process_monitor.rb +10 -30
- data/lib/async/service/supervisor/server.rb +19 -15
- data/lib/async/service/supervisor/utilization_monitor.rb +6 -29
- data/lib/async/service/supervisor/version.rb +1 -1
- data/readme.md +4 -4
- data/releases.md +4 -0
- data.tar.gz.sig +0 -0
- metadata +16 -2
- metadata.gz.sig +0 -0
- data/lib/async/service/supervisor/loop.rb +0 -40
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: acb581fece1b2ca0364e8b7f3fee2893c15d788f022377c23ba810a71276beb7
|
|
4
|
+
data.tar.gz: 68d38aa49e35ef1c63e6f214ff271daca6650dfa5bffb1d628eb0f429b5d6219
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: b46f7d61a4ec984478e0fbc062ebb14dc48899a2c004dc23c1446a69f2d095e5614f4eb6fedc0a7d6f264318ea84ce2f12a69b99bff643c18b0b2ef35b13fa8d
|
|
7
|
+
data.tar.gz: a836860985fc1753ca0b116a9d929562c8b24fd9ba474043c178e2dfa26dd59caf64106ce96a3b508762560c88d5ea39fb74f72d63559f91fab425e79c1869e1
|
checksums.yaml.gz.sig
CHANGED
|
Binary file
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
require "memory/leak/cluster"
|
|
7
7
|
require "set"
|
|
8
8
|
|
|
9
|
-
require_relative "loop"
|
|
9
|
+
require_relative "monitor"
|
|
10
10
|
|
|
11
11
|
module Async
|
|
12
12
|
module Service
|
|
@@ -14,7 +14,7 @@ module Async
|
|
|
14
14
|
# Monitors worker memory usage and restarts workers that exceed limits.
|
|
15
15
|
#
|
|
16
16
|
# Uses the `memory` gem to track process memory and detect leaks.
|
|
17
|
-
class MemoryMonitor
|
|
17
|
+
class MemoryMonitor < Monitor
|
|
18
18
|
# Create a new memory monitor.
|
|
19
19
|
#
|
|
20
20
|
# @parameter interval [Integer] The interval at which to check for memory leaks.
|
|
@@ -22,7 +22,7 @@ module Async
|
|
|
22
22
|
# @parameter free_size_minimum [Integer] The minimum free memory threshold, or nil for no threshold.
|
|
23
23
|
# @parameter options [Hash] Options to pass to the cluster when adding processes.
|
|
24
24
|
def initialize(interval: 10, total_size_limit: nil, free_size_minimum: nil, **options)
|
|
25
|
-
|
|
25
|
+
super(interval: interval)
|
|
26
26
|
@cluster = Memory::Leak::Cluster.new(total_size_limit: total_size_limit, free_size_minimum: free_size_minimum)
|
|
27
27
|
|
|
28
28
|
# We use these options when adding processes to the cluster:
|
|
@@ -85,28 +85,11 @@ module Async
|
|
|
85
85
|
end
|
|
86
86
|
end
|
|
87
87
|
|
|
88
|
-
# The key used when this monitor's status is aggregated with others.
|
|
89
|
-
def self.monitor_type
|
|
90
|
-
:memory_monitor
|
|
91
|
-
end
|
|
92
|
-
|
|
93
88
|
# Serialize memory cluster data for JSON.
|
|
94
89
|
def as_json
|
|
95
90
|
@cluster.as_json
|
|
96
91
|
end
|
|
97
92
|
|
|
98
|
-
# Serialize to JSON string.
|
|
99
|
-
def to_json(...)
|
|
100
|
-
as_json.to_json(...)
|
|
101
|
-
end
|
|
102
|
-
|
|
103
|
-
# Get status for the memory monitor.
|
|
104
|
-
#
|
|
105
|
-
# @returns [Hash] Hash with type and data keys.
|
|
106
|
-
def status
|
|
107
|
-
{type: self.class.monitor_type, data: as_json}
|
|
108
|
-
end
|
|
109
|
-
|
|
110
93
|
# Invoked when a memory leak is detected.
|
|
111
94
|
#
|
|
112
95
|
# @parameter process_id [Integer] The process ID of the process that has a memory leak.
|
|
@@ -128,21 +111,15 @@ module Async
|
|
|
128
111
|
true
|
|
129
112
|
end
|
|
130
113
|
|
|
131
|
-
# Run the memory monitor.
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
begin
|
|
141
|
-
memory_leak_detected(process_id, monitor)
|
|
142
|
-
rescue => error
|
|
143
|
-
Console.error(self, "Failed to handle memory leak!", child: {process_id: process_id}, exception: error)
|
|
144
|
-
end
|
|
145
|
-
end
|
|
114
|
+
# Run one iteration of the memory monitor.
|
|
115
|
+
def run_once
|
|
116
|
+
@guard.synchronize do
|
|
117
|
+
# This block must return true if the process was killed.
|
|
118
|
+
@cluster.check! do |process_id, monitor|
|
|
119
|
+
begin
|
|
120
|
+
memory_leak_detected(process_id, monitor)
|
|
121
|
+
rescue => error
|
|
122
|
+
Console.error(self, "Failed to handle memory leak!", child: {process_id: process_id}, exception: error)
|
|
146
123
|
end
|
|
147
124
|
end
|
|
148
125
|
end
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Released under the MIT License.
|
|
4
|
+
# Copyright, 2026, by Samuel Williams.
|
|
5
|
+
|
|
6
|
+
require "async/loop"
|
|
7
|
+
|
|
8
|
+
module Async
|
|
9
|
+
module Service
|
|
10
|
+
module Supervisor
|
|
11
|
+
class Monitor
|
|
12
|
+
def initialize(interval: 1.0)
|
|
13
|
+
@interval = interval
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
def as_json(...)
|
|
17
|
+
{}
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
# Serialize to JSON string.
|
|
21
|
+
def to_json(...)
|
|
22
|
+
as_json.to_json(...)
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
# Get aggregated utilization status by service name.
|
|
26
|
+
#
|
|
27
|
+
# Reads utilization data from all registered workers and aggregates it
|
|
28
|
+
# by service name (from supervisor_controller.state[:name]).
|
|
29
|
+
#
|
|
30
|
+
# @returns [Hash] Hash with type and data keys.
|
|
31
|
+
def status
|
|
32
|
+
{type: self.class.name, data: as_json}
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
# Run one iteration of the monitor.
|
|
36
|
+
def run_once
|
|
37
|
+
# This method can be overridden by subclasses to implement specific monitoring logic.
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
# Run the utilization monitor.
|
|
41
|
+
#
|
|
42
|
+
# Periodically aggregates utilization data from all workers.
|
|
43
|
+
#
|
|
44
|
+
# @returns [Async::Task] The task that is running the utilization monitor.
|
|
45
|
+
def run(parent: Async::Task.current)
|
|
46
|
+
parent.async do
|
|
47
|
+
Loop.periodic(interval: @interval) do
|
|
48
|
+
self.run_once
|
|
49
|
+
end
|
|
50
|
+
end
|
|
51
|
+
end
|
|
52
|
+
end
|
|
53
|
+
end
|
|
54
|
+
end
|
|
55
|
+
end
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
# Copyright, 2025-2026, by Samuel Williams.
|
|
5
5
|
|
|
6
6
|
require "process/metrics"
|
|
7
|
-
require_relative "loop"
|
|
7
|
+
require_relative "monitor"
|
|
8
8
|
|
|
9
9
|
module Async
|
|
10
10
|
module Service
|
|
@@ -15,13 +15,13 @@ module Async
|
|
|
15
15
|
# Unlike {MemoryMonitor}, this monitor captures metrics for the entire process tree
|
|
16
16
|
# by tracking the parent process ID (ppid), which is more efficient than tracking
|
|
17
17
|
# individual processes.
|
|
18
|
-
class ProcessMonitor
|
|
18
|
+
class ProcessMonitor < Monitor
|
|
19
19
|
# Create a new process monitor.
|
|
20
20
|
#
|
|
21
21
|
# @parameter interval [Integer] The interval in seconds at which to log process metrics.
|
|
22
22
|
# @parameter ppid [Integer] The parent process ID to monitor. If nil, uses the current process to capture its children.
|
|
23
23
|
def initialize(interval: 60, ppid: nil)
|
|
24
|
-
|
|
24
|
+
super(interval: interval)
|
|
25
25
|
@ppid = ppid || Process.ppid
|
|
26
26
|
end
|
|
27
27
|
|
|
@@ -67,33 +67,13 @@ module Async
|
|
|
67
67
|
{ppid: @ppid, metrics: self.metrics}
|
|
68
68
|
end
|
|
69
69
|
|
|
70
|
-
#
|
|
71
|
-
def
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
# @returns [Hash] Hash with type and data keys.
|
|
78
|
-
def status
|
|
79
|
-
{type: self.class.monitor_type, data: as_json}
|
|
80
|
-
end
|
|
81
|
-
|
|
82
|
-
# Run the process monitor.
|
|
83
|
-
#
|
|
84
|
-
# Periodically captures and logs process metrics for the entire process tree.
|
|
85
|
-
#
|
|
86
|
-
# @returns [Async::Task] The task that is running the process monitor.
|
|
87
|
-
def run
|
|
88
|
-
Async do
|
|
89
|
-
Loop.run(interval: @interval) do
|
|
90
|
-
metrics = self.metrics
|
|
91
|
-
|
|
92
|
-
# Log each process individually for better searchability in log platforms:
|
|
93
|
-
metrics.each do |process_id, general|
|
|
94
|
-
Console.info(self, "Process metrics captured.", general: general)
|
|
95
|
-
end
|
|
96
|
-
end
|
|
70
|
+
# Run one iteration of the process monitor.
|
|
71
|
+
def run_once
|
|
72
|
+
metrics = self.metrics
|
|
73
|
+
|
|
74
|
+
# Log each process individually for better searchability in log platforms:
|
|
75
|
+
metrics.each do |process_id, general|
|
|
76
|
+
Console.info(self, "Process metrics captured.", general: general)
|
|
97
77
|
end
|
|
98
78
|
end
|
|
99
79
|
end
|
|
@@ -87,30 +87,34 @@ module Async
|
|
|
87
87
|
# @parameter parent [Async::Task] The parent task to run under.
|
|
88
88
|
def run
|
|
89
89
|
Sync do |task|
|
|
90
|
+
barrier = Async::Barrier.new
|
|
91
|
+
|
|
90
92
|
# Start all monitors:
|
|
91
93
|
@monitors.each do |monitor|
|
|
92
|
-
monitor.run
|
|
94
|
+
monitor.run(parent: barrier)
|
|
93
95
|
rescue => error
|
|
94
96
|
Console.error(self, "Error while starting monitor!", monitor: monitor, exception: error)
|
|
95
97
|
end
|
|
96
98
|
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
99
|
+
barrier.async do
|
|
100
|
+
# Accept connections from workers:
|
|
101
|
+
self.accept do |connection|
|
|
102
|
+
# Create a supervisor controller for this connection:
|
|
103
|
+
supervisor_controller = SupervisorController.new(self, connection)
|
|
104
|
+
|
|
105
|
+
# Bind supervisor controller:
|
|
106
|
+
connection.bind(:supervisor, supervisor_controller)
|
|
107
|
+
|
|
108
|
+
# Run the connection:
|
|
109
|
+
connection.run
|
|
110
|
+
ensure
|
|
111
|
+
self.remove(supervisor_controller)
|
|
112
|
+
end
|
|
109
113
|
end
|
|
110
114
|
|
|
111
|
-
|
|
115
|
+
barrier.wait
|
|
112
116
|
ensure
|
|
113
|
-
|
|
117
|
+
barrier&.stop
|
|
114
118
|
end
|
|
115
119
|
end
|
|
116
120
|
end
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
|
|
6
6
|
require "set"
|
|
7
7
|
|
|
8
|
-
require_relative "loop"
|
|
8
|
+
require_relative "monitor"
|
|
9
9
|
require "async/utilization"
|
|
10
10
|
|
|
11
11
|
module Async
|
|
@@ -15,7 +15,7 @@ module Async
|
|
|
15
15
|
#
|
|
16
16
|
# Uses shared memory to efficiently collect utilization metrics from workers
|
|
17
17
|
# and aggregates them by service name for monitoring and reporting.
|
|
18
|
-
class UtilizationMonitor
|
|
18
|
+
class UtilizationMonitor < Monitor
|
|
19
19
|
# Allocates and manages shared memory segments for worker utilization data.
|
|
20
20
|
#
|
|
21
21
|
# Manages a shared memory file that workers can write utilization metrics to.
|
|
@@ -195,8 +195,8 @@ module Async
|
|
|
195
195
|
# @parameter size [Integer] Total size of the shared memory buffer.
|
|
196
196
|
# @parameter segment_size [Integer] Size of each allocation segment (default: 512 bytes).
|
|
197
197
|
def initialize(path: "utilization.shm", interval: 10, size: IO::Buffer::PAGE_SIZE * 8, segment_size: 512)
|
|
198
|
+
super(interval: interval)
|
|
198
199
|
@path = path
|
|
199
|
-
@interval = interval
|
|
200
200
|
@segment_size = segment_size
|
|
201
201
|
|
|
202
202
|
@allocator = SegmentAllocator.new(path, size: size, segment_size: segment_size)
|
|
@@ -313,21 +313,6 @@ module Async
|
|
|
313
313
|
end
|
|
314
314
|
end
|
|
315
315
|
|
|
316
|
-
# Serialize to JSON string.
|
|
317
|
-
def to_json(...)
|
|
318
|
-
as_json.to_json(...)
|
|
319
|
-
end
|
|
320
|
-
|
|
321
|
-
# Get aggregated utilization status by service name.
|
|
322
|
-
#
|
|
323
|
-
# Reads utilization data from all registered workers and aggregates it
|
|
324
|
-
# by service name (from supervisor_controller.state[:name]).
|
|
325
|
-
#
|
|
326
|
-
# @returns [Hash] Hash with type and data keys.
|
|
327
|
-
def status
|
|
328
|
-
{type: self.class.monitor_type, data: as_json}
|
|
329
|
-
end
|
|
330
|
-
|
|
331
316
|
# Emit the utilization metrics.
|
|
332
317
|
#
|
|
333
318
|
# @parameter status [Hash] The utilization metrics.
|
|
@@ -335,17 +320,9 @@ module Async
|
|
|
335
320
|
Console.info(self, "Utilization:", metrics: metrics)
|
|
336
321
|
end
|
|
337
322
|
|
|
338
|
-
# Run the utilization monitor.
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
#
|
|
342
|
-
# @returns [Async::Task] The task that is running the utilization monitor.
|
|
343
|
-
def run
|
|
344
|
-
Async do
|
|
345
|
-
Loop.run(interval: @interval) do
|
|
346
|
-
self.emit(self.as_json)
|
|
347
|
-
end
|
|
348
|
-
end
|
|
323
|
+
# Run one iteration of the utilization monitor.
|
|
324
|
+
def run_once
|
|
325
|
+
self.emit(self.as_json)
|
|
349
326
|
end
|
|
350
327
|
end
|
|
351
328
|
end
|
data/readme.md
CHANGED
|
@@ -28,6 +28,10 @@ Please see the [project documentation](https://socketry.github.io/async-service-
|
|
|
28
28
|
|
|
29
29
|
Please see the [project releases](https://socketry.github.io/async-service-supervisor/releases/index) for all releases.
|
|
30
30
|
|
|
31
|
+
### v0.15.0
|
|
32
|
+
|
|
33
|
+
- Improve robustness and error handling of default monitors and server loop, ensuring that monitor failures either completely crash the server or retry appropriately, rather than leaving the server in a broken state.
|
|
34
|
+
|
|
31
35
|
### v0.14.0
|
|
32
36
|
|
|
33
37
|
- Add `Worker#make_controller` as an override point for providing a custom worker controller with additional RPCs.
|
|
@@ -73,10 +77,6 @@ Please see the [project releases](https://socketry.github.io/async-service-super
|
|
|
73
77
|
- Add `Async::Service::Supervisor::ProcessMonitor` for logging CPU and memory metrics periodically.
|
|
74
78
|
- Fix documentation to use correct `maximum_size_limit:` parameter name for `MemoryMonitor` (was incorrectly documented as `limit:`).
|
|
75
79
|
|
|
76
|
-
### v0.7.0
|
|
77
|
-
|
|
78
|
-
- If a memory leak is detected, sample memory usage for 60 seconds before exiting.
|
|
79
|
-
|
|
80
80
|
## Contributing
|
|
81
81
|
|
|
82
82
|
We welcome contributions to this project.
|
data/releases.md
CHANGED
|
@@ -1,5 +1,9 @@
|
|
|
1
1
|
# Releases
|
|
2
2
|
|
|
3
|
+
## v0.15.0
|
|
4
|
+
|
|
5
|
+
- Improve robustness and error handling of default monitors and server loop, ensuring that monitor failures either completely crash the server or retry appropriately, rather than leaving the server in a broken state.
|
|
6
|
+
|
|
3
7
|
## v0.14.0
|
|
4
8
|
|
|
5
9
|
- Add `Worker#make_controller` as an override point for providing a custom worker controller with additional RPCs.
|
data.tar.gz.sig
CHANGED
|
Binary file
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: async-service-supervisor
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.14.0
|
|
4
|
+
version: 0.15.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Samuel Williams
|
|
@@ -38,6 +38,20 @@ cert_chain:
|
|
|
38
38
|
-----END CERTIFICATE-----
|
|
39
39
|
date: 1980-01-02 00:00:00.000000000 Z
|
|
40
40
|
dependencies:
|
|
41
|
+
- !ruby/object:Gem::Dependency
|
|
42
|
+
name: async
|
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
|
44
|
+
requirements:
|
|
45
|
+
- - "~>"
|
|
46
|
+
- !ruby/object:Gem::Version
|
|
47
|
+
version: '2.38'
|
|
48
|
+
type: :runtime
|
|
49
|
+
prerelease: false
|
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
51
|
+
requirements:
|
|
52
|
+
- - "~>"
|
|
53
|
+
- !ruby/object:Gem::Version
|
|
54
|
+
version: '2.38'
|
|
41
55
|
- !ruby/object:Gem::Dependency
|
|
42
56
|
name: async-bus
|
|
43
57
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -151,8 +165,8 @@ files:
|
|
|
151
165
|
- lib/async/service/supervisor/client.rb
|
|
152
166
|
- lib/async/service/supervisor/endpoint.rb
|
|
153
167
|
- lib/async/service/supervisor/environment.rb
|
|
154
|
-
- lib/async/service/supervisor/loop.rb
|
|
155
168
|
- lib/async/service/supervisor/memory_monitor.rb
|
|
169
|
+
- lib/async/service/supervisor/monitor.rb
|
|
156
170
|
- lib/async/service/supervisor/process_monitor.rb
|
|
157
171
|
- lib/async/service/supervisor/server.rb
|
|
158
172
|
- lib/async/service/supervisor/service.rb
|
metadata.gz.sig
CHANGED
|
Binary file
|
|
@@ -1,40 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
# Released under the MIT License.
|
|
4
|
-
# Copyright, 2025-2026, by Samuel Williams.
|
|
5
|
-
|
|
6
|
-
module Async
|
|
7
|
-
module Service
|
|
8
|
-
module Supervisor
|
|
9
|
-
# A helper for running loops at aligned intervals.
|
|
10
|
-
module Loop
|
|
11
|
-
# A robust loop that executes a block at aligned intervals.
|
|
12
|
-
#
|
|
13
|
-
# The alignment is modulo the current clock in seconds.
|
|
14
|
-
#
|
|
15
|
-
# If an error occurs during the execution of the block, it is logged and the loop continues.
|
|
16
|
-
#
|
|
17
|
-
# @parameter interval [Integer] The interval in seconds between executions of the block.
|
|
18
|
-
def self.run(interval: 60, &block)
|
|
19
|
-
while true
|
|
20
|
-
# Compute the wait time to the next interval:
|
|
21
|
-
wait = interval - (Time.now.to_f % interval)
|
|
22
|
-
if wait.positive?
|
|
23
|
-
# Sleep until the next interval boundary:
|
|
24
|
-
sleep(wait)
|
|
25
|
-
end
|
|
26
|
-
|
|
27
|
-
begin
|
|
28
|
-
yield
|
|
29
|
-
rescue => error
|
|
30
|
-
Console.error(self, "Loop error:", error)
|
|
31
|
-
end
|
|
32
|
-
end
|
|
33
|
-
end
|
|
34
|
-
end
|
|
35
|
-
|
|
36
|
-
private_constant :Loop
|
|
37
|
-
end
|
|
38
|
-
end
|
|
39
|
-
end
|
|
40
|
-
|