async-container-supervisor 0.8.0 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 39abccaf400a7b793d8f0094e32ccee9a4a9fdad6c6f570e361cace376ebd611
4
- data.tar.gz: 2f135ee3b0979a16a899a07c760e8aeb46f2474635f9f2862c4eef43b7744961
3
+ metadata.gz: b997fac3e6c645d7740f0099a3543d27d983a86057a4cde23a85254d30a2d32f
4
+ data.tar.gz: 83f0ce708ade60a56158e615d6222788444bb2dfbc980cd6438a93dc6ef3c18b
5
5
  SHA512:
6
- metadata.gz: ffe7ddc8855501a0c30e35e925596a2aaf262d34a373608e16882295603bb1137f30be80f533adcfc480c957208c96e42907006d0564270bf209763b7d3d81a5
7
- data.tar.gz: 7b71f2cdcf3f75973fffdaf676f430e34270a6b1be3b684439e50f202854bffddf9ca8d4983a441d9e4fb01319414e80343e9f2a78e4698e964b01f94c71c58e
6
+ metadata.gz: f6baf3ac944b425114dcb952da846b73a35fbf183ddb078c4b273015b1951a7a4658b2eba72088994fee6a4df81ff5f7c9faf79ccc4cae1433ec7a4080e8c362
7
+ data.tar.gz: 2069afe1a540bc4733d7a5e6ae0c4d0270787bd500a940b6131c31427b6f21f97d0e29911eda516887e745b33d4a37210e1f935089a07af62143620e8800b2e7
checksums.yaml.gz.sig CHANGED
Binary file
@@ -28,7 +28,7 @@ service "supervisor" do
28
28
  Async::Container::Supervisor::MemoryMonitor.new(
29
29
  # Check worker memory every 10 seconds:
30
30
  interval: 10,
31
-
31
+
32
32
  # Restart workers exceeding 500MB:
33
33
  maximum_size_limit: 1024 * 1024 * 500
34
34
  )
@@ -4,6 +4,7 @@
4
4
  # Copyright, 2025, by Samuel Williams.
5
5
 
6
6
  require "json"
7
+ require "async"
7
8
 
8
9
  module Async
9
10
  module Container
@@ -79,8 +80,8 @@ module Async
79
80
  # Iterate over all responses from the call.
80
81
  #
81
82
  # @yields {|response| ...} Each response from the queue.
82
- def each(&block)
83
- while response = self.pop
83
+ def each(timeout: nil, &block)
84
+ while response = self.pop(timeout: timeout)
84
85
  yield response
85
86
  end
86
87
  end
@@ -146,20 +147,28 @@ module Async
146
147
  def self.dispatch(connection, target, id, message)
147
148
  Async do
148
149
  call = self.new(connection, id, message)
150
+ # Track the call in the connection's calls hash:
149
151
  connection.calls[id] = call
150
152
 
153
+ # Dispatch the call to the target (synchronously):
151
154
  target.dispatch(call)
152
155
 
156
+ # Stream responses back to the connection (asynchronously):
153
157
  while response = call.pop
154
158
  connection.write(id: id, **response)
155
159
  end
156
160
  ensure
157
- # If the queue is closed, we don't need to send a finished message.
161
+ # Ensure the call is removed from the connection's calls hash, otherwise it will leak:
162
+ connection.calls.delete(id)
163
+
164
+ # If the queue is closed, we don't need to send a finished message:
158
165
  unless call.closed?
159
- connection.write(id: id, finished: true)
166
+ # Ensure the call is closed, to prevent messages being buffered:
167
+ call.close
168
+
169
+ # If the above write failed, this is likely to fail too, and we can safely ignore it.
170
+ connection.write(id: id, finished: true) rescue nil
160
171
  end
161
-
162
- connection.calls.delete(id)
163
172
  end
164
173
  end
165
174
 
@@ -172,7 +181,7 @@ module Async
172
181
  # @parameter message [Hash] The call message/parameters.
173
182
  # @yields {|response| ...} Each intermediate response if block given.
174
183
  # @returns [Hash, Array] The final response or array of intermediate responses.
175
- def self.call(connection, **message, &block)
184
+ def self.call(connection, timeout: nil, **message, &block)
176
185
  id = connection.next_id
177
186
  call = self.new(connection, id, message)
178
187
 
@@ -181,11 +190,11 @@ module Async
181
190
  connection.write(id: id, **message)
182
191
 
183
192
  if block_given?
184
- call.each(&block)
193
+ call.each(timeout: timeout, &block)
185
194
  else
186
195
  intermediate = nil
187
196
 
188
- while response = call.pop
197
+ while response = call.pop(timeout: timeout)
189
198
  if response.delete(:finished)
190
199
  if intermediate
191
200
  if response.any?
@@ -204,7 +213,11 @@ module Async
204
213
  end
205
214
  end
206
215
  ensure
216
+ # Ensure the call is removed from the connection's calls hash, otherwise it will leak:
207
217
  connection.calls.delete(id)
218
+
219
+ # Ensure the call is closed, so that `Call#pop` will return `nil`.
220
+ call.close
208
221
  end
209
222
  end
210
223
  end
@@ -244,22 +257,6 @@ module Async
244
257
  @stream.flush
245
258
  end
246
259
 
247
- # Make a synchronous call and wait for a single response.
248
- #
249
- # @parameter timeout [Numeric, nil] Optional timeout for the call.
250
- # @parameter message [Hash] The call message.
251
- # @returns [Hash] The response.
252
- def call(timeout: nil, **message)
253
- id = next_id
254
- calls[id] = ::Thread::Queue.new
255
-
256
- write(id: id, **message)
257
-
258
- return calls[id].pop(timeout: timeout)
259
- ensure
260
- calls.delete(id)
261
- end
262
-
263
260
  # Read a message from the connection stream.
264
261
  #
265
262
  # @returns [Hash, nil] The parsed message or nil if stream is closed.
@@ -289,16 +286,19 @@ module Async
289
286
  #
290
287
  # @parameter target [Dispatchable] The target to dispatch calls to.
291
288
  def run(target)
289
+ # Process incoming messages from the connection:
292
290
  self.each do |message|
291
+ # If the message has an ID, it is a response to a call:
293
292
  if id = message.delete(:id)
293
+ # Find the call in the connection's calls hash:
294
294
  if call = @calls[id]
295
- # Response to a call:
295
+ # Enqueue the response for the call:
296
296
  call.push(**message)
297
297
  elsif message.key?(:do)
298
- # Incoming call:
298
+ # Otherwise, if we couldn't find an existing call, it must be a new call:
299
299
  Call.dispatch(self, target, id, message)
300
300
  else
301
- # Likely a response to a timed-out call, ignore it:
301
+ # Finally, if none of the above, it is likely a response to a timed-out call, so ignore it:
302
302
  Console.debug(self, "Ignoring message:", message)
303
303
  end
304
304
  else
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Async
4
+ module Container
5
+ module Supervisor
6
+ # A helper for running loops at aligned intervals.
7
+ module Loop
8
+ # A robust loop that executes a block at aligned intervals.
9
+ #
10
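+ # Iterations are aligned to the wall clock: before each execution, the loop sleeps for `interval - (Time.now.to_f % interval)` seconds, i.e. until the next multiple of `interval`.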
11
+ #
12
+ # If an error occurs during the execution of the block, it is logged and the loop continues.
13
+ #
14
+ # @parameter interval [Integer] The interval in seconds between executions of the block.
15
+ def self.run(interval: 60, &block)
16
+ while true
17
+ # Compute the wait time to the next interval:
18
+ wait = interval - (Time.now.to_f % interval)
19
+ if wait.positive?
20
+ # Sleep until the next interval boundary:
21
+ sleep(wait)
22
+ end
23
+
24
+ begin
25
+ yield
26
+ rescue => error
27
+ Console.error(self, "Loop error:", error)
28
+ end
29
+ end
30
+ end
31
+ end
32
+
33
+ private_constant :Loop
34
+ end
35
+ end
36
+ end
@@ -6,6 +6,8 @@
6
6
  require "memory/leak/cluster"
7
7
  require "set"
8
8
 
9
+ require_relative "loop"
10
+
9
11
  module Async
10
12
  module Container
11
13
  module Supervisor
@@ -13,14 +15,12 @@ module Async
13
15
  #
14
16
  # Uses the `memory` gem to track process memory and detect leaks.
15
17
  class MemoryMonitor
16
- MEMORY_SAMPLE = {duration: 30, timeout: 30*4}
17
-
18
18
  # Create a new memory monitor.
19
19
  #
20
20
  # @parameter interval [Integer] The interval at which to check for memory leaks.
21
21
  # @parameter total_size_limit [Integer] The total size limit of all processes, or nil for no limit.
22
22
  # @parameter options [Hash] Options to pass to the cluster when adding processes.
23
- def initialize(interval: 10, total_size_limit: nil, memory_sample: MEMORY_SAMPLE, **options)
23
+ def initialize(interval: 10, total_size_limit: nil, memory_sample: false, **options)
24
24
  @interval = interval
25
25
  @cluster = Memory::Leak::Cluster.new(total_size_limit: total_size_limit)
26
26
 
@@ -32,6 +32,9 @@ module Async
32
32
  @processes = Hash.new{|hash, key| hash[key] = Set.new.compare_by_identity}
33
33
  end
34
34
 
35
+ # @attribute [Memory::Leak::Cluster] The cluster of processes being monitored.
36
+ attr_reader :cluster
37
+
35
38
  # Add a process to the memory monitor. You may override this to control how processes are added to the cluster.
36
39
  #
37
40
  # @parameter process_id [Integer] The process ID to add.
@@ -98,8 +101,14 @@ module Async
98
101
  end
99
102
 
100
103
  # Kill the process gently:
101
- Console.info(self, "Killing process!", child: {process_id: process_id})
102
- Process.kill(:INT, process_id)
104
+ begin
105
+ Console.info(self, "Killing process!", child: {process_id: process_id})
106
+ Process.kill(:INT, process_id)
107
+ rescue Errno::ESRCH
108
+ # No such process - he's dead Jim.
109
+ rescue => error
110
+ Console.warn(self, "Failed to kill process!", child: {process_id: process_id}, exception: error)
111
+ end
103
112
 
104
113
  true
105
114
  end
@@ -109,14 +118,17 @@ module Async
109
118
  # @returns [Async::Task] The task that is running the memory monitor.
110
119
  def run
111
120
  Async do
112
- while true
121
+ Loop.run(interval: @interval) do
113
122
  # This block must return true if the process was killed.
114
123
  @cluster.check! do |process_id, monitor|
115
124
  Console.error(self, "Memory leak detected!", child: {process_id: process_id}, monitor: monitor)
116
- memory_leak_detected(process_id, monitor)
125
+
126
+ begin
127
+ memory_leak_detected(process_id, monitor)
128
+ rescue => error
129
+ Console.error(self, "Failed to handle memory leak!", child: {process_id: process_id}, exception: error)
130
+ end
117
131
  end
118
-
119
- sleep(@interval)
120
132
  end
121
133
  end
122
134
  end
@@ -4,6 +4,9 @@
4
4
  # Copyright, 2025, by Samuel Williams.
5
5
 
6
6
  require "process/metrics"
7
+ require_relative "loop"
8
+
9
+ require_relative "loop"
7
10
 
8
11
  module Async
9
12
  module Container
@@ -15,14 +18,14 @@ module Async
15
18
  # by tracking the parent process ID (ppid), which is more efficient than tracking
16
19
  # individual processes.
17
20
  class ProcessMonitor
18
- # Create a new process monitor.
19
- #
20
- # @parameter interval [Integer] The interval in seconds at which to log process metrics.
21
- # @parameter ppid [Integer] The parent process ID to monitor. If nil, uses the current process to capture its children.
22
- def initialize(interval: 60, ppid: nil)
23
- @interval = interval
24
- @ppid = ppid || Process.ppid
25
- end
21
+ # Create a new process monitor.
22
+ #
23
+ # @parameter interval [Integer] The interval in seconds at which to log process metrics.
24
+ # @parameter ppid [Integer, nil] The parent process ID to monitor. If nil, defaults to `Process.ppid` (the parent of the current process).
25
+ def initialize(interval: 60, ppid: nil)
26
+ @interval = interval
27
+ @ppid = ppid || Process.ppid
28
+ end
26
29
 
27
30
  # @attribute [Integer] The parent process ID being monitored.
28
31
  attr :ppid
@@ -68,21 +71,19 @@ module Async
68
71
  # Periodically captures and logs process metrics for the entire process tree.
69
72
  #
70
73
  # @returns [Async::Task] The task that is running the process monitor.
71
- def run
72
- Async do
73
- while true
74
- metrics = self.metrics
75
-
76
- # Log each process individually for better searchability in log platforms:
77
- metrics.each do |process_id, general|
78
- Console.info(self, "Process metrics captured.", general: general)
74
+ def run
75
+ Async do
76
+ Loop.run(interval: @interval) do
77
+ metrics = self.metrics
78
+
79
+ # Log each process individually for better searchability in log platforms:
80
+ metrics.each do |process_id, general|
81
+ Console.info(self, "Process metrics captured.", general: general)
82
+ end
79
83
  end
80
-
81
- sleep(@interval)
82
84
  end
83
85
  end
84
86
  end
85
- end
86
87
  end
87
88
  end
88
89
  end
@@ -39,7 +39,9 @@ module Async
39
39
  # @parameter call [Connection::Call] The registration call.
40
40
  # @parameter call[:state] [Hash] The worker state to merge (e.g. process_id).
41
41
  def do_register(call)
42
- call.connection.state.merge!(call.message[:state])
42
+ if state = call.message[:state]
43
+ call.connection.state.merge!(state)
44
+ end
43
45
 
44
46
  connection_id = SecureRandom.uuid
45
47
  call.connection.state[:connection_id] = connection_id
@@ -52,7 +54,7 @@ module Async
52
54
  Console.error(self, "Error while registering process!", monitor: monitor, exception: error)
53
55
  end
54
56
  ensure
55
- call.finish
57
+ call.finish(connection_id: connection_id)
56
58
  end
57
59
 
58
60
  # Forward an operation to a worker connection.
@@ -9,7 +9,7 @@ module Async
9
9
  module Container
10
10
  # @namespace
11
11
  module Supervisor
12
- VERSION = "0.8.0"
12
+ VERSION = "0.9.1"
13
13
  end
14
14
  end
15
15
  end
@@ -25,25 +25,32 @@ module Async
25
25
  #
26
26
  # @parameter state [Hash] The worker state to register with the supervisor.
27
27
  # @parameter endpoint [IO::Endpoint] The supervisor endpoint to connect to.
28
- def initialize(state, endpoint: Supervisor.endpoint)
28
+ def initialize(state = nil, endpoint: Supervisor.endpoint)
29
+ super(endpoint: endpoint)
29
30
  @state = state
30
- @endpoint = endpoint
31
31
  end
32
32
 
33
33
  include Dispatchable
34
34
 
35
- private def dump(call)
35
+ private def dump(call, buffer: true)
36
36
  if path = call[:path]
37
37
  File.open(path, "w") do |file|
38
38
  yield file
39
39
  end
40
40
 
41
41
  call.finish(path: path)
42
- else
42
+ elsif buffer
43
43
  buffer = StringIO.new
44
44
  yield buffer
45
45
 
46
- call.finish(data: buffer.string)
46
+ if message = call[:log]
47
+ Console.info(self, message, data: buffer.string)
48
+ call.finish
49
+ else
50
+ call.finish(data: buffer.string)
51
+ end
52
+ else
53
+ call.fail(error: {message: "Buffered output not supported!"})
47
54
  end
48
55
  end
49
56
 
@@ -69,7 +76,7 @@ module Async
69
76
  def do_memory_dump(call)
70
77
  require "objspace"
71
78
 
72
- dump(call) do |file|
79
+ dump(call, buffer: false) do |file|
73
80
  ObjectSpace.dump_all(output: file)
74
81
  end
75
82
  end
@@ -109,13 +116,9 @@ module Async
109
116
 
110
117
  report = sampler.report
111
118
 
112
- # This is a temporary log to help with debugging:
113
- buffer = StringIO.new
114
- report.print(buffer)
115
- Console.info(self, "Memory sample completed.", report: buffer.string)
116
-
117
- # Generate a report focused on retained objects (likely leaks):
118
- call.finish(report: report)
119
+ dump(call) do |file|
120
+ file.puts(report.to_s)
121
+ end
119
122
  ensure
120
123
  GC.start
121
124
  end
@@ -164,6 +167,7 @@ module Async
164
167
 
165
168
  # Register the worker with the supervisor:
166
169
  connection.call(do: :register, state: @state)
170
+ # We ignore the response (it contains the `connection_id`).
167
171
  end
168
172
  end
169
173
  end
data/readme.md CHANGED
@@ -26,6 +26,17 @@ Please see the [project documentation](https://socketry.github.io/async-containe
26
26
 
27
27
  Please see the [project releases](https://socketry.github.io/async-container-supervisor/releases/index) for all releases.
28
28
 
29
+ ### v0.9.1
30
+
31
+ - Close `Call` queue if asynchronous call fails during dispatch - further messages will fail with `ClosedQueueError`.
32
+
33
+ ### v0.9.0
34
+
35
+ - Better handling of write failures in `Connection::Call.dispatch`, ensuring we don't leak calls.
36
+ - Robust monitor loop handling - restart on failure, and align loop iterations.
37
+ - Disable memory sampler by default and use text output format.
38
+ - Introduce support for redirecting dump output to logs.
39
+
29
40
  ### v0.8.0
30
41
 
31
42
  - Add `Async::Container::Supervisor::ProcessMonitor` for logging CPU and memory metrics periodically.
data/releases.md CHANGED
@@ -1,5 +1,16 @@
1
1
  # Releases
2
2
 
3
+ ## v0.9.1
4
+
5
+ - Close `Call` queue if asynchronous call fails during dispatch - further messages will fail with `ClosedQueueError`.
6
+
7
+ ## v0.9.0
8
+
9
+ - Better handling of write failures in `Connection::Call.dispatch`, ensuring we don't leak calls.
10
+ - Robust monitor loop handling - restart on failure, and align loop iterations.
11
+ - Disable memory sampler by default and use text output format.
12
+ - Introduce support for redirecting dump output to logs.
13
+
3
14
  ## v0.8.0
4
15
 
5
16
  - Add `Async::Container::Supervisor::ProcessMonitor` for logging CPU and memory metrics periodically.
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: async-container-supervisor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.0
4
+ version: 0.9.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Samuel Williams
@@ -123,6 +123,7 @@ files:
123
123
  - lib/async/container/supervisor/dispatchable.rb
124
124
  - lib/async/container/supervisor/endpoint.rb
125
125
  - lib/async/container/supervisor/environment.rb
126
+ - lib/async/container/supervisor/loop.rb
126
127
  - lib/async/container/supervisor/memory_monitor.rb
127
128
  - lib/async/container/supervisor/process_monitor.rb
128
129
  - lib/async/container/supervisor/server.rb
@@ -153,7 +154,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
153
154
  - !ruby/object:Gem::Version
154
155
  version: '0'
155
156
  requirements: []
156
- rubygems_version: 3.7.2
157
+ rubygems_version: 3.6.9
157
158
  specification_version: 4
158
159
  summary: A supervisor for managing multiple container processes.
159
160
  test_files: []
metadata.gz.sig CHANGED
Binary file