async-container-supervisor 0.8.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 39abccaf400a7b793d8f0094e32ccee9a4a9fdad6c6f570e361cace376ebd611
4
- data.tar.gz: 2f135ee3b0979a16a899a07c760e8aeb46f2474635f9f2862c4eef43b7744961
3
+ metadata.gz: 1b9684c9b4ef621c8b92411d251478b9751cc901e251fa1b35c3fca92af18763
4
+ data.tar.gz: 5e9f9a25b01f4de9c160aa194acd2a3467215d09ccbbdfbac178c2bb1f278e58
5
5
  SHA512:
6
- metadata.gz: ffe7ddc8855501a0c30e35e925596a2aaf262d34a373608e16882295603bb1137f30be80f533adcfc480c957208c96e42907006d0564270bf209763b7d3d81a5
7
- data.tar.gz: 7b71f2cdcf3f75973fffdaf676f430e34270a6b1be3b684439e50f202854bffddf9ca8d4983a441d9e4fb01319414e80343e9f2a78e4698e964b01f94c71c58e
6
+ metadata.gz: 7fa18caa63bb5dff847b3640c63ca5c2109a3003537bf36cf8ac2947d560a8c0dc5d201b79e98c789e991db415fb2b07aa2c8a9cf94f556f2d932d68d4044dd4
7
+ data.tar.gz: 235f540925cc1ea12f1f2ef89df5cfacca213e56c4a0b39514f610a92e93e7de995e087842eb827936daa954802631ced0f5ca89ae22f7441c69763f10d7bc6b
checksums.yaml.gz.sig CHANGED
@@ -1,3 +1,5 @@
1
- q���jcuu��j��U���zj�T�I�L��R���wx�N"Z�I����}F�j����ƈ����k�J��'#j�)ӳ�c5Y�nꉚ]�
2
- |�~�8�ݿ]���מ�v��������;�e�ެ�1Q�Ar��(���"嶴�9��~�BK<�c1��
3
- g2��8��0���c���b�g*���1��d&��&��WB����-��c�`
1
+ l/XMj�
2
+ ��aޅ�c�;�m��f����Ѽ�j:��b��2��Y:��"���AB�A���fg�+�ը_� #QV���̾��ANq�
3
+ ���P�$(��gP�-��*�8��)�@%���b?mbj�
4
+ ��xyH~Nb�h{H��arJ'@!�Q#�_����=[�t��*l-<�Jlc�-?z��*F�',@�W`ϣ��s���{��/F���"W|K�a�d�K2��]tI��D|�@���e�ꞓ�\K��@pD�F��o�"�
5
+ ��x�eK9< D~ll ��@���\��Q,���z�!� Hn�Ut������;�DЎ������@yN`��?�
@@ -28,7 +28,7 @@ service "supervisor" do
28
28
  Async::Container::Supervisor::MemoryMonitor.new(
29
29
  # Check worker memory every 10 seconds:
30
30
  interval: 10,
31
-
31
+
32
32
  # Restart workers exceeding 500MB:
33
33
  maximum_size_limit: 1024 * 1024 * 500
34
34
  )
@@ -4,6 +4,7 @@
4
4
  # Copyright, 2025, by Samuel Williams.
5
5
 
6
6
  require "json"
7
+ require "async"
7
8
 
8
9
  module Async
9
10
  module Container
@@ -79,8 +80,8 @@ module Async
79
80
  # Iterate over all responses from the call.
80
81
  #
81
82
  # @yields {|response| ...} Each response from the queue.
82
- def each(&block)
83
- while response = self.pop
83
+ def each(timeout: nil, &block)
84
+ while response = self.pop(timeout: timeout)
84
85
  yield response
85
86
  end
86
87
  end
@@ -154,12 +155,14 @@ module Async
154
155
  connection.write(id: id, **response)
155
156
  end
156
157
  ensure
157
- # If the queue is closed, we don't need to send a finished message.
158
+ # Ensure the call is removed from the connection's calls hash, otherwise it will leak:
159
+ connection.calls.delete(id)
160
+
161
+ # If the queue is closed, we don't need to send a finished message:
158
162
  unless call.closed?
159
- connection.write(id: id, finished: true)
163
+ # If the above write failed, this is likely to fail too, and we can safely ignore it.
164
+ connection.write(id: id, finished: true) rescue nil
160
165
  end
161
-
162
- connection.calls.delete(id)
163
166
  end
164
167
  end
165
168
 
@@ -172,7 +175,7 @@ module Async
172
175
  # @parameter message [Hash] The call message/parameters.
173
176
  # @yields {|response| ...} Each intermediate response if block given.
174
177
  # @returns [Hash, Array] The final response or array of intermediate responses.
175
- def self.call(connection, **message, &block)
178
+ def self.call(connection, timeout: nil, **message, &block)
176
179
  id = connection.next_id
177
180
  call = self.new(connection, id, message)
178
181
 
@@ -181,11 +184,11 @@ module Async
181
184
  connection.write(id: id, **message)
182
185
 
183
186
  if block_given?
184
- call.each(&block)
187
+ call.each(timeout: timeout, &block)
185
188
  else
186
189
  intermediate = nil
187
190
 
188
- while response = call.pop
191
+ while response = call.pop(timeout: timeout)
189
192
  if response.delete(:finished)
190
193
  if intermediate
191
194
  if response.any?
@@ -244,22 +247,6 @@ module Async
244
247
  @stream.flush
245
248
  end
246
249
 
247
- # Make a synchronous call and wait for a single response.
248
- #
249
- # @parameter timeout [Numeric, nil] Optional timeout for the call.
250
- # @parameter message [Hash] The call message.
251
- # @returns [Hash] The response.
252
- def call(timeout: nil, **message)
253
- id = next_id
254
- calls[id] = ::Thread::Queue.new
255
-
256
- write(id: id, **message)
257
-
258
- return calls[id].pop(timeout: timeout)
259
- ensure
260
- calls.delete(id)
261
- end
262
-
263
250
  # Read a message from the connection stream.
264
251
  #
265
252
  # @returns [Hash, nil] The parsed message or nil if stream is closed.
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Async
4
+ module Container
5
+ module Supervisor
6
+ # A helper for running loops at aligned intervals.
7
+ module Loop
8
+ # A robust loop that executes a block at aligned intervals.
9
+ #
10
+ # The alignment is modulo the current clock in seconds.
11
+ #
12
+ # If an error occurs during the execution of the block, it is logged and the loop continues.
13
+ #
14
+ # @parameter interval [Integer] The interval in seconds between executions of the block.
15
+ def self.run(interval: 60, &block)
16
+ while true
17
+ # Compute the wait time to the next interval:
18
+ wait = interval - (Time.now.to_f % interval)
19
+ if wait.positive?
20
+ # Sleep until the next interval boundary:
21
+ sleep(wait)
22
+ end
23
+
24
+ begin
25
+ yield
26
+ rescue => error
27
+ Console.error(self, "Loop error:", error)
28
+ end
29
+ end
30
+ end
31
+ end
32
+
33
+ private_constant :Loop
34
+ end
35
+ end
36
+ end
@@ -6,6 +6,8 @@
6
6
  require "memory/leak/cluster"
7
7
  require "set"
8
8
 
9
+ require_relative "loop"
10
+
9
11
  module Async
10
12
  module Container
11
13
  module Supervisor
@@ -13,14 +15,12 @@ module Async
13
15
  #
14
16
  # Uses the `memory` gem to track process memory and detect leaks.
15
17
  class MemoryMonitor
16
- MEMORY_SAMPLE = {duration: 30, timeout: 30*4}
17
-
18
18
  # Create a new memory monitor.
19
19
  #
20
20
  # @parameter interval [Integer] The interval at which to check for memory leaks.
21
21
  # @parameter total_size_limit [Integer] The total size limit of all processes, or nil for no limit.
22
22
  # @parameter options [Hash] Options to pass to the cluster when adding processes.
23
- def initialize(interval: 10, total_size_limit: nil, memory_sample: MEMORY_SAMPLE, **options)
23
+ def initialize(interval: 10, total_size_limit: nil, memory_sample: false, **options)
24
24
  @interval = interval
25
25
  @cluster = Memory::Leak::Cluster.new(total_size_limit: total_size_limit)
26
26
 
@@ -32,6 +32,9 @@ module Async
32
32
  @processes = Hash.new{|hash, key| hash[key] = Set.new.compare_by_identity}
33
33
  end
34
34
 
35
+ # @attribute [Memory::Leak::Cluster] The cluster of processes being monitored.
36
+ attr_reader :cluster
37
+
35
38
  # Add a process to the memory monitor. You may override this to control how processes are added to the cluster.
36
39
  #
37
40
  # @parameter process_id [Integer] The process ID to add.
@@ -98,8 +101,14 @@ module Async
98
101
  end
99
102
 
100
103
  # Kill the process gently:
101
- Console.info(self, "Killing process!", child: {process_id: process_id})
102
- Process.kill(:INT, process_id)
104
+ begin
105
+ Console.info(self, "Killing process!", child: {process_id: process_id})
106
+ Process.kill(:INT, process_id)
107
+ rescue Errno::ESRCH
108
+ # No such process - he's dead Jim.
109
+ rescue => error
110
+ Console.warn(self, "Failed to kill process!", child: {process_id: process_id}, exception: error)
111
+ end
103
112
 
104
113
  true
105
114
  end
@@ -109,14 +118,17 @@ module Async
109
118
  # @returns [Async::Task] The task that is running the memory monitor.
110
119
  def run
111
120
  Async do
112
- while true
121
+ Loop.run(interval: @interval) do
113
122
  # This block must return true if the process was killed.
114
123
  @cluster.check! do |process_id, monitor|
115
124
  Console.error(self, "Memory leak detected!", child: {process_id: process_id}, monitor: monitor)
116
- memory_leak_detected(process_id, monitor)
125
+
126
+ begin
127
+ memory_leak_detected(process_id, monitor)
128
+ rescue => error
129
+ Console.error(self, "Failed to handle memory leak!", child: {process_id: process_id}, exception: error)
130
+ end
117
131
  end
118
-
119
- sleep(@interval)
120
132
  end
121
133
  end
122
134
  end
@@ -4,6 +4,9 @@
4
4
  # Copyright, 2025, by Samuel Williams.
5
5
 
6
6
  require "process/metrics"
7
+ require_relative "loop"
8
+
9
+ require_relative "loop"
7
10
 
8
11
  module Async
9
12
  module Container
@@ -15,14 +18,14 @@ module Async
15
18
  # by tracking the parent process ID (ppid), which is more efficient than tracking
16
19
  # individual processes.
17
20
  class ProcessMonitor
18
- # Create a new process monitor.
19
- #
20
- # @parameter interval [Integer] The interval in seconds at which to log process metrics.
21
- # @parameter ppid [Integer] The parent process ID to monitor. If nil, uses the current process to capture its children.
22
- def initialize(interval: 60, ppid: nil)
23
- @interval = interval
24
- @ppid = ppid || Process.ppid
25
- end
21
+ # Create a new process monitor.
22
+ #
23
+ # @parameter interval [Integer] The interval in seconds at which to log process metrics.
24
+ # @parameter ppid [Integer] The parent process ID to monitor. If nil, uses the current process to capture its children.
25
+ def initialize(interval: 60, ppid: nil)
26
+ @interval = interval
27
+ @ppid = ppid || Process.ppid
28
+ end
26
29
 
27
30
  # @attribute [Integer] The parent process ID being monitored.
28
31
  attr :ppid
@@ -68,21 +71,19 @@ module Async
68
71
  # Periodically captures and logs process metrics for the entire process tree.
69
72
  #
70
73
  # @returns [Async::Task] The task that is running the process monitor.
71
- def run
72
- Async do
73
- while true
74
- metrics = self.metrics
75
-
76
- # Log each process individually for better searchability in log platforms:
77
- metrics.each do |process_id, general|
78
- Console.info(self, "Process metrics captured.", general: general)
74
+ def run
75
+ Async do
76
+ Loop.run(interval: @interval) do
77
+ metrics = self.metrics
78
+
79
+ # Log each process individually for better searchability in log platforms:
80
+ metrics.each do |process_id, general|
81
+ Console.info(self, "Process metrics captured.", general: general)
82
+ end
79
83
  end
80
-
81
- sleep(@interval)
82
84
  end
83
85
  end
84
86
  end
85
- end
86
87
  end
87
88
  end
88
89
  end
@@ -39,7 +39,9 @@ module Async
39
39
  # @parameter call [Connection::Call] The registration call.
40
40
  # @parameter call[:state] [Hash] The worker state to merge (e.g. process_id).
41
41
  def do_register(call)
42
- call.connection.state.merge!(call.message[:state])
42
+ if state = call.message[:state]
43
+ call.connection.state.merge!(state)
44
+ end
43
45
 
44
46
  connection_id = SecureRandom.uuid
45
47
  call.connection.state[:connection_id] = connection_id
@@ -52,7 +54,7 @@ module Async
52
54
  Console.error(self, "Error while registering process!", monitor: monitor, exception: error)
53
55
  end
54
56
  ensure
55
- call.finish
57
+ call.finish(connection_id: connection_id)
56
58
  end
57
59
 
58
60
  # Forward an operation to a worker connection.
@@ -9,7 +9,7 @@ module Async
9
9
  module Container
10
10
  # @namespace
11
11
  module Supervisor
12
- VERSION = "0.8.0"
12
+ VERSION = "0.9.0"
13
13
  end
14
14
  end
15
15
  end
@@ -25,25 +25,32 @@ module Async
25
25
  #
26
26
  # @parameter state [Hash] The worker state to register with the supervisor.
27
27
  # @parameter endpoint [IO::Endpoint] The supervisor endpoint to connect to.
28
- def initialize(state, endpoint: Supervisor.endpoint)
28
+ def initialize(state = nil, endpoint: Supervisor.endpoint)
29
+ super(endpoint: endpoint)
29
30
  @state = state
30
- @endpoint = endpoint
31
31
  end
32
32
 
33
33
  include Dispatchable
34
34
 
35
- private def dump(call)
35
+ private def dump(call, buffer: true)
36
36
  if path = call[:path]
37
37
  File.open(path, "w") do |file|
38
38
  yield file
39
39
  end
40
40
 
41
41
  call.finish(path: path)
42
- else
42
+ elsif buffer
43
43
  buffer = StringIO.new
44
44
  yield buffer
45
45
 
46
- call.finish(data: buffer.string)
46
+ if message = call[:log]
47
+ Console.info(self, message, data: buffer.string)
48
+ call.finish
49
+ else
50
+ call.finish(data: buffer.string)
51
+ end
52
+ else
53
+ call.fail(error: {message: "Buffered output not supported!"})
47
54
  end
48
55
  end
49
56
 
@@ -69,7 +76,7 @@ module Async
69
76
  def do_memory_dump(call)
70
77
  require "objspace"
71
78
 
72
- dump(call) do |file|
79
+ dump(call, buffer: false) do |file|
73
80
  ObjectSpace.dump_all(output: file)
74
81
  end
75
82
  end
@@ -109,13 +116,9 @@ module Async
109
116
 
110
117
  report = sampler.report
111
118
 
112
- # This is a temporary log to help with debugging:
113
- buffer = StringIO.new
114
- report.print(buffer)
115
- Console.info(self, "Memory sample completed.", report: buffer.string)
116
-
117
- # Generate a report focused on retained objects (likely leaks):
118
- call.finish(report: report)
119
+ dump(call) do |file|
120
+ file.puts(report.to_s)
121
+ end
119
122
  ensure
120
123
  GC.start
121
124
  end
@@ -164,6 +167,7 @@ module Async
164
167
 
165
168
  # Register the worker with the supervisor:
166
169
  connection.call(do: :register, state: @state)
170
+ # We ignore the response (it contains the `connection_id`).
167
171
  end
168
172
  end
169
173
  end
data/readme.md CHANGED
@@ -26,6 +26,13 @@ Please see the [project documentation](https://socketry.github.io/async-containe
26
26
 
27
27
  Please see the [project releases](https://socketry.github.io/async-container-supervisor/releases/index) for all releases.
28
28
 
29
+ ### v0.9.0
30
+
31
+ - Better handling of write failures in `Connection::Call.dispatch`, ensuring we don't leak calls.
32
+ - Robust monitor loop handling - restart on failure, and align loop iterations.
33
+ - Disable memory sampler by default and use text output format.
34
+ - Introduce support for redirecting dump output to logs.
35
+
29
36
  ### v0.8.0
30
37
 
31
38
  - Add `Async::Container::Supervisor::ProcessMonitor` for logging CPU and memory metrics periodically.
data/releases.md CHANGED
@@ -1,5 +1,12 @@
1
1
  # Releases
2
2
 
3
+ ## v0.9.0
4
+
5
+ - Better handling of write failures in `Connection::Call.dispatch`, ensuring we don't leak calls.
6
+ - Robust monitor loop handling - restart on failure, and align loop iterations.
7
+ - Disable memory sampler by default and use text output format.
8
+ - Introduce support for redirecting dump output to logs.
9
+
3
10
  ## v0.8.0
4
11
 
5
12
  - Add `Async::Container::Supervisor::ProcessMonitor` for logging CPU and memory metrics periodically.
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: async-container-supervisor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.0
4
+ version: 0.9.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Samuel Williams
@@ -123,6 +123,7 @@ files:
123
123
  - lib/async/container/supervisor/dispatchable.rb
124
124
  - lib/async/container/supervisor/endpoint.rb
125
125
  - lib/async/container/supervisor/environment.rb
126
+ - lib/async/container/supervisor/loop.rb
126
127
  - lib/async/container/supervisor/memory_monitor.rb
127
128
  - lib/async/container/supervisor/process_monitor.rb
128
129
  - lib/async/container/supervisor/server.rb
@@ -153,7 +154,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
153
154
  - !ruby/object:Gem::Version
154
155
  version: '0'
155
156
  requirements: []
156
- rubygems_version: 3.7.2
157
+ rubygems_version: 3.6.9
157
158
  specification_version: 4
158
159
  summary: A supervisor for managing multiple container processes.
159
160
  test_files: []
metadata.gz.sig CHANGED
Binary file