rspec-agents 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/bin/rspec-agents +24 -0
- data/lib/async_workers/channel_config.rb +34 -0
- data/lib/async_workers/doc/process_manager_design.md +512 -0
- data/lib/async_workers/errors.rb +21 -0
- data/lib/async_workers/managed_process.rb +284 -0
- data/lib/async_workers/output_stream.rb +86 -0
- data/lib/async_workers/rpc_channel.rb +159 -0
- data/lib/async_workers/transport/base.rb +57 -0
- data/lib/async_workers/transport/stdio_transport.rb +91 -0
- data/lib/async_workers/transport/unix_socket_transport.rb +112 -0
- data/lib/async_workers/worker_group.rb +175 -0
- data/lib/async_workers.rb +17 -0
- data/lib/rspec/agents/agent_response.rb +61 -0
- data/lib/rspec/agents/agents/base.rb +123 -0
- data/lib/rspec/agents/cli.rb +342 -0
- data/lib/rspec/agents/conversation.rb +308 -0
- data/lib/rspec/agents/criterion.rb +237 -0
- data/lib/rspec/agents/doc/2026_01_22_observer-system-design.md +757 -0
- data/lib/rspec/agents/doc/2026_01_23_parallel_spec_runner-design.md +1060 -0
- data/lib/rspec/agents/doc/2026_01_27_event_serialization-design.md +294 -0
- data/lib/rspec/agents/doc/2026_01_27_experiment_aggregation_design.md +831 -0
- data/lib/rspec/agents/doc/2026_01_29_rspec-agents-studio-design.md +1332 -0
- data/lib/rspec/agents/doc/2026_01_29_testing-framework-design.md +1037 -0
- data/lib/rspec/agents/doc/2026_02_04-parallel-runner-ui.md +537 -0
- data/lib/rspec/agents/doc/2026_02_05_html_renderer_extensions.md +708 -0
- data/lib/rspec/agents/doc/scenario_guide.md +289 -0
- data/lib/rspec/agents/dsl/agent_proxy.rb +141 -0
- data/lib/rspec/agents/dsl/criterion_definition.rb +78 -0
- data/lib/rspec/agents/dsl/graph_builder.rb +38 -0
- data/lib/rspec/agents/dsl/runner_factory.rb +52 -0
- data/lib/rspec/agents/dsl/scenario_set_dsl.rb +166 -0
- data/lib/rspec/agents/dsl/test_context.rb +223 -0
- data/lib/rspec/agents/dsl/user_proxy.rb +71 -0
- data/lib/rspec/agents/dsl.rb +398 -0
- data/lib/rspec/agents/evaluation_result.rb +44 -0
- data/lib/rspec/agents/event_bus.rb +78 -0
- data/lib/rspec/agents/events.rb +141 -0
- data/lib/rspec/agents/isolated_event_bus.rb +86 -0
- data/lib/rspec/agents/judge.rb +244 -0
- data/lib/rspec/agents/llm/anthropic.rb +143 -0
- data/lib/rspec/agents/llm/base.rb +64 -0
- data/lib/rspec/agents/llm/mock.rb +181 -0
- data/lib/rspec/agents/llm/response.rb +52 -0
- data/lib/rspec/agents/matchers.rb +554 -0
- data/lib/rspec/agents/message.rb +81 -0
- data/lib/rspec/agents/metadata.rb +120 -0
- data/lib/rspec/agents/observers/base.rb +70 -0
- data/lib/rspec/agents/observers/parallel_terminal_observer.rb +151 -0
- data/lib/rspec/agents/observers/rpc_notify_observer.rb +43 -0
- data/lib/rspec/agents/observers/terminal_observer.rb +103 -0
- data/lib/rspec/agents/parallel/controller.rb +284 -0
- data/lib/rspec/agents/parallel/example_discovery.rb +153 -0
- data/lib/rspec/agents/parallel/partitioner.rb +31 -0
- data/lib/rspec/agents/parallel/run_result.rb +22 -0
- data/lib/rspec/agents/parallel/ui/interactive_ui.rb +605 -0
- data/lib/rspec/agents/parallel/ui/interleaved_ui.rb +139 -0
- data/lib/rspec/agents/parallel/ui/output_adapter.rb +127 -0
- data/lib/rspec/agents/parallel/ui/quiet_ui.rb +100 -0
- data/lib/rspec/agents/parallel/ui/ui_factory.rb +53 -0
- data/lib/rspec/agents/parallel/ui/ui_mode.rb +101 -0
- data/lib/rspec/agents/prompt_builders/base.rb +113 -0
- data/lib/rspec/agents/prompt_builders/criterion_evaluation.rb +136 -0
- data/lib/rspec/agents/prompt_builders/goal_achievement_evaluation.rb +142 -0
- data/lib/rspec/agents/prompt_builders/grounding_evaluation.rb +172 -0
- data/lib/rspec/agents/prompt_builders/intent_evaluation.rb +111 -0
- data/lib/rspec/agents/prompt_builders/topic_classification.rb +105 -0
- data/lib/rspec/agents/prompt_builders/user_simulation.rb +131 -0
- data/lib/rspec/agents/runners/headless_runner.rb +272 -0
- data/lib/rspec/agents/runners/parallel_terminal_runner.rb +220 -0
- data/lib/rspec/agents/runners/terminal_runner.rb +186 -0
- data/lib/rspec/agents/runners/user_simulator.rb +261 -0
- data/lib/rspec/agents/scenario.rb +133 -0
- data/lib/rspec/agents/scenario_loader.rb +145 -0
- data/lib/rspec/agents/serialization/conversation_renderer.rb +161 -0
- data/lib/rspec/agents/serialization/extension.rb +199 -0
- data/lib/rspec/agents/serialization/extensions/core_extension.rb +66 -0
- data/lib/rspec/agents/serialization/presenters.rb +281 -0
- data/lib/rspec/agents/serialization/run_data_aggregator.rb +197 -0
- data/lib/rspec/agents/serialization/run_data_builder.rb +189 -0
- data/lib/rspec/agents/serialization/templates/_alpine.min.js +5 -0
- data/lib/rspec/agents/serialization/templates/_base_components.css +196 -0
- data/lib/rspec/agents/serialization/templates/_base_components.js +46 -0
- data/lib/rspec/agents/serialization/templates/_conversation_fragment.html.haml +34 -0
- data/lib/rspec/agents/serialization/templates/_metadata_default.html.haml +17 -0
- data/lib/rspec/agents/serialization/templates/_scripts.js +89 -0
- data/lib/rspec/agents/serialization/templates/_styles.css +1211 -0
- data/lib/rspec/agents/serialization/templates/conversation_document.html.haml +29 -0
- data/lib/rspec/agents/serialization/templates/test_suite.html.haml +238 -0
- data/lib/rspec/agents/serialization/test_suite_renderer.rb +207 -0
- data/lib/rspec/agents/serialization.rb +374 -0
- data/lib/rspec/agents/simulator_config.rb +336 -0
- data/lib/rspec/agents/spec_executor.rb +494 -0
- data/lib/rspec/agents/stable_example_id.rb +147 -0
- data/lib/rspec/agents/templates/user_simulation.erb +9 -0
- data/lib/rspec/agents/tool_call.rb +53 -0
- data/lib/rspec/agents/topic.rb +307 -0
- data/lib/rspec/agents/topic_graph.rb +236 -0
- data/lib/rspec/agents/triggers.rb +122 -0
- data/lib/rspec/agents/turn.rb +63 -0
- data/lib/rspec/agents/turn_executor.rb +91 -0
- data/lib/rspec/agents/version.rb +7 -0
- data/lib/rspec/agents.rb +145 -0
- metadata +242 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: 123d6a20179dda36dcfd3020a74c12170dde1a512ab4d8a28d6499225793a8b0
|
|
4
|
+
data.tar.gz: d985272c0f2cb2492880a47fb9818de9f3ada465daf8da4308a0691e1e714359
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 0c6a8dcef1c0aad57ed56a11d4d672785cd843dc721ce6dc5f2d9a94ff7a0d9ad7a2e9c6646397f63be95947954e3c7d876cc266af53b7fc3064e0de081857db
|
|
7
|
+
data.tar.gz: f1f8f75d7a3f80a281791d7098eefb633e276f8ccf7e854b786423801e172ceaabd276c6d80649bf87667e81a25cbc2b791adf77ccd62a0bb431e4b0467af5e5
|
data/bin/rspec-agents
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
# Unified CLI for rspec-agents
|
|
5
|
+
#
|
|
6
|
+
# Usage:
|
|
7
|
+
# rspec-agents [run] [options] [paths...] # Single-process (default)
|
|
8
|
+
# rspec-agents parallel [options] [paths...] # Parallel with workers
|
|
9
|
+
# rspec-agents render <json_file> [options] # Render HTML from JSON
|
|
10
|
+
# rspec-agents worker # Internal: worker mode
|
|
11
|
+
|
|
12
|
+
# macOS fork safety: this variable has to be in place before any
# Objective-C code gets loaded, so it is set ahead of the requires below.
ENV["OBJC_DISABLE_INITIALIZE_FORK_SAFETY"] = "YES"

# Keep both output streams unbuffered (needed for worker mode).
[$stdout, $stderr].each { |stream| stream.sync = true }

require "rspec/agents"
require "rspec/agents/cli"

# Hand the raw command-line arguments to the CLI and exit with whatever
# status it reports, falling back to 0 when it reports none.
status = RSpec::Agents::CLI.run(ARGV)
exit(status || 0)
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module AsyncWorkers
  # Value object describing how (and whether) an RPC channel is set up for a
  # child process. Defined with Data.define, so instances only expose readers
  # for +mode+ and +options+.
  #
  # Modes produced by the factory methods:
  #   :stdio_rpc       - RPC over stdin/stdout, logs on stderr only
  #   :unix_socket_rpc - RPC over unix domain socket, logs on stdout/stderr
  #   :no_rpc          - no RPC, just output capture
  ChannelConfig = Data.define(:mode, :options) do
    class << self
      # @return [ChannelConfig] config for RPC over stdin/stdout
      def stdio_rpc = new(mode: :stdio_rpc, options: {})

      # @return [ChannelConfig] config for RPC over a unix domain socket
      def unix_socket_rpc = new(mode: :unix_socket_rpc, options: {})

      # @return [ChannelConfig] config with RPC disabled (output capture only)
      def no_rpc = new(mode: :no_rpc, options: {})
    end

    # @return [Boolean] true for every mode other than :no_rpc
    def rpc_enabled? = mode != :no_rpc

    # @return [Boolean] true when the mode is :stdio_rpc
    def stdio? = mode == :stdio_rpc

    # @return [Boolean] true when the mode is :unix_socket_rpc
    def unix_socket? = mode == :unix_socket_rpc
  end
end
|
|
@@ -0,0 +1,512 @@
|
|
|
1
|
+
# Process Management Library Design Document
|
|
2
|
+
|
|
3
|
+
A Ruby library for managing child processes with structured communication, built on the `async` gem and Ruby fibers.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
This library provides a high-level abstraction for spawning and managing child processes with support for bidirectional JSON-based RPC communication, output streaming, health monitoring, and coordinated worker groups.
|
|
8
|
+
|
|
9
|
+
### Goals
|
|
10
|
+
|
|
11
|
+
- Spawn and manage child processes with full lifecycle control
|
|
12
|
+
- Support JSON-based RPC over multiple transport mechanisms
|
|
13
|
+
- Capture and stream process output (stdout/stderr)
|
|
14
|
+
- Detect process failures and crashes
|
|
15
|
+
- Coordinate multiple workers in a fan-out pattern
|
|
16
|
+
- Integrate cleanly with the `async` ecosystem
|
|
17
|
+
|
|
18
|
+
### Non-Goals
|
|
19
|
+
|
|
20
|
+
- Automatic restart/supervision (caller's responsibility)
|
|
21
|
+
- Inter-worker dependencies or communication
|
|
22
|
+
- Work distribution strategies (push/pull queues)
|
|
23
|
+
- Persistent message queues
|
|
24
|
+
|
|
25
|
+
## Architecture
|
|
26
|
+
|
|
27
|
+
```
|
|
28
|
+
┌─────────────────────────────────────────────────────────────────┐
|
|
29
|
+
│ WorkerGroup │
|
|
30
|
+
│ - Spawns N workers │
|
|
31
|
+
│ - Fail-fast on any worker failure │
|
|
32
|
+
│ - Provides access to individual workers │
|
|
33
|
+
└─────────────────────────────────────┬───────────────────────────┘
|
|
34
|
+
│
|
|
35
|
+
┌─────────────────────────────┼─────────────────────────────┐
|
|
36
|
+
│ │ │
|
|
37
|
+
┌────▼─────┐ ┌───────▼──┐ ┌─────────▼┐
|
|
38
|
+
│ Worker 0 │ │ Worker 1 │ │ Worker N │
|
|
39
|
+
│ │ │ │ │ │
|
|
40
|
+
│ .rpc │ │ .rpc │ │ .rpc │
|
|
41
|
+
│ .stderr │ │ .stderr │ │ .stderr │
|
|
42
|
+
│ .stdout │ │ .stdout │ │ .stdout │
|
|
43
|
+
└────┬─────┘ └────┬─────┘ └────┬─────┘
|
|
44
|
+
│ │ │
|
|
45
|
+
┌────▼─────┐ ┌────▼─────┐ ┌────▼─────┐
|
|
46
|
+
│ Managed │ │ Managed │ │ Managed │
|
|
47
|
+
│ Process │ │ Process │ │ Process │
|
|
48
|
+
└────┬─────┘ └────┬─────┘ └────┬─────┘
|
|
49
|
+
│ │ │
|
|
50
|
+
┌────▼─────┐ ┌────▼─────┐ ┌────▼─────┐
|
|
51
|
+
│Transport │ │Transport │ │Transport │
|
|
52
|
+
│(stdio/ │ │(stdio/ │ │(stdio/ │
|
|
53
|
+
│ socket) │ │ socket) │ │ socket) │
|
|
54
|
+
└──────────┘ └──────────┘ └──────────┘
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
### Layer Responsibilities
|
|
58
|
+
|
|
59
|
+
| Layer | Responsibility |
|
|
60
|
+
|-------|----------------|
|
|
61
|
+
| **WorkerGroup** | Spawns multiple workers, fail-fast coordination, provides access to individual workers |
|
|
62
|
+
| **ManagedProcess** | Process lifecycle (spawn, stop, kill), health monitoring, output stream management |
|
|
63
|
+
| **RpcChannel** | Message correlation (request/response), notification handling, graceful shutdown protocol |
|
|
64
|
+
| **Transport** | Raw I/O over stdio or unix sockets |
|
|
65
|
+
| **OutputStream** | Unified callback and iterator interface for streaming data |
|
|
66
|
+
|
|
67
|
+
## Core Components
|
|
68
|
+
|
|
69
|
+
### ChannelConfig
|
|
70
|
+
|
|
71
|
+
Configures how RPC communication is established with the child process.
|
|
72
|
+
|
|
73
|
+
```ruby
|
|
74
|
+
# RPC over stdin/stdout, logs on stderr only
|
|
75
|
+
ChannelConfig.stdio_rpc
|
|
76
|
+
|
|
77
|
+
# RPC over unix domain socket, logs on stdout/stderr
|
|
78
|
+
ChannelConfig.unix_socket_rpc
|
|
79
|
+
|
|
80
|
+
# No RPC, just output capture
|
|
81
|
+
ChannelConfig.no_rpc
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
| Mode | RPC Channel | Log Channels | Child Receives |
|
|
85
|
+
|------|-------------|--------------|----------------|
|
|
86
|
+
| `stdio_rpc` | stdin/stdout | stderr | Messages on stdin |
|
|
87
|
+
| `unix_socket_rpc` | Unix domain socket | stdout, stderr | `RPC_SOCKET_FD` env var |
|
|
88
|
+
| `no_rpc` | None | stdout, stderr | Nothing special |
|
|
89
|
+
|
|
90
|
+
### ManagedProcess
|
|
91
|
+
|
|
92
|
+
Wraps a single child process with lifecycle management and communication.
|
|
93
|
+
|
|
94
|
+
```ruby
|
|
95
|
+
process = ManagedProcess.new(
|
|
96
|
+
command: ['ruby', 'worker.rb', '--verbose'],
|
|
97
|
+
env: { 'DEBUG' => '1', 'WORKER_ID' => '0' },
|
|
98
|
+
rpc: ChannelConfig.stdio_rpc
|
|
99
|
+
)
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
#### Attributes
|
|
103
|
+
|
|
104
|
+
| Attribute | Type | Description |
|
|
105
|
+
|-----------|------|-------------|
|
|
106
|
+
| `pid` | `Integer` | OS process ID |
|
|
107
|
+
| `status` | `Symbol` | `:pending`, `:running`, `:stopping`, `:exited` |
|
|
108
|
+
| `exit_status` | `Process::Status` | Exit status (nil until exited) |
|
|
109
|
+
| `rpc` | `RpcChannel` | RPC interface (nil if `no_rpc`) |
|
|
110
|
+
| `stderr` | `OutputStream` | Stderr line stream |
|
|
111
|
+
| `stdout` | `OutputStream` | Stdout line stream (empty if RPC uses stdio) |
|
|
112
|
+
|
|
113
|
+
#### Methods
|
|
114
|
+
|
|
115
|
+
| Method | Description |
|
|
116
|
+
|--------|-------------|
|
|
117
|
+
| `start(task:)` | Spawn process and begin monitoring |
|
|
118
|
+
| `stop(timeout: 5)` | Graceful shutdown: RPC shutdown → SIGTERM → SIGKILL |
|
|
119
|
+
| `kill` | Immediate SIGKILL |
|
|
120
|
+
| `send_signal(signal)` | Send arbitrary signal |
|
|
121
|
+
| `alive?` | Check if process is running |
|
|
122
|
+
| `wait` | Block (yield fiber) until process exits |
|
|
123
|
+
| `wait(timeout:)` | Block until exit or timeout (raises `Async::TimeoutError`) |
|
|
124
|
+
| `on_exit { \|status\| }` | Register exit callback |
|
|
125
|
+
|
|
126
|
+
### RpcChannel
|
|
127
|
+
|
|
128
|
+
Handles JSON message framing, request/response correlation, and notifications.
|
|
129
|
+
|
|
130
|
+
#### Methods
|
|
131
|
+
|
|
132
|
+
| Method | Description |
|
|
133
|
+
|--------|-------------|
|
|
134
|
+
| `request(payload, timeout: nil)` | Send request, wait for response |
|
|
135
|
+
| `notify(payload)` | Send fire-and-forget message |
|
|
136
|
+
| `shutdown(timeout: 5)` | Request graceful shutdown via protocol |
|
|
137
|
+
| `notifications` | `OutputStream` of incoming notifications |
|
|
138
|
+
| `on_notification { \|msg\| }` | Callback for notifications (convenience) |
|
|
139
|
+
| `closed?` | Check if channel is closed |
|
|
140
|
+
|
|
141
|
+
> **Note:** `rpc.shutdown` sends the shutdown message and awaits acknowledgment. It does not affect process state — use `process.stop` for full lifecycle management, which calls `rpc.shutdown` internally as the first step.
|
|
142
|
+
|
|
143
|
+
### OutputStream
|
|
144
|
+
|
|
145
|
+
Unified interface for consuming streaming data via callbacks or iteration.
|
|
146
|
+
|
|
147
|
+
```ruby
|
|
148
|
+
# Callback style - inline handling
|
|
149
|
+
stream.on_data { |item| puts item }
|
|
150
|
+
|
|
151
|
+
# Iterator style - blocking, use in dedicated task
|
|
152
|
+
stream.each { |item| puts item }
|
|
153
|
+
|
|
154
|
+
# Enumerable
|
|
155
|
+
stream.each.take(10)
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
Both styles can be used simultaneously on the same stream.
|
|
159
|
+
|
|
160
|
+
| Method | Description |
|
|
161
|
+
|--------|-------------|
|
|
162
|
+
| `on_data { \|item\| }` | Register callback (can register multiple) |
|
|
163
|
+
| `each { \|item\| }` | Blocking iterator, yields until stream closes |
|
|
164
|
+
| `closed?` | Check if stream is closed |
|
|
165
|
+
|
|
166
|
+
### WorkerGroup
|
|
167
|
+
|
|
168
|
+
Coordinates multiple identical workers in a fan-out pattern.
|
|
169
|
+
|
|
170
|
+
```ruby
|
|
171
|
+
group = WorkerGroup.new(
|
|
172
|
+
size: 4,
|
|
173
|
+
command: ['ruby', 'worker.rb'],
|
|
174
|
+
env: { 'MODE' => 'batch' },
|
|
175
|
+
rpc: ChannelConfig.stdio_rpc
|
|
176
|
+
)
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
#### Behavior
|
|
180
|
+
|
|
181
|
+
- All workers run the same command
|
|
182
|
+
- Each worker receives `WORKER_INDEX` env var (0, 1, 2, ...)
|
|
183
|
+
- If any worker exits with non-zero status, all other workers are killed immediately
|
|
184
|
+
- No automatic restart - caller handles recovery
|
|
185
|
+
|
|
186
|
+
#### Methods
|
|
187
|
+
|
|
188
|
+
| Method | Description |
|
|
189
|
+
|--------|-------------|
|
|
190
|
+
| `start(task:)` | Spawn all workers |
|
|
191
|
+
| `workers` | Returns array of all workers (also aliased as `to_a`) |
|
|
192
|
+
| `[index]` | Access worker by index |
|
|
193
|
+
| `each { \|worker\| }` | Iterate over workers |
|
|
194
|
+
| `size` | Number of workers |
|
|
195
|
+
| `stop(timeout: 5)` | Graceful shutdown of all workers (parallel) |
|
|
196
|
+
| `kill` | Immediate kill of all workers |
|
|
197
|
+
| `alive?` | True if all workers are running |
|
|
198
|
+
| `failed?` | True if any worker has failed |
|
|
199
|
+
| `failure` | The `WorkerFailure` exception (or nil) |
|
|
200
|
+
| `wait_for_failure` | Block until a worker fails |
|
|
201
|
+
|
|
202
|
+
`WorkerGroup` includes `Enumerable`, providing `map`, `select`, `each_with_index`, etc.
|
|
203
|
+
|
|
204
|
+
## Protocol Specification
|
|
205
|
+
|
|
206
|
+
### Message Format
|
|
207
|
+
|
|
208
|
+
Messages are newline-delimited JSON objects.
|
|
209
|
+
|
|
210
|
+
```
|
|
211
|
+
{"id":"uuid-1","action":"compute","x":42}\n
|
|
212
|
+
{"id":"uuid-2","reply_to":"uuid-1","result":84}\n
|
|
213
|
+
{"type":"progress","percent":50}\n
|
|
214
|
+
```
|
|
215
|
+
|
|
216
|
+
### Message Fields
|
|
217
|
+
|
|
218
|
+
| Field | Required | Description |
|
|
219
|
+
|-------|----------|-------------|
|
|
220
|
+
| `id` | No | Message identifier for correlation |
|
|
221
|
+
| `reply_to` | No | References the `id` of the request being answered |
|
|
222
|
+
| `...` | - | Arbitrary payload fields |
|
|
223
|
+
|
|
224
|
+
### Message Types
|
|
225
|
+
|
|
226
|
+
| Has `id` | Has `reply_to` | Type | Description |
|
|
227
|
+
|----------|----------------|------|-------------|
|
|
228
|
+
| Yes | No | Request | Expects a response |
|
|
229
|
+
| No | No | Notification (outbound) | Fire-and-forget to child |
|
|
230
|
+
| - | Yes | Response | Reply to a request |
|
|
231
|
+
| No | No | Notification (inbound) | Unsolicited message from child |
|
|
232
|
+
|
|
233
|
+
### Graceful Shutdown Protocol
|
|
234
|
+
|
|
235
|
+
The parent sends a shutdown request:
|
|
236
|
+
|
|
237
|
+
```json
|
|
238
|
+
{"id":"shutdown-1","action":"__shutdown__"}
|
|
239
|
+
```
|
|
240
|
+
|
|
241
|
+
The child should:
|
|
242
|
+
1. Stop accepting new work
|
|
243
|
+
2. Complete or abort in-flight work
|
|
244
|
+
3. Send response: `{"reply_to":"shutdown-1","status":"shutting_down"}`
|
|
245
|
+
4. Exit cleanly
|
|
246
|
+
|
|
247
|
+
If the child doesn't respond within the timeout, the parent escalates to SIGTERM and then to SIGKILL.
|
|
248
|
+
|
|
249
|
+
## Transport Details
|
|
250
|
+
|
|
251
|
+
### stdio Transport
|
|
252
|
+
|
|
253
|
+
- Parent writes to child's stdin
|
|
254
|
+
- Parent reads from child's stdout
|
|
255
|
+
- stderr is separate, always captured as log output
|
|
256
|
+
- Simplest setup, no filesystem artifacts
|
|
257
|
+
|
|
258
|
+
### Unix Socket Transport
|
|
259
|
+
|
|
260
|
+
- Parent creates socket pair before spawning using `socketpair()`
|
|
261
|
+
- Both ends created atomically — no race conditions
|
|
262
|
+
- Child inherits one end via file descriptor
|
|
263
|
+
- Child receives `RPC_SOCKET_FD` environment variable
|
|
264
|
+
- Bidirectional on single socket
|
|
265
|
+
- stdout and stderr both available for logging
|
|
266
|
+
|
|
267
|
+
## Health Monitoring
|
|
268
|
+
|
|
269
|
+
The library uses two complementary mechanisms:
|
|
270
|
+
|
|
271
|
+
1. **Process status polling** - Periodic `Process.waitpid(pid, Process::WNOHANG)` to detect exits
|
|
272
|
+
2. **File descriptor closure** - EOF on transport streams indicates process termination
|
|
273
|
+
|
|
274
|
+
No heartbeat protocol is required. The polling interval is 500ms by default.
|
|
275
|
+
|
|
276
|
+
When an exit is detected, an internal `Async::Condition` is signaled, waking any fibers blocked on `wait`.
|
|
277
|
+
|
|
278
|
+
## Error Handling
|
|
279
|
+
|
|
280
|
+
### ChannelClosedError
|
|
281
|
+
|
|
282
|
+
Raised when attempting to send on a closed RPC channel, or when a pending request's channel closes.
|
|
283
|
+
|
|
284
|
+
### WorkerFailure
|
|
285
|
+
|
|
286
|
+
Raised by `WorkerGroup` when any worker exits with non-zero status. Contains:
|
|
287
|
+
- `worker_index` - Which worker failed
|
|
288
|
+
- `exit_status` - The `Process::Status` object
|
|
289
|
+
|
|
290
|
+
### Timeout Handling
|
|
291
|
+
|
|
292
|
+
RPC timeouts are specified per request only:
|
|
293
|
+
|
|
294
|
+
```ruby
|
|
295
|
+
process.rpc.request(payload, timeout: 30) # raises Async::TimeoutError
|
|
296
|
+
```
|
|
297
|
+
|
|
298
|
+
## Usage Examples
|
|
299
|
+
|
|
300
|
+
### Single Process with RPC
|
|
301
|
+
|
|
302
|
+
```ruby
|
|
303
|
+
Async do |task|
|
|
304
|
+
process = ManagedProcess.new(
|
|
305
|
+
command: ['ruby', 'worker.rb'],
|
|
306
|
+
env: { 'DEBUG' => '1' },
|
|
307
|
+
rpc: ChannelConfig.stdio_rpc
|
|
308
|
+
)
|
|
309
|
+
|
|
310
|
+
process.stderr.on_data { |line| logger.info("[worker] #{line}") }
|
|
311
|
+
process.on_exit { |status| logger.info("Worker exited: #{status}") }
|
|
312
|
+
|
|
313
|
+
process.start(task: task)
|
|
314
|
+
|
|
315
|
+
result = process.rpc.request({ action: 'compute', x: 42 }, timeout: 10)
|
|
316
|
+
|
|
317
|
+
process.stop
|
|
318
|
+
end
|
|
319
|
+
```
|
|
320
|
+
|
|
321
|
+
### Consuming Notifications via Iterator
|
|
322
|
+
|
|
323
|
+
```ruby
|
|
324
|
+
Async do |task|
|
|
325
|
+
process = ManagedProcess.new(
|
|
326
|
+
command: ['ruby', 'worker.rb'],
|
|
327
|
+
rpc: ChannelConfig.stdio_rpc
|
|
328
|
+
)
|
|
329
|
+
|
|
330
|
+
process.start(task: task)
|
|
331
|
+
|
|
332
|
+
# Dedicated task for notifications
|
|
333
|
+
task.async do
|
|
334
|
+
process.rpc.notifications.each do |msg|
|
|
335
|
+
case msg.payload[:type]
|
|
336
|
+
when 'progress'
|
|
337
|
+
update_progress_bar(msg.payload[:percent])
|
|
338
|
+
when 'log'
|
|
339
|
+
logger.info(msg.payload[:message])
|
|
340
|
+
end
|
|
341
|
+
end
|
|
342
|
+
end
|
|
343
|
+
|
|
344
|
+
# Dedicated task for stderr
|
|
345
|
+
task.async do
|
|
346
|
+
process.stderr.each { |line| logger.debug(line) }
|
|
347
|
+
end
|
|
348
|
+
|
|
349
|
+
# Main work
|
|
350
|
+
process.rpc.request({ action: 'long_running_task' })
|
|
351
|
+
|
|
352
|
+
process.stop
|
|
353
|
+
end
|
|
354
|
+
```
|
|
355
|
+
|
|
356
|
+
### Worker Group Fan-Out
|
|
357
|
+
|
|
358
|
+
```ruby
|
|
359
|
+
Async do |task|
|
|
360
|
+
group = WorkerGroup.new(
|
|
361
|
+
size: 4,
|
|
362
|
+
command: ['ruby', 'worker.rb'],
|
|
363
|
+
rpc: ChannelConfig.stdio_rpc
|
|
364
|
+
)
|
|
365
|
+
|
|
366
|
+
group.start(task: task)
|
|
367
|
+
|
|
368
|
+
# Set up output handlers
|
|
369
|
+
group.each_with_index do |worker, i|
|
|
370
|
+
worker.stderr.on_data { |line| logger.info("[worker-#{i}] #{line}") }
|
|
371
|
+
worker.rpc.on_notification { |msg| handle_notification(i, msg) }
|
|
372
|
+
end
|
|
373
|
+
|
|
374
|
+
# Fan-out work
|
|
375
|
+
work_items = ['a.txt', 'b.txt', 'c.txt', 'd.txt']
|
|
376
|
+
|
|
377
|
+
results = Async do |inner|
|
|
378
|
+
tasks = group.workers.zip(work_items).map do |worker, file|
|
|
379
|
+
inner.async do
|
|
380
|
+
worker.rpc.request({ action: 'process', file: file }, timeout: 30)
|
|
381
|
+
end
|
|
382
|
+
end
|
|
383
|
+
tasks.map(&:wait)
|
|
384
|
+
end.wait
|
|
385
|
+
|
|
386
|
+
group.stop
|
|
387
|
+
|
|
388
|
+
rescue AsyncWorkers::WorkerFailure => e
|
|
389
|
+
logger.error("Worker #{e.worker_index} failed")
|
|
390
|
+
# Group already killed remaining workers
|
|
391
|
+
end
|
|
392
|
+
```
|
|
393
|
+
|
|
394
|
+
### Output-Only Process (No RPC)
|
|
395
|
+
|
|
396
|
+
```ruby
|
|
397
|
+
Async do |task|
|
|
398
|
+
process = ManagedProcess.new(
|
|
399
|
+
command: ['./batch_job.sh', 'input.csv'],
|
|
400
|
+
rpc: ChannelConfig.no_rpc
|
|
401
|
+
)
|
|
402
|
+
|
|
403
|
+
lines = []
|
|
404
|
+
process.stdout.on_data { |line| lines << line }
|
|
405
|
+
process.stderr.on_data { |line| logger.warn(line) }
|
|
406
|
+
|
|
407
|
+
process.start(task: task)
|
|
408
|
+
|
|
409
|
+
# Wait for completion (yields the fiber; the calling code does no polling of its own)
|
|
410
|
+
process.wait
|
|
411
|
+
|
|
412
|
+
puts "Captured #{lines.size} output lines"
|
|
413
|
+
puts "Exit status: #{process.exit_status}"
|
|
414
|
+
end
|
|
415
|
+
```
|
|
416
|
+
|
|
417
|
+
### Waiting with Timeout
|
|
418
|
+
|
|
419
|
+
```ruby
|
|
420
|
+
Async do |task|
|
|
421
|
+
process = ManagedProcess.new(
|
|
422
|
+
command: ['./slow_job.sh'],
|
|
423
|
+
rpc: ChannelConfig.no_rpc
|
|
424
|
+
)
|
|
425
|
+
|
|
426
|
+
process.start(task: task)
|
|
427
|
+
|
|
428
|
+
begin
|
|
429
|
+
process.wait(timeout: 30)
|
|
430
|
+
puts "Completed: #{process.exit_status}"
|
|
431
|
+
rescue Async::TimeoutError
|
|
432
|
+
logger.warn("Process timed out, killing")
|
|
433
|
+
process.kill
|
|
434
|
+
end
|
|
435
|
+
end
|
|
436
|
+
```
|
|
437
|
+
|
|
438
|
+
## Child Process Implementation Guide
|
|
439
|
+
|
|
440
|
+
Child processes must implement the protocol to communicate with the parent.
|
|
441
|
+
|
|
442
|
+
### Detecting Transport Mode
|
|
443
|
+
|
|
444
|
+
```ruby
|
|
445
|
+
if ENV['RPC_SOCKET_FD']
|
|
446
|
+
# Unix socket mode
|
|
447
|
+
socket = IO.for_fd(ENV['RPC_SOCKET_FD'].to_i)
|
|
448
|
+
run(input: socket, output: socket)
|
|
449
|
+
else
|
|
450
|
+
# stdio mode (default)
|
|
451
|
+
$stdout.sync = true
|
|
452
|
+
run(input: $stdin, output: $stdout)
|
|
453
|
+
end
|
|
454
|
+
```
|
|
455
|
+
|
|
456
|
+
### Message Handling Loop
|
|
457
|
+
|
|
458
|
+
```ruby
|
|
459
|
+
def run(input:, output:)
|
|
460
|
+
running = true
|
|
461
|
+
|
|
462
|
+
while running && (line = input.gets)
|
|
463
|
+
msg = JSON.parse(line.chomp, symbolize_names: true)
|
|
464
|
+
|
|
465
|
+
if msg[:action] == '__shutdown__'
|
|
466
|
+
running = false
|
|
467
|
+
send_response(output, msg[:id], { status: 'shutting_down' })
|
|
468
|
+
next
|
|
469
|
+
end
|
|
470
|
+
|
|
471
|
+
result = process_message(msg)
|
|
472
|
+
send_response(output, msg[:id], result) if msg[:id]
|
|
473
|
+
end
|
|
474
|
+
end
|
|
475
|
+
|
|
476
|
+
def send_response(output, request_id, payload)
|
|
477
|
+
response = payload.merge(reply_to: request_id)
|
|
478
|
+
output.puts(response.to_json)
|
|
479
|
+
output.flush
|
|
480
|
+
end
|
|
481
|
+
|
|
482
|
+
def send_notification(output, payload)
|
|
483
|
+
output.puts(payload.to_json)
|
|
484
|
+
output.flush
|
|
485
|
+
end
|
|
486
|
+
```
|
|
487
|
+
|
|
488
|
+
### Sending Progress Notifications
|
|
489
|
+
|
|
490
|
+
```ruby
|
|
491
|
+
def process_message(msg)
|
|
492
|
+
case msg[:action]
|
|
493
|
+
when 'long_task'
|
|
494
|
+
msg[:items].each_with_index do |item, i|
|
|
495
|
+
# Send progress notification (no id = notification)
|
|
496
|
+
send_notification(@output, {
|
|
497
|
+
type: 'progress',
|
|
498
|
+
percent: ((i + 1) * 100.0 / msg[:items].size).round
|
|
499
|
+
})
|
|
500
|
+
|
|
501
|
+
process_item(item)
|
|
502
|
+
end
|
|
503
|
+
|
|
504
|
+
{ status: 'complete' }
|
|
505
|
+
end
|
|
506
|
+
end
|
|
507
|
+
```
|
|
508
|
+
|
|
509
|
+
## Dependencies
|
|
510
|
+
|
|
511
|
+
- `async` - Fiber-based concurrency
|
|
512
|
+
- Ruby stdlib: `open3`, `json`, `socket`
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module AsyncWorkers
  # Base error class for all AsyncWorkers errors
  class Error < StandardError; end

  # Raised when attempting to send on a closed RPC channel,
  # or when a pending request's channel closes
  class ChannelClosedError < Error; end

  # Raised by WorkerGroup when any worker exits with non-zero status
  class WorkerFailure < Error
    attr_reader :worker_index, :exit_status

    # @param worker_index [Integer] index of the worker that failed
    # @param exit_status [Process::Status, nil] status of the failed worker;
    #   stored as given and exposed through #exit_status
    def initialize(worker_index:, exit_status:)
      @worker_index = worker_index
      @exit_status = exit_status
      super("Worker #{worker_index} failed with #{describe_exit(exit_status)}")
    end

    private

    # Builds the tail of the error message.
    #
    # A status's exitstatus is nil when the process did not exit normally
    # (for example when it was terminated by a signal), so interpolating it
    # directly would leave the message ending in a blank. In that case the
    # terminating signal is reported instead, when one is available.
    def describe_exit(status)
      code = status&.exitstatus
      return "status #{code}" if code

      signal = status.termsig if status.respond_to?(:termsig)
      signal ? "signal #{signal}" : "unknown status"
    end
  end
end
|