@gotgenes/pi-subagents 11.0.1 → 11.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +24 -0
- package/docs/architecture/architecture.md +20 -15
- package/docs/plans/0230-extract-concurrency-queue.md +265 -0
- package/docs/plans/0232-agent-resume-internal-observer-lifecycle.md +180 -0
- package/docs/retro/0230-extract-concurrency-queue.md +38 -0
- package/docs/retro/0232-agent-resume-internal-observer-lifecycle.md +45 -0
- package/package.json +1 -1
- package/src/index.ts +15 -3
- package/src/lifecycle/agent-manager.ts +23 -69
- package/src/lifecycle/agent.ts +35 -1
- package/src/lifecycle/concurrency-queue.ts +63 -0
package/CHANGELOG.md
CHANGED
|
@@ -5,6 +5,30 @@ All notable changes to this project will be documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## [11.2.0](https://github.com/gotgenes/pi-packages/compare/pi-subagents-v11.1.0...pi-subagents-v11.2.0) (2026-05-28)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
### Features
|
|
12
|
+
|
|
13
|
+
* add Agent.resume() with internal observer lifecycle ([6cffb47](https://github.com/gotgenes/pi-packages/commit/6cffb47079e385b0ccd12e358c12357291be2ef0))
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
### Bug Fixes
|
|
17
|
+
|
|
18
|
+
* release abort-signal listener when worktree setup fails ([ce2cac6](https://github.com/gotgenes/pi-packages/commit/ce2cac6788ffc90316f759e40e4df29576a70128))
|
|
19
|
+
|
|
20
|
+
## [11.1.0](https://github.com/gotgenes/pi-packages/compare/pi-subagents-v11.0.1...pi-subagents-v11.1.0) (2026-05-28)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
### Features
|
|
24
|
+
|
|
25
|
+
* **pi-subagents:** add ConcurrencyQueue class ([#230](https://github.com/gotgenes/pi-packages/issues/230)) ([9fff9b7](https://github.com/gotgenes/pi-packages/commit/9fff9b7fc318ad8bf5ac3a218ee7bf1c5e11104b))
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
### Documentation
|
|
29
|
+
|
|
30
|
+
* **pi-subagents:** update architecture for ConcurrencyQueue extraction ([#230](https://github.com/gotgenes/pi-packages/issues/230)) ([4bd69e1](https://github.com/gotgenes/pi-packages/commit/4bd69e16164132400c7e0f9e4ecfd9f41842247a))
|
|
31
|
+
|
|
8
32
|
## [11.0.1](https://github.com/gotgenes/pi-packages/compare/pi-subagents-v11.0.0...pi-subagents-v11.0.1) (2026-05-28)
|
|
9
33
|
|
|
10
34
|
|
|
@@ -53,7 +53,8 @@ flowchart TB
|
|
|
53
53
|
|
|
54
54
|
subgraph lifecycle["Lifecycle domain"]
|
|
55
55
|
direction TB
|
|
56
|
-
AgentManager["AgentManager<br/>(spawn,
|
|
56
|
+
AgentManager["AgentManager<br/>(spawn, abort, collection)"]
|
|
57
|
+
ConcurrencyQueue["ConcurrencyQueue<br/>(scheduling, drain)"]
|
|
57
58
|
AgentRunner["agent-runner<br/>(session, turns, results)"]
|
|
58
59
|
Agent["Agent<br/>(status, behavior: abort/steer/worktree/run lifecycle)"]
|
|
59
60
|
ParentSnapshot["ParentSnapshot<br/>(frozen parent state)"]
|
|
@@ -120,6 +121,8 @@ classDiagram
|
|
|
120
121
|
+markError()
|
|
121
122
|
+markStopped()
|
|
122
123
|
+resetForResume()
|
|
124
|
+
+run()
|
|
125
|
+
+resume(prompt, signal)
|
|
123
126
|
+abort(): boolean
|
|
124
127
|
+queueSteer(message)
|
|
125
128
|
+flushPendingSteers(session)
|
|
@@ -135,7 +138,7 @@ classDiagram
|
|
|
135
138
|
class AgentManager {
|
|
136
139
|
+spawn(snapshot, type, prompt, config)
|
|
137
140
|
+spawnAndWait(snapshot, type, prompt, config)
|
|
138
|
-
+resume(id, snapshot, exec)
|
|
141
|
+
+resume(id, prompt, signal)
|
|
139
142
|
+getRecord(id): Agent
|
|
140
143
|
+listAgents(): Agent[]
|
|
141
144
|
+abort(id)
|
|
@@ -266,9 +269,10 @@ src/
|
|
|
266
269
|
│ └── session-dir.ts session directory derivation
|
|
267
270
|
│
|
|
268
271
|
├── lifecycle/ agent execution and state tracking
|
|
269
|
-
│ ├── agent-manager.ts collection manager + concurrency controller
|
|
272
|
+
│ ├── agent-manager.ts collection manager + observer wiring
|
|
270
273
|
│ ├── agent-runner.ts session creation, turn loop, tool filtering
|
|
271
274
|
│ ├── agent.ts owns full execution lifecycle (run, abort, steer, worktree)
|
|
275
|
+
│ ├── concurrency-queue.ts background agent scheduling with configurable concurrency limit
|
|
272
276
|
│ ├── parent-snapshot.ts immutable spawn-time parent state
|
|
273
277
|
│ ├── execution-state.ts session/output phase state
|
|
274
278
|
│ ├── permission-bridge.ts optional bridge to pi-permission-system registry
|
|
@@ -346,7 +350,8 @@ They declare this package as an optional peer dependency and use dynamic import
|
|
|
346
350
|
### What the core owns
|
|
347
351
|
|
|
348
352
|
- The three tools: `subagent` (née `Agent`), `get_subagent_result`, `steer_subagent`.
|
|
349
|
-
- `AgentManager` — spawn,
|
|
353
|
+
- `AgentManager` — spawn, abort, resume, collection management, observer wiring.
|
|
354
|
+
- `ConcurrencyQueue` — background agent scheduling with configurable concurrency limit.
|
|
350
355
|
- `agent-runner` — session creation, turn loop, extension binding.
|
|
351
356
|
- `permission-bridge` — optional cross-extension bridge to `@gotgenes/pi-permission-system`; registers each child session with `SubagentSessionRegistry` before `bindExtensions()` so the permission system detects in-process children deterministically.
|
|
352
357
|
Scheduled for removal in Phase 16 — replaced by lifecycle events that consumers listen for.
|
|
@@ -712,7 +717,7 @@ Agent receives three concerns at construction:
|
|
|
712
717
|
5. Clean up worktree on completion or error.
|
|
713
718
|
6. Transition status.
|
|
714
719
|
|
|
715
|
-
`AgentManager` becomes a collection manager +
|
|
720
|
+
`AgentManager` becomes a collection manager + observer wiring:
|
|
716
721
|
|
|
717
722
|
- Creates complete Agent objects, stores them in the map.
|
|
718
723
|
- Decides when to run (immediate or queue) and calls `agent.run()`.
|
|
@@ -735,15 +740,15 @@ The scheduling concern (queue, concurrency counter, drain) is tangled into `Agen
|
|
|
735
740
|
|
|
736
741
|
### Findings summary
|
|
737
742
|
|
|
738
|
-
| Finding
|
|
739
|
-
|
|
|
740
|
-
| ~~`AgentRecord` is anemic — no behavior, manager reaches in 37×~~
|
|
741
|
-
| Agent cannot run itself — manager orchestrates 10 external touches | C: Coupling | 5 | 3 |
|
|
742
|
-
| Scheduling tangled into `AgentManager` (3 fields, 3 methods) | A: Coupling | 4 | 2 |
|
|
743
|
-
| ~~`startAgent` uses `.then()`/`.catch()` instead of async/await~~
|
|
744
|
-
| ~~`onSessionCreated` callback flows through 3 layers~~
|
|
745
|
-
|
|
|
746
|
-
|
|
|
743
|
+
| Finding | Category | Impact | Risk | Priority |
|
|
744
|
+
| ---------------------------------------------------------------------- | ------------ | ------ | ---- | -------- |
|
|
745
|
+
| ~~`AgentRecord` is anemic — no behavior, manager reaches in 37×~~ | B: Oversized | 5 | 3 | ✅ |
|
|
746
|
+
| ~~Agent cannot run itself — manager orchestrates 10 external touches~~ | C: Coupling | 5 | 3 | ✅ |
|
|
747
|
+
| ~~Scheduling tangled into `AgentManager` (3 fields, 3 methods)~~ | A: Coupling | 4 | 2 | ✅ |
|
|
748
|
+
| ~~`startAgent` uses `.then()`/`.catch()` instead of async/await~~ | C: Callbacks | 3 | 2 | ✅ |
|
|
749
|
+
| ~~`onSessionCreated` callback flows through 3 layers~~ | C: Callbacks | 3 | 2 | subsumed |
|
|
750
|
+
| ~~`resume()` duplicates observer subscribe/unsubscribe pattern~~ | A: Redundant | 2 | 1 | ✅ |
|
|
751
|
+
| ~~`exec`/`registry` relay-only deps on `AgentManager`~~ | C: Coupling | 2 | 1 | ✅ |
|
|
747
752
|
|
|
748
753
|
### Step 1: Evolve AgentRecord into Agent with behavior — [#227] ✅ Complete
|
|
749
754
|
|
|
@@ -809,7 +814,7 @@ Drain calls `agent.run()` directly — no worktree setup, no args threading.
|
|
|
809
814
|
- Smell: A (tangled concerns) + C (cross-concern leak via `notifyConcurrencyChanged`)
|
|
810
815
|
- Outcome: `AgentManager` loses 3 fields, 3 methods (~40 lines); scheduling is independently testable; queue interface is trivial (agent has everything)
|
|
811
816
|
|
|
812
|
-
### Step 6: Agent.resume() with internal observer lifecycle — [#232]
|
|
817
|
+
### Step 6: Agent.resume() with internal observer lifecycle — [#232] ✅ Complete
|
|
813
818
|
|
|
814
819
|
Agent has the runner from construction.
|
|
815
820
|
`Agent.resume(prompt, signal)` manages its own observer subscription lifecycle using the same internal wiring as `run()`.
|
|
@@ -0,0 +1,265 @@
|
|
|
1
|
+
---
|
|
2
|
+
issue: 230
|
|
3
|
+
issue_title: "Extract ConcurrencyQueue from AgentManager (Phase 15, Step 5)"
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Extract ConcurrencyQueue from AgentManager
|
|
7
|
+
|
|
8
|
+
## Problem Statement
|
|
9
|
+
|
|
10
|
+
`AgentManager` tangles two concerns: agent collection management and scheduling.
|
|
11
|
+
The scheduling concern — `queue[]`, `runningBackground`, `_getMaxConcurrent`, `drainQueue()`, `finalizeBackgroundRun()`, `notifyConcurrencyChanged()` — is 3 fields + 3 methods of cohesive, separable logic.
|
|
12
|
+
`notifyConcurrencyChanged()` is a scheduling method exposed as a public API on the wrong object so that `SettingsManager` can poke the queue after a concurrency limit change.
|
|
13
|
+
This cross-concern leak violates SRP and prevents independent testing of the queue.
|
|
14
|
+
|
|
15
|
+
## Goals
|
|
16
|
+
|
|
17
|
+
- Extract scheduling logic into a `ConcurrencyQueue` class in `src/lifecycle/concurrency-queue.ts`.
|
|
18
|
+
- Delete `notifyConcurrencyChanged()` from `AgentManager` — settings triggers drain on the queue directly via the existing callback wiring.
|
|
19
|
+
- Make scheduling independently testable with fast, focused unit tests.
|
|
20
|
+
- `AgentManager` becomes a pure collection manager (agents Map, lookup, cleanup, iteration) + observer wiring.
|
|
21
|
+
|
|
22
|
+
## Non-Goals
|
|
23
|
+
|
|
24
|
+
- Changing `SettingsManager` internals — the `onMaxConcurrentChanged` callback stays; only the wiring target changes (`queue.drain()` instead of `manager.notifyConcurrencyChanged()`).
|
|
25
|
+
- Extracting `Agent.resume()` — tracked in #232.
|
|
26
|
+
- Changing the concurrency semantics (limits, drain order, foreground bypass).
|
|
27
|
+
|
|
28
|
+
## Background
|
|
29
|
+
|
|
30
|
+
### Dependencies
|
|
31
|
+
|
|
32
|
+
Both dependencies are implemented:
|
|
33
|
+
|
|
34
|
+
- Issue #229 (Agent.run()) — ✅ closed.
|
|
35
|
+
Agent owns its full execution lifecycle; `startAgent` and `SpawnArgs` are gone.
|
|
36
|
+
- Issue #231 (runner self-contained) — ✅ closed.
|
|
37
|
+
Agent holds the runner at construction.
|
|
38
|
+
|
|
39
|
+
### Current queue surface in AgentManager
|
|
40
|
+
|
|
41
|
+
| Member | Kind | Purpose |
|
|
42
|
+
| --------------------------------- | ------ | ------------------------------------------------ |
|
|
43
|
+
| `queue: string[]` | field | IDs of background agents waiting to start |
|
|
44
|
+
| `runningBackground: number` | field | Count of currently running background agents |
|
|
45
|
+
| `_getMaxConcurrent: () => number` | field | Injected getter for the concurrency limit |
|
|
46
|
+
| `drainQueue()` | method | Start queued agents up to the limit |
|
|
47
|
+
| `finalizeBackgroundRun()` | method | Decrement counter, notify observer, drain |
|
|
48
|
+
| `notifyConcurrencyChanged()` | method | Public entry point for settings to trigger drain |
|
|
49
|
+
|
|
50
|
+
These 6 members form a cohesive unit — they only reference each other and the agents Map (for status checks during drain).
|
|
51
|
+
|
|
52
|
+
### Callers of queue logic in AgentManager
|
|
53
|
+
|
|
54
|
+
- `spawn()` — checks `runningBackground >= getMaxConcurrent()`, pushes to `queue` or starts.
|
|
55
|
+
- `buildObserver().onStarted` — increments `runningBackground`.
|
|
56
|
+
- `buildObserver().onRunFinished` — calls `finalizeBackgroundRun()`.
|
|
57
|
+
- `abort()` — filters `queue` to remove an aborted ID.
|
|
58
|
+
- `abortAll()` — iterates `queue`, clears it.
|
|
59
|
+
- `waitForAll()` — calls `drainQueue()`.
|
|
60
|
+
- `dispose()` — clears `queue`.
|
|
61
|
+
|
|
62
|
+
### Agent comment to update
|
|
63
|
+
|
|
64
|
+
`agent.ts` line 366 has a comment: "Queue removal stays on AgentManager until #230 extracts ConcurrencyQueue."
|
|
65
|
+
This comment should be updated to remove the forward reference.
|
|
66
|
+
|
|
67
|
+
## Design Overview
|
|
68
|
+
|
|
69
|
+
### ConcurrencyQueue class
|
|
70
|
+
|
|
71
|
+
```typescript
|
|
72
|
+
export class ConcurrencyQueue {
|
|
73
|
+
private queue: string[] = [];
|
|
74
|
+
private running = 0;
|
|
75
|
+
|
|
76
|
+
constructor(
|
|
77
|
+
private readonly getMaxConcurrent: () => number,
|
|
78
|
+
private readonly startAgent: (id: string) => void,
|
|
79
|
+
) {}
|
|
80
|
+
|
|
81
|
+
isFull(): boolean;
|
|
82
|
+
enqueue(id: string): void;
|
|
83
|
+
dequeue(id: string): boolean;
|
|
84
|
+
markStarted(): void;
|
|
85
|
+
markFinished(): void; // running--, drain()
|
|
86
|
+
drain(): void;
|
|
87
|
+
get queuedIds(): readonly string[];
|
|
88
|
+
clear(): void;
|
|
89
|
+
}
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
### Design decision: stored start callback
|
|
93
|
+
|
|
94
|
+
The issue proposes `drain(start: (id: string) => void)` with the callback as a parameter.
|
|
95
|
+
However, the issue also proposes `markFinished()` as no-arg with "running--, drain()" semantics — which contradicts `drain` requiring a callback parameter.
|
|
96
|
+
|
|
97
|
+
Resolution: store the `startAgent` callback at construction.
|
|
98
|
+
This makes `drain()` and `markFinished()` both no-arg, follows Tell-Don't-Ask (the queue is a self-contained unit), and avoids requiring callers to pass the same callback repeatedly.
|
|
99
|
+
|
|
100
|
+
The `startAgent` callback is provided by the wiring layer (`index.ts`) using the established forward-reference-via-closure pattern already used for `onMaxConcurrentChanged`:
|
|
101
|
+
|
|
102
|
+
```typescript
|
|
103
|
+
// index.ts
|
|
104
|
+
const queue = new ConcurrencyQueue(
|
|
105
|
+
() => settings.maxConcurrent,
|
|
106
|
+
(id) => {
|
|
107
|
+
const agent = manager.getRecord(id);
|
|
108
|
+
if (agent?.status !== "queued") return;
|
|
109
|
+
agent.promise = agent.run();
|
|
110
|
+
},
|
|
111
|
+
);
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
### Ordering note
|
|
115
|
+
|
|
116
|
+
`markFinished()` calls `drain()` internally.
|
|
117
|
+
The current `finalizeBackgroundRun()` order is: decrement → observer notification → drain.
|
|
118
|
+
After extraction: `queue.markFinished()` (decrement + drain) → observer notification.
|
|
119
|
+
Drain fires before the observer notification.
|
|
120
|
+
|
|
121
|
+
This reordering is safe: `drain()` only starts promises (no await), and the observer notification (`onAgentCompleted`) processes the completed agent's data without referencing queue state.
|
|
122
|
+
|
|
123
|
+
### AgentManager after extraction
|
|
124
|
+
|
|
125
|
+
```typescript
|
|
126
|
+
export interface AgentManagerOptions {
|
|
127
|
+
runner: AgentRunner;
|
|
128
|
+
worktrees: WorktreeManager;
|
|
129
|
+
queue: ConcurrencyQueue; // was: getMaxConcurrent
|
|
130
|
+
getRunConfig?: () => RunConfig;
|
|
131
|
+
observer?: AgentManagerObserver;
|
|
132
|
+
}
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
`AgentManager` loses `queue`, `runningBackground`, `_getMaxConcurrent`, `drainQueue()`, `finalizeBackgroundRun()`, `notifyConcurrencyChanged()`.
|
|
136
|
+
|
|
137
|
+
### Settings wiring
|
|
138
|
+
|
|
139
|
+
Before:
|
|
140
|
+
|
|
141
|
+
```typescript
|
|
142
|
+
onMaxConcurrentChanged: () => manager.notifyConcurrencyChanged(),
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
After:
|
|
146
|
+
|
|
147
|
+
```typescript
|
|
148
|
+
onMaxConcurrentChanged: () => queue.drain(),
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
`SettingsManager` itself does not change — it still invokes the stored callback.
|
|
152
|
+
The callback wiring in `index.ts` targets the queue directly instead of the manager.
|
|
153
|
+
|
|
154
|
+
### Consumer call site (AgentManager.buildObserver)
|
|
155
|
+
|
|
156
|
+
```typescript
|
|
157
|
+
private buildObserver(options: AgentSpawnConfig): AgentLifecycleObserver {
|
|
158
|
+
return {
|
|
159
|
+
onStarted: (agent) => {
|
|
160
|
+
if (options.isBackground) this.queue.markStarted();
|
|
161
|
+
this.observer?.onAgentStarted(agent);
|
|
162
|
+
},
|
|
163
|
+
onRunFinished: (agent) => {
|
|
164
|
+
if (options.isBackground) {
|
|
165
|
+
this.queue.markFinished();
|
|
166
|
+
try { this.observer?.onAgentCompleted(agent); }
|
|
167
|
+
catch (err) { debugLog("onAgentCompleted observer", err); }
|
|
168
|
+
}
|
|
169
|
+
},
|
|
170
|
+
// onSessionCreated, onCompacted unchanged
|
|
171
|
+
};
|
|
172
|
+
}
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
### Test helper (createManager)
|
|
176
|
+
|
|
177
|
+
```typescript
|
|
178
|
+
function createManager(overrides?: { ...; getMaxConcurrent?: () => number; }) {
|
|
179
|
+
let mgr: AgentManager;
|
|
180
|
+
const queue = new ConcurrencyQueue(
|
|
181
|
+
overrides?.getMaxConcurrent ?? (() => 4),
|
|
182
|
+
(id) => {
|
|
183
|
+
const record = mgr.getRecord(id);
|
|
184
|
+
if (record?.status !== "queued") return;
|
|
185
|
+
record.promise = record.run();
|
|
186
|
+
},
|
|
187
|
+
);
|
|
188
|
+
mgr = new AgentManager({ ..., queue });
|
|
189
|
+
return { manager: mgr, ..., queue };
|
|
190
|
+
}
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
The forward-reference-via-closure is safe because `drain()` is never called during construction.
|
|
194
|
+
The `getMaxConcurrent` parameter name stays in the test helper for readability; it's passed to `ConcurrencyQueue`.
|
|
195
|
+
|
|
196
|
+
## Module-Level Changes
|
|
197
|
+
|
|
198
|
+
| File | Change |
|
|
199
|
+
| ------------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
|
200
|
+
| `src/lifecycle/concurrency-queue.ts` | **Add** — new `ConcurrencyQueue` class |
|
|
201
|
+
| `src/lifecycle/agent-manager.ts` | **Change** — remove 3 fields, 3 methods; add `queue: ConcurrencyQueue` to options; update `buildObserver`, `spawn`, `abort`, `abortAll`, `waitForAll`, `dispose` |
|
|
202
|
+
| `src/lifecycle/agent.ts` | **Change** — update comment on `abort()` (remove #230 forward reference) |
|
|
203
|
+
| `src/index.ts` | **Change** — create `ConcurrencyQueue`, pass to manager, wire settings to `queue.drain()` |
|
|
204
|
+
| `test/lifecycle/concurrency-queue.test.ts` | **Add** — unit tests for ConcurrencyQueue |
|
|
205
|
+
| `test/lifecycle/agent-manager.test.ts` | **Change** — update `createManager` helper to construct ConcurrencyQueue; no queue-behavior tests removed (they remain as integration tests) |
|
|
206
|
+
| `docs/architecture/architecture.md` | **Change** — add `concurrency-queue.ts` to layout listing; update agent-manager description |
|
|
207
|
+
|
|
208
|
+
## Test Impact Analysis
|
|
209
|
+
|
|
210
|
+
### New unit tests enabled by extraction
|
|
211
|
+
|
|
212
|
+
1. `isFull()` boundary — returns false when running < max, true when running >= max.
|
|
213
|
+
2. `enqueue()` / `dequeue()` — add/remove from queue, dequeue returns false for missing ID.
|
|
214
|
+
3. `markStarted()` / `markFinished()` — increment/decrement running count.
|
|
215
|
+
4. `drain()` — calls `startAgent` for each queued ID until full; skips when already full; handles empty queue.
|
|
216
|
+
5. `markFinished()` auto-drain — decrement triggers drain of next queued agent.
|
|
217
|
+
6. `clear()` — empties queue without starting agents.
|
|
218
|
+
7. `queuedIds` — snapshot of queue for iteration.
|
|
219
|
+
|
|
220
|
+
These tests were previously impossible because queue logic was interleaved with agent creation, observer notifications, and session management in `AgentManager`.
|
|
221
|
+
|
|
222
|
+
### Existing tests that stay as-is
|
|
223
|
+
|
|
224
|
+
- "queueing and concurrency with injected stubs" — integration tests verifying end-to-end spawn→queue→drain through the full AgentManager stack.
|
|
225
|
+
They still provide value as wiring tests.
|
|
226
|
+
- All observer notification tests — test observer wiring which stays in AgentManager.
|
|
227
|
+
- Bug race condition tests, worktree tests, execution state tests, lifecycle observer forwarding tests — independent of queue.
|
|
228
|
+
|
|
229
|
+
### Existing tests that need updating
|
|
230
|
+
|
|
231
|
+
- `createManager` helper — accepts `getMaxConcurrent` but passes it to `ConcurrencyQueue` constructor instead of `AgentManagerOptions`.
|
|
232
|
+
|
|
233
|
+
## TDD Order
|
|
234
|
+
|
|
235
|
+
1. **Red→Green: ConcurrencyQueue class + tests.**
|
|
236
|
+
New `src/lifecycle/concurrency-queue.ts` with `isFull`, `enqueue`, `dequeue`, `markStarted`, `markFinished`, `drain`, `clear`, `queuedIds`.
|
|
237
|
+
New `test/lifecycle/concurrency-queue.test.ts` covering: full boundary, enqueue/dequeue, start/finish counting, drain ordering, markFinished auto-drain, clear, empty-queue no-op.
|
|
238
|
+
Commit: `feat(pi-subagents): add ConcurrencyQueue class (#230)`
|
|
239
|
+
|
|
240
|
+
2. **Red→Green: Migrate AgentManager to use ConcurrencyQueue.**
|
|
241
|
+
Update `AgentManagerOptions`: replace `getMaxConcurrent` with `queue: ConcurrencyQueue`.
|
|
242
|
+
Update constructor, `buildObserver`, `spawn`, `abort`, `abortAll`, `waitForAll`, `dispose`.
|
|
243
|
+
Delete: `queue` field, `runningBackground` field, `_getMaxConcurrent` field, `notifyConcurrencyChanged()`, `drainQueue()`, `finalizeBackgroundRun()`.
|
|
244
|
+
Update `test/lifecycle/agent-manager.test.ts`: revise `createManager` helper to construct `ConcurrencyQueue` internally.
|
|
245
|
+
Update `src/index.ts`: construct `ConcurrencyQueue`, pass to `AgentManager`, wire `onMaxConcurrentChanged` to `queue.drain()`.
|
|
246
|
+
Update `src/lifecycle/agent.ts`: remove #230 forward-reference comment on `abort()`.
|
|
247
|
+
Run `pnpm run check` after this step.
|
|
248
|
+
Commit: `refactor(pi-subagents): replace inline queue with ConcurrencyQueue (#230)`
|
|
249
|
+
|
|
250
|
+
3. **Docs: Update architecture.**
|
|
251
|
+
Update `docs/architecture/architecture.md`: add `concurrency-queue.ts` to layout listing under `lifecycle/`, update `agent-manager.ts` description from "collection manager + concurrency controller" to "collection manager + observer wiring".
|
|
252
|
+
Commit: `docs(pi-subagents): update architecture for ConcurrencyQueue extraction (#230)`
|
|
253
|
+
|
|
254
|
+
## Risks and Mitigations
|
|
255
|
+
|
|
256
|
+
| Risk | Mitigation |
|
|
257
|
+
| ------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
|
258
|
+
| Forward-reference-via-closure in test helper and index.ts could break if drain is called during construction | ConcurrencyQueue constructor does not call drain; drain is only called after agents exist. Same pattern already used for `onMaxConcurrentChanged`. |
|
|
259
|
+
| `markFinished()` auto-drain changes ordering (drain before observer notification) | Verified: observer notification only processes the completed agent's data and does not reference queue state. Drain starts promises without awaiting — no observable behavior change. |
|
|
260
|
+
| `markStarted()` called synchronously inside drain loop could miscount | Verified: `Agent.run()` calls `observer.onStarted()` synchronously before the first await, so `markStarted()` fires before control returns to the drain while-loop. The running count is always current. |
|
|
261
|
+
| Integration tests in agent-manager.test.ts break after migration | Tests continue to work because the `createManager` helper constructs the ConcurrencyQueue internally with the same `getMaxConcurrent` semantics. Queue behavior is preserved. |
|
|
262
|
+
|
|
263
|
+
## Open Questions
|
|
264
|
+
|
|
265
|
+
None — the issue's proposed change is unambiguous and both dependencies are implemented.
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
---
|
|
2
|
+
issue: 232
|
|
3
|
+
issue_title: "Agent.resume() with internal observer lifecycle (Phase 15, Step 6)"
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Agent.resume() with internal observer lifecycle
|
|
7
|
+
|
|
8
|
+
## Problem Statement
|
|
9
|
+
|
|
10
|
+
After #229 (`Agent.run()` absorbs `startAgent`), the agent owns its entire run lifecycle, but `AgentManager.resume()` still duplicates the observer subscribe/use/release pattern that `run()` handles internally.
|
|
11
|
+
The manager manually calls `subscribeAgentObserver`, wraps `runner.resume()` in a try/catch/finally, marks completion/error, and unsubscribes — the same acquire → use → release resource shape `Agent.run()` already encapsulates.
|
|
12
|
+
This is the last "manager reaches into Agent" duplication in the Phase 15 roadmap (priority 8, smell A: redundant pattern).
|
|
13
|
+
|
|
14
|
+
## Goals
|
|
15
|
+
|
|
16
|
+
- Add `Agent.resume(prompt, signal?)` that owns its observer subscription lifecycle, mirroring `run()`'s internal wiring.
|
|
17
|
+
- Reduce `AgentManager.resume()` to a guard-plus-delegation method (no `subscribeAgentObserver`, no try/finally).
|
|
18
|
+
- Preserve the existing public contract of `AgentManager.resume()` exactly: same signature, same `Agent | undefined` return, same behavior when the record or session is missing.
|
|
19
|
+
- Keep the change non-breaking (`feat:`, not `feat!:`).
|
|
20
|
+
|
|
21
|
+
## Non-Goals
|
|
22
|
+
|
|
23
|
+
- No change to `runner.resume()` / `resumeAgent()` in `agent-runner.ts`.
|
|
24
|
+
- No change to the abort semantics of resume — the parent `signal` continues to flow straight through to `runner.resume({ signal })` (resume does not route through the agent's `abortController`, matching today's behavior).
|
|
25
|
+
- No queue interaction on resume — resume is not subject to the concurrency queue, so `onStarted`/`onRunFinished` are not fired (unchanged from today).
|
|
26
|
+
- No full rewrite of the stale `AgentManager`/`Agent` class diagram in `architecture.md` — that diagram already diverged in #229 (missing `run()`, stale `setupWorktree`/`completeRun`/`setOnRunFinished` signatures); a comprehensive diagram refresh is out of scope here.
|
|
27
|
+
|
|
28
|
+
## Background
|
|
29
|
+
|
|
30
|
+
Relevant modules (all under `packages/pi-subagents/src/`):
|
|
31
|
+
|
|
32
|
+
- `lifecycle/agent.ts` — the `Agent` class.
|
|
33
|
+
Already owns the per-run listener state (`_unsub`, `_detachFn`), the `attachObserver(unsub)` / `releaseListeners()` pair, `resetForResume(startedAt)` (which calls `releaseListeners()`), and `markCompleted` / `markError`.
|
|
34
|
+
Holds `_runner` and `observer` (an `AgentLifecycleObserver`) from construction (#229).
|
|
35
|
+
`Agent.run()` is the template to follow: it wires the observer via `attachObserver(subscribeAgentObserver(session, this, { onCompact: (r, info) => this.observer?.onCompacted?.(r, info) }))`.
|
|
36
|
+
- `lifecycle/agent-manager.ts` — `AgentManager.resume()` currently does the manual subscribe/try-finally dance and imports `subscribeAgentObserver` solely for that.
|
|
37
|
+
- `observation/record-observer.ts` — `subscribeAgentObserver(session, record, options)` returns an unsubscribe function; observes `tool_execution_end`, `message_end`, `compaction_end`.
|
|
38
|
+
- `lifecycle/agent-runner.ts` — `AgentRunner.resume(session, prompt, options?)` returns `Promise<string>` (the response text).
|
|
39
|
+
|
|
40
|
+
Constraint from AGENTS.md / `package-pi-subagents` skill: pi-subagents is a narrow core; this is a pure internal refactor (Tell-Don't-Ask, "state owns its mutations") with no policy or API-surface change.
|
|
41
|
+
|
|
42
|
+
### Observer routing equivalence
|
|
43
|
+
|
|
44
|
+
The manager's old resume wired compaction to the `AgentManagerObserver`:
|
|
45
|
+
|
|
46
|
+
```typescript
|
|
47
|
+
subscribeAgentObserver(session, record, {
|
|
48
|
+
onCompact: (r, info) => this.observer?.onAgentCompacted(r, info),
|
|
49
|
+
});
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
`Agent.resume()` instead routes through the per-agent `AgentLifecycleObserver` (`this.observer?.onCompacted?.`), exactly as `run()` does.
|
|
53
|
+
That lifecycle observer is built by `AgentManager.buildObserver()`, whose `onCompacted` forwards to `this.observer?.onAgentCompacted(agent, info)`.
|
|
54
|
+
Net routing is identical — compaction events still reach the manager-level `AgentManagerObserver.onAgentCompacted`.
|
|
55
|
+
|
|
56
|
+
## Design Overview
|
|
57
|
+
|
|
58
|
+
### `Agent.resume()`
|
|
59
|
+
|
|
60
|
+
```typescript
|
|
61
|
+
async resume(prompt: string, signal?: AbortSignal): Promise<void> {
|
|
62
|
+
if (!this._runner) {
|
|
63
|
+
throw new Error("Agent not configured for execution — missing runner");
|
|
64
|
+
}
|
|
65
|
+
const session = this.session;
|
|
66
|
+
if (!session) {
|
|
67
|
+
throw new Error("Agent not configured for resume — missing session");
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
this.resetForResume(Date.now()); // sets running, clears result/error, releases stale listeners
|
|
71
|
+
this.attachObserver(subscribeAgentObserver(session, this, {
|
|
72
|
+
onCompact: (r, info) => this.observer?.onCompacted?.(r, info),
|
|
73
|
+
}));
|
|
74
|
+
|
|
75
|
+
try {
|
|
76
|
+
const responseText = await this._runner.resume(session, prompt, { signal });
|
|
77
|
+
this.markCompleted(responseText);
|
|
78
|
+
} catch (err) {
|
|
79
|
+
this.markError(err);
|
|
80
|
+
} finally {
|
|
81
|
+
this.releaseListeners();
|
|
82
|
+
}
|
|
83
|
+
}
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
Decision model:
|
|
87
|
+
|
|
88
|
+
- `resetForResume()` already calls `releaseListeners()`, so any leftover handle from a prior run/resume is cleared before the new subscription is attached.
|
|
89
|
+
- The new subscription handle is stored via `attachObserver()` (reusing the `_unsub` slot shared with `run()`), and released in `finally` via `releaseListeners()`.
|
|
90
|
+
- Errors thrown by `runner.resume()` are captured (`markError`) rather than rethrown, so a failed resume still resolves, like `run()`; only the two guard failures below reject.
|
|
91
|
+
- The two guards (missing runner, missing session) mirror `run()`'s guard style.
|
|
92
|
+
They are defensive: the manager guards `agent?.session` before delegating, so the session guard is unreachable in normal flow but protects the invariant for direct `Agent.resume()` callers/tests.
|
|
93
|
+
|
|
94
|
+
### `AgentManager.resume()` (delegation)
|
|
95
|
+
|
|
96
|
+
```typescript
|
|
97
|
+
async resume(id: string, prompt: string, signal?: AbortSignal): Promise<Agent | undefined> {
|
|
98
|
+
const agent = this.agents.get(id);
|
|
99
|
+
if (!agent?.session) return undefined;
|
|
100
|
+
await agent.resume(prompt, signal);
|
|
101
|
+
return agent;
|
|
102
|
+
}
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
Edge cases preserved:
|
|
106
|
+
|
|
107
|
+
- Missing record → `undefined` (no throw).
|
|
108
|
+
- Record present but no session → `undefined` (no throw).
|
|
109
|
+
- Session present → delegate, return the agent.
|
|
110
|
+
|
|
111
|
+
After this change `agent-manager.ts` no longer references `subscribeAgentObserver` — that import must be removed.
|
|
112
|
+
`this.runner` is still used by `spawn()` (passed to the `Agent` constructor), so the `runner` field stays.
|
|
113
|
+
|
|
114
|
+
## Module-Level Changes
|
|
115
|
+
|
|
116
|
+
- `src/lifecycle/agent.ts`
|
|
117
|
+
- Add the public async method `resume(prompt: string, signal?: AbortSignal): Promise<void>` (placed near `run()` per the stepdown rule).
|
|
118
|
+
- No new imports — `subscribeAgentObserver` is already imported for `run()`.
|
|
119
|
+
- `src/lifecycle/agent-manager.ts`
|
|
120
|
+
- Replace the body of `resume()` with the guard-plus-delegation form above.
|
|
121
|
+
- Remove the now-unused `import { subscribeAgentObserver } from "#src/observation/record-observer";`.
|
|
122
|
+
- No other methods change.
|
|
123
|
+
- `src/lifecycle/agent-runner.ts` — unchanged.
|
|
124
|
+
- `src/observation/record-observer.ts` — unchanged.
|
|
125
|
+
- `docs/architecture/architecture.md` — light doc touch:
|
|
126
|
+
- In the class diagram, update `AgentManager.resume(id, snapshot, exec)` → `resume(id, prompt, signal)` and add `Agent.resume(prompt, signal)` (and, while there, `Agent.run()`, which #229 omitted).
|
|
127
|
+
- Mark Step 6 in the Phase 15 roadmap table/section as complete (`✅`).
|
|
128
|
+
- Note: the class diagram has pre-existing staleness from #229; this touch only corrects the resume-related entries, not the whole diagram.
|
|
129
|
+
|
|
130
|
+
Symbol-removal check: the only removed symbol is the `subscribeAgentObserver` import in `agent-manager.ts`.
|
|
131
|
+
`grep` confirms `subscribeAgentObserver` is still imported and used in `agent.ts` and defined in `record-observer.ts`, so the export stays live.
|
|
132
|
+
|
|
133
|
+
No file modified under Module-Level Changes is claimed unchanged in Non-Goals (Non-Goals lists only `agent-runner.ts` and `record-observer.ts`, which are genuinely untouched).
|
|
134
|
+
|
|
135
|
+
## Test Impact Analysis
|
|
136
|
+
|
|
137
|
+
This is an extraction/relocation of behavior from the manager into the agent.
|
|
138
|
+
|
|
139
|
+
1. New unit tests enabled — `Agent.resume()` can now be tested directly on `Agent` (file `test/lifecycle/agent.test.ts`), which was previously impossible because resume logic lived only in the manager.
|
|
140
|
+
New direct coverage:
|
|
141
|
+
- `resume()` transitions to `completed` and sets `result` from the runner's response text.
|
|
142
|
+
- `resume()` transitions to `error` (and does not throw) when `runner.resume()` rejects.
|
|
143
|
+
- `resume()` subscribes the record-observer to the session (usage/compaction events accumulate on the agent) and releases the subscription in `finally` (handle cleared after completion and after error).
|
|
144
|
+
- `resume()` throws on missing runner / missing session (guard symmetry with `run()`).
|
|
145
|
+
- Compaction during resume forwards through `this.observer?.onCompacted?.`.
|
|
146
|
+
|
|
147
|
+
2. Existing tests that become redundant — none should be deleted.
|
|
148
|
+
The two manager-level resume tests in `test/lifecycle/agent-manager.test.ts` (`resume() also accumulates usage and increments compactions on the same record` and `calls injected runner.resume when resuming an agent`) now exercise the delegation + observer-forwarding integration rather than the inlined logic.
|
|
149
|
+
They stay as integration coverage of `AgentManager.resume()` → `Agent.resume()` and the `onCompacted` → `onAgentCompacted` routing.
|
|
150
|
+
`test/helpers/make-deps.test.ts` (calls `manager.resume(...)`) stays.
|
|
151
|
+
|
|
152
|
+
3. Existing tests that must stay as-is — the manager-level resume tests above genuinely exercise the manager's guard + delegation seam and the observer routing through `buildObserver`, which the agent-level tests do not cover.
|
|
153
|
+
|
|
154
|
+
## TDD Order
|
|
155
|
+
|
|
156
|
+
1. `test/lifecycle/agent.test.ts` — add `Agent.resume()` happy-path + error + guard tests, then implement `Agent.resume()` in `agent.ts`.
|
|
157
|
+
Covers: completed/result on success, error (no throw) on rejection, observer subscribe + `releaseListeners()` in `finally`, compaction forwarding via `onCompacted`, and the missing-runner / missing-session guards.
|
|
158
|
+
At this point both the new `Agent.resume()` and the old `AgentManager.resume()` body coexist (lift-and-shift: introduce the new method alongside the old logic).
|
|
159
|
+
Commit: `feat: add Agent.resume() with internal observer lifecycle`
|
|
160
|
+
2. `test/lifecycle/agent-manager.test.ts` — keep the existing resume tests green, then collapse `AgentManager.resume()` to the guard-plus-delegation form and remove the unused `subscribeAgentObserver` import in the same commit.
|
|
161
|
+
Removing the import and rewriting the body must land together — the type checker flags the unused import immediately, and the existing manager-level resume tests verify the delegation still satisfies the same contract.
|
|
162
|
+
Commit: `refactor: delegate AgentManager.resume() to Agent.resume()`
|
|
163
|
+
3. `docs/architecture/architecture.md` — update the class diagram resume entries (and add `Agent.run()`/`Agent.resume()`), mark Step 6 complete.
|
|
164
|
+
Commit: `docs: mark Phase 15 Step 6 (Agent.resume) complete`
|
|
165
|
+
|
|
166
|
+
## Risks and Mitigations
|
|
167
|
+
|
|
168
|
+
- Risk: observer routing diverges (compaction events stop reaching `onAgentCompacted`).
|
|
169
|
+
Mitigation: the existing manager-level test `resume() also accumulates usage and increments compactions on the same record` asserts `compactionCount` after resume; it stays green only if routing is preserved.
|
|
170
|
+
- Risk: listener leak if `releaseListeners()` is missed on the error path.
|
|
171
|
+
Mitigation: `releaseListeners()` is in `finally`; a dedicated agent-level test asserts the unsub handle is released after both success and error.
|
|
172
|
+
- Risk: behavior change in abort handling if resume is rerouted through `abortController`.
|
|
173
|
+
Mitigation: explicitly keep `signal` flowing straight to `runner.resume({ signal })` (Non-Goal), identical to today.
|
|
174
|
+
- Risk: removing the `subscribeAgentObserver` import while another caller still needs it.
|
|
175
|
+
Mitigation: `grep` confirms `agent.ts` is the only other importer and `record-observer.ts` still exports it.
|
|
176
|
+
|
|
177
|
+
## Open Questions
|
|
178
|
+
|
|
179
|
+
- Whether to later refresh the full `AgentManager`/`Agent` class diagram in `architecture.md` (stale since #229).
|
|
180
|
+
Deferred — out of scope for this issue; a focused follow-up can resync the whole diagram.
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
---
|
|
2
|
+
issue: 230
|
|
3
|
+
issue_title: "Extract ConcurrencyQueue from AgentManager (Phase 15, Step 5)"
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Retro: #230 — Extract ConcurrencyQueue from AgentManager
|
|
7
|
+
|
|
8
|
+
## Stage: Planning (2026-05-28T20:00:00Z)
|
|
9
|
+
|
|
10
|
+
### Session summary
|
|
11
|
+
|
|
12
|
+
Produced a 3-step TDD plan for extracting the scheduling concern (3 fields, 3 methods) from `AgentManager` into a new `ConcurrencyQueue` class.
|
|
13
|
+
Both dependencies (#229 Agent.run(), #231 runner self-contained) are confirmed closed.
|
|
14
|
+
|
|
15
|
+
### Observations
|
|
16
|
+
|
|
17
|
+
- The issue's proposed API has `drain(start: (id: string) => void)` but also `markFinished()` as no-arg with "running--, drain()" semantics — a contradiction.
|
|
18
|
+
Resolved by storing the `startAgent` callback at construction, making both `drain()` and `markFinished()` no-arg.
|
|
19
|
+
This follows Tell-Don't-Ask and matches the established forward-reference-via-closure pattern already used for `onMaxConcurrentChanged`.
|
|
20
|
+
- `markFinished()` auto-drain changes the ordering from "decrement → observer → drain" to "decrement + drain → observer."
|
|
21
|
+
Verified this is safe: observer notification only processes the completed agent and drain only starts promises without awaiting.
|
|
22
|
+
- `SettingsManager` does not change — only the callback wiring in `index.ts` changes target from `manager.notifyConcurrencyChanged()` to `queue.drain()`.
|
|
23
|
+
- The `agent.ts` `abort()` method has a comment referencing #230 that should be updated in the implementation step.
|
|
24
|
+
|
|
25
|
+
## Stage: Implementation — TDD (2026-05-28T21:35:00Z)
|
|
26
|
+
|
|
27
|
+
### Session summary
|
|
28
|
+
|
|
29
|
+
Implemented all 3 TDD steps: (1) `ConcurrencyQueue` class + 22 unit tests, (2) migrated `AgentManager` to use injected `ConcurrencyQueue` and updated `index.ts` wiring + test helper, (3) architecture docs and SKILL.md updates.
|
|
30
|
+
Test count delta: 1020 → 1042 (+22 new `ConcurrencyQueue` tests, 0 removed).
|
|
31
|
+
|
|
32
|
+
### Observations
|
|
33
|
+
|
|
34
|
+
- The `createManager` test helper required the forward-reference-via-closure pattern (`let mgr` then closure then assignment) with a `prefer-const` ESLint suppression — same pattern used in production `index.ts` for `onMaxConcurrentChanged`.
|
|
35
|
+
- Pre-completion reviewer returned WARN for one stale comment (`drainQueue` reference in `waitForAll`) — fixed by amending the docs commit.
|
|
36
|
+
- No plan deviations.
|
|
37
|
+
All module-level changes matched the plan exactly.
|
|
38
|
+
- Pre-completion reviewer: WARN → fixed (stale comment).
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
---
|
|
2
|
+
issue: 232
|
|
3
|
+
issue_title: "Agent.resume() with internal observer lifecycle (Phase 15, Step 6)"
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Retro: #232 — Agent.resume() with internal observer lifecycle (Phase 15, Step 6)
|
|
7
|
+
|
|
8
|
+
## Stage: Planning (2026-05-28T18:00:00Z)
|
|
9
|
+
|
|
10
|
+
### Session summary
|
|
11
|
+
|
|
12
|
+
Produced a 3-step plan to move the observer subscribe/use/release pattern out of `AgentManager.resume()` into a new `Agent.resume(prompt, signal?)`, mirroring the `run()` wiring added in #229.
|
|
13
|
+
This is the last "manager reaches into Agent" duplication in the Phase 15 roadmap (Step 6, priority 8).
|
|
14
|
+
Confirmed the prerequisite #229 is closed and `Agent` already holds `_runner`, `observer`, `attachObserver`/`releaseListeners`, and `resetForResume`.
|
|
15
|
+
|
|
16
|
+
### Observations
|
|
17
|
+
|
|
18
|
+
- Non-breaking (`feat:`) — `AgentManager.resume()` keeps its signature and `Agent | undefined` contract; `Agent.resume()` is additive.
|
|
19
|
+
No `ask_user` needed; the issue's proposed change is concrete and unambiguous.
|
|
20
|
+
- Observer routing equivalence verified: old code wired `onCompact` → `AgentManagerObserver.onAgentCompacted`; new code routes through the per-agent `AgentLifecycleObserver.onCompacted`, which `buildObserver()` forwards to `onAgentCompacted`.
|
|
21
|
+
Net routing identical.
|
|
22
|
+
- Abort semantics intentionally preserved — `signal` flows straight to `runner.resume({ signal })`, not through the agent's `abortController` (resume differs from `run()` here; flagged as a Non-Goal to avoid accidental behavior change).
|
|
23
|
+
- Removing the `subscribeAgentObserver` import from `agent-manager.ts` must land in the same commit as the body rewrite (type checker flags the unused import). `grep` confirmed `agent.ts` remains the importer and `record-observer.ts` keeps the export live.
|
|
24
|
+
- Discovered the `architecture.md` class diagram is stale from #229 (missing `Agent.run()`, stale `setupWorktree`/`completeRun`/`setOnRunFinished` signatures, old `resume(id, snapshot, exec)`).
|
|
25
|
+
Scoped only a light touch (resume-related entries + Step 6 ✅); full diagram refresh deferred as a follow-up.
|
|
26
|
+
- Lift-and-shift TDD order: step 1 introduces `Agent.resume()` alongside the old manager logic; step 2 collapses the manager method and removes the import together.
|
|
27
|
+
Existing manager-level resume tests act as the integration safety net and stay.
|
|
28
|
+
|
|
29
|
+
## Stage: Implementation — TDD (2026-05-28T19:00:00Z)
|
|
30
|
+
|
|
31
|
+
### Session summary
|
|
32
|
+
|
|
33
|
+
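Completed all 3 TDD steps in 3 commits plus a bonus `fix:` commit (4 new commits), followed by one additional `docs:` commit that resolved the pre-completion reviewer finding described below.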
|
|
34
|
+
`Agent.resume()` added with full observer lifecycle, `AgentManager.resume()` collapsed to guard-plus-delegation, `subscribeAgentObserver` import removed from `agent-manager.ts`, and `architecture.md` updated.
|
|
35
|
+
Test count: 1042 → 1053 (+11).
|
|
36
|
+
|
|
37
|
+
### Observations
|
|
38
|
+
|
|
39
|
+
- **Bonus fix found mid-session:** A user question revealed a listener leak introduced in #229 — `Agent.run()` called `wireSignal()` before `setupWorktree()`, but the worktree-failure catch block returned without `releaseListeners()`, leaving the parent `AbortSignal` holding a reference to the errored agent.
|
|
40
|
+
Fixed TDD-style: failing test first (`"releases the parent-signal listener when worktree setup fails"` in `agent.test.ts`), then one-line fix adding `this.releaseListeners()` to the catch block in `run()`.
|
|
41
|
+
Committed as a separate `fix:` commit with a body attributing the regression to #229.
|
|
42
|
+
- **Pre-completion reviewer: WARN** — one non-blocking finding: the Phase 15 findings-summary table in `architecture.md` didn't mark the resolved rows (consistent pre-existing pattern from #229–#231).
|
|
43
|
+
Fixed by adding strikethrough + ✅ to all four resolved finding rows (#229 "Agent cannot run itself", #230 "Scheduling", #231 "exec/registry", #232 "resume()") in an additional `docs:` commit.
|
|
44
|
+
All other reviewer checks passed (Mermaid diagrams validated with `mmdc`, fallow clean, code design clean).
|
|
45
|
+
- **Reviewer warning resolved:** The findings table gap was pre-existing across four issues; closing it in this commit makes the table accurate going into Phase 16.
|
package/package.json
CHANGED
package/src/index.ts
CHANGED
|
@@ -25,6 +25,7 @@ import { loadCustomAgents } from "#src/config/custom-agents";
|
|
|
25
25
|
import { SessionLifecycleHandler, ToolStartHandler } from "#src/handlers/index";
|
|
26
26
|
import { AgentManager, type AgentManagerObserver } from "#src/lifecycle/agent-manager";
|
|
27
27
|
import { ConcreteAgentRunner, type RunnerDeps } from "#src/lifecycle/agent-runner";
|
|
28
|
+
import { ConcurrencyQueue } from "#src/lifecycle/concurrency-queue";
|
|
28
29
|
import { buildParentSnapshot } from "#src/lifecycle/parent-snapshot";
|
|
29
30
|
import { GitWorktreeManager } from "#src/lifecycle/worktree";
|
|
30
31
|
import { buildEventData, type NotificationDetails, NotificationManager } from "#src/observation/notification";
|
|
@@ -66,12 +67,12 @@ export default function (pi: ExtensionAPI) {
|
|
|
66
67
|
);
|
|
67
68
|
|
|
68
69
|
// Settings: owns all three in-memory values and handles load/save/emit.
|
|
69
|
-
// onMaxConcurrentChanged is wired
|
|
70
|
+
// onMaxConcurrentChanged is wired to the queue directly (closure captures by reference).
|
|
70
71
|
const settings = new SettingsManager({
|
|
71
72
|
emit: (event, payload) => pi.events.emit(event, payload),
|
|
72
73
|
cwd: process.cwd(),
|
|
73
74
|
agentDir: getAgentDir(),
|
|
74
|
-
onMaxConcurrentChanged: () =>
|
|
75
|
+
onMaxConcurrentChanged: () => queue.drain(),
|
|
75
76
|
});
|
|
76
77
|
settings.load();
|
|
77
78
|
|
|
@@ -150,11 +151,22 @@ export default function (pi: ExtensionAPI) {
|
|
|
150
151
|
registry,
|
|
151
152
|
};
|
|
152
153
|
|
|
154
|
+
// ConcurrencyQueue: scheduling extracted from AgentManager.
|
|
155
|
+
// startAgent callback forward-references manager via closure (safe — drain is never called during construction).
|
|
156
|
+
const queue = new ConcurrencyQueue(
|
|
157
|
+
() => settings.maxConcurrent,
|
|
158
|
+
(id) => {
|
|
159
|
+
const agent = manager.getRecord(id);
|
|
160
|
+
if (agent?.status !== "queued") return;
|
|
161
|
+
agent.promise = agent.run();
|
|
162
|
+
},
|
|
163
|
+
);
|
|
164
|
+
|
|
153
165
|
const manager = new AgentManager({
|
|
154
166
|
runner: new ConcreteAgentRunner(runnerDeps),
|
|
155
167
|
worktrees: new GitWorktreeManager(process.cwd()),
|
|
156
168
|
observer,
|
|
157
|
-
|
|
169
|
+
queue,
|
|
158
170
|
getRunConfig: () => settings,
|
|
159
171
|
});
|
|
160
172
|
|
|
@@ -11,10 +11,10 @@ import type { Model } from "@earendil-works/pi-ai";
|
|
|
11
11
|
import { debugLog } from "#src/debug";
|
|
12
12
|
import { Agent, type AgentLifecycleObserver } from "#src/lifecycle/agent";
|
|
13
13
|
import type { AgentRunner } from "#src/lifecycle/agent-runner";
|
|
14
|
+
import type { ConcurrencyQueue } from "#src/lifecycle/concurrency-queue";
|
|
14
15
|
import type { ParentSnapshot } from "#src/lifecycle/parent-snapshot";
|
|
15
16
|
import type { WorktreeManager } from "#src/lifecycle/worktree";
|
|
16
17
|
|
|
17
|
-
import { subscribeAgentObserver } from "#src/observation/record-observer";
|
|
18
18
|
import type { RunConfig } from "#src/runtime";
|
|
19
19
|
import type { AgentInvocation, CompactionInfo, IsolationMode, ParentSessionInfo, SubagentType, ThinkingLevel } from "#src/types";
|
|
20
20
|
|
|
@@ -27,14 +27,11 @@ export interface AgentManagerObserver {
|
|
|
27
27
|
onAgentCreated(record: Agent): void;
|
|
28
28
|
}
|
|
29
29
|
|
|
30
|
-
/** Default max concurrent background agents. */
|
|
31
|
-
const DEFAULT_MAX_CONCURRENT = 4;
|
|
32
|
-
|
|
33
30
|
export interface AgentManagerOptions {
|
|
34
31
|
runner: AgentRunner;
|
|
35
32
|
worktrees: WorktreeManager;
|
|
36
|
-
/**
|
|
37
|
-
|
|
33
|
+
/** Concurrency queue — owns scheduling, limit checks, and drain logic. */
|
|
34
|
+
queue: ConcurrencyQueue;
|
|
38
35
|
getRunConfig?: () => RunConfig;
|
|
39
36
|
observer?: AgentManagerObserver;
|
|
40
37
|
}
|
|
@@ -71,44 +68,35 @@ export class AgentManager {
|
|
|
71
68
|
private readonly observer?: AgentManagerObserver;
|
|
72
69
|
private readonly runner: AgentRunner;
|
|
73
70
|
private readonly worktrees: WorktreeManager;
|
|
74
|
-
private readonly
|
|
71
|
+
private readonly queue: ConcurrencyQueue;
|
|
75
72
|
private getRunConfig?: () => RunConfig;
|
|
76
73
|
|
|
77
|
-
/** Queue of background agent IDs waiting to start. */
|
|
78
|
-
private queue: string[] = [];
|
|
79
|
-
/** Number of currently running background agents. */
|
|
80
|
-
private runningBackground = 0;
|
|
81
74
|
constructor(options: AgentManagerOptions) {
|
|
82
75
|
this.runner = options.runner;
|
|
83
76
|
this.worktrees = options.worktrees;
|
|
77
|
+
this.queue = options.queue;
|
|
84
78
|
this.observer = options.observer;
|
|
85
79
|
this.getRunConfig = options.getRunConfig;
|
|
86
|
-
this._getMaxConcurrent = options.getMaxConcurrent ?? (() => DEFAULT_MAX_CONCURRENT);
|
|
87
80
|
// Cleanup completed agents after 10 minutes (but keep sessions for resume)
|
|
88
81
|
this.cleanupInterval = setInterval(() => this.cleanup(), 60_000);
|
|
89
82
|
this.cleanupInterval.unref();
|
|
90
83
|
}
|
|
91
84
|
|
|
92
|
-
/**
|
|
93
|
-
* Drain the concurrency queue after SettingsManager has updated maxConcurrent.
|
|
94
|
-
* Call this whenever the concurrency limit increases so queued agents can start.
|
|
95
|
-
*/
|
|
96
|
-
notifyConcurrencyChanged(): void {
|
|
97
|
-
this.drainQueue();
|
|
98
|
-
}
|
|
99
|
-
|
|
100
85
|
/** Compose a per-agent lifecycle observer from manager and spawn-config concerns. */
|
|
101
86
|
private buildObserver(options: AgentSpawnConfig): AgentLifecycleObserver {
|
|
102
87
|
return {
|
|
103
88
|
onStarted: (agent) => {
|
|
104
|
-
if (options.isBackground) this.
|
|
89
|
+
if (options.isBackground) this.queue.markStarted();
|
|
105
90
|
this.observer?.onAgentStarted(agent);
|
|
106
91
|
},
|
|
107
92
|
onSessionCreated: options.observer?.onSessionCreated
|
|
108
93
|
? (agent, session) => options.observer!.onSessionCreated!(agent, session)
|
|
109
94
|
: undefined,
|
|
110
95
|
onRunFinished: (agent) => {
|
|
111
|
-
if (options.isBackground)
|
|
96
|
+
if (options.isBackground) {
|
|
97
|
+
this.queue.markFinished();
|
|
98
|
+
try { this.observer?.onAgentCompleted(agent); } catch (err) { debugLog("onAgentCompleted observer", err); }
|
|
99
|
+
}
|
|
112
100
|
},
|
|
113
101
|
onCompacted: (agent, info) => {
|
|
114
102
|
this.observer?.onAgentCompacted(agent, info);
|
|
@@ -156,9 +144,9 @@ export class AgentManager {
|
|
|
156
144
|
this.observer?.onAgentCreated(record);
|
|
157
145
|
}
|
|
158
146
|
|
|
159
|
-
if (options.isBackground && !options.bypassQueue && this.
|
|
147
|
+
if (options.isBackground && !options.bypassQueue && this.queue.isFull()) {
|
|
160
148
|
// Queue it - will be started when a running agent completes
|
|
161
|
-
this.queue.
|
|
149
|
+
this.queue.enqueue(id);
|
|
162
150
|
return id;
|
|
163
151
|
}
|
|
164
152
|
|
|
@@ -166,23 +154,6 @@ export class AgentManager {
|
|
|
166
154
|
return id;
|
|
167
155
|
}
|
|
168
156
|
|
|
169
|
-
/** Decrement background counter, notify observer (crash-safe), and drain the queue. */
|
|
170
|
-
private finalizeBackgroundRun(record: Agent): void {
|
|
171
|
-
this.runningBackground--;
|
|
172
|
-
try { this.observer?.onAgentCompleted(record); } catch (err) { debugLog("onAgentCompleted observer", err); }
|
|
173
|
-
this.drainQueue();
|
|
174
|
-
}
|
|
175
|
-
|
|
176
|
-
/** Start queued agents up to the concurrency limit. */
|
|
177
|
-
private drainQueue() {
|
|
178
|
-
while (this.queue.length > 0 && this.runningBackground < this._getMaxConcurrent()) {
|
|
179
|
-
const id = this.queue.shift()!;
|
|
180
|
-
const record = this.agents.get(id);
|
|
181
|
-
if (record?.status !== "queued") continue;
|
|
182
|
-
record.promise = record.run();
|
|
183
|
-
}
|
|
184
|
-
}
|
|
185
|
-
|
|
186
157
|
/**
|
|
187
158
|
* Spawn an agent and wait for completion (foreground use).
|
|
188
159
|
* Foreground agents bypass the concurrency queue.
|
|
@@ -201,34 +172,17 @@ export class AgentManager {
|
|
|
201
172
|
|
|
202
173
|
/**
|
|
203
174
|
* Resume an existing agent session with a new prompt.
|
|
175
|
+
* Delegates to Agent.resume(), which owns the observer subscription lifecycle.
|
|
204
176
|
*/
|
|
205
177
|
async resume(
|
|
206
178
|
id: string,
|
|
207
179
|
prompt: string,
|
|
208
180
|
signal?: AbortSignal,
|
|
209
181
|
): Promise<Agent | undefined> {
|
|
210
|
-
const
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
record.resetForResume(Date.now());
|
|
215
|
-
|
|
216
|
-
const unsubResume = subscribeAgentObserver(session, record, {
|
|
217
|
-
onCompact: (r, info) => this.observer?.onAgentCompacted(r, info),
|
|
218
|
-
});
|
|
219
|
-
|
|
220
|
-
try {
|
|
221
|
-
const responseText = await this.runner.resume(session, prompt, {
|
|
222
|
-
signal,
|
|
223
|
-
});
|
|
224
|
-
record.markCompleted(responseText);
|
|
225
|
-
} catch (err) {
|
|
226
|
-
record.markError(err);
|
|
227
|
-
} finally {
|
|
228
|
-
unsubResume();
|
|
229
|
-
}
|
|
230
|
-
|
|
231
|
-
return record;
|
|
182
|
+
const agent = this.agents.get(id);
|
|
183
|
+
if (!agent?.session) return undefined;
|
|
184
|
+
await agent.resume(prompt, signal);
|
|
185
|
+
return agent;
|
|
232
186
|
}
|
|
233
187
|
|
|
234
188
|
getRecord(id: string): Agent | undefined {
|
|
@@ -247,7 +201,7 @@ export class AgentManager {
|
|
|
247
201
|
|
|
248
202
|
// Remove from queue if queued
|
|
249
203
|
if (record.status === "queued") {
|
|
250
|
-
this.queue
|
|
204
|
+
this.queue.dequeue(id);
|
|
251
205
|
record.markStopped();
|
|
252
206
|
return true;
|
|
253
207
|
}
|
|
@@ -295,14 +249,14 @@ export class AgentManager {
|
|
|
295
249
|
abortAll(): number {
|
|
296
250
|
let count = 0;
|
|
297
251
|
// Clear queued agents first
|
|
298
|
-
for (const id of this.queue) {
|
|
252
|
+
for (const id of this.queue.queuedIds) {
|
|
299
253
|
const record = this.agents.get(id);
|
|
300
254
|
if (record) {
|
|
301
255
|
record.markStopped();
|
|
302
256
|
count++;
|
|
303
257
|
}
|
|
304
258
|
}
|
|
305
|
-
this.queue
|
|
259
|
+
this.queue.clear();
|
|
306
260
|
// Abort running agents
|
|
307
261
|
for (const record of this.agents.values()) {
|
|
308
262
|
if (record.abort()) count++;
|
|
@@ -313,11 +267,11 @@ export class AgentManager {
|
|
|
313
267
|
/** Wait for all running and queued agents to complete (including queued ones). */
|
|
314
268
|
// fallow-ignore-next-line unused-class-member
|
|
315
269
|
async waitForAll(): Promise<void> {
|
|
316
|
-
// Loop because
|
|
270
|
+
// Loop because queue.drain() respects the concurrency limit - as running
|
|
317
271
|
// agents finish they start queued ones, which need awaiting too.
|
|
318
272
|
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition -- intentional infinite loop with explicit break
|
|
319
273
|
while (true) {
|
|
320
|
-
this.
|
|
274
|
+
this.queue.drain();
|
|
321
275
|
const pending = [...this.agents.values()]
|
|
322
276
|
.filter(r => r.status === "running" || r.status === "queued")
|
|
323
277
|
.map(r => r.promise)
|
|
@@ -330,7 +284,7 @@ export class AgentManager {
|
|
|
330
284
|
dispose() {
|
|
331
285
|
clearInterval(this.cleanupInterval);
|
|
332
286
|
// Clear queue
|
|
333
|
-
this.queue
|
|
287
|
+
this.queue.clear();
|
|
334
288
|
for (const record of this.agents.values()) {
|
|
335
289
|
record.session?.dispose();
|
|
336
290
|
}
|
package/src/lifecycle/agent.ts
CHANGED
|
@@ -250,6 +250,7 @@ export class Agent {
|
|
|
250
250
|
this.setupWorktree();
|
|
251
251
|
} catch (err) {
|
|
252
252
|
this.markError(err);
|
|
253
|
+
this.releaseListeners();
|
|
253
254
|
this.observer?.onRunFinished?.(this);
|
|
254
255
|
return;
|
|
255
256
|
}
|
|
@@ -285,6 +286,39 @@ export class Agent {
|
|
|
285
286
|
}
|
|
286
287
|
}
|
|
287
288
|
|
|
289
|
+
/**
|
|
290
|
+
* Resume an existing session with a new prompt, managing the observer
|
|
291
|
+
* subscription lifecycle internally (same wiring as run()).
|
|
292
|
+
*
|
|
293
|
+
* Requires runner and an existing session (set when the original run created it).
|
|
294
|
+
* Once both guards pass, the returned promise always resolves (errors from runner.resume are captured via markError); a missing runner or session throws instead.
|
|
295
|
+
* The parent signal flows straight through to runner.resume — resume does not
|
|
296
|
+
* route through this.abortController.
|
|
297
|
+
*/
|
|
298
|
+
async resume(prompt: string, signal?: AbortSignal): Promise<void> {
|
|
299
|
+
if (!this._runner) {
|
|
300
|
+
throw new Error("Agent not configured for execution — missing runner");
|
|
301
|
+
}
|
|
302
|
+
const session = this.session;
|
|
303
|
+
if (!session) {
|
|
304
|
+
throw new Error("Agent not configured for resume — missing session");
|
|
305
|
+
}
|
|
306
|
+
|
|
307
|
+
this.resetForResume(Date.now());
|
|
308
|
+
this.attachObserver(subscribeAgentObserver(session, this, {
|
|
309
|
+
onCompact: (r, info) => this.observer?.onCompacted?.(r, info),
|
|
310
|
+
}));
|
|
311
|
+
|
|
312
|
+
try {
|
|
313
|
+
const responseText = await this._runner.resume(session, prompt, { signal });
|
|
314
|
+
this.markCompleted(responseText);
|
|
315
|
+
} catch (err) {
|
|
316
|
+
this.markError(err);
|
|
317
|
+
} finally {
|
|
318
|
+
this.releaseListeners();
|
|
319
|
+
}
|
|
320
|
+
}
|
|
321
|
+
|
|
288
322
|
/** Increment tool use count. Called by record-observer on tool_execution_end. */
|
|
289
323
|
incrementToolUses(): void {
|
|
290
324
|
this._toolUses++;
|
|
@@ -363,7 +397,7 @@ export class Agent {
|
|
|
363
397
|
/**
|
|
364
398
|
* Abort a running agent: fire AbortController and transition to stopped.
|
|
365
399
|
* Returns false if the agent is not running.
|
|
366
|
-
* Queue removal
|
|
400
|
+
* Queue removal is handled by AgentManager via ConcurrencyQueue.dequeue().
|
|
367
401
|
*/
|
|
368
402
|
abort(): boolean {
|
|
369
403
|
if (this._status !== "running") return false;
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* concurrency-queue.ts — Manages background agent scheduling with a configurable concurrency limit.
|
|
3
|
+
*
|
|
4
|
+
* Stores agent IDs (not full agent objects) and decides *when* to start them.
|
|
5
|
+
* The startAgent callback provided at construction handles the actual agent lifecycle.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
export class ConcurrencyQueue {
|
|
9
|
+
private queue: string[] = [];
|
|
10
|
+
private running = 0;
|
|
11
|
+
|
|
12
|
+
constructor(
|
|
13
|
+
private readonly getMaxConcurrent: () => number,
|
|
14
|
+
private readonly startAgent: (id: string) => void,
|
|
15
|
+
) {}
|
|
16
|
+
|
|
17
|
+
/** Whether the concurrency limit has been reached. */
|
|
18
|
+
isFull(): boolean {
|
|
19
|
+
return this.running >= this.getMaxConcurrent();
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
/** Add an agent ID to the wait queue. */
|
|
23
|
+
enqueue(id: string): void {
|
|
24
|
+
this.queue.push(id);
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
/** Remove an agent ID from the queue (e.g., aborted before starting). Returns true if found. */
|
|
28
|
+
dequeue(id: string): boolean {
|
|
29
|
+
const idx = this.queue.indexOf(id);
|
|
30
|
+
if (idx === -1) return false;
|
|
31
|
+
this.queue.splice(idx, 1);
|
|
32
|
+
return true;
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
/** Increment the running count. Called when an agent transitions to running. */
|
|
36
|
+
markStarted(): void {
|
|
37
|
+
this.running++;
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
/** Decrement the running count and drain the queue. Called when a background agent finishes. */
|
|
41
|
+
markFinished(): void {
|
|
42
|
+
this.running--;
|
|
43
|
+
this.drain();
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
/** Start queued agents until the concurrency limit is reached. */
|
|
47
|
+
drain(): void {
|
|
48
|
+
while (this.queue.length > 0 && !this.isFull()) {
|
|
49
|
+
const id = this.queue.shift()!;
|
|
50
|
+
this.startAgent(id);
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
/** Read-only view of the queued IDs for iteration (e.g., abortAll). Returns the live internal array, not a copy. */
|
|
55
|
+
get queuedIds(): readonly string[] {
|
|
56
|
+
return this.queue;
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
/** Clear the queue without starting any agents. */
|
|
60
|
+
clear(): void {
|
|
61
|
+
this.queue = [];
|
|
62
|
+
}
|
|
63
|
+
}
|