npm - @gotgenes/pi-subagents - Versions diffs - 16.0.0 → 16.1.0 - Mend

@gotgenes/pi-subagents 16.0.0 → 16.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

package/CHANGELOG.md +7 -0
package/docs/architecture/architecture.md +5 -5
package/docs/plans/0381-replace-concurrency-queue-with-limiter.md +267 -0
package/docs/retro/0381-replace-concurrency-queue-with-limiter.md +49 -0
package/docs/retro/0400-include-parent-prompt-in-replace-mode.md +40 -0
package/package.json +1 -1
package/src/index.ts +8 -15
package/src/lifecycle/concurrency-limiter.ts +55 -0
package/src/lifecycle/subagent-manager.ts +38 -35
package/src/lifecycle/subagent.ts +2 -1
package/src/lifecycle/concurrency-queue.ts +0 -63

package/CHANGELOG.md CHANGED Viewed

@@ -5,6 +5,13 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+## [16.1.0](https://github.com/gotgenes/pi-packages/compare/pi-subagents-v16.0.0...pi-subagents-v16.1.0) (2026-06-14)
+### Features
+* **pi-subagents:** add ConcurrencyLimiter ([#381](https://github.com/gotgenes/pi-packages/issues/381)) ([26f4203](https://github.com/gotgenes/pi-packages/commit/26f420337094d81d39bcc3e0522e12262c7767b7))
 ## [16.0.0](https://github.com/gotgenes/pi-packages/compare/pi-subagents-v15.0.2...pi-subagents-v16.0.0) (2026-06-14)

package/docs/architecture/architecture.md CHANGED Viewed

@@ -53,7 +53,7 @@ flowchart TB
     subgraph lifecycle["Lifecycle domain"]
         direction TB
         SubagentManager["SubagentManager<br/>(spawn, abort, collection)"]
-        ConcurrencyQueue["ConcurrencyQueue<br/>(scheduling, drain)"]
+        ConcurrencyLimiter["ConcurrencyLimiter<br/>(thunk admission gate)"]
         CreateSubagentSession["createSubagentSession<br/>(assembly factory)"]
         SubagentSession["SubagentSession<br/>(turn loop, steer, dispose)"]
         Subagent["Subagent<br/>(status, behavior: abort/steer/run lifecycle)"]
@@ -283,7 +283,7 @@ src/
 │   ├── subagent-session.ts         born-complete child session: turn loop, steer, dispose
 │   ├── turn-limits.ts              normalizeMaxTurns (turn-count policy)
 │   ├── subagent.ts                 owns full execution lifecycle (run, abort, steer, workspace)
-│   ├── concurrency-queue.ts        background agent scheduling with configurable concurrency limit
+│   ├── concurrency-limiter.ts      background admission gate: schedules run thunks FIFO against the limit
 │   ├── parent-snapshot.ts          immutable spawn-time parent state
 │   ├── child-lifecycle.ts          child-execution lifecycle event publisher
 │   ├── workspace.ts                workspace provider seam (generative extension surface)
@@ -360,7 +360,7 @@ They declare this package as an optional peer dependency and use dynamic import
 - The three tools: `subagent` (née `Agent`), `get_subagent_result`, `steer_subagent`.
 - `SubagentManager` — spawn, abort, resume, collection management, observer wiring.
-- `ConcurrencyQueue` — background agent scheduling with configurable concurrency limit.
+- `ConcurrencyLimiter` — background admission gate: schedules run thunks FIFO against a configurable concurrency limit.
 - `createSubagentSession` — assembly factory: session creation and extension binding; returns a born-complete `SubagentSession`.
 - `SubagentSession` — the born-complete child session: drives the turn loop (`runTurnLoop`/`resumeTurnLoop`), steers, and disposes (firing `disposed` at true session disposal, so resume executions are registry-detected).
 - `child-lifecycle` — publishes the child-execution lifecycle (`spawning`, `session-created` before `bindExtensions()`, `completed`, `disposed`) on `pi.events`.
@@ -905,7 +905,7 @@ Priority = Impact × (6 − Risk).
 | 8    | Consolidate UI and tools test fixtures                                               | D        | 2      | 1    | 10       |
 | 9    | Resolve the cross-package settings-loader duplication                                | A        | 2      | 2    | 8        |
-#### Step 1 — Replace ConcurrencyQueue with a thunk-based ConcurrencyLimiter ([#381])
+#### Step 1 — Replace ConcurrencyQueue with a thunk-based ConcurrencyLimiter ([#381]) ✅ Complete
 - Targets: `src/lifecycle/concurrency-queue.ts` (→ `concurrency-limiter.ts`), `src/lifecycle/subagent-manager.ts`, `src/index.ts`, `test/lifecycle/concurrency-queue.test.ts`, `test/lifecycle/subagent-manager.test.ts`.
 - Smell: Category C (forward references: the queue's ID-registry design forces a start callback that reaches back into the manager, duplicated between `index.ts` and the test helper) and Category A (dual counting: the queue's `running` counter is fed by `markStarted`/`markFinished` relays in the manager's observer, mirroring state the agents already carry).
@@ -958,7 +958,7 @@ Priority = Impact × (6 − Risk).
 #### Step 7 — Consolidate lifecycle test fixtures ([#378])
-- Targets: `test/lifecycle/subagent-manager.test.ts` (766 LOC), `test/lifecycle/subagent.test.ts`, `test/lifecycle/subagent-session.test.ts`, `test/lifecycle/create-subagent-session.test.ts`, `test/lifecycle/create-subagent-session-extension-tools.test.ts`, `test/lifecycle/concurrency-queue.test.ts`, `test/helpers/`.
+- Targets: `test/lifecycle/subagent-manager.test.ts` (766 LOC), `test/lifecycle/subagent.test.ts`, `test/lifecycle/subagent-session.test.ts`, `test/lifecycle/create-subagent-session.test.ts`, `test/lifecycle/create-subagent-session-extension-tools.test.ts`, `test/lifecycle/concurrency-limiter.test.ts`, `test/helpers/`.
 - Smell: Category D — fallow reports five clone families across the lifecycle tests.
 - Change: extract the repeated spawn/run/factory arrangements into shared helpers, migrating incrementally (lift-and-shift, never a single-step rewrite of a large test file).
 - Outcome: lifecycle clone families 5 → ≤ 1; package test duplication below 600 lines.

package/docs/plans/0381-replace-concurrency-queue-with-limiter.md ADDED Viewed

@@ -0,0 +1,267 @@
+---
+issue: 381
+issue_title: "Replace ConcurrencyQueue with a thunk-based ConcurrencyLimiter"
+---
+# Replace ConcurrencyQueue with a thunk-based ConcurrencyLimiter
+## Problem Statement
+The `ConcurrencyQueue` stores background-agent IDs and decides *when* to start them, but it cannot start an agent itself.
+It compensates with a `startAgent(id)` callback that reaches back into the manager (`getRecord(id)`, status check, `run()`) — a dependency back-edge that forces forward-referenced bindings in both `index.ts` and the manager test helper.
+The queue also keeps its own `running` counter, fed by `markStarted`/`markFinished` relays in the manager's observer, duplicating state the agents already carry.
+A queued agent has `promise === undefined` until the queue starts it, which is the direct cause of `waitForAll`'s `while (true)` drain loop and its `eslint-disable`.
+These are three symptoms of one root cause: the queue schedules *identifiers it cannot act on* instead of *work it can run*.
+Scheduling thunks (`() => Promise<void>`) instead of IDs dissolves all three at the source.
+This is Phase 17 Step 1 (core consolidation), recorded in `docs/architecture/architecture.md` under "Improvement roadmap (Phase 17 — core consolidation)".
+It unblocks Phase 17 Step 3 ([#374], run-start encapsulation).
+## Goals
+- Replace `ConcurrencyQueue` (ID registry + back-edge callback) with a `ConcurrencyLimiter` that schedules run closures FIFO against a dynamic limit and knows nothing about agents, IDs, or the manager.
+- Make the dependency direction strictly `SubagentManager → ConcurrencyLimiter`: no callback back-edge, no forward-referenced bindings.
+- Derive the active count from the limiter's own task lifecycle (increment on task start, decrement on settle); delete the observer's `markStarted`/`markFinished` relays.
+- Give every spawned agent a real `promise` at spawn time, collapsing `waitForAll`'s `while (true)` drain loop and its `eslint-disable`.
+- This is a non-breaking internal refactor: the FIFO admission behavior against `maxConcurrent` is preserved, and no public API, config key, or observable behavior changes.
+## Non-Goals
+- Renaming the `bypassQueue` spawn option.
+  It is part of the published `SubagentsService` type surface (`src/service/service.ts`), so renaming it would churn the type bundle and break consumers — out of scope; track in Open Questions.
+- Folding the queued-status guard into `Subagent.start()` — that is Phase 17 Step 3 ([#374]).
+  This plan keeps the guard inside the scheduled thunk.
+- Extracting `SubagentState` or making execution deps mandatory ([#373], Step 2).
+- Any change to foreground execution (`spawnAndWait`) or to `bypassQueue` runs — both continue to invoke `record.run()` directly, never touching the limiter.
+- Touching `src/service/service.ts` or `src/service/service-adapter.ts` — `bypassQueue` flows through unchanged.
+## Background
+Relevant modules:
+- `src/lifecycle/concurrency-queue.ts` — the current `ConcurrencyQueue`: `isFull`, `enqueue`, `dequeue`, `markStarted`, `markFinished`, `drain`, `clear`, `queuedIds`.
+  Stores IDs; `drain()` calls the injected `startAgent(id)` back-edge.
+- `src/lifecycle/subagent-manager.ts` — injects the queue via `SubagentManagerOptions.queue`.
+  `buildObserver` relays `markStarted`/`markFinished`; `spawn` enqueues when `isFull()`; `abort` calls `dequeue`; `abortAll` iterates `queuedIds` + `clear()`; `waitForAll` loops `drain()` + `Promise.allSettled`; `dispose` calls `clear()`.
+- `src/index.ts` — constructs the queue with a `startAgent` callback that forward-references the manager (`manager.getRecord(id)` then `agent.run()`); wires `settings.onMaxConcurrentChanged` to `queue.drain()`.
+- `src/lifecycle/subagent.ts` — `run()` sets status to `running` synchronously (`markRunning`) before its first `await`; `run()` always resolves (errors captured internally).
+  `abort()` acts only on `running` agents; its docstring references `ConcurrencyQueue.dequeue()`.
+- `test/lifecycle/subagent-manager.test.ts` — `createManager` helper replicates the `index.ts` start callback with a `prefer-const` `eslint-disable` for the forward reference.
+- `test/lifecycle/concurrency-queue.test.ts` — unit tests for the queue (drain ordering, `markStarted`/`markFinished` counting, `enqueue`/`dequeue`).
+Constraints from AGENTS.md and skills:
+- ES2024 `Promise.withResolvers` is available and preferred (`code-design` skill).
+- The `bypassQueue` field lives in the public type bundle (`exports`, `verify:public-types`); renaming public surface is breaking (`package-pi-subagents` skill).
+- `@typescript-eslint/require-await` is enabled for `src/`; a thunk with no `await` must return a `Promise` without `async`.
+- Where the old `drain()` used `while (… && !isFull())` with `this.queue.shift()!`, prefer a bounded loop without a non-null assertion (`code-design` Biome/ESLint notes).
+Reading the current observer-relay path (`buildObserver` → `queue.markStarted`/`markFinished`) confirms that the queue's `running` counter mirrors the per-agent status the manager already tracks (the manager filters on `status === "running" || status === "queued"` in `cleanup`, `clearCompleted`, `hasRunning`, and `waitForAll`).
+No production caller awaits a *queued* agent's promise (`get-result-tool.ts` guards on `status === "running"`; `spawnAndWait` is foreground; `waitForAll` filters by status), so giving queued agents a settled-on-completion promise is safe.
+## Design Overview
+### `ConcurrencyLimiter`
+A pure FIFO scheduler over thunks.
+It owns the active count and the pending queue; it has no knowledge of agents, IDs, or the manager.
+```typescript
+export class ConcurrencyLimiter {
+	private active = 0;
+	private readonly pending: Array<{ start: () => void; settle: () => void }> = [];
+	constructor(private readonly getLimit: () => number) {}
+	/**
+	 * Schedule a task to run FIFO once a slot is free.
+	 * The returned promise always settles: it follows the task's settlement when
+	 * the task runs, or resolves early if clear() drops it before it starts.
+	 */
+	schedule(task: () => Promise<void>): Promise<void> {
+		const { promise, resolve, reject } = Promise.withResolvers<void>();
+		this.pending.push({
+			start: () => {
+				this.active++;
+				task().then(resolve, reject).finally(() => {
+					this.active--;
+					this.recheck();
+				});
+			},
+			settle: resolve,
+		});
+		this.recheck();
+		return promise;
+	}
+	/** Start pending tasks until the limit is reached. Call when the limit may have grown. */
+	recheck(): void {
+		while (this.active < this.getLimit()) {
+			const next = this.pending.shift();
+			if (!next) break;
+			next.start();
+		}
+	}
+	/** Drop all pending tasks, resolving their promises without running them. */
+	clear(): void {
+		const dropped = this.pending.splice(0);
+		for (const task of dropped) task.settle();
+	}
+}
+```
+Design decisions:
+- **Active count derived from task lifecycle.**
+  `active++` happens synchronously inside `start()` before the task's first `await`; `active--` runs in `finally`.
+  This replaces the queue's `running` counter and the two observer relays.
+- **`recheck()` is bounded.**
+  The loop terminates when the limit is reached or the pending queue empties — no `while (true)`, no `this.pending.shift()!` non-null assertion.
+- **`clear()` settles dropped promises.**
+  Every `schedule()` promise becomes `record.promise`; the contract is that it always settles.
+  Dropping a thunk without resolving would leave a forever-pending `record.promise`.
+  `clear()` resolves dropped tasks so `dispose()`/`abortAll()` cannot strand a promise. (This is a few lines beyond the issue's "~40 lines" sketch; the extra `settle` handle is the deliberate cost of that invariant.)
+- **Synchronous start.**
+  When a slot is free, `schedule()` runs the thunk synchronously inside `recheck()`, so `record.run()` executes its synchronous prefix (`markRunning`) immediately — preserving today's behavior where `record.promise = record.run()` flips status to `running` at once.
+### Manager spawn call site
+```typescript
+// spawn(), background and not bypassQueue:
+record.promise = this.limiter.schedule(() => {
+	// Guard: an abort-while-queued task is a no-op (Step 3 folds this into Subagent.start()).
+	if (record.status !== "queued") return Promise.resolve();
+	return record.run();
+});
+// foreground or bypassQueue:
+record.promise = record.run();
+```
+This is Tell-Don't-Ask toward the limiter: the manager hands it work, the limiter decides timing.
+The status guard replaces `dequeue` — an aborted queued agent (status `stopped`) becomes a no-op when its slot finally opens.
+### Manager lifecycle methods
+- `buildObserver` — drop the `markStarted` (in `onStarted`) and `markFinished` (in `onRunFinished`) relays; `onRunFinished` keeps the background `onSubagentCompleted` dispatch.
+- `abort(id)` — for a `queued` agent, just `record.markStopped()` (no `dequeue`); otherwise `record.abort()`.
+- `abortAll()` — iterate agents: `markStopped()` each `queued` agent (count it), else `record.abort()`; then `this.limiter.clear()` to drop pending thunks (their promises resolve).
+- `waitForAll()` — every spawned agent has a `promise`, so the manual `drain()` loop collapses:
+  ```typescript
+  async waitForAll(): Promise<void> {
+   let pending = this.pendingPromises();
+   while (pending.length > 0) {
+    await Promise.allSettled(pending);
+    pending = this.pendingPromises();
+   }
+  }
+  private pendingPromises(): Promise<void>[] {
+   return [...this.agents.values()]
+    .filter(r => r.status === "running" || r.status === "queued")
+    .map(r => r.promise)
+    .filter((p): p is Promise<void> => p != null);
+  }
+  ```
+  The re-check loop is no longer `while (true)` and no longer drives scheduling — the limiter auto-starts queued agents as slots free, so a single `allSettled` covers the queued case.
+  The loop survives only to catch agents spawned *during* the wait.
+  The `eslint-disable @typescript-eslint/no-unnecessary-condition` is deleted.
+- `dispose()` — `this.limiter.clear()` (unchanged in intent).
+### `index.ts` wiring
+```typescript
+const settings = new SettingsManager({
+	// …
+	onMaxConcurrentChanged: () => limiter.recheck(), // forward-ref closure (settings → limiter); benign
+});
+settings.load();
+// …
+const limiter = new ConcurrencyLimiter(() => settings.maxConcurrent);
+const manager = new SubagentManager({ /* … */ limiter, /* … */ });
+```
+The only surviving forward reference is `settings → limiter` (a runtime-only closure, the same shape as today's `settings → queue.drain`).
+The `queue → manager` back-edge (the `startAgent` callback and its explanatory comment) is **deleted entirely**, and no `limiter → manager` edge replaces it — that is the structural win.
+### Edge cases
+- **Abort while queued** — `markStopped()` flips status; the scheduled thunk, when run, returns `Promise.resolve()` (no-op), settling `record.promise`.
+- **Limit decreased below active count** — `recheck()` simply starts nothing (`active < getLimit()` is false); in-flight tasks finish normally.
+- **Limit increased** — `onMaxConcurrentChanged → limiter.recheck()` starts newly-admissible pending tasks.
+- **`clear()` with in-flight tasks** — only *pending* tasks are dropped; running tasks complete normally, and `active--` runs when each one settles.
+## Module-Level Changes
+| File                                         | Change                                                                                                                                                                                                                                                                                                                                                                                                                                                                        |
+| -------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `src/lifecycle/concurrency-limiter.ts`       | Add — new `ConcurrencyLimiter` (`schedule`, `recheck`, `clear`).                                                                                                                                                                                                                                                                                                                                                                                                              |
+| `src/lifecycle/concurrency-queue.ts`         | Remove — replaced by the limiter.                                                                                                                                                                                                                                                                                                                                                                                                                                             |
+| `src/lifecycle/subagent-manager.ts`          | Change — import limiter; `SubagentManagerOptions.queue` → `limiter: ConcurrencyLimiter` and the private field; drop `markStarted`/`markFinished` from `buildObserver`; `spawn` schedules a status-guarded thunk; `abort` drops `dequeue`; `abortAll` iterates agents + `limiter.clear()`; `waitForAll` simplified (add `pendingPromises` helper, delete the `while (true)` loop and its `eslint-disable`); `dispose` calls `limiter.clear()`; update the file-header comment. |
+| `src/lifecycle/subagent.ts`                  | Change — `abort()` docstring: remove the `ConcurrencyQueue.dequeue()` reference (queue removal is now a status-guard no-op).                                                                                                                                                                                                                                                                                                                                                  |
+| `src/index.ts`                               | Change — import `ConcurrencyLimiter`; construct it as `new ConcurrencyLimiter(() => settings.maxConcurrent)`; `onMaxConcurrentChanged: () => limiter.recheck()`; delete the `startAgent` callback and its forward-ref comment; inject `limiter` into the manager.                                                                                                                                                                                                             |
+| `test/lifecycle/concurrency-limiter.test.ts` | Add — limiter unit tests (no `startAgent` mock).                                                                                                                                                                                                                                                                                                                                                                                                                              |
+| `test/lifecycle/concurrency-queue.test.ts`   | Remove — the queue is gone.                                                                                                                                                                                                                                                                                                                                                                                                                                                   |
+| `test/lifecycle/subagent-manager.test.ts`    | Change — `createManager` constructs a `ConcurrencyLimiter`; delete the forward-ref `let mgr` + `prefer-const` `eslint-disable`; drop the unused `queue` field from the returned object.                                                                                                                                                                                                                                                                                       |
+| `docs/architecture/architecture.md`          | Change — Mermaid lifecycle node (`ConcurrencyQueue<br/>(scheduling, drain)` → `ConcurrencyLimiter<br/>(thunk admission gate)`); layout listing (`concurrency-queue.ts` → `concurrency-limiter.ts`); "What the core owns" bullet; mark roadmap Step 1 done; fix the Step 7 ([#378]) target filename reference.                                                                                                                                                                 |
+| `.pi/skills/package-pi-subagents/SKILL.md`   | Change — lifecycle-domain table: `concurrency-queue.ts` → `concurrency-limiter.ts` and adjust the "scheduling" wording to "concurrency admission".                                                                                                                                                                                                                                                                                                                            |
+Verified by grep that no other `src/`, `test/`, `docs/` (excluding `docs/architecture/history/` and prior plans/retros, which are historical), or `.pi/skills/` file references `ConcurrencyQueue`, `concurrency-queue`, `enqueue`, `dequeue`, `markStarted`/`markFinished` (queue), `drain`, `isFull`, or `queuedIds` for this queue.
+`SKILL.md` line 80 (Phase 15 history) keeps `ConcurrencyQueue` — it is a historical record, not current state.
+## Test Impact Analysis
+1. **New tests the change enables.**
+   `ConcurrencyLimiter` is a pure thunk scheduler with no agent/manager knowledge, so it is unit-testable with plain `() => Promise<void>` tasks and `Promise.withResolvers` gates — no `startAgent` mock, no re-entrant `markStarted` simulation.
+   New coverage: FIFO start order; slot gating (only `limit` tasks run concurrently); `active` decrement frees a slot for the next pending task on settle; `recheck()` starts newly-admissible tasks when the limit grows; dynamic limit re-evaluation; `clear()` resolves pending promises without running their tasks; a task that rejects still frees its slot.
+2. **Tests that become redundant.**
+   The entire `test/lifecycle/concurrency-queue.test.ts` (`isFull`, `enqueue`/`dequeue`, `markStarted`/`markFinished`, `drain`, auto-drain, `clear`, `queuedIds`) — those methods no longer exist; the limiter tests replace them at a cleaner seam.
+3. **Tests that stay as-is (genuinely exercise the layer).**
+   The `SubagentManager — queueing and concurrency with injected stubs` describe block asserts manager-level behavior (queued → running transition order, abort-while-queued never runs the factory, `onSubagentStarted` fires on the queued → running transition).
+   These remain valid against the manager + limiter integration and need only the `createManager` helper change (construct a `ConcurrencyLimiter`), not a behavioral rewrite.
+   The `clearCompleted does not remove running or queued agents` test (maxConcurrent=1, blocking factory) also stays.
+## TDD Order
+Priority = preparatory addition first, then the atomic interface swap, then docs.
+1. **Add `ConcurrencyLimiter` (red → green).**
+   Surface: new `test/lifecycle/concurrency-limiter.test.ts` against new `src/lifecycle/concurrency-limiter.ts`.
+   Covers FIFO start order, slot gating, `active`-frees-slot-on-settle, `recheck()` on limit growth, dynamic limit, `clear()` resolves pending without running, reject-frees-slot.
+   Pure addition — `ConcurrencyQueue` still exists and its tests still pass; the suite stays green.
+   Commit: `feat(pi-subagents): add ConcurrencyLimiter (#381)`.
+2. **Migrate `SubagentManager`, `index.ts`, and the manager test helper to the limiter; delete the queue (red → green).**
+   Surface: `src/lifecycle/subagent-manager.ts`, `src/index.ts`, `src/lifecycle/subagent.ts` (docstring), `test/lifecycle/subagent-manager.test.ts`, and deletion of `src/lifecycle/concurrency-queue.ts` + `test/lifecycle/concurrency-queue.test.ts`.
+   This is one atomic commit: changing `SubagentManagerOptions.queue` → `limiter` breaks both call sites (`index.ts` and the test helper) at the type level simultaneously, and the old test file imports the deleted source — all must land together.
+   Drop the observer relays, the `dequeue`/`drain`/`isFull`/`queuedIds` usage, the `while (true)` loop + its `eslint-disable`, and the test helper's forward-ref `eslint-disable`.
+   Run `pnpm run check` immediately after (shared-interface change with multiple call sites), then the full `pnpm --filter @gotgenes/pi-subagents exec vitest run` (the queueing/concurrency integration tests must still pass).
+   Commit: `refactor(pi-subagents): replace ConcurrencyQueue with thunk-based ConcurrencyLimiter (#381)`.
+3. **Update architecture doc and package skill (docs).**
+   Surface: `docs/architecture/architecture.md` (Mermaid node, layout listing, "What the core owns" bullet, roadmap Step 1 marked done, Step 7 filename reference) and `.pi/skills/package-pi-subagents/SKILL.md` (lifecycle-domain table entry + wording).
+   Commit: `docs(pi-subagents): update architecture and skill for ConcurrencyLimiter (#381)`.
+## Risks and Mitigations
+| Risk                                                                   | Mitigation                                                                                                                                                                                                    |
+| ---------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| A dropped pending thunk leaves `record.promise` forever pending.       | `clear()` resolves dropped tasks' promises; the limiter's contract is that every `schedule()` promise settles.                                                                                                |
+| `waitForAll` could spin or miss queued agents.                         | Queued agents now carry real promises, so a single `Promise.allSettled` covers them; the bounded re-check loop only catches agents spawned during the wait, and terminates when `pendingPromises()` is empty. |
+| An abort-while-queued no-op thunk briefly occupies a slot.             | The thunk returns a synchronously-resolved promise; `active++`/`active--` round-trip in a couple of microtasks and `recheck()` immediately pulls the next task — negligible.                                  |
+| Renaming the file/class leaves stale references.                       | Grep-verified inventory in Module-Level Changes; the migration deletes the source and its test in the same commit; docs updated in step 3.                                                                    |
+| `bypassQueue` public-surface name now slightly misnames the mechanism. | Out of scope (breaking); recorded in Open Questions.                                                                                                                                                          |
+## Open Questions
+- Should `bypassQueue` be renamed (e.g. `bypassLimiter`) for accuracy?
+  It is public type surface, so a rename is breaking and belongs in its own change — defer.
+- Should the `code-design` "narrow interface, not concrete class" guidance be applied to the manager's `limiter` field (typed as `{ schedule; clear }` rather than the concrete `ConcurrencyLimiter`)?
+  Tests construct a real limiter (it is pure and trivially constructible), so no mock-cast pressure exists today; keep the concrete type to match the issue and existing pattern, and revisit only if a test needs to substitute it.
+[#373]: https://github.com/gotgenes/pi-packages/issues/373
+[#374]: https://github.com/gotgenes/pi-packages/issues/374
+[#378]: https://github.com/gotgenes/pi-packages/issues/378

package/docs/retro/0381-replace-concurrency-queue-with-limiter.md ADDED Viewed

@@ -0,0 +1,49 @@
+---
+issue: 381
+issue_title: "Replace ConcurrencyQueue with a thunk-based ConcurrencyLimiter"
+---
+# Retro: #381 — Replace ConcurrencyQueue with a thunk-based ConcurrencyLimiter
+## Stage: Planning (2026-06-13T00:00:00Z)
+### Session summary
+Produced a 3-step TDD plan to replace the ID-registry `ConcurrencyQueue` (with its `startAgent` back-edge and `markStarted`/`markFinished` relays) with a pure `ConcurrencyLimiter` that schedules thunks FIFO against a dynamic limit.
+The design follows the architecture doc's Phase 17 Step 1 entry and the issue's revised framing closely; the plan adds concrete code sketches for `schedule`/`recheck`/`clear`, the manager call site, the simplified `waitForAll`, and `index.ts` wiring.
+### Observations
+- Author is `gotgenes` (matches the gh CLI user), so the well-specified proposal was treated as the working hypothesis; the design is unambiguous (down to the architecture-doc Step 1), so the `ask_user` gate was skipped.
+- Classified non-breaking: `ConcurrencyQueue`/`ConcurrencyLimiter` are internal — no public API, config, or observable behavior change.
+  The FIFO admission gate against `maxConcurrent` is preserved.
+- Key design decision beyond the issue sketch: `clear()` must *settle* dropped pending promises (resolve them), not just drop the thunks.
+  Every `schedule()` promise becomes `record.promise`, and the post-spawn contract is that it always settles — dropping without resolving would strand a promise.
+  This costs a small `settle` handle per pending entry (a few lines beyond the issue's "~40 lines").
+- Verified no production caller awaits a *queued* agent's promise in a blocking way (`get-result-tool.ts` guards on `status === "running"`; `spawnAndWait` is foreground/direct; `waitForAll` filters by status), confirming it is safe to give queued agents a real promise.
+- Sequencing decision: the `SubagentManagerOptions.queue` → `limiter` swap breaks both call sites (`index.ts` + the manager test helper) and the old test file imports the deleted source, so step 2 is one atomic commit (migrate consumers + delete queue + delete old test).
+- `bypassQueue` is kept as-is — it is in the published `SubagentsService` type bundle, so renaming would be breaking; deferred to Open Questions.
+- Doc inventory: grep confirmed current-state references to update are the Mermaid lifecycle node, the layout listing, the "What the core owns" bullet, the Step 7 ([#378]) target filename, and the `package-pi-subagents` SKILL lifecycle-domain table.
+  `SKILL.md` line 80 (Phase 15 history) keeps `ConcurrencyQueue` as a historical record.
+## Stage: Implementation — TDD (2026-06-13T22:15:00Z)
+### Session summary
+Executed all 3 planned TDD cycles: (1) added `ConcurrencyLimiter` + 13 unit tests, (2) migrated `SubagentManager`, `index.ts`, `subagent.ts` docstring, and the manager test helper to the limiter while deleting `concurrency-queue.ts` + its test in the same atomic commit, (3) updated `architecture.md` and the package SKILL.
+Test count went 975 → 966 (−22 deleted queue tests, +13 new limiter tests); the full suite, `check`, `lint`, and `pnpm fallow dead-code` are all green.
+### Observations
+- The plan held up cleanly — no surprises in the manager integration tests.
+  The `queueing and concurrency` describe block passed unchanged after only the `createManager` helper swap (real `ConcurrencyLimiter` instead of `ConcurrencyQueue` + forward-ref start callback), confirming those tests exercise behavior, not queue internals.
+- One deviation: a 4th commit (`90135005`, `refactor:`) fixes a stale `// before startAgent / queue drain` comment at `src/index.ts:125` that the plan's grep inventory missed (it named no removed symbol, just deleted concepts).
+  The pre-completion reviewer caught it.
+  Committed separately rather than amending the non-HEAD refactor commit, since AGENTS.md discourages interactive rebase in this environment.
+- ESLint `@typescript-eslint/no-floating-promises` fired on every bare `limiter.schedule(...)` in the limiter test (the queue's `enqueue` returned `void`; `schedule` returns a promise).
+  Resolved by prefixing unawaited calls with `void` — all such tasks either stay pending or resolve, so no unhandled rejection.
+- The `clear()`-settles-pending-promises decision (made at planning) proved correct and is covered by a dedicated test ("resolves the promises of dropped pending tasks").
+- Pre-completion reviewer: WARN (no FAILs).
+  Reviewer warnings: the single stale-comment finding at `index.ts:125` — now fixed in commit `90135005`.
+[#378]: https://github.com/gotgenes/pi-packages/issues/378

package/docs/retro/0400-include-parent-prompt-in-replace-mode.md CHANGED Viewed

@@ -41,4 +41,44 @@ Test count went from 973 to 975 (+2 net new tests) across 59 test files.
 - Pre-completion reviewer: WARN — one finding: `.pi/skills/package-pi-subagents/SKILL.md` still said "prepends" for the `<active_agent>` tag; fixed in a follow-up `docs:` commit before shipping.
 - No deviations from the plan's Module-Level Changes list; no lockfile changes; fallow dead-code exited zero.
+## Stage: Final Retrospective (2026-06-14T01:11:10Z)
+### Session summary
+Shipped #400 across three stages (Planning on `claude-opus-4-8`, TDD + Ship on `claude-sonnet-4-6`) as a single-function edit to `buildAgentPrompt()`'s replace branch plus tests and doc updates, released as `pi-subagents` v16.0.0 (major, breaking `perf!:`).
+The run was clean end-to-end: two `ask_user` gates during planning, a 3-cycle TDD pass, one pre-completion WARN resolved before push, and a no-friction release-please merge.
+### Observations
+#### What went well
+- Cross-extension investigation on demand — when the operator asked mid-`ask_user` how the `genericBase` fallback interacts with `@gotgenes/pi-anthropic-auth`, the agent read that sibling repo's `system-prompt-shaping.ts` and `request-shaping.ts` and proved no new interaction (billing header prepended unconditionally; de-fingerprinting keys off `PI_DEFAULT_PROMPT_PREFIX`, absent from the neutral `genericBase`) before answering.
+  This converted an open worry into a documented Risk row rather than a deferred unknown.
+- Emergent-scope surfacing — planning noticed that built-in `Explore`/`Plan` are replace-mode agents and so are visibly affected, then confirmed uniform application via a second `ask_user` instead of assuming.
+- Autoformat discipline — after `pi-autoformat` touched `README.md` mid-edit, the agent re-read the region before the next edit (turns 49–50) rather than matching against stale layout, avoiding a failed `oldText`.
+#### What caused friction (agent side)
+- `missing-context` (planning) — the plan listed the README's Patch 3 `<active_agent>` "prepends" wording as a doc update but missed the identical Patch 3 description in `.pi/skills/package-pi-subagents/SKILL.md`.
+  Exact-grep during planning keyed on removed strings (`You are a pi coding agent sub-agent`, `prompt_mode`); the stale prose carried none of them, so the skill file's "prepends `<active_agent>`" line was not found.
+  Impact: the pre-completion reviewer caught it as a WARN, requiring one follow-up `docs:` commit (8e93d2a4) during TDD before push — no rework beyond that, and the safety net worked as designed.
+#### What caused friction (user side)
+- None — the operator's mid-planning OAuth question was a high-value redirect that strengthened the plan, not friction.
+### Diagnostic details
+- **Model-performance correlation** — judgment-heavy planning ran on `claude-opus-4-8`; mechanical TDD execution and the deterministic ship steps ran on `claude-sonnet-4-6`.
+  Appropriate assignment in both directions; no mismatch.
+- **Unused-tool detection** — the `colgrep` skill was loaded in planning but never used; exploration was all exact-symbol grep, which was correct for known symbols.
+  The one place it would have helped is the `missing-context` friction: a semantic search like "docs describing how the active_agent tag is added to the system prompt" would likely have surfaced both the README and the SKILL.md descriptions that symbol-grep missed.
+- **Feedback-loop gap analysis** — verification ran incrementally throughout (green baseline before cycle 1, per-file `vitest` each cycle, full suite + `check` + `lint` + `fallow` after the last step).
+  No end-loaded verification.
+- **Escalation-delay tracking** — no rabbit-holes; no error sequence exceeded one tool call.
+### Changes made
+1. `.pi/prompts/plan-issue.md` — extended the Module-Level Changes grep bullet: when a step reworks a documented mechanism's behavior (rather than removing a symbol), grep `.pi/skills/package-*/SKILL.md` for the mechanism name, since reworded prose carries no removed symbol to match.
 [#180]: https://github.com/gotgenes/pi-packages/issues/180

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@gotgenes/pi-subagents",
-  "version": "16.0.0",
+  "version": "16.1.0",
   "type": "module",
   "exports": {
     ".": {

package/src/index.ts CHANGED Viewed

@@ -24,7 +24,7 @@ import { AgentTypeRegistry } from "#src/config/agent-types";
 import { loadCustomAgents } from "#src/config/custom-agents";
 import { SessionLifecycleHandler, ToolStartHandler } from "#src/handlers/index";
 import { createChildLifecyclePublisher } from "#src/lifecycle/child-lifecycle";
-import { ConcurrencyQueue } from "#src/lifecycle/concurrency-queue";
+import { ConcurrencyLimiter } from "#src/lifecycle/concurrency-limiter";
 import { createSubagentSession, type SubagentSessionDeps } from "#src/lifecycle/create-subagent-session";
 import { buildParentSnapshot } from "#src/lifecycle/parent-snapshot";
 import { SubagentManager, type SubagentManagerObserver } from "#src/lifecycle/subagent-manager";
@@ -66,12 +66,12 @@ export default function (pi: ExtensionAPI) {
   );
   // Settings: owns all three in-memory values and handles load/save/emit.
-  // onMaxConcurrentChanged is wired to the queue directly (closure captures by reference).
+  // onMaxConcurrentChanged is wired to the limiter directly (closure captures by reference).
   const settings = new SettingsManager({
     emit: (event, payload) => pi.events.emit(event, payload),
     cwd: process.cwd(),
     agentDir: getAgentDir(),
-    onMaxConcurrentChanged: () => queue.drain(),
+    onMaxConcurrentChanged: () => limiter.recheck(),
   });
   settings.load();
@@ -122,7 +122,7 @@ export default function (pi: ExtensionAPI) {
       });
     },
     onSubagentCreated(record) {
-      // Emit created event for background agents (before startAgent / queue drain).
+      // Emit created event for background agents (before limiter admission).
       pi.events.emit("subagents:created", {
         id: record.id,
         type: record.type,
@@ -150,22 +150,15 @@ export default function (pi: ExtensionAPI) {
     lifecycle: createChildLifecyclePublisher((channel, data) => pi.events.emit(channel, data)),
   };
-  // ConcurrencyQueue: scheduling extracted from SubagentManager.
-  // startAgent callback forward-references manager via closure (safe — drain is never called during construction).
-  const queue = new ConcurrencyQueue(
-    () => settings.maxConcurrent,
-    (id) => {
-      const agent = manager.getRecord(id);
-      if (agent?.status !== "queued") return;
-      agent.promise = agent.run();
-    },
-  );
+  // ConcurrencyLimiter: schedules background run thunks FIFO against the limit.
+  // It knows nothing about agents or the manager — dependency direction is strictly manager → limiter.
+  const limiter = new ConcurrencyLimiter(() => settings.maxConcurrent);
   const manager = new SubagentManager({
     createSubagentSession: (params) => createSubagentSession(params, subagentSessionDeps),
     baseCwd: process.cwd(),
     observer,
-    queue,
+    limiter,
     getRunConfig: () => settings,
   });

package/src/lifecycle/concurrency-limiter.ts ADDED Viewed

@@ -0,0 +1,55 @@
+/**
+ * concurrency-limiter.ts — FIFO admission gate for background work.
+ *
+ * Schedules run closures (thunks) against a dynamic limit, running them in
+ * scheduling order as slots free. The limiter knows nothing about agents, IDs,
+ * or the manager — it owns only the active count and the pending queue.
+ *
+ * Every scheduled promise settles: it follows the task's settlement when the
+ * task runs, or resolves early if clear() drops it before it starts.
+ *
+ * State: `active` counts tasks that have been started and have not yet
+ * settled; `pending` holds, per waiting task, a `start` closure that runs it
+ * and a `settle` closure that resolves its promise without running it.
+ * `getLimit` is re-read on every recheck() iteration, so the limit may change
+ * between calls.
+ */
+export class ConcurrencyLimiter {
+	private active = 0;
+	private readonly pending: Array<{ start: () => void; settle: () => void }> = [];
+	constructor(private readonly getLimit: () => number) {}
+	/**
+	 * Schedule a task to run FIFO once a slot is free.
+	 * Returns a promise that settles with the task, or resolves early if the
+	 * task is dropped by clear() before it starts.
+	 *
+	 * The returned promise rejects if the task's promise rejects.
+	 * recheck() is invoked right away, so the task starts synchronously inside
+	 * this call when a slot is free and nothing is queued ahead of it.
+	 * When the task settles, the slot is released and recheck() runs again to
+	 * admit the next pending task.
+	 *
+	 * NOTE(review): if `task` throws synchronously instead of returning a
+	 * rejected promise, `active` has already been incremented and is never
+	 * decremented, and the returned promise never settles. Callers appear to
+	 * be expected to pass thunks that always return a promise; confirm.
+	 */
+	schedule(task: () => Promise<void>): Promise<void> {
+		const { promise, resolve, reject } = Promise.withResolvers<void>(); // eslint-disable-line @typescript-eslint/no-invalid-void-type -- Promise.withResolvers<void> is valid; rule does not allow void in generic fn call type args
+		this.pending.push({
+			start: () => {
+				this.active++;
+				task()
+					.then(resolve, reject)
+					.finally(() => {
+						this.active--;
+						this.recheck();
+					});
+			},
+			settle: resolve,
+		});
+		this.recheck();
+		return promise;
+	}
+	/**
+	 * Start pending tasks, oldest first, while the active count is below the
+	 * current limit. Call when the limit may have grown.
+	 * Also invoked by schedule() and after every started task settles.
+	 */
+	recheck(): void {
+		while (this.active < this.getLimit()) {
+			const next = this.pending.shift();
+			if (!next) break;
+			next.start();
+		}
+	}
+	/**
+	 * Drop all pending tasks, resolving their promises without running them.
+	 * Tasks that have already started are not affected; `active` is unchanged.
+	 */
+	clear(): void {
+		const dropped = this.pending.splice(0);
+		for (const task of dropped) task.settle();
+	}
+}

package/src/lifecycle/subagent-manager.ts CHANGED Viewed

@@ -2,14 +2,14 @@
  * subagent-manager.ts - Tracks subagents, background execution, resume support.
  *
  * Background agents are subject to a configurable concurrency limit (default: 4).
- * Excess agents are queued and auto-started as running agents complete.
- * Foreground agents bypass the queue (they block the parent anyway).
+ * Excess agents are scheduled on a ConcurrencyLimiter and auto-started as running
+ * agents complete. Foreground agents bypass the limiter (they block the parent anyway).
  */
 import { randomUUID } from "node:crypto";
 import type { Model } from "@earendil-works/pi-ai";
 import { debugLog } from "#src/debug";
-import type { ConcurrencyQueue } from "#src/lifecycle/concurrency-queue";
+import type { ConcurrencyLimiter } from "#src/lifecycle/concurrency-limiter";
 import type { CreateSubagentSessionParams } from "#src/lifecycle/create-subagent-session";
 import type { ParentSnapshot } from "#src/lifecycle/parent-snapshot";
 import { Subagent, type SubagentLifecycleObserver } from "#src/lifecycle/subagent";
@@ -31,8 +31,8 @@ export interface SubagentManagerObserver {
 export interface SubagentManagerOptions {
   /** Assembly factory that produces a born-complete SubagentSession per spawn. */
   createSubagentSession: (params: CreateSubagentSessionParams) => Promise<SubagentSession>;
-  /** Concurrency queue — owns scheduling, limit checks, and drain logic. */
-  queue: ConcurrencyQueue;
+  /** Concurrency limiter — schedules background run thunks FIFO against the limit. */
+  limiter: ConcurrencyLimiter;
   /** Base working directory handed to a workspace provider (the parent cwd). */
   baseCwd: string;
   getRunConfig?: () => RunConfig;
@@ -67,7 +67,7 @@ export class SubagentManager {
   private cleanupInterval: ReturnType<typeof setInterval>;
   private readonly observer?: SubagentManagerObserver;
   private readonly createSubagentSession: (params: CreateSubagentSessionParams) => Promise<SubagentSession>;
-  private readonly queue: ConcurrencyQueue;
+  private readonly limiter: ConcurrencyLimiter;
   private readonly baseCwd: string;
   private getRunConfig?: () => RunConfig;
   private _workspaceProvider?: WorkspaceProvider;
@@ -79,7 +79,7 @@ export class SubagentManager {
   constructor(options: SubagentManagerOptions) {
     this.createSubagentSession = options.createSubagentSession;
-    this.queue = options.queue;
+    this.limiter = options.limiter;
     this.baseCwd = options.baseCwd;
     this.observer = options.observer;
     this.getRunConfig = options.getRunConfig;
@@ -109,7 +109,6 @@ export class SubagentManager {
   private buildObserver(options: AgentSpawnConfig): SubagentLifecycleObserver {
     return {
       onStarted: (agent) => {
-        if (options.isBackground) this.queue.markStarted();
         this.observer?.onSubagentStarted(agent);
       },
       onSessionCreated: options.observer?.onSessionCreated
@@ -117,7 +116,6 @@ export class SubagentManager {
         : undefined,
       onRunFinished: (agent) => {
         if (options.isBackground) {
-          this.queue.markFinished();
           try { this.observer?.onSubagentCompleted(agent); } catch (err) { debugLog("onSubagentCompleted observer", err); }
         }
       },
@@ -166,9 +164,13 @@ export class SubagentManager {
       this.observer?.onSubagentCreated(record);
     }
-    if (options.isBackground && !options.bypassQueue && this.queue.isFull()) {
-      // Queue it - will be started when a running agent completes
-      this.queue.enqueue(id);
+    if (options.isBackground && !options.bypassQueue) {
+      // Schedule on the limiter — started when a slot frees. The status guard
+      // makes an abort-while-queued task a no-op (Step 3 folds it into start()).
+      record.promise = this.limiter.schedule(() => {
+        if (record.status !== "queued") return Promise.resolve();
+        return record.run();
+      });
       return id;
     }
@@ -221,9 +223,9 @@ export class SubagentManager {
     const record = this.agents.get(id);
     if (!record) return false;
-    // Remove from queue if queued
+    // A queued agent has not started; mark it stopped. Its scheduled thunk
+    // becomes a no-op (status guard) when its slot finally opens.
     if (record.status === "queued") {
-      this.queue.dequeue(id);
       record.markStopped();
       return true;
     }
@@ -269,43 +271,44 @@ export class SubagentManager {
   // fallow-ignore-next-line unused-class-member
   abortAll(): number {
     let count = 0;
-    // Clear queued agents first
-    for (const id of this.queue.queuedIds) {
-      const record = this.agents.get(id);
-      if (record) {
+    for (const record of this.agents.values()) {
+      if (record.status === "queued") {
         record.markStopped();
         count++;
+      } else if (record.abort()) {
+        count++;
       }
     }
-    this.queue.clear();
-    // Abort running agents
-    for (const record of this.agents.values()) {
-      if (record.abort()) count++;
-    }
+    // Drop pending thunks (their promises resolve).
+    this.limiter.clear();
     return count;
   }
   /** Wait for all running and queued agents to complete (including queued ones). */
   // fallow-ignore-next-line unused-class-member
   async waitForAll(): Promise<void> {
-    // Loop because queue.drain() respects the concurrency limit - as running
-    // agents finish they start queued ones, which need awaiting too.
-    // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition -- intentional infinite loop with explicit break
-    while (true) {
-      this.queue.drain();
-      const pending = [...this.agents.values()]
-        .filter(r => r.status === "running" || r.status === "queued")
-        .map(r => r.promise)
-        .filter((p): p is Promise<void> => p != null);
-      if (pending.length === 0) break;
+    // Every spawned agent has a settled-on-completion promise (the limiter starts
+    // queued ones as slots free), so a single allSettled covers the queued case.
+    // The loop only catches agents spawned during the wait.
+    let pending = this.pendingPromises();
+    while (pending.length > 0) {
       await Promise.allSettled(pending);
+      pending = this.pendingPromises();
     }
   }
+  /** Promises of all running/queued agents that have one. */
+  private pendingPromises(): Promise<void>[] {
+    return [...this.agents.values()]
+      .filter(r => r.status === "running" || r.status === "queued")
+      .map(r => r.promise)
+      .filter((p): p is Promise<void> => p != null);
+  }
   dispose() {
     clearInterval(this.cleanupInterval);
-    // Clear queue
-    this.queue.clear();
+    // Drop pending thunks
+    this.limiter.clear();
     for (const record of this.agents.values()) {
       record.disposeSession();
     }

package/src/lifecycle/subagent.ts CHANGED Viewed

@@ -428,7 +428,8 @@ export class Subagent {
 	/**
 	 * Abort a running agent: fire AbortController and transition to stopped.
 	 * Returns false if the agent is not running.
-	 * Queue removal is handled by SubagentManager via ConcurrencyQueue.dequeue().
+	 * A still-queued agent is stopped by SubagentManager; its scheduled thunk
+	 * then no-ops on the queued-status guard.
 	 */
 	abort(): boolean {
 		if (this._status !== "running") return false;

package/src/lifecycle/concurrency-queue.ts DELETED Viewed

@@ -1,63 +0,0 @@
-/**
- * concurrency-queue.ts — Manages background agent scheduling with a configurable concurrency limit.
- *
- * Stores agent IDs (not full agent objects) and decides *when* to start them.
- * The startAgent callback provided at construction handles the actual agent lifecycle.
- */
-export class ConcurrencyQueue {
-	private queue: string[] = [];
-	private running = 0;
-	constructor(
-		private readonly getMaxConcurrent: () => number,
-		private readonly startAgent: (id: string) => void,
-	) {}
-	/** Whether the concurrency limit has been reached. */
-	isFull(): boolean {
-		return this.running >= this.getMaxConcurrent();
-	}
-	/** Add an agent ID to the wait queue. */
-	enqueue(id: string): void {
-		this.queue.push(id);
-	}
-	/** Remove an agent ID from the queue (e.g., aborted before starting). Returns true if found. */
-	dequeue(id: string): boolean {
-		const idx = this.queue.indexOf(id);
-		if (idx === -1) return false;
-		this.queue.splice(idx, 1);
-		return true;
-	}
-	/** Increment the running count. Called when an agent transitions to running. */
-	markStarted(): void {
-		this.running++;
-	}
-	/** Decrement the running count and drain the queue. Called when a background agent finishes. */
-	markFinished(): void {
-		this.running--;
-		this.drain();
-	}
-	/** Start queued agents until the concurrency limit is reached. */
-	drain(): void {
-		while (this.queue.length > 0 && !this.isFull()) {
-			const id = this.queue.shift()!;
-			this.startAgent(id);
-		}
-	}
-	/** Snapshot of queued IDs for iteration (e.g., abortAll). */
-	get queuedIds(): readonly string[] {
-		return this.queue;
-	}
-	/** Clear the queue without starting any agents. */
-	clear(): void {
-		this.queue = [];
-	}
-}