npm - @tintinweb/pi-subagents - Versions diffs - 0.6.3 → 0.7.1 - Mend

@tintinweb/pi-subagents 0.6.3 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37)

package/CHANGELOG.md +37 -0
package/README.md +55 -11
package/dist/agent-manager.d.ts +23 -1
package/dist/agent-manager.js +71 -20
package/dist/agent-runner.d.ts +27 -0
package/dist/agent-runner.js +28 -4
package/dist/index.js +236 -72
package/dist/schedule-store.d.ts +36 -0
package/dist/schedule-store.js +144 -0
package/dist/schedule.d.ts +109 -0
package/dist/schedule.js +338 -0
package/dist/settings.d.ts +10 -0
package/dist/settings.js +5 -0
package/dist/types.d.ts +46 -0
package/dist/ui/agent-widget.d.ts +15 -8
package/dist/ui/agent-widget.js +28 -7
package/dist/ui/conversation-viewer.js +6 -8
package/dist/ui/schedule-menu.d.ts +16 -0
package/dist/ui/schedule-menu.js +95 -0
package/dist/usage.d.ts +50 -0
package/dist/usage.js +49 -0
package/package.json +10 -6
package/src/agent-manager.ts +90 -20
package/src/agent-runner.ts +43 -5
package/src/index.ts +239 -63
package/src/schedule-store.ts +143 -0
package/src/schedule.ts +365 -0
package/src/settings.ts +14 -0
package/src/types.ts +52 -0
package/src/ui/agent-widget.ts +36 -6
package/src/ui/conversation-viewer.ts +6 -6
package/src/ui/schedule-menu.ts +104 -0
package/src/usage.ts +60 -0
package/.github/workflows/ci.yml +0 -21
package/biome.json +0 -26
package/dist/ui/conversation-viewer.test.d.ts +0 -1
package/dist/ui/conversation-viewer.test.js +0 -254

package/CHANGELOG.md CHANGED Viewed

@@ -7,6 +7,43 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [Unreleased]
+## [0.7.1] - 2026-05-07
+> **Heads-up — behavior change:**
+> - `isolation: "worktree"` now fails loud (returns an error) instead of silently falling back to the main tree. Affects users running pi in a non-git directory or a fresh repo with no commits.
+### Changed
+- **`isolation: "worktree"` now fails loud instead of silently falling back.** Previously when `createWorktree` returned undefined (not a git repo, no commits yet, or `git worktree add` failed), the agent ran in the main `cwd` with a `[WARNING: ...]` block prepended to its prompt — visible only to the LLM, never surfaced to the caller. Now the failure throws a structured error that propagates back to the `Agent` tool response; no agent record is created. Failed scheduled fires are recorded as `lastStatus: "error"` with the reason in the `subagents:scheduled` error event. Queued background spawns whose worktree creation fails when they dequeue are marked terminal-error and don't block the rest of the queue.
+### Fixed
+- **Headless `pi --print` runs no longer hang or crash after background
+subagents complete.** Cleanup timers no longer keep the process alive, and
+stale completion notifications are treated as best-effort shutdown side
+effects.
+## [0.7.0] - 2026-05-04
+> **Heads-up — behavior changes:**
+> - `subagents:completed`/`failed` event `tokens.total` now excludes `cacheRead` (previously double-counted across turns) — see Fixed [#38].
+> - Cron `?` is now a wildcard (same as `*`), not "current time value" — affects Quartz-style expressions only.
+### Changed
+- **`@mariozechner/pi-{ai,coding-agent,tui}` moved to `peerDependencies` (`>=0.70.5`).** Avoids duplicate framework instances when the host loads this extension.
+- **`@sinclair/typebox` pinned from `latest` to `^0.34.49`** so installs are reproducible.
+- **`croner` bumped 8 → 10.** Heads-up: in cron strings, `?` now means wildcard (same as `*`) instead of "current time value" — affects Quartz-style expressions only.
+### Added
+- **Master switch for scheduling** — new `schedulingEnabled` setting (default `true`) under `/agents → Settings → Scheduling`. When set to `false`: the `schedule` parameter and its guideline are stripped from the `Agent` tool spec at registration (zero LLM-context cost), the scheduler does not bind to the session, the `/agents → Scheduled jobs` menu entry is hidden, and any in-flight scheduler is stopped immediately. The schema-level removal applies on next pi session; the runtime kill (menu, fire path) takes effect immediately. Persisted at `<cwd>/.pi/subagents.json`.
+- **Schedule subagent spawns** — the `Agent` tool now accepts an optional `schedule` parameter. When set, the spawn registers a job that fires later instead of running immediately. Three formats: 6-field cron (`"0 0 9 * * 1"` — 9am every Monday), interval (`"5m"`, `"1h"`), or one-shot (`"+10m"` or ISO timestamp). Returns the job ID. Schedules are session-scoped — they reset on `/new`, restore on `/resume` (mirrors the persistence model of pi-chonky-tasks). Storage at `<cwd>/.pi/subagent-schedules/<sessionId>.json`, with PID-based file locking + atomic temp+rename for concurrent-instance safety. **Result delivery is identical to today's background-spawn completions**: when the scheduled agent finishes, the existing `subagent-notification` followUp path emits the result to the conversation — no new delivery code, no new message types. **Concurrency**: scheduled fires bypass `maxConcurrent` so a 5-minute interval can't be deferred behind 4 long-running manual agents. **Management**: `/agents` → "Scheduled jobs" lists active jobs and lets you cancel any one of them. Creation is via the `Agent` tool only — no parallel manual-create wizard in this iteration. **Events**: `subagents:scheduled` ({ type: "added" | "removed" | "updated" | "fired" | "error", … }) and `subagents:scheduler_ready` for cross-extension consumers. **Restrictions**: `schedule` is incompatible with `inherit_context` (no parent at fire time) and `resume` (schedules create fresh agents); forces `run_in_background: true`. Scheduler engine mirrors `pi-cron-schedule` (`croner` for cron, `setInterval`/`setTimeout` for interval/once); past one-shot timestamps and invalid cron expressions are caught at create time.
+- **Context-window utilization indicator in the subagent overlay** — token count is now followed by a colored `(NN%)` showing how full the subagent's context is right now (`estimateContextTokens(messages) / model.contextWindow * 100`, sourced from upstream `contextUsage.percent`). Threshold colors: <70% dim, 70–85% warning, ≥85% error. Gracefully omitted when the model has no `contextWindow` declared, or right after compaction before the next assistant turn (`tokens` is `null` in that window). The same annotation slot also surfaces a compaction count `↻N` when the agent has compacted at least once — e.g. `12.3k token (84% · ↻3)` (percent + compactions joined with `·`), `12.3k token (↻1)` (compactions only, immediately post-compaction while percent is still null). The compaction glyph stays dim regardless; the percent's threshold color carries the urgency signal. Two live overlays get the annotations (running stats line; inspect-overlay header); post-completion notifications and result/event payloads only get the count (the indicator is no longer actionable once the agent is done).
+- **Token usage and context% exposed to the parent agent** at every interaction surface — `get_subagent_result` adds `Context: NN%` to its stats line; `steer_subagent` returns a `Current state: 12.3k token · 5 tool uses · context 72% full` line so the steering agent knows whether it has room before sending more context; `task-notification` XML adds `<context_percent>NN</context_percent>` (omitted when null). All plain-text, no ANSI codes — designed for LLM consumption, not human display.
+- **New `subagents:compacted` lifecycle event** fires when a subagent's session successfully compacts. Payload: `{ id, type, description, reason: "manual" | "threshold" | "overflow", tokensBefore, compactionCount }` — `tokensBefore` is upstream's pre-compaction context size estimate; `compactionCount` is the running total for this agent (also persisted on `AgentRecord.compactionCount` and surfaced in `get_subagent_result` / `steer_subagent` / `task-notification` when > 0). Aborted compactions don't fire. Routed through a new manager-level `onCompact` constructor callback, matching the existing `onStart` / `onComplete` pattern.
+### Fixed
+- **Subagent token count was inflated 5–15× and reset mid-run** ([#38](https://github.com/tintinweb/pi-subagents/issues/38)). Two distinct bugs in the same field. (1) Upstream `getSessionStats().tokens.total` sums per-turn `cacheRead` across every assistant message — but each turn's `cacheRead` is the *cumulative* cached prefix re-read on that one API call, so summing N turns counts the prefix N times (quadratic inflation, very visible on long sessions). (2) Even with that fixed, anything derived from `session.state.messages` resets at compaction because upstream replaces the array via `this.agent.state.messages = sessionContext.messages`. Fix replaces all six display readers with a lifetime accumulator (`AgentRecord.lifetimeUsage` and `AgentActivity.lifetimeUsage` — `{ input, output, cacheWrite }`) fed by a new `onAssistantUsage` callback dispatched from `message_end` events in both `runAgent` and `resumeAgent`. The accumulator is independent of `state.messages` mutation, so it survives compaction; total = input + output + cacheWrite by construction (cacheRead deliberately excluded — same prefix-double-counting reason). The `subagents:completed`/`failed` event payload's `tokens` field is now also lifetime-accumulated for `input`, `output`, and `total` together (was: `total` lifetime, `input`/`output` session-derived → inconsistent after compaction).
+- **ESC during a foreground `Agent` call now actually stops the subagent** ([#44](https://github.com/tintinweb/pi-subagents/pull/44) — thanks [@Zeng-Zer](https://github.com/Zeng-Zer)). Pi's interrupt path is `esc → agent.abort()` on the parent → `AbortSignal` delivered to every tool's `execute(toolCallId, params, signal, …)`, but the `Agent` tool dropped that signal on the floor: subagents ran on their own independent `AbortController` inside `AgentManager`, so the parent abort was invisible and the subagent kept running until natural completion or `max_turns`. Fix threads `signal` through `Agent.execute` → `manager.spawnAndWait()` → `SpawnOptions.signal`, and `AgentManager.startAgent()` now attaches an `{ once: true }` `"abort"` listener that calls `this.abort(id)` (which sets `status: "stopped"` and aborts the child controller). The listener is detached in both `.then` and `.catch` to avoid leaking on natural settle. **Scope:** foreground only — background agents intentionally outlive the parent tool call, so their spawn deliberately does not forward `signal`. Resume path (`AgentManager.resume()`) has the same blind spot and is tracked as a follow-up.
 ## [0.6.3] - 2026-04-28
 ### Fixed

package/README.md CHANGED Viewed

@@ -28,8 +28,9 @@ https://github.com/user-attachments/assets/8685261b-9338-4fea-8dfe-1c590d5df543
 - **Skill preloading** — inject named skill files from `.pi/skills/` into agent system prompts
 - **Tool denylist** — block specific tools via `disallowed_tools` frontmatter
 - **Styled completion notifications** — background agent results render as themed, compact notification boxes (icon, stats, result preview) instead of raw XML. Expandable to show full output. Group completions render each agent individually
-- **Event bus** — lifecycle events (`subagents:created`, `started`, `completed`, `failed`, `steered`) emitted via `pi.events`, enabling other extensions to react to sub-agent activity
+- **Event bus** — lifecycle events (`subagents:created`, `started`, `completed`, `failed`, `steered`, `compacted`) emitted via `pi.events`, enabling other extensions to react to sub-agent activity
 - **Cross-extension RPC** — other pi extensions can spawn and stop subagents via the `pi.events` event bus (`subagents:rpc:ping`, `subagents:rpc:spawn`, `subagents:rpc:stop`). Standardized reply envelopes with protocol versioning. Emits `subagents:ready` on load
+- **Schedule subagents** — pass `schedule` to the `Agent` tool to fire on cron / interval / one-shot. Session-scoped jobs with PID-locked persistence; results land via the same `subagent-notification` followUp path as manual background completions; manage via `/agents → Scheduled jobs`
 ## Install
@@ -58,29 +59,67 @@ Agent({
 Foreground agents block until complete and return results inline. Background agents return an ID immediately and notify you on completion.
+### Scheduling
+Add a `schedule` field to register the agent to fire later instead of running now:
+```
+Agent({
+  subagent_type: "Explore",
+  prompt: "Look at recent commits and summarize what changed since last week",
+  description: "Weekly commit review",
+  schedule: "0 0 9 * * 1",   // 9am every Monday (6-field cron)
+})
+```
+Schedule formats:
+- **Cron** — 6-field (`second minute hour day-of-month month day-of-week`), e.g. `"0 0 9 * * 1"` for 9am every Monday, `"0 */15 * * * *"` for every 15 minutes.
+- **Interval** — `"5m"`, `"1h"`, `"30s"`, `"2d"`. Fires repeatedly at that interval.
+- **One-shot relative** — `"+10m"`, `"+2h"`, `"+1d"`. Fires once at that future time.
+- **One-shot absolute** — full ISO timestamp, e.g. `"2026-12-25T09:00:00.000Z"`.
+When a schedule fires, the spawn runs in background and its completion notification arrives in the conversation through the same `subagent-notification` followUp path as a manually-spawned background agent — your parent agent reasons about the result the same way.
+Schedules are **session-scoped**: they reset on `/new` and restore on `/resume`. List and cancel via `/agents → Scheduled jobs` (creation is the `Agent` tool's job — there is no parallel manual-create wizard). Storage at `<cwd>/.pi/subagent-schedules/<sessionId>.json` with PID-based file locking for cross-instance safety.
+**Disable the feature entirely**: `/agents → Settings → Scheduling → disabled` removes `schedule` from the `Agent` tool spec (no LLM-context cost), hides the menu entry, and stops any active scheduler. The schema-level removal takes effect on the next pi session; the runtime kill is immediate. Re-enable from the same menu.
+Restrictions:
+- `schedule` cannot be combined with `inherit_context` (no parent conversation exists at fire time) or `resume` (schedules create fresh agents).
+- `run_in_background` is forced to `true`.
+- Scheduled fires bypass the `maxConcurrent` queue so a 5-minute interval cannot be deferred behind long-running manual agents.
+- **Headless `pi -p` doesn't wait for scheduled subagents.**
 ## UI
 The extension renders a persistent widget above the editor showing all active agents:
 ```
 ● Agents
-├─ ⠹ Agent  Refactor auth module · ⟳5≤30 · 5 tool uses · 33.8k token · 12.3s
+├─ ⠹ Agent  Refactor auth module · ⟳5≤30 · 5 tool uses · 33.8k token (62%) · 12.3s
 │    ⎿  editing 2 files…
-├─ ⠹ Explore  Find auth files · ⟳3 · 3 tool uses · 12.4k token · 4.1s
+├─ ⠹ Explore  Find auth files · ⟳3 · 3 tool uses · 12.4k token (8%) · 4.1s
 │    ⎿  searching…
+├─ ⠹ Agent  Long-running task · ⟳42 · 38 tool uses · 91.0k token (84% · ↻2) · 2m17s
+│    ⎿  reading…
 └─ 2 queued
 ```
+The token field is annotated with two optional signals inside parens:
+- **`NN%`** — context-window utilization (color-coded: <70% dim, 70–85% warning, ≥85% error). Omitted when the model has no declared `contextWindow`, or briefly right after compaction.
+- **`↻N`** — number of times the session has compacted, when > 0. Stays dim; the percent's color carries urgency.
 Individual agent results render Claude Code-style in the conversation:
 | State | Example |
 |-------|---------|
-| **Running** | `⠹ ⟳3≤30 · 3 tool uses · 12.4k token` / `⎿ searching, reading 3 files…` |
-| **Completed** | `✓ ⟳8 · 5 tool uses · 33.8k token · 12.3s` / `⎿ Done` |
-| **Wrapped up** | `✓ ⟳50≤50 · 50 tool uses · 89.1k token · 45.2s` / `⎿ Wrapped up (turn limit)` |
-| **Stopped** | `■ ⟳3 · 3 tool uses · 12.4k token` / `⎿ Stopped` |
-| **Error** | `✗ ⟳3 · 3 tool uses · 12.4k token` / `⎿ Error: timeout` |
-| **Aborted** | `✗ ⟳55≤50 · 55 tool uses · 102.3k token` / `⎿ Aborted (max turns exceeded)` |
+| **Running** | `⠹ ⟳3≤30 · 3 tool uses · 12.4k token (8%)` / `⎿ searching, reading 3 files…` |
+| **Completed** | `✓ ⟳8 · 5 tool uses · 33.8k token (62%) · 12.3s` / `⎿ Done` |
+| **Wrapped up** | `✓ ⟳50≤50 · 50 tool uses · 89.1k token (84% · ↻2) · 45.2s` / `⎿ Wrapped up (turn limit)` |
+| **Stopped** | `■ ⟳3 · 3 tool uses · 12.4k token (8%)` / `⎿ Stopped` |
+| **Error** | `✗ ⟳3 · 3 tool uses · 12.4k token (8%)` / `⎿ Error: timeout` |
+| **Aborted** | `✗ ⟳55≤50 · 55 tool uses · 102.3k token (95% · ↻3)` / `⎿ Aborted (max turns exceeded)` |
 Completed results can be expanded (ctrl+o in pi) to show the full agent output inline.
@@ -304,13 +343,18 @@ Agent lifecycle events are emitted via `pi.events.emit()` so other extensions ca
 |-------|------|------------|
 | `subagents:created` | Background agent registered | `id`, `type`, `description`, `isBackground` |
 | `subagents:started` | Agent transitions to running (including queued→running) | `id`, `type`, `description` |
-| `subagents:completed` | Agent finished successfully | `id`, `type`, `durationMs`, `tokens`, `toolUses`, `result` |
+| `subagents:completed` | Agent finished successfully | `id`, `type`, `durationMs`, `tokens` (lifetime `{ input, output, total }`), `toolUses`, `result` |
 | `subagents:failed` | Agent errored, stopped, or aborted | same as completed + `error`, `status` |
 | `subagents:steered` | Steering message sent | `id`, `message` |
+| `subagents:compacted` | Agent's session successfully compacted | `id`, `type`, `description`, `reason` (`"manual"` / `"threshold"` / `"overflow"`), `tokensBefore`, `compactionCount` |
+| `subagents:scheduled` | Schedule lifecycle change | `{ type: "added" \| "removed" \| "updated" \| "fired" \| "error", … }` (job/agentId/error fields per type) |
+| `subagents:scheduler_ready` | Scheduler bound to session, enabled jobs armed | `sessionId`, `jobCount` |
 | `subagents:ready` | Extension loaded and RPC handlers registered | — |
 | `subagents:settings_loaded` | Persisted settings applied at extension init | `settings` (merged global + project) |
 | `subagents:settings_changed` | `/agents` → Settings mutation was applied | `settings`, `persisted` (`boolean` — `false` on write failure) |
+`tokens.total` = `input + output + cacheWrite`. `cacheRead` is excluded — each turn's `cacheRead` is the cumulative cached prefix re-read on that one API call, so summing per-message would over-count it. Use `contextUsage.percent` (surfaced as `(NN%)` in the widget) for current context size.
 ## Cross-Extension RPC
 Other pi extensions can spawn and stop subagents programmatically via the `pi.events` event bus, without importing this package directly.
@@ -409,7 +453,7 @@ The agent gets a full, isolated copy of the repository. On completion:
 - **No changes:** worktree is cleaned up automatically
 - **Changes made:** changes are committed to a new branch (`pi-agent-<id>`) and returned in the result
-If the worktree cannot be created (not a git repo, no commits), the agent falls back to the main working directory with a warning.
+If the worktree cannot be created (not a git repo, no commits, or `git worktree add` fails), the `Agent` tool returns a clear error instead of running unisolated — `isolation: "worktree"` is a strict guarantee, not a hint. Initialize git and commit at least once, or omit `isolation`.
 ## Skill Preloading

package/dist/agent-manager.d.ts CHANGED Viewed

@@ -11,6 +11,11 @@ import { type ToolActivity } from "./agent-runner.js";
 import type { AgentRecord, IsolationMode, SubagentType, ThinkingLevel } from "./types.js";
 export type OnAgentComplete = (record: AgentRecord) => void;
 export type OnAgentStart = (record: AgentRecord) => void;
+export type OnAgentCompact = (record: AgentRecord, info: CompactionInfo) => void;
+export type CompactionInfo = {
+    reason: "manual" | "threshold" | "overflow";
+    tokensBefore: number;
+};
 interface SpawnOptions {
     description: string;
     model?: Model<any>;
@@ -19,8 +24,16 @@ interface SpawnOptions {
     inheritContext?: boolean;
     thinkingLevel?: ThinkingLevel;
     isBackground?: boolean;
+    /**
+     * Skip the maxConcurrent queue check for this spawn — start immediately even
+     * if the configured concurrency limit would otherwise queue it. Used by the
+     * scheduler so a fired job can't be deferred past its trigger window.
+     */
+    bypassQueue?: boolean;
     /** Isolation mode — "worktree" creates a temp git worktree for the agent. */
     isolation?: IsolationMode;
+    /** Parent abort signal — when aborted, the subagent is also stopped. */
+    signal?: AbortSignal;
     /** Called on tool start/end with activity info (for streaming progress to UI). */
     onToolActivity?: (activity: ToolActivity) => void;
     /** Called on streaming text deltas from the assistant response. */
@@ -29,18 +42,27 @@ interface SpawnOptions {
     onSessionCreated?: (session: AgentSession) => void;
     /** Called at the end of each agentic turn with the cumulative count. */
     onTurnEnd?: (turnCount: number) => void;
+    /** Called once per assistant message_end with that message's usage delta. */
+    onAssistantUsage?: (usage: {
+        input: number;
+        output: number;
+        cacheWrite: number;
+    }) => void;
+    /** Called when the session successfully compacts. */
+    onCompaction?: (info: CompactionInfo) => void;
 }
 export declare class AgentManager {
     private agents;
     private cleanupInterval;
     private onComplete?;
     private onStart?;
+    private onCompact?;
     private maxConcurrent;
     /** Queue of background agents waiting to start. */
     private queue;
     /** Number of currently running background agents. */
     private runningBackground;
-    constructor(onComplete?: OnAgentComplete, maxConcurrent?: number, onStart?: OnAgentStart);
+    constructor(onComplete?: OnAgentComplete, maxConcurrent?: number, onStart?: OnAgentStart, onCompact?: OnAgentCompact);
     /** Update the max concurrent background agents limit. */
     setMaxConcurrent(n: number): void;
     getMaxConcurrent(): number;

package/dist/agent-manager.js CHANGED Viewed

@@ -7,6 +7,7 @@
  */
 import { randomUUID } from "node:crypto";
 import { resumeAgent, runAgent } from "./agent-runner.js";
+import { addUsage } from "./usage.js";
 import { cleanupWorktree, createWorktree, pruneWorktrees, } from "./worktree.js";
 /** Default max concurrent background agents. */
 const DEFAULT_MAX_CONCURRENT = 4;
@@ -15,17 +16,20 @@ export class AgentManager {
     cleanupInterval;
     onComplete;
     onStart;
+    onCompact;
     maxConcurrent;
     /** Queue of background agents waiting to start. */
     queue = [];
     /** Number of currently running background agents. */
     runningBackground = 0;
-    constructor(onComplete, maxConcurrent = DEFAULT_MAX_CONCURRENT, onStart) {
+    constructor(onComplete, maxConcurrent = DEFAULT_MAX_CONCURRENT, onStart, onCompact) {
         this.onComplete = onComplete;
         this.onStart = onStart;
+        this.onCompact = onCompact;
         this.maxConcurrent = maxConcurrent;
         // Cleanup completed agents after 10 minutes (but keep sessions for resume)
         this.cleanupInterval = setInterval(() => this.cleanup(), 60_000);
+        this.cleanupInterval.unref();
     }
     /** Update the max concurrent background agents limit. */
     setMaxConcurrent(n) {
@@ -51,40 +55,56 @@ export class AgentManager {
             toolUses: 0,
             startedAt: Date.now(),
             abortController,
+            lifetimeUsage: { input: 0, output: 0, cacheWrite: 0 },
+            compactionCount: 0,
         };
         this.agents.set(id, record);
         const args = { pi, ctx, type, prompt, options };
-        if (options.isBackground && this.runningBackground >= this.maxConcurrent) {
+        if (options.isBackground && !options.bypassQueue && this.runningBackground >= this.maxConcurrent) {
             // Queue it — will be started when a running agent completes
             this.queue.push({ id, args });
             return id;
         }
-        this.startAgent(id, record, args);
+        // startAgent can throw (e.g. strict worktree-isolation failure) — clean
+        // up the record so callers don't see an orphan in `listAgents()`.
+        try {
+            this.startAgent(id, record, args);
+        }
+        catch (err) {
+            this.agents.delete(id);
+            throw err;
+        }
         return id;
     }
     /** Actually start an agent (called immediately or from queue drain). */
     startAgent(id, record, { pi, ctx, type, prompt, options }) {
+        // Worktree isolation: try to create a temporary git worktree. Strict —
+        // fail loud if not possible (no silent fallback to main tree). Done
+        // BEFORE state mutation so a throw doesn't leave the record half-running.
+        let worktreeCwd;
+        if (options.isolation === "worktree") {
+            const wt = createWorktree(ctx.cwd, id);
+            if (!wt) {
+                throw new Error('Cannot run with isolation: "worktree" — not a git repo, no commits yet, or `git worktree add` failed. ' +
+                    'Initialize git and commit at least once, or omit `isolation`.');
+            }
+            record.worktree = wt;
+            worktreeCwd = wt.path;
+        }
         record.status = "running";
         record.startedAt = Date.now();
         if (options.isBackground)
             this.runningBackground++;
         this.onStart?.(record);
-        // Worktree isolation: create a temporary git worktree if requested
-        let worktreeCwd;
-        let worktreeWarning = "";
-        if (options.isolation === "worktree") {
-            const wt = createWorktree(ctx.cwd, id);
-            if (wt) {
-                record.worktree = wt;
-                worktreeCwd = wt.path;
-            }
-            else {
-                worktreeWarning = "\n\n[WARNING: Worktree isolation was requested but failed (not a git repo, or no commits yet). Running in the main working directory instead.]";
-            }
+        // Wire parent abort signal to stop the subagent when the parent is interrupted
+        let detachParentSignal;
+        if (options.signal) {
+            const onParentAbort = () => this.abort(id);
+            options.signal.addEventListener("abort", onParentAbort, { once: true });
+            detachParentSignal = () => options.signal.removeEventListener("abort", onParentAbort);
         }
-        // Prepend worktree warning to prompt if isolation failed
-        const effectivePrompt = worktreeWarning ? worktreeWarning + "\n\n" + prompt : prompt;
-        const promise = runAgent(ctx, type, effectivePrompt, {
+        const detach = () => { detachParentSignal?.(); detachParentSignal = undefined; };
+        const promise = runAgent(ctx, type, prompt, {
             pi,
             model: options.model,
             maxTurns: options.maxTurns,
@@ -100,6 +120,15 @@ export class AgentManager {
             },
             onTurnEnd: options.onTurnEnd,
             onTextDelta: options.onTextDelta,
+            onAssistantUsage: (usage) => {
+                addUsage(record.lifetimeUsage, usage);
+                options.onAssistantUsage?.(usage);
+            },
+            onCompaction: (info) => {
+                record.compactionCount++;
+                this.onCompact?.(record, info);
+                options.onCompaction?.(info);
+            },
             onSessionCreated: (session) => {
                 record.session = session;
                 // Flush any steers that arrived before the session was ready
@@ -120,6 +149,7 @@ export class AgentManager {
             record.result = responseText;
             record.session = session;
             record.completedAt ??= Date.now();
+            detach();
             // Final flush of streaming output file
             if (record.outputCleanup) {
                 try {
@@ -139,7 +169,10 @@ export class AgentManager {
             }
             if (options.isBackground) {
                 this.runningBackground--;
-                this.onComplete?.(record);
+                try {
+                    this.onComplete?.(record);
+                }
+                catch { /* ignore completion side-effect errors */ }
                 this.drainQueue();
             }
             return responseText;
@@ -151,6 +184,7 @@ export class AgentManager {
             }
             record.error = err instanceof Error ? err.message : String(err);
             record.completedAt ??= Date.now();
+            detach();
             // Final flush of streaming output file on error
             if (record.outputCleanup) {
                 try {
@@ -183,7 +217,17 @@ export class AgentManager {
             const record = this.agents.get(next.id);
             if (!record || record.status !== "queued")
                 continue;
-            this.startAgent(next.id, record, next.args);
+            try {
+                this.startAgent(next.id, record, next.args);
+            }
+            catch (err) {
+                // Late failure (e.g. strict worktree-isolation) — surface on the record
+                // so the user/agent can see it via /agents, then keep draining.
+                record.status = "error";
+                record.error = err instanceof Error ? err.message : String(err);
+                record.completedAt = Date.now();
+                this.onComplete?.(record);
+            }
         }
     }
     /**
@@ -214,6 +258,13 @@ export class AgentManager {
                     if (activity.type === "end")
                         record.toolUses++;
                 },
+                onAssistantUsage: (usage) => {
+                    addUsage(record.lifetimeUsage, usage);
+                },
+                onCompaction: (info) => {
+                    record.compactionCount++;
+                    this.onCompact?.(record, info);
+                },
                 signal,
             });
             record.status = "completed";

package/dist/agent-runner.d.ts CHANGED Viewed

@@ -38,6 +38,24 @@ export interface RunOptions {
     onSessionCreated?: (session: AgentSession) => void;
     /** Called at the end of each agentic turn with the cumulative count. */
     onTurnEnd?: (turnCount: number) => void;
+    /**
+     * Called once per assistant message_end with that message's usage delta.
+     * Lets callers maintain a lifetime accumulator that survives compaction
+     * (which replaces session.state.messages and resets stats-derived sums).
+     */
+    onAssistantUsage?: (usage: {
+        input: number;
+        output: number;
+        cacheWrite: number;
+    }) => void;
+    /**
+     * Called when the session successfully compacts. `tokensBefore` is upstream's
+     * pre-compaction context size estimate. Aborted compactions don't fire.
+     */
+    onCompaction?: (info: {
+        reason: "manual" | "threshold" | "overflow";
+        tokensBefore: number;
+    }) => void;
 }
 export interface RunResult {
     responseText: string;
@@ -53,6 +71,15 @@ export declare function runAgent(ctx: ExtensionContext, type: SubagentType, prom
  */
 export declare function resumeAgent(session: AgentSession, prompt: string, options?: {
     onToolActivity?: (activity: ToolActivity) => void;
+    onAssistantUsage?: (usage: {
+        input: number;
+        output: number;
+        cacheWrite: number;
+    }) => void;
+    onCompaction?: (info: {
+        reason: "manual" | "threshold" | "overflow";
+        tokensBefore: number;
+    }) => void;
     signal?: AbortSignal;
 }): Promise<string>;
 /**

package/dist/agent-runner.js CHANGED Viewed

@@ -261,6 +261,18 @@ export async function runAgent(ctx, type, prompt, options) {
         if (event.type === "tool_execution_end") {
             options.onToolActivity?.({ type: "end", toolName: event.toolName });
         }
+        if (event.type === "message_end" && event.message.role === "assistant") {
+            const u = event.message.usage;
+            if (u)
+                options.onAssistantUsage?.({
+                    input: u.input ?? 0,
+                    output: u.output ?? 0,
+                    cacheWrite: u.cacheWrite ?? 0,
+                });
+        }
+        if (event.type === "compaction_end" && !event.aborted && event.result) {
+            options.onCompaction?.({ reason: event.reason, tokensBefore: event.result.tokensBefore });
+        }
     });
     const collector = collectResponseText(session);
     const cleanupAbort = forwardAbortSignal(session, options.signal);
@@ -289,12 +301,24 @@ export async function runAgent(ctx, type, prompt, options) {
 export async function resumeAgent(session, prompt, options = {}) {
     const collector = collectResponseText(session);
     const cleanupAbort = forwardAbortSignal(session, options.signal);
-    const unsubToolUse = options.onToolActivity
+    const unsubEvents = (options.onToolActivity || options.onAssistantUsage || options.onCompaction)
         ? session.subscribe((event) => {
             if (event.type === "tool_execution_start")
-                options.onToolActivity({ type: "start", toolName: event.toolName });
+                options.onToolActivity?.({ type: "start", toolName: event.toolName });
             if (event.type === "tool_execution_end")
-                options.onToolActivity({ type: "end", toolName: event.toolName });
+                options.onToolActivity?.({ type: "end", toolName: event.toolName });
+            if (event.type === "message_end" && event.message.role === "assistant") {
+                const u = event.message.usage;
+                if (u)
+                    options.onAssistantUsage?.({
+                        input: u.input ?? 0,
+                        output: u.output ?? 0,
+                        cacheWrite: u.cacheWrite ?? 0,
+                    });
+            }
+            if (event.type === "compaction_end" && !event.aborted && event.result) {
+                options.onCompaction?.({ reason: event.reason, tokensBefore: event.result.tokensBefore });
+            }
         })
         : () => { };
     try {
@@ -302,7 +326,7 @@ export async function resumeAgent(session, prompt, options = {}) {
     }
     finally {
         collector.unsubscribe();
-        unsubToolUse();
+        unsubEvents();
         cleanupAbort();
     }
     return collector.getText().trim() || getLastAssistantText(session);