npm - @os-eco/overstory-cli - Versions diffs - 0.7.0 → 0.7.3 - Mend

@os-eco/overstory-cli 0.7.0 → 0.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (91) hide show

package/README.md +7 -6
package/agents/builder.md +1 -1
package/agents/coordinator.md +12 -11
package/agents/lead.md +6 -6
package/agents/monitor.md +4 -4
package/agents/reviewer.md +1 -1
package/agents/scout.md +5 -5
package/agents/supervisor.md +36 -32
package/package.json +1 -1
package/src/agents/guard-rules.ts +97 -0
package/src/agents/hooks-deployer.test.ts +6 -5
package/src/agents/hooks-deployer.ts +7 -90
package/src/agents/identity.test.ts +3 -2
package/src/agents/manifest.test.ts +4 -3
package/src/agents/overlay.test.ts +10 -9
package/src/agents/overlay.ts +5 -5
package/src/commands/agents.test.ts +10 -4
package/src/commands/clean.test.ts +3 -0
package/src/commands/completions.test.ts +8 -5
package/src/commands/completions.ts +38 -2
package/src/commands/coordinator.test.ts +1 -0
package/src/commands/coordinator.ts +15 -11
package/src/commands/costs.test.ts +9 -3
package/src/commands/dashboard.test.ts +265 -6
package/src/commands/dashboard.ts +367 -64
package/src/commands/doctor.test.ts +3 -2
package/src/commands/errors.test.ts +3 -2
package/src/commands/feed.test.ts +3 -2
package/src/commands/feed.ts +2 -29
package/src/commands/init.test.ts +1 -2
package/src/commands/init.ts +1 -8
package/src/commands/inspect.test.ts +17 -2
package/src/commands/log.test.ts +262 -8
package/src/commands/log.ts +232 -110
package/src/commands/logs.test.ts +3 -2
package/src/commands/mail.test.ts +8 -2
package/src/commands/metrics.test.ts +4 -3
package/src/commands/monitor.ts +15 -11
package/src/commands/nudge.test.ts +4 -2
package/src/commands/prime.test.ts +4 -2
package/src/commands/prime.ts +6 -2
package/src/commands/replay.test.ts +3 -2
package/src/commands/run.test.ts +3 -1
package/src/commands/sling.test.ts +142 -1
package/src/commands/sling.ts +145 -24
package/src/commands/status.test.ts +9 -8
package/src/commands/stop.test.ts +1 -0
package/src/commands/supervisor.ts +19 -12
package/src/commands/trace.test.ts +4 -2
package/src/commands/watch.test.ts +3 -2
package/src/commands/worktree.test.ts +9 -0
package/src/config.test.ts +3 -3
package/src/config.ts +29 -0
package/src/doctor/agents.test.ts +3 -2
package/src/doctor/consistency.test.ts +14 -0
package/src/doctor/logs.test.ts +3 -2
package/src/doctor/structure.test.ts +3 -2
package/src/e2e/init-sling-lifecycle.test.ts +3 -5
package/src/index.ts +3 -1
package/src/logging/color.ts +1 -1
package/src/logging/format.test.ts +110 -0
package/src/logging/format.ts +42 -1
package/src/logging/logger.test.ts +3 -2
package/src/mail/broadcast.test.ts +1 -0
package/src/mail/client.test.ts +3 -2
package/src/mail/store.test.ts +3 -2
package/src/merge/queue.test.ts +3 -2
package/src/merge/resolver.test.ts +39 -0
package/src/merge/resolver.ts +24 -5
package/src/mulch/client.test.ts +63 -2
package/src/mulch/client.ts +62 -1
package/src/runtimes/claude.test.ts +5 -4
package/src/runtimes/pi-guards.test.ts +457 -0
package/src/runtimes/pi-guards.ts +349 -0
package/src/runtimes/pi.test.ts +620 -0
package/src/runtimes/pi.ts +244 -0
package/src/runtimes/registry.test.ts +33 -0
package/src/runtimes/registry.ts +15 -2
package/src/runtimes/types.ts +63 -0
package/src/schema-consistency.test.ts +5 -2
package/src/sessions/compat.test.ts +3 -2
package/src/sessions/compat.ts +1 -0
package/src/sessions/store.test.ts +34 -2
package/src/sessions/store.ts +37 -4
package/src/test-helpers.ts +20 -1
package/src/types.ts +17 -0
package/src/watchdog/daemon.test.ts +11 -7
package/src/watchdog/daemon.ts +1 -1
package/src/watchdog/health.test.ts +1 -0
package/src/watchdog/triage.test.ts +3 -2
package/src/watchdog/triage.ts +14 -4

package/src/runtimes/pi.ts ADDED Viewed

@@ -0,0 +1,244 @@
+// Pi runtime adapter for overstory's AgentRuntime interface.
+// Implements the AgentRuntime contract for the `pi` CLI (Mario Zechner's Pi coding agent).
+import { mkdir } from "node:fs/promises";
+import { join } from "node:path";
+import type { PiRuntimeConfig, ResolvedModel } from "../types.ts";
+import { generatePiGuardExtension } from "./pi-guards.ts";
+import type {
+	AgentRuntime,
+	HooksDef,
+	OverlayContent,
+	ReadyState,
+	SpawnOpts,
+	TranscriptSummary,
+} from "./types.ts";
+/**
+ * Default Pi runtime config used when no config is provided.
+ *
+ * PiRuntime's constructor falls back to this object when its `config` argument is
+ * null or undefined. It selects the "anthropic" provider and maps the short aliases
+ * "opus", "sonnet" and "haiku" to provider-qualified model IDs.
+ */
+const DEFAULT_PI_CONFIG: PiRuntimeConfig = {
+	provider: "anthropic",
+	modelMap: {
+		opus: "anthropic/claude-opus-4-6",
+		sonnet: "anthropic/claude-sonnet-4-6",
+		haiku: "anthropic/claude-haiku-4-5",
+	},
+};
+/**
+ * Pi runtime adapter.
+ *
+ * Implements AgentRuntime for the `pi` CLI (Mario Zechner's Pi coding agent).
+ * Security is enforced via Pi guard extensions rather than permission-mode flags —
+ * Pi has no --permission-mode equivalent.
+ *
+ * The constructor accepts an optional PiRuntimeConfig; when it is omitted the
+ * adapter uses DEFAULT_PI_CONFIG for provider and model-alias resolution.
+ */
+export class PiRuntime implements AgentRuntime {
+	/** Unique identifier for this runtime. */
+	readonly id = "pi";
+	/** Relative path to the instruction file within a worktree. Pi reads .claude/CLAUDE.md natively. */
+	readonly instructionPath = ".claude/CLAUDE.md";
+	private readonly config: PiRuntimeConfig;
+	constructor(config?: PiRuntimeConfig) {
+		this.config = config ?? DEFAULT_PI_CONFIG;
+	}
+	/**
+	 * Expand a model alias to a provider-qualified model ID.
+	 *
+	 * 1. If model contains "/" → already qualified, pass through
+	 * 2. If model is in modelMap → return the mapped value
+	 * 3. Otherwise → return `${provider}/${model}`
+	 *
+	 * Rule 2 is a truthiness test on the looked-up value, so an alias mapped to an
+	 * empty string falls through to rule 3.
+	 *
+	 * NOTE(review): the lookup is a plain property read. If modelMap is an ordinary
+	 * object, a name such as "constructor" would presumably resolve to an inherited
+	 * member rather than falling through — confirm whether an own-property check is wanted.
+	 *
+	 * @param model - Model alias (e.g. "opus") or an already-qualified "provider/model" ID
+	 * @returns Provider-qualified model ID
+	 */
+	expandModel(model: string): string {
+		if (model.includes("/")) return model;
+		const mapped = this.config.modelMap[model];
+		if (mapped) return mapped;
+		return `${this.config.provider}/${model}`;
+	}
+	/**
+	 * Build the shell command string to spawn an interactive Pi agent.
+	 *
+	 * Maps SpawnOpts to the `pi` CLI flags:
+	 * - `model` → `--model <model>` (after alias expansion via expandModel)
+	 * - `permissionMode` is accepted but NOT mapped — Pi has no permission-mode flag.
+	 *   Security is enforced via guard extensions deployed by deployConfig().
+	 * - `appendSystemPrompt` → `--append-system-prompt '<escaped>'` (POSIX single-quote escaping)
+	 *
+	 * The `cwd` and `env` fields are handled by the tmux session creator, not embedded here.
+	 *
+	 * NOTE(review): only appendSystemPrompt is escaped. The expanded model ID is
+	 * interpolated into the command string unquoted — confirm that model values always
+	 * come from trusted configuration and never contain shell metacharacters.
+	 *
+	 * @param opts - Spawn options (model, appendSystemPrompt; permissionMode is ignored)
+	 * @returns Shell command string suitable for tmux new-session -c
+	 */
+	buildSpawnCommand(opts: SpawnOpts): string {
+		let cmd = `pi --model ${this.expandModel(opts.model)}`;
+		if (opts.appendSystemPrompt) {
+			// POSIX single-quote escape: end quote, backslash-quote, start quote.
+			const escaped = opts.appendSystemPrompt.replace(/'/g, "'\\''");
+			cmd += ` --append-system-prompt '${escaped}'`;
+		}
+		return cmd;
+	}
+	/**
+	 * Build the argv array for a headless one-shot Pi invocation.
+	 *
+	 * Returns an argv array suitable for `Bun.spawn()`. The `--print` flag causes Pi
+	 * to run the prompt and exit. Unlike Claude Code, the prompt is a positional argument
+	 * (last), not passed via `-p`.
+	 *
+	 * The prompt is pushed verbatim as the final argv element; no shell escaping is
+	 * applied here. When `model` is given it is expanded via expandModel and emitted
+	 * as `--model <id>` before the prompt.
+	 *
+	 * @param prompt - The prompt to pass as a positional argument
+	 * @param model - Optional model override
+	 * @returns Argv array for Bun.spawn
+	 */
+	buildPrintCommand(prompt: string, model?: string): string[] {
+		const cmd = ["pi", "--print"];
+		if (model !== undefined) {
+			cmd.push("--model", this.expandModel(model));
+		}
+		cmd.push(prompt);
+		return cmd;
+	}
+	/**
+	 * Deploy per-agent instructions and guards to a worktree.
+	 *
+	 * Writes up to three files:
+	 * 1. `.claude/CLAUDE.md` — agent's task-specific overlay. Skipped when overlay is undefined.
+	 * 2. `.pi/extensions/overstory-guard.ts` — Pi guard extension (always deployed).
+	 * 3. `.pi/settings.json` — Pi settings enabling the extensions directory (always deployed).
+	 *
+	 * The writes are awaited one after another in the order listed. The `.pi` directory
+	 * needs no mkdir of its own: it is created as a parent of `.pi/extensions` by the
+	 * recursive mkdir. settings.json is serialized with tab indentation and a trailing newline.
+	 *
+	 * @param worktreePath - Absolute path to the agent's git worktree
+	 * @param overlay - Overlay content to write as CLAUDE.md, or undefined for guard-only deployment
+	 * @param hooks - Agent identity, capability, worktree path, and optional quality gates
+	 */
+	async deployConfig(
+		worktreePath: string,
+		overlay: OverlayContent | undefined,
+		hooks: HooksDef,
+	): Promise<void> {
+		if (overlay) {
+			const claudeDir = join(worktreePath, ".claude");
+			await mkdir(claudeDir, { recursive: true });
+			await Bun.write(join(claudeDir, "CLAUDE.md"), overlay.content);
+		}
+		// Always deploy Pi guard extension.
+		const piExtDir = join(worktreePath, ".pi", "extensions");
+		await mkdir(piExtDir, { recursive: true });
+		await Bun.write(join(piExtDir, "overstory-guard.ts"), generatePiGuardExtension(hooks));
+		// Always deploy Pi settings pointing at the extensions directory.
+		const piDir = join(worktreePath, ".pi");
+		const settings = { extensions: ["./extensions"] };
+		await Bun.write(join(piDir, "settings.json"), `${JSON.stringify(settings, null, "\t")}\n`);
+	}
+	/**
+	 * Pi does not require beacon verification/resend.
+	 *
+	 * Claude Code's TUI sometimes swallows Enter during late initialization, so the
+	 * orchestrator resends the beacon until the pane leaves the "idle" state. Pi's TUI
+	 * does not have this issue AND its idle vs. processing states are indistinguishable
+	 * via detectReady (the header "pi v..." and status bar token counter are visible in
+	 * both states). Enabling the resend loop would spam Pi with duplicate beacon messages.
+	 *
+	 * @returns Always false
+	 */
+	requiresBeaconVerification(): boolean {
+		return false;
+	}
+	/**
+	 * Detect Pi TUI readiness from a tmux pane content snapshot.
+	 *
+	 * Reports "ready" only when the pane contains BOTH the header text "pi v" and a
+	 * status-bar token-usage indicator of the form `<digits>.<digits>%/<digits>k`
+	 * (for example "0.0%/200k"). Anything else is reported as "loading".
+	 * Pi has no trust dialog phase.
+	 *
+	 * @param paneContent - Captured tmux pane content to analyze
+	 * @returns Current readiness phase: "ready" or "loading"
+	 */
+	detectReady(paneContent: string): ReadyState {
+		// Pi's TUI shows "pi v<version>" in the header and a status bar with
+		// a token usage indicator like "0.0%/200k" when fully rendered.
+		// Earlier detection checked for "model:" which Pi's TUI never contains.
+		const hasHeader = paneContent.includes("pi v");
+		const hasStatusBar = /\d+\.\d+%\/\d+k/.test(paneContent);
+		if (hasHeader && hasStatusBar) {
+			return { phase: "ready" };
+		}
+		return { phase: "loading" };
+	}
+	/**
+	 * Parse a Pi transcript JSONL file into normalized token usage.
+	 *
+	 * Pi JSONL format differs from Claude Code:
+	 * - Token counts are in `message_end` events with TOP-LEVEL `inputTokens` / `outputTokens`
+	 *   (not nested under message.usage)
+	 * - Model identity comes from `model_change` events with a `model` field
+	 *
+	 * Token counts are summed across all `message_end` events; non-numeric token fields
+	 * are ignored. The reported model is the one from the LAST `model_change` event, or
+	 * an empty string when the transcript has none. Blank lines and lines that are not
+	 * valid JSON are skipped rather than failing the whole parse.
+	 *
+	 * Returns null if the file does not exist or if reading it throws.
+	 *
+	 * @param path - Absolute path to the Pi transcript JSONL file
+	 * @returns Aggregated token usage, or null if unavailable
+	 */
+	async parseTranscript(path: string): Promise<TranscriptSummary | null> {
+		const file = Bun.file(path);
+		if (!(await file.exists())) {
+			return null;
+		}
+		try {
+			const text = await file.text();
+			const lines = text.split("\n").filter((l) => l.trim().length > 0);
+			let inputTokens = 0;
+			let outputTokens = 0;
+			let model = "";
+			for (const line of lines) {
+				let entry: Record<string, unknown>;
+				try {
+					entry = JSON.parse(line) as Record<string, unknown>;
+				} catch {
+					// Skip malformed lines — Pi transcripts may have partial writes.
+					continue;
+				}
+				if (entry.type === "message_end") {
+					// Pi top-level token fields (not nested under message.usage).
+					if (typeof entry.inputTokens === "number") {
+						inputTokens += entry.inputTokens;
+					}
+					if (typeof entry.outputTokens === "number") {
+						outputTokens += entry.outputTokens;
+					}
+				} else if (entry.type === "model_change") {
+					if (typeof entry.model === "string") {
+						model = entry.model;
+					}
+				}
+			}
+			return { inputTokens, outputTokens, model };
+		} catch {
+			return null;
+		}
+	}
+	/**
+	 * Build runtime-specific environment variables for model/provider routing.
+	 *
+	 * Returns the provider environment variables from the resolved model, or an empty
+	 * object if none are set. The adapter adds no variables of its own.
+	 *
+	 * @param model - Resolved model with optional provider env vars
+	 * @returns Environment variable map (may be empty)
+	 */
+	buildEnv(model: ResolvedModel): Record<string, string> {
+		return model.env ?? {};
+	}
+}

package/src/runtimes/registry.test.ts CHANGED Viewed

@@ -1,6 +1,7 @@
 import { describe, expect, it } from "bun:test";
 import type { OverstoryConfig } from "../types.ts";
 import { ClaudeRuntime } from "./claude.ts";
+import { PiRuntime } from "./pi.ts";
 import { getRuntime } from "./registry.ts";
 describe("getRuntime", () => {
@@ -50,4 +51,36 @@ describe("getRuntime", () => {
 		const b = getRuntime();
 		expect(a).not.toBe(b);
 	});
+	it("returns PiRuntime when name is 'pi'", () => {
+		const runtime = getRuntime("pi");
+		expect(runtime).toBeInstanceOf(PiRuntime);
+		expect(runtime.id).toBe("pi");
+	});
+	it("passes Pi config from OverstoryConfig to PiRuntime", () => {
+		const config = {
+			runtime: {
+				default: "pi",
+				pi: {
+					provider: "amazon-bedrock",
+					modelMap: {
+						opus: "amazon-bedrock/us.anthropic.claude-opus-4-6-v1",
+					},
+				},
+			},
+		} as unknown as OverstoryConfig;
+		const runtime = getRuntime(undefined, config) as PiRuntime;
+		expect(runtime).toBeInstanceOf(PiRuntime);
+		// Verify the config was applied by testing model expansion
+		expect(runtime.expandModel("opus")).toBe("amazon-bedrock/us.anthropic.claude-opus-4-6-v1");
+	});
+	it("Pi runtime uses defaults when no Pi config in OverstoryConfig", () => {
+		const config = { runtime: { default: "pi" } } as OverstoryConfig;
+		const runtime = getRuntime(undefined, config) as PiRuntime;
+		expect(runtime).toBeInstanceOf(PiRuntime);
+		// Should use default anthropic mappings
+		expect(runtime.expandModel("sonnet")).toBe("anthropic/claude-sonnet-4-6");
+	});
 });

package/src/runtimes/registry.ts CHANGED Viewed

@@ -3,10 +3,14 @@
 import type { OverstoryConfig } from "../types.ts";
 import { ClaudeRuntime } from "./claude.ts";
+import { PiRuntime } from "./pi.ts";
 import type { AgentRuntime } from "./types.ts";
-/** Registry of available runtime adapters (name → factory). */
-const runtimes = new Map<string, () => AgentRuntime>([["claude", () => new ClaudeRuntime()]]);
+/**
+ * Registry of config-independent runtime adapters (name → factory).
+ *
+ * Each factory builds its adapter with no arguments. getRuntime() constructs
+ * PiRuntime directly (passing config.runtime.pi) before it consults this map, so
+ * the "pi" factory below is not reached through that path.
+ */
+const runtimes = new Map<string, () => AgentRuntime>([
+	["claude", () => new ClaudeRuntime()],
+	["pi", () => new PiRuntime()],
+]);
 /**
  * Resolve a runtime adapter by name.
@@ -16,6 +20,9 @@ const runtimes = new Map<string, () => AgentRuntime>([["claude", () => new Claud
  * 2. `config.runtime.default` (if config is provided)
  * 3. `"claude"` (hardcoded fallback)
  *
+ * Special cases:
+ * - Pi runtime receives `config.runtime.pi` for model alias expansion.
+ *
  * @param name - Runtime name to resolve (e.g. "claude"). Omit to use config default.
  * @param config - Overstory config for reading the default runtime.
  * @throws {Error} If the resolved runtime name is not registered.
@@ -23,6 +30,12 @@ const runtimes = new Map<string, () => AgentRuntime>([["claude", () => new Claud
  */
 export function getRuntime(name?: string, config?: OverstoryConfig): AgentRuntime {
 	const runtimeName = name ?? config?.runtime?.default ?? "claude";
+	// Pi runtime needs config for model alias expansion.
+	if (runtimeName === "pi") {
+		return new PiRuntime(config?.runtime?.pi);
+	}
 	const factory = runtimes.get(runtimeName);
 	if (!factory) {
 		throw new Error(

package/src/runtimes/types.ts CHANGED Viewed

@@ -66,6 +66,48 @@ export interface TranscriptSummary {
 	model: string;
 }
+// === RPC Connection ===
+/**
+ * Reported state of a connected agent process.
+ * Used by RuntimeConnection.getState() to poll agent activity without tmux.
+ *
+ * `status` is one of "idle", "working" or "error"; `currentTool` is optional and
+ * only meaningful while the status is "working".
+ */
+export type ConnectionState = {
+	status: "idle" | "working" | "error";
+	/** Tool currently executing, if status is "working". */
+	currentTool?: string;
+};
+/**
+ * Handle to spawned agent process I/O streams for RPC communication.
+ * Compatible with Bun.spawn output when configured with stdin/stdout pipe.
+ *
+ * `stdin.write` accepts text or bytes and may return its result either directly or
+ * as a promise, so callers that need the result should await it. `stdout` is a
+ * stream of raw bytes.
+ */
+export interface RpcProcessHandle {
+	readonly stdin: {
+		write(data: string | Uint8Array): number | Promise<number>;
+	};
+	readonly stdout: ReadableStream<Uint8Array>;
+}
+/**
+ * Lifecycle interface for runtimes supporting direct RPC.
+ * When AgentRuntime.connect() exists, the orchestrator bypasses tmux for
+ * mail delivery (followUp), shutdown (abort), and health checks (getState).
+ * Pi implements via JSON-RPC 2.0 over stdin/stdout.
+ *
+ * NOTE(review): the PiRuntime adapter shipped alongside this interface does not
+ * define connect() — confirm whether the Pi RPC implementation lives elsewhere or
+ * is still planned.
+ */
+export interface RuntimeConnection {
+	/** Send initial prompt after spawn. */
+	sendPrompt(text: string): Promise<void>;
+	/** Send follow-up message — replaces tmux send-keys. */
+	followUp(text: string): Promise<void>;
+	/** Clean shutdown — replaces SIGTERM. */
+	abort(): Promise<void>;
+	/** Query current state — replaces tmux capture-pane. */
+	getState(): Promise<ConnectionState>;
+	/** Release connection resources. Synchronous, unlike the other methods. */
+	close(): void;
+}
 // === Runtime Interface ===
 /**
@@ -122,4 +164,25 @@ export interface AgentRuntime {
 	 * the provider's authTokenEnv directly.
 	 */
 	buildEnv(model: ResolvedModel): Record<string, string>;
+	/**
+	 * Whether this runtime requires the beacon verification/resend loop after initial send.
+	 *
+	 * Claude Code's TUI sometimes swallows Enter during late initialization, so the
+	 * orchestrator resends the beacon if the pane still appears idle (overstory-3271).
+	 * Pi's TUI does not exhibit this behavior AND its idle/processing states are
+	 * indistinguishable via detectReady (both show the header and status bar), so
+	 * the resend loop would spam Pi with duplicate startup messages.
+	 *
+	 * Runtimes that omit this method (or return true) get the resend loop.
+	 * Pi returns false to skip it.
+	 *
+	 * @returns true to keep the resend loop, false to skip it
+	 */
+	requiresBeaconVerification?(): boolean;
+	/**
+	 * Establish direct RPC connection to running agent process.
+	 * Runtimes without RPC (Claude, Codex) omit this method.
+	 * Orchestrator checks `if (runtime.connect)` before calling, falls back to tmux when absent.
+	 *
+	 * @param process - stdin/stdout handle of the running agent process
+	 * @returns Connection used for prompts, follow-ups, abort and state queries
+	 */
+	connect?(process: RpcProcessHandle): RuntimeConnection;
 }

package/src/schema-consistency.test.ts CHANGED Viewed

@@ -12,7 +12,7 @@
 import { Database } from "bun:sqlite";
 import { afterEach, beforeEach, describe, expect, test } from "bun:test";
-import { mkdtemp, rm } from "node:fs/promises";
+import { mkdtemp } from "node:fs/promises";
 import { tmpdir } from "node:os";
 import { join } from "node:path";
 import { createEventStore } from "./events/store.ts";
@@ -21,6 +21,8 @@ import { createMergeQueue } from "./merge/queue.ts";
 import { createMetricsStore } from "./metrics/store.ts";
 import { createSessionStore } from "./sessions/store.ts";
+import { cleanupTempDir } from "./test-helpers.ts";
 /** Extract sorted column names from a table via PRAGMA table_info(). */
 function getTableColumns(db: Database, tableName: string): string[] {
 	const rows = db.prepare(`PRAGMA table_info(${tableName})`).all() as Array<{ name: string }>;
@@ -35,7 +37,7 @@ describe("SQL schema consistency", () => {
 	});
 	afterEach(async () => {
-		await rm(tmpDir, { recursive: true, force: true });
+		await cleanupTempDir(tmpDir);
 	});
 	describe("SessionStore", () => {
@@ -65,6 +67,7 @@ describe("SQL schema consistency", () => {
 				"state",
 				"task_id",
 				"tmux_session",
+				"transcript_path",
 				"worktree_path",
 			].sort();

package/src/sessions/compat.test.ts CHANGED Viewed

@@ -6,9 +6,10 @@
  */
 import { afterEach, beforeEach, describe, expect, test } from "bun:test";
-import { mkdtemp, rm, writeFile } from "node:fs/promises";
+import { mkdtemp, writeFile } from "node:fs/promises";
 import { tmpdir } from "node:os";
 import { join } from "node:path";
+import { cleanupTempDir } from "../test-helpers.ts";
 import { openSessionStore } from "./compat.ts";
 let tempDir: string;
@@ -22,7 +23,7 @@ beforeEach(async () => {
 });
 afterEach(async () => {
-	await rm(tempDir, { recursive: true, force: true });
+	await cleanupTempDir(tempDir);
 });
 /** Create a sessions.json with the given entries. */

package/src/sessions/compat.ts CHANGED Viewed

@@ -36,6 +36,7 @@ function normalizeSession(raw: Record<string, unknown>): AgentSession {
 		lastActivity: raw.lastActivity as string,
 		escalationLevel: (raw.escalationLevel as number) ?? 0,
 		stalledSince: (raw.stalledSince as string | null) ?? null,
+		transcriptPath: (raw.transcriptPath as string | null) ?? null,
 	};
 }

package/src/sessions/store.test.ts CHANGED Viewed

@@ -6,9 +6,10 @@
  */
 import { afterEach, beforeEach, describe, expect, test } from "bun:test";
-import { mkdtemp, rm } from "node:fs/promises";
+import { mkdtemp } from "node:fs/promises";
 import { tmpdir } from "node:os";
 import { join } from "node:path";
+import { cleanupTempDir } from "../test-helpers.ts";
 import type { AgentSession, AgentState, InsertRun, Run, RunStore } from "../types.ts";
 import { createRunStore, createSessionStore, type SessionStore } from "./store.ts";
@@ -24,7 +25,7 @@ beforeEach(async () => {
 afterEach(async () => {
 	store.close();
-	await rm(tempDir, { recursive: true, force: true });
+	await cleanupTempDir(tempDir);
 });
 /** Helper to create an AgentSession with optional overrides. */
@@ -46,6 +47,7 @@ function makeSession(overrides: Partial<AgentSession> = {}): AgentSession {
 		lastActivity: "2026-01-15T10:00:00.000Z",
 		escalationLevel: 0,
 		stalledSince: null,
+		transcriptPath: null,
 		...overrides,
 	};
 }
@@ -137,6 +139,36 @@ describe("upsert", () => {
 		const badSession = { ...session, state: "invalid" as AgentState };
 		expect(() => store.upsert(badSession)).toThrow();
 	});
+	test("handles null transcriptPath", () => {
+		const session = makeSession({ transcriptPath: null });
+		store.upsert(session);
+		const result = store.getByName("test-agent");
+		expect(result?.transcriptPath).toBeNull();
+	});
+	test("transcriptPath roundtrips correctly", () => {
+		const session = makeSession({ transcriptPath: "/home/user/.pi/sessions/abc.jsonl" });
+		store.upsert(session);
+		const result = store.getByName("test-agent");
+		expect(result?.transcriptPath).toBe("/home/user/.pi/sessions/abc.jsonl");
+	});
+});
+// === updateTranscriptPath ===
+describe("updateTranscriptPath", () => {
+	test("sets transcript path for an existing session", () => {
+		store.upsert(makeSession({ transcriptPath: null }));
+		store.updateTranscriptPath("test-agent", "/tmp/transcript.jsonl");
+		const result = store.getByName("test-agent");
+		expect(result?.transcriptPath).toBe("/tmp/transcript.jsonl");
+	});
+	test("is a no-op for nonexistent agent", () => {
+		// Should not throw; there is no explicit assertion, the test passes if this call returns.
+		store.updateTranscriptPath("nonexistent", "/tmp/transcript.jsonl");
+	});
 });
 // === getByName ===

package/src/sessions/store.ts CHANGED Viewed

@@ -28,6 +28,8 @@ export interface SessionStore {
 	updateLastActivity(agentName: string): void;
 	/** Update escalation level and stalled timestamp. */
 	updateEscalation(agentName: string, level: number, stalledSince: string | null): void;
+	/**
+	 * Update the transcript path for a session, identified by agent name.
+	 * A no-op when no session with that agent name exists.
+	 */
+	updateTranscriptPath(agentName: string, path: string): void;
 	/** Remove a session by agent name. */
 	remove(agentName: string): void;
 	/** Purge sessions matching criteria. Returns count of deleted rows. */
@@ -54,6 +56,7 @@ interface SessionRow {
 	last_activity: string;
 	escalation_level: number;
 	stalled_since: string | null;
+	transcript_path: string | null;
 }
 /** Row shape for runs table as stored in SQLite (snake_case columns). */
@@ -84,7 +87,8 @@ CREATE TABLE IF NOT EXISTS sessions (
   started_at TEXT NOT NULL,
   last_activity TEXT NOT NULL,
   escalation_level INTEGER NOT NULL DEFAULT 0,
-  stalled_since TEXT
+  stalled_since TEXT,
+  transcript_path TEXT
 )`;
 const CREATE_INDEXES = `
@@ -124,6 +128,7 @@ function rowToSession(row: SessionRow): AgentSession {
 		lastActivity: row.last_activity,
 		escalationLevel: row.escalation_level,
 		stalledSince: row.stalled_since,
+		transcriptPath: row.transcript_path,
 	};
 }
@@ -139,6 +144,18 @@ function rowToRun(row: RunRow): Run {
 	};
 }
+/**
+ * Migrate an existing sessions table to add the transcript_path column.
+ * Safe to call multiple times — only adds the column if it does not exist.
+ *
+ * Reads the current column names with PRAGMA table_info(sessions) and issues
+ * ALTER TABLE ... ADD COLUMN only when transcript_path is missing. The new column
+ * is a plain TEXT column with no default or NOT NULL constraint.
+ *
+ * @param db - SQLite database handle to inspect and migrate
+ */
+function migrateAddTranscriptPath(db: Database): void {
+	const rows = db.prepare("PRAGMA table_info(sessions)").all() as Array<{ name: string }>;
+	const existingColumns = new Set(rows.map((r) => r.name));
+	if (!existingColumns.has("transcript_path")) {
+		db.exec("ALTER TABLE sessions ADD COLUMN transcript_path TEXT");
+	}
+}
 /**
  * Migrate an existing sessions table from bead_id to task_id column.
  * Safe to call multiple times — only renames if bead_id exists and task_id does not.
@@ -173,6 +190,8 @@ export function createSessionStore(dbPath: string): SessionStore {
 	// Migrate: rename bead_id → task_id on existing tables
 	migrateBeadIdToTaskId(db);
+	// Migrate: add transcript_path column to existing tables
+	migrateAddTranscriptPath(db);
 	// Prepare statements for frequent operations
 	const upsertStmt = db.prepare<
@@ -194,16 +213,17 @@ export function createSessionStore(dbPath: string): SessionStore {
 			$last_activity: string;
 			$escalation_level: number;
 			$stalled_since: string | null;
+			$transcript_path: string | null;
 		}
 	>(`
 		INSERT INTO sessions
 			(id, agent_name, capability, worktree_path, branch_name, task_id,
 			 tmux_session, state, pid, parent_agent, depth, run_id,
-			 started_at, last_activity, escalation_level, stalled_since)
+			 started_at, last_activity, escalation_level, stalled_since, transcript_path)
 		VALUES
 			($id, $agent_name, $capability, $worktree_path, $branch_name, $task_id,
 			 $tmux_session, $state, $pid, $parent_agent, $depth, $run_id,
-			 $started_at, $last_activity, $escalation_level, $stalled_since)
+			 $started_at, $last_activity, $escalation_level, $stalled_since, $transcript_path)
 		ON CONFLICT(agent_name) DO UPDATE SET
 			id = excluded.id,
 			capability = excluded.capability,
@@ -219,7 +239,8 @@ export function createSessionStore(dbPath: string): SessionStore {
 			started_at = excluded.started_at,
 			last_activity = excluded.last_activity,
 			escalation_level = excluded.escalation_level,
-			stalled_since = excluded.stalled_since
+			stalled_since = excluded.stalled_since,
+			transcript_path = excluded.transcript_path
 	`);
 	const getByNameStmt = db.prepare<SessionRow, { $agent_name: string }>(`
@@ -268,6 +289,13 @@ export function createSessionStore(dbPath: string): SessionStore {
 		DELETE FROM sessions WHERE agent_name = $agent_name
 	`);
+	const updateTranscriptPathStmt = db.prepare<
+		void,
+		{ $agent_name: string; $transcript_path: string }
+	>(`
+		UPDATE sessions SET transcript_path = $transcript_path WHERE agent_name = $agent_name
+	`);
 	return {
 		upsert(session: AgentSession): void {
 			upsertStmt.run({
@@ -287,6 +315,7 @@ export function createSessionStore(dbPath: string): SessionStore {
 				$last_activity: session.lastActivity,
 				$escalation_level: session.escalationLevel,
 				$stalled_since: session.stalledSince,
+				$transcript_path: session.transcriptPath,
 			});
 		},
@@ -334,6 +363,10 @@ export function createSessionStore(dbPath: string): SessionStore {
 			});
 		},
+		updateTranscriptPath(agentName: string, path: string): void {
+			updateTranscriptPathStmt.run({ $agent_name: agentName, $transcript_path: path });
+		},
 		remove(agentName: string): void {
 			removeStmt.run({ $agent_name: agentName });
 		},

package/src/test-helpers.ts CHANGED Viewed

@@ -95,9 +95,28 @@ export async function getDefaultBranch(repoDir: string): Promise<string> {
 /**
  * Remove a temp directory. Safe to call even if the directory doesn't exist.
+ *
+ * On Windows, SQLite WAL/SHM file handles may linger briefly after db.close(),
+ * causing EBUSY errors on immediate rm(). Retries with exponential backoff
+ * (up to ~1.5s total) to handle this OS-level timing issue.
+ *
+ * Best-effort: errors thrown by rm() are caught and never rethrown. On platforms
+ * other than win32 there is exactly one attempt and no retry.
+ *
+ * NOTE(review): when the error code is ENOENT the catch block neither returns nor
+ * sleeps, so on Windows the loop retries rm() immediately until attempts run out.
+ * With force: true rm() should not normally report ENOENT — confirm this branch
+ * is intended.
+ *
+ * @param dir - Path of the temp directory to remove
  */
 export async function cleanupTempDir(dir: string): Promise<void> {
-	await rm(dir, { recursive: true, force: true });
+	const maxRetries = process.platform === "win32" ? 5 : 0;
+	for (let attempt = 0; attempt <= maxRetries; attempt++) {
+		try {
+			await rm(dir, { recursive: true, force: true });
+			return;
+		} catch (err: unknown) {
+			const code = (err as NodeJS.ErrnoException).code;
+			if (code === "EBUSY" && attempt < maxRetries) {
+				// Exponential backoff: 50, 100, 200, 400, 800ms (1550ms in total)
+				await Bun.sleep(50 * 2 ** attempt);
+				continue;
+			}
+			// Non-EBUSY or final attempt: swallow (temp dirs are cleaned by OS anyway)
+			if (code !== "ENOENT") return;
+		}
+	}
 }
 /**