@os-eco/overstory-cli 0.7.0 → 0.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91)
  1. package/README.md +7 -6
  2. package/agents/builder.md +1 -1
  3. package/agents/coordinator.md +12 -11
  4. package/agents/lead.md +6 -6
  5. package/agents/monitor.md +4 -4
  6. package/agents/reviewer.md +1 -1
  7. package/agents/scout.md +5 -5
  8. package/agents/supervisor.md +36 -32
  9. package/package.json +1 -1
  10. package/src/agents/guard-rules.ts +97 -0
  11. package/src/agents/hooks-deployer.test.ts +6 -5
  12. package/src/agents/hooks-deployer.ts +7 -90
  13. package/src/agents/identity.test.ts +3 -2
  14. package/src/agents/manifest.test.ts +4 -3
  15. package/src/agents/overlay.test.ts +10 -9
  16. package/src/agents/overlay.ts +5 -5
  17. package/src/commands/agents.test.ts +10 -4
  18. package/src/commands/clean.test.ts +3 -0
  19. package/src/commands/completions.test.ts +8 -5
  20. package/src/commands/completions.ts +38 -2
  21. package/src/commands/coordinator.test.ts +1 -0
  22. package/src/commands/coordinator.ts +15 -11
  23. package/src/commands/costs.test.ts +9 -3
  24. package/src/commands/dashboard.test.ts +265 -6
  25. package/src/commands/dashboard.ts +367 -64
  26. package/src/commands/doctor.test.ts +3 -2
  27. package/src/commands/errors.test.ts +3 -2
  28. package/src/commands/feed.test.ts +3 -2
  29. package/src/commands/feed.ts +2 -29
  30. package/src/commands/init.test.ts +1 -2
  31. package/src/commands/init.ts +1 -8
  32. package/src/commands/inspect.test.ts +17 -2
  33. package/src/commands/log.test.ts +262 -8
  34. package/src/commands/log.ts +232 -110
  35. package/src/commands/logs.test.ts +3 -2
  36. package/src/commands/mail.test.ts +8 -2
  37. package/src/commands/metrics.test.ts +4 -3
  38. package/src/commands/monitor.ts +15 -11
  39. package/src/commands/nudge.test.ts +4 -2
  40. package/src/commands/prime.test.ts +4 -2
  41. package/src/commands/prime.ts +6 -2
  42. package/src/commands/replay.test.ts +3 -2
  43. package/src/commands/run.test.ts +3 -1
  44. package/src/commands/sling.test.ts +142 -1
  45. package/src/commands/sling.ts +145 -24
  46. package/src/commands/status.test.ts +9 -8
  47. package/src/commands/stop.test.ts +1 -0
  48. package/src/commands/supervisor.ts +19 -12
  49. package/src/commands/trace.test.ts +4 -2
  50. package/src/commands/watch.test.ts +3 -2
  51. package/src/commands/worktree.test.ts +9 -0
  52. package/src/config.test.ts +3 -3
  53. package/src/config.ts +29 -0
  54. package/src/doctor/agents.test.ts +3 -2
  55. package/src/doctor/consistency.test.ts +14 -0
  56. package/src/doctor/logs.test.ts +3 -2
  57. package/src/doctor/structure.test.ts +3 -2
  58. package/src/e2e/init-sling-lifecycle.test.ts +3 -5
  59. package/src/index.ts +3 -1
  60. package/src/logging/color.ts +1 -1
  61. package/src/logging/format.test.ts +110 -0
  62. package/src/logging/format.ts +42 -1
  63. package/src/logging/logger.test.ts +3 -2
  64. package/src/mail/broadcast.test.ts +1 -0
  65. package/src/mail/client.test.ts +3 -2
  66. package/src/mail/store.test.ts +3 -2
  67. package/src/merge/queue.test.ts +3 -2
  68. package/src/merge/resolver.test.ts +39 -0
  69. package/src/merge/resolver.ts +24 -5
  70. package/src/mulch/client.test.ts +63 -2
  71. package/src/mulch/client.ts +62 -1
  72. package/src/runtimes/claude.test.ts +5 -4
  73. package/src/runtimes/pi-guards.test.ts +457 -0
  74. package/src/runtimes/pi-guards.ts +349 -0
  75. package/src/runtimes/pi.test.ts +620 -0
  76. package/src/runtimes/pi.ts +244 -0
  77. package/src/runtimes/registry.test.ts +33 -0
  78. package/src/runtimes/registry.ts +15 -2
  79. package/src/runtimes/types.ts +63 -0
  80. package/src/schema-consistency.test.ts +5 -2
  81. package/src/sessions/compat.test.ts +3 -2
  82. package/src/sessions/compat.ts +1 -0
  83. package/src/sessions/store.test.ts +34 -2
  84. package/src/sessions/store.ts +37 -4
  85. package/src/test-helpers.ts +20 -1
  86. package/src/types.ts +17 -0
  87. package/src/watchdog/daemon.test.ts +11 -7
  88. package/src/watchdog/daemon.ts +1 -1
  89. package/src/watchdog/health.test.ts +1 -0
  90. package/src/watchdog/triage.test.ts +3 -2
  91. package/src/watchdog/triage.ts +14 -4
@@ -0,0 +1,244 @@
1
+ // Pi runtime adapter for overstory's AgentRuntime interface.
2
+ // Implements the AgentRuntime contract for the `pi` CLI (Mario Zechner's Pi coding agent).
3
+
4
+ import { mkdir } from "node:fs/promises";
5
+ import { join } from "node:path";
6
+ import type { PiRuntimeConfig, ResolvedModel } from "../types.ts";
7
+ import { generatePiGuardExtension } from "./pi-guards.ts";
8
+ import type {
9
+ AgentRuntime,
10
+ HooksDef,
11
+ OverlayContent,
12
+ ReadyState,
13
+ SpawnOpts,
14
+ TranscriptSummary,
15
+ } from "./types.ts";
16
+
17
+ /** Default Pi runtime config used when no config is provided. */
18
+ const DEFAULT_PI_CONFIG: PiRuntimeConfig = {
19
+ provider: "anthropic",
20
+ modelMap: {
21
+ opus: "anthropic/claude-opus-4-6",
22
+ sonnet: "anthropic/claude-sonnet-4-6",
23
+ haiku: "anthropic/claude-haiku-4-5",
24
+ },
25
+ };
26
+
27
+ /**
28
+ * Pi runtime adapter.
29
+ *
30
+ * Implements AgentRuntime for the `pi` CLI (Mario Zechner's Pi coding agent).
31
+ * Security is enforced via Pi guard extensions rather than permission-mode flags —
32
+ * Pi has no --permission-mode equivalent.
33
+ */
34
+ export class PiRuntime implements AgentRuntime {
35
+ /** Unique identifier for this runtime. */
36
+ readonly id = "pi";
37
+
38
+ /** Relative path to the instruction file within a worktree. Pi reads .claude/CLAUDE.md natively. */
39
+ readonly instructionPath = ".claude/CLAUDE.md";
40
+
41
+ private readonly config: PiRuntimeConfig;
42
+
43
+ constructor(config?: PiRuntimeConfig) {
44
+ this.config = config ?? DEFAULT_PI_CONFIG;
45
+ }
46
+
47
+ /**
48
+ * Expand a model alias to a provider-qualified model ID.
49
+ *
50
+ * 1. If model contains "/" → already qualified, pass through
51
+ * 2. If model is in modelMap → return the mapped value
52
+ * 3. Otherwise → return `${provider}/${model}`
53
+ */
54
+ expandModel(model: string): string {
55
+ if (model.includes("/")) return model;
56
+ const mapped = this.config.modelMap[model];
57
+ if (mapped) return mapped;
58
+ return `${this.config.provider}/${model}`;
59
+ }
60
+
61
+ /**
62
+ * Build the shell command string to spawn an interactive Pi agent.
63
+ *
64
+ * Maps SpawnOpts to the `pi` CLI flags:
65
+ * - `model` → `--model <model>`
66
+ * - `permissionMode` is accepted but NOT mapped — Pi has no permission-mode flag.
67
+ * Security is enforced via guard extensions deployed by deployConfig().
68
+ * - `appendSystemPrompt` → `--append-system-prompt '<escaped>'` (POSIX single-quote escaping)
69
+ *
70
+ * The `cwd` and `env` fields are handled by the tmux session creator, not embedded here.
71
+ *
72
+ * @param opts - Spawn options (model, appendSystemPrompt; permissionMode is ignored)
73
+ * @returns Shell command string to run as the shell-command of a new tmux session (cwd and env are supplied separately by the session creator)
74
+ */
75
+ buildSpawnCommand(opts: SpawnOpts): string {
76
+ let cmd = `pi --model ${this.expandModel(opts.model)}`;
77
+
78
+ if (opts.appendSystemPrompt) {
79
+ // POSIX single-quote escape: end quote, backslash-quote, start quote.
80
+ const escaped = opts.appendSystemPrompt.replace(/'/g, "'\\''");
81
+ cmd += ` --append-system-prompt '${escaped}'`;
82
+ }
83
+
84
+ return cmd;
85
+ }
86
+
87
+ /**
88
+ * Build the argv array for a headless one-shot Pi invocation.
89
+ *
90
+ * Returns an argv array suitable for `Bun.spawn()`. The `--print` flag causes Pi
91
+ * to run the prompt and exit. Unlike Claude Code, the prompt is a positional argument
92
+ * (last), not passed via `-p`.
93
+ *
94
+ * @param prompt - The prompt to pass as a positional argument
95
+ * @param model - Optional model override
96
+ * @returns Argv array for Bun.spawn
97
+ */
98
+ buildPrintCommand(prompt: string, model?: string): string[] {
99
+ const cmd = ["pi", "--print"];
100
+ if (model !== undefined) {
101
+ cmd.push("--model", this.expandModel(model));
102
+ }
103
+ cmd.push(prompt);
104
+ return cmd;
105
+ }
106
+
107
+ /**
108
+ * Deploy per-agent instructions and guards to a worktree.
109
+ *
110
+ * Writes up to three files:
111
+ * 1. `.claude/CLAUDE.md` — agent's task-specific overlay. Skipped when overlay is undefined.
112
+ * 2. `.pi/extensions/overstory-guard.ts` — Pi guard extension (always deployed).
113
+ * 3. `.pi/settings.json` — Pi settings enabling the extensions directory (always deployed).
114
+ *
115
+ * @param worktreePath - Absolute path to the agent's git worktree
116
+ * @param overlay - Overlay content to write as CLAUDE.md, or undefined for guard-only deployment
117
+ * @param hooks - Agent identity, capability, worktree path, and optional quality gates
118
+ */
119
+ async deployConfig(
120
+ worktreePath: string,
121
+ overlay: OverlayContent | undefined,
122
+ hooks: HooksDef,
123
+ ): Promise<void> {
124
+ if (overlay) {
125
+ const claudeDir = join(worktreePath, ".claude");
126
+ await mkdir(claudeDir, { recursive: true });
127
+ await Bun.write(join(claudeDir, "CLAUDE.md"), overlay.content);
128
+ }
129
+
130
+ // Always deploy Pi guard extension.
131
+ const piExtDir = join(worktreePath, ".pi", "extensions");
132
+ await mkdir(piExtDir, { recursive: true });
133
+ await Bun.write(join(piExtDir, "overstory-guard.ts"), generatePiGuardExtension(hooks));
134
+
135
+ // Always deploy Pi settings pointing at the extensions directory.
136
+ const piDir = join(worktreePath, ".pi");
137
+ const settings = { extensions: ["./extensions"] };
138
+ await Bun.write(join(piDir, "settings.json"), `${JSON.stringify(settings, null, "\t")}\n`);
139
+ }
140
+
141
+ /**
142
+ * Pi does not require beacon verification/resend.
143
+ *
144
+ * Claude Code's TUI sometimes swallows Enter during late initialization, so the
145
+ * orchestrator resends the beacon until the pane leaves the "idle" state. Pi's TUI
146
+ * does not have this issue AND its idle vs. processing states are indistinguishable
147
+ * via detectReady (the header "pi v..." and status bar token counter are visible in
148
+ * both states). Enabling the resend loop would spam Pi with duplicate beacon messages.
149
+ */
150
+ requiresBeaconVerification(): boolean {
151
+ return false;
152
+ }
153
+
154
+ /**
155
+ * Detect Pi TUI readiness from a tmux pane content snapshot.
156
+ *
157
+ * Pi shows a "pi v<version>" header and a status bar token-usage indicator (e.g. "0.0%/200k") when the TUI has fully rendered.
158
+ * Pi has no trust dialog phase.
159
+ *
160
+ * @param paneContent - Captured tmux pane content to analyze
161
+ * @returns Current readiness phase
162
+ */
163
+ detectReady(paneContent: string): ReadyState {
164
+ // Pi's TUI shows "pi v<version>" in the header and a status bar with
165
+ // a token usage indicator like "0.0%/200k" when fully rendered.
166
+ // Earlier detection checked for "model:" which Pi's TUI never contains.
167
+ const hasHeader = paneContent.includes("pi v");
168
+ const hasStatusBar = /\d+\.\d+%\/\d+k/.test(paneContent);
169
+ if (hasHeader && hasStatusBar) {
170
+ return { phase: "ready" };
171
+ }
172
+ return { phase: "loading" };
173
+ }
174
+
175
+ /**
176
+ * Parse a Pi transcript JSONL file into normalized token usage.
177
+ *
178
+ * Pi JSONL format differs from Claude Code:
179
+ * - Token counts are in `message_end` events with TOP-LEVEL `inputTokens` / `outputTokens`
180
+ * (not nested under message.usage)
181
+ * - Model identity comes from `model_change` events with a `model` field
182
+ *
183
+ * Returns null if the file does not exist or cannot be parsed.
184
+ *
185
+ * @param path - Absolute path to the Pi transcript JSONL file
186
+ * @returns Aggregated token usage, or null if unavailable
187
+ */
188
+ async parseTranscript(path: string): Promise<TranscriptSummary | null> {
189
+ const file = Bun.file(path);
190
+ if (!(await file.exists())) {
191
+ return null;
192
+ }
193
+
194
+ try {
195
+ const text = await file.text();
196
+ const lines = text.split("\n").filter((l) => l.trim().length > 0);
197
+
198
+ let inputTokens = 0;
199
+ let outputTokens = 0;
200
+ let model = "";
201
+
202
+ for (const line of lines) {
203
+ let entry: Record<string, unknown>;
204
+ try {
205
+ entry = JSON.parse(line) as Record<string, unknown>;
206
+ } catch {
207
+ // Skip malformed lines — Pi transcripts may have partial writes.
208
+ continue;
209
+ }
210
+
211
+ if (entry.type === "message_end") {
212
+ // Pi top-level token fields (not nested under message.usage).
213
+ if (typeof entry.inputTokens === "number") {
214
+ inputTokens += entry.inputTokens;
215
+ }
216
+ if (typeof entry.outputTokens === "number") {
217
+ outputTokens += entry.outputTokens;
218
+ }
219
+ } else if (entry.type === "model_change") {
220
+ if (typeof entry.model === "string") {
221
+ model = entry.model;
222
+ }
223
+ }
224
+ }
225
+
226
+ return { inputTokens, outputTokens, model };
227
+ } catch {
228
+ return null;
229
+ }
230
+ }
231
+
232
+ /**
233
+ * Build runtime-specific environment variables for model/provider routing.
234
+ *
235
+ * Returns the provider environment variables from the resolved model, or an empty
236
+ * object if none are set.
237
+ *
238
+ * @param model - Resolved model with optional provider env vars
239
+ * @returns Environment variable map (may be empty)
240
+ */
241
+ buildEnv(model: ResolvedModel): Record<string, string> {
242
+ return model.env ?? {};
243
+ }
244
+ }
@@ -1,6 +1,7 @@
1
1
  import { describe, expect, it } from "bun:test";
2
2
  import type { OverstoryConfig } from "../types.ts";
3
3
  import { ClaudeRuntime } from "./claude.ts";
4
+ import { PiRuntime } from "./pi.ts";
4
5
  import { getRuntime } from "./registry.ts";
5
6
 
6
7
  describe("getRuntime", () => {
@@ -50,4 +51,36 @@ describe("getRuntime", () => {
50
51
  const b = getRuntime();
51
52
  expect(a).not.toBe(b);
52
53
  });
54
+
55
+ it("returns PiRuntime when name is 'pi'", () => {
56
+ const runtime = getRuntime("pi");
57
+ expect(runtime).toBeInstanceOf(PiRuntime);
58
+ expect(runtime.id).toBe("pi");
59
+ });
60
+
61
+ it("passes Pi config from OverstoryConfig to PiRuntime", () => {
62
+ const config = {
63
+ runtime: {
64
+ default: "pi",
65
+ pi: {
66
+ provider: "amazon-bedrock",
67
+ modelMap: {
68
+ opus: "amazon-bedrock/us.anthropic.claude-opus-4-6-v1",
69
+ },
70
+ },
71
+ },
72
+ } as unknown as OverstoryConfig;
73
+ const runtime = getRuntime(undefined, config) as PiRuntime;
74
+ expect(runtime).toBeInstanceOf(PiRuntime);
75
+ // Verify the config was applied by testing model expansion
76
+ expect(runtime.expandModel("opus")).toBe("amazon-bedrock/us.anthropic.claude-opus-4-6-v1");
77
+ });
78
+
79
+ it("Pi runtime uses defaults when no Pi config in OverstoryConfig", () => {
80
+ const config = { runtime: { default: "pi" } } as OverstoryConfig;
81
+ const runtime = getRuntime(undefined, config) as PiRuntime;
82
+ expect(runtime).toBeInstanceOf(PiRuntime);
83
+ // Should use default anthropic mappings
84
+ expect(runtime.expandModel("sonnet")).toBe("anthropic/claude-sonnet-4-6");
85
+ });
53
86
  });
@@ -3,10 +3,14 @@
3
3
 
4
4
  import type { OverstoryConfig } from "../types.ts";
5
5
  import { ClaudeRuntime } from "./claude.ts";
6
+ import { PiRuntime } from "./pi.ts";
6
7
  import type { AgentRuntime } from "./types.ts";
7
8
 
8
- /** Registry of available runtime adapters (name → factory). */
9
- const runtimes = new Map<string, () => AgentRuntime>([["claude", () => new ClaudeRuntime()]]);
9
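+ /** Registry of zero-argument runtime adapter factories (name → factory). getRuntime() constructs "pi" directly so it can pass config.runtime.pi; the "pi" entry here builds a default-configured instance. */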
10
+ const runtimes = new Map<string, () => AgentRuntime>([
11
+ ["claude", () => new ClaudeRuntime()],
12
+ ["pi", () => new PiRuntime()],
13
+ ]);
10
14
 
11
15
  /**
12
16
  * Resolve a runtime adapter by name.
@@ -16,6 +20,9 @@ const runtimes = new Map<string, () => AgentRuntime>([["claude", () => new Claud
16
20
  * 2. `config.runtime.default` (if config is provided)
17
21
  * 3. `"claude"` (hardcoded fallback)
18
22
  *
23
+ * Special cases:
24
+ * - Pi runtime receives `config.runtime.pi` for model alias expansion.
25
+ *
19
26
  * @param name - Runtime name to resolve (e.g. "claude"). Omit to use config default.
20
27
  * @param config - Overstory config for reading the default runtime.
21
28
  * @throws {Error} If the resolved runtime name is not registered.
@@ -23,6 +30,12 @@ const runtimes = new Map<string, () => AgentRuntime>([["claude", () => new Claud
23
30
  */
24
31
  export function getRuntime(name?: string, config?: OverstoryConfig): AgentRuntime {
25
32
  const runtimeName = name ?? config?.runtime?.default ?? "claude";
33
+
34
+ // Pi runtime needs config for model alias expansion.
35
+ if (runtimeName === "pi") {
36
+ return new PiRuntime(config?.runtime?.pi);
37
+ }
38
+
26
39
  const factory = runtimes.get(runtimeName);
27
40
  if (!factory) {
28
41
  throw new Error(
@@ -66,6 +66,48 @@ export interface TranscriptSummary {
66
66
  model: string;
67
67
  }
68
68
 
69
+ // === RPC Connection ===
70
+
71
+ /**
72
+ * Reported state of a connected agent process.
73
+ * Used by RuntimeConnection.getState() to poll agent activity without tmux.
74
+ */
75
+ export type ConnectionState = {
76
+ status: "idle" | "working" | "error";
77
+ /** Tool currently executing, if status is "working". */
78
+ currentTool?: string;
79
+ };
80
+
81
+ /**
82
+ * Handle to spawned agent process I/O streams for RPC communication.
83
+ * Compatible with Bun.spawn output when configured with stdin/stdout pipe.
84
+ */
85
+ export interface RpcProcessHandle {
86
+ readonly stdin: {
87
+ write(data: string | Uint8Array): number | Promise<number>;
88
+ };
89
+ readonly stdout: ReadableStream<Uint8Array>;
90
+ }
91
+
92
+ /**
93
+ * Lifecycle interface for runtimes supporting direct RPC.
94
+ * When AgentRuntime.connect() exists, the orchestrator bypasses tmux for
95
+ * mail delivery (followUp), shutdown (abort), and health checks (getState).
96
+ * Pi implements via JSON-RPC 2.0 over stdin/stdout.
97
+ */
98
+ export interface RuntimeConnection {
99
+ /** Send initial prompt after spawn. */
100
+ sendPrompt(text: string): Promise<void>;
101
+ /** Send follow-up message — replaces tmux send-keys. */
102
+ followUp(text: string): Promise<void>;
103
+ /** Clean shutdown — replaces SIGTERM. */
104
+ abort(): Promise<void>;
105
+ /** Query current state — replaces tmux capture-pane. */
106
+ getState(): Promise<ConnectionState>;
107
+ /** Release connection resources. */
108
+ close(): void;
109
+ }
110
+
69
111
  // === Runtime Interface ===
70
112
 
71
113
  /**
@@ -122,4 +164,25 @@ export interface AgentRuntime {
122
164
  * the provider's authTokenEnv directly.
123
165
  */
124
166
  buildEnv(model: ResolvedModel): Record<string, string>;
167
+
168
+ /**
169
+ * Whether this runtime requires the beacon verification/resend loop after initial send.
170
+ *
171
+ * Claude Code's TUI sometimes swallows Enter during late initialization, so the
172
+ * orchestrator resends the beacon if the pane still appears idle (overstory-3271).
173
+ * Pi's TUI does not exhibit this behavior AND its idle/processing states are
174
+ * indistinguishable via detectReady (both show the header and status bar), so
175
+ * the resend loop would spam Pi with duplicate startup messages.
176
+ *
177
+ * Runtimes that omit this method (or return true) get the resend loop.
178
+ * Pi returns false to skip it.
179
+ */
180
+ requiresBeaconVerification?(): boolean;
181
+
182
+ /**
183
+ * Establish direct RPC connection to running agent process.
184
+ * Runtimes without RPC (Claude, Codex) omit this method.
185
+ * Orchestrator checks `if (runtime.connect)` before calling, falls back to tmux when absent.
186
+ */
187
+ connect?(process: RpcProcessHandle): RuntimeConnection;
125
188
  }
@@ -12,7 +12,7 @@
12
12
 
13
13
  import { Database } from "bun:sqlite";
14
14
  import { afterEach, beforeEach, describe, expect, test } from "bun:test";
15
- import { mkdtemp, rm } from "node:fs/promises";
15
+ import { mkdtemp } from "node:fs/promises";
16
16
  import { tmpdir } from "node:os";
17
17
  import { join } from "node:path";
18
18
  import { createEventStore } from "./events/store.ts";
@@ -21,6 +21,8 @@ import { createMergeQueue } from "./merge/queue.ts";
21
21
  import { createMetricsStore } from "./metrics/store.ts";
22
22
  import { createSessionStore } from "./sessions/store.ts";
23
23
 
24
+ import { cleanupTempDir } from "./test-helpers.ts";
25
+
24
26
  /** Extract sorted column names from a table via PRAGMA table_info(). */
25
27
  function getTableColumns(db: Database, tableName: string): string[] {
26
28
  const rows = db.prepare(`PRAGMA table_info(${tableName})`).all() as Array<{ name: string }>;
@@ -35,7 +37,7 @@ describe("SQL schema consistency", () => {
35
37
  });
36
38
 
37
39
  afterEach(async () => {
38
- await rm(tmpDir, { recursive: true, force: true });
40
+ await cleanupTempDir(tmpDir);
39
41
  });
40
42
 
41
43
  describe("SessionStore", () => {
@@ -65,6 +67,7 @@ describe("SQL schema consistency", () => {
65
67
  "state",
66
68
  "task_id",
67
69
  "tmux_session",
70
+ "transcript_path",
68
71
  "worktree_path",
69
72
  ].sort();
70
73
 
@@ -6,9 +6,10 @@
6
6
  */
7
7
 
8
8
  import { afterEach, beforeEach, describe, expect, test } from "bun:test";
9
- import { mkdtemp, rm, writeFile } from "node:fs/promises";
9
+ import { mkdtemp, writeFile } from "node:fs/promises";
10
10
  import { tmpdir } from "node:os";
11
11
  import { join } from "node:path";
12
+ import { cleanupTempDir } from "../test-helpers.ts";
12
13
  import { openSessionStore } from "./compat.ts";
13
14
 
14
15
  let tempDir: string;
@@ -22,7 +23,7 @@ beforeEach(async () => {
22
23
  });
23
24
 
24
25
  afterEach(async () => {
25
- await rm(tempDir, { recursive: true, force: true });
26
+ await cleanupTempDir(tempDir);
26
27
  });
27
28
 
28
29
  /** Create a sessions.json with the given entries. */
@@ -36,6 +36,7 @@ function normalizeSession(raw: Record<string, unknown>): AgentSession {
36
36
  lastActivity: raw.lastActivity as string,
37
37
  escalationLevel: (raw.escalationLevel as number) ?? 0,
38
38
  stalledSince: (raw.stalledSince as string | null) ?? null,
39
+ transcriptPath: (raw.transcriptPath as string | null) ?? null,
39
40
  };
40
41
  }
41
42
 
@@ -6,9 +6,10 @@
6
6
  */
7
7
 
8
8
  import { afterEach, beforeEach, describe, expect, test } from "bun:test";
9
- import { mkdtemp, rm } from "node:fs/promises";
9
+ import { mkdtemp } from "node:fs/promises";
10
10
  import { tmpdir } from "node:os";
11
11
  import { join } from "node:path";
12
+ import { cleanupTempDir } from "../test-helpers.ts";
12
13
  import type { AgentSession, AgentState, InsertRun, Run, RunStore } from "../types.ts";
13
14
  import { createRunStore, createSessionStore, type SessionStore } from "./store.ts";
14
15
 
@@ -24,7 +25,7 @@ beforeEach(async () => {
24
25
 
25
26
  afterEach(async () => {
26
27
  store.close();
27
- await rm(tempDir, { recursive: true, force: true });
28
+ await cleanupTempDir(tempDir);
28
29
  });
29
30
 
30
31
  /** Helper to create an AgentSession with optional overrides. */
@@ -46,6 +47,7 @@ function makeSession(overrides: Partial<AgentSession> = {}): AgentSession {
46
47
  lastActivity: "2026-01-15T10:00:00.000Z",
47
48
  escalationLevel: 0,
48
49
  stalledSince: null,
50
+ transcriptPath: null,
49
51
  ...overrides,
50
52
  };
51
53
  }
@@ -137,6 +139,36 @@ describe("upsert", () => {
137
139
  const badSession = { ...session, state: "invalid" as AgentState };
138
140
  expect(() => store.upsert(badSession)).toThrow();
139
141
  });
142
+
143
+ test("handles null transcriptPath", () => {
144
+ const session = makeSession({ transcriptPath: null });
145
+ store.upsert(session);
146
+ const result = store.getByName("test-agent");
147
+ expect(result?.transcriptPath).toBeNull();
148
+ });
149
+
150
+ test("transcriptPath roundtrips correctly", () => {
151
+ const session = makeSession({ transcriptPath: "/home/user/.pi/sessions/abc.jsonl" });
152
+ store.upsert(session);
153
+ const result = store.getByName("test-agent");
154
+ expect(result?.transcriptPath).toBe("/home/user/.pi/sessions/abc.jsonl");
155
+ });
156
+ });
157
+
158
+ // === updateTranscriptPath ===
159
+
160
+ describe("updateTranscriptPath", () => {
161
+ test("sets transcript path for an existing session", () => {
162
+ store.upsert(makeSession({ transcriptPath: null }));
163
+ store.updateTranscriptPath("test-agent", "/tmp/transcript.jsonl");
164
+ const result = store.getByName("test-agent");
165
+ expect(result?.transcriptPath).toBe("/tmp/transcript.jsonl");
166
+ });
167
+
168
+ test("is a no-op for nonexistent agent", () => {
169
+ // Should not throw
170
+ store.updateTranscriptPath("nonexistent", "/tmp/transcript.jsonl");
171
+ });
140
172
  });
141
173
 
142
174
  // === getByName ===
@@ -28,6 +28,8 @@ export interface SessionStore {
28
28
  updateLastActivity(agentName: string): void;
29
29
  /** Update escalation level and stalled timestamp. */
30
30
  updateEscalation(agentName: string, level: number, stalledSince: string | null): void;
31
+ /** Update the transcript path for a session. */
32
+ updateTranscriptPath(agentName: string, path: string): void;
31
33
  /** Remove a session by agent name. */
32
34
  remove(agentName: string): void;
33
35
  /** Purge sessions matching criteria. Returns count of deleted rows. */
@@ -54,6 +56,7 @@ interface SessionRow {
54
56
  last_activity: string;
55
57
  escalation_level: number;
56
58
  stalled_since: string | null;
59
+ transcript_path: string | null;
57
60
  }
58
61
 
59
62
  /** Row shape for runs table as stored in SQLite (snake_case columns). */
@@ -84,7 +87,8 @@ CREATE TABLE IF NOT EXISTS sessions (
84
87
  started_at TEXT NOT NULL,
85
88
  last_activity TEXT NOT NULL,
86
89
  escalation_level INTEGER NOT NULL DEFAULT 0,
87
- stalled_since TEXT
90
+ stalled_since TEXT,
91
+ transcript_path TEXT
88
92
  )`;
89
93
 
90
94
  const CREATE_INDEXES = `
@@ -124,6 +128,7 @@ function rowToSession(row: SessionRow): AgentSession {
124
128
  lastActivity: row.last_activity,
125
129
  escalationLevel: row.escalation_level,
126
130
  stalledSince: row.stalled_since,
131
+ transcriptPath: row.transcript_path,
127
132
  };
128
133
  }
129
134
 
@@ -139,6 +144,18 @@ function rowToRun(row: RunRow): Run {
139
144
  };
140
145
  }
141
146
 
147
+ /**
148
+ * Migrate an existing sessions table to add the transcript_path column.
149
+ * Safe to call multiple times — only adds the column if it does not exist.
150
+ */
151
+ function migrateAddTranscriptPath(db: Database): void {
152
+ const rows = db.prepare("PRAGMA table_info(sessions)").all() as Array<{ name: string }>;
153
+ const existingColumns = new Set(rows.map((r) => r.name));
154
+ if (!existingColumns.has("transcript_path")) {
155
+ db.exec("ALTER TABLE sessions ADD COLUMN transcript_path TEXT");
156
+ }
157
+ }
158
+
142
159
  /**
143
160
  * Migrate an existing sessions table from bead_id to task_id column.
144
161
  * Safe to call multiple times — only renames if bead_id exists and task_id does not.
@@ -173,6 +190,8 @@ export function createSessionStore(dbPath: string): SessionStore {
173
190
 
174
191
  // Migrate: rename bead_id → task_id on existing tables
175
192
  migrateBeadIdToTaskId(db);
193
+ // Migrate: add transcript_path column to existing tables
194
+ migrateAddTranscriptPath(db);
176
195
 
177
196
  // Prepare statements for frequent operations
178
197
  const upsertStmt = db.prepare<
@@ -194,16 +213,17 @@ export function createSessionStore(dbPath: string): SessionStore {
194
213
  $last_activity: string;
195
214
  $escalation_level: number;
196
215
  $stalled_since: string | null;
216
+ $transcript_path: string | null;
197
217
  }
198
218
  >(`
199
219
  INSERT INTO sessions
200
220
  (id, agent_name, capability, worktree_path, branch_name, task_id,
201
221
  tmux_session, state, pid, parent_agent, depth, run_id,
202
- started_at, last_activity, escalation_level, stalled_since)
222
+ started_at, last_activity, escalation_level, stalled_since, transcript_path)
203
223
  VALUES
204
224
  ($id, $agent_name, $capability, $worktree_path, $branch_name, $task_id,
205
225
  $tmux_session, $state, $pid, $parent_agent, $depth, $run_id,
206
- $started_at, $last_activity, $escalation_level, $stalled_since)
226
+ $started_at, $last_activity, $escalation_level, $stalled_since, $transcript_path)
207
227
  ON CONFLICT(agent_name) DO UPDATE SET
208
228
  id = excluded.id,
209
229
  capability = excluded.capability,
@@ -219,7 +239,8 @@ export function createSessionStore(dbPath: string): SessionStore {
219
239
  started_at = excluded.started_at,
220
240
  last_activity = excluded.last_activity,
221
241
  escalation_level = excluded.escalation_level,
222
- stalled_since = excluded.stalled_since
242
+ stalled_since = excluded.stalled_since,
243
+ transcript_path = excluded.transcript_path
223
244
  `);
224
245
 
225
246
  const getByNameStmt = db.prepare<SessionRow, { $agent_name: string }>(`
@@ -268,6 +289,13 @@ export function createSessionStore(dbPath: string): SessionStore {
268
289
  DELETE FROM sessions WHERE agent_name = $agent_name
269
290
  `);
270
291
 
292
+ const updateTranscriptPathStmt = db.prepare<
293
+ void,
294
+ { $agent_name: string; $transcript_path: string }
295
+ >(`
296
+ UPDATE sessions SET transcript_path = $transcript_path WHERE agent_name = $agent_name
297
+ `);
298
+
271
299
  return {
272
300
  upsert(session: AgentSession): void {
273
301
  upsertStmt.run({
@@ -287,6 +315,7 @@ export function createSessionStore(dbPath: string): SessionStore {
287
315
  $last_activity: session.lastActivity,
288
316
  $escalation_level: session.escalationLevel,
289
317
  $stalled_since: session.stalledSince,
318
+ $transcript_path: session.transcriptPath,
290
319
  });
291
320
  },
292
321
 
@@ -334,6 +363,10 @@ export function createSessionStore(dbPath: string): SessionStore {
334
363
  });
335
364
  },
336
365
 
366
+ updateTranscriptPath(agentName: string, path: string): void {
367
+ updateTranscriptPathStmt.run({ $agent_name: agentName, $transcript_path: path });
368
+ },
369
+
337
370
  remove(agentName: string): void {
338
371
  removeStmt.run({ $agent_name: agentName });
339
372
  },
@@ -95,9 +95,28 @@ export async function getDefaultBranch(repoDir: string): Promise<string> {
95
95
 
96
96
  /**
97
97
  * Remove a temp directory. Safe to call even if the directory doesn't exist.
98
+ *
99
+ * On Windows, SQLite WAL/SHM file handles may linger briefly after db.close(),
100
+ * causing EBUSY errors on immediate rm(). Retries with exponential backoff
101
+ * (up to ~1.5s total) to handle this OS-level timing issue.
98
102
  */
99
103
  export async function cleanupTempDir(dir: string): Promise<void> {
100
- await rm(dir, { recursive: true, force: true });
104
+ const maxRetries = process.platform === "win32" ? 5 : 0;
105
+ for (let attempt = 0; attempt <= maxRetries; attempt++) {
106
+ try {
107
+ await rm(dir, { recursive: true, force: true });
108
+ return;
109
+ } catch (err: unknown) {
110
+ const code = (err as NodeJS.ErrnoException).code;
111
+ if (code === "EBUSY" && attempt < maxRetries) {
112
+ // Exponential backoff: 50, 100, 200, 400, 800ms
113
+ await Bun.sleep(50 * 2 ** attempt);
114
+ continue;
115
+ }
116
+ // Non-EBUSY or final attempt: swallow (temp dirs are cleaned by OS anyway)
117
+ if (code !== "ENOENT") return;
118
+ }
119
+ }
101
120
  }
102
121
 
103
122
  /**