@os-eco/overstory-cli 0.6.11 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. package/README.md +7 -9
  2. package/agents/lead.md +20 -19
  3. package/package.json +5 -3
  4. package/src/agents/overlay.test.ts +23 -0
  5. package/src/agents/overlay.ts +5 -4
  6. package/src/commands/coordinator.ts +21 -9
  7. package/src/commands/costs.test.ts +1 -1
  8. package/src/commands/costs.ts +13 -20
  9. package/src/commands/dashboard.ts +38 -138
  10. package/src/commands/doctor.test.ts +1 -1
  11. package/src/commands/doctor.ts +2 -2
  12. package/src/commands/ecosystem.ts +2 -1
  13. package/src/commands/errors.test.ts +4 -5
  14. package/src/commands/errors.ts +4 -62
  15. package/src/commands/feed.test.ts +2 -2
  16. package/src/commands/feed.ts +12 -106
  17. package/src/commands/inspect.ts +10 -44
  18. package/src/commands/logs.ts +7 -63
  19. package/src/commands/metrics.test.ts +2 -2
  20. package/src/commands/metrics.ts +3 -17
  21. package/src/commands/monitor.ts +17 -7
  22. package/src/commands/replay.test.ts +2 -2
  23. package/src/commands/replay.ts +12 -135
  24. package/src/commands/run.ts +7 -23
  25. package/src/commands/sling.test.ts +53 -0
  26. package/src/commands/sling.ts +25 -10
  27. package/src/commands/status.ts +4 -17
  28. package/src/commands/supervisor.ts +18 -8
  29. package/src/commands/trace.test.ts +5 -6
  30. package/src/commands/trace.ts +11 -109
  31. package/src/config.ts +10 -0
  32. package/src/index.ts +2 -1
  33. package/src/logging/format.ts +214 -0
  34. package/src/logging/theme.ts +132 -0
  35. package/src/metrics/store.test.ts +46 -0
  36. package/src/metrics/store.ts +11 -0
  37. package/src/mulch/client.test.ts +20 -0
  38. package/src/mulch/client.ts +312 -45
  39. package/src/runtimes/claude.test.ts +616 -0
  40. package/src/runtimes/claude.ts +218 -0
  41. package/src/runtimes/registry.test.ts +53 -0
  42. package/src/runtimes/registry.ts +33 -0
  43. package/src/runtimes/types.ts +125 -0
  44. package/src/types.ts +4 -0
  45. package/src/worktree/tmux.test.ts +28 -13
  46. package/src/worktree/tmux.ts +14 -28
@@ -0,0 +1,218 @@
1
+ // Claude Code runtime adapter for overstory's AgentRuntime interface.
2
+ // Pure extraction — no new behavior. All implementation delegates to existing code.
3
+ // Phase 0: file exists and compiles. Callers are not rewired until Phase 2.
4
+
5
+ import { mkdir } from "node:fs/promises";
6
+ import { join } from "node:path";
7
+ import { deployHooks } from "../agents/hooks-deployer.ts";
8
+ import { estimateCost, parseTranscriptUsage } from "../metrics/transcript.ts";
9
+ import type { ResolvedModel } from "../types.ts";
10
+ import type {
11
+ AgentRuntime,
12
+ HooksDef,
13
+ OverlayContent,
14
+ ReadyState,
15
+ SpawnOpts,
16
+ TranscriptSummary,
17
+ } from "./types.ts";
18
+
19
/**
 * Words made only of these characters are passed through a POSIX shell
 * unchanged, so they can be embedded in a command string without quoting.
 * Anything else (spaces, quotes, `;`, `$`, globs such as `[`…`]`, or the
 * empty string) must be quoted.
 */
const SHELL_SAFE_WORD = /^[A-Za-z0-9_@%+=:,./-]+$/;

/**
 * Wrap `value` in POSIX single quotes.
 *
 * A single-quoted string cannot contain a single quote, so each embedded
 * quote is emitted as: close quote, backslash-escaped quote, reopen quote.
 */
function quoteShellArg(value: string): string {
	return `'${value.replace(/'/g, "'\\''")}'`;
}

/**
 * Claude Code runtime adapter.
 *
 * Implements AgentRuntime for the `claude` CLI. The adapter builds command
 * lines, writes the per-worktree `.claude/` configuration, classifies tmux
 * pane snapshots, and summarizes transcripts by delegating to
 * `deployHooks()`, `parseTranscriptUsage()` and `estimateCost()`.
 *
 * Phase 0: file exists, compiles, and exports the class.
 * Phase 2 will rewire callers (sling.ts, coordinator.ts, etc.) to use this adapter.
 */
export class ClaudeRuntime implements AgentRuntime {
	/** Unique identifier for this runtime. */
	readonly id = "claude";

	/** Relative path to the instruction file within a worktree. */
	readonly instructionPath = ".claude/CLAUDE.md";

	/**
	 * Build the shell command string that starts an interactive Claude Code agent.
	 *
	 * Mapping from SpawnOpts to CLI flags:
	 * - `model` → `--model <model>` (single-quoted only when it contains
	 *   characters a shell would interpret; plain refs such as `sonnet` or
	 *   `openrouter/gpt-5` are emitted verbatim)
	 * - `permissionMode` → `--permission-mode <mode>`; "bypass" becomes
	 *   "bypassPermissions", every other value becomes "default"
	 * - `appendSystemPrompt` → `--append-system-prompt '<escaped>'`, omitted
	 *   when the value is undefined or empty
	 *
	 * `systemPrompt`, `cwd` and `env` are not read here; the command string
	 * carries only the three flags above.
	 *
	 * @param opts - Spawn options (model, permissionMode, appendSystemPrompt)
	 * @returns Shell command string to run as the tmux session's initial command
	 */
	buildSpawnCommand(opts: SpawnOpts): string {
		const permMode = opts.permissionMode === "bypass" ? "bypassPermissions" : "default";
		// The result is parsed by a shell, so an unsafe model ref must be quoted;
		// safe refs stay unquoted to keep the command identical to earlier output.
		const model = SHELL_SAFE_WORD.test(opts.model) ? opts.model : quoteShellArg(opts.model);
		let cmd = `claude --model ${model} --permission-mode ${permMode}`;

		if (opts.appendSystemPrompt) {
			cmd += ` --append-system-prompt ${quoteShellArg(opts.appendSystemPrompt)}`;
		}

		return cmd;
	}

	/**
	 * Build the argv array for a headless one-shot Claude invocation.
	 *
	 * The array is `["claude", "--print", "-p", prompt]`, followed by
	 * `"--model", model` when a model is given. Because this is an argv array
	 * (not a shell string) no quoting is applied to `prompt`.
	 *
	 * NOTE(review): per the Claude Code CLI reference `-p` is the short form of
	 * `--print`, so one of the two flags is redundant; both are kept so the
	 * argv stays exactly what existing callers and tests expect.
	 *
	 * @param prompt - Prompt text, passed as the argument following the print flags
	 * @param model - Optional model override (omit to use Claude Code's default)
	 * @returns Argv array suitable for process spawning
	 */
	buildPrintCommand(prompt: string, model?: string): string[] {
		const cmd = ["claude", "--print", "-p", prompt];
		if (model !== undefined) {
			cmd.push("--model", model);
		}
		return cmd;
	}

	/**
	 * Deploy per-agent instructions and guards to a worktree.
	 *
	 * 1. When `overlay` is provided, `<worktreePath>/.claude/` is created
	 *    (recursively) and `overlay.content` is written verbatim to `CLAUDE.md`.
	 *    When `overlay` is undefined this step is skipped (hooks-only deployment).
	 * 2. `deployHooks()` is always called with the fields of `hooks`.
	 *
	 * Note that the hooks are deployed to `hooks.worktreePath`, not to the
	 * `worktreePath` argument. NOTE(review): callers presumably pass the same
	 * path for both — confirm.
	 *
	 * @param worktreePath - Absolute path to the agent's git worktree (used for CLAUDE.md)
	 * @param overlay - Overlay content to write as CLAUDE.md, or undefined for hooks-only deployment
	 * @param hooks - Hook definition forwarded to deployHooks
	 * @throws Any error raised by `mkdir`, `Bun.write` or `deployHooks()`; nothing is caught here
	 */
	async deployConfig(
		worktreePath: string,
		overlay: OverlayContent | undefined,
		hooks: HooksDef,
	): Promise<void> {
		if (overlay) {
			const claudeDir = join(worktreePath, ".claude");
			await mkdir(claudeDir, { recursive: true });

			const claudeMdPath = join(claudeDir, "CLAUDE.md");
			await Bun.write(claudeMdPath, overlay.content);
		}

		await deployHooks(hooks.worktreePath, hooks.agentName, hooks.capability, hooks.qualityGates);
	}

	/**
	 * Classify a captured tmux pane snapshot into a readiness phase.
	 *
	 * This is a pure function of `paneContent`: it does not poll tmux or send
	 * keys. The caller captures the pane and acts on the result (for example by
	 * sending the returned `action` key to dismiss a dialog).
	 *
	 * - Contains "trust this folder" → `{ phase: "dialog", action: "Enter" }`
	 * - Contains a prompt indicator (❯ or 'Try "') AND a status-bar marker
	 *   ("bypass permissions" or "shift+tab") → `{ phase: "ready" }`
	 * - Anything else → `{ phase: "loading" }`
	 *
	 * @param paneContent - Captured tmux pane content to analyze
	 * @returns Current readiness phase
	 */
	detectReady(paneContent: string): ReadyState {
		// The trust dialog is checked first so that it wins even if the snapshot
		// also happens to contain prompt/status-bar text.
		if (paneContent.includes("trust this folder")) {
			return { phase: "dialog", action: "Enter" };
		}

		// \u276f is the ❯ prompt character; 'Try "' is the second accepted prompt marker.
		const hasPrompt = paneContent.includes("\u276f") || paneContent.includes('Try "');

		const hasStatusBar =
			paneContent.includes("bypass permissions") || paneContent.includes("shift+tab");

		if (hasPrompt && hasStatusBar) {
			return { phase: "ready" };
		}

		return { phase: "loading" };
	}

	/**
	 * Parse a transcript JSONL file into normalized token usage.
	 *
	 * Returns null when the file does not exist, or when
	 * `parseTranscriptUsage()` / `estimateCost()` throw. Otherwise the usage
	 * reported by `parseTranscriptUsage()` is mapped onto TranscriptSummary;
	 * a null `modelUsed` becomes the empty string.
	 *
	 * @param path - Absolute path to the transcript JSONL file
	 * @returns Token usage summary, or null if unavailable
	 */
	async parseTranscript(path: string): Promise<TranscriptSummary | null> {
		const file = Bun.file(path);
		if (!(await file.exists())) {
			return null;
		}

		try {
			const usage = await parseTranscriptUsage(path);
			// The return value is intentionally discarded: TranscriptSummary has no
			// cost field. NOTE(review): presumably this call validates that the
			// model is recognized (an exception here turns the result into null) —
			// confirm estimateCost can throw, otherwise the call is dead code.
			if (usage.modelUsed !== null) {
				estimateCost(usage);
			}
			return {
				inputTokens: usage.inputTokens,
				outputTokens: usage.outputTokens,
				model: usage.modelUsed ?? "",
			};
		} catch {
			// Best-effort contract: any parse failure is reported as "no summary".
			return null;
		}
	}

	/**
	 * Build the environment variables used for model/provider routing.
	 *
	 * Returns `model.env` as-is (the same object, not a copy), or a new empty
	 * object when the resolved model carries no env map.
	 *
	 * @param model - Resolved model with optional provider env vars
	 * @returns Environment variable map (may be empty)
	 */
	buildEnv(model: ResolvedModel): Record<string, string> {
		return model.env ?? {};
	}
}

/** Singleton instance for use in callers that do not need DI. */
export const claudeRuntime = new ClaudeRuntime();
@@ -0,0 +1,53 @@
1
+ import { describe, expect, it } from "bun:test";
2
+ import type { OverstoryConfig } from "../types.ts";
3
+ import { ClaudeRuntime } from "./claude.ts";
4
+ import { getRuntime } from "./registry.ts";
5
+
6
// Unit tests for the runtime registry's getRuntime() lookup order:
// explicit name → config.runtime.default → "claude" fallback.
describe("getRuntime", () => {
	it("returns a ClaudeRuntime by default (no args)", () => {
		const runtime = getRuntime();
		expect(runtime).toBeInstanceOf(ClaudeRuntime);
		expect(runtime.id).toBe("claude");
	});

	it('returns a ClaudeRuntime when name is "claude"', () => {
		const runtime = getRuntime("claude");
		expect(runtime).toBeInstanceOf(ClaudeRuntime);
		expect(runtime.id).toBe("claude");
	});

	it("throws with a helpful message for an unknown runtime", () => {
		expect(() => getRuntime("unknown-runtime")).toThrow(
			'Unknown runtime: "unknown-runtime". Available: claude',
		);
	});

	it("uses config.runtime.default when name is omitted", () => {
		const config = { runtime: { default: "claude" } } as OverstoryConfig;
		const runtime = getRuntime(undefined, config);
		expect(runtime).toBeInstanceOf(ClaudeRuntime);
		expect(runtime.id).toBe("claude");
	});

	it("explicit name overrides config.runtime.default", () => {
		// The config default is deliberately an UNREGISTERED runtime: if getRuntime
		// consulted the config before the name argument, this call would throw
		// instead of returning the Claude adapter.
		const config = { runtime: { default: "codex" } } as OverstoryConfig;
		const runtime = getRuntime("claude", config);
		expect(runtime).toBeInstanceOf(ClaudeRuntime);
		expect(runtime.id).toBe("claude");
	});

	it("throws for unknown runtime even when config default is set", () => {
		const config = { runtime: { default: "codex" } } as OverstoryConfig;
		// No name arg — falls back to config default "codex" which is unknown.
		expect(() => getRuntime(undefined, config)).toThrow(
			'Unknown runtime: "codex". Available: claude',
		);
	});

	it("returns a new instance on each call (factory pattern)", () => {
		const a = getRuntime();
		const b = getRuntime();
		expect(a).not.toBe(b);
	});
});
@@ -0,0 +1,33 @@
1
+ // Runtime registry — maps runtime names to adapter factory functions.
2
+ // This is the ONLY module that imports concrete adapter classes.
3
+
4
+ import type { OverstoryConfig } from "../types.ts";
5
+ import { ClaudeRuntime } from "./claude.ts";
6
+ import type { AgentRuntime } from "./types.ts";
7
+
8
/** Factory table keyed by runtime name; invoking a factory constructs a new adapter. */
const runtimes = new Map<string, () => AgentRuntime>([["claude", () => new ClaudeRuntime()]]);

/**
 * Look up a runtime adapter and construct it.
 *
 * The runtime name is taken from the first of these that is not null/undefined:
 *   1. the `name` argument
 *   2. `config.runtime.default`
 *   3. the literal `"claude"`
 *
 * @param name - Runtime name to resolve (e.g. "claude"). Omit to use the config default.
 * @param config - Overstory config consulted for the default runtime name.
 * @returns The adapter produced by the registered factory (a new object per call).
 * @throws {Error} When no factory is registered under the resolved name; the
 *   message lists every registered name.
 */
export function getRuntime(name?: string, config?: OverstoryConfig): AgentRuntime {
	const requested = name ?? config?.runtime?.default ?? "claude";
	const create = runtimes.get(requested);

	if (create !== undefined) {
		return create();
	}

	const available = Array.from(runtimes.keys()).join(", ");
	throw new Error(`Unknown runtime: "${requested}". Available: ${available}`);
}
@@ -0,0 +1,125 @@
1
+ // Runtime abstraction types for multi-provider agent support.
2
+ // See docs/runtime-abstraction.md for design rationale and coupling inventory.
3
+
4
+ import type { QualityGate, ResolvedModel } from "../types.ts";
5
+
6
+ // === Spawn ===
7
+
8
/**
 * Input to `AgentRuntime.buildSpawnCommand()`: everything needed to start an
 * interactive agent process.
 *
 * Not every adapter consumes every field; an adapter maps the subset its CLI
 * supports onto command-line flags.
 */
export interface SpawnOpts {
	/**
	 * Model reference — either an alias or a provider-qualified name
	 * (e.g. "sonnet" or "openrouter/gpt-5").
	 */
	model: string;

	/**
	 * How the agent handles permission prompts: "bypass" for trusted builders,
	 * "ask" for interactive agents.
	 */
	permissionMode: "bypass" | "ask";

	/** Optional text injected ahead of the agent's base instructions. */
	systemPrompt?: string;

	/** Optional text appended after the agent's base instructions. */
	appendSystemPrompt?: string;

	/** Directory the spawned process should run in. */
	cwd: string;

	/** Extra environment variables for the spawned process. */
	env: Record<string, string>;
}
23
+
24
+ // === Readiness ===
25
+
26
/**
 * Result of inspecting a tmux pane snapshot for agent TUI readiness.
 *
 * - `loading` — the TUI is not usable yet; keep polling.
 * - `dialog`  — a blocking dialog is showing; `action` names the key to send
 *   in order to dismiss it (e.g. "Enter").
 * - `ready`   — the agent can accept input.
 *
 * Headless runtimes (codex exec, pi --mode rpc) always return { phase: "ready" }.
 */
export type ReadyState =
	| { phase: "loading" }
	| { phase: "dialog"; action: string }
	| { phase: "ready" };
34
+
35
+ // === Config Deployment ===
36
+
37
/**
 * Instruction-file payload handed to `AgentRuntime.deployConfig()`.
 * The shape is runtime-agnostic; each adapter decides where the text is written.
 */
export interface OverlayContent {
	/** Complete markdown body of the agent's instruction file. */
	content: string;
}
42
+
43
/**
 * Hook/guard configuration passed to `AgentRuntime.deployConfig()`.
 *
 * The definition itself is runtime-agnostic; every adapter converts it into
 * whatever guard mechanism its runtime understands (e.g., settings.local.json
 * hooks for Claude Code, guard extensions for Pi).
 */
export interface HooksDef {
	/** Name of the agent; injected into the generated hook commands. */
	agentName: string;

	/** Capability of the agent (builder, scout, reviewer, lead, etc.). */
	capability: string;

	/** Absolute worktree path, used to enforce the agent's path boundary. */
	worktreePath: string;

	/** Optional quality gates the agent must pass before reporting completion. */
	qualityGates?: QualityGate[];
}
58
+
59
+ // === Transcripts ===
60
+
61
/**
 * Token usage for one session, normalized so that transcripts from any
 * runtime can be reported in the same shape.
 */
export interface TranscriptSummary {
	/** Input token count. */
	inputTokens: number;

	/** Output token count. */
	outputTokens: number;

	/** Model identifier exactly as the runtime reported it (e.g. "claude-sonnet-4-6"). */
	model: string;
}
68
+
69
+ // === Runtime Interface ===
70
+
71
/**
 * Contract that all agent runtime adapters must implement.
 *
 * Each runtime (Claude Code, Codex, Pi, OpenCode, ...) provides a ~200-400 line
 * adapter file implementing this interface. The orchestration engine calls only
 * these methods — never the runtime's CLI directly.
 */
export interface AgentRuntime {
	/**
	 * Unique runtime identifier (e.g. "claude", "codex", "pi").
	 * Read-only, like `instructionPath`: an adapter's identity is fixed at
	 * construction and must not be reassigned by consumers.
	 */
	readonly id: string;

	/** Relative path to the instruction file within a worktree (e.g. ".claude/CLAUDE.md"). */
	readonly instructionPath: string;

	/**
	 * Build the shell command string to spawn an interactive agent in a tmux pane.
	 * The result is a single string that a shell will parse, so implementations
	 * are responsible for quoting any value they interpolate.
	 */
	buildSpawnCommand(opts: SpawnOpts): string;

	/**
	 * Build the argv array for a headless one-shot AI call.
	 * Used by merge/resolver.ts and watchdog/triage.ts for AI-assisted operations.
	 *
	 * @param prompt - Prompt text for the one-shot call
	 * @param model - Optional model override; omit to use the runtime's default
	 */
	buildPrintCommand(prompt: string, model?: string): string[];

	/**
	 * Deploy per-agent instructions and guards to a worktree.
	 * Claude Code writes .claude/CLAUDE.md + settings.local.json hooks.
	 * Codex writes AGENTS.md (no hook deployment needed).
	 * Pi writes .claude/CLAUDE.md + a guard extension in .pi/extensions/.
	 * When overlay is undefined, only hooks are deployed (no instruction file written).
	 *
	 * @param worktreePath - Path of the worktree to deploy into
	 * @param overlay - Instruction-file content, or undefined for hooks-only deployment
	 * @param hooks - Hook/guard definition to translate into the runtime's native mechanism
	 */
	deployConfig(
		worktreePath: string,
		overlay: OverlayContent | undefined,
		hooks: HooksDef,
	): Promise<void>;

	/**
	 * Detect agent readiness from tmux pane content.
	 * Headless runtimes that exit when done should return { phase: "ready" } unconditionally.
	 *
	 * @param paneContent - Captured pane text to classify
	 */
	detectReady(paneContent: string): ReadyState;

	/**
	 * Parse a session transcript file into normalized token usage.
	 * Returns null if the transcript does not exist or cannot be parsed.
	 *
	 * @param path - Path to the transcript file
	 */
	parseTranscript(path: string): Promise<TranscriptSummary | null>;

	/**
	 * Build runtime-specific environment variables for model/provider routing.
	 * Claude Code uses ANTHROPIC_API_KEY; Codex uses OPENAI_API_KEY; Pi passes
	 * the provider's authTokenEnv directly.
	 *
	 * @param model - Resolved model whose provider settings drive the env map
	 * @returns Environment variable map (may be empty)
	 */
	buildEnv(model: ResolvedModel): Record<string, string>;
}
package/src/types.ts CHANGED
@@ -86,6 +86,10 @@ export interface OverstoryConfig {
86
86
  verbose: boolean;
87
87
  redactSecrets: boolean;
88
88
  };
89
+ runtime?: {
90
+ /** Default runtime adapter name (default: "claude"). */
91
+ default: string;
92
+ };
89
93
  }
90
94
 
91
95
  // === Agent Manifest ===
@@ -1,5 +1,6 @@
1
1
  import { afterEach, beforeEach, describe, expect, spyOn, test } from "bun:test";
2
2
  import { AgentError } from "../errors.ts";
3
+ import type { ReadyState } from "../runtimes/types.ts";
3
4
  import {
4
5
  capturePaneContent,
5
6
  createSession,
@@ -940,6 +941,20 @@ describe("capturePaneContent", () => {
940
941
  });
941
942
  });
942
943
 
944
/**
 * Test stand-in for a Claude-style `detectReady` callback.
 *
 * Classifies a pane snapshot as:
 * - "dialog" (action "Enter") when it contains "trust this folder";
 * - "ready" when it contains a prompt marker (❯ or 'Try "') together with a
 *   status-bar marker ("bypass permissions" or "shift+tab");
 * - "loading" otherwise.
 */
function claudeDetectReady(paneContent: string): ReadyState {
	const shows = (needle: string): boolean => paneContent.includes(needle);

	// The trust dialog is checked before the prompt/status-bar markers.
	if (shows("trust this folder")) {
		return { phase: "dialog", action: "Enter" };
	}

	const promptVisible = shows("\u276f") || shows('Try "');
	const statusBarVisible = shows("bypass permissions") || shows("shift+tab");

	return promptVisible && statusBarVisible ? { phase: "ready" } : { phase: "loading" };
}
957
+
943
958
  describe("waitForTuiReady", () => {
944
959
  let spawnSpy: ReturnType<typeof spyOn>;
945
960
  let sleepSpy: ReturnType<typeof spyOn>;
@@ -961,7 +976,7 @@ describe("waitForTuiReady", () => {
961
976
  mockSpawnResult('Try "help" to get started\nbypass permissions', "", 0),
962
977
  );
963
978
 
964
- const ready = await waitForTuiReady("overstory-agent", 5_000, 500);
979
+ const ready = await waitForTuiReady("overstory-agent", claudeDetectReady, 5_000, 500);
965
980
 
966
981
  expect(ready).toBe(true);
967
982
  // Should not have needed to sleep (content found on first poll)
@@ -985,7 +1000,7 @@ describe("waitForTuiReady", () => {
985
1000
  return mockSpawnResult("", "", 0);
986
1001
  });
987
1002
 
988
- const ready = await waitForTuiReady("overstory-agent", 10_000, 500);
1003
+ const ready = await waitForTuiReady("overstory-agent", claudeDetectReady, 10_000, 500);
989
1004
 
990
1005
  expect(ready).toBe(true);
991
1006
  // Should have slept 3 times (3 empty capture-pane polls before content appeared)
@@ -996,7 +1011,7 @@ describe("waitForTuiReady", () => {
996
1011
  // Pane always empty
997
1012
  spawnSpy.mockImplementation(() => mockSpawnResult("", "", 0));
998
1013
 
999
- const ready = await waitForTuiReady("overstory-agent", 2_000, 500);
1014
+ const ready = await waitForTuiReady("overstory-agent", claudeDetectReady, 2_000, 500);
1000
1015
 
1001
1016
  expect(ready).toBe(false);
1002
1017
  // 2000ms / 500ms = 4 polls, 4 sleeps
@@ -1006,7 +1021,7 @@ describe("waitForTuiReady", () => {
1006
1021
  test("returns false when capture-pane always fails", async () => {
1007
1022
  spawnSpy.mockImplementation(() => mockSpawnResult("", "session not found", 1));
1008
1023
 
1009
- const ready = await waitForTuiReady("dead-session", 1_000, 500);
1024
+ const ready = await waitForTuiReady("dead-session", claudeDetectReady, 1_000, 500);
1010
1025
 
1011
1026
  expect(ready).toBe(false);
1012
1027
  });
@@ -1015,7 +1030,7 @@ describe("waitForTuiReady", () => {
1015
1030
  // Return content immediately with both indicators
1016
1031
  spawnSpy.mockImplementation(() => mockSpawnResult('Try "help"\nshift+tab', "", 0));
1017
1032
 
1018
- const ready = await waitForTuiReady("overstory-agent");
1033
+ const ready = await waitForTuiReady("overstory-agent", claudeDetectReady);
1019
1034
 
1020
1035
  expect(ready).toBe(true);
1021
1036
  });
@@ -1031,7 +1046,7 @@ describe("waitForTuiReady", () => {
1031
1046
  return mockSpawnResult("", "can't find session", 1);
1032
1047
  });
1033
1048
 
1034
- const ready = await waitForTuiReady("dead-session", 15_000, 500);
1049
+ const ready = await waitForTuiReady("dead-session", claudeDetectReady, 15_000, 500);
1035
1050
 
1036
1051
  expect(ready).toBe(false);
1037
1052
  // Should NOT have polled the full timeout (no sleeps — returned immediately)
@@ -1052,7 +1067,7 @@ describe("waitForTuiReady", () => {
1052
1067
  });
1053
1068
 
1054
1069
  // Use a short timeout so the test doesn't take long
1055
- const ready = await waitForTuiReady("loading-session", 1_000, 500);
1070
+ const ready = await waitForTuiReady("loading-session", claudeDetectReady, 1_000, 500);
1056
1071
 
1057
1072
  expect(ready).toBe(false);
1058
1073
  // Should have polled multiple times (not returned early)
@@ -1071,7 +1086,7 @@ describe("waitForTuiReady", () => {
1071
1086
  return mockSpawnResult("", "", 0);
1072
1087
  });
1073
1088
 
1074
- const ready = await waitForTuiReady("overstory-agent", 1_000, 500);
1089
+ const ready = await waitForTuiReady("overstory-agent", claudeDetectReady, 1_000, 500);
1075
1090
 
1076
1091
  expect(ready).toBe(false);
1077
1092
  });
@@ -1087,7 +1102,7 @@ describe("waitForTuiReady", () => {
1087
1102
  return mockSpawnResult("", "", 0);
1088
1103
  });
1089
1104
 
1090
- const ready = await waitForTuiReady("overstory-agent", 1_000, 500);
1105
+ const ready = await waitForTuiReady("overstory-agent", claudeDetectReady, 1_000, 500);
1091
1106
 
1092
1107
  expect(ready).toBe(false);
1093
1108
  });
@@ -1109,7 +1124,7 @@ describe("waitForTuiReady", () => {
1109
1124
  return mockSpawnResult("", "", 0);
1110
1125
  });
1111
1126
 
1112
- const ready = await waitForTuiReady("overstory-agent", 10_000, 500);
1127
+ const ready = await waitForTuiReady("overstory-agent", claudeDetectReady, 10_000, 500);
1113
1128
 
1114
1129
  expect(ready).toBe(true);
1115
1130
  // Should have slept at least twice (2 polls with only prompt before both appeared)
@@ -1138,7 +1153,7 @@ describe("waitForTuiReady", () => {
1138
1153
  return mockSpawnResult("", "", 0);
1139
1154
  });
1140
1155
 
1141
- const ready = await waitForTuiReady("overstory-agent", 10_000, 500);
1156
+ const ready = await waitForTuiReady("overstory-agent", claudeDetectReady, 10_000, 500);
1142
1157
 
1143
1158
  expect(ready).toBe(true);
1144
1159
  // sendKeys should have been called once to confirm the trust dialog
@@ -1169,10 +1184,10 @@ describe("waitForTuiReady", () => {
1169
1184
  return mockSpawnResult("", "", 0);
1170
1185
  });
1171
1186
 
1172
- const ready = await waitForTuiReady("overstory-agent", 10_000, 500);
1187
+ const ready = await waitForTuiReady("overstory-agent", claudeDetectReady, 10_000, 500);
1173
1188
 
1174
1189
  expect(ready).toBe(true);
1175
- // sendKeys must be called exactly once — trustHandled prevents duplicate Enter sends
1190
+ // sendKeys must be called exactly once — dialogHandled prevents duplicate Enter sends
1176
1191
  expect(sendKeysCalls).toHaveLength(1);
1177
1192
  });
1178
1193
  });
@@ -9,6 +9,7 @@
9
9
 
10
10
  import { dirname, resolve } from "node:path";
11
11
  import { AgentError } from "../errors.ts";
12
+ import type { ReadyState } from "../runtimes/types.ts";
12
13
 
13
14
  /**
14
15
  * Detect the directory containing the overstory binary.
@@ -435,58 +436,43 @@ export async function capturePaneContent(name: string, lines = 50): Promise<stri
435
436
  /**
436
437
  * Wait for a tmux session's TUI to become ready for input.
437
438
  *
438
- * Uses a two-phase readiness check:
439
- * 1. Phase 1 prompt indicator: detects or 'Try "' confirming Claude Code has started
440
- * 2. Phase 2 status bar: detects 'bypass permissions' or 'shift+tab' confirming full TUI render
441
- * Returns true only when BOTH phases have been observed.
442
- *
443
- * Additionally handles the workspace trust dialog: if 'trust this folder' is detected,
444
- * sends Enter to auto-confirm before continuing to wait for the real TUI. The trust
445
- * dialog check must precede phase checks since it replaces the normal TUI temporarily.
439
+ * Delegates all readiness detection to the provided `detectReady` callback,
440
+ * making this function runtime-agnostic. The callback inspects pane content
441
+ * and returns a ReadyState phase: "loading" (keep waiting), "dialog" (send
442
+ * Enter to dismiss, then continue), or "ready" (return true).
446
443
  *
447
444
  * @param name - Tmux session name to poll
445
+ * @param detectReady - Callback that inspects pane content and returns ReadyState
448
446
  * @param timeoutMs - Maximum time to wait before giving up (default 30s)
449
447
  * @param pollIntervalMs - Time between polls (default 500ms)
450
- * @returns true once both prompt indicator AND status bar text detected, false on timeout
448
+ * @returns true once detectReady returns { phase: "ready" }, false on timeout or dead session
451
449
  */
452
450
  export async function waitForTuiReady(
453
451
  name: string,
452
+ detectReady: (paneContent: string) => ReadyState,
454
453
  timeoutMs = 30_000,
455
454
  pollIntervalMs = 500,
456
455
  ): Promise<boolean> {
457
456
  const maxAttempts = Math.ceil(timeoutMs / pollIntervalMs);
458
- let promptSeen = false;
459
- let statusBarSeen = false;
460
- let trustHandled = false;
457
+ let dialogHandled = false;
461
458
 
462
459
  for (let i = 0; i < maxAttempts; i++) {
463
460
  const content = await capturePaneContent(name);
464
461
  if (content !== null) {
465
- // Trust dialog detection — must come before phase checks since it replaces normal TUI
466
- if (!trustHandled && content.includes("trust this folder")) {
462
+ const state = detectReady(content);
463
+
464
+ if (state.phase === "dialog" && !dialogHandled) {
467
465
  await sendKeys(name, "");
468
- trustHandled = true;
466
+ dialogHandled = true;
469
467
  await Bun.sleep(pollIntervalMs);
470
468
  continue;
471
469
  }
472
470
 
473
- // Phase 1: prompt indicator confirms Claude Code has started
474
- if (content.includes("\u276f") || content.includes('Try "')) {
475
- promptSeen = true;
476
- }
477
-
478
- // Phase 2: status bar text confirms full TUI render
479
- if (content.includes("bypass permissions") || content.includes("shift+tab")) {
480
- statusBarSeen = true;
481
- }
482
-
483
- // Return true only when both phases complete
484
- if (promptSeen && statusBarSeen) {
471
+ if (state.phase === "ready") {
485
472
  return true;
486
473
  }
487
474
  }
488
475
 
489
- // Check if session died — no point waiting if it's gone
490
476
  const alive = await isSessionAlive(name);
491
477
  if (!alive) {
492
478
  return false;