@os-eco/overstory-cli 0.6.11 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -9
- package/agents/lead.md +20 -19
- package/package.json +5 -3
- package/src/agents/overlay.test.ts +23 -0
- package/src/agents/overlay.ts +5 -4
- package/src/commands/coordinator.ts +21 -9
- package/src/commands/costs.test.ts +1 -1
- package/src/commands/costs.ts +13 -20
- package/src/commands/dashboard.ts +38 -138
- package/src/commands/doctor.test.ts +1 -1
- package/src/commands/doctor.ts +2 -2
- package/src/commands/ecosystem.ts +2 -1
- package/src/commands/errors.test.ts +4 -5
- package/src/commands/errors.ts +4 -62
- package/src/commands/feed.test.ts +2 -2
- package/src/commands/feed.ts +12 -106
- package/src/commands/inspect.ts +10 -44
- package/src/commands/logs.ts +7 -63
- package/src/commands/metrics.test.ts +2 -2
- package/src/commands/metrics.ts +3 -17
- package/src/commands/monitor.ts +17 -7
- package/src/commands/replay.test.ts +2 -2
- package/src/commands/replay.ts +12 -135
- package/src/commands/run.ts +7 -23
- package/src/commands/sling.test.ts +53 -0
- package/src/commands/sling.ts +25 -10
- package/src/commands/status.ts +4 -17
- package/src/commands/supervisor.ts +18 -8
- package/src/commands/trace.test.ts +5 -6
- package/src/commands/trace.ts +11 -109
- package/src/config.ts +10 -0
- package/src/index.ts +2 -1
- package/src/logging/format.ts +214 -0
- package/src/logging/theme.ts +132 -0
- package/src/metrics/store.test.ts +46 -0
- package/src/metrics/store.ts +11 -0
- package/src/mulch/client.test.ts +20 -0
- package/src/mulch/client.ts +312 -45
- package/src/runtimes/claude.test.ts +616 -0
- package/src/runtimes/claude.ts +218 -0
- package/src/runtimes/registry.test.ts +53 -0
- package/src/runtimes/registry.ts +33 -0
- package/src/runtimes/types.ts +125 -0
- package/src/types.ts +4 -0
- package/src/worktree/tmux.test.ts +28 -13
- package/src/worktree/tmux.ts +14 -28
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
// Claude Code runtime adapter for overstory's AgentRuntime interface.
|
|
2
|
+
// Pure extraction — no new behavior. All implementation delegates to existing code.
|
|
3
|
+
// Phase 0: file exists and compiles. Callers are not rewired until Phase 2.
|
|
4
|
+
|
|
5
|
+
import { mkdir } from "node:fs/promises";
|
|
6
|
+
import { join } from "node:path";
|
|
7
|
+
import { deployHooks } from "../agents/hooks-deployer.ts";
|
|
8
|
+
import { estimateCost, parseTranscriptUsage } from "../metrics/transcript.ts";
|
|
9
|
+
import type { ResolvedModel } from "../types.ts";
|
|
10
|
+
import type {
|
|
11
|
+
AgentRuntime,
|
|
12
|
+
HooksDef,
|
|
13
|
+
OverlayContent,
|
|
14
|
+
ReadyState,
|
|
15
|
+
SpawnOpts,
|
|
16
|
+
TranscriptSummary,
|
|
17
|
+
} from "./types.ts";
|
|
18
|
+
|
|
19
|
+
/**
 * AgentRuntime adapter for the `claude` CLI (Anthropic's Claude Code).
 *
 * The adapter is a thin translation layer: it turns runtime-agnostic inputs
 * (SpawnOpts, OverlayContent, HooksDef, ResolvedModel) into the command
 * strings, files and environment maps that Claude Code expects, and hands the
 * heavy lifting to existing overstory helpers.
 *
 * Phase 0: the file exists, compiles, and exports the class.
 * Phase 2 will rewire callers (sling.ts, coordinator.ts, etc.) to go through it.
 */
export class ClaudeRuntime implements AgentRuntime {
	/** Name under which this runtime is identified. */
	readonly id = "claude";

	/** Instruction file location, relative to the root of a worktree. */
	readonly instructionPath = ".claude/CLAUDE.md";

	/**
	 * Assemble the shell command line that starts an interactive Claude Code agent.
	 *
	 * Flag mapping:
	 *   - `model`              → `--model <model>` (inserted as-is, not quoted)
	 *   - `permissionMode`     → `--permission-mode <mode>`, where "bypass" becomes
	 *                            "bypassPermissions" and anything else becomes "default"
	 *   - `appendSystemPrompt` → `--append-system-prompt '<escaped>'` (only when non-empty)
	 *
	 * `systemPrompt`, `cwd` and `env` are not read here; per the original notes,
	 * `cwd` and `env` are applied by the tmux session creator instead of being
	 * embedded in the command string.
	 *
	 * @param opts - Spawn options (model, permissionMode, appendSystemPrompt)
	 * @returns Shell command string, intended to be handed to tmux as the initial command
	 */
	buildSpawnCommand(opts: SpawnOpts): string {
		const permissionFlag = opts.permissionMode === "bypass" ? "bypassPermissions" : "default";
		const segments = [`claude --model ${opts.model}`, `--permission-mode ${permissionFlag}`];

		if (opts.appendSystemPrompt) {
			// A POSIX single-quoted string cannot contain a single quote, so each
			// one is rewritten as: close quote, backslash-escaped quote, reopen quote.
			const quoted = opts.appendSystemPrompt.split("'").join("'\\''");
			segments.push(`--append-system-prompt '${quoted}'`);
		}

		return segments.join(" ");
	}

	/**
	 * Produce the argv for a headless, one-shot Claude invocation.
	 *
	 * The result is an argv array (no shell quoting needed) meant for `Bun.spawn()`.
	 * Per the original notes, `--print` makes Claude Code run the prompt and exit,
	 * and the method serves merge/resolver.ts and watchdog/triage.ts.
	 *
	 * @param prompt - Prompt text passed via `-p`
	 * @param model - Optional model override; when undefined no `--model` flag is added
	 * @returns Argv array for Bun.spawn
	 */
	buildPrintCommand(prompt: string, model?: string): string[] {
		const argv = ["claude", "--print", "-p", prompt];
		return model !== undefined ? [...argv, "--model", model] : argv;
	}

	/**
	 * Write per-agent instructions and guards into a worktree.
	 *
	 * Steps:
	 *   1. When `overlay` is provided, create `<worktreePath>/.claude/` (recursively)
	 *      and write `overlay.content` verbatim to `CLAUDE.md` inside it.
	 *      When `overlay` is undefined this step is skipped (hooks-only deployment).
	 *   2. Always call `deployHooks()` with the fields of `hooks`. Note that the
	 *      hooks call uses `hooks.worktreePath`, not the `worktreePath` argument.
	 *
	 * Errors raised by the filesystem writes or by `deployHooks()` are not caught
	 * here and propagate to the caller.
	 *
	 * @param worktreePath - Worktree root under which `.claude/CLAUDE.md` is written
	 * @param overlay - Overlay to write as CLAUDE.md, or undefined for hooks-only deployment
	 * @param hooks - Hook definition forwarded to deployHooks
	 */
	async deployConfig(
		worktreePath: string,
		overlay: OverlayContent | undefined,
		hooks: HooksDef,
	): Promise<void> {
		if (overlay) {
			const configDir = join(worktreePath, ".claude");
			await mkdir(configDir, { recursive: true });
			await Bun.write(join(configDir, "CLAUDE.md"), overlay.content);
		}

		const { worktreePath: hooksRoot, agentName, capability, qualityGates } = hooks;
		await deployHooks(hooksRoot, agentName, capability, qualityGates);
	}

	/**
	 * Classify a captured tmux pane snapshot into a readiness phase.
	 *
	 * Works purely on the supplied string; capturing the pane and acting on the
	 * result (e.g. sending a key to dismiss a dialog) is left to the caller.
	 *
	 * Rules, in order:
	 *   - contains "trust this folder"            → `{ phase: "dialog", action: "Enter" }`
	 *   - prompt marker (❯ or 'Try "') AND status-bar text
	 *     ("bypass permissions" or "shift+tab")   → `{ phase: "ready" }`
	 *   - anything else                           → `{ phase: "loading" }`
	 *
	 * @param paneContent - Captured tmux pane content to analyze
	 * @returns Current readiness phase
	 */
	detectReady(paneContent: string): ReadyState {
		const shows = (marker: string): boolean => paneContent.includes(marker);

		// The trust dialog is checked first so it wins over any prompt/status text
		// that may also be present in the snapshot.
		if (shows("trust this folder")) {
			return { phase: "dialog", action: "Enter" };
		}

		// Either the prompt character or the welcome-banner hint counts as "prompt seen".
		const promptVisible = shows("\u276f") || shows('Try "');
		// Either status-bar phrase counts as "status bar rendered".
		const statusBarVisible = shows("bypass permissions") || shows("shift+tab");

		return promptVisible && statusBarVisible ? { phase: "ready" } : { phase: "loading" };
	}

	/**
	 * Summarize token usage from a Claude Code transcript file.
	 *
	 * Returns null when the file does not exist or when anything inside the
	 * parsing step throws. Otherwise the usage reported by
	 * `parseTranscriptUsage()` is narrowed to the three TranscriptSummary fields;
	 * a null `modelUsed` becomes the empty string.
	 *
	 * @param path - Path to the transcript file
	 * @returns Aggregated token usage, or null if unavailable
	 */
	async parseTranscript(path: string): Promise<TranscriptSummary | null> {
		const present = await Bun.file(path).exists();
		if (!present) {
			return null;
		}

		try {
			const usage = await parseTranscriptUsage(path);

			// The cost estimate is computed but its result is discarded because
			// TranscriptSummary has no cost field.
			// NOTE(review): the original comment says this call validates that the
			// model is recognized — confirm against estimateCost's implementation.
			if (usage.modelUsed !== null) {
				estimateCost(usage);
			}

			const summary: TranscriptSummary = {
				inputTokens: usage.inputTokens,
				outputTokens: usage.outputTokens,
				model: usage.modelUsed ?? "",
			};
			return summary;
		} catch {
			return null;
		}
	}

	/**
	 * Environment variables needed to route requests for the resolved model.
	 *
	 * Simply hands back `model.env`, or a fresh empty object when it is null or
	 * undefined. Per the original notes, callers merge this with their own
	 * OVERSTORY_* variables before creating the session.
	 *
	 * @param model - Resolved model with optional provider env vars
	 * @returns Environment variable map (may be empty)
	 */
	buildEnv(model: ResolvedModel): Record<string, string> {
		const providerEnv = model.env;
		return providerEnv ?? {};
	}
}

/** Shared ready-made instance for callers that do not need dependency injection. */
export const claudeRuntime = new ClaudeRuntime();
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
import { describe, expect, it } from "bun:test";
|
|
2
|
+
import type { OverstoryConfig } from "../types.ts";
|
|
3
|
+
import { ClaudeRuntime } from "./claude.ts";
|
|
4
|
+
import { getRuntime } from "./registry.ts";
|
|
5
|
+
|
|
6
|
+
// Spec for the runtime registry lookup: defaulting, explicit names, config
// fallback, unknown-name errors, and fresh-instance-per-call behavior.
describe("getRuntime", () => {
	// Minimal config stub: only `runtime.default` is populated, the rest is cast away.
	const configWithDefault = (runtimeName: string): OverstoryConfig =>
		({ runtime: { default: runtimeName } }) as OverstoryConfig;

	it("returns a ClaudeRuntime by default (no args)", () => {
		const resolved = getRuntime();

		expect(resolved).toBeInstanceOf(ClaudeRuntime);
		expect(resolved.id).toBe("claude");
	});

	it('returns a ClaudeRuntime when name is "claude"', () => {
		const resolved = getRuntime("claude");

		expect(resolved).toBeInstanceOf(ClaudeRuntime);
		expect(resolved.id).toBe("claude");
	});

	it("throws with a helpful message for an unknown runtime", () => {
		const lookup = () => getRuntime("unknown-runtime");

		expect(lookup).toThrow('Unknown runtime: "unknown-runtime". Available: claude');
	});

	it("uses config.runtime.default when name is omitted", () => {
		const resolved = getRuntime(undefined, configWithDefault("claude"));

		expect(resolved).toBeInstanceOf(ClaudeRuntime);
		expect(resolved.id).toBe("claude");
	});

	it("explicit name overrides config.runtime.default", () => {
		// "claude" is the only registered runtime, so both values coincide here;
		// the point is that the explicit name argument is consulted first.
		const resolved = getRuntime("claude", configWithDefault("claude"));

		expect(resolved).toBeInstanceOf(ClaudeRuntime);
	});

	it("throws for unknown runtime even when config default is set", () => {
		// With no name argument the lookup falls back to the config default
		// "codex", which is not registered.
		const lookup = () => getRuntime(undefined, configWithDefault("codex"));

		expect(lookup).toThrow('Unknown runtime: "codex". Available: claude');
	});

	it("returns a new instance on each call (factory pattern)", () => {
		const first = getRuntime();
		const second = getRuntime();

		expect(first).not.toBe(second);
	});
});
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
// Runtime registry — maps runtime names to adapter factory functions.
|
|
2
|
+
// This is the ONLY module that imports concrete adapter classes.
|
|
3
|
+
|
|
4
|
+
import type { OverstoryConfig } from "../types.ts";
|
|
5
|
+
import { ClaudeRuntime } from "./claude.ts";
|
|
6
|
+
import type { AgentRuntime } from "./types.ts";
|
|
7
|
+
|
|
8
|
+
/**
 * Lookup table of available runtime adapters, keyed by runtime name.
 * Each value is a zero-argument factory, so a lookup can build a new adapter
 * instance every time it is invoked.
 */
const runtimes = new Map<string, () => AgentRuntime>();
runtimes.set("claude", () => new ClaudeRuntime());
|
|
10
|
+
|
|
11
|
+
/**
 * Look up a runtime adapter and build an instance of it.
 *
 * The runtime name is chosen from the first of these that is neither null
 * nor undefined:
 *   1. the explicit `name` argument
 *   2. `config.runtime.default`
 *   3. the literal `"claude"`
 *
 * @param name - Runtime name to resolve (e.g. "claude"). Omit to use the config default.
 * @param config - Overstory config consulted for the default runtime name.
 * @returns The AgentRuntime produced by the registered factory for that name.
 * @throws {Error} If no factory is registered under the resolved name; the
 *   message lists every registered name.
 */
export function getRuntime(name?: string, config?: OverstoryConfig): AgentRuntime {
	const key = name ?? config?.runtime?.default ?? "claude";
	const create = runtimes.get(key);

	if (create) {
		return create();
	}

	const available = [...runtimes.keys()].join(", ");
	throw new Error(`Unknown runtime: "${key}". Available: ${available}`);
}
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
// Runtime abstraction types for multi-provider agent support.
|
|
2
|
+
// See docs/runtime-abstraction.md for design rationale and coupling inventory.
|
|
3
|
+
|
|
4
|
+
import type { QualityGate, ResolvedModel } from "../types.ts";
|
|
5
|
+
|
|
6
|
+
// === Spawn ===
|
|
7
|
+
|
|
8
|
+
/**
 * Everything a runtime adapter may need to launch an interactive agent process.
 */
export interface SpawnOpts {
	/**
	 * Model reference — either an alias or a provider-qualified name
	 * (e.g. "sonnet" or "openrouter/gpt-5").
	 */
	model: string;

	/**
	 * How the agent handles permission prompts:
	 * "bypass" for trusted builders, "ask" for interactive agents.
	 */
	permissionMode: "bypass" | "ask";

	/** Optional text injected ahead of the agent's base instructions. */
	systemPrompt?: string;

	/** Optional text appended after the agent's base instructions. */
	appendSystemPrompt?: string;

	/** Directory the spawned process should run in. */
	cwd: string;

	/** Extra environment variables for the spawned process. */
	env: Record<string, string>;
}
|
|
23
|
+
|
|
24
|
+
// === Readiness ===
|
|
25
|
+
|
|
26
|
+
/**
 * Readiness of an agent TUI, as inferred from tmux pane content.
 *
 * Variants (discriminated by `phase`):
 *   - "loading" — not ready yet.
 *   - "dialog"  — a dialog was detected; `action` carries the key associated
 *                 with dismissing it.
 *   - "ready"   — the agent is ready.
 *
 * Headless runtimes (codex exec, pi --mode rpc) always return { phase: "ready" }.
 */
export type ReadyState =
	| {
			phase: "loading";
	  }
	| {
			phase: "dialog";
			action: string;
	  }
	| {
			phase: "ready";
	  };
|
|
34
|
+
|
|
35
|
+
// === Config Deployment ===
|
|
36
|
+
|
|
37
|
+
/**
 * Instruction content destined for a worktree, independent of which runtime
 * will consume it.
 */
export interface OverlayContent {
	/** Complete markdown body of the agent's instruction file. */
	content: string;
}
|
|
42
|
+
|
|
43
|
+
/**
 * Hook/guard settings to deploy into a worktree, expressed without reference
 * to any particular runtime. Each adapter converts this into its own guard
 * mechanism (e.g., settings.local.json hooks for Claude Code, guard
 * extensions for Pi).
 */
export interface HooksDef {
	/** Name of the agent; injected into hook commands. */
	agentName: string;

	/** The agent's capability (builder, scout, reviewer, lead, etc.). */
	capability: string;

	/** Absolute path of the agent's worktree, used for path-boundary enforcement. */
	worktreePath: string;

	/** Optional quality gates the agent must pass before reporting completion. */
	qualityGates?: QualityGate[];
}
|
|
58
|
+
|
|
59
|
+
// === Transcripts ===
|
|
60
|
+
|
|
61
|
+
/**
 * Token usage pulled from a session transcript and normalized so that every
 * runtime reports the same shape.
 */
export interface TranscriptSummary {
	/** Input token count. */
	inputTokens: number;

	/** Output token count. */
	outputTokens: number;

	/** Model identifier exactly as the runtime reported it (e.g. "claude-sonnet-4-6"). */
	model: string;
}
|
|
68
|
+
|
|
69
|
+
// === Runtime Interface ===
|
|
70
|
+
|
|
71
|
+
/**
 * The interface every agent runtime adapter has to satisfy.
 *
 * One adapter file (roughly 200-400 lines) exists per runtime — Claude Code,
 * Codex, Pi, OpenCode, ... The orchestration engine is meant to talk to a
 * runtime exclusively through these members and never to invoke the
 * runtime's CLI on its own.
 */
export interface AgentRuntime {
	/** Identifier that distinguishes this runtime (e.g. "claude", "codex", "pi"). */
	id: string;

	/** Where the instruction file lives, relative to a worktree root (e.g. ".claude/CLAUDE.md"). */
	readonly instructionPath: string;

	/**
	 * Produce the shell command string that launches an interactive agent
	 * inside a tmux pane.
	 */
	buildSpawnCommand(opts: SpawnOpts): string;

	/**
	 * Produce the argv array for a headless, one-shot AI call.
	 * Consumers: merge/resolver.ts and watchdog/triage.ts (AI-assisted operations).
	 */
	buildPrintCommand(prompt: string, model?: string): string[];

	/**
	 * Install per-agent instructions and guards into a worktree.
	 *
	 *   - Claude Code: .claude/CLAUDE.md + settings.local.json hooks.
	 *   - Codex: AGENTS.md (no hook deployment needed).
	 *   - Pi: .claude/CLAUDE.md + a guard extension in .pi/extensions/.
	 *
	 * Passing `undefined` for `overlay` deploys hooks only; no instruction
	 * file is written.
	 */
	deployConfig(
		worktreePath: string,
		overlay: OverlayContent | undefined,
		hooks: HooksDef,
	): Promise<void>;

	/**
	 * Decide how ready the agent is, given tmux pane content.
	 * Headless runtimes that exit when done should return { phase: "ready" } unconditionally.
	 */
	detectReady(paneContent: string): ReadyState;

	/**
	 * Turn a session transcript file into normalized token usage.
	 * Resolves to null when the transcript does not exist or cannot be parsed.
	 */
	parseTranscript(path: string): Promise<TranscriptSummary | null>;

	/**
	 * Produce the environment variables this runtime needs for model/provider
	 * routing. Claude Code uses ANTHROPIC_API_KEY; Codex uses OPENAI_API_KEY;
	 * Pi passes the provider's authTokenEnv directly.
	 */
	buildEnv(model: ResolvedModel): Record<string, string>;
}
|
package/src/types.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { afterEach, beforeEach, describe, expect, spyOn, test } from "bun:test";
|
|
2
2
|
import { AgentError } from "../errors.ts";
|
|
3
|
+
import type { ReadyState } from "../runtimes/types.ts";
|
|
3
4
|
import {
|
|
4
5
|
capturePaneContent,
|
|
5
6
|
createSession,
|
|
@@ -940,6 +941,20 @@ describe("capturePaneContent", () => {
|
|
|
940
941
|
});
|
|
941
942
|
});
|
|
942
943
|
|
|
944
|
+
/**
 * Test-only readiness detector with Claude-like rules, mirroring the behavior
 * that used to be hardcoded:
 *   - "trust this folder" present                      → dialog (action "Enter")
 *   - prompt marker AND status-bar text both present   → ready
 *   - otherwise                                        → loading
 */
function claudeDetectReady(paneContent: string): ReadyState {
	const containsAny = (markers: string[]): boolean =>
		markers.some((marker) => paneContent.includes(marker));

	// The trust dialog is checked before the prompt/status-bar markers.
	if (containsAny(["trust this folder"])) {
		return { phase: "dialog", action: "Enter" };
	}

	const promptSeen = containsAny(["\u276f", 'Try "']);
	const statusBarSeen = containsAny(["bypass permissions", "shift+tab"]);

	return promptSeen && statusBarSeen ? { phase: "ready" } : { phase: "loading" };
}
|
|
957
|
+
|
|
943
958
|
describe("waitForTuiReady", () => {
|
|
944
959
|
let spawnSpy: ReturnType<typeof spyOn>;
|
|
945
960
|
let sleepSpy: ReturnType<typeof spyOn>;
|
|
@@ -961,7 +976,7 @@ describe("waitForTuiReady", () => {
|
|
|
961
976
|
mockSpawnResult('Try "help" to get started\nbypass permissions', "", 0),
|
|
962
977
|
);
|
|
963
978
|
|
|
964
|
-
const ready = await waitForTuiReady("overstory-agent", 5_000, 500);
|
|
979
|
+
const ready = await waitForTuiReady("overstory-agent", claudeDetectReady, 5_000, 500);
|
|
965
980
|
|
|
966
981
|
expect(ready).toBe(true);
|
|
967
982
|
// Should not have needed to sleep (content found on first poll)
|
|
@@ -985,7 +1000,7 @@ describe("waitForTuiReady", () => {
|
|
|
985
1000
|
return mockSpawnResult("", "", 0);
|
|
986
1001
|
});
|
|
987
1002
|
|
|
988
|
-
const ready = await waitForTuiReady("overstory-agent", 10_000, 500);
|
|
1003
|
+
const ready = await waitForTuiReady("overstory-agent", claudeDetectReady, 10_000, 500);
|
|
989
1004
|
|
|
990
1005
|
expect(ready).toBe(true);
|
|
991
1006
|
// Should have slept 3 times (3 empty capture-pane polls before content appeared)
|
|
@@ -996,7 +1011,7 @@ describe("waitForTuiReady", () => {
|
|
|
996
1011
|
// Pane always empty
|
|
997
1012
|
spawnSpy.mockImplementation(() => mockSpawnResult("", "", 0));
|
|
998
1013
|
|
|
999
|
-
const ready = await waitForTuiReady("overstory-agent", 2_000, 500);
|
|
1014
|
+
const ready = await waitForTuiReady("overstory-agent", claudeDetectReady, 2_000, 500);
|
|
1000
1015
|
|
|
1001
1016
|
expect(ready).toBe(false);
|
|
1002
1017
|
// 2000ms / 500ms = 4 polls, 4 sleeps
|
|
@@ -1006,7 +1021,7 @@ describe("waitForTuiReady", () => {
|
|
|
1006
1021
|
test("returns false when capture-pane always fails", async () => {
|
|
1007
1022
|
spawnSpy.mockImplementation(() => mockSpawnResult("", "session not found", 1));
|
|
1008
1023
|
|
|
1009
|
-
const ready = await waitForTuiReady("dead-session", 1_000, 500);
|
|
1024
|
+
const ready = await waitForTuiReady("dead-session", claudeDetectReady, 1_000, 500);
|
|
1010
1025
|
|
|
1011
1026
|
expect(ready).toBe(false);
|
|
1012
1027
|
});
|
|
@@ -1015,7 +1030,7 @@ describe("waitForTuiReady", () => {
|
|
|
1015
1030
|
// Return content immediately with both indicators
|
|
1016
1031
|
spawnSpy.mockImplementation(() => mockSpawnResult('Try "help"\nshift+tab', "", 0));
|
|
1017
1032
|
|
|
1018
|
-
const ready = await waitForTuiReady("overstory-agent");
|
|
1033
|
+
const ready = await waitForTuiReady("overstory-agent", claudeDetectReady);
|
|
1019
1034
|
|
|
1020
1035
|
expect(ready).toBe(true);
|
|
1021
1036
|
});
|
|
@@ -1031,7 +1046,7 @@ describe("waitForTuiReady", () => {
|
|
|
1031
1046
|
return mockSpawnResult("", "can't find session", 1);
|
|
1032
1047
|
});
|
|
1033
1048
|
|
|
1034
|
-
const ready = await waitForTuiReady("dead-session", 15_000, 500);
|
|
1049
|
+
const ready = await waitForTuiReady("dead-session", claudeDetectReady, 15_000, 500);
|
|
1035
1050
|
|
|
1036
1051
|
expect(ready).toBe(false);
|
|
1037
1052
|
// Should NOT have polled the full timeout (no sleeps — returned immediately)
|
|
@@ -1052,7 +1067,7 @@ describe("waitForTuiReady", () => {
|
|
|
1052
1067
|
});
|
|
1053
1068
|
|
|
1054
1069
|
// Use a short timeout so the test doesn't take long
|
|
1055
|
-
const ready = await waitForTuiReady("loading-session", 1_000, 500);
|
|
1070
|
+
const ready = await waitForTuiReady("loading-session", claudeDetectReady, 1_000, 500);
|
|
1056
1071
|
|
|
1057
1072
|
expect(ready).toBe(false);
|
|
1058
1073
|
// Should have polled multiple times (not returned early)
|
|
@@ -1071,7 +1086,7 @@ describe("waitForTuiReady", () => {
|
|
|
1071
1086
|
return mockSpawnResult("", "", 0);
|
|
1072
1087
|
});
|
|
1073
1088
|
|
|
1074
|
-
const ready = await waitForTuiReady("overstory-agent", 1_000, 500);
|
|
1089
|
+
const ready = await waitForTuiReady("overstory-agent", claudeDetectReady, 1_000, 500);
|
|
1075
1090
|
|
|
1076
1091
|
expect(ready).toBe(false);
|
|
1077
1092
|
});
|
|
@@ -1087,7 +1102,7 @@ describe("waitForTuiReady", () => {
|
|
|
1087
1102
|
return mockSpawnResult("", "", 0);
|
|
1088
1103
|
});
|
|
1089
1104
|
|
|
1090
|
-
const ready = await waitForTuiReady("overstory-agent", 1_000, 500);
|
|
1105
|
+
const ready = await waitForTuiReady("overstory-agent", claudeDetectReady, 1_000, 500);
|
|
1091
1106
|
|
|
1092
1107
|
expect(ready).toBe(false);
|
|
1093
1108
|
});
|
|
@@ -1109,7 +1124,7 @@ describe("waitForTuiReady", () => {
|
|
|
1109
1124
|
return mockSpawnResult("", "", 0);
|
|
1110
1125
|
});
|
|
1111
1126
|
|
|
1112
|
-
const ready = await waitForTuiReady("overstory-agent", 10_000, 500);
|
|
1127
|
+
const ready = await waitForTuiReady("overstory-agent", claudeDetectReady, 10_000, 500);
|
|
1113
1128
|
|
|
1114
1129
|
expect(ready).toBe(true);
|
|
1115
1130
|
// Should have slept at least twice (2 polls with only prompt before both appeared)
|
|
@@ -1138,7 +1153,7 @@ describe("waitForTuiReady", () => {
|
|
|
1138
1153
|
return mockSpawnResult("", "", 0);
|
|
1139
1154
|
});
|
|
1140
1155
|
|
|
1141
|
-
const ready = await waitForTuiReady("overstory-agent", 10_000, 500);
|
|
1156
|
+
const ready = await waitForTuiReady("overstory-agent", claudeDetectReady, 10_000, 500);
|
|
1142
1157
|
|
|
1143
1158
|
expect(ready).toBe(true);
|
|
1144
1159
|
// sendKeys should have been called once to confirm the trust dialog
|
|
@@ -1169,10 +1184,10 @@ describe("waitForTuiReady", () => {
|
|
|
1169
1184
|
return mockSpawnResult("", "", 0);
|
|
1170
1185
|
});
|
|
1171
1186
|
|
|
1172
|
-
const ready = await waitForTuiReady("overstory-agent", 10_000, 500);
|
|
1187
|
+
const ready = await waitForTuiReady("overstory-agent", claudeDetectReady, 10_000, 500);
|
|
1173
1188
|
|
|
1174
1189
|
expect(ready).toBe(true);
|
|
1175
|
-
// sendKeys must be called exactly once —
|
|
1190
|
+
// sendKeys must be called exactly once — dialogHandled prevents duplicate Enter sends
|
|
1176
1191
|
expect(sendKeysCalls).toHaveLength(1);
|
|
1177
1192
|
});
|
|
1178
1193
|
});
|
package/src/worktree/tmux.ts
CHANGED
|
@@ -9,6 +9,7 @@
|
|
|
9
9
|
|
|
10
10
|
import { dirname, resolve } from "node:path";
|
|
11
11
|
import { AgentError } from "../errors.ts";
|
|
12
|
+
import type { ReadyState } from "../runtimes/types.ts";
|
|
12
13
|
|
|
13
14
|
/**
|
|
14
15
|
* Detect the directory containing the overstory binary.
|
|
@@ -435,58 +436,43 @@ export async function capturePaneContent(name: string, lines = 50): Promise<stri
|
|
|
435
436
|
/**
|
|
436
437
|
* Wait for a tmux session's TUI to become ready for input.
|
|
437
438
|
*
|
|
438
|
-
*
|
|
439
|
-
*
|
|
440
|
-
*
|
|
441
|
-
*
|
|
442
|
-
*
|
|
443
|
-
* Additionally handles the workspace trust dialog: if 'trust this folder' is detected,
|
|
444
|
-
* sends Enter to auto-confirm before continuing to wait for the real TUI. The trust
|
|
445
|
-
* dialog check must precede phase checks since it replaces the normal TUI temporarily.
|
|
439
|
+
* Delegates all readiness detection to the provided `detectReady` callback,
|
|
440
|
+
* making this function runtime-agnostic. The callback inspects pane content
|
|
441
|
+
* and returns a ReadyState phase: "loading" (keep waiting), "dialog" (send
|
|
442
|
+
* Enter to dismiss, then continue), or "ready" (return true).
|
|
446
443
|
*
|
|
447
444
|
* @param name - Tmux session name to poll
|
|
445
|
+
* @param detectReady - Callback that inspects pane content and returns ReadyState
|
|
448
446
|
* @param timeoutMs - Maximum time to wait before giving up (default 30s)
|
|
449
447
|
* @param pollIntervalMs - Time between polls (default 500ms)
|
|
450
|
-
* @returns true once
|
|
448
|
+
* @returns true once detectReady returns { phase: "ready" }, false on timeout or dead session
|
|
451
449
|
*/
|
|
452
450
|
export async function waitForTuiReady(
|
|
453
451
|
name: string,
|
|
452
|
+
detectReady: (paneContent: string) => ReadyState,
|
|
454
453
|
timeoutMs = 30_000,
|
|
455
454
|
pollIntervalMs = 500,
|
|
456
455
|
): Promise<boolean> {
|
|
457
456
|
const maxAttempts = Math.ceil(timeoutMs / pollIntervalMs);
|
|
458
|
-
let
|
|
459
|
-
let statusBarSeen = false;
|
|
460
|
-
let trustHandled = false;
|
|
457
|
+
let dialogHandled = false;
|
|
461
458
|
|
|
462
459
|
for (let i = 0; i < maxAttempts; i++) {
|
|
463
460
|
const content = await capturePaneContent(name);
|
|
464
461
|
if (content !== null) {
|
|
465
|
-
|
|
466
|
-
|
|
462
|
+
const state = detectReady(content);
|
|
463
|
+
|
|
464
|
+
if (state.phase === "dialog" && !dialogHandled) {
|
|
467
465
|
await sendKeys(name, "");
|
|
468
|
-
|
|
466
|
+
dialogHandled = true;
|
|
469
467
|
await Bun.sleep(pollIntervalMs);
|
|
470
468
|
continue;
|
|
471
469
|
}
|
|
472
470
|
|
|
473
|
-
|
|
474
|
-
if (content.includes("\u276f") || content.includes('Try "')) {
|
|
475
|
-
promptSeen = true;
|
|
476
|
-
}
|
|
477
|
-
|
|
478
|
-
// Phase 2: status bar text confirms full TUI render
|
|
479
|
-
if (content.includes("bypass permissions") || content.includes("shift+tab")) {
|
|
480
|
-
statusBarSeen = true;
|
|
481
|
-
}
|
|
482
|
-
|
|
483
|
-
// Return true only when both phases complete
|
|
484
|
-
if (promptSeen && statusBarSeen) {
|
|
471
|
+
if (state.phase === "ready") {
|
|
485
472
|
return true;
|
|
486
473
|
}
|
|
487
474
|
}
|
|
488
475
|
|
|
489
|
-
// Check if session died — no point waiting if it's gone
|
|
490
476
|
const alive = await isSessionAlive(name);
|
|
491
477
|
if (!alive) {
|
|
492
478
|
return false;
|