@os-eco/overstory-cli 0.7.4 → 0.7.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,250 @@
1
+ import type { OverstoryConfig, ProviderConfig } from "../types.ts";
2
+ import type { DoctorCheck, DoctorCheckFn } from "./types.ts";
3
+
4
+ /** Roles that rely heavily on tool-use (function calling). */
5
+ const TOOL_HEAVY_ROLES = new Set(["builder", "scout", "merger"]);
6
+
7
/**
 * Provider and multi-runtime configuration checks.
 *
 * Validates gateway provider reachability, auth tokens, model-provider references,
 * and tool-use compatibility across configured runtimes.
 *
 * Checks are emitted in a fixed order: providers-configured, one
 * provider-reachable-{name} per gateway provider with a baseUrl, one
 * provider-auth-token-{name} per gateway provider with an authTokenEnv,
 * tool-use-compat, model-provider-ref(s), and finally gateway-api-key-reminder
 * when at least one gateway provider exists.
 *
 * @param config - Configuration whose `providers` and `models` sections are inspected.
 * @param _overstoryDir - Unused by this check.
 * @returns The collected checks, in the order described above.
 */
export const checkProviders: DoctorCheckFn = async (
  config,
  _overstoryDir,
): Promise<DoctorCheck[]> => {
  const checks: DoctorCheck[] = [];

  // Base check: at least one provider configured
  checks.push(buildProvidersConfigured(config));

  // Identify gateway providers
  const gatewayEntries = Object.entries(config.providers).filter(([, p]) => p.type === "gateway");

  // Check 1: provider-reachable-{name} — one per gateway provider with baseUrl.
  // The probes run concurrently: each one may wait out its full timeout, so awaiting
  // them one after another would multiply the delay by the number of unreachable
  // gateways. Promise.all resolves in input order, so the result order is unchanged.
  const reachability = await Promise.all(
    gatewayEntries
      .filter(([, provider]) => provider.baseUrl)
      .map(([name, provider]) => checkProviderReachable(name, provider)),
  );
  checks.push(...reachability);

  // Check 2: provider-auth-token-{name} — one per gateway provider with authTokenEnv
  for (const [name, provider] of gatewayEntries) {
    if (provider.authTokenEnv) {
      checks.push(buildProviderAuthToken(name, provider));
    }
  }

  // Check 3: tool-use-compat — one warn per tool-heavy role using a provider-prefixed model
  checks.push(...buildToolUseCompat(config));

  // Check 4: model-provider-ref(s) — one per provider-prefixed model, or single pass
  checks.push(...buildModelProviderRefs(config));

  // Check 5: gateway-api-key-reminder — only when gateway providers exist
  if (gatewayEntries.length > 0) {
    checks.push(buildGatewayApiKeyReminder());
  }

  return checks;
};
52
+
53
/**
 * Base check: reports whether the configuration declares any provider.
 *
 * @param config - Configuration whose `providers` map is counted.
 * @returns A pass check listing each provider as `name (type)`, or a warn check
 *   when the map is empty.
 */
function buildProvidersConfigured(config: OverstoryConfig): DoctorCheck {
  const described = Object.entries(config.providers).map(([id, def]) => `${id} (${def.type})`);
  const total = described.length;

  // Guard clause: nothing configured at all.
  if (total === 0) {
    return {
      name: "providers-configured",
      category: "providers",
      status: "warn",
      message: "No providers configured — add providers to config.yaml",
      details: ["At least one native or gateway provider should be configured."],
    };
  }

  const noun = total === 1 ? "provider" : "providers";
  return {
    name: "providers-configured",
    category: "providers",
    status: "pass",
    message: `${total} ${noun} configured`,
    details: described,
  };
}
77
+
78
/**
 * Check 1: probe a gateway provider's baseUrl over HTTP.
 *
 * Sends a HEAD request that is aborted after 5 seconds. The response itself is
 * never inspected, so any HTTP status counts as reachable; only a rejected
 * fetch (network error, timeout, ...) produces a warn.
 *
 * @param name - Provider key, used in the check name and message.
 * @param provider - Provider config; `baseUrl` must be set (the caller guards this).
 * @returns A pass check listing the URL, or a warn check listing the URL and the error text.
 */
async function checkProviderReachable(
  name: string,
  provider: ProviderConfig,
): Promise<DoctorCheck> {
  const baseUrl = provider.baseUrl as string; // caller guards baseUrl is defined

  // null means the request completed; an error message may legitimately be "",
  // so the sentinel must not be a string.
  let failure: string | null = null;
  try {
    await fetch(baseUrl, { method: "HEAD", signal: AbortSignal.timeout(5000) });
  } catch (error) {
    failure = error instanceof Error ? error.message : String(error);
  }

  if (failure !== null) {
    return {
      name: `provider-reachable-${name}`,
      category: "providers",
      status: "warn",
      message: `Gateway provider '${name}' is unreachable`,
      details: [baseUrl, failure],
    };
  }

  return {
    name: `provider-reachable-${name}`,
    category: "providers",
    status: "pass",
    message: `Gateway provider '${name}' is reachable`,
    details: [baseUrl],
  };
}
114
+
115
/**
 * Check 2: confirm the environment variable holding a gateway provider's auth
 * token is set to a non-empty value.
 *
 * Only the variable NAME is ever placed in the result — never its value.
 *
 * @param name - Provider key, used in the check name and message.
 * @param provider - Provider config; `authTokenEnv` must be set (the caller guards this).
 * @returns A pass check when the variable is non-empty, otherwise a warn check
 *   with a hint to set it.
 */
function buildProviderAuthToken(name: string, provider: ProviderConfig): DoctorCheck {
  const envVar = provider.authTokenEnv as string; // caller guards authTokenEnv is defined
  const envLine = `Env var: ${envVar}`;

  // Both an unset variable and an empty string are falsy, i.e. "missing".
  if (!process.env[envVar]) {
    return {
      name: `provider-auth-token-${name}`,
      category: "providers",
      status: "warn",
      message: `Auth token for provider '${name}' is missing`,
      details: [envLine, `Set ${envVar} to authenticate with this provider.`],
    };
  }

  return {
    name: `provider-auth-token-${name}`,
    category: "providers",
    status: "pass",
    message: `Auth token for provider '${name}' is set`,
    details: [envLine],
  };
}
142
+
143
/**
 * Check 3: flag tool-heavy roles whose model is provider-prefixed.
 *
 * A role is flagged when it is listed in TOOL_HEAVY_ROLES and its configured
 * model string contains "/". Such models are reported as non-Anthropic, for
 * which structured tool-use (function calling) behavior may differ.
 *
 * @param config - Configuration whose `models` map is scanned.
 * @returns One warn check per flagged role, or a single pass check when no role is flagged.
 */
function buildToolUseCompat(config: OverstoryConfig): DoctorCheck[] {
  const warnings = Object.entries(config.models).flatMap(([role, model]): DoctorCheck[] => {
    const providerPrefixed = model !== undefined && model.includes("/");
    if (!providerPrefixed || !TOOL_HEAVY_ROLES.has(role)) {
      return [];
    }

    return [
      {
        name: "tool-use-compat",
        category: "providers",
        status: "warn",
        message: `models.${role} uses non-Anthropic model — tool-use compatibility not guaranteed`,
        details: [
          `Model: ${model}`,
          "Tool use (function calling) behavior varies across providers.",
          "Test agent behavior thoroughly before using in production.",
        ],
      },
    ];
  });

  if (warnings.length > 0) {
    return warnings;
  }

  return [
    {
      name: "tool-use-compat",
      category: "providers",
      status: "pass",
      message: "No tool-heavy roles use non-Anthropic models",
    },
  ];
}
182
+
183
/**
 * Check 4: Validate that provider-prefixed model references point to configured providers.
 *
 * For each config.models entry containing '/' (provider-qualified), the text
 * before the first '/' is taken as the provider name and looked up in
 * config.providers. Entries whose provider part is empty (e.g. "/model") are
 * skipped.
 *
 * @param config - Configuration whose `models` and `providers` sections are cross-checked.
 * @returns One "model-provider-ref" check per provider-prefixed model (pass when the
 *   provider is defined, fail otherwise), or a single "model-provider-refs" pass
 *   when no such model exists.
 */
function buildModelProviderRefs(config: OverstoryConfig): DoctorCheck[] {
  const checks: DoctorCheck[] = [];

  for (const [role, model] of Object.entries(config.models)) {
    if (model === undefined) continue;
    if (!model.includes("/")) continue;

    const providerName = model.split("/")[0];
    if (!providerName) continue;

    // Require an OWN property: a bare `config.providers[providerName]` lookup is
    // also truthy for names inherited from Object.prototype ("constructor",
    // "toString", ...), which would report an undefined provider as defined.
    const isDefined =
      Object.prototype.hasOwnProperty.call(config.providers, providerName) &&
      Boolean(config.providers[providerName]);

    if (isDefined) {
      checks.push({
        name: "model-provider-ref",
        category: "providers",
        status: "pass",
        message: `models.${role} references defined provider '${providerName}'`,
        details: [`Model: ${model}`],
      });
    } else {
      checks.push({
        name: "model-provider-ref",
        category: "providers",
        status: "fail",
        message: `models.${role} references undefined provider '${providerName}'`,
        details: [
          `Model: ${model}`,
          `Provider '${providerName}' is not defined in config.yaml providers section.`,
          // Nested YAML keys need increasing indentation to form a valid snippet.
          `Add it: providers:\n  ${providerName}:\n    type: gateway\n    baseUrl: https://...`,
        ],
      });
    }
  }

  if (checks.length === 0) {
    checks.push({
      name: "model-provider-refs",
      category: "providers",
      status: "pass",
      message: "No provider-prefixed model references",
    });
  }

  return checks;
}
234
+
235
/**
 * Check 5: static reminder emitted when gateway providers are configured.
 *
 * Agents spawned via gateway routes receive ANTHROPIC_API_KEY="" so that they
 * talk to the gateway rather than to Anthropic directly; any direct Anthropic
 * API call (e.g., from merge/resolver.ts) therefore needs a separate key.
 *
 * @returns A warn check carrying the reminder text; it has no details.
 */
function buildGatewayApiKeyReminder(): DoctorCheck {
  const reminder =
    "Gateway providers configured — agents using gateway routes will have ANTHROPIC_API_KEY set to empty string. Direct Anthropic API calls require a separate key.";

  const check: DoctorCheck = {
    name: "gateway-api-key-reminder",
    category: "providers",
    status: "warn",
    message: reminder,
  };
  return check;
}
@@ -13,7 +13,8 @@ export type DoctorCategory =
13
13
  | "merge"
14
14
  | "logs"
15
15
  | "version"
16
- | "ecosystem";
16
+ | "ecosystem"
17
+ | "providers";
17
18
 
18
19
  /** Result of a single doctor health check. */
19
20
  export interface DoctorCheck {
@@ -3,6 +3,7 @@ import { readdir, stat } from "node:fs/promises";
3
3
  import { join } from "node:path";
4
4
  import { createManifestLoader } from "../agents/manifest.ts";
5
5
  import { writeOverlay } from "../agents/overlay.ts";
6
+ import type { Spawner } from "../commands/init.ts";
6
7
  import { initCommand } from "../commands/init.ts";
7
8
  import { loadConfig } from "../config.ts";
8
9
  import { cleanupTempDir, createTempGitRepo } from "../test-helpers.ts";
@@ -15,10 +16,14 @@ import type { OverlayConfig } from "../types.ts";
15
16
  * fresh temp git repo (NOT the overstory repo itself), then verifying all
16
17
  * artifacts, loading config + manifest via real APIs, and generating an overlay.
17
18
  *
18
- * Uses real filesystem and real git repos. No mocks.
19
+ * Uses real filesystem and real git repos.
20
+ * Uses a no-op spawner so ecosystem CLIs (ml/sd/cn) don't need to be installed in CI.
19
21
  * Suppresses stdout because initCommand prints status lines.
20
22
  */
21
23
 
24
+ /** No-op spawner that treats all ecosystem tools as "not installed". */
25
+ const noopSpawner: Spawner = async () => ({ exitCode: 1, stdout: "", stderr: "not found" });
26
+
22
27
  const EXPECTED_AGENT_DEFS = [
23
28
  "builder.md",
24
29
  "coordinator.md",
@@ -51,7 +56,7 @@ describe("E2E: init→sling lifecycle on external project", () => {
51
56
  });
52
57
 
53
58
  test("init creates all expected artifacts", async () => {
54
- await initCommand({});
59
+ await initCommand({ _spawner: noopSpawner });
55
60
 
56
61
  const overstoryDir = join(tempDir, ".overstory");
57
62
 
@@ -91,7 +96,7 @@ describe("E2E: init→sling lifecycle on external project", () => {
91
96
  });
92
97
 
93
98
  test("loadConfig returns valid config pointing to temp dir", async () => {
94
- await initCommand({});
99
+ await initCommand({ _spawner: noopSpawner });
95
100
 
96
101
  const config = await loadConfig(tempDir);
97
102
 
@@ -109,7 +114,7 @@ describe("E2E: init→sling lifecycle on external project", () => {
109
114
  });
110
115
 
111
116
  test("manifest loads successfully with all 7 agents (supervisor deprecated)", async () => {
112
- await initCommand({});
117
+ await initCommand({ _spawner: noopSpawner });
113
118
 
114
119
  const manifestPath = join(tempDir, ".overstory", "agent-manifest.json");
115
120
  const agentDefsDir = join(tempDir, ".overstory", "agent-defs");
@@ -143,7 +148,7 @@ describe("E2E: init→sling lifecycle on external project", () => {
143
148
  });
144
149
 
145
150
  test("manifest capability index is consistent", async () => {
146
- await initCommand({});
151
+ await initCommand({ _spawner: noopSpawner });
147
152
 
148
153
  const manifestPath = join(tempDir, ".overstory", "agent-manifest.json");
149
154
  const agentDefsDir = join(tempDir, ".overstory", "agent-defs");
@@ -165,7 +170,7 @@ describe("E2E: init→sling lifecycle on external project", () => {
165
170
  });
166
171
 
167
172
  test("overlay generation works for external project", async () => {
168
- await initCommand({});
173
+ await initCommand({ _spawner: noopSpawner });
169
174
 
170
175
  const agentDefsDir = join(tempDir, ".overstory", "agent-defs");
171
176
  const baseDefinition = await Bun.file(join(agentDefsDir, "builder.md")).text();
@@ -213,7 +218,7 @@ describe("E2E: init→sling lifecycle on external project", () => {
213
218
  // init → load config → load manifest → generate overlay
214
219
 
215
220
  // Step 1: Init
216
- await initCommand({});
221
+ await initCommand({ _spawner: noopSpawner });
217
222
 
218
223
  // Step 2: Load config
219
224
  const config = await loadConfig(tempDir);
package/src/index.ts CHANGED
@@ -45,7 +45,7 @@ import { OverstoryError, WorktreeError } from "./errors.ts";
45
45
  import { jsonError } from "./json.ts";
46
46
  import { brand, chalk, muted, setQuiet } from "./logging/color.ts";
47
47
 
48
- export const VERSION = "0.7.4";
48
+ export const VERSION = "0.7.6";
49
49
 
50
50
  const rawArgs = process.argv.slice(2);
51
51
 
@@ -228,10 +228,19 @@ program.addCommand(createCompletionsCommand());
228
228
  // Unmigrated commands — passthrough pattern
229
229
  program
230
230
  .command("init")
231
- .description("Initialize .overstory/ in current project")
231
+ .description("Initialize .overstory/ and bootstrap os-eco ecosystem tools")
232
232
  .option("--force", "Reinitialize even if .overstory/ already exists")
233
233
  .option("-y, --yes", "Accept all defaults without prompting (non-interactive mode)")
234
234
  .option("--name <name>", "Project name (skips auto-detection)")
235
+ .option(
236
+ "--tools <list>",
237
+ "Comma-separated list of ecosystem tools to bootstrap (default: mulch,seeds,canopy)",
238
+ )
239
+ .option("--skip-mulch", "Skip mulch bootstrap")
240
+ .option("--skip-seeds", "Skip seeds bootstrap")
241
+ .option("--skip-canopy", "Skip canopy bootstrap")
242
+ .option("--skip-onboard", "Skip CLAUDE.md onboarding step for ecosystem tools")
243
+ .option("--json", "Output result as JSON")
235
244
  .action(async (opts) => {
236
245
  await initCommand(opts);
237
246
  });
@@ -26,8 +26,9 @@ export interface ModelPricing {
26
26
  cacheCreationPerMTok: number;
27
27
  }
28
28
 
29
- /** Hardcoded pricing for known Claude models. */
29
+ /** Pricing for known AI models across providers. */
30
30
  const MODEL_PRICING: Record<string, ModelPricing> = {
31
+ // --- Claude ---
31
32
  opus: {
32
33
  inputPerMTok: 15,
33
34
  outputPerMTok: 75,
@@ -46,18 +47,72 @@ const MODEL_PRICING: Record<string, ModelPricing> = {
46
47
  cacheReadPerMTok: 0.08, // 10% of input
47
48
  cacheCreationPerMTok: 0.2, // 25% of input
48
49
  },
50
+ // --- OpenAI GPT ---
51
+ "gpt-4o-mini": {
52
+ inputPerMTok: 0.15,
53
+ outputPerMTok: 0.6,
54
+ cacheReadPerMTok: 0.075, // 50% of input
55
+ cacheCreationPerMTok: 0.15,
56
+ },
57
+ "gpt-4o": {
58
+ inputPerMTok: 2.5,
59
+ outputPerMTok: 10,
60
+ cacheReadPerMTok: 1.25,
61
+ cacheCreationPerMTok: 2.5,
62
+ },
63
+ "gpt-5": {
64
+ inputPerMTok: 10,
65
+ outputPerMTok: 40,
66
+ cacheReadPerMTok: 5,
67
+ cacheCreationPerMTok: 10,
68
+ },
69
+ o1: {
70
+ inputPerMTok: 15,
71
+ outputPerMTok: 60,
72
+ cacheReadPerMTok: 7.5,
73
+ cacheCreationPerMTok: 15,
74
+ },
75
+ o3: {
76
+ inputPerMTok: 10,
77
+ outputPerMTok: 40,
78
+ cacheReadPerMTok: 5,
79
+ cacheCreationPerMTok: 10,
80
+ },
81
+ // --- Google Gemini ---
82
+ "gemini-flash": {
83
+ inputPerMTok: 0.1,
84
+ outputPerMTok: 0.4,
85
+ cacheReadPerMTok: 0.025,
86
+ cacheCreationPerMTok: 0.1,
87
+ },
88
+ "gemini-pro": {
89
+ inputPerMTok: 1.25,
90
+ outputPerMTok: 5,
91
+ cacheReadPerMTok: 0.3125,
92
+ cacheCreationPerMTok: 1.25,
93
+ },
49
94
  };
50
95
 
51
96
/**
 * Resolve the pricing entry for a model identifier.
 *
 * The identifier is lower-cased and tested against an ordered list of rules.
 * A rule matches when every one of its substrings occurs in the identifier;
 * the first matching rule wins. Order matters where patterns overlap
 * ("gpt-4o-mini" is tried before "gpt-4o").
 *
 * @param model - Model identifier, in any letter case.
 * @returns The matching pricing entry, or null when no rule matches.
 */
export function getPricingForModel(model: string): ModelPricing | null {
  const lower = model.toLowerCase();

  // [required substrings, MODEL_PRICING key], in priority order.
  const rules: ReadonlyArray<readonly [readonly string[], string]> = [
    // --- Claude ---
    [["opus"], "opus"],
    [["sonnet"], "sonnet"],
    [["haiku"], "haiku"],
    // --- OpenAI GPT --- (gpt-4o-mini before gpt-4o; o3 before o1)
    [["gpt-4o-mini"], "gpt-4o-mini"],
    [["gpt-4o"], "gpt-4o"],
    [["gpt-5"], "gpt-5"],
    [["o3"], "o3"],
    [["o1"], "o1"],
    // --- Google Gemini --- (flash before generic gemini+pro check)
    [["flash"], "gemini-flash"],
    [["gemini", "pro"], "gemini-pro"],
  ];

  for (const [needles, tier] of rules) {
    if (needles.every((needle) => lower.includes(needle))) {
      return MODEL_PRICING[tier] ?? null;
    }
  }

  return null;
}
63
118
 
@@ -224,6 +224,44 @@ describe("getSessionsByAgent", () => {
224
224
  });
225
225
  });
226
226
 
227
+ // === getSessionsByTask ===
228
+
229
describe("getSessionsByTask", () => {
  test("returns sessions matching task_id", () => {
    // Two sessions for task-A and one for task-B.
    const seeded: ReadonlyArray<readonly [string, string]> = [
      ["agent-1", "task-A"],
      ["agent-2", "task-A"],
      ["agent-3", "task-B"],
    ];
    for (const [agentName, taskId] of seeded) {
      store.recordSession(makeSession({ agentName, taskId }));
    }

    const matched = store.getSessionsByTask("task-A");
    expect(matched).toHaveLength(2);
    expect(matched.every((s) => s.taskId === "task-A")).toBe(true);
  });

  test("returns empty array for unknown task_id", () => {
    store.recordSession(makeSession({ agentName: "agent-1", taskId: "task-A" }));

    const matched = store.getSessionsByTask("nonexistent");
    expect(matched).toEqual([]);
  });

  test("returns sessions ordered by started_at DESC", () => {
    // Recorded deliberately out of chronological order.
    const seeded: ReadonlyArray<readonly [string, string]> = [
      ["agent-1", "2026-01-01T10:00:00Z"],
      ["agent-2", "2026-01-01T12:00:00Z"],
      ["agent-3", "2026-01-01T11:00:00Z"],
    ];
    for (const [agentName, startedAt] of seeded) {
      store.recordSession(makeSession({ agentName, taskId: "task-X", startedAt }));
    }

    const ordered = store.getSessionsByTask("task-X");
    expect(ordered).toHaveLength(3);
    // Most recent first.
    expect(ordered.map((s) => s.startedAt)).toEqual([
      "2026-01-01T12:00:00Z",
      "2026-01-01T11:00:00Z",
      "2026-01-01T10:00:00Z",
    ]);
  });
});
264
+
227
265
  // === getAverageDuration ===
228
266
 
229
267
  describe("getAverageDuration", () => {
@@ -13,6 +13,7 @@ export interface MetricsStore {
13
13
  getRecentSessions(limit?: number): SessionMetrics[];
14
14
  getSessionsByAgent(agentName: string): SessionMetrics[];
15
15
  getSessionsByRun(runId: string): SessionMetrics[];
16
+ getSessionsByTask(taskId: string): SessionMetrics[];
16
17
  getAverageDuration(capability?: string): number;
17
18
  /** Count the total number of sessions in the database (no limit cap). */
18
19
  countSessions(): number;
@@ -250,6 +251,10 @@ export function createMetricsStore(dbPath: string): MetricsStore {
250
251
  SELECT * FROM sessions WHERE run_id = $run_id ORDER BY started_at DESC
251
252
  `);
252
253
 
254
+ const byTaskStmt = db.prepare<SessionRow, { $task_id: string }>(`
255
+ SELECT * FROM sessions WHERE task_id = $task_id ORDER BY started_at DESC
256
+ `);
257
+
253
258
  const avgDurationAllStmt = db.prepare<{ avg_duration: number | null }, Record<string, never>>(`
254
259
  SELECT AVG(duration_ms) AS avg_duration FROM sessions WHERE completed_at IS NOT NULL
255
260
  `);
@@ -342,6 +347,11 @@ export function createMetricsStore(dbPath: string): MetricsStore {
342
347
  return rows.map(rowToMetrics);
343
348
  },
344
349
 
350
/** Return every session recorded for `taskId`, in the order produced by the by-task query. */
getSessionsByTask(taskId: string): SessionMetrics[] {
  const params = { $task_id: taskId };
  return byTaskStmt.all(params).map(rowToMetrics);
},
354
+
345
355
  getAverageDuration(capability?: string): number {
346
356
  if (capability !== undefined) {
347
357
  const row = avgDurationByCapStmt.get({ $capability: capability });
@@ -311,12 +311,38 @@ describe("estimateCost", () => {
311
311
  outputTokens: 1_000_000,
312
312
  cacheReadTokens: 0,
313
313
  cacheCreationTokens: 0,
314
- modelUsed: "gpt-4o",
314
+ modelUsed: "unknown-model-xyz",
315
315
  });
316
316
 
317
317
  expect(cost).toBeNull();
318
318
  });
319
319
 
320
+ test("calculates cost for gpt-4o", () => {
321
+ const cost = estimateCost({
322
+ inputTokens: 1_000_000,
323
+ outputTokens: 1_000_000,
324
+ cacheReadTokens: 1_000_000,
325
+ cacheCreationTokens: 1_000_000,
326
+ modelUsed: "gpt-4o",
327
+ });
328
+
329
+ // gpt-4o: input=2.5, output=10, cacheRead=1.25, cacheCreation=2.5 => total=16.25
330
+ expect(cost).toBeCloseTo(16.25, 2);
331
+ });
332
+
333
+ test("calculates cost for gemini flash", () => {
334
+ const cost = estimateCost({
335
+ inputTokens: 1_000_000,
336
+ outputTokens: 1_000_000,
337
+ cacheReadTokens: 1_000_000,
338
+ cacheCreationTokens: 1_000_000,
339
+ modelUsed: "gemini-2.5-flash",
340
+ });
341
+
342
+ // gemini-flash: input=0.1, output=0.4, cacheRead=0.025, cacheCreation=0.1 => total=0.625
343
+ expect(cost).toBeCloseTo(0.625, 3);
344
+ });
345
+
320
346
  test("returns null when modelUsed is null", () => {
321
347
  const cost = estimateCost({
322
348
  inputTokens: 1_000_000,
@@ -392,9 +418,65 @@ describe("getPricingForModel", () => {
392
418
  });
393
419
 
394
420
  test("returns null for unknown model", () => {
395
- const pricing = getPricingForModel("gpt-4o");
421
+ const pricing = getPricingForModel("unknown-model-xyz");
396
422
  expect(pricing).toBeNull();
397
423
  });
424
+
425
+ test("matches gpt-4o", () => {
426
+ const pricing = getPricingForModel("gpt-4o");
427
+ expect(pricing).not.toBeNull();
428
+ if (pricing !== null) {
429
+ expect(pricing.inputPerMTok).toBe(2.5);
430
+ }
431
+ });
432
+
433
+ test("matches gpt-4o-mini", () => {
434
+ const pricing = getPricingForModel("gpt-4o-mini");
435
+ expect(pricing).not.toBeNull();
436
+ if (pricing !== null) {
437
+ expect(pricing.inputPerMTok).toBe(0.15);
438
+ }
439
+ });
440
+
441
+ test("matches gpt-5", () => {
442
+ const pricing = getPricingForModel("gpt-5");
443
+ expect(pricing).not.toBeNull();
444
+ if (pricing !== null) {
445
+ expect(pricing.inputPerMTok).toBe(10);
446
+ }
447
+ });
448
+
449
+ test("matches o1", () => {
450
+ const pricing = getPricingForModel("o1");
451
+ expect(pricing).not.toBeNull();
452
+ if (pricing !== null) {
453
+ expect(pricing.inputPerMTok).toBe(15);
454
+ }
455
+ });
456
+
457
+ test("matches o3", () => {
458
+ const pricing = getPricingForModel("o3");
459
+ expect(pricing).not.toBeNull();
460
+ if (pricing !== null) {
461
+ expect(pricing.inputPerMTok).toBe(10);
462
+ }
463
+ });
464
+
465
+ test("matches gemini flash", () => {
466
+ const pricing = getPricingForModel("gemini-2.5-flash");
467
+ expect(pricing).not.toBeNull();
468
+ if (pricing !== null) {
469
+ expect(pricing.inputPerMTok).toBe(0.1);
470
+ }
471
+ });
472
+
473
+ test("matches gemini pro", () => {
474
+ const pricing = getPricingForModel("gemini-2.5-pro");
475
+ expect(pricing).not.toBeNull();
476
+ if (pricing !== null) {
477
+ expect(pricing.inputPerMTok).toBe(1.25);
478
+ }
479
+ });
398
480
  });
399
481
 
400
482
  // === re-export parity ===
@@ -3,7 +3,7 @@
3
3
  *
4
4
  * This is a Claude Code-specific JSONL parser that extracts token usage data
5
5
  * from assistant-type entries in transcript files at
6
- * ~/.claude/projects/{project-slug}/{session-id}.jsonl.
6
+ * the runtime-specific transcript directory (e.g. ~/.claude/projects/ for Claude Code).
7
7
  *
8
8
  * Runtime-agnostic pricing logic lives in ./pricing.ts. Other runtimes
9
9
  * implement their own transcript parsing via AgentRuntime.parseTranscript().
@@ -73,6 +73,46 @@ describe("ClaudeRuntime", () => {
73
73
  );
74
74
  });
75
75
 
76
+ test("with appendSystemPromptFile uses $(cat ...) expansion", () => {
77
+ const opts: SpawnOpts = {
78
+ model: "opus",
79
+ permissionMode: "bypass",
80
+ cwd: "/project",
81
+ env: {},
82
+ appendSystemPromptFile: "/project/.overstory/agent-defs/coordinator.md",
83
+ };
84
+ const cmd = runtime.buildSpawnCommand(opts);
85
+ expect(cmd).toBe(
86
+ `claude --model opus --permission-mode bypassPermissions --append-system-prompt "$(cat '/project/.overstory/agent-defs/coordinator.md')"`,
87
+ );
88
+ });
89
+
90
+ test("appendSystemPromptFile with single quotes in path", () => {
91
+ const opts: SpawnOpts = {
92
+ model: "opus",
93
+ permissionMode: "bypass",
94
+ cwd: "/project",
95
+ env: {},
96
+ appendSystemPromptFile: "/project/it's a path/agent.md",
97
+ };
98
+ const cmd = runtime.buildSpawnCommand(opts);
99
+ expect(cmd).toContain("$(cat '/project/it'\\''s a path/agent.md')");
100
+ });
101
+
102
+ test("appendSystemPromptFile takes precedence over appendSystemPrompt", () => {
103
+ const opts: SpawnOpts = {
104
+ model: "opus",
105
+ permissionMode: "bypass",
106
+ cwd: "/project",
107
+ env: {},
108
+ appendSystemPromptFile: "/project/.overstory/agent-defs/coordinator.md",
109
+ appendSystemPrompt: "This inline content should be ignored",
110
+ };
111
+ const cmd = runtime.buildSpawnCommand(opts);
112
+ expect(cmd).toContain("$(cat ");
113
+ expect(cmd).not.toContain("This inline content should be ignored");
114
+ });
115
+
76
116
  test("without appendSystemPrompt omits the flag", () => {
77
117
  const opts: SpawnOpts = {
78
118
  model: "haiku",