npm - @os-eco/overstory-cli - Versions diffs - 0.7.7 → 0.7.8 - Mend

@os-eco/overstory-cli 0.7.7 → 0.7.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19)

package/README.md +101 -1
package/package.json +1 -1
package/src/commands/coordinator.test.ts +131 -2
package/src/commands/coordinator.ts +40 -9
package/src/commands/costs.test.ts +5 -0
package/src/commands/costs.ts +1 -1
package/src/commands/log.ts +2 -0
package/src/commands/sling.test.ts +63 -1
package/src/commands/sling.ts +37 -2
package/src/config.test.ts +68 -0
package/src/config.ts +16 -0
package/src/index.ts +2 -1
package/src/metrics/pricing.test.ts +258 -0
package/src/metrics/store.test.ts +227 -0
package/src/metrics/store.ts +40 -5
package/src/schema-consistency.test.ts +1 -0
package/src/types.ts +8 -0
package/src/worktree/tmux.test.ts +49 -0
package/src/worktree/tmux.ts +33 -0

package/src/config.test.ts CHANGED Viewed

@@ -775,6 +775,74 @@ project:
 		await expect(loadConfig(tempDir)).rejects.toThrow(ValidationError);
 	});
+	test("resets negative shellInitDelayMs to 0 with warning", async () => {
 		// Fixture: the config under test sets runtime.shellInitDelayMs to -100.
+		await writeConfig("runtime:\n  shellInitDelayMs: -100\n");
 		// Replace process.stderr.write with a stub that accumulates string chunks
 		// (Uint8Array chunks are dropped) so the warning text can be inspected.
+		const origWrite = process.stderr.write;
+		let capturedStderr = "";
+		process.stderr.write = ((s: string | Uint8Array) => {
+			if (typeof s === "string") capturedStderr += s;
+			return true;
+		}) as typeof process.stderr.write;
+		try {
 			// The loaded config must report the delay as 0, not the configured -100.
+			const config = await loadConfig(tempDir);
+			expect(config.runtime?.shellInitDelayMs).toBe(0);
+		} finally {
 			// Restore the real writer even when loadConfig or the expectation throws.
+			process.stderr.write = origWrite;
+		}
 		// Checked after restoring stderr: a warning naming the setting was written.
+		expect(capturedStderr).toContain("WARNING: runtime.shellInitDelayMs");
+	});
+	test("resets Infinity shellInitDelayMs to 0 with warning", async () => {
 		// Fixture uses the YAML float literal `.inf`.
 		// NOTE(review): presumably the YAML loader parses this to Infinity — confirm.
+		await writeConfig("runtime:\n  shellInitDelayMs: .inf\n");
 		// Stub process.stderr.write to collect string output; non-string chunks are ignored.
+		const origWrite = process.stderr.write;
+		let capturedStderr = "";
+		process.stderr.write = ((s: string | Uint8Array) => {
+			if (typeof s === "string") capturedStderr += s;
+			return true;
+		}) as typeof process.stderr.write;
+		try {
 			// The loaded config must report the delay as 0.
+			const config = await loadConfig(tempDir);
+			expect(config.runtime?.shellInitDelayMs).toBe(0);
+		} finally {
 			// Restore the real writer regardless of the outcome above.
+			process.stderr.write = origWrite;
+		}
 		// A warning naming the setting must have been written to stderr.
+		expect(capturedStderr).toContain("WARNING: runtime.shellInitDelayMs");
+	});
+	test("warns when shellInitDelayMs exceeds 30s", async () => {
 		// Fixture: a finite, non-negative delay of 60000 ms.
+		await writeConfig("runtime:\n  shellInitDelayMs: 60000\n");
 		// Stub process.stderr.write to collect string output; non-string chunks are ignored.
+		const origWrite = process.stderr.write;
+		let capturedStderr = "";
+		process.stderr.write = ((s: string | Uint8Array) => {
+			if (typeof s === "string") capturedStderr += s;
+			return true;
+		}) as typeof process.stderr.write;
+		try {
 			// Unlike the negative/Infinity cases, the configured value is kept as-is.
+			const config = await loadConfig(tempDir);
+			expect(config.runtime?.shellInitDelayMs).toBe(60000);
+		} finally {
 			// Restore the real writer regardless of the outcome above.
+			process.stderr.write = origWrite;
+		}
 		// The warning must echo the configured value in milliseconds.
+		expect(capturedStderr).toContain("WARNING: runtime.shellInitDelayMs is 60000ms");
+	});
+	test("accepts valid shellInitDelayMs without warning", async () => {
 		// Fixture: a delay of 2000 ms, which the test expects to pass validation silently.
+		await writeConfig("runtime:\n  shellInitDelayMs: 2000\n");
 		// Stub process.stderr.write to collect string output; non-string chunks are ignored.
+		const origWrite = process.stderr.write;
+		let capturedStderr = "";
+		process.stderr.write = ((s: string | Uint8Array) => {
+			if (typeof s === "string") capturedStderr += s;
+			return true;
+		}) as typeof process.stderr.write;
+		try {
 			// The configured value must survive loading unchanged.
+			const config = await loadConfig(tempDir);
+			expect(config.runtime?.shellInitDelayMs).toBe(2000);
+		} finally {
 			// Restore the real writer regardless of the outcome above.
+			process.stderr.write = origWrite;
+		}
 		// Nothing mentioning the setting may appear in the captured string output.
+		expect(capturedStderr).not.toContain("shellInitDelayMs");
+	});
 	test("rejects qualityGate with empty description", async () => {
 		await writeConfig(`
 project:

package/src/config.ts CHANGED Viewed

@@ -64,6 +64,7 @@ export const DEFAULT_CONFIG: OverstoryConfig = {
 	},
 	runtime: {
 		default: "claude",
+		shellInitDelayMs: 0,
 		pi: {
 			provider: "anthropic",
 			modelMap: {
@@ -664,6 +665,21 @@ function validateConfig(config: OverstoryConfig): void {
 		}
 	}
+	// runtime.shellInitDelayMs: validate if present
+	if (config.runtime?.shellInitDelayMs !== undefined) {
+		const delay = config.runtime.shellInitDelayMs;
+		if (typeof delay !== "number" || delay < 0 || !Number.isFinite(delay)) {
+			process.stderr.write(
+				`[overstory] WARNING: runtime.shellInitDelayMs must be a non-negative number. Got: ${delay}. Using default (0).\n`,
+			);
+			config.runtime.shellInitDelayMs = 0;
+		} else if (delay > 30_000) {
+			process.stderr.write(
+				`[overstory] WARNING: runtime.shellInitDelayMs is ${delay}ms (>${30}s). This adds delay before every agent spawn. Consider a lower value.\n`,
+			);
+		}
+	}
 	// models: validate each value — accepts aliases and provider-prefixed refs
 	const validAliases = ["sonnet", "opus", "haiku"];
 	const toolHeavyRoles = ["builder", "scout"];

package/src/index.ts CHANGED Viewed

@@ -45,7 +45,7 @@ import { OverstoryError, WorktreeError } from "./errors.ts";
 import { jsonError } from "./json.ts";
 import { brand, chalk, muted, setQuiet } from "./logging/color.ts";
-export const VERSION = "0.7.7";
+export const VERSION = "0.7.8";
 const rawArgs = process.argv.slice(2);
@@ -267,6 +267,7 @@ program
 	.option("--no-scout-check", "Suppress the parentHasScouts scout-before-build warning")
 	.option("--dispatch-max-agents <n>", "Per-lead max agents ceiling (injected into overlay)")
 	.option("--runtime <name>", "Runtime adapter (default: config or claude)")
+	.option("--base-branch <branch>", "Base branch for worktree creation (default: current HEAD)")
 	.option("--json", "Output result as JSON")
 	.action(async (taskId, opts) => {
 		await slingCommand(taskId, opts);

package/src/metrics/pricing.test.ts ADDED Viewed

@@ -0,0 +1,258 @@
+import { describe, expect, test } from "bun:test";
+import { estimateCost, getPricingForModel } from "./pricing";
+describe("getPricingForModel()", () => {
 	// Exercises model-name lookup: each case passes a model string and checks the
 	// returned per-MTok rates, or null when the model is not recognised.
+	describe("Claude tiers", () => {
 		// Full dated Claude model IDs must resolve to the matching tier's rates.
+		test("matches opus by substring in full model ID", () => {
+			const result = getPricingForModel("claude-opus-4-20250514");
+			expect(result).not.toBeNull();
+			expect(result?.inputPerMTok).toBe(15);
+			expect(result?.outputPerMTok).toBe(75);
+		});
+		test("matches sonnet by substring in full model ID", () => {
+			const result = getPricingForModel("claude-sonnet-4-20250514");
+			expect(result).not.toBeNull();
+			expect(result?.inputPerMTok).toBe(3);
+			expect(result?.outputPerMTok).toBe(15);
+		});
+		test("matches haiku by substring in full model ID", () => {
+			const result = getPricingForModel("claude-haiku-3-5-20241022");
+			expect(result).not.toBeNull();
+			expect(result?.inputPerMTok).toBe(0.8);
+			expect(result?.outputPerMTok).toBe(4);
+		});
+	});
+	describe("OpenAI tiers", () => {
 		// Bare OpenAI model names; several cases pin only the input rate.
+		test("matches gpt-4o-mini", () => {
+			const result = getPricingForModel("gpt-4o-mini");
+			expect(result).not.toBeNull();
+			expect(result?.inputPerMTok).toBe(0.15);
+		});
+		test("matches gpt-4o", () => {
+			const result = getPricingForModel("gpt-4o");
+			expect(result).not.toBeNull();
+			expect(result?.inputPerMTok).toBe(2.5);
+		});
+		test("matches gpt-5", () => {
+			const result = getPricingForModel("gpt-5");
+			expect(result).not.toBeNull();
+			expect(result?.inputPerMTok).toBe(10);
+		});
+		test("matches o3", () => {
+			const result = getPricingForModel("o3");
+			expect(result).not.toBeNull();
+			expect(result?.inputPerMTok).toBe(10);
+			expect(result?.outputPerMTok).toBe(40);
+		});
+		test("matches o1", () => {
+			const result = getPricingForModel("o1");
+			expect(result).not.toBeNull();
+			expect(result?.inputPerMTok).toBe(15);
+			expect(result?.outputPerMTok).toBe(60);
+		});
+	});
+	describe("Priority ordering", () => {
 		// "gpt-4o-mini" contains "gpt-4o", so it must still resolve to the mini rates.
+		test("gpt-4o-mini matches before gpt-4o (substring overlap)", () => {
+			const mini = getPricingForModel("gpt-4o-mini");
+			const full = getPricingForModel("gpt-4o");
+			expect(mini).not.toBeNull();
+			expect(full).not.toBeNull();
 			// Early return narrows both values to non-null for the comparisons below.
+			if (mini === null || full === null) return;
+			// gpt-4o-mini is cheaper
+			expect(mini.inputPerMTok).toBeLessThan(full.inputPerMTok);
+			// A model string "gpt-4o-mini" resolves to mini pricing, not gpt-4o
+			expect(mini.inputPerMTok).toBe(0.15);
+		});
 		// NOTE(review): neither "o3" nor "o1" contains the other, so this case only
 		// shows that the two names resolve to different output rates; it does not
 		// exercise a match-order conflict.
+		test("o3 matches before o1 (o1 string contains o1, o3 does not contain o1)", () => {
+			const o3 = getPricingForModel("o3");
+			const o1 = getPricingForModel("o1");
+			expect(o3).not.toBeNull();
+			expect(o1).not.toBeNull();
+			if (o3 === null || o1 === null) return;
+			expect(o3.outputPerMTok).toBe(40);
+			expect(o1.outputPerMTok).toBe(60);
+		});
+	});
+	describe("Gemini tiers", () => {
 		// Gemini model IDs with the tier word in different positions.
+		test("matches gemini-flash by 'flash' substring", () => {
+			const result = getPricingForModel("gemini-flash-2.0");
+			expect(result).not.toBeNull();
+			expect(result?.inputPerMTok).toBe(0.1);
+			expect(result?.outputPerMTok).toBe(0.4);
+		});
+		test("matches gemini-pro by 'gemini' + 'pro' substrings", () => {
+			const result = getPricingForModel("gemini-2.0-pro-exp");
+			expect(result).not.toBeNull();
+			expect(result?.inputPerMTok).toBe(1.25);
+			expect(result?.outputPerMTok).toBe(5);
+		});
+	});
+	describe("Case insensitivity", () => {
 		// Mixed-case and upper-case names must resolve to the same input rates as
 		// the lower-case Claude IDs above.
+		test("Claude-OPUS-4 resolves correctly", () => {
+			const result = getPricingForModel("Claude-OPUS-4");
+			expect(result).not.toBeNull();
+			expect(result?.inputPerMTok).toBe(15);
+		});
+		test("SONNET resolves correctly", () => {
+			const result = getPricingForModel("SONNET");
+			expect(result).not.toBeNull();
+			expect(result?.inputPerMTok).toBe(3);
+		});
+		test("Haiku resolves correctly", () => {
+			const result = getPricingForModel("Haiku");
+			expect(result).not.toBeNull();
+			expect(result?.inputPerMTok).toBe(0.8);
+		});
+	});
+	describe("Unknown models", () => {
 		// Unrecognised names, including the empty string, must yield null.
+		test("returns null for llama-3-70b", () => {
+			expect(getPricingForModel("llama-3-70b")).toBeNull();
+		});
+		test("returns null for empty string", () => {
+			expect(getPricingForModel("")).toBeNull();
+		});
+		test("returns null for random gibberish", () => {
+			expect(getPricingForModel("xyzzy-foo-bar-9000")).toBeNull();
+		});
+	});
+});
+describe("estimateCost()", () => {
 	// Each case passes token counts plus a model name and checks the returned cost.
 	// The worked arithmetic in the comments divides token counts by 1,000,000
 	// before multiplying by the per-MTok rate.
 	// NOTE(review): this case uses exact equality (toBe) on a sum of float products,
 	// while other cases use toBeCloseTo — confirm the implementation yields exactly 24.
+	test("Typical Claude Opus usage: 1M input, 100K output, 500K cacheRead, 200K cacheCreation → $24.00", () => {
+		const cost = estimateCost({
+			inputTokens: 1_000_000,
+			outputTokens: 100_000,
+			cacheReadTokens: 500_000,
+			cacheCreationTokens: 200_000,
+			modelUsed: "claude-opus-4-20250514",
+		});
+		// inputCost = 1 * 15 = 15.00
+		// outputCost = 0.1 * 75 = 7.50
+		// cacheReadCost = 0.5 * 1.5 = 0.75
+		// cacheCreationCost = 0.2 * 3.75 = 0.75
+		// total = 24.00
+		expect(cost).toBe(24.0);
+	});
+	test("Typical Claude Sonnet usage: 500K input, 50K output, 100K cacheRead, 50K cacheCreation", () => {
+		const cost = estimateCost({
+			inputTokens: 500_000,
+			outputTokens: 50_000,
+			cacheReadTokens: 100_000,
+			cacheCreationTokens: 50_000,
+			modelUsed: "claude-sonnet-4-20250514",
+		});
+		// inputCost = 0.5 * 3 = 1.50
+		// outputCost = 0.05 * 15 = 0.75
+		// cacheReadCost = 0.1 * 0.3 = 0.03
+		// cacheCreationCost = 0.05 * 0.75 = 0.0375
+		// total = 2.3175
+		expect(cost).toBeCloseTo(2.3175, 4);
+	});
 	// A known model with all-zero usage must produce the number 0, distinct from
 	// the null returned when no price can be determined.
+	test("Zero tokens returns 0 (not null)", () => {
+		const cost = estimateCost({
+			inputTokens: 0,
+			outputTokens: 0,
+			cacheReadTokens: 0,
+			cacheCreationTokens: 0,
+			modelUsed: "claude-opus-4",
+		});
+		expect(cost).toBe(0);
+	});
 	// With no model recorded the result is null even though token counts are non-zero.
+	test("Null modelUsed returns null", () => {
+		const cost = estimateCost({
+			inputTokens: 1000,
+			outputTokens: 500,
+			cacheReadTokens: 0,
+			cacheCreationTokens: 0,
+			modelUsed: null,
+		});
+		expect(cost).toBeNull();
+	});
 	// A model name with no pricing entry also yields null.
+	test("Unknown model returns null", () => {
+		const cost = estimateCost({
+			inputTokens: 1000,
+			outputTokens: 500,
+			cacheReadTokens: 0,
+			cacheCreationTokens: 0,
+			modelUsed: "llama-3-70b",
+		});
+		expect(cost).toBeNull();
+	});
 	// The next two cases isolate a single token category to pin each rate on its own.
+	test("Input-only usage: only inputTokens > 0, rest zero", () => {
+		const cost = estimateCost({
+			inputTokens: 1_000_000,
+			outputTokens: 0,
+			cacheReadTokens: 0,
+			cacheCreationTokens: 0,
+			modelUsed: "claude-sonnet-4",
+		});
+		// inputCost = 1 * 3 = 3.00
+		expect(cost).toBe(3.0);
+	});
+	test("Output-only usage: only outputTokens > 0, rest zero", () => {
+		const cost = estimateCost({
+			inputTokens: 0,
+			outputTokens: 1_000_000,
+			cacheReadTokens: 0,
+			cacheCreationTokens: 0,
+			modelUsed: "claude-sonnet-4",
+		});
+		// outputCost = 1 * 15 = 15.00
+		expect(cost).toBe(15.0);
+	});
 	// Only the two cache categories are non-zero here, so the total is their sum.
+	test("Cache-heavy usage: large cacheRead + cacheCreation, verify math", () => {
+		const cost = estimateCost({
+			inputTokens: 0,
+			outputTokens: 0,
+			cacheReadTokens: 10_000_000,
+			cacheCreationTokens: 5_000_000,
+			modelUsed: "claude-opus-4",
+		});
+		// cacheReadCost = 10 * 1.5 = 15.00
+		// cacheCreationCost = 5 * 3.75 = 18.75
+		// total = 33.75
+		expect(cost).toBeCloseTo(33.75, 5);
+	});
+});
+describe("Cache pricing ratios", () => {
 	// Checks the relationship between cache rates and the input rate rather than
 	// absolute prices, using one Claude tier per ratio.
+	test("Claude cache read is 10% of input price (verified on opus)", () => {
+		const pricing = getPricingForModel("claude-opus-4");
+		expect(pricing).not.toBeNull();
 		// Early return narrows pricing to non-null for the field reads below.
+		if (pricing === null) return;
+		const ratio = pricing.cacheReadPerMTok / pricing.inputPerMTok;
+		expect(ratio).toBeCloseTo(0.1, 10);
+	});
 	// NOTE(review): this pins cacheCreationPerMTok at 0.25x the input rate —
 	// confirm that ratio against the provider's published cache-write pricing.
+	test("Claude cache creation is 25% of input price (verified on sonnet)", () => {
+		const pricing = getPricingForModel("claude-sonnet-4");
+		expect(pricing).not.toBeNull();
+		if (pricing === null) return;
+		const ratio = pricing.cacheCreationPerMTok / pricing.inputPerMTok;
+		expect(ratio).toBeCloseTo(0.25, 10);
+	});
+});

package/src/metrics/store.test.ts CHANGED Viewed

@@ -535,6 +535,7 @@ describe("token snapshots", () => {
 			cacheCreationTokens: 100,
 			estimatedCostUsd: 0.15,
 			modelUsed: "claude-sonnet-4-5",
+			runId: null,
 			createdAt: new Date().toISOString(),
 		};
@@ -558,6 +559,7 @@ describe("token snapshots", () => {
 			cacheCreationTokens: 0,
 			estimatedCostUsd: 0.01,
 			modelUsed: "claude-sonnet-4-5",
+			runId: null,
 			createdAt: new Date(now - 60_000).toISOString(), // 1 min ago
 		});
@@ -569,6 +571,7 @@ describe("token snapshots", () => {
 			cacheCreationTokens: 0,
 			estimatedCostUsd: 0.02,
 			modelUsed: "claude-sonnet-4-5",
+			runId: null,
 			createdAt: new Date(now).toISOString(), // now (most recent)
 		});
@@ -580,6 +583,7 @@ describe("token snapshots", () => {
 			cacheCreationTokens: 0,
 			estimatedCostUsd: 0.03,
 			modelUsed: "claude-sonnet-4-5",
+			runId: null,
 			createdAt: new Date(now - 30_000).toISOString(), // 30s ago
 		});
@@ -606,6 +610,7 @@ describe("token snapshots", () => {
 			cacheCreationTokens: 0,
 			estimatedCostUsd: null,
 			modelUsed: null,
+			runId: null,
 			createdAt: time1,
 		});
@@ -617,6 +622,7 @@ describe("token snapshots", () => {
 			cacheCreationTokens: 0,
 			estimatedCostUsd: null,
 			modelUsed: null,
+			runId: null,
 			createdAt: time2,
 		});
@@ -638,6 +644,7 @@ describe("token snapshots", () => {
 			cacheCreationTokens: 0,
 			estimatedCostUsd: null,
 			modelUsed: null,
+			runId: null,
 			createdAt: new Date().toISOString(),
 		});
@@ -649,6 +656,7 @@ describe("token snapshots", () => {
 			cacheCreationTokens: 0,
 			estimatedCostUsd: null,
 			modelUsed: null,
+			runId: null,
 			createdAt: new Date().toISOString(),
 		});
@@ -666,6 +674,7 @@ describe("token snapshots", () => {
 			cacheCreationTokens: 0,
 			estimatedCostUsd: null,
 			modelUsed: null,
+			runId: null,
 			createdAt: new Date().toISOString(),
 		});
@@ -677,6 +686,7 @@ describe("token snapshots", () => {
 			cacheCreationTokens: 0,
 			estimatedCostUsd: null,
 			modelUsed: null,
+			runId: null,
 			createdAt: new Date().toISOString(),
 		});
@@ -698,6 +708,7 @@ describe("token snapshots", () => {
 			cacheCreationTokens: 0,
 			estimatedCostUsd: null,
 			modelUsed: null,
+			runId: null,
 			createdAt: new Date(now - 120_000).toISOString(), // 2 min ago
 		});
@@ -709,6 +720,7 @@ describe("token snapshots", () => {
 			cacheCreationTokens: 0,
 			estimatedCostUsd: null,
 			modelUsed: null,
+			runId: null,
 			createdAt: new Date(now - 10_000).toISOString(), // 10s ago (recent)
 		});
@@ -729,6 +741,7 @@ describe("token snapshots", () => {
 			cacheCreationTokens: 0,
 			estimatedCostUsd: null,
 			modelUsed: null,
+			runId: null,
 			createdAt: new Date().toISOString(),
 		});
@@ -740,6 +753,220 @@ describe("token snapshots", () => {
 		expect(snapshots).toHaveLength(1);
 		expect(snapshots[0]?.agentName).toBe("test-agent");
 	});
+	test("runId roundtrips correctly through snapshot record and retrieval", () => {
 		// Records one snapshot with a run ID and one with runId null, then reads both
 		// back through getLatestSnapshots() called without a run filter.
 		// `store` is defined outside this hunk.
+		const now = Date.now();
+		store.recordSnapshot({
+			agentName: "agent-a",
+			inputTokens: 100,
+			outputTokens: 50,
+			cacheReadTokens: 0,
+			cacheCreationTokens: 0,
+			estimatedCostUsd: null,
+			modelUsed: null,
+			runId: "run-abc",
+			createdAt: new Date(now).toISOString(),
+		});
+		store.recordSnapshot({
+			agentName: "agent-b",
+			inputTokens: 200,
+			outputTokens: 100,
+			cacheReadTokens: 0,
+			cacheCreationTokens: 0,
+			estimatedCostUsd: null,
+			modelUsed: null,
+			runId: null,
+			createdAt: new Date(now).toISOString(),
+		});
+		const snapshots = store.getLatestSnapshots();
 		// Look rows up by agent name so the assertions do not depend on result order.
+		const agentA = snapshots.find((s) => s.agentName === "agent-a");
+		const agentB = snapshots.find((s) => s.agentName === "agent-b");
 		// The string ID comes back unchanged and the missing ID comes back as null.
+		expect(agentA?.runId).toBe("run-abc");
+		expect(agentB?.runId).toBeNull();
+	});
+	test("getLatestSnapshots(runId) returns only snapshots matching that run", () => {
 		// Seeds three agents at the same timestamp: two in "run-001", one in "run-002".
+		const now = Date.now();
+		store.recordSnapshot({
+			agentName: "agent-a",
+			inputTokens: 100,
+			outputTokens: 50,
+			cacheReadTokens: 0,
+			cacheCreationTokens: 0,
+			estimatedCostUsd: null,
+			modelUsed: null,
+			runId: "run-001",
+			createdAt: new Date(now).toISOString(),
+		});
+		store.recordSnapshot({
+			agentName: "agent-b",
+			inputTokens: 200,
+			outputTokens: 100,
+			cacheReadTokens: 0,
+			cacheCreationTokens: 0,
+			estimatedCostUsd: null,
+			modelUsed: null,
+			runId: "run-001",
+			createdAt: new Date(now).toISOString(),
+		});
+		store.recordSnapshot({
+			agentName: "agent-c",
+			inputTokens: 300,
+			outputTokens: 150,
+			cacheReadTokens: 0,
+			cacheCreationTokens: 0,
+			estimatedCostUsd: null,
+			modelUsed: null,
+			runId: "run-002",
+			createdAt: new Date(now).toISOString(),
+		});
 		// Filtering by "run-001" returns exactly its two agents, all tagged with that run.
+		const run001Snapshots = store.getLatestSnapshots("run-001");
+		expect(run001Snapshots).toHaveLength(2);
+		expect(run001Snapshots.every((s) => s.runId === "run-001")).toBe(true);
 		// Filtering by "run-002" returns only agent-c.
+		const run002Snapshots = store.getLatestSnapshots("run-002");
+		expect(run002Snapshots).toHaveLength(1);
+		expect(run002Snapshots[0]?.agentName).toBe("agent-c");
+	});
+	test("getLatestSnapshots(runId) returns empty array for unknown run", () => {
 		// One snapshot exists under "run-001"; the query below uses a different run ID.
+		store.recordSnapshot({
+			agentName: "agent-a",
+			inputTokens: 100,
+			outputTokens: 50,
+			cacheReadTokens: 0,
+			cacheCreationTokens: 0,
+			estimatedCostUsd: null,
+			modelUsed: null,
+			runId: "run-001",
+			createdAt: new Date().toISOString(),
+		});
 		// A run ID with no snapshots yields an empty array (toEqual([])).
+		const snapshots = store.getLatestSnapshots("run-nonexistent");
+		expect(snapshots).toEqual([]);
+	});
+	test("getLatestSnapshots(runId) excludes snapshots with null run_id", () => {
 		// Seeds one snapshot without a run (agent-a) and one in "run-001" (agent-b).
+		const now = Date.now();
+		store.recordSnapshot({
+			agentName: "agent-a",
+			inputTokens: 100,
+			outputTokens: 50,
+			cacheReadTokens: 0,
+			cacheCreationTokens: 0,
+			estimatedCostUsd: null,
+			modelUsed: null,
+			runId: null, // no run
+			createdAt: new Date(now).toISOString(),
+		});
+		store.recordSnapshot({
+			agentName: "agent-b",
+			inputTokens: 200,
+			outputTokens: 100,
+			cacheReadTokens: 0,
+			cacheCreationTokens: 0,
+			estimatedCostUsd: null,
+			modelUsed: null,
+			runId: "run-001",
+			createdAt: new Date(now).toISOString(),
+		});
 		// The run filter must return only agent-b; the null-run row is not included.
+		const run001Snapshots = store.getLatestSnapshots("run-001");
+		expect(run001Snapshots).toHaveLength(1);
+		expect(run001Snapshots[0]?.agentName).toBe("agent-b");
+	});
+	test("getLatestSnapshots(runId) returns latest per agent within the run", () => {
 		// The same agent gets two snapshots in the same run, 30 seconds apart.
+		const now = Date.now();
+		// Two snapshots for agent-a in run-001: should only get the latest
+		store.recordSnapshot({
+			agentName: "agent-a",
+			inputTokens: 100,
+			outputTokens: 50,
+			cacheReadTokens: 0,
+			cacheCreationTokens: 0,
+			estimatedCostUsd: null,
+			modelUsed: null,
+			runId: "run-001",
+			createdAt: new Date(now - 30_000).toISOString(), // older
+		});
+		store.recordSnapshot({
+			agentName: "agent-a",
+			inputTokens: 500,
+			outputTokens: 250,
+			cacheReadTokens: 0,
+			cacheCreationTokens: 0,
+			estimatedCostUsd: null,
+			modelUsed: null,
+			runId: "run-001",
+			createdAt: new Date(now).toISOString(), // latest
+		});
 		// One row comes back for the agent, and inputTokens identifies it as the newer one.
+		const snapshots = store.getLatestSnapshots("run-001");
+		expect(snapshots).toHaveLength(1);
+		expect(snapshots[0]?.inputTokens).toBe(500); // most recent
+	});
+	test("migration adds run_id to existing token_snapshots table", () => {
 		// Close the store handle before the table is rebuilt through a raw connection.
 		// `store` and `dbPath` are defined outside this hunk.
+		store.close();
+		// Create a DB with old token_snapshots schema (no run_id column)
+		const { Database } = require("bun:sqlite");
+		const oldDb = new Database(dbPath);
 		// Drop and recreate the table so it has no run_id column.
+		oldDb.exec("DROP TABLE IF EXISTS token_snapshots");
+		oldDb.exec(`
+			CREATE TABLE token_snapshots (
+				id INTEGER PRIMARY KEY AUTOINCREMENT,
+				agent_name TEXT NOT NULL,
+				input_tokens INTEGER NOT NULL DEFAULT 0,
+				output_tokens INTEGER NOT NULL DEFAULT 0,
+				cache_read_tokens INTEGER NOT NULL DEFAULT 0,
+				cache_creation_tokens INTEGER NOT NULL DEFAULT 0,
+				estimated_cost_usd REAL,
+				model_used TEXT,
+				created_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%f','now'))
+			)
+		`);
 		// Seed one row written under the old schema.
+		oldDb.exec(`
+			INSERT INTO token_snapshots (agent_name, input_tokens, output_tokens, cache_read_tokens, cache_creation_tokens, created_at)
+			VALUES ('old-agent', 100, 50, 0, 0, '2026-01-01T00:00:00.000Z')
+		`);
+		oldDb.close();
+		// Re-open with createMetricsStore which should migrate
+		store = createMetricsStore(dbPath);
+		// Old row should be readable with null run_id
+		const snapshots = store.getLatestSnapshots();
+		expect(snapshots).toHaveLength(1);
+		expect(snapshots[0]?.agentName).toBe("old-agent");
+		expect(snapshots[0]?.runId).toBeNull();
+		// New rows with run_id should work
+		store.recordSnapshot({
+			agentName: "new-agent",
+			inputTokens: 200,
+			outputTokens: 100,
+			cacheReadTokens: 0,
+			cacheCreationTokens: 0,
+			estimatedCostUsd: null,
+			modelUsed: null,
+			runId: "run-xyz",
+			createdAt: new Date().toISOString(),
+		});
 		// The new row must be retrievable through the run filter after migration.
+		const newSnapshots = store.getLatestSnapshots("run-xyz");
+		expect(newSnapshots).toHaveLength(1);
+		expect(newSnapshots[0]?.runId).toBe("run-xyz");
+	});
 });
 // === close ===