@os-eco/overstory-cli 0.7.7 → 0.7.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -796,6 +796,7 @@ describe("costsCommand", () => {
796
796
  cacheCreationTokens: 100,
797
797
  estimatedCostUsd: 0.15,
798
798
  modelUsed: "claude-sonnet-4-5",
799
+ runId: null,
799
800
  createdAt: new Date().toISOString(),
800
801
  });
801
802
  metricsStore.close();
@@ -853,6 +854,7 @@ describe("costsCommand", () => {
853
854
  cacheCreationTokens: 100,
854
855
  estimatedCostUsd: 0.15,
855
856
  modelUsed: "claude-sonnet-4-5",
857
+ runId: null,
856
858
  createdAt: new Date().toISOString(),
857
859
  });
858
860
  metricsStore.close();
@@ -937,6 +939,7 @@ describe("costsCommand", () => {
937
939
  cacheCreationTokens: 0,
938
940
  estimatedCostUsd: 0.15,
939
941
  modelUsed: "claude-sonnet-4-5",
942
+ runId: null,
940
943
  createdAt: new Date().toISOString(),
941
944
  });
942
945
  metricsStore.recordSnapshot({
@@ -947,6 +950,7 @@ describe("costsCommand", () => {
947
950
  cacheCreationTokens: 0,
948
951
  estimatedCostUsd: 0.25,
949
952
  modelUsed: "claude-sonnet-4-5",
953
+ runId: null,
950
954
  createdAt: new Date().toISOString(),
951
955
  });
952
956
  metricsStore.close();
@@ -997,6 +1001,7 @@ describe("costsCommand", () => {
997
1001
  cacheCreationTokens: 0,
998
1002
  estimatedCostUsd: 0.3,
999
1003
  modelUsed: "claude-sonnet-4-5",
1004
+ runId: null,
1000
1005
  createdAt: new Date().toISOString(),
1001
1006
  });
1002
1007
  metricsStore.close();
@@ -367,7 +367,7 @@ async function executeCosts(opts: CostsOpts): Promise<void> {
367
367
  const { store: sessionStore } = openSessionStore(overstoryDir);
368
368
 
369
369
  try {
370
- const snapshots = metricsStore.getLatestSnapshots();
370
+ const snapshots = metricsStore.getLatestSnapshots(runId ?? undefined);
371
371
  if (snapshots.length === 0) {
372
372
  if (json) {
373
373
  jsonOutput("costs", {
@@ -166,6 +166,7 @@ describe("initCommand: .overstory/.gitignore", () => {
166
166
  expect(content).toContain("!hooks.json\n");
167
167
  expect(content).toContain("!groups.json\n");
168
168
  expect(content).toContain("!agent-defs/\n");
169
+ expect(content).toContain("!agent-defs/**\n");
169
170
 
170
171
  // Verify it matches the exported constant
171
172
  expect(content).toBe(OVERSTORY_GITIGNORE);
@@ -588,6 +588,7 @@ export const OVERSTORY_GITIGNORE = `# Wildcard+whitelist: ignore everything, whi
588
588
  !hooks.json
589
589
  !groups.json
590
590
  !agent-defs/
591
+ !agent-defs/**
591
592
  !README.md
592
593
  `;
593
594
 
@@ -583,6 +583,7 @@ async function runLog(opts: {
583
583
  const cost = estimateCost(usage);
584
584
  const metricsDbPath = join(config.project.root, ".overstory", "metrics.db");
585
585
  const metricsStore = createMetricsStore(metricsDbPath);
586
+ const agentSession = getAgentSession(config.project.root, opts.agent);
586
587
  metricsStore.recordSnapshot({
587
588
  agentName: opts.agent,
588
589
  inputTokens: usage.inputTokens,
@@ -591,6 +592,7 @@ async function runLog(opts: {
591
592
  cacheCreationTokens: usage.cacheCreationTokens,
592
593
  estimatedCostUsd: cost,
593
594
  modelUsed: usage.modelUsed,
595
+ runId: agentSession?.runId ?? null,
594
596
  createdAt: new Date().toISOString(),
595
597
  });
596
598
  metricsStore.close();
@@ -366,6 +366,7 @@ recentTasks: []
366
366
  !hooks.json
367
367
  !groups.json
368
368
  !agent-defs/
369
+ !agent-defs/**
369
370
  !README.md
370
371
  `;
371
372
 
@@ -1,8 +1,13 @@
1
- import { describe, expect, test } from "bun:test";
1
+ import { afterEach, beforeEach, describe, expect, test } from "bun:test";
2
+ import { realpathSync } from "node:fs";
3
+ import { mkdtemp } from "node:fs/promises";
4
+ import { tmpdir } from "node:os";
5
+ import { join } from "node:path";
2
6
  import { resolveModel, resolveProviderEnv } from "../agents/manifest.ts";
3
7
  import { HierarchyError } from "../errors.ts";
4
8
  import { ClaudeRuntime } from "../runtimes/claude.ts";
5
9
  import { getRuntime } from "../runtimes/registry.ts";
10
+ import { cleanupTempDir, createTempGitRepo } from "../test-helpers.ts";
6
11
  import type { AgentManifest, OverstoryConfig } from "../types.ts";
7
12
  import {
8
13
  type AutoDispatchOptions,
@@ -15,6 +20,7 @@ import {
15
20
  checkRunSessionLimit,
16
21
  checkTaskLock,
17
22
  extractMulchRecordIds,
23
+ getCurrentBranch,
18
24
  inferDomainsFromFiles,
19
25
  isRunningAsRoot,
20
26
  parentHasScouts,
@@ -1274,3 +1280,59 @@ describe("extractMulchRecordIds", () => {
1274
1280
  expect(result).toContainEqual({ id: "mx-2ce43d", domain: "typescript" });
1275
1281
  });
1276
1282
  });
1283
+
1284
describe("getCurrentBranch", () => {
  let repoDir: string;

  /**
   * Run a git subcommand inside the temp repo and return its trimmed stdout.
   *
   * Throws when git exits non-zero, so a broken fixture step is reported as
   * such instead of surfacing later as a misleading branch-name assertion.
   *
   * @param args - Arguments passed to `git` (without the leading "git")
   */
  async function runGit(args: string[]): Promise<string> {
    const proc = Bun.spawn(["git", ...args], {
      cwd: repoDir,
      stdout: "pipe",
      stderr: "pipe",
    });
    // Read both pipes while waiting for exit so neither is left undrained.
    const [stdout, stderr, exitCode] = await Promise.all([
      new Response(proc.stdout).text(),
      new Response(proc.stderr).text(),
      proc.exited,
    ]);
    if (exitCode !== 0) {
      throw new Error(`git ${args.join(" ")} exited with ${exitCode}: ${stderr.trim()}`);
    }
    return stdout.trim();
  }

  beforeEach(async () => {
    // Each test gets its own fresh repository; the path is canonicalized first.
    repoDir = realpathSync(await createTempGitRepo());
  });

  afterEach(async () => {
    await cleanupTempDir(repoDir);
  });

  test("returns the current branch name", async () => {
    const branch = await getCurrentBranch(repoDir);
    // Either conventional default branch name is accepted.
    expect(branch).toMatch(/^(main|master)$/);
  });

  test("returns feature branch name after checkout", async () => {
    await runGit(["checkout", "-b", "feature/test-branch"]);
    const branch = await getCurrentBranch(repoDir);
    expect(branch).toBe("feature/test-branch");
  });

  test("returns null for detached HEAD", async () => {
    // Checking out the commit hash itself (not a branch) detaches HEAD.
    const hash = await runGit(["rev-parse", "HEAD"]);
    await runGit(["checkout", hash]);
    const branch = await getCurrentBranch(repoDir);
    expect(branch).toBeNull();
  });

  test("returns null for non-git directory", async () => {
    const tmpDir = realpathSync(await mkdtemp(join(tmpdir(), "overstory-notgit-")));
    try {
      const branch = await getCurrentBranch(tmpDir);
      expect(branch).toBeNull();
    } finally {
      // Remove the scratch directory even when the assertion fails.
      await cleanupTempDir(tmpDir);
    }
  });
});
@@ -124,6 +124,7 @@ export interface SlingOptions {
124
124
  dispatchMaxAgents?: string;
125
125
  runtime?: string;
126
126
  noScoutCheck?: boolean;
127
+ baseBranch?: string;
127
128
  }
128
129
 
129
130
  export interface AutoDispatchOptions {
@@ -389,6 +390,28 @@ export function extractMulchRecordIds(primeText: string): Array<{ id: string; do
389
390
  return results;
390
391
  }
391
392
 
393
/**
 * Get the current git branch name for the repo at the given path.
 *
 * Returns null if in detached HEAD state, the directory is not a git repo,
 * git exits non-zero, or the git process cannot be started at all.
 *
 * @param repoRoot - Absolute path to the git repository root
 * @returns The branch name, or null when no branch can be determined
 */
export async function getCurrentBranch(repoRoot: string): Promise<string | null> {
  let stdout: string;
  let exitCode: number;
  try {
    const proc = Bun.spawn(["git", "rev-parse", "--abbrev-ref", "HEAD"], {
      cwd: repoRoot,
      stdout: "pipe",
      // stderr is never read here; discard it rather than leave a pipe undrained.
      stderr: "ignore",
    });
    [stdout, exitCode] = await Promise.all([new Response(proc.stdout).text(), proc.exited]);
  } catch {
    // Spawning itself failed (e.g. git is not on PATH). This lookup is
    // best-effort, so report "no branch" like every other failure mode.
    return null;
  }
  if (exitCode !== 0) return null;
  const branch = stdout.trim();
  // "HEAD" is returned when in detached HEAD state
  if (branch === "HEAD" || branch === "") return null;
  return branch;
}
414
+
392
415
  /**
393
416
  * Entry point for `ov sling <task-id> [flags]`.
394
417
  *
@@ -658,11 +681,17 @@ export async function slingCommand(taskId: string, opts: SlingOptions): Promise<
658
681
  const worktreeBaseDir = join(config.project.root, config.worktrees.baseDir);
659
682
  await mkdir(worktreeBaseDir, { recursive: true });
660
683
 
684
+ // Resolve base branch: --base-branch flag > current HEAD > config.project.canonicalBranch
685
+ const baseBranch =
686
+ opts.baseBranch ??
687
+ (await getCurrentBranch(config.project.root)) ??
688
+ config.project.canonicalBranch;
689
+
661
690
  const { path: worktreePath, branch: branchName } = await createWorktree({
662
691
  repoRoot: config.project.root,
663
692
  baseDir: worktreeBaseDir,
664
693
  agentName: name,
665
- baseBranch: config.project.canonicalBranch,
694
+ baseBranch,
666
695
  taskId: taskId,
667
696
  });
668
697
 
@@ -862,7 +891,13 @@ export async function slingCommand(taskId: string, opts: SlingOptions): Promise<
862
891
  runStore.close();
863
892
  }
864
893
 
865
- // 13b. Wait for Claude Code TUI to render before sending input.
894
+ // 13b. Give slow shells time to finish initializing before polling for TUI readiness.
895
+ const shellDelay = config.runtime?.shellInitDelayMs ?? 0;
896
+ if (shellDelay > 0) {
897
+ await Bun.sleep(shellDelay);
898
+ }
899
+
900
+ // Wait for Claude Code TUI to render before sending input.
866
901
  // Polling capture-pane is more reliable than a fixed sleep because
867
902
  // TUI init time varies by machine load and model state.
868
903
  await waitForTuiReady(tmuxSessionName, (content) => runtime.detectReady(content));
@@ -775,6 +775,74 @@ project:
775
775
  await expect(loadConfig(tempDir)).rejects.toThrow(ValidationError);
776
776
  });
777
777
 
778
/**
 * Run loadConfig(tempDir) with process.stderr.write temporarily replaced by a
 * collector, returning both the loaded config and the captured text.
 *
 * Only string chunks are recorded (Uint8Array writes are dropped), and the
 * original writer is restored even when loading throws.
 */
async function loadConfigCapturingStderr(): Promise<{
  config: Awaited<ReturnType<typeof loadConfig>>;
  stderr: string;
}> {
  const origWrite = process.stderr.write;
  let captured = "";
  process.stderr.write = ((s: string | Uint8Array) => {
    if (typeof s === "string") captured += s;
    return true;
  }) as typeof process.stderr.write;
  try {
    const config = await loadConfig(tempDir);
    return { config, stderr: captured };
  } finally {
    process.stderr.write = origWrite;
  }
}

test("resets negative shellInitDelayMs to 0 with warning", async () => {
  await writeConfig("runtime:\n shellInitDelayMs: -100\n");
  const { config, stderr } = await loadConfigCapturingStderr();
  expect(config.runtime?.shellInitDelayMs).toBe(0);
  expect(stderr).toContain("WARNING: runtime.shellInitDelayMs");
});

test("resets Infinity shellInitDelayMs to 0 with warning", async () => {
  // ".inf" is the YAML spelling of positive infinity.
  await writeConfig("runtime:\n shellInitDelayMs: .inf\n");
  const { config, stderr } = await loadConfigCapturingStderr();
  expect(config.runtime?.shellInitDelayMs).toBe(0);
  expect(stderr).toContain("WARNING: runtime.shellInitDelayMs");
});

test("warns when shellInitDelayMs exceeds 30s", async () => {
  await writeConfig("runtime:\n shellInitDelayMs: 60000\n");
  const { config, stderr } = await loadConfigCapturingStderr();
  // An over-long delay is kept as configured; only a warning is emitted.
  expect(config.runtime?.shellInitDelayMs).toBe(60000);
  expect(stderr).toContain("WARNING: runtime.shellInitDelayMs is 60000ms");
});

test("accepts valid shellInitDelayMs without warning", async () => {
  await writeConfig("runtime:\n shellInitDelayMs: 2000\n");
  const { config, stderr } = await loadConfigCapturingStderr();
  expect(config.runtime?.shellInitDelayMs).toBe(2000);
  expect(stderr).not.toContain("shellInitDelayMs");
});
845
+
778
846
  test("rejects qualityGate with empty description", async () => {
779
847
  await writeConfig(`
780
848
  project:
package/src/config.ts CHANGED
@@ -64,6 +64,7 @@ export const DEFAULT_CONFIG: OverstoryConfig = {
64
64
  },
65
65
  runtime: {
66
66
  default: "claude",
67
+ shellInitDelayMs: 0,
67
68
  pi: {
68
69
  provider: "anthropic",
69
70
  modelMap: {
@@ -664,6 +665,21 @@ function validateConfig(config: OverstoryConfig): void {
664
665
  }
665
666
  }
666
667
 
668
+ // runtime.shellInitDelayMs: validate if present
669
+ if (config.runtime?.shellInitDelayMs !== undefined) {
670
+ const delay = config.runtime.shellInitDelayMs;
671
+ if (typeof delay !== "number" || delay < 0 || !Number.isFinite(delay)) {
672
+ process.stderr.write(
673
+ `[overstory] WARNING: runtime.shellInitDelayMs must be a non-negative number. Got: ${delay}. Using default (0).\n`,
674
+ );
675
+ config.runtime.shellInitDelayMs = 0;
676
+ } else if (delay > 30_000) {
677
+ process.stderr.write(
678
+ `[overstory] WARNING: runtime.shellInitDelayMs is ${delay}ms (>${30}s). This adds delay before every agent spawn. Consider a lower value.\n`,
679
+ );
680
+ }
681
+ }
682
+
667
683
  // models: validate each value — accepts aliases and provider-prefixed refs
668
684
  const validAliases = ["sonnet", "opus", "haiku"];
669
685
  const toolHeavyRoles = ["builder", "scout"];
@@ -114,6 +114,7 @@ describe("checkStructure", () => {
114
114
  !hooks.json
115
115
  !groups.json
116
116
  !agent-defs/
117
+ !agent-defs/**
117
118
  `,
118
119
  );
119
120
 
@@ -111,6 +111,7 @@ export const checkStructure: DoctorCheckFn = async (
111
111
  "!hooks.json",
112
112
  "!groups.json",
113
113
  "!agent-defs/",
114
+ "!agent-defs/**",
114
115
  ];
115
116
 
116
117
  try {
package/src/index.ts CHANGED
@@ -45,7 +45,7 @@ import { OverstoryError, WorktreeError } from "./errors.ts";
45
45
  import { jsonError } from "./json.ts";
46
46
  import { brand, chalk, muted, setQuiet } from "./logging/color.ts";
47
47
 
48
- export const VERSION = "0.7.7";
48
+ export const VERSION = "0.7.9";
49
49
 
50
50
  const rawArgs = process.argv.slice(2);
51
51
 
@@ -267,6 +267,7 @@ program
267
267
  .option("--no-scout-check", "Suppress the parentHasScouts scout-before-build warning")
268
268
  .option("--dispatch-max-agents <n>", "Per-lead max agents ceiling (injected into overlay)")
269
269
  .option("--runtime <name>", "Runtime adapter (default: config or claude)")
270
+ .option("--base-branch <branch>", "Base branch for worktree creation (default: current HEAD)")
270
271
  .option("--json", "Output result as JSON")
271
272
  .action(async (taskId, opts) => {
272
273
  await slingCommand(taskId, opts);
@@ -0,0 +1,258 @@
1
+ import { describe, expect, test } from "bun:test";
2
+ import { estimateCost, getPricingForModel } from "./pricing";
3
+
4
describe("getPricingForModel()", () => {
  /**
   * Look up `model` and assert that it resolves to a known tier with the given
   * input price and, when supplied, output price (both per million tokens).
   */
  const expectTier = (model: string, inputPerMTok: number, outputPerMTok?: number): void => {
    const pricing = getPricingForModel(model);
    expect(pricing).not.toBeNull();
    expect(pricing?.inputPerMTok).toBe(inputPerMTok);
    if (outputPerMTok !== undefined) {
      expect(pricing?.outputPerMTok).toBe(outputPerMTok);
    }
  };

  describe("Claude tiers", () => {
    test("matches opus by substring in full model ID", () => {
      expectTier("claude-opus-4-20250514", 15, 75);
    });

    test("matches sonnet by substring in full model ID", () => {
      expectTier("claude-sonnet-4-20250514", 3, 15);
    });

    test("matches haiku by substring in full model ID", () => {
      expectTier("claude-haiku-3-5-20241022", 0.8, 4);
    });
  });

  describe("OpenAI tiers", () => {
    test("matches gpt-4o-mini", () => {
      expectTier("gpt-4o-mini", 0.15);
    });

    test("matches gpt-4o", () => {
      expectTier("gpt-4o", 2.5);
    });

    test("matches gpt-5", () => {
      expectTier("gpt-5", 10);
    });

    test("matches o3", () => {
      expectTier("o3", 10, 40);
    });

    test("matches o1", () => {
      expectTier("o1", 15, 60);
    });
  });

  describe("Priority ordering", () => {
    test("gpt-4o-mini matches before gpt-4o (substring overlap)", () => {
      const miniTier = getPricingForModel("gpt-4o-mini");
      const fullTier = getPricingForModel("gpt-4o");
      expect(miniTier).not.toBeNull();
      expect(fullTier).not.toBeNull();
      if (miniTier !== null && fullTier !== null) {
        // The mini tier must be the cheaper of the two...
        expect(miniTier.inputPerMTok).toBeLessThan(fullTier.inputPerMTok);
        // ...and "gpt-4o-mini" must land on mini pricing rather than gpt-4o pricing.
        expect(miniTier.inputPerMTok).toBe(0.15);
      }
    });

    test("o3 matches before o1 (o1 string contains o1, o3 does not contain o1)", () => {
      const o3Tier = getPricingForModel("o3");
      const o1Tier = getPricingForModel("o1");
      expect(o3Tier).not.toBeNull();
      expect(o1Tier).not.toBeNull();
      if (o3Tier !== null && o1Tier !== null) {
        expect(o3Tier.outputPerMTok).toBe(40);
        expect(o1Tier.outputPerMTok).toBe(60);
      }
    });
  });

  describe("Gemini tiers", () => {
    test("matches gemini-flash by 'flash' substring", () => {
      expectTier("gemini-flash-2.0", 0.1, 0.4);
    });

    test("matches gemini-pro by 'gemini' + 'pro' substrings", () => {
      expectTier("gemini-2.0-pro-exp", 1.25, 5);
    });
  });

  describe("Case insensitivity", () => {
    test("Claude-OPUS-4 resolves correctly", () => {
      expectTier("Claude-OPUS-4", 15);
    });

    test("SONNET resolves correctly", () => {
      expectTier("SONNET", 3);
    });

    test("Haiku resolves correctly", () => {
      expectTier("Haiku", 0.8);
    });
  });

  describe("Unknown models", () => {
    // [test title, model string] pairs that must not match any pricing tier.
    const unmatched: ReadonlyArray<readonly [string, string]> = [
      ["returns null for llama-3-70b", "llama-3-70b"],
      ["returns null for empty string", ""],
      ["returns null for random gibberish", "xyzzy-foo-bar-9000"],
    ];

    for (const [title, model] of unmatched) {
      test(title, () => {
        expect(getPricingForModel(model)).toBeNull();
      });
    }
  });
});
136
+
137
describe("estimateCost()", () => {
  /** All-zero token counts; each case spreads this and overrides only what it exercises. */
  const NO_TOKENS = {
    inputTokens: 0,
    outputTokens: 0,
    cacheReadTokens: 0,
    cacheCreationTokens: 0,
  };

  test("Typical Claude Opus usage: 1M input, 100K output, 500K cacheRead, 200K cacheCreation → $24.00", () => {
    // input     1.0 M * 15   = 15.00
    // output    0.1 M * 75   =  7.50
    // cacheRead 0.5 M * 1.5  =  0.75
    // cacheNew  0.2 M * 3.75 =  0.75
    // sum                    = 24.00
    const total = estimateCost({
      inputTokens: 1_000_000,
      outputTokens: 100_000,
      cacheReadTokens: 500_000,
      cacheCreationTokens: 200_000,
      modelUsed: "claude-opus-4-20250514",
    });
    expect(total).toBe(24.0);
  });

  test("Typical Claude Sonnet usage: 500K input, 50K output, 100K cacheRead, 50K cacheCreation", () => {
    // input     0.50 M * 3    = 1.50
    // output    0.05 M * 15   = 0.75
    // cacheRead 0.10 M * 0.3  = 0.03
    // cacheNew  0.05 M * 0.75 = 0.0375
    // sum                     = 2.3175
    const total = estimateCost({
      inputTokens: 500_000,
      outputTokens: 50_000,
      cacheReadTokens: 100_000,
      cacheCreationTokens: 50_000,
      modelUsed: "claude-sonnet-4-20250514",
    });
    expect(total).toBeCloseTo(2.3175, 4);
  });

  test("Zero tokens returns 0 (not null)", () => {
    const total = estimateCost({ ...NO_TOKENS, modelUsed: "claude-opus-4" });
    expect(total).toBe(0);
  });

  test("Null modelUsed returns null", () => {
    const total = estimateCost({
      ...NO_TOKENS,
      inputTokens: 1000,
      outputTokens: 500,
      modelUsed: null,
    });
    expect(total).toBeNull();
  });

  test("Unknown model returns null", () => {
    const total = estimateCost({
      ...NO_TOKENS,
      inputTokens: 1000,
      outputTokens: 500,
      modelUsed: "llama-3-70b",
    });
    expect(total).toBeNull();
  });

  test("Input-only usage: only inputTokens > 0, rest zero", () => {
    const total = estimateCost({
      ...NO_TOKENS,
      inputTokens: 1_000_000,
      modelUsed: "claude-sonnet-4",
    });
    // 1 M input tokens * 3 = 3.00
    expect(total).toBe(3.0);
  });

  test("Output-only usage: only outputTokens > 0, rest zero", () => {
    const total = estimateCost({
      ...NO_TOKENS,
      outputTokens: 1_000_000,
      modelUsed: "claude-sonnet-4",
    });
    // 1 M output tokens * 15 = 15.00
    expect(total).toBe(15.0);
  });

  test("Cache-heavy usage: large cacheRead + cacheCreation, verify math", () => {
    const total = estimateCost({
      ...NO_TOKENS,
      cacheReadTokens: 10_000_000,
      cacheCreationTokens: 5_000_000,
      modelUsed: "claude-opus-4",
    });
    // cacheRead 10 M * 1.5  = 15.00
    // cacheNew   5 M * 3.75 = 18.75
    // sum                   = 33.75
    expect(total).toBeCloseTo(33.75, 5);
  });
});
241
+
242
describe("Cache pricing ratios", () => {
  test("Claude cache read is 10% of input price (verified on opus)", () => {
    const opus = getPricingForModel("claude-opus-4");
    expect(opus).not.toBeNull();
    if (opus !== null) {
      // Cache-read price relative to the plain input price.
      expect(opus.cacheReadPerMTok / opus.inputPerMTok).toBeCloseTo(0.1, 10);
    }
  });

  test("Claude cache creation is 25% of input price (verified on sonnet)", () => {
    const sonnet = getPricingForModel("claude-sonnet-4");
    expect(sonnet).not.toBeNull();
    if (sonnet !== null) {
      // Cache-creation price relative to the plain input price.
      expect(sonnet.cacheCreationPerMTok / sonnet.inputPerMTok).toBeCloseTo(0.25, 10);
    }
  });
});