@os-eco/overstory-cli 0.7.7 → 0.7.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +105 -3
- package/package.json +1 -1
- package/src/agents/manifest.test.ts +168 -1
- package/src/agents/manifest.ts +23 -2
- package/src/commands/agents.ts +1 -0
- package/src/commands/coordinator.test.ts +131 -2
- package/src/commands/coordinator.ts +40 -9
- package/src/commands/costs.test.ts +5 -0
- package/src/commands/costs.ts +1 -1
- package/src/commands/init.test.ts +1 -0
- package/src/commands/init.ts +1 -0
- package/src/commands/log.ts +2 -0
- package/src/commands/prime.test.ts +1 -0
- package/src/commands/sling.test.ts +63 -1
- package/src/commands/sling.ts +37 -2
- package/src/config.test.ts +68 -0
- package/src/config.ts +16 -0
- package/src/doctor/structure.test.ts +1 -0
- package/src/doctor/structure.ts +1 -0
- package/src/index.ts +2 -1
- package/src/metrics/pricing.test.ts +258 -0
- package/src/metrics/store.test.ts +227 -0
- package/src/metrics/store.ts +40 -5
- package/src/runtimes/gemini.test.ts +537 -0
- package/src/runtimes/gemini.ts +235 -0
- package/src/runtimes/registry.test.ts +15 -1
- package/src/runtimes/registry.ts +2 -0
- package/src/schema-consistency.test.ts +1 -0
- package/src/types.ts +8 -0
- package/src/worktree/tmux.test.ts +49 -0
- package/src/worktree/tmux.ts +33 -0
|
package/src/commands/costs.test.ts
CHANGED
|
@@ -796,6 +796,7 @@ describe("costsCommand", () => {
|
|
|
796
796
|
cacheCreationTokens: 100,
|
|
797
797
|
estimatedCostUsd: 0.15,
|
|
798
798
|
modelUsed: "claude-sonnet-4-5",
|
|
799
|
+
runId: null,
|
|
799
800
|
createdAt: new Date().toISOString(),
|
|
800
801
|
});
|
|
801
802
|
metricsStore.close();
|
|
@@ -853,6 +854,7 @@ describe("costsCommand", () => {
|
|
|
853
854
|
cacheCreationTokens: 100,
|
|
854
855
|
estimatedCostUsd: 0.15,
|
|
855
856
|
modelUsed: "claude-sonnet-4-5",
|
|
857
|
+
runId: null,
|
|
856
858
|
createdAt: new Date().toISOString(),
|
|
857
859
|
});
|
|
858
860
|
metricsStore.close();
|
|
@@ -937,6 +939,7 @@ describe("costsCommand", () => {
|
|
|
937
939
|
cacheCreationTokens: 0,
|
|
938
940
|
estimatedCostUsd: 0.15,
|
|
939
941
|
modelUsed: "claude-sonnet-4-5",
|
|
942
|
+
runId: null,
|
|
940
943
|
createdAt: new Date().toISOString(),
|
|
941
944
|
});
|
|
942
945
|
metricsStore.recordSnapshot({
|
|
@@ -947,6 +950,7 @@ describe("costsCommand", () => {
|
|
|
947
950
|
cacheCreationTokens: 0,
|
|
948
951
|
estimatedCostUsd: 0.25,
|
|
949
952
|
modelUsed: "claude-sonnet-4-5",
|
|
953
|
+
runId: null,
|
|
950
954
|
createdAt: new Date().toISOString(),
|
|
951
955
|
});
|
|
952
956
|
metricsStore.close();
|
|
@@ -997,6 +1001,7 @@ describe("costsCommand", () => {
|
|
|
997
1001
|
cacheCreationTokens: 0,
|
|
998
1002
|
estimatedCostUsd: 0.3,
|
|
999
1003
|
modelUsed: "claude-sonnet-4-5",
|
|
1004
|
+
runId: null,
|
|
1000
1005
|
createdAt: new Date().toISOString(),
|
|
1001
1006
|
});
|
|
1002
1007
|
metricsStore.close();
|
package/src/commands/costs.ts
CHANGED
|
@@ -367,7 +367,7 @@ async function executeCosts(opts: CostsOpts): Promise<void> {
|
|
|
367
367
|
const { store: sessionStore } = openSessionStore(overstoryDir);
|
|
368
368
|
|
|
369
369
|
try {
|
|
370
|
-
const snapshots = metricsStore.getLatestSnapshots();
|
|
370
|
+
const snapshots = metricsStore.getLatestSnapshots(runId ?? undefined);
|
|
371
371
|
if (snapshots.length === 0) {
|
|
372
372
|
if (json) {
|
|
373
373
|
jsonOutput("costs", {
|
|
package/src/commands/init.test.ts
CHANGED
|
@@ -166,6 +166,7 @@ describe("initCommand: .overstory/.gitignore", () => {
|
|
|
166
166
|
expect(content).toContain("!hooks.json\n");
|
|
167
167
|
expect(content).toContain("!groups.json\n");
|
|
168
168
|
expect(content).toContain("!agent-defs/\n");
|
|
169
|
+
expect(content).toContain("!agent-defs/**\n");
|
|
169
170
|
|
|
170
171
|
// Verify it matches the exported constant
|
|
171
172
|
expect(content).toBe(OVERSTORY_GITIGNORE);
|
package/src/commands/init.ts
CHANGED
package/src/commands/log.ts
CHANGED
|
@@ -583,6 +583,7 @@ async function runLog(opts: {
|
|
|
583
583
|
const cost = estimateCost(usage);
|
|
584
584
|
const metricsDbPath = join(config.project.root, ".overstory", "metrics.db");
|
|
585
585
|
const metricsStore = createMetricsStore(metricsDbPath);
|
|
586
|
+
const agentSession = getAgentSession(config.project.root, opts.agent);
|
|
586
587
|
metricsStore.recordSnapshot({
|
|
587
588
|
agentName: opts.agent,
|
|
588
589
|
inputTokens: usage.inputTokens,
|
|
@@ -591,6 +592,7 @@ async function runLog(opts: {
|
|
|
591
592
|
cacheCreationTokens: usage.cacheCreationTokens,
|
|
592
593
|
estimatedCostUsd: cost,
|
|
593
594
|
modelUsed: usage.modelUsed,
|
|
595
|
+
runId: agentSession?.runId ?? null,
|
|
594
596
|
createdAt: new Date().toISOString(),
|
|
595
597
|
});
|
|
596
598
|
metricsStore.close();
|
|
package/src/commands/sling.test.ts
CHANGED
|
@@ -1,8 +1,13 @@
|
|
|
1
|
-
import { describe, expect, test } from "bun:test";
|
|
1
|
+
import { afterEach, beforeEach, describe, expect, test } from "bun:test";
|
|
2
|
+
import { realpathSync } from "node:fs";
|
|
3
|
+
import { mkdtemp } from "node:fs/promises";
|
|
4
|
+
import { tmpdir } from "node:os";
|
|
5
|
+
import { join } from "node:path";
|
|
2
6
|
import { resolveModel, resolveProviderEnv } from "../agents/manifest.ts";
|
|
3
7
|
import { HierarchyError } from "../errors.ts";
|
|
4
8
|
import { ClaudeRuntime } from "../runtimes/claude.ts";
|
|
5
9
|
import { getRuntime } from "../runtimes/registry.ts";
|
|
10
|
+
import { cleanupTempDir, createTempGitRepo } from "../test-helpers.ts";
|
|
6
11
|
import type { AgentManifest, OverstoryConfig } from "../types.ts";
|
|
7
12
|
import {
|
|
8
13
|
type AutoDispatchOptions,
|
|
@@ -15,6 +20,7 @@ import {
|
|
|
15
20
|
checkRunSessionLimit,
|
|
16
21
|
checkTaskLock,
|
|
17
22
|
extractMulchRecordIds,
|
|
23
|
+
getCurrentBranch,
|
|
18
24
|
inferDomainsFromFiles,
|
|
19
25
|
isRunningAsRoot,
|
|
20
26
|
parentHasScouts,
|
|
@@ -1274,3 +1280,59 @@ describe("extractMulchRecordIds", () => {
|
|
|
1274
1280
|
expect(result).toContainEqual({ id: "mx-2ce43d", domain: "typescript" });
|
|
1275
1281
|
});
|
|
1276
1282
|
});
|
|
1283
|
+
|
|
1284
|
+
describe("getCurrentBranch", () => {
|
|
1285
|
+
let repoDir: string;
|
|
1286
|
+
|
|
1287
|
+
beforeEach(async () => {
|
|
1288
|
+
repoDir = realpathSync(await createTempGitRepo());
|
|
1289
|
+
});
|
|
1290
|
+
|
|
1291
|
+
afterEach(async () => {
|
|
1292
|
+
await cleanupTempDir(repoDir);
|
|
1293
|
+
});
|
|
1294
|
+
|
|
1295
|
+
test("returns the current branch name", async () => {
|
|
1296
|
+
const branch = await getCurrentBranch(repoDir);
|
|
1297
|
+
expect(branch).toMatch(/^(main|master)$/);
|
|
1298
|
+
});
|
|
1299
|
+
|
|
1300
|
+
test("returns feature branch name after checkout", async () => {
|
|
1301
|
+
const proc = Bun.spawn(["git", "checkout", "-b", "feature/test-branch"], {
|
|
1302
|
+
cwd: repoDir,
|
|
1303
|
+
stdout: "pipe",
|
|
1304
|
+
stderr: "pipe",
|
|
1305
|
+
});
|
|
1306
|
+
await proc.exited;
|
|
1307
|
+
const branch = await getCurrentBranch(repoDir);
|
|
1308
|
+
expect(branch).toBe("feature/test-branch");
|
|
1309
|
+
});
|
|
1310
|
+
|
|
1311
|
+
test("returns null for detached HEAD", async () => {
|
|
1312
|
+
const hashProc = Bun.spawn(["git", "rev-parse", "HEAD"], {
|
|
1313
|
+
cwd: repoDir,
|
|
1314
|
+
stdout: "pipe",
|
|
1315
|
+
stderr: "pipe",
|
|
1316
|
+
});
|
|
1317
|
+
const hash = (await new Response(hashProc.stdout).text()).trim();
|
|
1318
|
+
await hashProc.exited;
|
|
1319
|
+
const proc = Bun.spawn(["git", "checkout", hash], {
|
|
1320
|
+
cwd: repoDir,
|
|
1321
|
+
stdout: "pipe",
|
|
1322
|
+
stderr: "pipe",
|
|
1323
|
+
});
|
|
1324
|
+
await proc.exited;
|
|
1325
|
+
const branch = await getCurrentBranch(repoDir);
|
|
1326
|
+
expect(branch).toBeNull();
|
|
1327
|
+
});
|
|
1328
|
+
|
|
1329
|
+
test("returns null for non-git directory", async () => {
|
|
1330
|
+
const tmpDir = realpathSync(await mkdtemp(join(tmpdir(), "overstory-notgit-")));
|
|
1331
|
+
try {
|
|
1332
|
+
const branch = await getCurrentBranch(tmpDir);
|
|
1333
|
+
expect(branch).toBeNull();
|
|
1334
|
+
} finally {
|
|
1335
|
+
await cleanupTempDir(tmpDir);
|
|
1336
|
+
}
|
|
1337
|
+
});
|
|
1338
|
+
});
|
package/src/commands/sling.ts
CHANGED
|
@@ -124,6 +124,7 @@ export interface SlingOptions {
|
|
|
124
124
|
dispatchMaxAgents?: string;
|
|
125
125
|
runtime?: string;
|
|
126
126
|
noScoutCheck?: boolean;
|
|
127
|
+
baseBranch?: string;
|
|
127
128
|
}
|
|
128
129
|
|
|
129
130
|
export interface AutoDispatchOptions {
|
|
@@ -389,6 +390,28 @@ export function extractMulchRecordIds(primeText: string): Array<{ id: string; do
|
|
|
389
390
|
return results;
|
|
390
391
|
}
|
|
391
392
|
|
|
393
|
+
/**
|
|
394
|
+
* Get the current git branch name for the repo at the given path.
|
|
395
|
+
*
|
|
396
|
+
* Returns null if in detached HEAD state, the directory is not a git repo,
|
|
397
|
+
* or git exits non-zero.
|
|
398
|
+
*
|
|
399
|
+
* @param repoRoot - Absolute path to the git repository root
|
|
400
|
+
*/
|
|
401
|
+
export async function getCurrentBranch(repoRoot: string): Promise<string | null> {
|
|
402
|
+
const proc = Bun.spawn(["git", "rev-parse", "--abbrev-ref", "HEAD"], {
|
|
403
|
+
cwd: repoRoot,
|
|
404
|
+
stdout: "pipe",
|
|
405
|
+
stderr: "pipe",
|
|
406
|
+
});
|
|
407
|
+
const [stdout, exitCode] = await Promise.all([new Response(proc.stdout).text(), proc.exited]);
|
|
408
|
+
if (exitCode !== 0) return null;
|
|
409
|
+
const branch = stdout.trim();
|
|
410
|
+
// "HEAD" is returned when in detached HEAD state
|
|
411
|
+
if (branch === "HEAD" || branch === "") return null;
|
|
412
|
+
return branch;
|
|
413
|
+
}
|
|
414
|
+
|
|
392
415
|
/**
|
|
393
416
|
* Entry point for `ov sling <task-id> [flags]`.
|
|
394
417
|
*
|
|
@@ -658,11 +681,17 @@ export async function slingCommand(taskId: string, opts: SlingOptions): Promise<
|
|
|
658
681
|
const worktreeBaseDir = join(config.project.root, config.worktrees.baseDir);
|
|
659
682
|
await mkdir(worktreeBaseDir, { recursive: true });
|
|
660
683
|
|
|
684
|
+
// Resolve base branch: --base-branch flag > current HEAD > config.project.canonicalBranch
|
|
685
|
+
const baseBranch =
|
|
686
|
+
opts.baseBranch ??
|
|
687
|
+
(await getCurrentBranch(config.project.root)) ??
|
|
688
|
+
config.project.canonicalBranch;
|
|
689
|
+
|
|
661
690
|
const { path: worktreePath, branch: branchName } = await createWorktree({
|
|
662
691
|
repoRoot: config.project.root,
|
|
663
692
|
baseDir: worktreeBaseDir,
|
|
664
693
|
agentName: name,
|
|
665
|
-
baseBranch: config.project.canonicalBranch,
|
|
694
|
+
baseBranch,
|
|
666
695
|
taskId: taskId,
|
|
667
696
|
});
|
|
668
697
|
|
|
@@ -862,7 +891,13 @@ export async function slingCommand(taskId: string, opts: SlingOptions): Promise<
|
|
|
862
891
|
runStore.close();
|
|
863
892
|
}
|
|
864
893
|
|
|
865
|
-
// 13b. Wait for Claude Code TUI to render before sending input.
|
|
894
|
+
// 13b. Give slow shells time to finish initializing before polling for TUI readiness.
|
|
895
|
+
const shellDelay = config.runtime?.shellInitDelayMs ?? 0;
|
|
896
|
+
if (shellDelay > 0) {
|
|
897
|
+
await Bun.sleep(shellDelay);
|
|
898
|
+
}
|
|
899
|
+
|
|
900
|
+
// Wait for Claude Code TUI to render before sending input.
|
|
866
901
|
// Polling capture-pane is more reliable than a fixed sleep because
|
|
867
902
|
// TUI init time varies by machine load and model state.
|
|
868
903
|
await waitForTuiReady(tmuxSessionName, (content) => runtime.detectReady(content));
|
package/src/config.test.ts
CHANGED
|
@@ -775,6 +775,74 @@ project:
|
|
|
775
775
|
await expect(loadConfig(tempDir)).rejects.toThrow(ValidationError);
|
|
776
776
|
});
|
|
777
777
|
|
|
778
|
+
test("resets negative shellInitDelayMs to 0 with warning", async () => {
|
|
779
|
+
await writeConfig("runtime:\n shellInitDelayMs: -100\n");
|
|
780
|
+
const origWrite = process.stderr.write;
|
|
781
|
+
let capturedStderr = "";
|
|
782
|
+
process.stderr.write = ((s: string | Uint8Array) => {
|
|
783
|
+
if (typeof s === "string") capturedStderr += s;
|
|
784
|
+
return true;
|
|
785
|
+
}) as typeof process.stderr.write;
|
|
786
|
+
try {
|
|
787
|
+
const config = await loadConfig(tempDir);
|
|
788
|
+
expect(config.runtime?.shellInitDelayMs).toBe(0);
|
|
789
|
+
} finally {
|
|
790
|
+
process.stderr.write = origWrite;
|
|
791
|
+
}
|
|
792
|
+
expect(capturedStderr).toContain("WARNING: runtime.shellInitDelayMs");
|
|
793
|
+
});
|
|
794
|
+
|
|
795
|
+
test("resets Infinity shellInitDelayMs to 0 with warning", async () => {
|
|
796
|
+
await writeConfig("runtime:\n shellInitDelayMs: .inf\n");
|
|
797
|
+
const origWrite = process.stderr.write;
|
|
798
|
+
let capturedStderr = "";
|
|
799
|
+
process.stderr.write = ((s: string | Uint8Array) => {
|
|
800
|
+
if (typeof s === "string") capturedStderr += s;
|
|
801
|
+
return true;
|
|
802
|
+
}) as typeof process.stderr.write;
|
|
803
|
+
try {
|
|
804
|
+
const config = await loadConfig(tempDir);
|
|
805
|
+
expect(config.runtime?.shellInitDelayMs).toBe(0);
|
|
806
|
+
} finally {
|
|
807
|
+
process.stderr.write = origWrite;
|
|
808
|
+
}
|
|
809
|
+
expect(capturedStderr).toContain("WARNING: runtime.shellInitDelayMs");
|
|
810
|
+
});
|
|
811
|
+
|
|
812
|
+
test("warns when shellInitDelayMs exceeds 30s", async () => {
|
|
813
|
+
await writeConfig("runtime:\n shellInitDelayMs: 60000\n");
|
|
814
|
+
const origWrite = process.stderr.write;
|
|
815
|
+
let capturedStderr = "";
|
|
816
|
+
process.stderr.write = ((s: string | Uint8Array) => {
|
|
817
|
+
if (typeof s === "string") capturedStderr += s;
|
|
818
|
+
return true;
|
|
819
|
+
}) as typeof process.stderr.write;
|
|
820
|
+
try {
|
|
821
|
+
const config = await loadConfig(tempDir);
|
|
822
|
+
expect(config.runtime?.shellInitDelayMs).toBe(60000);
|
|
823
|
+
} finally {
|
|
824
|
+
process.stderr.write = origWrite;
|
|
825
|
+
}
|
|
826
|
+
expect(capturedStderr).toContain("WARNING: runtime.shellInitDelayMs is 60000ms");
|
|
827
|
+
});
|
|
828
|
+
|
|
829
|
+
test("accepts valid shellInitDelayMs without warning", async () => {
|
|
830
|
+
await writeConfig("runtime:\n shellInitDelayMs: 2000\n");
|
|
831
|
+
const origWrite = process.stderr.write;
|
|
832
|
+
let capturedStderr = "";
|
|
833
|
+
process.stderr.write = ((s: string | Uint8Array) => {
|
|
834
|
+
if (typeof s === "string") capturedStderr += s;
|
|
835
|
+
return true;
|
|
836
|
+
}) as typeof process.stderr.write;
|
|
837
|
+
try {
|
|
838
|
+
const config = await loadConfig(tempDir);
|
|
839
|
+
expect(config.runtime?.shellInitDelayMs).toBe(2000);
|
|
840
|
+
} finally {
|
|
841
|
+
process.stderr.write = origWrite;
|
|
842
|
+
}
|
|
843
|
+
expect(capturedStderr).not.toContain("shellInitDelayMs");
|
|
844
|
+
});
|
|
845
|
+
|
|
778
846
|
test("rejects qualityGate with empty description", async () => {
|
|
779
847
|
await writeConfig(`
|
|
780
848
|
project:
|
package/src/config.ts
CHANGED
|
@@ -64,6 +64,7 @@ export const DEFAULT_CONFIG: OverstoryConfig = {
|
|
|
64
64
|
},
|
|
65
65
|
runtime: {
|
|
66
66
|
default: "claude",
|
|
67
|
+
shellInitDelayMs: 0,
|
|
67
68
|
pi: {
|
|
68
69
|
provider: "anthropic",
|
|
69
70
|
modelMap: {
|
|
@@ -664,6 +665,21 @@ function validateConfig(config: OverstoryConfig): void {
|
|
|
664
665
|
}
|
|
665
666
|
}
|
|
666
667
|
|
|
668
|
+
// runtime.shellInitDelayMs: validate if present
|
|
669
|
+
if (config.runtime?.shellInitDelayMs !== undefined) {
|
|
670
|
+
const delay = config.runtime.shellInitDelayMs;
|
|
671
|
+
if (typeof delay !== "number" || delay < 0 || !Number.isFinite(delay)) {
|
|
672
|
+
process.stderr.write(
|
|
673
|
+
`[overstory] WARNING: runtime.shellInitDelayMs must be a non-negative number. Got: ${delay}. Using default (0).\n`,
|
|
674
|
+
);
|
|
675
|
+
config.runtime.shellInitDelayMs = 0;
|
|
676
|
+
} else if (delay > 30_000) {
|
|
677
|
+
process.stderr.write(
|
|
678
|
+
`[overstory] WARNING: runtime.shellInitDelayMs is ${delay}ms (>${30}s). This adds delay before every agent spawn. Consider a lower value.\n`,
|
|
679
|
+
);
|
|
680
|
+
}
|
|
681
|
+
}
|
|
682
|
+
|
|
667
683
|
// models: validate each value — accepts aliases and provider-prefixed refs
|
|
668
684
|
const validAliases = ["sonnet", "opus", "haiku"];
|
|
669
685
|
const toolHeavyRoles = ["builder", "scout"];
|
package/src/doctor/structure.ts
CHANGED
package/src/index.ts
CHANGED
|
@@ -45,7 +45,7 @@ import { OverstoryError, WorktreeError } from "./errors.ts";
|
|
|
45
45
|
import { jsonError } from "./json.ts";
|
|
46
46
|
import { brand, chalk, muted, setQuiet } from "./logging/color.ts";
|
|
47
47
|
|
|
48
|
-
export const VERSION = "0.7.7";
|
|
48
|
+
export const VERSION = "0.7.9";
|
|
49
49
|
|
|
50
50
|
const rawArgs = process.argv.slice(2);
|
|
51
51
|
|
|
@@ -267,6 +267,7 @@ program
|
|
|
267
267
|
.option("--no-scout-check", "Suppress the parentHasScouts scout-before-build warning")
|
|
268
268
|
.option("--dispatch-max-agents <n>", "Per-lead max agents ceiling (injected into overlay)")
|
|
269
269
|
.option("--runtime <name>", "Runtime adapter (default: config or claude)")
|
|
270
|
+
.option("--base-branch <branch>", "Base branch for worktree creation (default: current HEAD)")
|
|
270
271
|
.option("--json", "Output result as JSON")
|
|
271
272
|
.action(async (taskId, opts) => {
|
|
272
273
|
await slingCommand(taskId, opts);
|
|
package/src/metrics/pricing.test.ts
ADDED
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
import { describe, expect, test } from "bun:test";
|
|
2
|
+
import { estimateCost, getPricingForModel } from "./pricing";
|
|
3
|
+
|
|
4
|
+
describe("getPricingForModel()", () => {
|
|
5
|
+
describe("Claude tiers", () => {
|
|
6
|
+
test("matches opus by substring in full model ID", () => {
|
|
7
|
+
const result = getPricingForModel("claude-opus-4-20250514");
|
|
8
|
+
expect(result).not.toBeNull();
|
|
9
|
+
expect(result?.inputPerMTok).toBe(15);
|
|
10
|
+
expect(result?.outputPerMTok).toBe(75);
|
|
11
|
+
});
|
|
12
|
+
|
|
13
|
+
test("matches sonnet by substring in full model ID", () => {
|
|
14
|
+
const result = getPricingForModel("claude-sonnet-4-20250514");
|
|
15
|
+
expect(result).not.toBeNull();
|
|
16
|
+
expect(result?.inputPerMTok).toBe(3);
|
|
17
|
+
expect(result?.outputPerMTok).toBe(15);
|
|
18
|
+
});
|
|
19
|
+
|
|
20
|
+
test("matches haiku by substring in full model ID", () => {
|
|
21
|
+
const result = getPricingForModel("claude-haiku-3-5-20241022");
|
|
22
|
+
expect(result).not.toBeNull();
|
|
23
|
+
expect(result?.inputPerMTok).toBe(0.8);
|
|
24
|
+
expect(result?.outputPerMTok).toBe(4);
|
|
25
|
+
});
|
|
26
|
+
});
|
|
27
|
+
|
|
28
|
+
describe("OpenAI tiers", () => {
|
|
29
|
+
test("matches gpt-4o-mini", () => {
|
|
30
|
+
const result = getPricingForModel("gpt-4o-mini");
|
|
31
|
+
expect(result).not.toBeNull();
|
|
32
|
+
expect(result?.inputPerMTok).toBe(0.15);
|
|
33
|
+
});
|
|
34
|
+
|
|
35
|
+
test("matches gpt-4o", () => {
|
|
36
|
+
const result = getPricingForModel("gpt-4o");
|
|
37
|
+
expect(result).not.toBeNull();
|
|
38
|
+
expect(result?.inputPerMTok).toBe(2.5);
|
|
39
|
+
});
|
|
40
|
+
|
|
41
|
+
test("matches gpt-5", () => {
|
|
42
|
+
const result = getPricingForModel("gpt-5");
|
|
43
|
+
expect(result).not.toBeNull();
|
|
44
|
+
expect(result?.inputPerMTok).toBe(10);
|
|
45
|
+
});
|
|
46
|
+
|
|
47
|
+
test("matches o3", () => {
|
|
48
|
+
const result = getPricingForModel("o3");
|
|
49
|
+
expect(result).not.toBeNull();
|
|
50
|
+
expect(result?.inputPerMTok).toBe(10);
|
|
51
|
+
expect(result?.outputPerMTok).toBe(40);
|
|
52
|
+
});
|
|
53
|
+
|
|
54
|
+
test("matches o1", () => {
|
|
55
|
+
const result = getPricingForModel("o1");
|
|
56
|
+
expect(result).not.toBeNull();
|
|
57
|
+
expect(result?.inputPerMTok).toBe(15);
|
|
58
|
+
expect(result?.outputPerMTok).toBe(60);
|
|
59
|
+
});
|
|
60
|
+
});
|
|
61
|
+
|
|
62
|
+
describe("Priority ordering", () => {
|
|
63
|
+
test("gpt-4o-mini matches before gpt-4o (substring overlap)", () => {
|
|
64
|
+
const mini = getPricingForModel("gpt-4o-mini");
|
|
65
|
+
const full = getPricingForModel("gpt-4o");
|
|
66
|
+
expect(mini).not.toBeNull();
|
|
67
|
+
expect(full).not.toBeNull();
|
|
68
|
+
if (mini === null || full === null) return;
|
|
69
|
+
// gpt-4o-mini is cheaper
|
|
70
|
+
expect(mini.inputPerMTok).toBeLessThan(full.inputPerMTok);
|
|
71
|
+
// A model string "gpt-4o-mini" resolves to mini pricing, not gpt-4o
|
|
72
|
+
expect(mini.inputPerMTok).toBe(0.15);
|
|
73
|
+
});
|
|
74
|
+
|
|
75
|
+
test("o3 matches before o1 (o1 string contains o1, o3 does not contain o1)", () => {
|
|
76
|
+
const o3 = getPricingForModel("o3");
|
|
77
|
+
const o1 = getPricingForModel("o1");
|
|
78
|
+
expect(o3).not.toBeNull();
|
|
79
|
+
expect(o1).not.toBeNull();
|
|
80
|
+
if (o3 === null || o1 === null) return;
|
|
81
|
+
expect(o3.outputPerMTok).toBe(40);
|
|
82
|
+
expect(o1.outputPerMTok).toBe(60);
|
|
83
|
+
});
|
|
84
|
+
});
|
|
85
|
+
|
|
86
|
+
describe("Gemini tiers", () => {
|
|
87
|
+
test("matches gemini-flash by 'flash' substring", () => {
|
|
88
|
+
const result = getPricingForModel("gemini-flash-2.0");
|
|
89
|
+
expect(result).not.toBeNull();
|
|
90
|
+
expect(result?.inputPerMTok).toBe(0.1);
|
|
91
|
+
expect(result?.outputPerMTok).toBe(0.4);
|
|
92
|
+
});
|
|
93
|
+
|
|
94
|
+
test("matches gemini-pro by 'gemini' + 'pro' substrings", () => {
|
|
95
|
+
const result = getPricingForModel("gemini-2.0-pro-exp");
|
|
96
|
+
expect(result).not.toBeNull();
|
|
97
|
+
expect(result?.inputPerMTok).toBe(1.25);
|
|
98
|
+
expect(result?.outputPerMTok).toBe(5);
|
|
99
|
+
});
|
|
100
|
+
});
|
|
101
|
+
|
|
102
|
+
describe("Case insensitivity", () => {
|
|
103
|
+
test("Claude-OPUS-4 resolves correctly", () => {
|
|
104
|
+
const result = getPricingForModel("Claude-OPUS-4");
|
|
105
|
+
expect(result).not.toBeNull();
|
|
106
|
+
expect(result?.inputPerMTok).toBe(15);
|
|
107
|
+
});
|
|
108
|
+
|
|
109
|
+
test("SONNET resolves correctly", () => {
|
|
110
|
+
const result = getPricingForModel("SONNET");
|
|
111
|
+
expect(result).not.toBeNull();
|
|
112
|
+
expect(result?.inputPerMTok).toBe(3);
|
|
113
|
+
});
|
|
114
|
+
|
|
115
|
+
test("Haiku resolves correctly", () => {
|
|
116
|
+
const result = getPricingForModel("Haiku");
|
|
117
|
+
expect(result).not.toBeNull();
|
|
118
|
+
expect(result?.inputPerMTok).toBe(0.8);
|
|
119
|
+
});
|
|
120
|
+
});
|
|
121
|
+
|
|
122
|
+
describe("Unknown models", () => {
|
|
123
|
+
test("returns null for llama-3-70b", () => {
|
|
124
|
+
expect(getPricingForModel("llama-3-70b")).toBeNull();
|
|
125
|
+
});
|
|
126
|
+
|
|
127
|
+
test("returns null for empty string", () => {
|
|
128
|
+
expect(getPricingForModel("")).toBeNull();
|
|
129
|
+
});
|
|
130
|
+
|
|
131
|
+
test("returns null for random gibberish", () => {
|
|
132
|
+
expect(getPricingForModel("xyzzy-foo-bar-9000")).toBeNull();
|
|
133
|
+
});
|
|
134
|
+
});
|
|
135
|
+
});
|
|
136
|
+
|
|
137
|
+
describe("estimateCost()", () => {
|
|
138
|
+
test("Typical Claude Opus usage: 1M input, 100K output, 500K cacheRead, 200K cacheCreation → $24.00", () => {
|
|
139
|
+
const cost = estimateCost({
|
|
140
|
+
inputTokens: 1_000_000,
|
|
141
|
+
outputTokens: 100_000,
|
|
142
|
+
cacheReadTokens: 500_000,
|
|
143
|
+
cacheCreationTokens: 200_000,
|
|
144
|
+
modelUsed: "claude-opus-4-20250514",
|
|
145
|
+
});
|
|
146
|
+
// inputCost = 1 * 15 = 15.00
|
|
147
|
+
// outputCost = 0.1 * 75 = 7.50
|
|
148
|
+
// cacheReadCost = 0.5 * 1.5 = 0.75
|
|
149
|
+
// cacheCreationCost = 0.2 * 3.75 = 0.75
|
|
150
|
+
// total = 24.00
|
|
151
|
+
expect(cost).toBe(24.0);
|
|
152
|
+
});
|
|
153
|
+
|
|
154
|
+
test("Typical Claude Sonnet usage: 500K input, 50K output, 100K cacheRead, 50K cacheCreation", () => {
|
|
155
|
+
const cost = estimateCost({
|
|
156
|
+
inputTokens: 500_000,
|
|
157
|
+
outputTokens: 50_000,
|
|
158
|
+
cacheReadTokens: 100_000,
|
|
159
|
+
cacheCreationTokens: 50_000,
|
|
160
|
+
modelUsed: "claude-sonnet-4-20250514",
|
|
161
|
+
});
|
|
162
|
+
// inputCost = 0.5 * 3 = 1.50
|
|
163
|
+
// outputCost = 0.05 * 15 = 0.75
|
|
164
|
+
// cacheReadCost = 0.1 * 0.3 = 0.03
|
|
165
|
+
// cacheCreationCost = 0.05 * 0.75 = 0.0375
|
|
166
|
+
// total = 2.3175
|
|
167
|
+
expect(cost).toBeCloseTo(2.3175, 4);
|
|
168
|
+
});
|
|
169
|
+
|
|
170
|
+
test("Zero tokens returns 0 (not null)", () => {
|
|
171
|
+
const cost = estimateCost({
|
|
172
|
+
inputTokens: 0,
|
|
173
|
+
outputTokens: 0,
|
|
174
|
+
cacheReadTokens: 0,
|
|
175
|
+
cacheCreationTokens: 0,
|
|
176
|
+
modelUsed: "claude-opus-4",
|
|
177
|
+
});
|
|
178
|
+
expect(cost).toBe(0);
|
|
179
|
+
});
|
|
180
|
+
|
|
181
|
+
test("Null modelUsed returns null", () => {
|
|
182
|
+
const cost = estimateCost({
|
|
183
|
+
inputTokens: 1000,
|
|
184
|
+
outputTokens: 500,
|
|
185
|
+
cacheReadTokens: 0,
|
|
186
|
+
cacheCreationTokens: 0,
|
|
187
|
+
modelUsed: null,
|
|
188
|
+
});
|
|
189
|
+
expect(cost).toBeNull();
|
|
190
|
+
});
|
|
191
|
+
|
|
192
|
+
test("Unknown model returns null", () => {
|
|
193
|
+
const cost = estimateCost({
|
|
194
|
+
inputTokens: 1000,
|
|
195
|
+
outputTokens: 500,
|
|
196
|
+
cacheReadTokens: 0,
|
|
197
|
+
cacheCreationTokens: 0,
|
|
198
|
+
modelUsed: "llama-3-70b",
|
|
199
|
+
});
|
|
200
|
+
expect(cost).toBeNull();
|
|
201
|
+
});
|
|
202
|
+
|
|
203
|
+
test("Input-only usage: only inputTokens > 0, rest zero", () => {
|
|
204
|
+
const cost = estimateCost({
|
|
205
|
+
inputTokens: 1_000_000,
|
|
206
|
+
outputTokens: 0,
|
|
207
|
+
cacheReadTokens: 0,
|
|
208
|
+
cacheCreationTokens: 0,
|
|
209
|
+
modelUsed: "claude-sonnet-4",
|
|
210
|
+
});
|
|
211
|
+
// inputCost = 1 * 3 = 3.00
|
|
212
|
+
expect(cost).toBe(3.0);
|
|
213
|
+
});
|
|
214
|
+
|
|
215
|
+
test("Output-only usage: only outputTokens > 0, rest zero", () => {
|
|
216
|
+
const cost = estimateCost({
|
|
217
|
+
inputTokens: 0,
|
|
218
|
+
outputTokens: 1_000_000,
|
|
219
|
+
cacheReadTokens: 0,
|
|
220
|
+
cacheCreationTokens: 0,
|
|
221
|
+
modelUsed: "claude-sonnet-4",
|
|
222
|
+
});
|
|
223
|
+
// outputCost = 1 * 15 = 15.00
|
|
224
|
+
expect(cost).toBe(15.0);
|
|
225
|
+
});
|
|
226
|
+
|
|
227
|
+
test("Cache-heavy usage: large cacheRead + cacheCreation, verify math", () => {
|
|
228
|
+
const cost = estimateCost({
|
|
229
|
+
inputTokens: 0,
|
|
230
|
+
outputTokens: 0,
|
|
231
|
+
cacheReadTokens: 10_000_000,
|
|
232
|
+
cacheCreationTokens: 5_000_000,
|
|
233
|
+
modelUsed: "claude-opus-4",
|
|
234
|
+
});
|
|
235
|
+
// cacheReadCost = 10 * 1.5 = 15.00
|
|
236
|
+
// cacheCreationCost = 5 * 3.75 = 18.75
|
|
237
|
+
// total = 33.75
|
|
238
|
+
expect(cost).toBeCloseTo(33.75, 5);
|
|
239
|
+
});
|
|
240
|
+
});
|
|
241
|
+
|
|
242
|
+
describe("Cache pricing ratios", () => {
|
|
243
|
+
test("Claude cache read is 10% of input price (verified on opus)", () => {
|
|
244
|
+
const pricing = getPricingForModel("claude-opus-4");
|
|
245
|
+
expect(pricing).not.toBeNull();
|
|
246
|
+
if (pricing === null) return;
|
|
247
|
+
const ratio = pricing.cacheReadPerMTok / pricing.inputPerMTok;
|
|
248
|
+
expect(ratio).toBeCloseTo(0.1, 10);
|
|
249
|
+
});
|
|
250
|
+
|
|
251
|
+
test("Claude cache creation is 25% of input price (verified on sonnet)", () => {
|
|
252
|
+
const pricing = getPricingForModel("claude-sonnet-4");
|
|
253
|
+
expect(pricing).not.toBeNull();
|
|
254
|
+
if (pricing === null) return;
|
|
255
|
+
const ratio = pricing.cacheCreationPerMTok / pricing.inputPerMTok;
|
|
256
|
+
expect(ratio).toBeCloseTo(0.25, 10);
|
|
257
|
+
});
|
|
258
|
+
});
|