@os-eco/overstory-cli 0.7.7 → 0.7.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +101 -1
- package/package.json +1 -1
- package/src/commands/coordinator.test.ts +131 -2
- package/src/commands/coordinator.ts +40 -9
- package/src/commands/costs.test.ts +5 -0
- package/src/commands/costs.ts +1 -1
- package/src/commands/log.ts +2 -0
- package/src/commands/sling.test.ts +63 -1
- package/src/commands/sling.ts +37 -2
- package/src/config.test.ts +68 -0
- package/src/config.ts +16 -0
- package/src/index.ts +2 -1
- package/src/metrics/pricing.test.ts +258 -0
- package/src/metrics/store.test.ts +227 -0
- package/src/metrics/store.ts +40 -5
- package/src/schema-consistency.test.ts +1 -0
- package/src/types.ts +8 -0
- package/src/worktree/tmux.test.ts +49 -0
- package/src/worktree/tmux.ts +33 -0
package/src/config.test.ts
CHANGED
|
@@ -775,6 +775,74 @@ project:
|
|
|
775
775
|
await expect(loadConfig(tempDir)).rejects.toThrow(ValidationError);
|
|
776
776
|
});
|
|
777
777
|
|
|
778
|
+
test("resets negative shellInitDelayMs to 0 with warning", async () => {
|
|
779
|
+
await writeConfig("runtime:\n shellInitDelayMs: -100\n");
|
|
780
|
+
const origWrite = process.stderr.write;
|
|
781
|
+
let capturedStderr = "";
|
|
782
|
+
process.stderr.write = ((s: string | Uint8Array) => {
|
|
783
|
+
if (typeof s === "string") capturedStderr += s;
|
|
784
|
+
return true;
|
|
785
|
+
}) as typeof process.stderr.write;
|
|
786
|
+
try {
|
|
787
|
+
const config = await loadConfig(tempDir);
|
|
788
|
+
expect(config.runtime?.shellInitDelayMs).toBe(0);
|
|
789
|
+
} finally {
|
|
790
|
+
process.stderr.write = origWrite;
|
|
791
|
+
}
|
|
792
|
+
expect(capturedStderr).toContain("WARNING: runtime.shellInitDelayMs");
|
|
793
|
+
});
|
|
794
|
+
|
|
795
|
+
test("resets Infinity shellInitDelayMs to 0 with warning", async () => {
|
|
796
|
+
await writeConfig("runtime:\n shellInitDelayMs: .inf\n");
|
|
797
|
+
const origWrite = process.stderr.write;
|
|
798
|
+
let capturedStderr = "";
|
|
799
|
+
process.stderr.write = ((s: string | Uint8Array) => {
|
|
800
|
+
if (typeof s === "string") capturedStderr += s;
|
|
801
|
+
return true;
|
|
802
|
+
}) as typeof process.stderr.write;
|
|
803
|
+
try {
|
|
804
|
+
const config = await loadConfig(tempDir);
|
|
805
|
+
expect(config.runtime?.shellInitDelayMs).toBe(0);
|
|
806
|
+
} finally {
|
|
807
|
+
process.stderr.write = origWrite;
|
|
808
|
+
}
|
|
809
|
+
expect(capturedStderr).toContain("WARNING: runtime.shellInitDelayMs");
|
|
810
|
+
});
|
|
811
|
+
|
|
812
|
+
test("warns when shellInitDelayMs exceeds 30s", async () => {
|
|
813
|
+
await writeConfig("runtime:\n shellInitDelayMs: 60000\n");
|
|
814
|
+
const origWrite = process.stderr.write;
|
|
815
|
+
let capturedStderr = "";
|
|
816
|
+
process.stderr.write = ((s: string | Uint8Array) => {
|
|
817
|
+
if (typeof s === "string") capturedStderr += s;
|
|
818
|
+
return true;
|
|
819
|
+
}) as typeof process.stderr.write;
|
|
820
|
+
try {
|
|
821
|
+
const config = await loadConfig(tempDir);
|
|
822
|
+
expect(config.runtime?.shellInitDelayMs).toBe(60000);
|
|
823
|
+
} finally {
|
|
824
|
+
process.stderr.write = origWrite;
|
|
825
|
+
}
|
|
826
|
+
expect(capturedStderr).toContain("WARNING: runtime.shellInitDelayMs is 60000ms");
|
|
827
|
+
});
|
|
828
|
+
|
|
829
|
+
test("accepts valid shellInitDelayMs without warning", async () => {
|
|
830
|
+
await writeConfig("runtime:\n shellInitDelayMs: 2000\n");
|
|
831
|
+
const origWrite = process.stderr.write;
|
|
832
|
+
let capturedStderr = "";
|
|
833
|
+
process.stderr.write = ((s: string | Uint8Array) => {
|
|
834
|
+
if (typeof s === "string") capturedStderr += s;
|
|
835
|
+
return true;
|
|
836
|
+
}) as typeof process.stderr.write;
|
|
837
|
+
try {
|
|
838
|
+
const config = await loadConfig(tempDir);
|
|
839
|
+
expect(config.runtime?.shellInitDelayMs).toBe(2000);
|
|
840
|
+
} finally {
|
|
841
|
+
process.stderr.write = origWrite;
|
|
842
|
+
}
|
|
843
|
+
expect(capturedStderr).not.toContain("shellInitDelayMs");
|
|
844
|
+
});
|
|
845
|
+
|
|
778
846
|
test("rejects qualityGate with empty description", async () => {
|
|
779
847
|
await writeConfig(`
|
|
780
848
|
project:
|
package/src/config.ts
CHANGED
|
@@ -64,6 +64,7 @@ export const DEFAULT_CONFIG: OverstoryConfig = {
|
|
|
64
64
|
},
|
|
65
65
|
runtime: {
|
|
66
66
|
default: "claude",
|
|
67
|
+
shellInitDelayMs: 0,
|
|
67
68
|
pi: {
|
|
68
69
|
provider: "anthropic",
|
|
69
70
|
modelMap: {
|
|
@@ -664,6 +665,21 @@ function validateConfig(config: OverstoryConfig): void {
|
|
|
664
665
|
}
|
|
665
666
|
}
|
|
666
667
|
|
|
668
|
+
// runtime.shellInitDelayMs: validate if present
|
|
669
|
+
if (config.runtime?.shellInitDelayMs !== undefined) {
|
|
670
|
+
const delay = config.runtime.shellInitDelayMs;
|
|
671
|
+
if (typeof delay !== "number" || delay < 0 || !Number.isFinite(delay)) {
|
|
672
|
+
process.stderr.write(
|
|
673
|
+
`[overstory] WARNING: runtime.shellInitDelayMs must be a non-negative number. Got: ${delay}. Using default (0).\n`,
|
|
674
|
+
);
|
|
675
|
+
config.runtime.shellInitDelayMs = 0;
|
|
676
|
+
} else if (delay > 30_000) {
|
|
677
|
+
process.stderr.write(
|
|
678
|
+
`[overstory] WARNING: runtime.shellInitDelayMs is ${delay}ms (>${30}s). This adds delay before every agent spawn. Consider a lower value.\n`,
|
|
679
|
+
);
|
|
680
|
+
}
|
|
681
|
+
}
|
|
682
|
+
|
|
667
683
|
// models: validate each value — accepts aliases and provider-prefixed refs
|
|
668
684
|
const validAliases = ["sonnet", "opus", "haiku"];
|
|
669
685
|
const toolHeavyRoles = ["builder", "scout"];
|
package/src/index.ts
CHANGED
|
@@ -45,7 +45,7 @@ import { OverstoryError, WorktreeError } from "./errors.ts";
|
|
|
45
45
|
import { jsonError } from "./json.ts";
|
|
46
46
|
import { brand, chalk, muted, setQuiet } from "./logging/color.ts";
|
|
47
47
|
|
|
48
|
-
export const VERSION = "0.7.7";
|
|
48
|
+
export const VERSION = "0.7.8";
|
|
49
49
|
|
|
50
50
|
const rawArgs = process.argv.slice(2);
|
|
51
51
|
|
|
@@ -267,6 +267,7 @@ program
|
|
|
267
267
|
.option("--no-scout-check", "Suppress the parentHasScouts scout-before-build warning")
|
|
268
268
|
.option("--dispatch-max-agents <n>", "Per-lead max agents ceiling (injected into overlay)")
|
|
269
269
|
.option("--runtime <name>", "Runtime adapter (default: config or claude)")
|
|
270
|
+
.option("--base-branch <branch>", "Base branch for worktree creation (default: current HEAD)")
|
|
270
271
|
.option("--json", "Output result as JSON")
|
|
271
272
|
.action(async (taskId, opts) => {
|
|
272
273
|
await slingCommand(taskId, opts);
|
package/src/metrics/pricing.test.ts
ADDED
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
import { describe, expect, test } from "bun:test";
|
|
2
|
+
import { estimateCost, getPricingForModel } from "./pricing";
|
|
3
|
+
|
|
4
|
+
describe("getPricingForModel()", () => {
|
|
5
|
+
describe("Claude tiers", () => {
|
|
6
|
+
test("matches opus by substring in full model ID", () => {
|
|
7
|
+
const result = getPricingForModel("claude-opus-4-20250514");
|
|
8
|
+
expect(result).not.toBeNull();
|
|
9
|
+
expect(result?.inputPerMTok).toBe(15);
|
|
10
|
+
expect(result?.outputPerMTok).toBe(75);
|
|
11
|
+
});
|
|
12
|
+
|
|
13
|
+
test("matches sonnet by substring in full model ID", () => {
|
|
14
|
+
const result = getPricingForModel("claude-sonnet-4-20250514");
|
|
15
|
+
expect(result).not.toBeNull();
|
|
16
|
+
expect(result?.inputPerMTok).toBe(3);
|
|
17
|
+
expect(result?.outputPerMTok).toBe(15);
|
|
18
|
+
});
|
|
19
|
+
|
|
20
|
+
test("matches haiku by substring in full model ID", () => {
|
|
21
|
+
const result = getPricingForModel("claude-haiku-3-5-20241022");
|
|
22
|
+
expect(result).not.toBeNull();
|
|
23
|
+
expect(result?.inputPerMTok).toBe(0.8);
|
|
24
|
+
expect(result?.outputPerMTok).toBe(4);
|
|
25
|
+
});
|
|
26
|
+
});
|
|
27
|
+
|
|
28
|
+
describe("OpenAI tiers", () => {
|
|
29
|
+
test("matches gpt-4o-mini", () => {
|
|
30
|
+
const result = getPricingForModel("gpt-4o-mini");
|
|
31
|
+
expect(result).not.toBeNull();
|
|
32
|
+
expect(result?.inputPerMTok).toBe(0.15);
|
|
33
|
+
});
|
|
34
|
+
|
|
35
|
+
test("matches gpt-4o", () => {
|
|
36
|
+
const result = getPricingForModel("gpt-4o");
|
|
37
|
+
expect(result).not.toBeNull();
|
|
38
|
+
expect(result?.inputPerMTok).toBe(2.5);
|
|
39
|
+
});
|
|
40
|
+
|
|
41
|
+
test("matches gpt-5", () => {
|
|
42
|
+
const result = getPricingForModel("gpt-5");
|
|
43
|
+
expect(result).not.toBeNull();
|
|
44
|
+
expect(result?.inputPerMTok).toBe(10);
|
|
45
|
+
});
|
|
46
|
+
|
|
47
|
+
test("matches o3", () => {
|
|
48
|
+
const result = getPricingForModel("o3");
|
|
49
|
+
expect(result).not.toBeNull();
|
|
50
|
+
expect(result?.inputPerMTok).toBe(10);
|
|
51
|
+
expect(result?.outputPerMTok).toBe(40);
|
|
52
|
+
});
|
|
53
|
+
|
|
54
|
+
test("matches o1", () => {
|
|
55
|
+
const result = getPricingForModel("o1");
|
|
56
|
+
expect(result).not.toBeNull();
|
|
57
|
+
expect(result?.inputPerMTok).toBe(15);
|
|
58
|
+
expect(result?.outputPerMTok).toBe(60);
|
|
59
|
+
});
|
|
60
|
+
});
|
|
61
|
+
|
|
62
|
+
describe("Priority ordering", () => {
|
|
63
|
+
test("gpt-4o-mini matches before gpt-4o (substring overlap)", () => {
|
|
64
|
+
const mini = getPricingForModel("gpt-4o-mini");
|
|
65
|
+
const full = getPricingForModel("gpt-4o");
|
|
66
|
+
expect(mini).not.toBeNull();
|
|
67
|
+
expect(full).not.toBeNull();
|
|
68
|
+
if (mini === null || full === null) return;
|
|
69
|
+
// gpt-4o-mini is cheaper
|
|
70
|
+
expect(mini.inputPerMTok).toBeLessThan(full.inputPerMTok);
|
|
71
|
+
// A model string "gpt-4o-mini" resolves to mini pricing, not gpt-4o
|
|
72
|
+
expect(mini.inputPerMTok).toBe(0.15);
|
|
73
|
+
});
|
|
74
|
+
|
|
75
|
+
test("o3 matches before o1 (o1 string contains o1, o3 does not contain o1)", () => {
|
|
76
|
+
const o3 = getPricingForModel("o3");
|
|
77
|
+
const o1 = getPricingForModel("o1");
|
|
78
|
+
expect(o3).not.toBeNull();
|
|
79
|
+
expect(o1).not.toBeNull();
|
|
80
|
+
if (o3 === null || o1 === null) return;
|
|
81
|
+
expect(o3.outputPerMTok).toBe(40);
|
|
82
|
+
expect(o1.outputPerMTok).toBe(60);
|
|
83
|
+
});
|
|
84
|
+
});
|
|
85
|
+
|
|
86
|
+
describe("Gemini tiers", () => {
|
|
87
|
+
test("matches gemini-flash by 'flash' substring", () => {
|
|
88
|
+
const result = getPricingForModel("gemini-flash-2.0");
|
|
89
|
+
expect(result).not.toBeNull();
|
|
90
|
+
expect(result?.inputPerMTok).toBe(0.1);
|
|
91
|
+
expect(result?.outputPerMTok).toBe(0.4);
|
|
92
|
+
});
|
|
93
|
+
|
|
94
|
+
test("matches gemini-pro by 'gemini' + 'pro' substrings", () => {
|
|
95
|
+
const result = getPricingForModel("gemini-2.0-pro-exp");
|
|
96
|
+
expect(result).not.toBeNull();
|
|
97
|
+
expect(result?.inputPerMTok).toBe(1.25);
|
|
98
|
+
expect(result?.outputPerMTok).toBe(5);
|
|
99
|
+
});
|
|
100
|
+
});
|
|
101
|
+
|
|
102
|
+
describe("Case insensitivity", () => {
|
|
103
|
+
test("Claude-OPUS-4 resolves correctly", () => {
|
|
104
|
+
const result = getPricingForModel("Claude-OPUS-4");
|
|
105
|
+
expect(result).not.toBeNull();
|
|
106
|
+
expect(result?.inputPerMTok).toBe(15);
|
|
107
|
+
});
|
|
108
|
+
|
|
109
|
+
test("SONNET resolves correctly", () => {
|
|
110
|
+
const result = getPricingForModel("SONNET");
|
|
111
|
+
expect(result).not.toBeNull();
|
|
112
|
+
expect(result?.inputPerMTok).toBe(3);
|
|
113
|
+
});
|
|
114
|
+
|
|
115
|
+
test("Haiku resolves correctly", () => {
|
|
116
|
+
const result = getPricingForModel("Haiku");
|
|
117
|
+
expect(result).not.toBeNull();
|
|
118
|
+
expect(result?.inputPerMTok).toBe(0.8);
|
|
119
|
+
});
|
|
120
|
+
});
|
|
121
|
+
|
|
122
|
+
describe("Unknown models", () => {
|
|
123
|
+
test("returns null for llama-3-70b", () => {
|
|
124
|
+
expect(getPricingForModel("llama-3-70b")).toBeNull();
|
|
125
|
+
});
|
|
126
|
+
|
|
127
|
+
test("returns null for empty string", () => {
|
|
128
|
+
expect(getPricingForModel("")).toBeNull();
|
|
129
|
+
});
|
|
130
|
+
|
|
131
|
+
test("returns null for random gibberish", () => {
|
|
132
|
+
expect(getPricingForModel("xyzzy-foo-bar-9000")).toBeNull();
|
|
133
|
+
});
|
|
134
|
+
});
|
|
135
|
+
});
|
|
136
|
+
|
|
137
|
+
describe("estimateCost()", () => {
|
|
138
|
+
test("Typical Claude Opus usage: 1M input, 100K output, 500K cacheRead, 200K cacheCreation → $24.00", () => {
|
|
139
|
+
const cost = estimateCost({
|
|
140
|
+
inputTokens: 1_000_000,
|
|
141
|
+
outputTokens: 100_000,
|
|
142
|
+
cacheReadTokens: 500_000,
|
|
143
|
+
cacheCreationTokens: 200_000,
|
|
144
|
+
modelUsed: "claude-opus-4-20250514",
|
|
145
|
+
});
|
|
146
|
+
// inputCost = 1 * 15 = 15.00
|
|
147
|
+
// outputCost = 0.1 * 75 = 7.50
|
|
148
|
+
// cacheReadCost = 0.5 * 1.5 = 0.75
|
|
149
|
+
// cacheCreationCost = 0.2 * 3.75 = 0.75
|
|
150
|
+
// total = 24.00
|
|
151
|
+
expect(cost).toBe(24.0);
|
|
152
|
+
});
|
|
153
|
+
|
|
154
|
+
test("Typical Claude Sonnet usage: 500K input, 50K output, 100K cacheRead, 50K cacheCreation", () => {
|
|
155
|
+
const cost = estimateCost({
|
|
156
|
+
inputTokens: 500_000,
|
|
157
|
+
outputTokens: 50_000,
|
|
158
|
+
cacheReadTokens: 100_000,
|
|
159
|
+
cacheCreationTokens: 50_000,
|
|
160
|
+
modelUsed: "claude-sonnet-4-20250514",
|
|
161
|
+
});
|
|
162
|
+
// inputCost = 0.5 * 3 = 1.50
|
|
163
|
+
// outputCost = 0.05 * 15 = 0.75
|
|
164
|
+
// cacheReadCost = 0.1 * 0.3 = 0.03
|
|
165
|
+
// cacheCreationCost = 0.05 * 0.75 = 0.0375
|
|
166
|
+
// total = 2.3175
|
|
167
|
+
expect(cost).toBeCloseTo(2.3175, 4);
|
|
168
|
+
});
|
|
169
|
+
|
|
170
|
+
test("Zero tokens returns 0 (not null)", () => {
|
|
171
|
+
const cost = estimateCost({
|
|
172
|
+
inputTokens: 0,
|
|
173
|
+
outputTokens: 0,
|
|
174
|
+
cacheReadTokens: 0,
|
|
175
|
+
cacheCreationTokens: 0,
|
|
176
|
+
modelUsed: "claude-opus-4",
|
|
177
|
+
});
|
|
178
|
+
expect(cost).toBe(0);
|
|
179
|
+
});
|
|
180
|
+
|
|
181
|
+
test("Null modelUsed returns null", () => {
|
|
182
|
+
const cost = estimateCost({
|
|
183
|
+
inputTokens: 1000,
|
|
184
|
+
outputTokens: 500,
|
|
185
|
+
cacheReadTokens: 0,
|
|
186
|
+
cacheCreationTokens: 0,
|
|
187
|
+
modelUsed: null,
|
|
188
|
+
});
|
|
189
|
+
expect(cost).toBeNull();
|
|
190
|
+
});
|
|
191
|
+
|
|
192
|
+
test("Unknown model returns null", () => {
|
|
193
|
+
const cost = estimateCost({
|
|
194
|
+
inputTokens: 1000,
|
|
195
|
+
outputTokens: 500,
|
|
196
|
+
cacheReadTokens: 0,
|
|
197
|
+
cacheCreationTokens: 0,
|
|
198
|
+
modelUsed: "llama-3-70b",
|
|
199
|
+
});
|
|
200
|
+
expect(cost).toBeNull();
|
|
201
|
+
});
|
|
202
|
+
|
|
203
|
+
test("Input-only usage: only inputTokens > 0, rest zero", () => {
|
|
204
|
+
const cost = estimateCost({
|
|
205
|
+
inputTokens: 1_000_000,
|
|
206
|
+
outputTokens: 0,
|
|
207
|
+
cacheReadTokens: 0,
|
|
208
|
+
cacheCreationTokens: 0,
|
|
209
|
+
modelUsed: "claude-sonnet-4",
|
|
210
|
+
});
|
|
211
|
+
// inputCost = 1 * 3 = 3.00
|
|
212
|
+
expect(cost).toBe(3.0);
|
|
213
|
+
});
|
|
214
|
+
|
|
215
|
+
test("Output-only usage: only outputTokens > 0, rest zero", () => {
|
|
216
|
+
const cost = estimateCost({
|
|
217
|
+
inputTokens: 0,
|
|
218
|
+
outputTokens: 1_000_000,
|
|
219
|
+
cacheReadTokens: 0,
|
|
220
|
+
cacheCreationTokens: 0,
|
|
221
|
+
modelUsed: "claude-sonnet-4",
|
|
222
|
+
});
|
|
223
|
+
// outputCost = 1 * 15 = 15.00
|
|
224
|
+
expect(cost).toBe(15.0);
|
|
225
|
+
});
|
|
226
|
+
|
|
227
|
+
test("Cache-heavy usage: large cacheRead + cacheCreation, verify math", () => {
|
|
228
|
+
const cost = estimateCost({
|
|
229
|
+
inputTokens: 0,
|
|
230
|
+
outputTokens: 0,
|
|
231
|
+
cacheReadTokens: 10_000_000,
|
|
232
|
+
cacheCreationTokens: 5_000_000,
|
|
233
|
+
modelUsed: "claude-opus-4",
|
|
234
|
+
});
|
|
235
|
+
// cacheReadCost = 10 * 1.5 = 15.00
|
|
236
|
+
// cacheCreationCost = 5 * 3.75 = 18.75
|
|
237
|
+
// total = 33.75
|
|
238
|
+
expect(cost).toBeCloseTo(33.75, 5);
|
|
239
|
+
});
|
|
240
|
+
});
|
|
241
|
+
|
|
242
|
+
describe("Cache pricing ratios", () => {
|
|
243
|
+
test("Claude cache read is 10% of input price (verified on opus)", () => {
|
|
244
|
+
const pricing = getPricingForModel("claude-opus-4");
|
|
245
|
+
expect(pricing).not.toBeNull();
|
|
246
|
+
if (pricing === null) return;
|
|
247
|
+
const ratio = pricing.cacheReadPerMTok / pricing.inputPerMTok;
|
|
248
|
+
expect(ratio).toBeCloseTo(0.1, 10);
|
|
249
|
+
});
|
|
250
|
+
|
|
251
|
+
test("Claude cache creation is 25% of input price (verified on sonnet)", () => {
|
|
252
|
+
const pricing = getPricingForModel("claude-sonnet-4");
|
|
253
|
+
expect(pricing).not.toBeNull();
|
|
254
|
+
if (pricing === null) return;
|
|
255
|
+
const ratio = pricing.cacheCreationPerMTok / pricing.inputPerMTok;
|
|
256
|
+
expect(ratio).toBeCloseTo(0.25, 10);
|
|
257
|
+
});
|
|
258
|
+
});
|
package/src/metrics/store.test.ts
CHANGED
|
@@ -535,6 +535,7 @@ describe("token snapshots", () => {
|
|
|
535
535
|
cacheCreationTokens: 100,
|
|
536
536
|
estimatedCostUsd: 0.15,
|
|
537
537
|
modelUsed: "claude-sonnet-4-5",
|
|
538
|
+
runId: null,
|
|
538
539
|
createdAt: new Date().toISOString(),
|
|
539
540
|
};
|
|
540
541
|
|
|
@@ -558,6 +559,7 @@ describe("token snapshots", () => {
|
|
|
558
559
|
cacheCreationTokens: 0,
|
|
559
560
|
estimatedCostUsd: 0.01,
|
|
560
561
|
modelUsed: "claude-sonnet-4-5",
|
|
562
|
+
runId: null,
|
|
561
563
|
createdAt: new Date(now - 60_000).toISOString(), // 1 min ago
|
|
562
564
|
});
|
|
563
565
|
|
|
@@ -569,6 +571,7 @@ describe("token snapshots", () => {
|
|
|
569
571
|
cacheCreationTokens: 0,
|
|
570
572
|
estimatedCostUsd: 0.02,
|
|
571
573
|
modelUsed: "claude-sonnet-4-5",
|
|
574
|
+
runId: null,
|
|
572
575
|
createdAt: new Date(now).toISOString(), // now (most recent)
|
|
573
576
|
});
|
|
574
577
|
|
|
@@ -580,6 +583,7 @@ describe("token snapshots", () => {
|
|
|
580
583
|
cacheCreationTokens: 0,
|
|
581
584
|
estimatedCostUsd: 0.03,
|
|
582
585
|
modelUsed: "claude-sonnet-4-5",
|
|
586
|
+
runId: null,
|
|
583
587
|
createdAt: new Date(now - 30_000).toISOString(), // 30s ago
|
|
584
588
|
});
|
|
585
589
|
|
|
@@ -606,6 +610,7 @@ describe("token snapshots", () => {
|
|
|
606
610
|
cacheCreationTokens: 0,
|
|
607
611
|
estimatedCostUsd: null,
|
|
608
612
|
modelUsed: null,
|
|
613
|
+
runId: null,
|
|
609
614
|
createdAt: time1,
|
|
610
615
|
});
|
|
611
616
|
|
|
@@ -617,6 +622,7 @@ describe("token snapshots", () => {
|
|
|
617
622
|
cacheCreationTokens: 0,
|
|
618
623
|
estimatedCostUsd: null,
|
|
619
624
|
modelUsed: null,
|
|
625
|
+
runId: null,
|
|
620
626
|
createdAt: time2,
|
|
621
627
|
});
|
|
622
628
|
|
|
@@ -638,6 +644,7 @@ describe("token snapshots", () => {
|
|
|
638
644
|
cacheCreationTokens: 0,
|
|
639
645
|
estimatedCostUsd: null,
|
|
640
646
|
modelUsed: null,
|
|
647
|
+
runId: null,
|
|
641
648
|
createdAt: new Date().toISOString(),
|
|
642
649
|
});
|
|
643
650
|
|
|
@@ -649,6 +656,7 @@ describe("token snapshots", () => {
|
|
|
649
656
|
cacheCreationTokens: 0,
|
|
650
657
|
estimatedCostUsd: null,
|
|
651
658
|
modelUsed: null,
|
|
659
|
+
runId: null,
|
|
652
660
|
createdAt: new Date().toISOString(),
|
|
653
661
|
});
|
|
654
662
|
|
|
@@ -666,6 +674,7 @@ describe("token snapshots", () => {
|
|
|
666
674
|
cacheCreationTokens: 0,
|
|
667
675
|
estimatedCostUsd: null,
|
|
668
676
|
modelUsed: null,
|
|
677
|
+
runId: null,
|
|
669
678
|
createdAt: new Date().toISOString(),
|
|
670
679
|
});
|
|
671
680
|
|
|
@@ -677,6 +686,7 @@ describe("token snapshots", () => {
|
|
|
677
686
|
cacheCreationTokens: 0,
|
|
678
687
|
estimatedCostUsd: null,
|
|
679
688
|
modelUsed: null,
|
|
689
|
+
runId: null,
|
|
680
690
|
createdAt: new Date().toISOString(),
|
|
681
691
|
});
|
|
682
692
|
|
|
@@ -698,6 +708,7 @@ describe("token snapshots", () => {
|
|
|
698
708
|
cacheCreationTokens: 0,
|
|
699
709
|
estimatedCostUsd: null,
|
|
700
710
|
modelUsed: null,
|
|
711
|
+
runId: null,
|
|
701
712
|
createdAt: new Date(now - 120_000).toISOString(), // 2 min ago
|
|
702
713
|
});
|
|
703
714
|
|
|
@@ -709,6 +720,7 @@ describe("token snapshots", () => {
|
|
|
709
720
|
cacheCreationTokens: 0,
|
|
710
721
|
estimatedCostUsd: null,
|
|
711
722
|
modelUsed: null,
|
|
723
|
+
runId: null,
|
|
712
724
|
createdAt: new Date(now - 10_000).toISOString(), // 10s ago (recent)
|
|
713
725
|
});
|
|
714
726
|
|
|
@@ -729,6 +741,7 @@ describe("token snapshots", () => {
|
|
|
729
741
|
cacheCreationTokens: 0,
|
|
730
742
|
estimatedCostUsd: null,
|
|
731
743
|
modelUsed: null,
|
|
744
|
+
runId: null,
|
|
732
745
|
createdAt: new Date().toISOString(),
|
|
733
746
|
});
|
|
734
747
|
|
|
@@ -740,6 +753,220 @@ describe("token snapshots", () => {
|
|
|
740
753
|
expect(snapshots).toHaveLength(1);
|
|
741
754
|
expect(snapshots[0]?.agentName).toBe("test-agent");
|
|
742
755
|
});
|
|
756
|
+
|
|
757
|
+
test("runId roundtrips correctly through snapshot record and retrieval", () => {
|
|
758
|
+
const now = Date.now();
|
|
759
|
+
store.recordSnapshot({
|
|
760
|
+
agentName: "agent-a",
|
|
761
|
+
inputTokens: 100,
|
|
762
|
+
outputTokens: 50,
|
|
763
|
+
cacheReadTokens: 0,
|
|
764
|
+
cacheCreationTokens: 0,
|
|
765
|
+
estimatedCostUsd: null,
|
|
766
|
+
modelUsed: null,
|
|
767
|
+
runId: "run-abc",
|
|
768
|
+
createdAt: new Date(now).toISOString(),
|
|
769
|
+
});
|
|
770
|
+
|
|
771
|
+
store.recordSnapshot({
|
|
772
|
+
agentName: "agent-b",
|
|
773
|
+
inputTokens: 200,
|
|
774
|
+
outputTokens: 100,
|
|
775
|
+
cacheReadTokens: 0,
|
|
776
|
+
cacheCreationTokens: 0,
|
|
777
|
+
estimatedCostUsd: null,
|
|
778
|
+
modelUsed: null,
|
|
779
|
+
runId: null,
|
|
780
|
+
createdAt: new Date(now).toISOString(),
|
|
781
|
+
});
|
|
782
|
+
|
|
783
|
+
const snapshots = store.getLatestSnapshots();
|
|
784
|
+
const agentA = snapshots.find((s) => s.agentName === "agent-a");
|
|
785
|
+
const agentB = snapshots.find((s) => s.agentName === "agent-b");
|
|
786
|
+
|
|
787
|
+
expect(agentA?.runId).toBe("run-abc");
|
|
788
|
+
expect(agentB?.runId).toBeNull();
|
|
789
|
+
});
|
|
790
|
+
|
|
791
|
+
test("getLatestSnapshots(runId) returns only snapshots matching that run", () => {
|
|
792
|
+
const now = Date.now();
|
|
793
|
+
store.recordSnapshot({
|
|
794
|
+
agentName: "agent-a",
|
|
795
|
+
inputTokens: 100,
|
|
796
|
+
outputTokens: 50,
|
|
797
|
+
cacheReadTokens: 0,
|
|
798
|
+
cacheCreationTokens: 0,
|
|
799
|
+
estimatedCostUsd: null,
|
|
800
|
+
modelUsed: null,
|
|
801
|
+
runId: "run-001",
|
|
802
|
+
createdAt: new Date(now).toISOString(),
|
|
803
|
+
});
|
|
804
|
+
|
|
805
|
+
store.recordSnapshot({
|
|
806
|
+
agentName: "agent-b",
|
|
807
|
+
inputTokens: 200,
|
|
808
|
+
outputTokens: 100,
|
|
809
|
+
cacheReadTokens: 0,
|
|
810
|
+
cacheCreationTokens: 0,
|
|
811
|
+
estimatedCostUsd: null,
|
|
812
|
+
modelUsed: null,
|
|
813
|
+
runId: "run-001",
|
|
814
|
+
createdAt: new Date(now).toISOString(),
|
|
815
|
+
});
|
|
816
|
+
|
|
817
|
+
store.recordSnapshot({
|
|
818
|
+
agentName: "agent-c",
|
|
819
|
+
inputTokens: 300,
|
|
820
|
+
outputTokens: 150,
|
|
821
|
+
cacheReadTokens: 0,
|
|
822
|
+
cacheCreationTokens: 0,
|
|
823
|
+
estimatedCostUsd: null,
|
|
824
|
+
modelUsed: null,
|
|
825
|
+
runId: "run-002",
|
|
826
|
+
createdAt: new Date(now).toISOString(),
|
|
827
|
+
});
|
|
828
|
+
|
|
829
|
+
const run001Snapshots = store.getLatestSnapshots("run-001");
|
|
830
|
+
expect(run001Snapshots).toHaveLength(2);
|
|
831
|
+
expect(run001Snapshots.every((s) => s.runId === "run-001")).toBe(true);
|
|
832
|
+
|
|
833
|
+
const run002Snapshots = store.getLatestSnapshots("run-002");
|
|
834
|
+
expect(run002Snapshots).toHaveLength(1);
|
|
835
|
+
expect(run002Snapshots[0]?.agentName).toBe("agent-c");
|
|
836
|
+
});
|
|
837
|
+
|
|
838
|
+
test("getLatestSnapshots(runId) returns empty array for unknown run", () => {
|
|
839
|
+
store.recordSnapshot({
|
|
840
|
+
agentName: "agent-a",
|
|
841
|
+
inputTokens: 100,
|
|
842
|
+
outputTokens: 50,
|
|
843
|
+
cacheReadTokens: 0,
|
|
844
|
+
cacheCreationTokens: 0,
|
|
845
|
+
estimatedCostUsd: null,
|
|
846
|
+
modelUsed: null,
|
|
847
|
+
runId: "run-001",
|
|
848
|
+
createdAt: new Date().toISOString(),
|
|
849
|
+
});
|
|
850
|
+
|
|
851
|
+
const snapshots = store.getLatestSnapshots("run-nonexistent");
|
|
852
|
+
expect(snapshots).toEqual([]);
|
|
853
|
+
});
|
|
854
|
+
|
|
855
|
+
test("getLatestSnapshots(runId) excludes snapshots with null run_id", () => {
|
|
856
|
+
const now = Date.now();
|
|
857
|
+
store.recordSnapshot({
|
|
858
|
+
agentName: "agent-a",
|
|
859
|
+
inputTokens: 100,
|
|
860
|
+
outputTokens: 50,
|
|
861
|
+
cacheReadTokens: 0,
|
|
862
|
+
cacheCreationTokens: 0,
|
|
863
|
+
estimatedCostUsd: null,
|
|
864
|
+
modelUsed: null,
|
|
865
|
+
runId: null, // no run
|
|
866
|
+
createdAt: new Date(now).toISOString(),
|
|
867
|
+
});
|
|
868
|
+
|
|
869
|
+
store.recordSnapshot({
|
|
870
|
+
agentName: "agent-b",
|
|
871
|
+
inputTokens: 200,
|
|
872
|
+
outputTokens: 100,
|
|
873
|
+
cacheReadTokens: 0,
|
|
874
|
+
cacheCreationTokens: 0,
|
|
875
|
+
estimatedCostUsd: null,
|
|
876
|
+
modelUsed: null,
|
|
877
|
+
runId: "run-001",
|
|
878
|
+
createdAt: new Date(now).toISOString(),
|
|
879
|
+
});
|
|
880
|
+
|
|
881
|
+
const run001Snapshots = store.getLatestSnapshots("run-001");
|
|
882
|
+
expect(run001Snapshots).toHaveLength(1);
|
|
883
|
+
expect(run001Snapshots[0]?.agentName).toBe("agent-b");
|
|
884
|
+
});
|
|
885
|
+
|
|
886
|
+
test("getLatestSnapshots(runId) returns latest per agent within the run", () => {
|
|
887
|
+
const now = Date.now();
|
|
888
|
+
// Two snapshots for agent-a in run-001: should only get the latest
|
|
889
|
+
store.recordSnapshot({
|
|
890
|
+
agentName: "agent-a",
|
|
891
|
+
inputTokens: 100,
|
|
892
|
+
outputTokens: 50,
|
|
893
|
+
cacheReadTokens: 0,
|
|
894
|
+
cacheCreationTokens: 0,
|
|
895
|
+
estimatedCostUsd: null,
|
|
896
|
+
modelUsed: null,
|
|
897
|
+
runId: "run-001",
|
|
898
|
+
createdAt: new Date(now - 30_000).toISOString(), // older
|
|
899
|
+
});
|
|
900
|
+
|
|
901
|
+
store.recordSnapshot({
|
|
902
|
+
agentName: "agent-a",
|
|
903
|
+
inputTokens: 500,
|
|
904
|
+
outputTokens: 250,
|
|
905
|
+
cacheReadTokens: 0,
|
|
906
|
+
cacheCreationTokens: 0,
|
|
907
|
+
estimatedCostUsd: null,
|
|
908
|
+
modelUsed: null,
|
|
909
|
+
runId: "run-001",
|
|
910
|
+
createdAt: new Date(now).toISOString(), // latest
|
|
911
|
+
});
|
|
912
|
+
|
|
913
|
+
const snapshots = store.getLatestSnapshots("run-001");
|
|
914
|
+
expect(snapshots).toHaveLength(1);
|
|
915
|
+
expect(snapshots[0]?.inputTokens).toBe(500); // most recent
|
|
916
|
+
});
|
|
917
|
+
|
|
918
|
+
test("migration adds run_id to existing token_snapshots table", () => {
|
|
919
|
+
store.close();
|
|
920
|
+
|
|
921
|
+
// Create a DB with old token_snapshots schema (no run_id column)
|
|
922
|
+
const { Database } = require("bun:sqlite");
|
|
923
|
+
const oldDb = new Database(dbPath);
|
|
924
|
+
oldDb.exec("DROP TABLE IF EXISTS token_snapshots");
|
|
925
|
+
oldDb.exec(`
|
|
926
|
+
CREATE TABLE token_snapshots (
|
|
927
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
928
|
+
agent_name TEXT NOT NULL,
|
|
929
|
+
input_tokens INTEGER NOT NULL DEFAULT 0,
|
|
930
|
+
output_tokens INTEGER NOT NULL DEFAULT 0,
|
|
931
|
+
cache_read_tokens INTEGER NOT NULL DEFAULT 0,
|
|
932
|
+
cache_creation_tokens INTEGER NOT NULL DEFAULT 0,
|
|
933
|
+
estimated_cost_usd REAL,
|
|
934
|
+
model_used TEXT,
|
|
935
|
+
created_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%f','now'))
|
|
936
|
+
)
|
|
937
|
+
`);
|
|
938
|
+
oldDb.exec(`
|
|
939
|
+
INSERT INTO token_snapshots (agent_name, input_tokens, output_tokens, cache_read_tokens, cache_creation_tokens, created_at)
|
|
940
|
+
VALUES ('old-agent', 100, 50, 0, 0, '2026-01-01T00:00:00.000Z')
|
|
941
|
+
`);
|
|
942
|
+
oldDb.close();
|
|
943
|
+
|
|
944
|
+
// Re-open with createMetricsStore which should migrate
|
|
945
|
+
store = createMetricsStore(dbPath);
|
|
946
|
+
|
|
947
|
+
// Old row should be readable with null run_id
|
|
948
|
+
const snapshots = store.getLatestSnapshots();
|
|
949
|
+
expect(snapshots).toHaveLength(1);
|
|
950
|
+
expect(snapshots[0]?.agentName).toBe("old-agent");
|
|
951
|
+
expect(snapshots[0]?.runId).toBeNull();
|
|
952
|
+
|
|
953
|
+
// New rows with run_id should work
|
|
954
|
+
store.recordSnapshot({
|
|
955
|
+
agentName: "new-agent",
|
|
956
|
+
inputTokens: 200,
|
|
957
|
+
outputTokens: 100,
|
|
958
|
+
cacheReadTokens: 0,
|
|
959
|
+
cacheCreationTokens: 0,
|
|
960
|
+
estimatedCostUsd: null,
|
|
961
|
+
modelUsed: null,
|
|
962
|
+
runId: "run-xyz",
|
|
963
|
+
createdAt: new Date().toISOString(),
|
|
964
|
+
});
|
|
965
|
+
|
|
966
|
+
const newSnapshots = store.getLatestSnapshots("run-xyz");
|
|
967
|
+
expect(newSnapshots).toHaveLength(1);
|
|
968
|
+
expect(newSnapshots[0]?.runId).toBe("run-xyz");
|
|
969
|
+
});
|
|
743
970
|
});
|
|
744
971
|
|
|
745
972
|
// === close ===
|