@os-eco/overstory-cli 0.7.7 → 0.7.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -775,6 +775,74 @@ project:
775
775
  await expect(loadConfig(tempDir)).rejects.toThrow(ValidationError);
776
776
  });
777
777
 
778
// A negative runtime.shellInitDelayMs is invalid: loadConfig is expected to
// replace it with 0 and report the problem on stderr.
test("resets negative shellInitDelayMs to 0 with warning", async () => {
  await writeConfig("runtime:\n shellInitDelayMs: -100\n");
  const origWrite = process.stderr.write;
  const decoder = new TextDecoder();
  let capturedStderr = "";
  // Record string and byte chunks alike, so a warning written as a
  // Uint8Array is not silently dropped from the capture.
  process.stderr.write = ((chunk: string | Uint8Array) => {
    capturedStderr +=
      typeof chunk === "string" ? chunk : decoder.decode(chunk, { stream: true });
    return true;
  }) as typeof process.stderr.write;
  try {
    const config = await loadConfig(tempDir);
    expect(config.runtime?.shellInitDelayMs).toBe(0);
  } finally {
    // Always restore the real writer, even when an assertion above throws.
    process.stderr.write = origWrite;
  }
  expect(capturedStderr).toContain("WARNING: runtime.shellInitDelayMs");
});
794
+
795
// A non-finite runtime.shellInitDelayMs (YAML `.inf`) is invalid: loadConfig
// is expected to replace it with 0 and report the problem on stderr.
test("resets Infinity shellInitDelayMs to 0 with warning", async () => {
  await writeConfig("runtime:\n shellInitDelayMs: .inf\n");
  const origWrite = process.stderr.write;
  const decoder = new TextDecoder();
  let capturedStderr = "";
  // Record string and byte chunks alike, so a warning written as a
  // Uint8Array is not silently dropped from the capture.
  process.stderr.write = ((chunk: string | Uint8Array) => {
    capturedStderr +=
      typeof chunk === "string" ? chunk : decoder.decode(chunk, { stream: true });
    return true;
  }) as typeof process.stderr.write;
  try {
    const config = await loadConfig(tempDir);
    expect(config.runtime?.shellInitDelayMs).toBe(0);
  } finally {
    // Always restore the real writer, even when an assertion above throws.
    process.stderr.write = origWrite;
  }
  expect(capturedStderr).toContain("WARNING: runtime.shellInitDelayMs");
});
811
+
812
// A delay above 30 seconds is still accepted as configured, but loadConfig is
// expected to emit a warning that names the configured value.
test("warns when shellInitDelayMs exceeds 30s", async () => {
  await writeConfig("runtime:\n shellInitDelayMs: 60000\n");
  const origWrite = process.stderr.write;
  const decoder = new TextDecoder();
  let capturedStderr = "";
  // Record string and byte chunks alike, so a warning written as a
  // Uint8Array is not silently dropped from the capture.
  process.stderr.write = ((chunk: string | Uint8Array) => {
    capturedStderr +=
      typeof chunk === "string" ? chunk : decoder.decode(chunk, { stream: true });
    return true;
  }) as typeof process.stderr.write;
  try {
    const config = await loadConfig(tempDir);
    // The value is kept, not clamped.
    expect(config.runtime?.shellInitDelayMs).toBe(60000);
  } finally {
    // Always restore the real writer, even when an assertion above throws.
    process.stderr.write = origWrite;
  }
  expect(capturedStderr).toContain("WARNING: runtime.shellInitDelayMs is 60000ms");
});
828
+
829
// A delay inside the accepted range must load unchanged and must not produce
// any stderr output mentioning shellInitDelayMs.
test("accepts valid shellInitDelayMs without warning", async () => {
  await writeConfig("runtime:\n shellInitDelayMs: 2000\n");
  const origWrite = process.stderr.write;
  const decoder = new TextDecoder();
  let capturedStderr = "";
  // This test asserts the ABSENCE of a warning, so the capture must see every
  // chunk: byte writes are decoded instead of being ignored, otherwise a
  // warning emitted as a Uint8Array would let the test pass falsely.
  process.stderr.write = ((chunk: string | Uint8Array) => {
    capturedStderr +=
      typeof chunk === "string" ? chunk : decoder.decode(chunk, { stream: true });
    return true;
  }) as typeof process.stderr.write;
  try {
    const config = await loadConfig(tempDir);
    expect(config.runtime?.shellInitDelayMs).toBe(2000);
  } finally {
    // Always restore the real writer, even when an assertion above throws.
    process.stderr.write = origWrite;
  }
  expect(capturedStderr).not.toContain("shellInitDelayMs");
});
845
+
778
846
  test("rejects qualityGate with empty description", async () => {
779
847
  await writeConfig(`
780
848
  project:
package/src/config.ts CHANGED
@@ -64,6 +64,7 @@ export const DEFAULT_CONFIG: OverstoryConfig = {
64
64
  },
65
65
  runtime: {
66
66
  default: "claude",
67
+ shellInitDelayMs: 0,
67
68
  pi: {
68
69
  provider: "anthropic",
69
70
  modelMap: {
@@ -664,6 +665,21 @@ function validateConfig(config: OverstoryConfig): void {
664
665
  }
665
666
  }
666
667
 
668
+ // runtime.shellInitDelayMs: validate if present
669
+ if (config.runtime?.shellInitDelayMs !== undefined) {
670
+ const delay = config.runtime.shellInitDelayMs;
671
+ if (typeof delay !== "number" || delay < 0 || !Number.isFinite(delay)) {
672
+ process.stderr.write(
673
+ `[overstory] WARNING: runtime.shellInitDelayMs must be a non-negative number. Got: ${delay}. Using default (0).\n`,
674
+ );
675
+ config.runtime.shellInitDelayMs = 0;
676
+ } else if (delay > 30_000) {
677
+ process.stderr.write(
678
+ `[overstory] WARNING: runtime.shellInitDelayMs is ${delay}ms (>${30}s). This adds delay before every agent spawn. Consider a lower value.\n`,
679
+ );
680
+ }
681
+ }
682
+
667
683
  // models: validate each value — accepts aliases and provider-prefixed refs
668
684
  const validAliases = ["sonnet", "opus", "haiku"];
669
685
  const toolHeavyRoles = ["builder", "scout"];
package/src/index.ts CHANGED
@@ -45,7 +45,7 @@ import { OverstoryError, WorktreeError } from "./errors.ts";
45
45
  import { jsonError } from "./json.ts";
46
46
  import { brand, chalk, muted, setQuiet } from "./logging/color.ts";
47
47
 
48
- export const VERSION = "0.7.7";
48
+ export const VERSION = "0.7.8";
49
49
 
50
50
  const rawArgs = process.argv.slice(2);
51
51
 
@@ -267,6 +267,7 @@ program
267
267
  .option("--no-scout-check", "Suppress the parentHasScouts scout-before-build warning")
268
268
  .option("--dispatch-max-agents <n>", "Per-lead max agents ceiling (injected into overlay)")
269
269
  .option("--runtime <name>", "Runtime adapter (default: config or claude)")
270
+ .option("--base-branch <branch>", "Base branch for worktree creation (default: current HEAD)")
270
271
  .option("--json", "Output result as JSON")
271
272
  .action(async (taskId, opts) => {
272
273
  await slingCommand(taskId, opts);
@@ -0,0 +1,258 @@
1
+ import { describe, expect, test } from "bun:test";
2
+ import { estimateCost, getPricingForModel } from "./pricing";
3
+
4
describe("getPricingForModel()", () => {
  /**
   * Asserts that `modelId` resolves to a pricing entry whose input rate equals
   * `inputRate` and, when `outputRate` is supplied, whose output rate equals
   * it as well.
   */
  const expectRates = (modelId: string, inputRate: number, outputRate?: number): void => {
    const pricing = getPricingForModel(modelId);
    expect(pricing).not.toBeNull();
    expect(pricing?.inputPerMTok).toBe(inputRate);
    if (outputRate !== undefined) {
      expect(pricing?.outputPerMTok).toBe(outputRate);
    }
  };

  describe("Claude tiers", () => {
    test("matches opus by substring in full model ID", () => {
      expectRates("claude-opus-4-20250514", 15, 75);
    });

    test("matches sonnet by substring in full model ID", () => {
      expectRates("claude-sonnet-4-20250514", 3, 15);
    });

    test("matches haiku by substring in full model ID", () => {
      expectRates("claude-haiku-3-5-20241022", 0.8, 4);
    });
  });

  describe("OpenAI tiers", () => {
    test("matches gpt-4o-mini", () => {
      expectRates("gpt-4o-mini", 0.15);
    });

    test("matches gpt-4o", () => {
      expectRates("gpt-4o", 2.5);
    });

    test("matches gpt-5", () => {
      expectRates("gpt-5", 10);
    });

    test("matches o3", () => {
      expectRates("o3", 10, 40);
    });

    test("matches o1", () => {
      expectRates("o1", 15, 60);
    });
  });

  describe("Priority ordering", () => {
    test("gpt-4o-mini matches before gpt-4o (substring overlap)", () => {
      const miniPricing = getPricingForModel("gpt-4o-mini");
      const fullPricing = getPricingForModel("gpt-4o");
      expect(miniPricing).not.toBeNull();
      expect(fullPricing).not.toBeNull();
      if (miniPricing !== null && fullPricing !== null) {
        // The mini tier has the lower input rate of the two.
        expect(miniPricing.inputPerMTok).toBeLessThan(fullPricing.inputPerMTok);
        // "gpt-4o-mini" must pick up the mini rate rather than the gpt-4o one.
        expect(miniPricing.inputPerMTok).toBe(0.15);
      }
    });

    test("o3 matches before o1 (o1 string contains o1, o3 does not contain o1)", () => {
      const o3Pricing = getPricingForModel("o3");
      const o1Pricing = getPricingForModel("o1");
      expect(o3Pricing).not.toBeNull();
      expect(o1Pricing).not.toBeNull();
      if (o3Pricing !== null && o1Pricing !== null) {
        expect(o3Pricing.outputPerMTok).toBe(40);
        expect(o1Pricing.outputPerMTok).toBe(60);
      }
    });
  });

  describe("Gemini tiers", () => {
    test("matches gemini-flash by 'flash' substring", () => {
      expectRates("gemini-flash-2.0", 0.1, 0.4);
    });

    test("matches gemini-pro by 'gemini' + 'pro' substrings", () => {
      expectRates("gemini-2.0-pro-exp", 1.25, 5);
    });
  });

  describe("Case insensitivity", () => {
    test("Claude-OPUS-4 resolves correctly", () => {
      expectRates("Claude-OPUS-4", 15);
    });

    test("SONNET resolves correctly", () => {
      expectRates("SONNET", 3);
    });

    test("Haiku resolves correctly", () => {
      expectRates("Haiku", 0.8);
    });
  });

  describe("Unknown models", () => {
    // Model identifiers that no pricing tier is expected to recognise.
    test("returns null for llama-3-70b", () => {
      const pricing = getPricingForModel("llama-3-70b");
      expect(pricing).toBeNull();
    });

    test("returns null for empty string", () => {
      const pricing = getPricingForModel("");
      expect(pricing).toBeNull();
    });

    test("returns null for random gibberish", () => {
      const pricing = getPricingForModel("xyzzy-foo-bar-9000");
      expect(pricing).toBeNull();
    });
  });
});
136
+
137
describe("estimateCost()", () => {
  // Baseline usage with every token counter at zero; each test spreads this
  // and overrides only the counters it exercises.
  const noTokens = {
    inputTokens: 0,
    outputTokens: 0,
    cacheReadTokens: 0,
    cacheCreationTokens: 0,
  };

  test("Typical Claude Opus usage: 1M input, 100K output, 500K cacheRead, 200K cacheCreation → $24.00", () => {
    const total = estimateCost({
      inputTokens: 1_000_000,
      outputTokens: 100_000,
      cacheReadTokens: 500_000,
      cacheCreationTokens: 200_000,
      modelUsed: "claude-opus-4-20250514",
    });
    // input          1   * 15   = 15.00
    // output         0.1 * 75   =  7.50
    // cache read     0.5 * 1.5  =  0.75
    // cache creation 0.2 * 3.75 =  0.75
    // sum                       = 24.00
    expect(total).toBe(24.0);
  });

  test("Typical Claude Sonnet usage: 500K input, 50K output, 100K cacheRead, 50K cacheCreation", () => {
    const total = estimateCost({
      inputTokens: 500_000,
      outputTokens: 50_000,
      cacheReadTokens: 100_000,
      cacheCreationTokens: 50_000,
      modelUsed: "claude-sonnet-4-20250514",
    });
    // input          0.5  * 3    = 1.50
    // output         0.05 * 15   = 0.75
    // cache read     0.1  * 0.3  = 0.03
    // cache creation 0.05 * 0.75 = 0.0375
    // sum                        = 2.3175
    expect(total).toBeCloseTo(2.3175, 4);
  });

  test("Zero tokens returns 0 (not null)", () => {
    const total = estimateCost({ ...noTokens, modelUsed: "claude-opus-4" });
    expect(total).toBe(0);
  });

  test("Null modelUsed returns null", () => {
    const total = estimateCost({
      ...noTokens,
      inputTokens: 1000,
      outputTokens: 500,
      modelUsed: null,
    });
    expect(total).toBeNull();
  });

  test("Unknown model returns null", () => {
    const total = estimateCost({
      ...noTokens,
      inputTokens: 1000,
      outputTokens: 500,
      modelUsed: "llama-3-70b",
    });
    expect(total).toBeNull();
  });

  test("Input-only usage: only inputTokens > 0, rest zero", () => {
    const total = estimateCost({
      ...noTokens,
      inputTokens: 1_000_000,
      modelUsed: "claude-sonnet-4",
    });
    // input 1 * 3 = 3.00
    expect(total).toBe(3.0);
  });

  test("Output-only usage: only outputTokens > 0, rest zero", () => {
    const total = estimateCost({
      ...noTokens,
      outputTokens: 1_000_000,
      modelUsed: "claude-sonnet-4",
    });
    // output 1 * 15 = 15.00
    expect(total).toBe(15.0);
  });

  test("Cache-heavy usage: large cacheRead + cacheCreation, verify math", () => {
    const total = estimateCost({
      ...noTokens,
      cacheReadTokens: 10_000_000,
      cacheCreationTokens: 5_000_000,
      modelUsed: "claude-opus-4",
    });
    // cache read     10 * 1.5  = 15.00
    // cache creation  5 * 3.75 = 18.75
    // sum                      = 33.75
    expect(total).toBeCloseTo(33.75, 5);
  });
});
241
+
242
// Checks the cache rates relative to the plain input rate of the same tier.
describe("Cache pricing ratios", () => {
  test("Claude cache read is 10% of input price (verified on opus)", () => {
    const opus = getPricingForModel("claude-opus-4");
    expect(opus).not.toBeNull();
    if (opus !== null) {
      const readShare = opus.cacheReadPerMTok / opus.inputPerMTok;
      expect(readShare).toBeCloseTo(0.1, 10);
    }
  });

  test("Claude cache creation is 25% of input price (verified on sonnet)", () => {
    const sonnet = getPricingForModel("claude-sonnet-4");
    expect(sonnet).not.toBeNull();
    if (sonnet !== null) {
      const creationShare = sonnet.cacheCreationPerMTok / sonnet.inputPerMTok;
      expect(creationShare).toBeCloseTo(0.25, 10);
    }
  });
});
@@ -535,6 +535,7 @@ describe("token snapshots", () => {
535
535
  cacheCreationTokens: 100,
536
536
  estimatedCostUsd: 0.15,
537
537
  modelUsed: "claude-sonnet-4-5",
538
+ runId: null,
538
539
  createdAt: new Date().toISOString(),
539
540
  };
540
541
 
@@ -558,6 +559,7 @@ describe("token snapshots", () => {
558
559
  cacheCreationTokens: 0,
559
560
  estimatedCostUsd: 0.01,
560
561
  modelUsed: "claude-sonnet-4-5",
562
+ runId: null,
561
563
  createdAt: new Date(now - 60_000).toISOString(), // 1 min ago
562
564
  });
563
565
 
@@ -569,6 +571,7 @@ describe("token snapshots", () => {
569
571
  cacheCreationTokens: 0,
570
572
  estimatedCostUsd: 0.02,
571
573
  modelUsed: "claude-sonnet-4-5",
574
+ runId: null,
572
575
  createdAt: new Date(now).toISOString(), // now (most recent)
573
576
  });
574
577
 
@@ -580,6 +583,7 @@ describe("token snapshots", () => {
580
583
  cacheCreationTokens: 0,
581
584
  estimatedCostUsd: 0.03,
582
585
  modelUsed: "claude-sonnet-4-5",
586
+ runId: null,
583
587
  createdAt: new Date(now - 30_000).toISOString(), // 30s ago
584
588
  });
585
589
 
@@ -606,6 +610,7 @@ describe("token snapshots", () => {
606
610
  cacheCreationTokens: 0,
607
611
  estimatedCostUsd: null,
608
612
  modelUsed: null,
613
+ runId: null,
609
614
  createdAt: time1,
610
615
  });
611
616
 
@@ -617,6 +622,7 @@ describe("token snapshots", () => {
617
622
  cacheCreationTokens: 0,
618
623
  estimatedCostUsd: null,
619
624
  modelUsed: null,
625
+ runId: null,
620
626
  createdAt: time2,
621
627
  });
622
628
 
@@ -638,6 +644,7 @@ describe("token snapshots", () => {
638
644
  cacheCreationTokens: 0,
639
645
  estimatedCostUsd: null,
640
646
  modelUsed: null,
647
+ runId: null,
641
648
  createdAt: new Date().toISOString(),
642
649
  });
643
650
 
@@ -649,6 +656,7 @@ describe("token snapshots", () => {
649
656
  cacheCreationTokens: 0,
650
657
  estimatedCostUsd: null,
651
658
  modelUsed: null,
659
+ runId: null,
652
660
  createdAt: new Date().toISOString(),
653
661
  });
654
662
 
@@ -666,6 +674,7 @@ describe("token snapshots", () => {
666
674
  cacheCreationTokens: 0,
667
675
  estimatedCostUsd: null,
668
676
  modelUsed: null,
677
+ runId: null,
669
678
  createdAt: new Date().toISOString(),
670
679
  });
671
680
 
@@ -677,6 +686,7 @@ describe("token snapshots", () => {
677
686
  cacheCreationTokens: 0,
678
687
  estimatedCostUsd: null,
679
688
  modelUsed: null,
689
+ runId: null,
680
690
  createdAt: new Date().toISOString(),
681
691
  });
682
692
 
@@ -698,6 +708,7 @@ describe("token snapshots", () => {
698
708
  cacheCreationTokens: 0,
699
709
  estimatedCostUsd: null,
700
710
  modelUsed: null,
711
+ runId: null,
701
712
  createdAt: new Date(now - 120_000).toISOString(), // 2 min ago
702
713
  });
703
714
 
@@ -709,6 +720,7 @@ describe("token snapshots", () => {
709
720
  cacheCreationTokens: 0,
710
721
  estimatedCostUsd: null,
711
722
  modelUsed: null,
723
+ runId: null,
712
724
  createdAt: new Date(now - 10_000).toISOString(), // 10s ago (recent)
713
725
  });
714
726
 
@@ -729,6 +741,7 @@ describe("token snapshots", () => {
729
741
  cacheCreationTokens: 0,
730
742
  estimatedCostUsd: null,
731
743
  modelUsed: null,
744
+ runId: null,
732
745
  createdAt: new Date().toISOString(),
733
746
  });
734
747
 
@@ -740,6 +753,220 @@ describe("token snapshots", () => {
740
753
  expect(snapshots).toHaveLength(1);
741
754
  expect(snapshots[0]?.agentName).toBe("test-agent");
742
755
  });
756
+
757
// A snapshot's runId, whether a string or null, must come back unchanged from
// getLatestSnapshots().
test("runId roundtrips correctly through snapshot record and retrieval", () => {
  const createdAt = new Date(Date.now()).toISOString();
  // Fields that are identical for both recorded snapshots.
  const shared = {
    cacheReadTokens: 0,
    cacheCreationTokens: 0,
    estimatedCostUsd: null,
    modelUsed: null,
    createdAt,
  };

  store.recordSnapshot({
    ...shared,
    agentName: "agent-a",
    inputTokens: 100,
    outputTokens: 50,
    runId: "run-abc",
  });
  store.recordSnapshot({
    ...shared,
    agentName: "agent-b",
    inputTokens: 200,
    outputTokens: 100,
    runId: null,
  });

  const latest = store.getLatestSnapshots();
  const withRun = latest.find((snap) => snap.agentName === "agent-a");
  const withoutRun = latest.find((snap) => snap.agentName === "agent-b");

  expect(withRun?.runId).toBe("run-abc");
  expect(withoutRun?.runId).toBeNull();
});
790
+
791
// Passing a runId to getLatestSnapshots() must restrict the result to
// snapshots recorded under that run.
test("getLatestSnapshots(runId) returns only snapshots matching that run", () => {
  const createdAt = new Date(Date.now()).toISOString();
  // Records one snapshot; only the agent, token counts and run vary here.
  const record = (
    agentName: string,
    inputTokens: number,
    outputTokens: number,
    runId: string,
  ): void => {
    store.recordSnapshot({
      agentName,
      inputTokens,
      outputTokens,
      cacheReadTokens: 0,
      cacheCreationTokens: 0,
      estimatedCostUsd: null,
      modelUsed: null,
      runId,
      createdAt,
    });
  };

  record("agent-a", 100, 50, "run-001");
  record("agent-b", 200, 100, "run-001");
  record("agent-c", 300, 150, "run-002");

  const firstRun = store.getLatestSnapshots("run-001");
  expect(firstRun).toHaveLength(2);
  expect(firstRun.every((snap) => snap.runId === "run-001")).toBe(true);

  const secondRun = store.getLatestSnapshots("run-002");
  expect(secondRun).toHaveLength(1);
  expect(secondRun[0]?.agentName).toBe("agent-c");
});
837
+
838
// Asking for a run that has no snapshots must yield an empty array, even when
// other runs have data.
test("getLatestSnapshots(runId) returns empty array for unknown run", () => {
  const existing = {
    agentName: "agent-a",
    inputTokens: 100,
    outputTokens: 50,
    cacheReadTokens: 0,
    cacheCreationTokens: 0,
    estimatedCostUsd: null,
    modelUsed: null,
    runId: "run-001",
    createdAt: new Date().toISOString(),
  };
  store.recordSnapshot(existing);

  const result = store.getLatestSnapshots("run-nonexistent");
  expect(result).toEqual([]);
});
854
+
855
// Snapshots recorded without a run (runId null) must not appear when a
// specific run is requested.
test("getLatestSnapshots(runId) excludes snapshots with null run_id", () => {
  const createdAt = new Date(Date.now()).toISOString();
  // Fields that are identical for both recorded snapshots.
  const shared = {
    cacheReadTokens: 0,
    cacheCreationTokens: 0,
    estimatedCostUsd: null,
    modelUsed: null,
    createdAt,
  };

  // agent-a is recorded without any run.
  store.recordSnapshot({
    ...shared,
    agentName: "agent-a",
    inputTokens: 100,
    outputTokens: 50,
    runId: null,
  });
  store.recordSnapshot({
    ...shared,
    agentName: "agent-b",
    inputTokens: 200,
    outputTokens: 100,
    runId: "run-001",
  });

  const inRun = store.getLatestSnapshots("run-001");
  expect(inRun).toHaveLength(1);
  expect(inRun[0]?.agentName).toBe("agent-b");
});
885
+
886
// With several snapshots for one agent inside a run, only the most recent one
// is expected back.
test("getLatestSnapshots(runId) returns latest per agent within the run", () => {
  const now = Date.now();
  // Fields shared by both agent-a snapshots in run-001.
  const shared = {
    agentName: "agent-a",
    cacheReadTokens: 0,
    cacheCreationTokens: 0,
    estimatedCostUsd: null,
    modelUsed: null,
    runId: "run-001",
  };

  // Older snapshot, 30 seconds in the past.
  store.recordSnapshot({
    ...shared,
    inputTokens: 100,
    outputTokens: 50,
    createdAt: new Date(now - 30_000).toISOString(),
  });
  // Newest snapshot.
  store.recordSnapshot({
    ...shared,
    inputTokens: 500,
    outputTokens: 250,
    createdAt: new Date(now).toISOString(),
  });

  const latest = store.getLatestSnapshots("run-001");
  expect(latest).toHaveLength(1);
  // 500 identifies the newer of the two records.
  expect(latest[0]?.inputTokens).toBe(500);
});
917
+
918
// Opening a database whose token_snapshots table predates the run_id column
// must keep old rows readable (runId null) and accept new rows with a runId.
test("migration adds run_id to existing token_snapshots table", () => {
  // Release the store's handle before rebuilding the table by hand.
  store.close();

  // Rebuild token_snapshots with the legacy schema, i.e. without run_id.
  const { Database } = require("bun:sqlite");
  const legacyDb = new Database(dbPath);
  legacyDb.exec("DROP TABLE IF EXISTS token_snapshots");
  legacyDb.exec(`
    CREATE TABLE token_snapshots (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      agent_name TEXT NOT NULL,
      input_tokens INTEGER NOT NULL DEFAULT 0,
      output_tokens INTEGER NOT NULL DEFAULT 0,
      cache_read_tokens INTEGER NOT NULL DEFAULT 0,
      cache_creation_tokens INTEGER NOT NULL DEFAULT 0,
      estimated_cost_usd REAL,
      model_used TEXT,
      created_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%f','now'))
    )
  `);
  legacyDb.exec(`
    INSERT INTO token_snapshots (agent_name, input_tokens, output_tokens, cache_read_tokens, cache_creation_tokens, created_at)
    VALUES ('old-agent', 100, 50, 0, 0, '2026-01-01T00:00:00.000Z')
  `);
  legacyDb.close();

  // Re-opening through createMetricsStore is what should trigger the migration.
  store = createMetricsStore(dbPath);

  // The pre-existing row must still be readable, with a null runId.
  const migratedRows = store.getLatestSnapshots();
  expect(migratedRows).toHaveLength(1);
  expect(migratedRows[0]?.agentName).toBe("old-agent");
  expect(migratedRows[0]?.runId).toBeNull();

  // Rows carrying a runId must be accepted after the migration.
  const freshSnapshot = {
    agentName: "new-agent",
    inputTokens: 200,
    outputTokens: 100,
    cacheReadTokens: 0,
    cacheCreationTokens: 0,
    estimatedCostUsd: null,
    modelUsed: null,
    runId: "run-xyz",
    createdAt: new Date().toISOString(),
  };
  store.recordSnapshot(freshSnapshot);

  const rowsForRun = store.getLatestSnapshots("run-xyz");
  expect(rowsForRun).toHaveLength(1);
  expect(rowsForRun[0]?.runId).toBe("run-xyz");
});
743
970
  });
744
971
 
745
972
  // === close ===