nodebench-mcp 3.0.0 → 3.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (168)
  1. package/dist/dashboard/operatingDashboardHtml.js +2 -1
  2. package/dist/dashboard/operatingDashboardHtml.js.map +1 -1
  3. package/dist/dashboard/operatingServer.js +3 -2
  4. package/dist/dashboard/operatingServer.js.map +1 -1
  5. package/dist/db.js +51 -3
  6. package/dist/db.js.map +1 -1
  7. package/dist/index.js +13 -16
  8. package/dist/index.js.map +1 -1
  9. package/dist/packageInfo.d.ts +3 -0
  10. package/dist/packageInfo.js +32 -0
  11. package/dist/packageInfo.js.map +1 -0
  12. package/dist/sandboxApi.js +2 -1
  13. package/dist/sandboxApi.js.map +1 -1
  14. package/dist/tools/boilerplateTools.js +10 -9
  15. package/dist/tools/boilerplateTools.js.map +1 -1
  16. package/dist/tools/documentationTools.js +2 -1
  17. package/dist/tools/documentationTools.js.map +1 -1
  18. package/dist/tools/progressiveDiscoveryTools.js +2 -1
  19. package/dist/tools/progressiveDiscoveryTools.js.map +1 -1
  20. package/dist/tools/toolRegistry.js +11 -0
  21. package/dist/tools/toolRegistry.js.map +1 -1
  22. package/dist/toolsetRegistry.js +74 -1
  23. package/dist/toolsetRegistry.js.map +1 -1
  24. package/package.json +4 -3
  25. package/dist/__tests__/analytics.test.d.ts +0 -11
  26. package/dist/__tests__/analytics.test.js +0 -546
  27. package/dist/__tests__/analytics.test.js.map +0 -1
  28. package/dist/__tests__/architectComplex.test.d.ts +0 -1
  29. package/dist/__tests__/architectComplex.test.js +0 -373
  30. package/dist/__tests__/architectComplex.test.js.map +0 -1
  31. package/dist/__tests__/architectSmoke.test.d.ts +0 -1
  32. package/dist/__tests__/architectSmoke.test.js +0 -92
  33. package/dist/__tests__/architectSmoke.test.js.map +0 -1
  34. package/dist/__tests__/audit-registry.d.ts +0 -1
  35. package/dist/__tests__/audit-registry.js +0 -60
  36. package/dist/__tests__/audit-registry.js.map +0 -1
  37. package/dist/__tests__/batchAutopilot.test.d.ts +0 -8
  38. package/dist/__tests__/batchAutopilot.test.js +0 -218
  39. package/dist/__tests__/batchAutopilot.test.js.map +0 -1
  40. package/dist/__tests__/cliSubcommands.test.d.ts +0 -1
  41. package/dist/__tests__/cliSubcommands.test.js +0 -138
  42. package/dist/__tests__/cliSubcommands.test.js.map +0 -1
  43. package/dist/__tests__/comparativeBench.test.d.ts +0 -1
  44. package/dist/__tests__/comparativeBench.test.js +0 -722
  45. package/dist/__tests__/comparativeBench.test.js.map +0 -1
  46. package/dist/__tests__/critterCalibrationEval.d.ts +0 -8
  47. package/dist/__tests__/critterCalibrationEval.js +0 -370
  48. package/dist/__tests__/critterCalibrationEval.js.map +0 -1
  49. package/dist/__tests__/dynamicLoading.test.d.ts +0 -1
  50. package/dist/__tests__/dynamicLoading.test.js +0 -280
  51. package/dist/__tests__/dynamicLoading.test.js.map +0 -1
  52. package/dist/__tests__/embeddingProvider.test.d.ts +0 -1
  53. package/dist/__tests__/embeddingProvider.test.js +0 -86
  54. package/dist/__tests__/embeddingProvider.test.js.map +0 -1
  55. package/dist/__tests__/evalDatasetBench.test.d.ts +0 -1
  56. package/dist/__tests__/evalDatasetBench.test.js +0 -738
  57. package/dist/__tests__/evalDatasetBench.test.js.map +0 -1
  58. package/dist/__tests__/evalHarness.test.d.ts +0 -1
  59. package/dist/__tests__/evalHarness.test.js +0 -1107
  60. package/dist/__tests__/evalHarness.test.js.map +0 -1
  61. package/dist/__tests__/fixtures/bfcl_v3_long_context.sample.json +0 -264
  62. package/dist/__tests__/fixtures/generateBfclLongContextFixture.d.ts +0 -10
  63. package/dist/__tests__/fixtures/generateBfclLongContextFixture.js +0 -135
  64. package/dist/__tests__/fixtures/generateBfclLongContextFixture.js.map +0 -1
  65. package/dist/__tests__/fixtures/generateSwebenchVerifiedFixture.d.ts +0 -14
  66. package/dist/__tests__/fixtures/generateSwebenchVerifiedFixture.js +0 -189
  67. package/dist/__tests__/fixtures/generateSwebenchVerifiedFixture.js.map +0 -1
  68. package/dist/__tests__/fixtures/generateToolbenchInstructionFixture.d.ts +0 -16
  69. package/dist/__tests__/fixtures/generateToolbenchInstructionFixture.js +0 -154
  70. package/dist/__tests__/fixtures/generateToolbenchInstructionFixture.js.map +0 -1
  71. package/dist/__tests__/fixtures/swebench_verified.sample.json +0 -162
  72. package/dist/__tests__/fixtures/toolbench_instruction.sample.json +0 -109
  73. package/dist/__tests__/forecastingDogfood.test.d.ts +0 -9
  74. package/dist/__tests__/forecastingDogfood.test.js +0 -284
  75. package/dist/__tests__/forecastingDogfood.test.js.map +0 -1
  76. package/dist/__tests__/forecastingScoring.test.d.ts +0 -9
  77. package/dist/__tests__/forecastingScoring.test.js +0 -202
  78. package/dist/__tests__/forecastingScoring.test.js.map +0 -1
  79. package/dist/__tests__/gaiaCapabilityAudioEval.test.d.ts +0 -15
  80. package/dist/__tests__/gaiaCapabilityAudioEval.test.js +0 -265
  81. package/dist/__tests__/gaiaCapabilityAudioEval.test.js.map +0 -1
  82. package/dist/__tests__/gaiaCapabilityEval.test.d.ts +0 -14
  83. package/dist/__tests__/gaiaCapabilityEval.test.js +0 -1259
  84. package/dist/__tests__/gaiaCapabilityEval.test.js.map +0 -1
  85. package/dist/__tests__/gaiaCapabilityFilesEval.test.d.ts +0 -15
  86. package/dist/__tests__/gaiaCapabilityFilesEval.test.js +0 -914
  87. package/dist/__tests__/gaiaCapabilityFilesEval.test.js.map +0 -1
  88. package/dist/__tests__/gaiaCapabilityMediaEval.test.d.ts +0 -15
  89. package/dist/__tests__/gaiaCapabilityMediaEval.test.js +0 -1101
  90. package/dist/__tests__/gaiaCapabilityMediaEval.test.js.map +0 -1
  91. package/dist/__tests__/helpers/answerMatch.d.ts +0 -41
  92. package/dist/__tests__/helpers/answerMatch.js +0 -267
  93. package/dist/__tests__/helpers/answerMatch.js.map +0 -1
  94. package/dist/__tests__/helpers/textLlm.d.ts +0 -25
  95. package/dist/__tests__/helpers/textLlm.js +0 -214
  96. package/dist/__tests__/helpers/textLlm.js.map +0 -1
  97. package/dist/__tests__/localDashboard.test.d.ts +0 -1
  98. package/dist/__tests__/localDashboard.test.js +0 -226
  99. package/dist/__tests__/localDashboard.test.js.map +0 -1
  100. package/dist/__tests__/multiHopDogfood.test.d.ts +0 -12
  101. package/dist/__tests__/multiHopDogfood.test.js +0 -303
  102. package/dist/__tests__/multiHopDogfood.test.js.map +0 -1
  103. package/dist/__tests__/openDatasetParallelEval.test.d.ts +0 -7
  104. package/dist/__tests__/openDatasetParallelEval.test.js +0 -209
  105. package/dist/__tests__/openDatasetParallelEval.test.js.map +0 -1
  106. package/dist/__tests__/openDatasetParallelEvalGaia.test.d.ts +0 -7
  107. package/dist/__tests__/openDatasetParallelEvalGaia.test.js +0 -279
  108. package/dist/__tests__/openDatasetParallelEvalGaia.test.js.map +0 -1
  109. package/dist/__tests__/openDatasetParallelEvalSwebench.test.d.ts +0 -7
  110. package/dist/__tests__/openDatasetParallelEvalSwebench.test.js +0 -220
  111. package/dist/__tests__/openDatasetParallelEvalSwebench.test.js.map +0 -1
  112. package/dist/__tests__/openDatasetParallelEvalToolbench.test.d.ts +0 -7
  113. package/dist/__tests__/openDatasetParallelEvalToolbench.test.js +0 -218
  114. package/dist/__tests__/openDatasetParallelEvalToolbench.test.js.map +0 -1
  115. package/dist/__tests__/openDatasetPerfComparison.test.d.ts +0 -10
  116. package/dist/__tests__/openDatasetPerfComparison.test.js +0 -318
  117. package/dist/__tests__/openDatasetPerfComparison.test.js.map +0 -1
  118. package/dist/__tests__/openclawDogfood.test.d.ts +0 -23
  119. package/dist/__tests__/openclawDogfood.test.js +0 -535
  120. package/dist/__tests__/openclawDogfood.test.js.map +0 -1
  121. package/dist/__tests__/openclawMessaging.test.d.ts +0 -14
  122. package/dist/__tests__/openclawMessaging.test.js +0 -232
  123. package/dist/__tests__/openclawMessaging.test.js.map +0 -1
  124. package/dist/__tests__/presetRealWorldBench.test.d.ts +0 -1
  125. package/dist/__tests__/presetRealWorldBench.test.js +0 -859
  126. package/dist/__tests__/presetRealWorldBench.test.js.map +0 -1
  127. package/dist/__tests__/tools.test.d.ts +0 -1
  128. package/dist/__tests__/tools.test.js +0 -3201
  129. package/dist/__tests__/tools.test.js.map +0 -1
  130. package/dist/__tests__/toolsetGatingEval.test.d.ts +0 -1
  131. package/dist/__tests__/toolsetGatingEval.test.js +0 -1099
  132. package/dist/__tests__/toolsetGatingEval.test.js.map +0 -1
  133. package/dist/__tests__/traceabilityDogfood.test.d.ts +0 -12
  134. package/dist/__tests__/traceabilityDogfood.test.js +0 -241
  135. package/dist/__tests__/traceabilityDogfood.test.js.map +0 -1
  136. package/dist/__tests__/webmcpTools.test.d.ts +0 -7
  137. package/dist/__tests__/webmcpTools.test.js +0 -195
  138. package/dist/__tests__/webmcpTools.test.js.map +0 -1
  139. package/dist/benchmarks/testProviderBus.d.ts +0 -7
  140. package/dist/benchmarks/testProviderBus.js +0 -272
  141. package/dist/benchmarks/testProviderBus.js.map +0 -1
  142. package/dist/hooks/postCompaction.d.ts +0 -14
  143. package/dist/hooks/postCompaction.js +0 -51
  144. package/dist/hooks/postCompaction.js.map +0 -1
  145. package/dist/security/__tests__/security.test.d.ts +0 -8
  146. package/dist/security/__tests__/security.test.js +0 -295
  147. package/dist/security/__tests__/security.test.js.map +0 -1
  148. package/dist/sync/hyperloopEval.test.d.ts +0 -4
  149. package/dist/sync/hyperloopEval.test.js +0 -60
  150. package/dist/sync/hyperloopEval.test.js.map +0 -1
  151. package/dist/sync/store.test.d.ts +0 -4
  152. package/dist/sync/store.test.js +0 -43
  153. package/dist/sync/store.test.js.map +0 -1
  154. package/dist/tools/documentTools.d.ts +0 -5
  155. package/dist/tools/documentTools.js +0 -524
  156. package/dist/tools/documentTools.js.map +0 -1
  157. package/dist/tools/financialTools.d.ts +0 -10
  158. package/dist/tools/financialTools.js +0 -403
  159. package/dist/tools/financialTools.js.map +0 -1
  160. package/dist/tools/memoryTools.d.ts +0 -5
  161. package/dist/tools/memoryTools.js +0 -137
  162. package/dist/tools/memoryTools.js.map +0 -1
  163. package/dist/tools/planningTools.d.ts +0 -5
  164. package/dist/tools/planningTools.js +0 -147
  165. package/dist/tools/planningTools.js.map +0 -1
  166. package/dist/tools/searchTools.d.ts +0 -5
  167. package/dist/tools/searchTools.js +0 -145
  168. package/dist/tools/searchTools.js.map +0 -1
@@ -1,284 +0,0 @@
1
- /**
2
- * Forecasting OS — Integration Tests (Dogfood)
3
- *
4
- * Full lifecycle tests: create → evidence → update → resolve → track record → calibration.
5
- * Uses MCP tools directly (SQLite-backed, no Convex dependency).
6
- *
7
- * Run: npx vitest run src/__tests__/forecastingDogfood.test.ts
8
- */
9
- import { describe, it, expect, beforeAll } from "vitest";
10
- import { forecastingTools } from "../tools/forecastingTools.js";
11
- // ─── Helpers ────────────────────────────────────────────────────────────────
12
- const toolMap = new Map(forecastingTools.map((t) => [t.name, t]));
13
- async function callTool(name, args) {
14
- const tool = toolMap.get(name);
15
- if (!tool)
16
- throw new Error(`Tool ${name} not found`);
17
- const result = await tool.handler(args);
18
- const text = result.find((r) => r.type === "text")?.text;
19
- if (!text)
20
- throw new Error(`Tool ${name} returned no text`);
21
- return JSON.parse(text);
22
- }
23
- // ─── Tool Structure ─────────────────────────────────────────────────────────
24
- describe("Forecasting tools: structure", () => {
25
- it("should have 9 tools", () => {
26
- expect(forecastingTools.length).toBe(9);
27
- });
28
- it("every tool has name, description, inputSchema, handler", () => {
29
- for (const tool of forecastingTools) {
30
- expect(tool.name).toBeTruthy();
31
- expect(tool.description).toBeTruthy();
32
- expect(tool.inputSchema).toBeDefined();
33
- expect(typeof tool.handler).toBe("function");
34
- }
35
- });
36
- it("tool names match expected list", () => {
37
- const names = forecastingTools.map((t) => t.name).sort();
38
- expect(names).toEqual([
39
- "add_forecast_evidence",
40
- "compute_calibration",
41
- "create_forecast",
42
- "get_active_forecasts",
43
- "get_forecast_chain",
44
- "get_forecast_evidence",
45
- "get_forecast_track_record",
46
- "resolve_forecast",
47
- "update_forecast_probability",
48
- ]);
49
- });
50
- });
51
- // ─── Full Lifecycle ─────────────────────────────────────────────────────────
52
- describe("Forecasting lifecycle", () => {
53
- let forecastId;
54
- it("create_forecast — creates a binary forecast", async () => {
55
- const result = await callTool("create_forecast", {
56
- question: "Will GPT-5 be released by 2026-12-31?",
57
- forecastType: "binary",
58
- resolutionDate: "2026-12-31",
59
- resolutionCriteria: "OpenAI announces GPT-5 on official blog or press release",
60
- probability: 0.5,
61
- baseRate: 0.6,
62
- refreshFrequency: "weekly",
63
- topDrivers: ["Historical 18-month release cadence", "Sam Altman interview hints"],
64
- topCounterarguments: ["No official roadmap published"],
65
- tags: ["ai_tech"],
66
- });
67
- expect(result.forecastId).toBeTruthy();
68
- expect(result.status).toBe("active");
69
- expect(result.probability).toBe(0.5);
70
- forecastId = result.forecastId;
71
- });
72
- it("add_forecast_evidence — adds supporting evidence", async () => {
73
- const result = await callTool("add_forecast_evidence", {
74
- forecastId,
75
- sourceUrl: "https://example.com/sam-altman-interview",
76
- sourceTitle: "Sam Altman Interview on AI Progress",
77
- sourceType: "news",
78
- excerpt: "Altman hints at a major model release in the second half of 2026",
79
- signal: "supporting",
80
- impactOnProbability: 0.1,
81
- });
82
- expect(result.evidenceId).toBeTruthy();
83
- expect(result.signal).toBe("supporting");
84
- });
85
- it("add_forecast_evidence — adds disconfirming evidence", async () => {
86
- const result = await callTool("add_forecast_evidence", {
87
- forecastId,
88
- sourceUrl: "https://example.com/compute-shortage",
89
- sourceTitle: "GPU Shortage Report Q1 2026",
90
- sourceType: "filing",
91
- excerpt: "Major cloud providers report 40% compute capacity shortfall for large model training",
92
- signal: "disconfirming",
93
- });
94
- expect(result.evidenceId).toBeTruthy();
95
- expect(result.signal).toBe("disconfirming");
96
- });
97
- it("add_forecast_evidence — deduplicates by URL", async () => {
98
- const result = await callTool("add_forecast_evidence", {
99
- forecastId,
100
- sourceUrl: "https://example.com/sam-altman-interview",
101
- sourceTitle: "Duplicate",
102
- sourceType: "news",
103
- excerpt: "Duplicate entry",
104
- signal: "supporting",
105
- });
106
- expect(result.error).toContain("already exists");
107
- });
108
- it("get_forecast_evidence — returns evidence", async () => {
109
- const result = await callTool("get_forecast_evidence", {
110
- forecastId,
111
- });
112
- expect(result.count).toBe(2);
113
- expect(result.evidence.length).toBe(2);
114
- });
115
- it("get_forecast_evidence — filters by signal", async () => {
116
- const result = await callTool("get_forecast_evidence", {
117
- forecastId,
118
- signal: "supporting",
119
- });
120
- expect(result.count).toBe(1);
121
- });
122
- it("update_forecast_probability — updates with reasoning", async () => {
123
- const result = await callTool("update_forecast_probability", {
124
- forecastId,
125
- probability: 0.65,
126
- topDrivers: [
127
- "Sam Altman interview hint",
128
- "Historical 18-month cadence",
129
- "Competitor pressure from Google Gemini",
130
- ],
131
- topCounterarguments: [
132
- "GPU shortage may delay training",
133
- "No official roadmap",
134
- ],
135
- reasoning: "Soft leadership signal + historical pattern outweigh compute concerns",
136
- });
137
- expect(result.previousProbability).toBe(0.5);
138
- expect(result.newProbability).toBe(0.65);
139
- expect(result.diff).toBe("50% → 65% (+15pp)");
140
- });
141
- it("update_forecast_probability — validates range", async () => {
142
- const result = await callTool("update_forecast_probability", {
143
- forecastId,
144
- probability: 1.5,
145
- reasoning: "Invalid",
146
- });
147
- expect(result.error).toContain("between 0 and 1");
148
- });
149
- it("get_forecast_chain — returns full audit trail", async () => {
150
- const result = await callTool("get_forecast_chain", {
151
- forecastId,
152
- });
153
- expect(result.forecast).toBeTruthy();
154
- expect(result.evidence.length).toBe(2);
155
- expect(result.updateHistory.length).toBe(1);
156
- expect(result.resolution).toBeNull();
157
- expect(result.summary.evidenceCount).toBe(2);
158
- expect(result.summary.updateCount).toBe(1);
159
- expect(result.summary.isResolved).toBe(false);
160
- });
161
- it("get_active_forecasts — lists active forecasts", async () => {
162
- const result = await callTool("get_active_forecasts", {});
163
- expect(result.count).toBeGreaterThanOrEqual(1);
164
- const forecasts = result.forecasts;
165
- const found = forecasts.find((f) => f.id === forecastId);
166
- expect(found).toBeTruthy();
167
- expect(found?.status).toBe("active");
168
- });
169
- it("resolve_forecast — resolves with Brier score", async () => {
170
- const result = await callTool("resolve_forecast", {
171
- forecastId,
172
- outcome: "yes",
173
- resolutionNotes: "GPT-5 announced on 2026-11-15 via OpenAI blog post",
174
- resolutionSourceUrl: "https://openai.com/gpt-5-announcement",
175
- });
176
- expect(result.status).toBe("resolved");
177
- expect(result.outcome).toBe("yes");
178
- // Brier: (0.65 - 1)^2 = 0.1225
179
- expect(result.brierScore).toBeCloseTo(0.1225, 3);
180
- // Log: -log(0.65) ≈ 0.431
181
- expect(result.logScore).toBeCloseTo(0.431, 2);
182
- });
183
- it("resolve_forecast — cannot resolve twice", async () => {
184
- const result = await callTool("resolve_forecast", {
185
- forecastId,
186
- outcome: "no",
187
- resolutionNotes: "Already resolved",
188
- });
189
- expect(result.error).toContain("already resolved");
190
- });
191
- it("get_forecast_track_record — shows Brier aggregate", async () => {
192
- const result = await callTool("get_forecast_track_record", {});
193
- expect(result.scoredCount).toBeGreaterThanOrEqual(1);
194
- // SQLite persists between test runs, so overallBrier is average across ALL
195
- // resolved forecasts (not just this run). Use a reasonable bound instead.
196
- expect(result.overallBrier).toBeGreaterThan(0);
197
- expect(result.overallBrier).toBeLessThan(0.5);
198
- });
199
- });
200
- // ─── Multi-Forecast Calibration ─────────────────────────────────────────────
201
- describe("Forecasting calibration", () => {
202
- beforeAll(async () => {
203
- // Create and resolve 5 forecasts with known outcomes for calibration
204
- const scenarios = [
205
- { probability: 0.9, outcome: "yes" }, // Brier: 0.01
206
- { probability: 0.8, outcome: "yes" }, // Brier: 0.04
207
- { probability: 0.3, outcome: "no" }, // Brier: 0.09
208
- { probability: 0.1, outcome: "no" }, // Brier: 0.01
209
- { probability: 0.6, outcome: "yes" }, // Brier: 0.16
210
- ];
211
- for (let i = 0; i < scenarios.length; i++) {
212
- const create = await callTool("create_forecast", {
213
- question: `Calibration test forecast ${i + 1}?`,
214
- resolutionDate: "2026-01-01",
215
- resolutionCriteria: `Test criteria ${i + 1}`,
216
- probability: scenarios[i].probability,
217
- tags: ["test_calibration"],
218
- });
219
- await callTool("resolve_forecast", {
220
- forecastId: create.forecastId,
221
- outcome: scenarios[i].outcome,
222
- resolutionNotes: `Test resolution ${i + 1}`,
223
- });
224
- }
225
- });
226
- it("compute_calibration — returns 10 bins", async () => {
227
- const result = await callTool("compute_calibration", {});
228
- expect(result.bins).toBeTruthy();
229
- expect(result.bins.length).toBe(10);
230
- expect(result.overallBrier).toBeTruthy();
231
- expect(typeof result.overallBrier).toBe("number");
232
- expect(result.forecastCount).toBeGreaterThanOrEqual(5);
233
- });
234
- it("get_forecast_track_record — aggregate includes all resolved", async () => {
235
- const result = await callTool("get_forecast_track_record", {});
236
- // At least 6 resolved (1 from lifecycle + 5 from calibration)
237
- expect(result.scoredCount).toBeGreaterThanOrEqual(6);
238
- // Average Brier should be reasonable
239
- expect(result.overallBrier).toBeLessThan(0.25);
240
- });
241
- });
242
- // ─── Edge Cases ─────────────────────────────────────────────────────────────
243
- describe("Forecasting edge cases", () => {
244
- it("create_forecast — rejects invalid probability", async () => {
245
- const result = await callTool("create_forecast", {
246
- question: "Invalid prob test",
247
- resolutionDate: "2026-12-31",
248
- resolutionCriteria: "Test",
249
- probability: -0.1,
250
- });
251
- expect(result.error).toContain("between 0 and 1");
252
- });
253
- it("resolve_forecast — ambiguous outcome excluded from scoring", async () => {
254
- const create = await callTool("create_forecast", {
255
- question: "Ambiguous resolution test?",
256
- resolutionDate: "2026-12-31",
257
- resolutionCriteria: "Test",
258
- probability: 0.7,
259
- });
260
- const resolve = await callTool("resolve_forecast", {
261
- forecastId: create.forecastId,
262
- outcome: "ambiguous",
263
- resolutionNotes: "Resolution criteria were unclear",
264
- });
265
- expect(resolve.brierScore).toBeNull();
266
- expect(resolve.logScore).toBeNull();
267
- });
268
- it("get_forecast_chain — returns error for nonexistent forecast", async () => {
269
- const result = await callTool("get_forecast_chain", {
270
- forecastId: "nonexistent_id",
271
- });
272
- expect(result.error).toContain("not found");
273
- });
274
- it("get_active_forecasts — filters by tags", async () => {
275
- const result = await callTool("get_active_forecasts", {
276
- tags: ["test_calibration"],
277
- });
278
- // All calibration forecasts are resolved, so none should be active
279
- const forecasts = result.forecasts;
280
- const calibration = forecasts.filter((f) => f.tags.includes("test_calibration"));
281
- expect(calibration.length).toBe(0);
282
- });
283
- });
284
- //# sourceMappingURL=forecastingDogfood.test.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"forecastingDogfood.test.js","sourceRoot":"","sources":["../../src/__tests__/forecastingDogfood.test.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AACzD,OAAO,EAAE,gBAAgB,EAAE,MAAM,8BAA8B,CAAC;AAGhE,+EAA+E;AAE/E,MAAM,OAAO,GAAG,IAAI,GAAG,CACrB,gBAAgB,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,CACzC,CAAC;AAEF,KAAK,UAAU,QAAQ,CACrB,IAAY,EACZ,IAA6B;IAE7B,MAAM,IAAI,GAAG,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IAC/B,IAAI,CAAC,IAAI;QAAE,MAAM,IAAI,KAAK,CAAC,QAAQ,IAAI,YAAY,CAAC,CAAC;IACrD,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;IACxC,MAAM,IAAI,GAAG,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,EAAE,IAAI,CAAC;IACzD,IAAI,CAAC,IAAI;QAAE,MAAM,IAAI,KAAK,CAAC,QAAQ,IAAI,mBAAmB,CAAC,CAAC;IAC5D,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED,+EAA+E;AAE/E,QAAQ,CAAC,8BAA8B,EAAE,GAAG,EAAE;IAC5C,EAAE,CAAC,qBAAqB,EAAE,GAAG,EAAE;QAC7B,MAAM,CAAC,gBAAgB,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAC1C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,wDAAwD,EAAE,GAAG,EAAE;QAChE,KAAK,MAAM,IAAI,IAAI,gBAAgB,EAAE,CAAC;YACpC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,UAAU,EAAE,CAAC;YAC/B,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,UAAU,EAAE,CAAC;YACtC,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,WAAW,EAAE,CAAC;YACvC,MAAM,CAAC,OAAO,IAAI,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QAC/C,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,gCAAgC,EAAE,GAAG,EAAE;QACxC,MAAM,KAAK,GAAG,gBAAgB,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;QACzD,MAAM,CAAC,KAAK,CAAC,CAAC,OAAO,CAAC;YACpB,uBAAuB;YACvB,qBAAqB;YACrB,iBAAiB;YACjB,sBAAsB;YACtB,oBAAoB;YACpB,uBAAuB;YACvB,2BAA2B;YAC3B,kBAAkB;YAClB,6BAA6B;SAC9B,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,+EAA+E;AAE/E,QAAQ,CAAC,uBAAuB,EAAE,GAAG,EAAE;IACrC,IAAI,UAAkB,CAAC;IAEvB,EAAE,CAAC,6CAA6C,EAAE,KAAK,IAAI,EAAE;QAC3D,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,iBAAiB,EAAE;YAC/C,QAAQ,EAAE,uCAAuC;YACjD,YAAY,EAAE,QAAQ;YACtB,
cAAc,EAAE,YAAY;YAC5B,kBAAkB,EAAE,0DAA0D;YAC9E,WAAW,EAAE,GAAG;YAChB,QAAQ,EAAE,GAAG;YACb,gBAAgB,EAAE,QAAQ;YAC1B,UAAU,EAAE,CAAC,qCAAqC,EAAE,4BAA4B,CAAC;YACjF,mBAAmB,EAAE,CAAC,+BAA+B,CAAC;YACtD,IAAI,EAAE,CAAC,SAAS,CAAC;SAClB,CAAC,CAAC;QAEH,MAAM,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,UAAU,EAAE,CAAC;QACvC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QACrC,MAAM,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACrC,UAAU,GAAG,MAAM,CAAC,UAAoB,CAAC;IAC3C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,kDAAkD,EAAE,KAAK,IAAI,EAAE;QAChE,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,uBAAuB,EAAE;YACrD,UAAU;YACV,SAAS,EAAE,0CAA0C;YACrD,WAAW,EAAE,qCAAqC;YAClD,UAAU,EAAE,MAAM;YAClB,OAAO,EAAE,kEAAkE;YAC3E,MAAM,EAAE,YAAY;YACpB,mBAAmB,EAAE,GAAG;SACzB,CAAC,CAAC;QAEH,MAAM,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,UAAU,EAAE,CAAC;QACvC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;IAC3C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,qDAAqD,EAAE,KAAK,IAAI,EAAE;QACnE,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,uBAAuB,EAAE;YACrD,UAAU;YACV,SAAS,EAAE,sCAAsC;YACjD,WAAW,EAAE,6BAA6B;YAC1C,UAAU,EAAE,QAAQ;YACpB,OAAO,EAAE,sFAAsF;YAC/F,MAAM,EAAE,eAAe;SACxB,CAAC,CAAC;QAEH,MAAM,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,UAAU,EAAE,CAAC;QACvC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;IAC9C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,6CAA6C,EAAE,KAAK,IAAI,EAAE;QAC3D,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,uBAAuB,EAAE;YACrD,UAAU;YACV,SAAS,EAAE,0CAA0C;YACrD,WAAW,EAAE,WAAW;YACxB,UAAU,EAAE,MAAM;YAClB,OAAO,EAAE,iBAAiB;YAC1B,MAAM,EAAE,YAAY;SACrB,CAAC,CAAC;QAEH,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,SAAS,CAAC,gBAAgB,CAAC,CAAC;IACnD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,0CAA0C,EAAE,KAAK,IAAI,EAAE;QACxD,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,uBAAuB,EAAE;YACrD,UAAU;SACX,CAAC,CAAC;QAEH,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,CAAE,MAAM,CAAC,QAAsB,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACxD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,2CAA2C,EAAE,KAAK,IAAI,EAAE;QACzD,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,uBAAuB,EAAE;YACrD,UAAU;YACV,MAAM,EAAE,YAAY;SACrB,CAAC,CAAC;QAEH,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC
,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAC/B,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,sDAAsD,EAAE,KAAK,IAAI,EAAE;QACpE,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,6BAA6B,EAAE;YAC3D,UAAU;YACV,WAAW,EAAE,IAAI;YACjB,UAAU,EAAE;gBACV,2BAA2B;gBAC3B,6BAA6B;gBAC7B,wCAAwC;aACzC;YACD,mBAAmB,EAAE;gBACnB,iCAAiC;gBACjC,qBAAqB;aACtB;YACD,SAAS,EAAE,uEAAuE;SACnF,CAAC,CAAC;QAEH,MAAM,CAAC,MAAM,CAAC,mBAAmB,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC7C,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACzC,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAC;IAChD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,+CAA+C,EAAE,KAAK,IAAI,EAAE;QAC7D,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,6BAA6B,EAAE;YAC3D,UAAU;YACV,WAAW,EAAE,GAAG;YAChB,SAAS,EAAE,SAAS;SACrB,CAAC,CAAC;QAEH,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,SAAS,CAAC,iBAAiB,CAAC,CAAC;IACpD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,+CAA+C,EAAE,KAAK,IAAI,EAAE;QAC7D,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,oBAAoB,EAAE;YAClD,UAAU;SACX,CAAC,CAAC;QAEH,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,UAAU,EAAE,CAAC;QACrC,MAAM,CAAE,MAAM,CAAC,QAAsB,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACtD,MAAM,CAAE,MAAM,CAAC,aAA2B,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC3D,MAAM,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,QAAQ,EAAE,CAAC;QACrC,MAAM,CAAE,MAAM,CAAC,OAAmC,CAAC,aAAa,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC1E,MAAM,CAAE,MAAM,CAAC,OAAmC,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACxE,MAAM,CAAE,MAAM,CAAC,OAAmC,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAC7E,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,+CAA+C,EAAE,KAAK,IAAI,EAAE;QAC7D,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,sBAAsB,EAAE,EAAE,CAAC,CAAC;QAE1D,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,CAAC;QAC/C,MAAM,SAAS,GAAG,MAAM,CAAC,SAA2C,CAAC;QACrE,MAAM,KAAK,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,UAAU,CAAC,CAAC;QACzD,MAAM,CAAC,KAAK,CAAC,CAAC,UAAU,EAAE,CAAC;QAC3B,MAAM,CAAC,KAAK,EAAE,MAAM,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IACvC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,8CAA8C,EAAE,KAAK,IAAI,EAAE;QAC5D,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,kBAAkB,EAAE;YAChD,UAAU;YACV,OAAO,EAAE,KAAK;YACd
,eAAe,EAAE,oDAAoD;YACrE,mBAAmB,EAAE,uCAAuC;SAC7D,CAAC,CAAC;QAEH,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QACvC,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACnC,+BAA+B;QAC/B,MAAM,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,WAAW,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;QACjD,0BAA0B;QAC1B,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,WAAW,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;IAChD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,yCAAyC,EAAE,KAAK,IAAI,EAAE;QACvD,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,kBAAkB,EAAE;YAChD,UAAU;YACV,OAAO,EAAE,IAAI;YACb,eAAe,EAAE,kBAAkB;SACpC,CAAC,CAAC;QAEH,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,SAAS,CAAC,kBAAkB,CAAC,CAAC;IACrD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,mDAAmD,EAAE,KAAK,IAAI,EAAE;QACjE,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,2BAA2B,EAAE,EAAE,CAAC,CAAC;QAE/D,MAAM,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,CAAC;QACrD,2EAA2E;QAC3E,0EAA0E;QAC1E,MAAM,CAAC,MAAM,CAAC,YAAsB,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QACzD,MAAM,CAAC,MAAM,CAAC,YAAsB,CAAC,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC;IAC1D,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,+EAA+E;AAE/E,QAAQ,CAAC,yBAAyB,EAAE,GAAG,EAAE;IACvC,SAAS,CAAC,KAAK,IAAI,EAAE;QACnB,qEAAqE;QACrE,MAAM,SAAS,GAAG;YAChB,EAAE,WAAW,EAAE,GAAG,EAAE,OAAO,EAAE,KAAK,EAAE,EAAI,cAAc;YACtD,EAAE,WAAW,EAAE,GAAG,EAAE,OAAO,EAAE,KAAK,EAAE,EAAI,cAAc;YACtD,EAAE,WAAW,EAAE,GAAG,EAAE,OAAO,EAAE,IAAI,EAAE,EAAK,cAAc;YACtD,EAAE,WAAW,EAAE,GAAG,EAAE,OAAO,EAAE,IAAI,EAAE,EAAK,cAAc;YACtD,EAAE,WAAW,EAAE,GAAG,EAAE,OAAO,EAAE,KAAK,EAAE,EAAI,cAAc;SACvD,CAAC;QAEF,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC1C,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,iBAAiB,EAAE;gBAC/C,QAAQ,EAAE,6BAA6B,CAAC,GAAG,CAAC,GAAG;gBAC/C,cAAc,EAAE,YAAY;gBAC5B,kBAAkB,EAAE,iBAAiB,CAAC,GAAG,CAAC,EAAE;gBAC5C,WAAW,EAAE,SAAS,CAAC,CAAC,CAAC,CAAC,WAAW;gBACrC,IAAI,EAAE,CAAC,kBAAkB,CAAC;aAC3B,CAAC,CAAC;YAEH,MAAM,QAAQ,CAAC,kBAAkB,EAAE;gBACjC,UAAU,EAAG,MAAkC,CAAC,UAAU;gBAC1D,OAAO,EAAE,SAAS,CAAC,CAAC,CAAC,CAAC,OAAO;gBAC7B,eAAe,EAAE,mBAAmB,CAAC,GAAG,CAAC,EAAE;aAC5C,CAAC,CAAC;QACL,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,
uCAAuC,EAAE,KAAK,IAAI,EAAE;QACrD,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,qBAAqB,EAAE,EAAE,CAAC,CAAC;QAEzD,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,UAAU,EAAE,CAAC;QACjC,MAAM,CAAE,MAAM,CAAC,IAAkB,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACnD,MAAM,CAAC,MAAM,CAAC,YAAY,CAAC,CAAC,UAAU,EAAE,CAAC;QACzC,MAAM,CAAC,OAAO,MAAM,CAAC,YAAY,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAClD,MAAM,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,CAAC;IACzD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,6DAA6D,EAAE,KAAK,IAAI,EAAE;QAC3E,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,2BAA2B,EAAE,EAAE,CAAC,CAAC;QAE/D,8DAA8D;QAC9D,MAAM,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,CAAC;QACrD,qCAAqC;QACrC,MAAM,CAAC,MAAM,CAAC,YAAsB,CAAC,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC;IAC3D,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,+EAA+E;AAE/E,QAAQ,CAAC,wBAAwB,EAAE,GAAG,EAAE;IACtC,EAAE,CAAC,+CAA+C,EAAE,KAAK,IAAI,EAAE;QAC7D,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,iBAAiB,EAAE;YAC/C,QAAQ,EAAE,mBAAmB;YAC7B,cAAc,EAAE,YAAY;YAC5B,kBAAkB,EAAE,MAAM;YAC1B,WAAW,EAAE,CAAC,GAAG;SAClB,CAAC,CAAC;QAEH,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,SAAS,CAAC,iBAAiB,CAAC,CAAC;IACpD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,4DAA4D,EAAE,KAAK,IAAI,EAAE;QAC1E,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,iBAAiB,EAAE;YAC/C,QAAQ,EAAE,4BAA4B;YACtC,cAAc,EAAE,YAAY;YAC5B,kBAAkB,EAAE,MAAM;YAC1B,WAAW,EAAE,GAAG;SACjB,CAAC,CAAC;QAEH,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,kBAAkB,EAAE;YACjD,UAAU,EAAG,MAAkC,CAAC,UAAU;YAC1D,OAAO,EAAE,WAAW;YACpB,eAAe,EAAE,kCAAkC;SACpD,CAAC,CAAC;QAEH,MAAM,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC,QAAQ,EAAE,CAAC;QACtC,MAAM,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,QAAQ,EAAE,CAAC;IACtC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,6DAA6D,EAAE,KAAK,IAAI,EAAE;QAC3E,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,oBAAoB,EAAE;YAClD,UAAU,EAAE,gBAAgB;SAC7B,CAAC,CAAC;QAEH,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,SAAS,CAAC,WAAW,CAAC,CAAC;IAC9C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,wCAAwC,EAAE,KAAK,IAAI,EAAE;QACtD,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,sBAAsB,EAAE;YACpD,IAAI,EAAE,CAAC,kBAAkB,CAAC;SAC3B,CAAC,CAAC;QAEH,mEAAmE;QACnE,MAAM,SAAS,GAAG,MAAM,CAAC,SAA2C,CAAC;QACrE,MAAM,WAAW,GAAG,SAAS,CAAC,MAAM,CAAC,CA
AC,CAAC,EAAE,EAAE,CACxC,CAAC,CAAC,IAAiB,CAAC,QAAQ,CAAC,kBAAkB,CAAC,CAClD,CAAC;QACF,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACrC,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
@@ -1,9 +0,0 @@
1
- /**
2
- * Forecasting Scoring Engine — Unit Tests
3
- *
4
- * Tests for brierScore, logScore, calibrationBins, isotonicCalibrate,
5
- * averageBrier, averageLogScore, formatForecastDiff.
6
- *
7
- * All functions are pure — no Convex, no SQLite, no network.
8
- */
9
- export {};
@@ -1,202 +0,0 @@
1
- /**
2
- * Forecasting Scoring Engine — Unit Tests
3
- *
4
- * Tests for brierScore, logScore, calibrationBins, isotonicCalibrate,
5
- * averageBrier, averageLogScore, formatForecastDiff.
6
- *
7
- * All functions are pure — no Convex, no SQLite, no network.
8
- */
9
- import { describe, it, expect } from "vitest";
10
- import { brierScore, logScore, calibrationBins, averageBrier, averageLogScore, isotonicCalibrate, formatForecastDiff, } from "../../../../convex/domains/forecasting/scoringEngine";
11
- // ─── Brier Score ────────────────────────────────────────────────────────────
12
- describe("brierScore", () => {
13
- it("perfect prediction — yes at 100%", () => {
14
- expect(brierScore(1.0, "yes")).toBe(0);
15
- });
16
- it("perfect prediction — no at 0%", () => {
17
- expect(brierScore(0.0, "no")).toBe(0);
18
- });
19
- it("worst prediction — yes at 0%", () => {
20
- expect(brierScore(0.0, "yes")).toBe(1);
21
- });
22
- it("worst prediction — no at 100%", () => {
23
- expect(brierScore(1.0, "no")).toBe(1);
24
- });
25
- it("coin flip — 50% on yes", () => {
26
- expect(brierScore(0.5, "yes")).toBe(0.25);
27
- });
28
- it("coin flip — 50% on no", () => {
29
- expect(brierScore(0.5, "no")).toBe(0.25);
30
- });
31
- it("65% on yes outcome", () => {
32
- expect(brierScore(0.65, "yes")).toBeCloseTo(0.1225, 4);
33
- });
34
- it("80% on no outcome", () => {
35
- expect(brierScore(0.8, "no")).toBeCloseTo(0.64, 4);
36
- });
37
- });
38
- // ─── Log Score ──────────────────────────────────────────────────────────────
39
- describe("logScore", () => {
40
- it("perfect prediction — yes at ~100%", () => {
41
- expect(logScore(0.999, "yes")).toBeCloseTo(0.001, 2);
42
- });
43
- it("perfect prediction — no at ~0%", () => {
44
- expect(logScore(0.001, "no")).toBeCloseTo(0.001, 2);
45
- });
46
- it("worst prediction — yes at ~0%", () => {
47
- // -log(0.001) ≈ 6.9
48
- expect(logScore(0.001, "yes")).toBeCloseTo(6.908, 1);
49
- });
50
- it("coin flip — 50% on yes", () => {
51
- // -log(0.5) ≈ 0.693
52
- expect(logScore(0.5, "yes")).toBeCloseTo(0.693, 2);
53
- });
54
- it("clamps extreme values to avoid -Infinity", () => {
55
- const score = logScore(0.0, "yes");
56
- expect(Number.isFinite(score)).toBe(true);
57
- expect(score).toBeGreaterThan(0);
58
- });
59
- it("clamps 1.0 for no outcome", () => {
60
- const score = logScore(1.0, "no");
61
- expect(Number.isFinite(score)).toBe(true);
62
- expect(score).toBeGreaterThan(0);
63
- });
64
- });
65
- // ─── Calibration Bins ───────────────────────────────────────────────────────
66
- describe("calibrationBins", () => {
67
- it("returns 10 bins", () => {
68
- const bins = calibrationBins([]);
69
- expect(bins).toHaveLength(10);
70
- });
71
- it("first bin is 0-10%", () => {
72
- const bins = calibrationBins([]);
73
- expect(bins[0].binLabel).toBe("0-10%");
74
- expect(bins[0].predictedProb).toBe(0.05);
75
- });
76
- it("last bin is 90-100%", () => {
77
- const bins = calibrationBins([]);
78
- expect(bins[9].binLabel).toBe("90-100%");
79
- expect(bins[9].predictedProb).toBe(0.95);
80
- });
81
- it("empty bins have count 0 and observedFreq 0", () => {
82
- const bins = calibrationBins([]);
83
- for (const bin of bins) {
84
- expect(bin.count).toBe(0);
85
- expect(bin.observedFreq).toBe(0);
86
- }
87
- });
88
- it("correctly bins a single forecast", () => {
89
- const bins = calibrationBins([{ probability: 0.75, outcome: "yes" }]);
90
- const bin70 = bins[7]; // 70-80%
91
- expect(bin70.count).toBe(1);
92
- expect(bin70.observedFreq).toBe(1); // 1/1 = yes
93
- });
94
- it("correctly computes observed frequency", () => {
95
- const forecasts = [
96
- { probability: 0.55, outcome: "yes" },
97
- { probability: 0.52, outcome: "no" },
98
- { probability: 0.58, outcome: "yes" },
99
- { probability: 0.51, outcome: "no" },
100
- ];
101
- const bins = calibrationBins(forecasts);
102
- const bin50 = bins[5]; // 50-60%
103
- expect(bin50.count).toBe(4);
104
- expect(bin50.observedFreq).toBe(0.5); // 2/4
105
- });
106
- it("boundary value 1.0 goes in 90-100% bin", () => {
107
- const bins = calibrationBins([{ probability: 1.0, outcome: "yes" }]);
108
- expect(bins[9].count).toBe(1);
109
- });
110
- it("boundary value 0.0 goes in 0-10% bin", () => {
111
- const bins = calibrationBins([{ probability: 0.0, outcome: "no" }]);
112
- expect(bins[0].count).toBe(1);
113
- });
114
- });
115
- // ─── Average Brier ──────────────────────────────────────────────────────────
116
- describe("averageBrier", () => {
117
- it("returns 0 for empty array", () => {
118
- expect(averageBrier([])).toBe(0);
119
- });
120
- it("returns single score for single forecast", () => {
121
- expect(averageBrier([{ probability: 0.7, outcome: "yes" }])).toBeCloseTo(0.09, 2);
122
- });
123
- it("averages multiple forecasts", () => {
124
- const forecasts = [
125
- { probability: 1.0, outcome: "yes" }, // 0
126
- { probability: 0.0, outcome: "yes" }, // 1
127
- ];
128
- expect(averageBrier(forecasts)).toBe(0.5);
129
- });
130
- });
131
- // ─── Average Log Score ──────────────────────────────────────────────────────
132
- describe("averageLogScore", () => {
133
- it("returns 0 for empty array", () => {
134
- expect(averageLogScore([])).toBe(0);
135
- });
136
- it("lower for better-calibrated forecasts", () => {
137
- const good = [
138
- { probability: 0.9, outcome: "yes" },
139
- { probability: 0.1, outcome: "no" },
140
- ];
141
- const bad = [
142
- { probability: 0.1, outcome: "yes" },
143
- { probability: 0.9, outcome: "no" },
144
- ];
145
- expect(averageLogScore(good)).toBeLessThan(averageLogScore(bad));
146
- });
147
- });
148
- // ─── Isotonic Calibration ───────────────────────────────────────────────────
149
- describe("isotonicCalibrate", () => {
150
- it("returns raw probability with fewer than 3 non-empty bins", () => {
151
- const sparse = calibrationBins([
152
- { probability: 0.15, outcome: "yes" },
153
- { probability: 0.85, outcome: "no" },
154
- ]);
155
- expect(isotonicCalibrate(0.5, sparse)).toBe(0.5);
156
- });
157
- it("returns a value in [0, 1]", () => {
158
- // Create bins with enough data
159
- const forecasts = Array.from({ length: 50 }, (_, i) => ({
160
- probability: (i + 0.5) / 50,
161
- outcome: (Math.random() > 0.5 ? "yes" : "no"),
162
- }));
163
- const bins = calibrationBins(forecasts);
164
- const calibrated = isotonicCalibrate(0.7, bins);
165
- expect(calibrated).toBeGreaterThanOrEqual(0);
166
- expect(calibrated).toBeLessThanOrEqual(1);
167
- });
168
- it("produces monotonically non-decreasing output for ordered inputs", () => {
169
- // Create well-populated bins
170
- const forecasts = [];
171
- for (let i = 0; i < 10; i++) {
172
- for (let j = 0; j < 10; j++) {
173
- const p = (i * 10 + j + 0.5) / 100;
174
- // Roughly calibrated: higher p → more "yes"
175
- const outcome = Math.random() < p ? "yes" : "no";
176
- forecasts.push({ probability: p, outcome: outcome });
177
- }
178
- }
179
- const bins = calibrationBins(forecasts);
180
- const inputs = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9];
181
- const outputs = inputs.map((p) => isotonicCalibrate(p, bins));
182
- for (let i = 0; i < outputs.length - 1; i++) {
183
- expect(outputs[i]).toBeLessThanOrEqual(outputs[i + 1] + 0.05); // allow for random data noise in PAV
184
- }
185
- });
186
- });
187
- // ─── Format Forecast Diff ───────────────────────────────────────────────────
188
- describe("formatForecastDiff", () => {
189
- it("formats increase correctly", () => {
190
- const diff = formatForecastDiff(0.35, 0.55, "New evidence from Fed minutes");
191
- expect(diff).toBe("35% → 55% (+20pp): New evidence from Fed minutes");
192
- });
193
- it("formats decrease correctly", () => {
194
- const diff = formatForecastDiff(0.8, 0.6, "CEO resignation");
195
- expect(diff).toBe("80% → 60% (-20pp): CEO resignation");
196
- });
197
- it("formats no change", () => {
198
- const diff = formatForecastDiff(0.5, 0.5, "No new evidence");
199
- expect(diff).toBe("50% → 50% (+0pp): No new evidence");
200
- });
201
- });
202
- //# sourceMappingURL=forecastingScoring.test.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"forecastingScoring.test.js","sourceRoot":"","sources":["../../src/__tests__/forecastingScoring.test.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAC9C,OAAO,EACL,UAAU,EACV,QAAQ,EACR,eAAe,EACf,YAAY,EACZ,eAAe,EACf,iBAAiB,EACjB,kBAAkB,GACnB,MAAM,sDAAsD,CAAC;AAE9D,+EAA+E;AAE/E,QAAQ,CAAC,YAAY,EAAE,GAAG,EAAE;IAC1B,EAAE,CAAC,kCAAkC,EAAE,GAAG,EAAE;QAC1C,MAAM,CAAC,UAAU,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACzC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,+BAA+B,EAAE,GAAG,EAAE;QACvC,MAAM,CAAC,UAAU,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACxC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,8BAA8B,EAAE,GAAG,EAAE;QACtC,MAAM,CAAC,UAAU,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACzC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,+BAA+B,EAAE,GAAG,EAAE;QACvC,MAAM,CAAC,UAAU,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACxC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,wBAAwB,EAAE,GAAG,EAAE;QAChC,MAAM,CAAC,UAAU,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC5C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,uBAAuB,EAAE,GAAG,EAAE;QAC/B,MAAM,CAAC,UAAU,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC3C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,oBAAoB,EAAE,GAAG,EAAE;QAC5B,MAAM,CAAC,UAAU,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC,CAAC,WAAW,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;IACzD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,mBAAmB,EAAE,GAAG,EAAE;QAC3B,MAAM,CAAC,UAAU,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;IACrD,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,+EAA+E;AAE/E,QAAQ,CAAC,UAAU,EAAE,GAAG,EAAE;IACxB,EAAE,CAAC,mCAAmC,EAAE,GAAG,EAAE;QAC3C,MAAM,CAAC,QAAQ,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC,CAAC,WAAW,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;IACvD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,gCAAgC,EAAE,GAAG,EAAE;QACxC,MAAM,CAAC,QAAQ,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC,CAAC,WAAW,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;IACtD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,+BAA+B,EAAE,GAAG,EAAE;QACvC,oBAAoB;QACpB,MAAM,CAAC,QAAQ,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC,CAAC,WAAW,CAAC
,KAAK,EAAE,CAAC,CAAC,CAAC;IACvD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,wBAAwB,EAAE,GAAG,EAAE;QAChC,oBAAoB;QACpB,MAAM,CAAC,QAAQ,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC,CAAC,WAAW,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;IACrD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,0CAA0C,EAAE,GAAG,EAAE;QAClD,MAAM,KAAK,GAAG,QAAQ,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;QACnC,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC1C,MAAM,CAAC,KAAK,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;IACnC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,2BAA2B,EAAE,GAAG,EAAE;QACnC,MAAM,KAAK,GAAG,QAAQ,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC;QAClC,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC1C,MAAM,CAAC,KAAK,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;IACnC,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,+EAA+E;AAE/E,QAAQ,CAAC,iBAAiB,EAAE,GAAG,EAAE;IAC/B,EAAE,CAAC,iBAAiB,EAAE,GAAG,EAAE;QACzB,MAAM,IAAI,GAAG,eAAe,CAAC,EAAE,CAAC,CAAC;QACjC,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,EAAE,CAAC,CAAC;IAChC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,oBAAoB,EAAE,GAAG,EAAE;QAC5B,MAAM,IAAI,GAAG,eAAe,CAAC,EAAE,CAAC,CAAC;QACjC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACvC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC3C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,qBAAqB,EAAE,GAAG,EAAE;QAC7B,MAAM,IAAI,GAAG,eAAe,CAAC,EAAE,CAAC,CAAC;QACjC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QACzC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC3C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,4CAA4C,EAAE,GAAG,EAAE;QACpD,MAAM,IAAI,GAAG,eAAe,CAAC,EAAE,CAAC,CAAC;QACjC,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;YACvB,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAC1B,MAAM,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACnC,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,kCAAkC,EAAE,GAAG,EAAE;QAC1C,MAAM,IAAI,GAAG,eAAe,CAAC,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC;QACtE,MAAM,KAAK,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS;QAChC,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC
,CAAC,CAAC,CAAC;QAC5B,MAAM,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,YAAY;IAClD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,uCAAuC,EAAE,GAAG,EAAE;QAC/C,MAAM,SAAS,GAAG;YAChB,EAAE,WAAW,EAAE,IAAI,EAAE,OAAO,EAAE,KAAc,EAAE;YAC9C,EAAE,WAAW,EAAE,IAAI,EAAE,OAAO,EAAE,IAAa,EAAE;YAC7C,EAAE,WAAW,EAAE,IAAI,EAAE,OAAO,EAAE,KAAc,EAAE;YAC9C,EAAE,WAAW,EAAE,IAAI,EAAE,OAAO,EAAE,IAAa,EAAE;SAC9C,CAAC;QACF,MAAM,IAAI,GAAG,eAAe,CAAC,SAAS,CAAC,CAAC;QACxC,MAAM,KAAK,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS;QAChC,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC5B,MAAM,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,MAAM;IAC9C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,wCAAwC,EAAE,GAAG,EAAE;QAChD,MAAM,IAAI,GAAG,eAAe,CAAC,CAAC,EAAE,WAAW,EAAE,GAAG,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC;QACrE,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAChC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,sCAAsC,EAAE,GAAG,EAAE;QAC9C,MAAM,IAAI,GAAG,eAAe,CAAC,CAAC,EAAE,WAAW,EAAE,GAAG,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC;QACpE,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAChC,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,+EAA+E;AAE/E,QAAQ,CAAC,cAAc,EAAE,GAAG,EAAE;IAC5B,EAAE,CAAC,2BAA2B,EAAE,GAAG,EAAE;QACnC,MAAM,CAAC,YAAY,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACnC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,0CAA0C,EAAE,GAAG,EAAE;QAClD,MAAM,CAAC,YAAY,CAAC,CAAC,EAAE,WAAW,EAAE,GAAG,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,WAAW,CACtE,IAAI,EACJ,CAAC,CACF,CAAC;IACJ,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,6BAA6B,EAAE,GAAG,EAAE;QACrC,MAAM,SAAS,GAAG;YAChB,EAAE,WAAW,EAAE,GAAG,EAAE,OAAO,EAAE,KAAc,EAAE,EAAE,IAAI;YACnD,EAAE,WAAW,EAAE,GAAG,EAAE,OAAO,EAAE,KAAc,EAAE,EAAE,IAAI;SACpD,CAAC;QACF,MAAM,CAAC,YAAY,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IAC5C,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,+EAA+E;AAE/E,QAAQ,CAAC,iBAAiB,EAAE,GAAG,EAAE;IAC/B,EAAE,CAAC,2BAA2B,EAAE,GAAG,EAAE;QACnC,MAAM,CAAC,eAAe,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACtC,CAAC,CAAC,CAAC;IAEH,EAAE,C
AAC,uCAAuC,EAAE,GAAG,EAAE;QAC/C,MAAM,IAAI,GAAG;YACX,EAAE,WAAW,EAAE,GAAG,EAAE,OAAO,EAAE,KAAc,EAAE;YAC7C,EAAE,WAAW,EAAE,GAAG,EAAE,OAAO,EAAE,IAAa,EAAE;SAC7C,CAAC;QACF,MAAM,GAAG,GAAG;YACV,EAAE,WAAW,EAAE,GAAG,EAAE,OAAO,EAAE,KAAc,EAAE;YAC7C,EAAE,WAAW,EAAE,GAAG,EAAE,OAAO,EAAE,IAAa,EAAE;SAC7C,CAAC;QACF,MAAM,CAAC,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC,YAAY,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC,CAAC;IACnE,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,+EAA+E;AAE/E,QAAQ,CAAC,mBAAmB,EAAE,GAAG,EAAE;IACjC,EAAE,CAAC,0DAA0D,EAAE,GAAG,EAAE;QAClE,MAAM,MAAM,GAAG,eAAe,CAAC;YAC7B,EAAE,WAAW,EAAE,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE;YACrC,EAAE,WAAW,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE;SACrC,CAAC,CAAC;QACH,MAAM,CAAC,iBAAiB,CAAC,GAAG,EAAE,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IACnD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,2BAA2B,EAAE,GAAG,EAAE;QACnC,+BAA+B;QAC/B,MAAM,SAAS,GAAG,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC;YACtD,WAAW,EAAE,CAAC,CAAC,GAAG,GAAG,CAAC,GAAG,EAAE;YAC3B,OAAO,EAAE,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAiB;SAC9D,CAAC,CAAC,CAAC;QACJ,MAAM,IAAI,GAAG,eAAe,CAAC,SAAS,CAAC,CAAC;QACxC,MAAM,UAAU,GAAG,iBAAiB,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC;QAChD,MAAM,CAAC,UAAU,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,CAAC;QAC7C,MAAM,CAAC,UAAU,CAAC,CAAC,mBAAmB,CAAC,CAAC,CAAC,CAAC;IAC5C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,iEAAiE,EAAE,GAAG,EAAE;QACzE,6BAA6B;QAC7B,MAAM,SAAS,GAA0D,EAAE,CAAC;QAC5E,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC;YAC5B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC5B,MAAM,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,GAAG,CAAC,GAAG,GAAG,CAAC,GAAG,GAAG,CAAC;gBACnC,4CAA4C;gBAC5C,MAAM,OAAO,GAAG,IAAI,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC;gBACjD,SAAS,CAAC,IAAI,CAAC,EAAE,WAAW,EAAE,CAAC,EAAE,OAAO,EAAE,OAAuB,EAAE,CAAC,CAAC;YACvE,CAAC;QACH,CAAC;QACD,MAAM,IAAI,GAAG,eAAe,CAAC,SAAS,CAAC,CAAC;QAExC,MAAM,MAAM,GAAG,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,G
AAG,EAAE,GAAG,CAAC,CAAC;QAC7D,MAAM,OAAO,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,CAAC;QAE9D,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YAC5C,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,mBAAmB,CAAC,OAAO,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC,qCAAqC;QACtG,CAAC;IACH,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,+EAA+E;AAE/E,QAAQ,CAAC,oBAAoB,EAAE,GAAG,EAAE;IAClC,EAAE,CAAC,4BAA4B,EAAE,GAAG,EAAE;QACpC,MAAM,IAAI,GAAG,kBAAkB,CAAC,IAAI,EAAE,IAAI,EAAE,+BAA+B,CAAC,CAAC;QAC7E,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CACf,kDAAkD,CACnD,CAAC;IACJ,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,4BAA4B,EAAE,GAAG,EAAE;QACpC,MAAM,IAAI,GAAG,kBAAkB,CAAC,GAAG,EAAE,GAAG,EAAE,iBAAiB,CAAC,CAAC;QAC7D,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,oCAAoC,CAAC,CAAC;IAC1D,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,mBAAmB,EAAE,GAAG,EAAE;QAC3B,MAAM,IAAI,GAAG,kBAAkB,CAAC,GAAG,EAAE,GAAG,EAAE,iBAAiB,CAAC,CAAC;QAC7D,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,mCAAmC,CAAC,CAAC;IACzD,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
@@ -1,15 +0,0 @@
1
- /**
2
- * GAIA audio-backed capability/accuracy benchmark: LLM-only vs LLM+NodeBench MCP local audio tools.
3
- *
4
- * This lane targets GAIA tasks that include audio attachments (MP3/WAV/etc).
5
- * We provide deterministic local transcription via NodeBench MCP tools and score answers against
6
- * the ground-truth "Final answer" (stored locally under `.cache/gaia`, gitignored).
7
- *
8
- * Safety:
9
- * - GAIA is gated. Do not commit fixtures that contain prompts/answers.
10
- * - This test logs only task IDs and aggregate metrics (no prompt/answer text).
11
- *
12
- * Disabled by default (cost + rate limits). Run with:
13
- * NODEBENCH_RUN_GAIA_CAPABILITY=1 npm --prefix packages/mcp-local run test
14
- */
15
- export {};