nodebench-mcp 2.70.0 → 3.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (361)
  1. package/README.md +95 -41
  2. package/dist/agents/alertRouter.d.ts +38 -0
  3. package/dist/agents/alertRouter.js +151 -0
  4. package/dist/agents/alertRouter.js.map +1 -0
  5. package/dist/agents/entityMemory.d.ts +40 -0
  6. package/dist/agents/entityMemory.js +64 -0
  7. package/dist/agents/entityMemory.js.map +1 -0
  8. package/dist/agents/subAgents.d.ts +35 -0
  9. package/dist/agents/subAgents.js +62 -0
  10. package/dist/agents/subAgents.js.map +1 -0
  11. package/dist/benchmarks/benchmarkRunner.js +14 -0
  12. package/dist/benchmarks/benchmarkRunner.js.map +1 -1
  13. package/dist/benchmarks/chainEval.js +107 -0
  14. package/dist/benchmarks/chainEval.js.map +1 -1
  15. package/dist/benchmarks/llmJudgeEval.js +85 -0
  16. package/dist/benchmarks/llmJudgeEval.js.map +1 -1
  17. package/dist/benchmarks/searchQualityEval.js +118 -5
  18. package/dist/benchmarks/searchQualityEval.js.map +1 -1
  19. package/dist/cli/search.d.ts +13 -0
  20. package/dist/cli/search.js +130 -0
  21. package/dist/cli/search.js.map +1 -0
  22. package/dist/dashboard/operatingDashboardHtml.js +2 -1
  23. package/dist/dashboard/operatingDashboardHtml.js.map +1 -1
  24. package/dist/dashboard/operatingServer.js +3 -2
  25. package/dist/dashboard/operatingServer.js.map +1 -1
  26. package/dist/db.d.ts +6 -2
  27. package/dist/db.js +521 -6
  28. package/dist/db.js.map +1 -1
  29. package/dist/index.js +349 -67
  30. package/dist/index.js.map +1 -1
  31. package/dist/packageInfo.d.ts +3 -0
  32. package/dist/packageInfo.js +32 -0
  33. package/dist/packageInfo.js.map +1 -0
  34. package/dist/profiler/behaviorStore.d.ts +97 -0
  35. package/dist/profiler/behaviorStore.js +276 -0
  36. package/dist/profiler/behaviorStore.js.map +1 -0
  37. package/dist/profiler/eventCollector.d.ts +119 -0
  38. package/dist/profiler/eventCollector.js +267 -0
  39. package/dist/profiler/eventCollector.js.map +1 -0
  40. package/dist/profiler/index.d.ts +15 -0
  41. package/dist/profiler/index.js +16 -0
  42. package/dist/profiler/index.js.map +1 -0
  43. package/dist/profiler/mcpProxy.d.ts +49 -0
  44. package/dist/profiler/mcpProxy.js +123 -0
  45. package/dist/profiler/mcpProxy.js.map +1 -0
  46. package/dist/profiler/modelRouter.d.ts +30 -0
  47. package/dist/profiler/modelRouter.js +99 -0
  48. package/dist/profiler/modelRouter.js.map +1 -0
  49. package/dist/profiler/otelReceiver.d.ts +17 -0
  50. package/dist/profiler/otelReceiver.js +62 -0
  51. package/dist/profiler/otelReceiver.js.map +1 -0
  52. package/dist/profiler/proofEngine.d.ts +41 -0
  53. package/dist/profiler/proofEngine.js +93 -0
  54. package/dist/profiler/proofEngine.js.map +1 -0
  55. package/dist/profiler/workflowTemplates.d.ts +41 -0
  56. package/dist/profiler/workflowTemplates.js +95 -0
  57. package/dist/profiler/workflowTemplates.js.map +1 -0
  58. package/dist/providers/localMemoryProvider.js +3 -2
  59. package/dist/providers/localMemoryProvider.js.map +1 -1
  60. package/dist/runtimeConfig.d.ts +11 -0
  61. package/dist/runtimeConfig.js +27 -0
  62. package/dist/runtimeConfig.js.map +1 -0
  63. package/dist/sandboxApi.js +2 -1
  64. package/dist/sandboxApi.js.map +1 -1
  65. package/dist/security/auditLog.js +8 -3
  66. package/dist/security/auditLog.js.map +1 -1
  67. package/dist/subconscious/blocks.d.ts +43 -0
  68. package/dist/subconscious/blocks.js +158 -0
  69. package/dist/subconscious/blocks.js.map +1 -0
  70. package/dist/subconscious/classifier.d.ts +22 -0
  71. package/dist/subconscious/classifier.js +118 -0
  72. package/dist/subconscious/classifier.js.map +1 -0
  73. package/dist/subconscious/graphEngine.d.ts +65 -0
  74. package/dist/subconscious/graphEngine.js +234 -0
  75. package/dist/subconscious/graphEngine.js.map +1 -0
  76. package/dist/subconscious/index.d.ts +19 -0
  77. package/dist/subconscious/index.js +20 -0
  78. package/dist/subconscious/index.js.map +1 -0
  79. package/dist/subconscious/tools.d.ts +5 -0
  80. package/dist/subconscious/tools.js +255 -0
  81. package/dist/subconscious/tools.js.map +1 -0
  82. package/dist/subconscious/whisperPolicy.d.ts +20 -0
  83. package/dist/subconscious/whisperPolicy.js +171 -0
  84. package/dist/subconscious/whisperPolicy.js.map +1 -0
  85. package/dist/sweep/engine.d.ts +27 -0
  86. package/dist/sweep/engine.js +244 -0
  87. package/dist/sweep/engine.js.map +1 -0
  88. package/dist/sweep/index.d.ts +9 -0
  89. package/dist/sweep/index.js +8 -0
  90. package/dist/sweep/index.js.map +1 -0
  91. package/dist/sweep/sources/github_trending.d.ts +6 -0
  92. package/dist/sweep/sources/github_trending.js +37 -0
  93. package/dist/sweep/sources/github_trending.js.map +1 -0
  94. package/dist/sweep/sources/hackernews.d.ts +7 -0
  95. package/dist/sweep/sources/hackernews.js +57 -0
  96. package/dist/sweep/sources/hackernews.js.map +1 -0
  97. package/dist/sweep/sources/openbb_finance.d.ts +9 -0
  98. package/dist/sweep/sources/openbb_finance.js +46 -0
  99. package/dist/sweep/sources/openbb_finance.js.map +1 -0
  100. package/dist/sweep/sources/producthunt.d.ts +6 -0
  101. package/dist/sweep/sources/producthunt.js +41 -0
  102. package/dist/sweep/sources/producthunt.js.map +1 -0
  103. package/dist/sweep/sources/web_signals.d.ts +7 -0
  104. package/dist/sweep/sources/web_signals.js +63 -0
  105. package/dist/sweep/sources/web_signals.js.map +1 -0
  106. package/dist/sweep/sources/yahoo_finance.d.ts +6 -0
  107. package/dist/sweep/sources/yahoo_finance.js +47 -0
  108. package/dist/sweep/sources/yahoo_finance.js.map +1 -0
  109. package/dist/sweep/types.d.ts +50 -0
  110. package/dist/sweep/types.js +9 -0
  111. package/dist/sweep/types.js.map +1 -0
  112. package/dist/sync/founderEpisodeStore.d.ts +98 -0
  113. package/dist/sync/founderEpisodeStore.js +230 -0
  114. package/dist/sync/founderEpisodeStore.js.map +1 -0
  115. package/dist/sync/hyperloopArchive.d.ts +51 -0
  116. package/dist/sync/hyperloopArchive.js +153 -0
  117. package/dist/sync/hyperloopArchive.js.map +1 -0
  118. package/dist/sync/hyperloopEval.d.ts +123 -0
  119. package/dist/sync/hyperloopEval.js +389 -0
  120. package/dist/sync/hyperloopEval.js.map +1 -0
  121. package/dist/sync/protocol.d.ts +172 -0
  122. package/dist/sync/protocol.js +9 -0
  123. package/dist/sync/protocol.js.map +1 -0
  124. package/dist/sync/sessionMemory.d.ts +47 -0
  125. package/dist/sync/sessionMemory.js +138 -0
  126. package/dist/sync/sessionMemory.js.map +1 -0
  127. package/dist/sync/store.d.ts +384 -0
  128. package/dist/sync/store.js +1435 -0
  129. package/dist/sync/store.js.map +1 -0
  130. package/dist/sync/syncBridgeClient.d.ts +30 -0
  131. package/dist/sync/syncBridgeClient.js +172 -0
  132. package/dist/sync/syncBridgeClient.js.map +1 -0
  133. package/dist/tools/autonomousDeliveryTools.d.ts +2 -0
  134. package/dist/tools/autonomousDeliveryTools.js +1104 -0
  135. package/dist/tools/autonomousDeliveryTools.js.map +1 -0
  136. package/dist/tools/boilerplateTools.js +10 -9
  137. package/dist/tools/boilerplateTools.js.map +1 -1
  138. package/dist/tools/claudeCodeIngestTools.d.ts +10 -0
  139. package/dist/tools/claudeCodeIngestTools.js +347 -0
  140. package/dist/tools/claudeCodeIngestTools.js.map +1 -0
  141. package/dist/tools/coreWorkflowTools.d.ts +2 -0
  142. package/dist/tools/coreWorkflowTools.js +488 -0
  143. package/dist/tools/coreWorkflowTools.js.map +1 -0
  144. package/dist/tools/deltaTools.d.ts +15 -0
  145. package/dist/tools/deltaTools.js +1522 -0
  146. package/dist/tools/deltaTools.js.map +1 -0
  147. package/dist/tools/documentationTools.js +2 -1
  148. package/dist/tools/documentationTools.js.map +1 -1
  149. package/dist/tools/entityLookupTools.d.ts +14 -0
  150. package/dist/tools/entityLookupTools.js +159 -0
  151. package/dist/tools/entityLookupTools.js.map +1 -0
  152. package/dist/tools/entityTemporalTools.d.ts +12 -0
  153. package/dist/tools/entityTemporalTools.js +330 -0
  154. package/dist/tools/entityTemporalTools.js.map +1 -0
  155. package/dist/tools/founderLocalPipeline.d.ts +215 -0
  156. package/dist/tools/founderLocalPipeline.js +1516 -2
  157. package/dist/tools/founderLocalPipeline.js.map +1 -1
  158. package/dist/tools/founderOperatingModel.d.ts +120 -0
  159. package/dist/tools/founderOperatingModel.js +469 -0
  160. package/dist/tools/founderOperatingModel.js.map +1 -0
  161. package/dist/tools/founderOperatingModelTools.d.ts +2 -0
  162. package/dist/tools/founderOperatingModelTools.js +169 -0
  163. package/dist/tools/founderOperatingModelTools.js.map +1 -0
  164. package/dist/tools/founderStrategicOpsTools.d.ts +2 -0
  165. package/dist/tools/founderStrategicOpsTools.js +1310 -0
  166. package/dist/tools/founderStrategicOpsTools.js.map +1 -0
  167. package/dist/tools/graphifyTools.d.ts +19 -0
  168. package/dist/tools/graphifyTools.js +375 -0
  169. package/dist/tools/graphifyTools.js.map +1 -0
  170. package/dist/tools/index.d.ts +3 -0
  171. package/dist/tools/index.js +4 -0
  172. package/dist/tools/index.js.map +1 -1
  173. package/dist/tools/monteCarloTools.d.ts +16 -0
  174. package/dist/tools/monteCarloTools.js +225 -0
  175. package/dist/tools/monteCarloTools.js.map +1 -0
  176. package/dist/tools/packetCompilerTools.d.ts +12 -0
  177. package/dist/tools/packetCompilerTools.js +322 -0
  178. package/dist/tools/packetCompilerTools.js.map +1 -0
  179. package/dist/tools/planSynthesisTools.d.ts +15 -0
  180. package/dist/tools/planSynthesisTools.js +455 -0
  181. package/dist/tools/planSynthesisTools.js.map +1 -0
  182. package/dist/tools/profilerTools.d.ts +20 -0
  183. package/dist/tools/profilerTools.js +364 -0
  184. package/dist/tools/profilerTools.js.map +1 -0
  185. package/dist/tools/progressiveDiscoveryTools.js +2 -1
  186. package/dist/tools/progressiveDiscoveryTools.js.map +1 -1
  187. package/dist/tools/savingsTools.d.ts +11 -0
  188. package/dist/tools/savingsTools.js +155 -0
  189. package/dist/tools/savingsTools.js.map +1 -0
  190. package/dist/tools/scenarioCompilerTools.d.ts +14 -0
  191. package/dist/tools/scenarioCompilerTools.js +290 -0
  192. package/dist/tools/scenarioCompilerTools.js.map +1 -0
  193. package/dist/tools/sharedContextTools.d.ts +2 -0
  194. package/dist/tools/sharedContextTools.js +423 -0
  195. package/dist/tools/sharedContextTools.js.map +1 -0
  196. package/dist/tools/sitemapTools.d.ts +15 -0
  197. package/dist/tools/sitemapTools.js +560 -0
  198. package/dist/tools/sitemapTools.js.map +1 -0
  199. package/dist/tools/sweepTools.d.ts +9 -0
  200. package/dist/tools/sweepTools.js +112 -0
  201. package/dist/tools/sweepTools.js.map +1 -0
  202. package/dist/tools/syncBridgeTools.d.ts +2 -0
  203. package/dist/tools/syncBridgeTools.js +258 -0
  204. package/dist/tools/syncBridgeTools.js.map +1 -0
  205. package/dist/tools/toolRegistry.js +1223 -45
  206. package/dist/tools/toolRegistry.js.map +1 -1
  207. package/dist/tools/workspaceTools.d.ts +19 -0
  208. package/dist/tools/workspaceTools.js +762 -0
  209. package/dist/tools/workspaceTools.js.map +1 -0
  210. package/dist/toolsetRegistry.js +162 -3
  211. package/dist/toolsetRegistry.js.map +1 -1
  212. package/package.json +39 -38
  213. package/rules/nodebench-agentic-reliability.md +32 -0
  214. package/rules/nodebench-analyst-diagnostic.md +25 -0
  215. package/rules/nodebench-auto-qa.md +31 -0
  216. package/rules/nodebench-completion-traceability.md +22 -0
  217. package/rules/nodebench-flywheel-continuous.md +25 -0
  218. package/rules/nodebench-pre-release-review.md +24 -0
  219. package/rules/nodebench-qa-dogfood.md +26 -0
  220. package/rules/nodebench-scenario-testing.md +30 -0
  221. package/rules/nodebench-self-direction.md +23 -0
  222. package/rules/nodebench-self-judge-loop.md +24 -0
  223. package/scripts/install.sh +215 -0
  224. package/dist/__tests__/analytics.test.d.ts +0 -11
  225. package/dist/__tests__/analytics.test.js +0 -546
  226. package/dist/__tests__/analytics.test.js.map +0 -1
  227. package/dist/__tests__/architectComplex.test.d.ts +0 -1
  228. package/dist/__tests__/architectComplex.test.js +0 -373
  229. package/dist/__tests__/architectComplex.test.js.map +0 -1
  230. package/dist/__tests__/architectSmoke.test.d.ts +0 -1
  231. package/dist/__tests__/architectSmoke.test.js +0 -92
  232. package/dist/__tests__/architectSmoke.test.js.map +0 -1
  233. package/dist/__tests__/audit-registry.d.ts +0 -1
  234. package/dist/__tests__/audit-registry.js +0 -60
  235. package/dist/__tests__/audit-registry.js.map +0 -1
  236. package/dist/__tests__/batchAutopilot.test.d.ts +0 -8
  237. package/dist/__tests__/batchAutopilot.test.js +0 -218
  238. package/dist/__tests__/batchAutopilot.test.js.map +0 -1
  239. package/dist/__tests__/cliSubcommands.test.d.ts +0 -1
  240. package/dist/__tests__/cliSubcommands.test.js +0 -138
  241. package/dist/__tests__/cliSubcommands.test.js.map +0 -1
  242. package/dist/__tests__/comparativeBench.test.d.ts +0 -1
  243. package/dist/__tests__/comparativeBench.test.js +0 -722
  244. package/dist/__tests__/comparativeBench.test.js.map +0 -1
  245. package/dist/__tests__/critterCalibrationEval.d.ts +0 -8
  246. package/dist/__tests__/critterCalibrationEval.js +0 -370
  247. package/dist/__tests__/critterCalibrationEval.js.map +0 -1
  248. package/dist/__tests__/dynamicLoading.test.d.ts +0 -1
  249. package/dist/__tests__/dynamicLoading.test.js +0 -280
  250. package/dist/__tests__/dynamicLoading.test.js.map +0 -1
  251. package/dist/__tests__/embeddingProvider.test.d.ts +0 -1
  252. package/dist/__tests__/embeddingProvider.test.js +0 -86
  253. package/dist/__tests__/embeddingProvider.test.js.map +0 -1
  254. package/dist/__tests__/evalDatasetBench.test.d.ts +0 -1
  255. package/dist/__tests__/evalDatasetBench.test.js +0 -738
  256. package/dist/__tests__/evalDatasetBench.test.js.map +0 -1
  257. package/dist/__tests__/evalHarness.test.d.ts +0 -1
  258. package/dist/__tests__/evalHarness.test.js +0 -1107
  259. package/dist/__tests__/evalHarness.test.js.map +0 -1
  260. package/dist/__tests__/fixtures/bfcl_v3_long_context.sample.json +0 -264
  261. package/dist/__tests__/fixtures/generateBfclLongContextFixture.d.ts +0 -10
  262. package/dist/__tests__/fixtures/generateBfclLongContextFixture.js +0 -135
  263. package/dist/__tests__/fixtures/generateBfclLongContextFixture.js.map +0 -1
  264. package/dist/__tests__/fixtures/generateSwebenchVerifiedFixture.d.ts +0 -14
  265. package/dist/__tests__/fixtures/generateSwebenchVerifiedFixture.js +0 -189
  266. package/dist/__tests__/fixtures/generateSwebenchVerifiedFixture.js.map +0 -1
  267. package/dist/__tests__/fixtures/generateToolbenchInstructionFixture.d.ts +0 -16
  268. package/dist/__tests__/fixtures/generateToolbenchInstructionFixture.js +0 -154
  269. package/dist/__tests__/fixtures/generateToolbenchInstructionFixture.js.map +0 -1
  270. package/dist/__tests__/fixtures/swebench_verified.sample.json +0 -162
  271. package/dist/__tests__/fixtures/toolbench_instruction.sample.json +0 -109
  272. package/dist/__tests__/forecastingDogfood.test.d.ts +0 -9
  273. package/dist/__tests__/forecastingDogfood.test.js +0 -284
  274. package/dist/__tests__/forecastingDogfood.test.js.map +0 -1
  275. package/dist/__tests__/forecastingScoring.test.d.ts +0 -9
  276. package/dist/__tests__/forecastingScoring.test.js +0 -202
  277. package/dist/__tests__/forecastingScoring.test.js.map +0 -1
  278. package/dist/__tests__/gaiaCapabilityAudioEval.test.d.ts +0 -15
  279. package/dist/__tests__/gaiaCapabilityAudioEval.test.js +0 -265
  280. package/dist/__tests__/gaiaCapabilityAudioEval.test.js.map +0 -1
  281. package/dist/__tests__/gaiaCapabilityEval.test.d.ts +0 -14
  282. package/dist/__tests__/gaiaCapabilityEval.test.js +0 -1259
  283. package/dist/__tests__/gaiaCapabilityEval.test.js.map +0 -1
  284. package/dist/__tests__/gaiaCapabilityFilesEval.test.d.ts +0 -15
  285. package/dist/__tests__/gaiaCapabilityFilesEval.test.js +0 -914
  286. package/dist/__tests__/gaiaCapabilityFilesEval.test.js.map +0 -1
  287. package/dist/__tests__/gaiaCapabilityMediaEval.test.d.ts +0 -15
  288. package/dist/__tests__/gaiaCapabilityMediaEval.test.js +0 -1101
  289. package/dist/__tests__/gaiaCapabilityMediaEval.test.js.map +0 -1
  290. package/dist/__tests__/helpers/answerMatch.d.ts +0 -41
  291. package/dist/__tests__/helpers/answerMatch.js +0 -267
  292. package/dist/__tests__/helpers/answerMatch.js.map +0 -1
  293. package/dist/__tests__/helpers/textLlm.d.ts +0 -25
  294. package/dist/__tests__/helpers/textLlm.js +0 -214
  295. package/dist/__tests__/helpers/textLlm.js.map +0 -1
  296. package/dist/__tests__/localDashboard.test.d.ts +0 -1
  297. package/dist/__tests__/localDashboard.test.js +0 -226
  298. package/dist/__tests__/localDashboard.test.js.map +0 -1
  299. package/dist/__tests__/multiHopDogfood.test.d.ts +0 -12
  300. package/dist/__tests__/multiHopDogfood.test.js +0 -303
  301. package/dist/__tests__/multiHopDogfood.test.js.map +0 -1
  302. package/dist/__tests__/openDatasetParallelEval.test.d.ts +0 -7
  303. package/dist/__tests__/openDatasetParallelEval.test.js +0 -209
  304. package/dist/__tests__/openDatasetParallelEval.test.js.map +0 -1
  305. package/dist/__tests__/openDatasetParallelEvalGaia.test.d.ts +0 -7
  306. package/dist/__tests__/openDatasetParallelEvalGaia.test.js +0 -279
  307. package/dist/__tests__/openDatasetParallelEvalGaia.test.js.map +0 -1
  308. package/dist/__tests__/openDatasetParallelEvalSwebench.test.d.ts +0 -7
  309. package/dist/__tests__/openDatasetParallelEvalSwebench.test.js +0 -220
  310. package/dist/__tests__/openDatasetParallelEvalSwebench.test.js.map +0 -1
  311. package/dist/__tests__/openDatasetParallelEvalToolbench.test.d.ts +0 -7
  312. package/dist/__tests__/openDatasetParallelEvalToolbench.test.js +0 -218
  313. package/dist/__tests__/openDatasetParallelEvalToolbench.test.js.map +0 -1
  314. package/dist/__tests__/openDatasetPerfComparison.test.d.ts +0 -10
  315. package/dist/__tests__/openDatasetPerfComparison.test.js +0 -318
  316. package/dist/__tests__/openDatasetPerfComparison.test.js.map +0 -1
  317. package/dist/__tests__/openclawDogfood.test.d.ts +0 -23
  318. package/dist/__tests__/openclawDogfood.test.js +0 -535
  319. package/dist/__tests__/openclawDogfood.test.js.map +0 -1
  320. package/dist/__tests__/openclawMessaging.test.d.ts +0 -14
  321. package/dist/__tests__/openclawMessaging.test.js +0 -232
  322. package/dist/__tests__/openclawMessaging.test.js.map +0 -1
  323. package/dist/__tests__/presetRealWorldBench.test.d.ts +0 -1
  324. package/dist/__tests__/presetRealWorldBench.test.js +0 -859
  325. package/dist/__tests__/presetRealWorldBench.test.js.map +0 -1
  326. package/dist/__tests__/tools.test.d.ts +0 -1
  327. package/dist/__tests__/tools.test.js +0 -3201
  328. package/dist/__tests__/tools.test.js.map +0 -1
  329. package/dist/__tests__/toolsetGatingEval.test.d.ts +0 -1
  330. package/dist/__tests__/toolsetGatingEval.test.js +0 -1099
  331. package/dist/__tests__/toolsetGatingEval.test.js.map +0 -1
  332. package/dist/__tests__/traceabilityDogfood.test.d.ts +0 -12
  333. package/dist/__tests__/traceabilityDogfood.test.js +0 -241
  334. package/dist/__tests__/traceabilityDogfood.test.js.map +0 -1
  335. package/dist/__tests__/webmcpTools.test.d.ts +0 -7
  336. package/dist/__tests__/webmcpTools.test.js +0 -195
  337. package/dist/__tests__/webmcpTools.test.js.map +0 -1
  338. package/dist/benchmarks/testProviderBus.d.ts +0 -7
  339. package/dist/benchmarks/testProviderBus.js +0 -272
  340. package/dist/benchmarks/testProviderBus.js.map +0 -1
  341. package/dist/hooks/postCompaction.d.ts +0 -14
  342. package/dist/hooks/postCompaction.js +0 -51
  343. package/dist/hooks/postCompaction.js.map +0 -1
  344. package/dist/security/__tests__/security.test.d.ts +0 -8
  345. package/dist/security/__tests__/security.test.js +0 -295
  346. package/dist/security/__tests__/security.test.js.map +0 -1
  347. package/dist/tools/documentTools.d.ts +0 -5
  348. package/dist/tools/documentTools.js +0 -524
  349. package/dist/tools/documentTools.js.map +0 -1
  350. package/dist/tools/financialTools.d.ts +0 -10
  351. package/dist/tools/financialTools.js +0 -403
  352. package/dist/tools/financialTools.js.map +0 -1
  353. package/dist/tools/memoryTools.d.ts +0 -5
  354. package/dist/tools/memoryTools.js +0 -137
  355. package/dist/tools/memoryTools.js.map +0 -1
  356. package/dist/tools/planningTools.d.ts +0 -5
  357. package/dist/tools/planningTools.js +0 -147
  358. package/dist/tools/planningTools.js.map +0 -1
  359. package/dist/tools/searchTools.d.ts +0 -5
  360. package/dist/tools/searchTools.js +0 -145
  361. package/dist/tools/searchTools.js.map +0 -1
@@ -1,284 +0,0 @@
1
- /**
2
- * Forecasting OS — Integration Tests (Dogfood)
3
- *
4
- * Full lifecycle tests: create → evidence → update → resolve → track record → calibration.
5
- * Uses MCP tools directly (SQLite-backed, no Convex dependency).
6
- *
7
- * Run: npx vitest run src/__tests__/forecastingDogfood.test.ts
8
- */
9
- import { describe, it, expect, beforeAll } from "vitest";
10
- import { forecastingTools } from "../tools/forecastingTools.js";
11
- // ─── Helpers ────────────────────────────────────────────────────────────────
12
- const toolMap = new Map(forecastingTools.map((t) => [t.name, t]));
13
- async function callTool(name, args) {
14
- const tool = toolMap.get(name);
15
- if (!tool)
16
- throw new Error(`Tool ${name} not found`);
17
- const result = await tool.handler(args);
18
- const text = result.find((r) => r.type === "text")?.text;
19
- if (!text)
20
- throw new Error(`Tool ${name} returned no text`);
21
- return JSON.parse(text);
22
- }
23
- // ─── Tool Structure ─────────────────────────────────────────────────────────
24
- describe("Forecasting tools: structure", () => {
25
- it("should have 9 tools", () => {
26
- expect(forecastingTools.length).toBe(9);
27
- });
28
- it("every tool has name, description, inputSchema, handler", () => {
29
- for (const tool of forecastingTools) {
30
- expect(tool.name).toBeTruthy();
31
- expect(tool.description).toBeTruthy();
32
- expect(tool.inputSchema).toBeDefined();
33
- expect(typeof tool.handler).toBe("function");
34
- }
35
- });
36
- it("tool names match expected list", () => {
37
- const names = forecastingTools.map((t) => t.name).sort();
38
- expect(names).toEqual([
39
- "add_forecast_evidence",
40
- "compute_calibration",
41
- "create_forecast",
42
- "get_active_forecasts",
43
- "get_forecast_chain",
44
- "get_forecast_evidence",
45
- "get_forecast_track_record",
46
- "resolve_forecast",
47
- "update_forecast_probability",
48
- ]);
49
- });
50
- });
51
- // ─── Full Lifecycle ─────────────────────────────────────────────────────────
52
- describe("Forecasting lifecycle", () => {
53
- let forecastId;
54
- it("create_forecast — creates a binary forecast", async () => {
55
- const result = await callTool("create_forecast", {
56
- question: "Will GPT-5 be released by 2026-12-31?",
57
- forecastType: "binary",
58
- resolutionDate: "2026-12-31",
59
- resolutionCriteria: "OpenAI announces GPT-5 on official blog or press release",
60
- probability: 0.5,
61
- baseRate: 0.6,
62
- refreshFrequency: "weekly",
63
- topDrivers: ["Historical 18-month release cadence", "Sam Altman interview hints"],
64
- topCounterarguments: ["No official roadmap published"],
65
- tags: ["ai_tech"],
66
- });
67
- expect(result.forecastId).toBeTruthy();
68
- expect(result.status).toBe("active");
69
- expect(result.probability).toBe(0.5);
70
- forecastId = result.forecastId;
71
- });
72
- it("add_forecast_evidence — adds supporting evidence", async () => {
73
- const result = await callTool("add_forecast_evidence", {
74
- forecastId,
75
- sourceUrl: "https://example.com/sam-altman-interview",
76
- sourceTitle: "Sam Altman Interview on AI Progress",
77
- sourceType: "news",
78
- excerpt: "Altman hints at a major model release in the second half of 2026",
79
- signal: "supporting",
80
- impactOnProbability: 0.1,
81
- });
82
- expect(result.evidenceId).toBeTruthy();
83
- expect(result.signal).toBe("supporting");
84
- });
85
- it("add_forecast_evidence — adds disconfirming evidence", async () => {
86
- const result = await callTool("add_forecast_evidence", {
87
- forecastId,
88
- sourceUrl: "https://example.com/compute-shortage",
89
- sourceTitle: "GPU Shortage Report Q1 2026",
90
- sourceType: "filing",
91
- excerpt: "Major cloud providers report 40% compute capacity shortfall for large model training",
92
- signal: "disconfirming",
93
- });
94
- expect(result.evidenceId).toBeTruthy();
95
- expect(result.signal).toBe("disconfirming");
96
- });
97
- it("add_forecast_evidence — deduplicates by URL", async () => {
98
- const result = await callTool("add_forecast_evidence", {
99
- forecastId,
100
- sourceUrl: "https://example.com/sam-altman-interview",
101
- sourceTitle: "Duplicate",
102
- sourceType: "news",
103
- excerpt: "Duplicate entry",
104
- signal: "supporting",
105
- });
106
- expect(result.error).toContain("already exists");
107
- });
108
- it("get_forecast_evidence — returns evidence", async () => {
109
- const result = await callTool("get_forecast_evidence", {
110
- forecastId,
111
- });
112
- expect(result.count).toBe(2);
113
- expect(result.evidence.length).toBe(2);
114
- });
115
- it("get_forecast_evidence — filters by signal", async () => {
116
- const result = await callTool("get_forecast_evidence", {
117
- forecastId,
118
- signal: "supporting",
119
- });
120
- expect(result.count).toBe(1);
121
- });
122
- it("update_forecast_probability — updates with reasoning", async () => {
123
- const result = await callTool("update_forecast_probability", {
124
- forecastId,
125
- probability: 0.65,
126
- topDrivers: [
127
- "Sam Altman interview hint",
128
- "Historical 18-month cadence",
129
- "Competitor pressure from Google Gemini",
130
- ],
131
- topCounterarguments: [
132
- "GPU shortage may delay training",
133
- "No official roadmap",
134
- ],
135
- reasoning: "Soft leadership signal + historical pattern outweigh compute concerns",
136
- });
137
- expect(result.previousProbability).toBe(0.5);
138
- expect(result.newProbability).toBe(0.65);
139
- expect(result.diff).toBe("50% → 65% (+15pp)");
140
- });
141
- it("update_forecast_probability — validates range", async () => {
142
- const result = await callTool("update_forecast_probability", {
143
- forecastId,
144
- probability: 1.5,
145
- reasoning: "Invalid",
146
- });
147
- expect(result.error).toContain("between 0 and 1");
148
- });
149
- it("get_forecast_chain — returns full audit trail", async () => {
150
- const result = await callTool("get_forecast_chain", {
151
- forecastId,
152
- });
153
- expect(result.forecast).toBeTruthy();
154
- expect(result.evidence.length).toBe(2);
155
- expect(result.updateHistory.length).toBe(1);
156
- expect(result.resolution).toBeNull();
157
- expect(result.summary.evidenceCount).toBe(2);
158
- expect(result.summary.updateCount).toBe(1);
159
- expect(result.summary.isResolved).toBe(false);
160
- });
161
- it("get_active_forecasts — lists active forecasts", async () => {
162
- const result = await callTool("get_active_forecasts", {});
163
- expect(result.count).toBeGreaterThanOrEqual(1);
164
- const forecasts = result.forecasts;
165
- const found = forecasts.find((f) => f.id === forecastId);
166
- expect(found).toBeTruthy();
167
- expect(found?.status).toBe("active");
168
- });
169
- it("resolve_forecast — resolves with Brier score", async () => {
170
- const result = await callTool("resolve_forecast", {
171
- forecastId,
172
- outcome: "yes",
173
- resolutionNotes: "GPT-5 announced on 2026-11-15 via OpenAI blog post",
174
- resolutionSourceUrl: "https://openai.com/gpt-5-announcement",
175
- });
176
- expect(result.status).toBe("resolved");
177
- expect(result.outcome).toBe("yes");
178
- // Brier: (0.65 - 1)^2 = 0.1225
179
- expect(result.brierScore).toBeCloseTo(0.1225, 3);
180
- // Log: -log(0.65) ≈ 0.431
181
- expect(result.logScore).toBeCloseTo(0.431, 2);
182
- });
183
- it("resolve_forecast — cannot resolve twice", async () => {
184
- const result = await callTool("resolve_forecast", {
185
- forecastId,
186
- outcome: "no",
187
- resolutionNotes: "Already resolved",
188
- });
189
- expect(result.error).toContain("already resolved");
190
- });
191
- it("get_forecast_track_record — shows Brier aggregate", async () => {
192
- const result = await callTool("get_forecast_track_record", {});
193
- expect(result.scoredCount).toBeGreaterThanOrEqual(1);
194
- // SQLite persists between test runs, so overallBrier is average across ALL
195
- // resolved forecasts (not just this run). Use a reasonable bound instead.
196
- expect(result.overallBrier).toBeGreaterThan(0);
197
- expect(result.overallBrier).toBeLessThan(0.5);
198
- });
199
- });
200
- // ─── Multi-Forecast Calibration ─────────────────────────────────────────────
201
- describe("Forecasting calibration", () => {
202
- beforeAll(async () => {
203
- // Create and resolve 5 forecasts with known outcomes for calibration
204
- const scenarios = [
205
- { probability: 0.9, outcome: "yes" }, // Brier: 0.01
206
- { probability: 0.8, outcome: "yes" }, // Brier: 0.04
207
- { probability: 0.3, outcome: "no" }, // Brier: 0.09
208
- { probability: 0.1, outcome: "no" }, // Brier: 0.01
209
- { probability: 0.6, outcome: "yes" }, // Brier: 0.16
210
- ];
211
- for (let i = 0; i < scenarios.length; i++) {
212
- const create = await callTool("create_forecast", {
213
- question: `Calibration test forecast ${i + 1}?`,
214
- resolutionDate: "2026-01-01",
215
- resolutionCriteria: `Test criteria ${i + 1}`,
216
- probability: scenarios[i].probability,
217
- tags: ["test_calibration"],
218
- });
219
- await callTool("resolve_forecast", {
220
- forecastId: create.forecastId,
221
- outcome: scenarios[i].outcome,
222
- resolutionNotes: `Test resolution ${i + 1}`,
223
- });
224
- }
225
- });
226
- it("compute_calibration — returns 10 bins", async () => {
227
- const result = await callTool("compute_calibration", {});
228
- expect(result.bins).toBeTruthy();
229
- expect(result.bins.length).toBe(10);
230
- expect(result.overallBrier).toBeTruthy();
231
- expect(typeof result.overallBrier).toBe("number");
232
- expect(result.forecastCount).toBeGreaterThanOrEqual(5);
233
- });
234
- it("get_forecast_track_record — aggregate includes all resolved", async () => {
235
- const result = await callTool("get_forecast_track_record", {});
236
- // At least 6 resolved (1 from lifecycle + 5 from calibration)
237
- expect(result.scoredCount).toBeGreaterThanOrEqual(6);
238
- // Average Brier should be reasonable
239
- expect(result.overallBrier).toBeLessThan(0.25);
240
- });
241
- });
242
- // ─── Edge Cases ─────────────────────────────────────────────────────────────
243
- describe("Forecasting edge cases", () => {
244
- it("create_forecast — rejects invalid probability", async () => {
245
- const result = await callTool("create_forecast", {
246
- question: "Invalid prob test",
247
- resolutionDate: "2026-12-31",
248
- resolutionCriteria: "Test",
249
- probability: -0.1,
250
- });
251
- expect(result.error).toContain("between 0 and 1");
252
- });
253
- it("resolve_forecast — ambiguous outcome excluded from scoring", async () => {
254
- const create = await callTool("create_forecast", {
255
- question: "Ambiguous resolution test?",
256
- resolutionDate: "2026-12-31",
257
- resolutionCriteria: "Test",
258
- probability: 0.7,
259
- });
260
- const resolve = await callTool("resolve_forecast", {
261
- forecastId: create.forecastId,
262
- outcome: "ambiguous",
263
- resolutionNotes: "Resolution criteria were unclear",
264
- });
265
- expect(resolve.brierScore).toBeNull();
266
- expect(resolve.logScore).toBeNull();
267
- });
268
- it("get_forecast_chain — returns error for nonexistent forecast", async () => {
269
- const result = await callTool("get_forecast_chain", {
270
- forecastId: "nonexistent_id",
271
- });
272
- expect(result.error).toContain("not found");
273
- });
274
- it("get_active_forecasts — filters by tags", async () => {
275
- const result = await callTool("get_active_forecasts", {
276
- tags: ["test_calibration"],
277
- });
278
- // All calibration forecasts are resolved, so none should be active
279
- const forecasts = result.forecasts;
280
- const calibration = forecasts.filter((f) => f.tags.includes("test_calibration"));
281
- expect(calibration.length).toBe(0);
282
- });
283
- });
284
- //# sourceMappingURL=forecastingDogfood.test.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"forecastingDogfood.test.js","sourceRoot":"","sources":["../../src/__tests__/forecastingDogfood.test.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AACzD,OAAO,EAAE,gBAAgB,EAAE,MAAM,8BAA8B,CAAC;AAGhE,+EAA+E;AAE/E,MAAM,OAAO,GAAG,IAAI,GAAG,CACrB,gBAAgB,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,CACzC,CAAC;AAEF,KAAK,UAAU,QAAQ,CACrB,IAAY,EACZ,IAA6B;IAE7B,MAAM,IAAI,GAAG,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IAC/B,IAAI,CAAC,IAAI;QAAE,MAAM,IAAI,KAAK,CAAC,QAAQ,IAAI,YAAY,CAAC,CAAC;IACrD,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;IACxC,MAAM,IAAI,GAAG,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,EAAE,IAAI,CAAC;IACzD,IAAI,CAAC,IAAI;QAAE,MAAM,IAAI,KAAK,CAAC,QAAQ,IAAI,mBAAmB,CAAC,CAAC;IAC5D,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED,+EAA+E;AAE/E,QAAQ,CAAC,8BAA8B,EAAE,GAAG,EAAE;IAC5C,EAAE,CAAC,qBAAqB,EAAE,GAAG,EAAE;QAC7B,MAAM,CAAC,gBAAgB,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAC1C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,wDAAwD,EAAE,GAAG,EAAE;QAChE,KAAK,MAAM,IAAI,IAAI,gBAAgB,EAAE,CAAC;YACpC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,UAAU,EAAE,CAAC;YAC/B,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,UAAU,EAAE,CAAC;YACtC,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,WAAW,EAAE,CAAC;YACvC,MAAM,CAAC,OAAO,IAAI,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QAC/C,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,gCAAgC,EAAE,GAAG,EAAE;QACxC,MAAM,KAAK,GAAG,gBAAgB,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;QACzD,MAAM,CAAC,KAAK,CAAC,CAAC,OAAO,CAAC;YACpB,uBAAuB;YACvB,qBAAqB;YACrB,iBAAiB;YACjB,sBAAsB;YACtB,oBAAoB;YACpB,uBAAuB;YACvB,2BAA2B;YAC3B,kBAAkB;YAClB,6BAA6B;SAC9B,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,+EAA+E;AAE/E,QAAQ,CAAC,uBAAuB,EAAE,GAAG,EAAE;IACrC,IAAI,UAAkB,CAAC;IAEvB,EAAE,CAAC,6CAA6C,EAAE,KAAK,IAAI,EAAE;QAC3D,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,iBAAiB,EAAE;YAC/C,QAAQ,EAAE,uCAAuC;YACjD,YAAY,EAAE,QAAQ;YACtB,
cAAc,EAAE,YAAY;YAC5B,kBAAkB,EAAE,0DAA0D;YAC9E,WAAW,EAAE,GAAG;YAChB,QAAQ,EAAE,GAAG;YACb,gBAAgB,EAAE,QAAQ;YAC1B,UAAU,EAAE,CAAC,qCAAqC,EAAE,4BAA4B,CAAC;YACjF,mBAAmB,EAAE,CAAC,+BAA+B,CAAC;YACtD,IAAI,EAAE,CAAC,SAAS,CAAC;SAClB,CAAC,CAAC;QAEH,MAAM,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,UAAU,EAAE,CAAC;QACvC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QACrC,MAAM,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACrC,UAAU,GAAG,MAAM,CAAC,UAAoB,CAAC;IAC3C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,kDAAkD,EAAE,KAAK,IAAI,EAAE;QAChE,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,uBAAuB,EAAE;YACrD,UAAU;YACV,SAAS,EAAE,0CAA0C;YACrD,WAAW,EAAE,qCAAqC;YAClD,UAAU,EAAE,MAAM;YAClB,OAAO,EAAE,kEAAkE;YAC3E,MAAM,EAAE,YAAY;YACpB,mBAAmB,EAAE,GAAG;SACzB,CAAC,CAAC;QAEH,MAAM,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,UAAU,EAAE,CAAC;QACvC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;IAC3C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,qDAAqD,EAAE,KAAK,IAAI,EAAE;QACnE,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,uBAAuB,EAAE;YACrD,UAAU;YACV,SAAS,EAAE,sCAAsC;YACjD,WAAW,EAAE,6BAA6B;YAC1C,UAAU,EAAE,QAAQ;YACpB,OAAO,EAAE,sFAAsF;YAC/F,MAAM,EAAE,eAAe;SACxB,CAAC,CAAC;QAEH,MAAM,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,UAAU,EAAE,CAAC;QACvC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;IAC9C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,6CAA6C,EAAE,KAAK,IAAI,EAAE;QAC3D,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,uBAAuB,EAAE;YACrD,UAAU;YACV,SAAS,EAAE,0CAA0C;YACrD,WAAW,EAAE,WAAW;YACxB,UAAU,EAAE,MAAM;YAClB,OAAO,EAAE,iBAAiB;YAC1B,MAAM,EAAE,YAAY;SACrB,CAAC,CAAC;QAEH,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,SAAS,CAAC,gBAAgB,CAAC,CAAC;IACnD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,0CAA0C,EAAE,KAAK,IAAI,EAAE;QACxD,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,uBAAuB,EAAE;YACrD,UAAU;SACX,CAAC,CAAC;QAEH,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,CAAE,MAAM,CAAC,QAAsB,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACxD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,2CAA2C,EAAE,KAAK,IAAI,EAAE;QACzD,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,uBAAuB,EAAE;YACrD,UAAU;YACV,MAAM,EAAE,YAAY;SACrB,CAAC,CAAC;QAEH,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC
,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAC/B,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,sDAAsD,EAAE,KAAK,IAAI,EAAE;QACpE,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,6BAA6B,EAAE;YAC3D,UAAU;YACV,WAAW,EAAE,IAAI;YACjB,UAAU,EAAE;gBACV,2BAA2B;gBAC3B,6BAA6B;gBAC7B,wCAAwC;aACzC;YACD,mBAAmB,EAAE;gBACnB,iCAAiC;gBACjC,qBAAqB;aACtB;YACD,SAAS,EAAE,uEAAuE;SACnF,CAAC,CAAC;QAEH,MAAM,CAAC,MAAM,CAAC,mBAAmB,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC7C,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACzC,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAC;IAChD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,+CAA+C,EAAE,KAAK,IAAI,EAAE;QAC7D,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,6BAA6B,EAAE;YAC3D,UAAU;YACV,WAAW,EAAE,GAAG;YAChB,SAAS,EAAE,SAAS;SACrB,CAAC,CAAC;QAEH,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,SAAS,CAAC,iBAAiB,CAAC,CAAC;IACpD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,+CAA+C,EAAE,KAAK,IAAI,EAAE;QAC7D,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,oBAAoB,EAAE;YAClD,UAAU;SACX,CAAC,CAAC;QAEH,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,UAAU,EAAE,CAAC;QACrC,MAAM,CAAE,MAAM,CAAC,QAAsB,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACtD,MAAM,CAAE,MAAM,CAAC,aAA2B,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC3D,MAAM,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,QAAQ,EAAE,CAAC;QACrC,MAAM,CAAE,MAAM,CAAC,OAAmC,CAAC,aAAa,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC1E,MAAM,CAAE,MAAM,CAAC,OAAmC,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACxE,MAAM,CAAE,MAAM,CAAC,OAAmC,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAC7E,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,+CAA+C,EAAE,KAAK,IAAI,EAAE;QAC7D,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,sBAAsB,EAAE,EAAE,CAAC,CAAC;QAE1D,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,CAAC;QAC/C,MAAM,SAAS,GAAG,MAAM,CAAC,SAA2C,CAAC;QACrE,MAAM,KAAK,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,UAAU,CAAC,CAAC;QACzD,MAAM,CAAC,KAAK,CAAC,CAAC,UAAU,EAAE,CAAC;QAC3B,MAAM,CAAC,KAAK,EAAE,MAAM,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IACvC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,8CAA8C,EAAE,KAAK,IAAI,EAAE;QAC5D,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,kBAAkB,EAAE;YAChD,UAAU;YACV,OAAO,EAAE,KAAK;YACd
,eAAe,EAAE,oDAAoD;YACrE,mBAAmB,EAAE,uCAAuC;SAC7D,CAAC,CAAC;QAEH,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QACvC,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACnC,+BAA+B;QAC/B,MAAM,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,WAAW,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;QACjD,0BAA0B;QAC1B,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,WAAW,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;IAChD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,yCAAyC,EAAE,KAAK,IAAI,EAAE;QACvD,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,kBAAkB,EAAE;YAChD,UAAU;YACV,OAAO,EAAE,IAAI;YACb,eAAe,EAAE,kBAAkB;SACpC,CAAC,CAAC;QAEH,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,SAAS,CAAC,kBAAkB,CAAC,CAAC;IACrD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,mDAAmD,EAAE,KAAK,IAAI,EAAE;QACjE,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,2BAA2B,EAAE,EAAE,CAAC,CAAC;QAE/D,MAAM,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,CAAC;QACrD,2EAA2E;QAC3E,0EAA0E;QAC1E,MAAM,CAAC,MAAM,CAAC,YAAsB,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QACzD,MAAM,CAAC,MAAM,CAAC,YAAsB,CAAC,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC;IAC1D,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,+EAA+E;AAE/E,QAAQ,CAAC,yBAAyB,EAAE,GAAG,EAAE;IACvC,SAAS,CAAC,KAAK,IAAI,EAAE;QACnB,qEAAqE;QACrE,MAAM,SAAS,GAAG;YAChB,EAAE,WAAW,EAAE,GAAG,EAAE,OAAO,EAAE,KAAK,EAAE,EAAI,cAAc;YACtD,EAAE,WAAW,EAAE,GAAG,EAAE,OAAO,EAAE,KAAK,EAAE,EAAI,cAAc;YACtD,EAAE,WAAW,EAAE,GAAG,EAAE,OAAO,EAAE,IAAI,EAAE,EAAK,cAAc;YACtD,EAAE,WAAW,EAAE,GAAG,EAAE,OAAO,EAAE,IAAI,EAAE,EAAK,cAAc;YACtD,EAAE,WAAW,EAAE,GAAG,EAAE,OAAO,EAAE,KAAK,EAAE,EAAI,cAAc;SACvD,CAAC;QAEF,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC1C,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,iBAAiB,EAAE;gBAC/C,QAAQ,EAAE,6BAA6B,CAAC,GAAG,CAAC,GAAG;gBAC/C,cAAc,EAAE,YAAY;gBAC5B,kBAAkB,EAAE,iBAAiB,CAAC,GAAG,CAAC,EAAE;gBAC5C,WAAW,EAAE,SAAS,CAAC,CAAC,CAAC,CAAC,WAAW;gBACrC,IAAI,EAAE,CAAC,kBAAkB,CAAC;aAC3B,CAAC,CAAC;YAEH,MAAM,QAAQ,CAAC,kBAAkB,EAAE;gBACjC,UAAU,EAAG,MAAkC,CAAC,UAAU;gBAC1D,OAAO,EAAE,SAAS,CAAC,CAAC,CAAC,CAAC,OAAO;gBAC7B,eAAe,EAAE,mBAAmB,CAAC,GAAG,CAAC,EAAE;aAC5C,CAAC,CAAC;QACL,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,
uCAAuC,EAAE,KAAK,IAAI,EAAE;QACrD,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,qBAAqB,EAAE,EAAE,CAAC,CAAC;QAEzD,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,UAAU,EAAE,CAAC;QACjC,MAAM,CAAE,MAAM,CAAC,IAAkB,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACnD,MAAM,CAAC,MAAM,CAAC,YAAY,CAAC,CAAC,UAAU,EAAE,CAAC;QACzC,MAAM,CAAC,OAAO,MAAM,CAAC,YAAY,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAClD,MAAM,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,CAAC;IACzD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,6DAA6D,EAAE,KAAK,IAAI,EAAE;QAC3E,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,2BAA2B,EAAE,EAAE,CAAC,CAAC;QAE/D,8DAA8D;QAC9D,MAAM,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,CAAC;QACrD,qCAAqC;QACrC,MAAM,CAAC,MAAM,CAAC,YAAsB,CAAC,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC;IAC3D,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,+EAA+E;AAE/E,QAAQ,CAAC,wBAAwB,EAAE,GAAG,EAAE;IACtC,EAAE,CAAC,+CAA+C,EAAE,KAAK,IAAI,EAAE;QAC7D,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,iBAAiB,EAAE;YAC/C,QAAQ,EAAE,mBAAmB;YAC7B,cAAc,EAAE,YAAY;YAC5B,kBAAkB,EAAE,MAAM;YAC1B,WAAW,EAAE,CAAC,GAAG;SAClB,CAAC,CAAC;QAEH,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,SAAS,CAAC,iBAAiB,CAAC,CAAC;IACpD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,4DAA4D,EAAE,KAAK,IAAI,EAAE;QAC1E,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,iBAAiB,EAAE;YAC/C,QAAQ,EAAE,4BAA4B;YACtC,cAAc,EAAE,YAAY;YAC5B,kBAAkB,EAAE,MAAM;YAC1B,WAAW,EAAE,GAAG;SACjB,CAAC,CAAC;QAEH,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,kBAAkB,EAAE;YACjD,UAAU,EAAG,MAAkC,CAAC,UAAU;YAC1D,OAAO,EAAE,WAAW;YACpB,eAAe,EAAE,kCAAkC;SACpD,CAAC,CAAC;QAEH,MAAM,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC,QAAQ,EAAE,CAAC;QACtC,MAAM,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,QAAQ,EAAE,CAAC;IACtC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,6DAA6D,EAAE,KAAK,IAAI,EAAE;QAC3E,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,oBAAoB,EAAE;YAClD,UAAU,EAAE,gBAAgB;SAC7B,CAAC,CAAC;QAEH,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,SAAS,CAAC,WAAW,CAAC,CAAC;IAC9C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,wCAAwC,EAAE,KAAK,IAAI,EAAE;QACtD,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,sBAAsB,EAAE;YACpD,IAAI,EAAE,CAAC,kBAAkB,CAAC;SAC3B,CAAC,CAAC;QAEH,mEAAmE;QACnE,MAAM,SAAS,GAAG,MAAM,CAAC,SAA2C,CAAC;QACrE,MAAM,WAAW,GAAG,SAAS,CAAC,MAAM,CAAC,CA
AC,CAAC,EAAE,EAAE,CACxC,CAAC,CAAC,IAAiB,CAAC,QAAQ,CAAC,kBAAkB,CAAC,CAClD,CAAC;QACF,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACrC,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
@@ -1,9 +0,0 @@
1
- /**
2
- * Forecasting Scoring Engine — Unit Tests
3
- *
4
- * Tests for brierScore, logScore, calibrationBins, isotonicCalibrate,
5
- * averageBrier, averageLogScore, formatForecastDiff.
6
- *
7
- * All functions are pure — no Convex, no SQLite, no network.
8
- */
9
- export {};
@@ -1,202 +0,0 @@
1
- /**
2
- * Forecasting Scoring Engine — Unit Tests
3
- *
4
- * Tests for brierScore, logScore, calibrationBins, isotonicCalibrate,
5
- * averageBrier, averageLogScore, formatForecastDiff.
6
- *
7
- * All functions are pure — no Convex, no SQLite, no network.
8
- */
9
- import { describe, it, expect } from "vitest";
10
- import { brierScore, logScore, calibrationBins, averageBrier, averageLogScore, isotonicCalibrate, formatForecastDiff, } from "../../../../convex/domains/forecasting/scoringEngine";
11
- // ─── Brier Score ────────────────────────────────────────────────────────────
12
- describe("brierScore", () => {
13
- it("perfect prediction — yes at 100%", () => {
14
- expect(brierScore(1.0, "yes")).toBe(0);
15
- });
16
- it("perfect prediction — no at 0%", () => {
17
- expect(brierScore(0.0, "no")).toBe(0);
18
- });
19
- it("worst prediction — yes at 0%", () => {
20
- expect(brierScore(0.0, "yes")).toBe(1);
21
- });
22
- it("worst prediction — no at 100%", () => {
23
- expect(brierScore(1.0, "no")).toBe(1);
24
- });
25
- it("coin flip — 50% on yes", () => {
26
- expect(brierScore(0.5, "yes")).toBe(0.25);
27
- });
28
- it("coin flip — 50% on no", () => {
29
- expect(brierScore(0.5, "no")).toBe(0.25);
30
- });
31
- it("65% on yes outcome", () => {
32
- expect(brierScore(0.65, "yes")).toBeCloseTo(0.1225, 4);
33
- });
34
- it("80% on no outcome", () => {
35
- expect(brierScore(0.8, "no")).toBeCloseTo(0.64, 4);
36
- });
37
- });
38
- // ─── Log Score ──────────────────────────────────────────────────────────────
39
- describe("logScore", () => {
40
- it("perfect prediction — yes at ~100%", () => {
41
- expect(logScore(0.999, "yes")).toBeCloseTo(0.001, 2);
42
- });
43
- it("perfect prediction — no at ~0%", () => {
44
- expect(logScore(0.001, "no")).toBeCloseTo(0.001, 2);
45
- });
46
- it("worst prediction — yes at ~0%", () => {
47
- // -log(0.001) ≈ 6.9
48
- expect(logScore(0.001, "yes")).toBeCloseTo(6.908, 1);
49
- });
50
- it("coin flip — 50% on yes", () => {
51
- // -log(0.5) ≈ 0.693
52
- expect(logScore(0.5, "yes")).toBeCloseTo(0.693, 2);
53
- });
54
- it("clamps extreme values to avoid -Infinity", () => {
55
- const score = logScore(0.0, "yes");
56
- expect(Number.isFinite(score)).toBe(true);
57
- expect(score).toBeGreaterThan(0);
58
- });
59
- it("clamps 1.0 for no outcome", () => {
60
- const score = logScore(1.0, "no");
61
- expect(Number.isFinite(score)).toBe(true);
62
- expect(score).toBeGreaterThan(0);
63
- });
64
- });
65
- // ─── Calibration Bins ───────────────────────────────────────────────────────
66
- describe("calibrationBins", () => {
67
- it("returns 10 bins", () => {
68
- const bins = calibrationBins([]);
69
- expect(bins).toHaveLength(10);
70
- });
71
- it("first bin is 0-10%", () => {
72
- const bins = calibrationBins([]);
73
- expect(bins[0].binLabel).toBe("0-10%");
74
- expect(bins[0].predictedProb).toBe(0.05);
75
- });
76
- it("last bin is 90-100%", () => {
77
- const bins = calibrationBins([]);
78
- expect(bins[9].binLabel).toBe("90-100%");
79
- expect(bins[9].predictedProb).toBe(0.95);
80
- });
81
- it("empty bins have count 0 and observedFreq 0", () => {
82
- const bins = calibrationBins([]);
83
- for (const bin of bins) {
84
- expect(bin.count).toBe(0);
85
- expect(bin.observedFreq).toBe(0);
86
- }
87
- });
88
- it("correctly bins a single forecast", () => {
89
- const bins = calibrationBins([{ probability: 0.75, outcome: "yes" }]);
90
- const bin70 = bins[7]; // 70-80%
91
- expect(bin70.count).toBe(1);
92
- expect(bin70.observedFreq).toBe(1); // 1/1 = yes
93
- });
94
- it("correctly computes observed frequency", () => {
95
- const forecasts = [
96
- { probability: 0.55, outcome: "yes" },
97
- { probability: 0.52, outcome: "no" },
98
- { probability: 0.58, outcome: "yes" },
99
- { probability: 0.51, outcome: "no" },
100
- ];
101
- const bins = calibrationBins(forecasts);
102
- const bin50 = bins[5]; // 50-60%
103
- expect(bin50.count).toBe(4);
104
- expect(bin50.observedFreq).toBe(0.5); // 2/4
105
- });
106
- it("boundary value 1.0 goes in 90-100% bin", () => {
107
- const bins = calibrationBins([{ probability: 1.0, outcome: "yes" }]);
108
- expect(bins[9].count).toBe(1);
109
- });
110
- it("boundary value 0.0 goes in 0-10% bin", () => {
111
- const bins = calibrationBins([{ probability: 0.0, outcome: "no" }]);
112
- expect(bins[0].count).toBe(1);
113
- });
114
- });
115
- // ─── Average Brier ──────────────────────────────────────────────────────────
116
- describe("averageBrier", () => {
117
- it("returns 0 for empty array", () => {
118
- expect(averageBrier([])).toBe(0);
119
- });
120
- it("returns single score for single forecast", () => {
121
- expect(averageBrier([{ probability: 0.7, outcome: "yes" }])).toBeCloseTo(0.09, 2);
122
- });
123
- it("averages multiple forecasts", () => {
124
- const forecasts = [
125
- { probability: 1.0, outcome: "yes" }, // 0
126
- { probability: 0.0, outcome: "yes" }, // 1
127
- ];
128
- expect(averageBrier(forecasts)).toBe(0.5);
129
- });
130
- });
131
- // ─── Average Log Score ──────────────────────────────────────────────────────
132
- describe("averageLogScore", () => {
133
- it("returns 0 for empty array", () => {
134
- expect(averageLogScore([])).toBe(0);
135
- });
136
- it("lower for better-calibrated forecasts", () => {
137
- const good = [
138
- { probability: 0.9, outcome: "yes" },
139
- { probability: 0.1, outcome: "no" },
140
- ];
141
- const bad = [
142
- { probability: 0.1, outcome: "yes" },
143
- { probability: 0.9, outcome: "no" },
144
- ];
145
- expect(averageLogScore(good)).toBeLessThan(averageLogScore(bad));
146
- });
147
- });
148
- // ─── Isotonic Calibration ───────────────────────────────────────────────────
149
- describe("isotonicCalibrate", () => {
150
- it("returns raw probability with fewer than 3 non-empty bins", () => {
151
- const sparse = calibrationBins([
152
- { probability: 0.15, outcome: "yes" },
153
- { probability: 0.85, outcome: "no" },
154
- ]);
155
- expect(isotonicCalibrate(0.5, sparse)).toBe(0.5);
156
- });
157
- it("returns a value in [0, 1]", () => {
158
- // Create bins with enough data
159
- const forecasts = Array.from({ length: 50 }, (_, i) => ({
160
- probability: (i + 0.5) / 50,
161
- outcome: (Math.random() > 0.5 ? "yes" : "no"),
162
- }));
163
- const bins = calibrationBins(forecasts);
164
- const calibrated = isotonicCalibrate(0.7, bins);
165
- expect(calibrated).toBeGreaterThanOrEqual(0);
166
- expect(calibrated).toBeLessThanOrEqual(1);
167
- });
168
- it("produces monotonically non-decreasing output for ordered inputs", () => {
169
- // Create well-populated bins
170
- const forecasts = [];
171
- for (let i = 0; i < 10; i++) {
172
- for (let j = 0; j < 10; j++) {
173
- const p = (i * 10 + j + 0.5) / 100;
174
- // Roughly calibrated: higher p → more "yes"
175
- const outcome = Math.random() < p ? "yes" : "no";
176
- forecasts.push({ probability: p, outcome: outcome });
177
- }
178
- }
179
- const bins = calibrationBins(forecasts);
180
- const inputs = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9];
181
- const outputs = inputs.map((p) => isotonicCalibrate(p, bins));
182
- for (let i = 0; i < outputs.length - 1; i++) {
183
- expect(outputs[i]).toBeLessThanOrEqual(outputs[i + 1] + 0.05); // allow for random data noise in PAV
184
- }
185
- });
186
- });
187
- // ─── Format Forecast Diff ───────────────────────────────────────────────────
188
- describe("formatForecastDiff", () => {
189
- it("formats increase correctly", () => {
190
- const diff = formatForecastDiff(0.35, 0.55, "New evidence from Fed minutes");
191
- expect(diff).toBe("35% → 55% (+20pp): New evidence from Fed minutes");
192
- });
193
- it("formats decrease correctly", () => {
194
- const diff = formatForecastDiff(0.8, 0.6, "CEO resignation");
195
- expect(diff).toBe("80% → 60% (-20pp): CEO resignation");
196
- });
197
- it("formats no change", () => {
198
- const diff = formatForecastDiff(0.5, 0.5, "No new evidence");
199
- expect(diff).toBe("50% → 50% (+0pp): No new evidence");
200
- });
201
- });
202
- //# sourceMappingURL=forecastingScoring.test.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"forecastingScoring.test.js","sourceRoot":"","sources":["../../src/__tests__/forecastingScoring.test.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAC9C,OAAO,EACL,UAAU,EACV,QAAQ,EACR,eAAe,EACf,YAAY,EACZ,eAAe,EACf,iBAAiB,EACjB,kBAAkB,GACnB,MAAM,sDAAsD,CAAC;AAE9D,+EAA+E;AAE/E,QAAQ,CAAC,YAAY,EAAE,GAAG,EAAE;IAC1B,EAAE,CAAC,kCAAkC,EAAE,GAAG,EAAE;QAC1C,MAAM,CAAC,UAAU,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACzC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,+BAA+B,EAAE,GAAG,EAAE;QACvC,MAAM,CAAC,UAAU,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACxC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,8BAA8B,EAAE,GAAG,EAAE;QACtC,MAAM,CAAC,UAAU,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACzC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,+BAA+B,EAAE,GAAG,EAAE;QACvC,MAAM,CAAC,UAAU,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACxC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,wBAAwB,EAAE,GAAG,EAAE;QAChC,MAAM,CAAC,UAAU,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC5C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,uBAAuB,EAAE,GAAG,EAAE;QAC/B,MAAM,CAAC,UAAU,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC3C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,oBAAoB,EAAE,GAAG,EAAE;QAC5B,MAAM,CAAC,UAAU,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC,CAAC,WAAW,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;IACzD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,mBAAmB,EAAE,GAAG,EAAE;QAC3B,MAAM,CAAC,UAAU,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;IACrD,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,+EAA+E;AAE/E,QAAQ,CAAC,UAAU,EAAE,GAAG,EAAE;IACxB,EAAE,CAAC,mCAAmC,EAAE,GAAG,EAAE;QAC3C,MAAM,CAAC,QAAQ,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC,CAAC,WAAW,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;IACvD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,gCAAgC,EAAE,GAAG,EAAE;QACxC,MAAM,CAAC,QAAQ,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC,CAAC,WAAW,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;IACtD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,+BAA+B,EAAE,GAAG,EAAE;QACvC,oBAAoB;QACpB,MAAM,CAAC,QAAQ,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC,CAAC,WAAW,CAAC
,KAAK,EAAE,CAAC,CAAC,CAAC;IACvD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,wBAAwB,EAAE,GAAG,EAAE;QAChC,oBAAoB;QACpB,MAAM,CAAC,QAAQ,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC,CAAC,WAAW,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;IACrD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,0CAA0C,EAAE,GAAG,EAAE;QAClD,MAAM,KAAK,GAAG,QAAQ,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;QACnC,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC1C,MAAM,CAAC,KAAK,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;IACnC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,2BAA2B,EAAE,GAAG,EAAE;QACnC,MAAM,KAAK,GAAG,QAAQ,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC;QAClC,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC1C,MAAM,CAAC,KAAK,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;IACnC,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,+EAA+E;AAE/E,QAAQ,CAAC,iBAAiB,EAAE,GAAG,EAAE;IAC/B,EAAE,CAAC,iBAAiB,EAAE,GAAG,EAAE;QACzB,MAAM,IAAI,GAAG,eAAe,CAAC,EAAE,CAAC,CAAC;QACjC,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,EAAE,CAAC,CAAC;IAChC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,oBAAoB,EAAE,GAAG,EAAE;QAC5B,MAAM,IAAI,GAAG,eAAe,CAAC,EAAE,CAAC,CAAC;QACjC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACvC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC3C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,qBAAqB,EAAE,GAAG,EAAE;QAC7B,MAAM,IAAI,GAAG,eAAe,CAAC,EAAE,CAAC,CAAC;QACjC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QACzC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC3C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,4CAA4C,EAAE,GAAG,EAAE;QACpD,MAAM,IAAI,GAAG,eAAe,CAAC,EAAE,CAAC,CAAC;QACjC,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;YACvB,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAC1B,MAAM,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACnC,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,kCAAkC,EAAE,GAAG,EAAE;QAC1C,MAAM,IAAI,GAAG,eAAe,CAAC,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC;QACtE,MAAM,KAAK,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS;QAChC,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC
,CAAC,CAAC,CAAC;QAC5B,MAAM,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,YAAY;IAClD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,uCAAuC,EAAE,GAAG,EAAE;QAC/C,MAAM,SAAS,GAAG;YAChB,EAAE,WAAW,EAAE,IAAI,EAAE,OAAO,EAAE,KAAc,EAAE;YAC9C,EAAE,WAAW,EAAE,IAAI,EAAE,OAAO,EAAE,IAAa,EAAE;YAC7C,EAAE,WAAW,EAAE,IAAI,EAAE,OAAO,EAAE,KAAc,EAAE;YAC9C,EAAE,WAAW,EAAE,IAAI,EAAE,OAAO,EAAE,IAAa,EAAE;SAC9C,CAAC;QACF,MAAM,IAAI,GAAG,eAAe,CAAC,SAAS,CAAC,CAAC;QACxC,MAAM,KAAK,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS;QAChC,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC5B,MAAM,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,MAAM;IAC9C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,wCAAwC,EAAE,GAAG,EAAE;QAChD,MAAM,IAAI,GAAG,eAAe,CAAC,CAAC,EAAE,WAAW,EAAE,GAAG,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC;QACrE,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAChC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,sCAAsC,EAAE,GAAG,EAAE;QAC9C,MAAM,IAAI,GAAG,eAAe,CAAC,CAAC,EAAE,WAAW,EAAE,GAAG,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC;QACpE,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAChC,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,+EAA+E;AAE/E,QAAQ,CAAC,cAAc,EAAE,GAAG,EAAE;IAC5B,EAAE,CAAC,2BAA2B,EAAE,GAAG,EAAE;QACnC,MAAM,CAAC,YAAY,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACnC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,0CAA0C,EAAE,GAAG,EAAE;QAClD,MAAM,CAAC,YAAY,CAAC,CAAC,EAAE,WAAW,EAAE,GAAG,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,WAAW,CACtE,IAAI,EACJ,CAAC,CACF,CAAC;IACJ,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,6BAA6B,EAAE,GAAG,EAAE;QACrC,MAAM,SAAS,GAAG;YAChB,EAAE,WAAW,EAAE,GAAG,EAAE,OAAO,EAAE,KAAc,EAAE,EAAE,IAAI;YACnD,EAAE,WAAW,EAAE,GAAG,EAAE,OAAO,EAAE,KAAc,EAAE,EAAE,IAAI;SACpD,CAAC;QACF,MAAM,CAAC,YAAY,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IAC5C,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,+EAA+E;AAE/E,QAAQ,CAAC,iBAAiB,EAAE,GAAG,EAAE;IAC/B,EAAE,CAAC,2BAA2B,EAAE,GAAG,EAAE;QACnC,MAAM,CAAC,eAAe,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACtC,CAAC,CAAC,CAAC;IAEH,EAAE,C
AAC,uCAAuC,EAAE,GAAG,EAAE;QAC/C,MAAM,IAAI,GAAG;YACX,EAAE,WAAW,EAAE,GAAG,EAAE,OAAO,EAAE,KAAc,EAAE;YAC7C,EAAE,WAAW,EAAE,GAAG,EAAE,OAAO,EAAE,IAAa,EAAE;SAC7C,CAAC;QACF,MAAM,GAAG,GAAG;YACV,EAAE,WAAW,EAAE,GAAG,EAAE,OAAO,EAAE,KAAc,EAAE;YAC7C,EAAE,WAAW,EAAE,GAAG,EAAE,OAAO,EAAE,IAAa,EAAE;SAC7C,CAAC;QACF,MAAM,CAAC,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC,YAAY,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC,CAAC;IACnE,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,+EAA+E;AAE/E,QAAQ,CAAC,mBAAmB,EAAE,GAAG,EAAE;IACjC,EAAE,CAAC,0DAA0D,EAAE,GAAG,EAAE;QAClE,MAAM,MAAM,GAAG,eAAe,CAAC;YAC7B,EAAE,WAAW,EAAE,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE;YACrC,EAAE,WAAW,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE;SACrC,CAAC,CAAC;QACH,MAAM,CAAC,iBAAiB,CAAC,GAAG,EAAE,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IACnD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,2BAA2B,EAAE,GAAG,EAAE;QACnC,+BAA+B;QAC/B,MAAM,SAAS,GAAG,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC;YACtD,WAAW,EAAE,CAAC,CAAC,GAAG,GAAG,CAAC,GAAG,EAAE;YAC3B,OAAO,EAAE,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAiB;SAC9D,CAAC,CAAC,CAAC;QACJ,MAAM,IAAI,GAAG,eAAe,CAAC,SAAS,CAAC,CAAC;QACxC,MAAM,UAAU,GAAG,iBAAiB,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC;QAChD,MAAM,CAAC,UAAU,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,CAAC;QAC7C,MAAM,CAAC,UAAU,CAAC,CAAC,mBAAmB,CAAC,CAAC,CAAC,CAAC;IAC5C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,iEAAiE,EAAE,GAAG,EAAE;QACzE,6BAA6B;QAC7B,MAAM,SAAS,GAA0D,EAAE,CAAC;QAC5E,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC;YAC5B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC5B,MAAM,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,GAAG,CAAC,GAAG,GAAG,CAAC,GAAG,GAAG,CAAC;gBACnC,4CAA4C;gBAC5C,MAAM,OAAO,GAAG,IAAI,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC;gBACjD,SAAS,CAAC,IAAI,CAAC,EAAE,WAAW,EAAE,CAAC,EAAE,OAAO,EAAE,OAAuB,EAAE,CAAC,CAAC;YACvE,CAAC;QACH,CAAC;QACD,MAAM,IAAI,GAAG,eAAe,CAAC,SAAS,CAAC,CAAC;QAExC,MAAM,MAAM,GAAG,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,G
AAG,EAAE,GAAG,CAAC,CAAC;QAC7D,MAAM,OAAO,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,CAAC;QAE9D,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YAC5C,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,mBAAmB,CAAC,OAAO,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC,qCAAqC;QACtG,CAAC;IACH,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,+EAA+E;AAE/E,QAAQ,CAAC,oBAAoB,EAAE,GAAG,EAAE;IAClC,EAAE,CAAC,4BAA4B,EAAE,GAAG,EAAE;QACpC,MAAM,IAAI,GAAG,kBAAkB,CAAC,IAAI,EAAE,IAAI,EAAE,+BAA+B,CAAC,CAAC;QAC7E,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CACf,kDAAkD,CACnD,CAAC;IACJ,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,4BAA4B,EAAE,GAAG,EAAE;QACpC,MAAM,IAAI,GAAG,kBAAkB,CAAC,GAAG,EAAE,GAAG,EAAE,iBAAiB,CAAC,CAAC;QAC7D,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,oCAAoC,CAAC,CAAC;IAC1D,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,mBAAmB,EAAE,GAAG,EAAE;QAC3B,MAAM,IAAI,GAAG,kBAAkB,CAAC,GAAG,EAAE,GAAG,EAAE,iBAAiB,CAAC,CAAC;QAC7D,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,mCAAmC,CAAC,CAAC;IACzD,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
@@ -1,15 +0,0 @@
1
- /**
2
- * GAIA audio-backed capability/accuracy benchmark: LLM-only vs LLM+NodeBench MCP local audio tools.
3
- *
4
- * This lane targets GAIA tasks that include audio attachments (MP3/WAV/etc).
5
- * We provide deterministic local transcription via NodeBench MCP tools and score answers against
6
- * the ground-truth "Final answer" (stored locally under `.cache/gaia`, gitignored).
7
- *
8
- * Safety:
9
- * - GAIA is gated. Do not commit fixtures that contain prompts/answers.
10
- * - This test logs only task IDs and aggregate metrics (no prompt/answer text).
11
- *
12
- * Disabled by default (cost + rate limits). Run with:
13
- * NODEBENCH_RUN_GAIA_CAPABILITY=1 npm --prefix packages/mcp-local run test
14
- */
15
- export {};