nodebench-mcp 2.70.0 → 3.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (361) hide show
  1. package/README.md +95 -41
  2. package/dist/agents/alertRouter.d.ts +38 -0
  3. package/dist/agents/alertRouter.js +151 -0
  4. package/dist/agents/alertRouter.js.map +1 -0
  5. package/dist/agents/entityMemory.d.ts +40 -0
  6. package/dist/agents/entityMemory.js +64 -0
  7. package/dist/agents/entityMemory.js.map +1 -0
  8. package/dist/agents/subAgents.d.ts +35 -0
  9. package/dist/agents/subAgents.js +62 -0
  10. package/dist/agents/subAgents.js.map +1 -0
  11. package/dist/benchmarks/benchmarkRunner.js +14 -0
  12. package/dist/benchmarks/benchmarkRunner.js.map +1 -1
  13. package/dist/benchmarks/chainEval.js +107 -0
  14. package/dist/benchmarks/chainEval.js.map +1 -1
  15. package/dist/benchmarks/llmJudgeEval.js +85 -0
  16. package/dist/benchmarks/llmJudgeEval.js.map +1 -1
  17. package/dist/benchmarks/searchQualityEval.js +118 -5
  18. package/dist/benchmarks/searchQualityEval.js.map +1 -1
  19. package/dist/cli/search.d.ts +13 -0
  20. package/dist/cli/search.js +130 -0
  21. package/dist/cli/search.js.map +1 -0
  22. package/dist/dashboard/operatingDashboardHtml.js +2 -1
  23. package/dist/dashboard/operatingDashboardHtml.js.map +1 -1
  24. package/dist/dashboard/operatingServer.js +3 -2
  25. package/dist/dashboard/operatingServer.js.map +1 -1
  26. package/dist/db.d.ts +6 -2
  27. package/dist/db.js +521 -6
  28. package/dist/db.js.map +1 -1
  29. package/dist/index.js +349 -67
  30. package/dist/index.js.map +1 -1
  31. package/dist/packageInfo.d.ts +3 -0
  32. package/dist/packageInfo.js +32 -0
  33. package/dist/packageInfo.js.map +1 -0
  34. package/dist/profiler/behaviorStore.d.ts +97 -0
  35. package/dist/profiler/behaviorStore.js +276 -0
  36. package/dist/profiler/behaviorStore.js.map +1 -0
  37. package/dist/profiler/eventCollector.d.ts +119 -0
  38. package/dist/profiler/eventCollector.js +267 -0
  39. package/dist/profiler/eventCollector.js.map +1 -0
  40. package/dist/profiler/index.d.ts +15 -0
  41. package/dist/profiler/index.js +16 -0
  42. package/dist/profiler/index.js.map +1 -0
  43. package/dist/profiler/mcpProxy.d.ts +49 -0
  44. package/dist/profiler/mcpProxy.js +123 -0
  45. package/dist/profiler/mcpProxy.js.map +1 -0
  46. package/dist/profiler/modelRouter.d.ts +30 -0
  47. package/dist/profiler/modelRouter.js +99 -0
  48. package/dist/profiler/modelRouter.js.map +1 -0
  49. package/dist/profiler/otelReceiver.d.ts +17 -0
  50. package/dist/profiler/otelReceiver.js +62 -0
  51. package/dist/profiler/otelReceiver.js.map +1 -0
  52. package/dist/profiler/proofEngine.d.ts +41 -0
  53. package/dist/profiler/proofEngine.js +93 -0
  54. package/dist/profiler/proofEngine.js.map +1 -0
  55. package/dist/profiler/workflowTemplates.d.ts +41 -0
  56. package/dist/profiler/workflowTemplates.js +95 -0
  57. package/dist/profiler/workflowTemplates.js.map +1 -0
  58. package/dist/providers/localMemoryProvider.js +3 -2
  59. package/dist/providers/localMemoryProvider.js.map +1 -1
  60. package/dist/runtimeConfig.d.ts +11 -0
  61. package/dist/runtimeConfig.js +27 -0
  62. package/dist/runtimeConfig.js.map +1 -0
  63. package/dist/sandboxApi.js +2 -1
  64. package/dist/sandboxApi.js.map +1 -1
  65. package/dist/security/auditLog.js +8 -3
  66. package/dist/security/auditLog.js.map +1 -1
  67. package/dist/subconscious/blocks.d.ts +43 -0
  68. package/dist/subconscious/blocks.js +158 -0
  69. package/dist/subconscious/blocks.js.map +1 -0
  70. package/dist/subconscious/classifier.d.ts +22 -0
  71. package/dist/subconscious/classifier.js +118 -0
  72. package/dist/subconscious/classifier.js.map +1 -0
  73. package/dist/subconscious/graphEngine.d.ts +65 -0
  74. package/dist/subconscious/graphEngine.js +234 -0
  75. package/dist/subconscious/graphEngine.js.map +1 -0
  76. package/dist/subconscious/index.d.ts +19 -0
  77. package/dist/subconscious/index.js +20 -0
  78. package/dist/subconscious/index.js.map +1 -0
  79. package/dist/subconscious/tools.d.ts +5 -0
  80. package/dist/subconscious/tools.js +255 -0
  81. package/dist/subconscious/tools.js.map +1 -0
  82. package/dist/subconscious/whisperPolicy.d.ts +20 -0
  83. package/dist/subconscious/whisperPolicy.js +171 -0
  84. package/dist/subconscious/whisperPolicy.js.map +1 -0
  85. package/dist/sweep/engine.d.ts +27 -0
  86. package/dist/sweep/engine.js +244 -0
  87. package/dist/sweep/engine.js.map +1 -0
  88. package/dist/sweep/index.d.ts +9 -0
  89. package/dist/sweep/index.js +8 -0
  90. package/dist/sweep/index.js.map +1 -0
  91. package/dist/sweep/sources/github_trending.d.ts +6 -0
  92. package/dist/sweep/sources/github_trending.js +37 -0
  93. package/dist/sweep/sources/github_trending.js.map +1 -0
  94. package/dist/sweep/sources/hackernews.d.ts +7 -0
  95. package/dist/sweep/sources/hackernews.js +57 -0
  96. package/dist/sweep/sources/hackernews.js.map +1 -0
  97. package/dist/sweep/sources/openbb_finance.d.ts +9 -0
  98. package/dist/sweep/sources/openbb_finance.js +46 -0
  99. package/dist/sweep/sources/openbb_finance.js.map +1 -0
  100. package/dist/sweep/sources/producthunt.d.ts +6 -0
  101. package/dist/sweep/sources/producthunt.js +41 -0
  102. package/dist/sweep/sources/producthunt.js.map +1 -0
  103. package/dist/sweep/sources/web_signals.d.ts +7 -0
  104. package/dist/sweep/sources/web_signals.js +63 -0
  105. package/dist/sweep/sources/web_signals.js.map +1 -0
  106. package/dist/sweep/sources/yahoo_finance.d.ts +6 -0
  107. package/dist/sweep/sources/yahoo_finance.js +47 -0
  108. package/dist/sweep/sources/yahoo_finance.js.map +1 -0
  109. package/dist/sweep/types.d.ts +50 -0
  110. package/dist/sweep/types.js +9 -0
  111. package/dist/sweep/types.js.map +1 -0
  112. package/dist/sync/founderEpisodeStore.d.ts +98 -0
  113. package/dist/sync/founderEpisodeStore.js +230 -0
  114. package/dist/sync/founderEpisodeStore.js.map +1 -0
  115. package/dist/sync/hyperloopArchive.d.ts +51 -0
  116. package/dist/sync/hyperloopArchive.js +153 -0
  117. package/dist/sync/hyperloopArchive.js.map +1 -0
  118. package/dist/sync/hyperloopEval.d.ts +123 -0
  119. package/dist/sync/hyperloopEval.js +389 -0
  120. package/dist/sync/hyperloopEval.js.map +1 -0
  121. package/dist/sync/protocol.d.ts +172 -0
  122. package/dist/sync/protocol.js +9 -0
  123. package/dist/sync/protocol.js.map +1 -0
  124. package/dist/sync/sessionMemory.d.ts +47 -0
  125. package/dist/sync/sessionMemory.js +138 -0
  126. package/dist/sync/sessionMemory.js.map +1 -0
  127. package/dist/sync/store.d.ts +384 -0
  128. package/dist/sync/store.js +1435 -0
  129. package/dist/sync/store.js.map +1 -0
  130. package/dist/sync/syncBridgeClient.d.ts +30 -0
  131. package/dist/sync/syncBridgeClient.js +172 -0
  132. package/dist/sync/syncBridgeClient.js.map +1 -0
  133. package/dist/tools/autonomousDeliveryTools.d.ts +2 -0
  134. package/dist/tools/autonomousDeliveryTools.js +1104 -0
  135. package/dist/tools/autonomousDeliveryTools.js.map +1 -0
  136. package/dist/tools/boilerplateTools.js +10 -9
  137. package/dist/tools/boilerplateTools.js.map +1 -1
  138. package/dist/tools/claudeCodeIngestTools.d.ts +10 -0
  139. package/dist/tools/claudeCodeIngestTools.js +347 -0
  140. package/dist/tools/claudeCodeIngestTools.js.map +1 -0
  141. package/dist/tools/coreWorkflowTools.d.ts +2 -0
  142. package/dist/tools/coreWorkflowTools.js +488 -0
  143. package/dist/tools/coreWorkflowTools.js.map +1 -0
  144. package/dist/tools/deltaTools.d.ts +15 -0
  145. package/dist/tools/deltaTools.js +1522 -0
  146. package/dist/tools/deltaTools.js.map +1 -0
  147. package/dist/tools/documentationTools.js +2 -1
  148. package/dist/tools/documentationTools.js.map +1 -1
  149. package/dist/tools/entityLookupTools.d.ts +14 -0
  150. package/dist/tools/entityLookupTools.js +159 -0
  151. package/dist/tools/entityLookupTools.js.map +1 -0
  152. package/dist/tools/entityTemporalTools.d.ts +12 -0
  153. package/dist/tools/entityTemporalTools.js +330 -0
  154. package/dist/tools/entityTemporalTools.js.map +1 -0
  155. package/dist/tools/founderLocalPipeline.d.ts +215 -0
  156. package/dist/tools/founderLocalPipeline.js +1516 -2
  157. package/dist/tools/founderLocalPipeline.js.map +1 -1
  158. package/dist/tools/founderOperatingModel.d.ts +120 -0
  159. package/dist/tools/founderOperatingModel.js +469 -0
  160. package/dist/tools/founderOperatingModel.js.map +1 -0
  161. package/dist/tools/founderOperatingModelTools.d.ts +2 -0
  162. package/dist/tools/founderOperatingModelTools.js +169 -0
  163. package/dist/tools/founderOperatingModelTools.js.map +1 -0
  164. package/dist/tools/founderStrategicOpsTools.d.ts +2 -0
  165. package/dist/tools/founderStrategicOpsTools.js +1310 -0
  166. package/dist/tools/founderStrategicOpsTools.js.map +1 -0
  167. package/dist/tools/graphifyTools.d.ts +19 -0
  168. package/dist/tools/graphifyTools.js +375 -0
  169. package/dist/tools/graphifyTools.js.map +1 -0
  170. package/dist/tools/index.d.ts +3 -0
  171. package/dist/tools/index.js +4 -0
  172. package/dist/tools/index.js.map +1 -1
  173. package/dist/tools/monteCarloTools.d.ts +16 -0
  174. package/dist/tools/monteCarloTools.js +225 -0
  175. package/dist/tools/monteCarloTools.js.map +1 -0
  176. package/dist/tools/packetCompilerTools.d.ts +12 -0
  177. package/dist/tools/packetCompilerTools.js +322 -0
  178. package/dist/tools/packetCompilerTools.js.map +1 -0
  179. package/dist/tools/planSynthesisTools.d.ts +15 -0
  180. package/dist/tools/planSynthesisTools.js +455 -0
  181. package/dist/tools/planSynthesisTools.js.map +1 -0
  182. package/dist/tools/profilerTools.d.ts +20 -0
  183. package/dist/tools/profilerTools.js +364 -0
  184. package/dist/tools/profilerTools.js.map +1 -0
  185. package/dist/tools/progressiveDiscoveryTools.js +2 -1
  186. package/dist/tools/progressiveDiscoveryTools.js.map +1 -1
  187. package/dist/tools/savingsTools.d.ts +11 -0
  188. package/dist/tools/savingsTools.js +155 -0
  189. package/dist/tools/savingsTools.js.map +1 -0
  190. package/dist/tools/scenarioCompilerTools.d.ts +14 -0
  191. package/dist/tools/scenarioCompilerTools.js +290 -0
  192. package/dist/tools/scenarioCompilerTools.js.map +1 -0
  193. package/dist/tools/sharedContextTools.d.ts +2 -0
  194. package/dist/tools/sharedContextTools.js +423 -0
  195. package/dist/tools/sharedContextTools.js.map +1 -0
  196. package/dist/tools/sitemapTools.d.ts +15 -0
  197. package/dist/tools/sitemapTools.js +560 -0
  198. package/dist/tools/sitemapTools.js.map +1 -0
  199. package/dist/tools/sweepTools.d.ts +9 -0
  200. package/dist/tools/sweepTools.js +112 -0
  201. package/dist/tools/sweepTools.js.map +1 -0
  202. package/dist/tools/syncBridgeTools.d.ts +2 -0
  203. package/dist/tools/syncBridgeTools.js +258 -0
  204. package/dist/tools/syncBridgeTools.js.map +1 -0
  205. package/dist/tools/toolRegistry.js +1223 -45
  206. package/dist/tools/toolRegistry.js.map +1 -1
  207. package/dist/tools/workspaceTools.d.ts +19 -0
  208. package/dist/tools/workspaceTools.js +762 -0
  209. package/dist/tools/workspaceTools.js.map +1 -0
  210. package/dist/toolsetRegistry.js +162 -3
  211. package/dist/toolsetRegistry.js.map +1 -1
  212. package/package.json +39 -38
  213. package/rules/nodebench-agentic-reliability.md +32 -0
  214. package/rules/nodebench-analyst-diagnostic.md +25 -0
  215. package/rules/nodebench-auto-qa.md +31 -0
  216. package/rules/nodebench-completion-traceability.md +22 -0
  217. package/rules/nodebench-flywheel-continuous.md +25 -0
  218. package/rules/nodebench-pre-release-review.md +24 -0
  219. package/rules/nodebench-qa-dogfood.md +26 -0
  220. package/rules/nodebench-scenario-testing.md +30 -0
  221. package/rules/nodebench-self-direction.md +23 -0
  222. package/rules/nodebench-self-judge-loop.md +24 -0
  223. package/scripts/install.sh +215 -0
  224. package/dist/__tests__/analytics.test.d.ts +0 -11
  225. package/dist/__tests__/analytics.test.js +0 -546
  226. package/dist/__tests__/analytics.test.js.map +0 -1
  227. package/dist/__tests__/architectComplex.test.d.ts +0 -1
  228. package/dist/__tests__/architectComplex.test.js +0 -373
  229. package/dist/__tests__/architectComplex.test.js.map +0 -1
  230. package/dist/__tests__/architectSmoke.test.d.ts +0 -1
  231. package/dist/__tests__/architectSmoke.test.js +0 -92
  232. package/dist/__tests__/architectSmoke.test.js.map +0 -1
  233. package/dist/__tests__/audit-registry.d.ts +0 -1
  234. package/dist/__tests__/audit-registry.js +0 -60
  235. package/dist/__tests__/audit-registry.js.map +0 -1
  236. package/dist/__tests__/batchAutopilot.test.d.ts +0 -8
  237. package/dist/__tests__/batchAutopilot.test.js +0 -218
  238. package/dist/__tests__/batchAutopilot.test.js.map +0 -1
  239. package/dist/__tests__/cliSubcommands.test.d.ts +0 -1
  240. package/dist/__tests__/cliSubcommands.test.js +0 -138
  241. package/dist/__tests__/cliSubcommands.test.js.map +0 -1
  242. package/dist/__tests__/comparativeBench.test.d.ts +0 -1
  243. package/dist/__tests__/comparativeBench.test.js +0 -722
  244. package/dist/__tests__/comparativeBench.test.js.map +0 -1
  245. package/dist/__tests__/critterCalibrationEval.d.ts +0 -8
  246. package/dist/__tests__/critterCalibrationEval.js +0 -370
  247. package/dist/__tests__/critterCalibrationEval.js.map +0 -1
  248. package/dist/__tests__/dynamicLoading.test.d.ts +0 -1
  249. package/dist/__tests__/dynamicLoading.test.js +0 -280
  250. package/dist/__tests__/dynamicLoading.test.js.map +0 -1
  251. package/dist/__tests__/embeddingProvider.test.d.ts +0 -1
  252. package/dist/__tests__/embeddingProvider.test.js +0 -86
  253. package/dist/__tests__/embeddingProvider.test.js.map +0 -1
  254. package/dist/__tests__/evalDatasetBench.test.d.ts +0 -1
  255. package/dist/__tests__/evalDatasetBench.test.js +0 -738
  256. package/dist/__tests__/evalDatasetBench.test.js.map +0 -1
  257. package/dist/__tests__/evalHarness.test.d.ts +0 -1
  258. package/dist/__tests__/evalHarness.test.js +0 -1107
  259. package/dist/__tests__/evalHarness.test.js.map +0 -1
  260. package/dist/__tests__/fixtures/bfcl_v3_long_context.sample.json +0 -264
  261. package/dist/__tests__/fixtures/generateBfclLongContextFixture.d.ts +0 -10
  262. package/dist/__tests__/fixtures/generateBfclLongContextFixture.js +0 -135
  263. package/dist/__tests__/fixtures/generateBfclLongContextFixture.js.map +0 -1
  264. package/dist/__tests__/fixtures/generateSwebenchVerifiedFixture.d.ts +0 -14
  265. package/dist/__tests__/fixtures/generateSwebenchVerifiedFixture.js +0 -189
  266. package/dist/__tests__/fixtures/generateSwebenchVerifiedFixture.js.map +0 -1
  267. package/dist/__tests__/fixtures/generateToolbenchInstructionFixture.d.ts +0 -16
  268. package/dist/__tests__/fixtures/generateToolbenchInstructionFixture.js +0 -154
  269. package/dist/__tests__/fixtures/generateToolbenchInstructionFixture.js.map +0 -1
  270. package/dist/__tests__/fixtures/swebench_verified.sample.json +0 -162
  271. package/dist/__tests__/fixtures/toolbench_instruction.sample.json +0 -109
  272. package/dist/__tests__/forecastingDogfood.test.d.ts +0 -9
  273. package/dist/__tests__/forecastingDogfood.test.js +0 -284
  274. package/dist/__tests__/forecastingDogfood.test.js.map +0 -1
  275. package/dist/__tests__/forecastingScoring.test.d.ts +0 -9
  276. package/dist/__tests__/forecastingScoring.test.js +0 -202
  277. package/dist/__tests__/forecastingScoring.test.js.map +0 -1
  278. package/dist/__tests__/gaiaCapabilityAudioEval.test.d.ts +0 -15
  279. package/dist/__tests__/gaiaCapabilityAudioEval.test.js +0 -265
  280. package/dist/__tests__/gaiaCapabilityAudioEval.test.js.map +0 -1
  281. package/dist/__tests__/gaiaCapabilityEval.test.d.ts +0 -14
  282. package/dist/__tests__/gaiaCapabilityEval.test.js +0 -1259
  283. package/dist/__tests__/gaiaCapabilityEval.test.js.map +0 -1
  284. package/dist/__tests__/gaiaCapabilityFilesEval.test.d.ts +0 -15
  285. package/dist/__tests__/gaiaCapabilityFilesEval.test.js +0 -914
  286. package/dist/__tests__/gaiaCapabilityFilesEval.test.js.map +0 -1
  287. package/dist/__tests__/gaiaCapabilityMediaEval.test.d.ts +0 -15
  288. package/dist/__tests__/gaiaCapabilityMediaEval.test.js +0 -1101
  289. package/dist/__tests__/gaiaCapabilityMediaEval.test.js.map +0 -1
  290. package/dist/__tests__/helpers/answerMatch.d.ts +0 -41
  291. package/dist/__tests__/helpers/answerMatch.js +0 -267
  292. package/dist/__tests__/helpers/answerMatch.js.map +0 -1
  293. package/dist/__tests__/helpers/textLlm.d.ts +0 -25
  294. package/dist/__tests__/helpers/textLlm.js +0 -214
  295. package/dist/__tests__/helpers/textLlm.js.map +0 -1
  296. package/dist/__tests__/localDashboard.test.d.ts +0 -1
  297. package/dist/__tests__/localDashboard.test.js +0 -226
  298. package/dist/__tests__/localDashboard.test.js.map +0 -1
  299. package/dist/__tests__/multiHopDogfood.test.d.ts +0 -12
  300. package/dist/__tests__/multiHopDogfood.test.js +0 -303
  301. package/dist/__tests__/multiHopDogfood.test.js.map +0 -1
  302. package/dist/__tests__/openDatasetParallelEval.test.d.ts +0 -7
  303. package/dist/__tests__/openDatasetParallelEval.test.js +0 -209
  304. package/dist/__tests__/openDatasetParallelEval.test.js.map +0 -1
  305. package/dist/__tests__/openDatasetParallelEvalGaia.test.d.ts +0 -7
  306. package/dist/__tests__/openDatasetParallelEvalGaia.test.js +0 -279
  307. package/dist/__tests__/openDatasetParallelEvalGaia.test.js.map +0 -1
  308. package/dist/__tests__/openDatasetParallelEvalSwebench.test.d.ts +0 -7
  309. package/dist/__tests__/openDatasetParallelEvalSwebench.test.js +0 -220
  310. package/dist/__tests__/openDatasetParallelEvalSwebench.test.js.map +0 -1
  311. package/dist/__tests__/openDatasetParallelEvalToolbench.test.d.ts +0 -7
  312. package/dist/__tests__/openDatasetParallelEvalToolbench.test.js +0 -218
  313. package/dist/__tests__/openDatasetParallelEvalToolbench.test.js.map +0 -1
  314. package/dist/__tests__/openDatasetPerfComparison.test.d.ts +0 -10
  315. package/dist/__tests__/openDatasetPerfComparison.test.js +0 -318
  316. package/dist/__tests__/openDatasetPerfComparison.test.js.map +0 -1
  317. package/dist/__tests__/openclawDogfood.test.d.ts +0 -23
  318. package/dist/__tests__/openclawDogfood.test.js +0 -535
  319. package/dist/__tests__/openclawDogfood.test.js.map +0 -1
  320. package/dist/__tests__/openclawMessaging.test.d.ts +0 -14
  321. package/dist/__tests__/openclawMessaging.test.js +0 -232
  322. package/dist/__tests__/openclawMessaging.test.js.map +0 -1
  323. package/dist/__tests__/presetRealWorldBench.test.d.ts +0 -1
  324. package/dist/__tests__/presetRealWorldBench.test.js +0 -859
  325. package/dist/__tests__/presetRealWorldBench.test.js.map +0 -1
  326. package/dist/__tests__/tools.test.d.ts +0 -1
  327. package/dist/__tests__/tools.test.js +0 -3201
  328. package/dist/__tests__/tools.test.js.map +0 -1
  329. package/dist/__tests__/toolsetGatingEval.test.d.ts +0 -1
  330. package/dist/__tests__/toolsetGatingEval.test.js +0 -1099
  331. package/dist/__tests__/toolsetGatingEval.test.js.map +0 -1
  332. package/dist/__tests__/traceabilityDogfood.test.d.ts +0 -12
  333. package/dist/__tests__/traceabilityDogfood.test.js +0 -241
  334. package/dist/__tests__/traceabilityDogfood.test.js.map +0 -1
  335. package/dist/__tests__/webmcpTools.test.d.ts +0 -7
  336. package/dist/__tests__/webmcpTools.test.js +0 -195
  337. package/dist/__tests__/webmcpTools.test.js.map +0 -1
  338. package/dist/benchmarks/testProviderBus.d.ts +0 -7
  339. package/dist/benchmarks/testProviderBus.js +0 -272
  340. package/dist/benchmarks/testProviderBus.js.map +0 -1
  341. package/dist/hooks/postCompaction.d.ts +0 -14
  342. package/dist/hooks/postCompaction.js +0 -51
  343. package/dist/hooks/postCompaction.js.map +0 -1
  344. package/dist/security/__tests__/security.test.d.ts +0 -8
  345. package/dist/security/__tests__/security.test.js +0 -295
  346. package/dist/security/__tests__/security.test.js.map +0 -1
  347. package/dist/tools/documentTools.d.ts +0 -5
  348. package/dist/tools/documentTools.js +0 -524
  349. package/dist/tools/documentTools.js.map +0 -1
  350. package/dist/tools/financialTools.d.ts +0 -10
  351. package/dist/tools/financialTools.js +0 -403
  352. package/dist/tools/financialTools.js.map +0 -1
  353. package/dist/tools/memoryTools.d.ts +0 -5
  354. package/dist/tools/memoryTools.js +0 -137
  355. package/dist/tools/memoryTools.js.map +0 -1
  356. package/dist/tools/planningTools.d.ts +0 -5
  357. package/dist/tools/planningTools.js +0 -147
  358. package/dist/tools/planningTools.js.map +0 -1
  359. package/dist/tools/searchTools.d.ts +0 -5
  360. package/dist/tools/searchTools.js +0 -145
  361. package/dist/tools/searchTools.js.map +0 -1
@@ -1,265 +0,0 @@
1
- /**
2
- * GAIA audio-backed capability/accuracy benchmark: LLM-only vs LLM+NodeBench MCP local audio tools.
3
- *
4
- * This lane targets GAIA tasks that include audio attachments (MP3/WAV/etc).
5
- * We provide deterministic local transcription via NodeBench MCP tools and score answers against
6
- * the ground-truth "Final answer" (stored locally under `.cache/gaia`, gitignored).
7
- *
8
- * Safety:
9
- * - GAIA is gated. Do not commit fixtures that contain prompts/answers.
10
- * - This test logs only task IDs and aggregate metrics (no prompt/answer text).
11
- *
12
- * Disabled by default (cost + rate limits). Run with:
13
- * NODEBENCH_RUN_GAIA_CAPABILITY=1 npm --prefix packages/mcp-local run test
14
- */
15
- import { describe, expect, it } from "vitest";
16
- import { existsSync, readFileSync } from "node:fs";
17
- import { mkdir, readFile, writeFile } from "node:fs/promises";
18
- import path from "node:path";
19
- import { fileURLToPath } from "node:url";
20
- import { performance } from "node:perf_hooks";
21
- import { localFileTools } from "../tools/localFileTools.js";
22
- import { createTextLlmClient, generateTextFromHistory, } from "./helpers/textLlm.js";
23
- import { answersMatchWithJudge, autoDiscoverJudge } from "./helpers/answerMatch.js";
24
- const shouldRun = process.env.NODEBENCH_RUN_GAIA_CAPABILITY === "1";
25
- const shouldWriteReport = process.env.NODEBENCH_WRITE_GAIA_REPORT === "1";
26
- async function safeWriteJson(filePath, payload) {
27
- try {
28
- await mkdir(path.dirname(filePath), { recursive: true });
29
- await writeFile(filePath, JSON.stringify(payload, null, 2) + "\n", "utf8");
30
- }
31
- catch (err) {
32
- console.warn(`[gaia-capability-audio] report write failed: ${err?.message ?? String(err)}`);
33
- }
34
- }
35
- function resolveRepoRoot() {
36
- const testDir = path.dirname(fileURLToPath(import.meta.url));
37
- return path.resolve(testDir, "../../../..");
38
- }
39
- function resolveCapabilityAudioFixturePath() {
40
- const override = process.env.NODEBENCH_GAIA_CAPABILITY_AUDIO_FIXTURE_PATH;
41
- if (override) {
42
- if (path.isAbsolute(override))
43
- return override;
44
- const repoRoot = resolveRepoRoot();
45
- return path.resolve(repoRoot, override);
46
- }
47
- const config = process.env.NODEBENCH_GAIA_CAPABILITY_CONFIG ?? "2023_all";
48
- const split = process.env.NODEBENCH_GAIA_CAPABILITY_SPLIT ?? "validation";
49
- const repoRoot = resolveRepoRoot();
50
- return path.join(repoRoot, ".cache", "gaia", `gaia_capability_audio_${config}_${split}.sample.json`);
51
- }
52
- function loadDotEnvLocalIfPresent() {
53
- const repoRoot = resolveRepoRoot();
54
- const envPath = path.join(repoRoot, ".env.local");
55
- if (!existsSync(envPath))
56
- return;
57
- const text = readFileSync(envPath, "utf8");
58
- for (const rawLine of text.split(/\r?\n/)) {
59
- const line = rawLine.trim();
60
- if (!line || line.startsWith("#"))
61
- continue;
62
- const idx = line.indexOf("=");
63
- if (idx <= 0)
64
- continue;
65
- const key = line.slice(0, idx).trim();
66
- let value = line.slice(idx + 1).trim();
67
- if ((value.startsWith("\"") && value.endsWith("\"")) ||
68
- (value.startsWith("'") && value.endsWith("'"))) {
69
- value = value.slice(1, -1);
70
- }
71
- if (!process.env[key])
72
- process.env[key] = value;
73
- }
74
- }
75
- async function llmGenerateText(llm, history) {
76
- const temperature = Number.parseFloat(process.env.NODEBENCH_GAIA_CAPABILITY_TEMPERATURE ?? "0");
77
- return generateTextFromHistory(llm, history, {
78
- temperature: Number.isFinite(temperature) ? temperature : 0,
79
- maxOutputTokens: 1024,
80
- });
81
- }
82
- async function baselineAnswer(llm, task) {
83
- const contents = [
84
- {
85
- role: "user",
86
- parts: [
87
- {
88
- text: `Answer the question using your existing knowledge only. Do not browse the web.\n\nReturn ONLY the final answer, no explanation.\n\nQuestion:\n${task.prompt}`,
89
- },
90
- ],
91
- },
92
- ];
93
- return llmGenerateText(llm, contents);
94
- }
95
- async function loadFixture(filePath) {
96
- const raw = await readFile(filePath, "utf8");
97
- const json = JSON.parse(raw);
98
- return json;
99
- }
100
- function createToolIndex(tools) {
101
- const m = new Map();
102
- for (const t of tools)
103
- m.set(t.name, t);
104
- return m;
105
- }
106
- async function toolAugmentedAnswerFromAudio(llm, task, opts) {
107
- const localPath = String(task.localFilePath ?? "").trim();
108
- if (!localPath)
109
- throw new Error("Task missing localFilePath");
110
- const toolIndex = createToolIndex(localFileTools);
111
- const tool = toolIndex.get("transcribe_audio_file");
112
- if (!tool)
113
- throw new Error("Missing tool: transcribe_audio_file");
114
- if (opts.maxToolCalls < 1) {
115
- throw new Error("maxToolCalls must be >= 1 to run audio lane");
116
- }
117
- const transcript = (await tool.handler({
118
- path: localPath,
119
- model: process.env.NODEBENCH_AUDIO_MODEL ?? "tiny.en",
120
- maxChars: 20000,
121
- timeoutMs: 300000,
122
- }));
123
- const transcriptText = String(transcript?.text ?? "").trim();
124
- if (!transcriptText) {
125
- throw new Error("Empty transcript from transcribe_audio_file");
126
- }
127
- const contents = [
128
- {
129
- role: "user",
130
- parts: [
131
- {
132
- text: `You are given a transcript of an attached audio file. Use it to answer the question.\n\nRules:\n- Do not browse the web.\n- Return ONLY the final answer, no explanation.\n\nQuestion:\n${task.prompt}\n\nAudio transcript:\n${transcriptText}`,
133
- },
134
- ],
135
- },
136
- ];
137
- const answer = await llmGenerateText(llm, contents);
138
- return { answer, toolCalls: 1 };
139
- }
140
- describe("GAIA capability: audio lane", () => {
141
- const testFn = shouldRun ? it : it.skip;
142
- testFn("should measure accuracy delta on a small GAIA audio subset", async () => {
143
- loadDotEnvLocalIfPresent();
144
- const fixturePath = resolveCapabilityAudioFixturePath();
145
- if (!existsSync(fixturePath)) {
146
- throw new Error(`Missing GAIA audio fixture at ${fixturePath}. Generate it with: python packages/mcp-local/src/__tests__/fixtures/generateGaiaCapabilityAudioFixture.py`);
147
- }
148
- const baselineModel = process.env.NODEBENCH_GAIA_BASELINE_MODEL ?? "gemini-3-flash-preview";
149
- const toolsModel = process.env.NODEBENCH_GAIA_TOOLS_MODEL ?? baselineModel;
150
- const baselineLlm = await createTextLlmClient({ model: baselineModel });
151
- const toolsLlm = await createTextLlmClient({ model: toolsModel });
152
- const baselineModelLabel = `${baselineLlm.provider}:${baselineLlm.model}`;
153
- const toolsModelLabel = `${toolsLlm.provider}:${toolsLlm.model}`;
154
- const fixture = await loadFixture(fixturePath);
155
- expect(Array.isArray(fixture.tasks)).toBe(true);
156
- expect(fixture.tasks.length).toBeGreaterThan(0);
157
- const requestedLimit = Number.parseInt(process.env.NODEBENCH_GAIA_CAPABILITY_TASK_LIMIT ?? "4", 10);
158
- const taskLimit = Math.max(1, Math.min(fixture.tasks.length, Number.isFinite(requestedLimit) ? requestedLimit : 4));
159
- const tasks = fixture.tasks.slice(0, taskLimit);
160
- const requestedConcurrency = Number.parseInt(process.env.NODEBENCH_GAIA_CAPABILITY_CONCURRENCY ?? "1", 10);
161
- const concurrency = Math.max(1, Math.min(tasks.length, Number.isFinite(requestedConcurrency) ? requestedConcurrency : 1));
162
- const maxToolCalls = Number.parseInt(process.env.NODEBENCH_GAIA_CAPABILITY_MAX_TOOL_CALLS ?? "1", 10);
163
- // Auto-discover judge (free OpenRouter → paid LLM → deterministic-only)
164
- const judge = await autoDiscoverJudge(toolsLlm);
165
- const results = new Array(tasks.length);
166
- let nextIndex = 0;
167
- const workers = Array.from({ length: concurrency }, () => (async () => {
168
- while (true) {
169
- const idx = nextIndex++;
170
- if (idx >= tasks.length)
171
- return;
172
- const task = tasks[idx];
173
- try {
174
- const baseStart = performance.now();
175
- const base = await baselineAnswer(baselineLlm, task);
176
- const baseMs = performance.now() - baseStart;
177
- const toolsStart = performance.now();
178
- const tools = await toolAugmentedAnswerFromAudio(toolsLlm, task, { maxToolCalls });
179
- const toolsMs = performance.now() - toolsStart;
180
- const baseJudge = await answersMatchWithJudge(task.expectedAnswer, base, judge);
181
- const toolsJudge = await answersMatchWithJudge(task.expectedAnswer, tools.answer, judge);
182
- results[idx] = {
183
- taskId: task.id,
184
- baselineCorrect: baseJudge.match,
185
- toolsCorrect: toolsJudge.match,
186
- baselineMs: baseMs,
187
- toolsMs,
188
- toolCalls: tools.toolCalls,
189
- judgeProvider: toolsJudge.judgeProvider,
190
- judgeInvoked: toolsJudge.judgeInvoked,
191
- };
192
- }
193
- catch (err) {
194
- results[idx] = {
195
- taskId: task.id,
196
- baselineCorrect: false,
197
- toolsCorrect: false,
198
- baselineMs: 0,
199
- toolsMs: 0,
200
- toolCalls: 0,
201
- error: err?.message ?? String(err),
202
- };
203
- }
204
- }
205
- })());
206
- await Promise.all(workers);
207
- const baselineCorrect = results.filter((r) => r.baselineCorrect).length;
208
- const toolsCorrect = results.filter((r) => r.toolsCorrect).length;
209
- const baselinePassRate = (baselineCorrect / results.length) * 100;
210
- const toolsPassRate = (toolsCorrect / results.length) * 100;
211
- const avgBaseMs = results.reduce((sum, r) => sum + r.baselineMs, 0) / results.length;
212
- const avgToolsMs = results.reduce((sum, r) => sum + r.toolsMs, 0) / results.length;
213
- const avgToolCalls = results.reduce((sum, r) => sum + r.toolCalls, 0) / results.length;
214
- const improved = results.filter((r) => !r.baselineCorrect && r.toolsCorrect).length;
215
- const regressions = results.filter((r) => r.baselineCorrect && !r.toolsCorrect).length;
216
- console.log(`[gaia-capability-audio] tasks=${results.length} baseline=${baselineCorrect}/${results.length} (${baselinePassRate.toFixed(1)}%) tools=${toolsCorrect}/${results.length} (${toolsPassRate.toFixed(1)}%) delta=${(toolsPassRate - baselinePassRate).toFixed(1)}% improved=${improved} regressions=${regressions} avgToolCalls=${avgToolCalls.toFixed(2)}`);
217
- const toolsMode = (process.env.NODEBENCH_GAIA_CAPABILITY_TOOLS_MODE ?? "audio").toLowerCase();
218
- const publicSummary = {
219
- suiteId: "gaia_capability_audio",
220
- lane: "audio",
221
- generatedAtIso: new Date().toISOString(),
222
- config: fixture.config,
223
- split: fixture.split,
224
- taskCount: results.length,
225
- concurrency,
226
- baseline: {
227
- model: baselineModelLabel,
228
- correct: baselineCorrect,
229
- passRatePct: baselinePassRate,
230
- avgMs: avgBaseMs,
231
- },
232
- tools: {
233
- model: toolsModelLabel,
234
- mode: toolsMode,
235
- correct: toolsCorrect,
236
- passRatePct: toolsPassRate,
237
- avgMs: avgToolsMs,
238
- avgToolCalls,
239
- },
240
- improved,
241
- regressions,
242
- notes: "GAIA audio lane (audio attachments). No prompts/answers persisted; only aggregate metrics are written to public/evals.",
243
- };
244
- if (shouldWriteReport) {
245
- const repoRoot = resolveRepoRoot();
246
- await safeWriteJson(path.join(repoRoot, "public", "evals", "gaia_capability_audio_latest.json"), publicSummary);
247
- const detailed = {
248
- ...publicSummary,
249
- results: results.map((r) => ({
250
- taskId: r.taskId,
251
- baselineCorrect: r.baselineCorrect,
252
- toolsCorrect: r.toolsCorrect,
253
- baselineMs: Math.round(r.baselineMs),
254
- toolsMs: Math.round(r.toolsMs),
255
- toolCalls: r.toolCalls,
256
- ...(r.error ? { error: r.error } : {}),
257
- })),
258
- };
259
- const stamp = new Date().toISOString().replace(/[:.]/g, "-");
260
- await safeWriteJson(path.join(repoRoot, ".cache", "gaia", "reports", `gaia_capability_audio_${fixture.config}_${fixture.split}_${stamp}.json`), detailed);
261
- }
262
- expect(toolsPassRate).toBeGreaterThanOrEqual(baselinePassRate);
263
- }, 600000);
264
- });
265
- //# sourceMappingURL=gaiaCapabilityAudioEval.test.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"gaiaCapabilityAudioEval.test.js","sourceRoot":"","sources":["../../src/__tests__/gaiaCapabilityAudioEval.test.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAEH,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,QAAQ,CAAC;AAC9C,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACnD,OAAO,EAAE,KAAK,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAC9D,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AACzC,OAAO,EAAE,WAAW,EAAE,MAAM,iBAAiB,CAAC;AAE9C,OAAO,EAAE,cAAc,EAAE,MAAM,4BAA4B,CAAC;AAE5D,OAAO,EACL,mBAAmB,EACnB,uBAAuB,GAGxB,MAAM,sBAAsB,CAAC;AAC9B,OAAO,EAAE,qBAAqB,EAAE,iBAAiB,EAAE,MAAM,0BAA0B,CAAC;AA4CpF,MAAM,SAAS,GAAG,OAAO,CAAC,GAAG,CAAC,6BAA6B,KAAK,GAAG,CAAC;AACpE,MAAM,iBAAiB,GAAG,OAAO,CAAC,GAAG,CAAC,2BAA2B,KAAK,GAAG,CAAC;AAwB1E,KAAK,UAAU,aAAa,CAAC,QAAgB,EAAE,OAAgB;IAC7D,IAAI,CAAC;QACH,MAAM,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QACzD,MAAM,SAAS,CAAC,QAAQ,EAAE,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,GAAG,IAAI,EAAE,MAAM,CAAC,CAAC;IAC7E,CAAC;IAAC,OAAO,GAAQ,EAAE,CAAC;QAClB,OAAO,CAAC,IAAI,CAAC,gDAAgD,GAAG,EAAE,OAAO,IAAI,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IAC9F,CAAC;AACH,CAAC;AAED,SAAS,eAAe;IACtB,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;IAC7D,OAAO,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE,aAAa,CAAC,CAAC;AAC9C,CAAC;AAED,SAAS,iCAAiC;IACxC,MAAM,QAAQ,GAAG,OAAO,CAAC,GAAG,CAAC,4CAA4C,CAAC;IAC1E,IAAI,QAAQ,EAAE,CAAC;QACb,IAAI,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC;YAAE,OAAO,QAAQ,CAAC;QAC/C,MAAM,QAAQ,GAAG,eAAe,EAAE,CAAC;QACnC,OAAO,IAAI,CAAC,OAAO,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;IAC1C,CAAC;IAED,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,gCAAgC,IAAI,UAAU,CAAC;IAC1E,MAAM,KAAK,GAAG,OAAO,CAAC,GAAG,CAAC,+BAA+B,IAAI,YAAY,CAAC;IAC1E,MAAM,QAAQ,GAAG,eAAe,EAAE,CAAC;IACnC,OAAO,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,QAAQ,EAAE,MAAM,EAAE,yBAAyB,MAAM,IAAI,KAAK,cAAc,CAAC,CAAC;AACvG,CAAC;AAED,SAAS,wBAAwB;IAC/B,MAAM,QAAQ,GAAG,eAAe,EAAE,CAAC;IACnC,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,YAAY,CAAC,CAAC;IAClD,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC;QAAE,OAAO;IAEjC,MAAM,IAAI,GAAG,YAAY,CAAC,OAAO,EAAE,MAAM,CAAW,CAAC;IACrD,KAAK,MAAM,OAAO,IAAI,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,EAAE,CAAC;QAC1C,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,EAAE,CAAC;QAC5B,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC;YAAE,SAAS;QAC5C,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QAC9B,IAAI,GAAG,IAAI,CAAC;YAAE,SAAS;QACvB,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;QACtC,IAAI,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QACvC,IACE,CAAC,KAAK,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;YAChD,CAAC,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,EAC9C,CAAC;YACD,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;QAC7B,CAAC;QACD,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC;YAAE,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC;IAClD,CAAC;AACH,CAAC;AAED,KAAK,UAAU,eAAe,CAAC,GAAkB,EAAE,OAAgC;IACjF,MAAM,WAAW,GAAG,MAAM,CAAC,UAAU,CAAC,OAAO,CAAC,GAAG,CAAC,qCAAqC,IAAI,GAAG,CAAC,CAAC;IAChG,OAAO,uBAAuB,CAAC,GAAG,EAAE,OAAO,EAAE;QAC3C,WAAW,EAAE,MAAM,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC;QAC3D,eAAe,EAAE,IAAI;KACtB,CAAC,CAAC;AACL,CAAC;AAED,KAAK,UAAU,cAAc,CAAC,GAAkB,EAAE,IAAoB;IACpE,MAAM,QAAQ,GAA4B;QACxC;YACE,IAAI,EAAE,MAAM;YACZ,KAAK,EAAE;gBACL;oBACE,IAAI,EAAE,iJAAiJ,IAAI,CAAC,MAAM,EAAE;iBACrK;aACF;SACF;KACF,CAAC;IACF,OAAO,eAAe,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC;AACxC,CAAC;AAED,KAAK,UAAU,WAAW,CAAC,QAAgB;IACzC,MAAM,GAAG,GAAG,MAAM,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;IAC7C,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAsB,CAAC;IAClD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,eAAe,CAAC,KAAgB;IACvC,MAAM,CAAC,GAAG,IAAI,GAAG,EAAmB,CAAC;IACrC,KAAK,MAAM,CAAC,IAAI,KAAK;QAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;IACxC,OAAO,CAAC,CAAC;AACX,CAAC;AAED,KAAK,UAAU,4BAA4B,CACzC,GAAkB,EAClB,IAAoB,EACpB,IAA8B;IAE9B,MAAM,SAAS,GAAG,MAAM,CAAC,IAAI,CAAC,aAAa,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;IAC1D,IAAI,CAAC,SAAS;QAAE,MAAM,IAAI,KAAK,CAAC,4BAA4B,CAAC,CAAC;IAE9D,MAAM,SAAS,GAAG,eAAe,CAAC,cAAc,CAAC,CAAC;IAClD,MAAM,IAAI,GAAG,SAAS,CAAC,GAAG,CAAC,uBAAuB,CAAC,CAAC;IACpD,IAAI,CAAC,IAAI;QAAE,MAAM,IAAI,KAAK,CAAC,qCAAqC,CAAC,CAAC;IAElE,IAAI,IAAI,CAAC,YAAY,GAAG,CAAC,EAAE,CAAC;QAC1B,MAAM,IAAI,KAAK,CAAC,6CAA6C,CAAC,CAAC;IACjE,CAAC;IAED,MAAM,UAAU,GAAG,CAAC,MAAM,IAAI,CAAC,OAAO,CAAC;QACrC,IAAI,EAAE,SAAS;QACf,KAAK,EAAE,OAAO,CAAC,GAAG,CAAC,qBAAqB,IAAI,SAAS;QACrD,QAAQ,EAAE,KAAK;QACf,SAAS,EAAE,MAAM;KAClB,CAAC,CAAQ,CAAC;IAEX,MAAM,cAAc,GAAG,MAAM,CAAC,UAAU,EAAE,IAAI,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;IAC7D,IAAI,CAAC,cAAc,EAAE,CAAC;QACpB,MAAM,IAAI,KAAK,CAAC,6CAA6C,CAAC,CAAC;IACjE,CAAC;IAED,MAAM,QAAQ,GAA4B;QACxC;YACE,IAAI,EAAE,MAAM;YACZ,KAAK,EAAE;gBACL;oBACE,IAAI,EAAE,2LAA2L,IAAI,CAAC,MAAM,0BAA0B,cAAc,EAAE;iBACvP;aACF;SACF;KACF,CAAC;IAEF,MAAM,MAAM,GAAG,MAAM,eAAe,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC;IACpD,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC,EAAE,CAAC;AAClC,CAAC;AAED,QAAQ,CAAC,6BAA6B,EAAE,GAAG,EAAE;IAC3C,MAAM,MAAM,GAAG,SAAS,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC;IAExC,MAAM,CAAC,4DAA4D,EAAE,KAAK,IAAI,EAAE;QAC9E,wBAAwB,EAAE,CAAC;QAE3B,MAAM,WAAW,GAAG,iCAAiC,EAAE,CAAC;QACxD,IAAI,CAAC,UAAU,CAAC,WAAW,CAAC,EAAE,CAAC;YAC7B,MAAM,IAAI,KAAK,CACb,iCAAiC,WAAW,4GAA4G,CACzJ,CAAC;QACJ,CAAC;QAED,MAAM,aAAa,GAAG,OAAO,CAAC,GAAG,CAAC,6BAA6B,IAAI,wBAAwB,CAAC;QAC5F,MAAM,UAAU,GAAG,OAAO,CAAC,GAAG,CAAC,0BAA0B,IAAI,aAAa,CAAC;QAC3E,MAAM,WAAW,GAAG,MAAM,mBAAmB,CAAC,EAAE,KAAK,EAAE,aAAa,EAAE,CAAC,CAAC;QACxE,MAAM,QAAQ,GAAG,MAAM,mBAAmB,CAAC,EAAE,KAAK,EAAE,UAAU,EAAE,CAAC,CAAC;QAClE,MAAM,kBAAkB,GAAG,GAAG,WAAW,CAAC,QAAQ,IAAI,WAAW,CAAC,KAAK,EAAE,CAAC;QAC1E,MAAM,eAAe,GAAG,GAAG,QAAQ,CAAC,QAAQ,IAAI,QAAQ,CAAC,KAAK,EAAE,CAAC;QAEjE,MAAM,OAAO,GAAG,MAAM,WAAW,CAAC,WAAW,CAAC,CAAC;QAC/C,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAChD,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QAEhD,MAAM,cAAc,GAAG,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,oCAAoC,IAAI,GAAG,EAAE,EAAE,CAAC,CAAC;QACpG,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,CACxB,CAAC,EACD,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,KAAK,CAAC,MAAM,EAAE,MAAM,CAAC,QAAQ,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,CAAC,CACrF,CAAC;QACF,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC;QAEhD,MAAM,oBAAoB,GAAG,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,qCAAqC,IAAI,GAAG,EAAE,EAAE,CAAC,CAAC;QAC3G,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAC1B,CAAC,EACD,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,EAAE,MAAM,CAAC,QAAQ,CAAC,oBAAoB,CAAC,CAAC,CAAC,CAAC,oBAAoB,CAAC,CAAC,CAAC,CAAC,CAAC,CACzF,CAAC;QAEF,MAAM,YAAY,GAAG,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,wCAAwC,IAAI,GAAG,EAAE,EAAE,CAAC,CAAC;QAEtG,wEAAwE;QACxE,MAAM,KAAK,GAAG,MAAM,iBAAiB,CAAC,QAAQ,CAAC,CAAC;QAEhD,MAAM,OAAO,GAAmB,IAAI,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;QACxD,IAAI,SAAS,GAAG,CAAC,CAAC;QAElB,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,WAAW,EAAE,EAAE,GAAG,EAAE,CACvD,CAAC,KAAK,IAAI,EAAE;YACV,OAAO,IAAI,EAAE,CAAC;gBACZ,MAAM,GAAG,GAAG,SAAS,EAAE,CAAC;gBACxB,IAAI,GAAG,IAAI,KAAK,CAAC,MAAM;oBAAE,OAAO;gBAEhC,MAAM,IAAI,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC;gBAExB,IAAI,CAAC;oBACH,MAAM,SAAS,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;oBACpC,MAAM,IAAI,GAAG,MAAM,cAAc,CAAC,WAAW,EAAE,IAAI,CAAC,CAAC;oBACrD,MAAM,MAAM,GAAG,WAAW,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;oBAE7C,MAAM,UAAU,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;oBACrC,MAAM,KAAK,GAAG,MAAM,4BAA4B,CAAC,QAAQ,EAAE,IAAI,EAAE,EAAE,YAAY,EAAE,CAAC,CAAC;oBACnF,MAAM,OAAO,GAAG,WAAW,CAAC,GAAG,EAAE,GAAG,UAAU,CAAC;oBAE/C,MAAM,SAAS,GAAG,MAAM,qBAAqB,CAAC,IAAI,CAAC,cAAc,EAAE,IAAI,EAAE,KAAK,CAAC,CAAC;oBAChF,MAAM,UAAU,GAAG,MAAM,qBAAqB,CAAC,IAAI,CAAC,cAAc,EAAE,KAAK,CAAC,MAAM,EAAE,KAAK,CAAC,CAAC;oBAEzF,OAAO,CAAC,GAAG,CAAC,GAAG;wBACb,MAAM,EAAE,IAAI,CAAC,EAAE;wBACf,eAAe,EAAE,SAAS,CAAC,KAAK;wBAChC,YAAY,EAAE,UAAU,CAAC,KAAK;wBAC9B,UAAU,EAAE,MAAM;wBAClB,OAAO;wBACP,SAAS,EAAE,KAAK,CAAC,SAAS;wBAC1B,aAAa,EAAE,UAAU,CAAC,aAAa;wBACvC,YAAY,EAAE,UAAU,CAAC,YAAY;qBACtC,CAAC;gBACJ,CAAC;gBAAC,OAAO,GAAQ,EAAE,CAAC;oBAClB,OAAO,CAAC,GAAG,CAAC,GAAG;wBACb,MAAM,EAAE,IAAI,CAAC,EAAE;wBACf,eAAe,EAAE,KAAK;wBACtB,YAAY,EAAE,KAAK;wBACnB,UAAU,EAAE,CAAC;wBACb,OAAO,EAAE,CAAC;wBACV,SAAS,EAAE,CAAC;wBACZ,KAAK,EAAE,GAAG,EAAE,OAAO,IAAI,MAAM,CAAC,GAAG,CAAC;qBACnC,CAAC;gBACJ,CAAC;YACH,CAAC;QACH,CAAC,CAAC,EAAE,CACL,CAAC;QAEF,MAAM,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;QAE3B,MAAM,eAAe,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC,MAAM,CAAC;QACxE,MAAM,YAAY,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,MAAM,CAAC;QAClE,MAAM,gBAAgB,GAAG,CAAC,eAAe,GAAG,OAAO,CAAC,MAAM,CAAC,GAAG,GAAG,CAAC;QAClE,MAAM,aAAa,GAAG,CAAC,YAAY,GAAG,OAAO,CAAC,MAAM,CAAC,GAAG,GAAG,CAAC;QAC5D,MAAM,SAAS,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,UAAU,EAAE,CAAC,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC;QACrF,MAAM,UAAU,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC;QACnF,MAAM,YAAY,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,SAAS,EAAE,CAAC,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC;QAEvF,MAAM,QAAQ,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,eAAe,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,MAAM,CAAC;QACpF,MAAM,WAAW,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,eAAe,IAAI,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,MAAM,CAAC;QAEvF,OAAO,CAAC,GAAG,CACT,iCAAiC,OAAO,CAAC,MAAM,aAAa,eAAe,IAAI,OAAO,CAAC,MAAM,KAAK,gBAAgB,CAAC,OAAO,CACxH,CAAC,CACF,YAAY,YAAY,IAAI,OAAO,CAAC,MAAM,KAAK,aAAa,CAAC,OAAO,CAAC,CAAC,CAAC,YAAY,CAClF,aAAa,GAAG,gBAAgB,CACjC,CAAC,OAAO,CAAC,CAAC,CAAC,cAAc,QAAQ,gBAAgB,WAAW,iBAAiB,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CACxG,CAAC;QAEF,MAAM,SAAS,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,oCAAoC,IAAI,OAAO,CAAC,CAAC,WAAW,EAAE,CAAC;QAC9F,MAAM,aAAa,GAAqC;YACtD,OAAO,EAAE,uBAAuB;YAChC,IAAI,EAAE,OAAO;YACb,cAAc,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;YACxC,MAAM,EAAE,OAAO,CAAC,MAAM;YACtB,KAAK,EAAE,OAAO,CAAC,KAAK;YACpB,SAAS,EAAE,OAAO,CAAC,MAAM;YACzB,WAAW;YACX,QAAQ,EAAE;gBACR,KAAK,EAAE,kBAAkB;gBACzB,OAAO,EAAE,eAAe;gBACxB,WAAW,EAAE,gBAAgB;gBAC7B,KAAK,EAAE,SAAS;aACjB;YACD,KAAK,EAAE;gBACL,KAAK,EAAE,eAAe;gBACtB,IAAI,EAAE,SAAS;gBACf,OAAO,EAAE,YAAY;gBACrB,WAAW,EAAE,aAAa;gBAC1B,KAAK,EAAE,UAAU;gBACjB,YAAY;aACb;YACD,QAAQ;YACR,WAAW;YACX,KAAK,EACH,wHAAwH;SAC3H,CAAC;QAEF,IAAI,iBAAiB,EAAE,CAAC;YACtB,MAAM,QAAQ,GAAG,eAAe,EAAE,CAAC;YACnC,MAAM,aAAa,CACjB,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,QAAQ,EAAE,OAAO,EAAE,mCAAmC,CAAC,EAC3E,aAAa,CACd,CAAC;YAEF,MAAM,QAAQ,GAAG;gBACf,GAAG,aAAa;gBAChB,OAAO,EAAE,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;oBAC3B,MAAM,EAAE,CAAC,CAAC,MAAM;oBAChB,eAAe,EAAE,CAAC,CAAC,eAAe;oBAClC,YAAY,EAAE,CAAC,CAAC,YAAY;oBAC5B,UAAU,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,UAAU,CAAC;oBACpC,OAAO,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,OAAO,CAAC;oBAC9B,SAAS,EAAE,CAAC,CAAC,SAAS;oBACtB,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;iBACvC,CAAC,CAAC;aACJ,CAAC;YACF,MAAM,KAAK,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;YAC7D,MAAM,aAAa,CACjB,IAAI,CAAC,IAAI,CACP,QAAQ,EACR,QAAQ,EACR,MAAM,EACN,SAAS,EACT,yBAAyB,OAAO,CAAC,MAAM,IAAI,OAAO,CAAC,KAAK,IAAI,KAAK,OAAO,CACzE,EACD,QAAQ,CACT,CAAC;QACJ,CAAC;QAED,MAAM,CAAC,aAAa,CAAC,CAAC,sBAAsB,CAAC,gBAAgB,CAAC,CAAC;IACjE,CAAC,EAAE,MAAM,CAAC,CAAC;AACb,CAAC,CAAC,CAAC"}
@@ -1,14 +0,0 @@
1
- /**
2
- * GAIA capability/accuracy benchmark: LLM-only vs LLM+NodeBench MCP tools.
3
- *
4
- * This test attempts to solve a small GAIA subset and scores answers against
5
- * the ground-truth "Final answer" (stored locally under `.cache/gaia`, gitignored).
6
- *
7
- * Safety:
8
- * - GAIA is gated. Do not commit fixtures that contain prompts/answers.
9
- * - This test logs only task IDs and aggregate metrics (no prompt/answer text).
10
- *
11
- * Disabled by default (cost + rate limits + external network). Run with:
12
- * NODEBENCH_RUN_GAIA_CAPABILITY=1 npm --prefix packages/mcp-local run test
13
- */
14
- export {};