nodebench-mcp 3.0.0 → 3.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171) hide show
  1. package/NODEBENCH_AGENTS.md +74 -67
  2. package/README.md +36 -34
  3. package/dist/dashboard/operatingDashboardHtml.js +2 -1
  4. package/dist/dashboard/operatingDashboardHtml.js.map +1 -1
  5. package/dist/dashboard/operatingServer.js +3 -2
  6. package/dist/dashboard/operatingServer.js.map +1 -1
  7. package/dist/db.js +51 -3
  8. package/dist/db.js.map +1 -1
  9. package/dist/index.js +19 -18
  10. package/dist/index.js.map +1 -1
  11. package/dist/packageInfo.d.ts +3 -0
  12. package/dist/packageInfo.js +32 -0
  13. package/dist/packageInfo.js.map +1 -0
  14. package/dist/sandboxApi.js +2 -1
  15. package/dist/sandboxApi.js.map +1 -1
  16. package/dist/tools/boilerplateTools.js +10 -9
  17. package/dist/tools/boilerplateTools.js.map +1 -1
  18. package/dist/tools/documentationTools.js +2 -1
  19. package/dist/tools/documentationTools.js.map +1 -1
  20. package/dist/tools/progressiveDiscoveryTools.js +2 -1
  21. package/dist/tools/progressiveDiscoveryTools.js.map +1 -1
  22. package/dist/tools/toolRegistry.js +11 -0
  23. package/dist/tools/toolRegistry.js.map +1 -1
  24. package/dist/toolsetRegistry.js +74 -1
  25. package/dist/toolsetRegistry.js.map +1 -1
  26. package/package.json +7 -6
  27. package/scripts/install.sh +14 -14
  28. package/dist/__tests__/analytics.test.d.ts +0 -11
  29. package/dist/__tests__/analytics.test.js +0 -546
  30. package/dist/__tests__/analytics.test.js.map +0 -1
  31. package/dist/__tests__/architectComplex.test.d.ts +0 -1
  32. package/dist/__tests__/architectComplex.test.js +0 -373
  33. package/dist/__tests__/architectComplex.test.js.map +0 -1
  34. package/dist/__tests__/architectSmoke.test.d.ts +0 -1
  35. package/dist/__tests__/architectSmoke.test.js +0 -92
  36. package/dist/__tests__/architectSmoke.test.js.map +0 -1
  37. package/dist/__tests__/audit-registry.d.ts +0 -1
  38. package/dist/__tests__/audit-registry.js +0 -60
  39. package/dist/__tests__/audit-registry.js.map +0 -1
  40. package/dist/__tests__/batchAutopilot.test.d.ts +0 -8
  41. package/dist/__tests__/batchAutopilot.test.js +0 -218
  42. package/dist/__tests__/batchAutopilot.test.js.map +0 -1
  43. package/dist/__tests__/cliSubcommands.test.d.ts +0 -1
  44. package/dist/__tests__/cliSubcommands.test.js +0 -138
  45. package/dist/__tests__/cliSubcommands.test.js.map +0 -1
  46. package/dist/__tests__/comparativeBench.test.d.ts +0 -1
  47. package/dist/__tests__/comparativeBench.test.js +0 -722
  48. package/dist/__tests__/comparativeBench.test.js.map +0 -1
  49. package/dist/__tests__/critterCalibrationEval.d.ts +0 -8
  50. package/dist/__tests__/critterCalibrationEval.js +0 -370
  51. package/dist/__tests__/critterCalibrationEval.js.map +0 -1
  52. package/dist/__tests__/dynamicLoading.test.d.ts +0 -1
  53. package/dist/__tests__/dynamicLoading.test.js +0 -280
  54. package/dist/__tests__/dynamicLoading.test.js.map +0 -1
  55. package/dist/__tests__/embeddingProvider.test.d.ts +0 -1
  56. package/dist/__tests__/embeddingProvider.test.js +0 -86
  57. package/dist/__tests__/embeddingProvider.test.js.map +0 -1
  58. package/dist/__tests__/evalDatasetBench.test.d.ts +0 -1
  59. package/dist/__tests__/evalDatasetBench.test.js +0 -738
  60. package/dist/__tests__/evalDatasetBench.test.js.map +0 -1
  61. package/dist/__tests__/evalHarness.test.d.ts +0 -1
  62. package/dist/__tests__/evalHarness.test.js +0 -1107
  63. package/dist/__tests__/evalHarness.test.js.map +0 -1
  64. package/dist/__tests__/fixtures/bfcl_v3_long_context.sample.json +0 -264
  65. package/dist/__tests__/fixtures/generateBfclLongContextFixture.d.ts +0 -10
  66. package/dist/__tests__/fixtures/generateBfclLongContextFixture.js +0 -135
  67. package/dist/__tests__/fixtures/generateBfclLongContextFixture.js.map +0 -1
  68. package/dist/__tests__/fixtures/generateSwebenchVerifiedFixture.d.ts +0 -14
  69. package/dist/__tests__/fixtures/generateSwebenchVerifiedFixture.js +0 -189
  70. package/dist/__tests__/fixtures/generateSwebenchVerifiedFixture.js.map +0 -1
  71. package/dist/__tests__/fixtures/generateToolbenchInstructionFixture.d.ts +0 -16
  72. package/dist/__tests__/fixtures/generateToolbenchInstructionFixture.js +0 -154
  73. package/dist/__tests__/fixtures/generateToolbenchInstructionFixture.js.map +0 -1
  74. package/dist/__tests__/fixtures/swebench_verified.sample.json +0 -162
  75. package/dist/__tests__/fixtures/toolbench_instruction.sample.json +0 -109
  76. package/dist/__tests__/forecastingDogfood.test.d.ts +0 -9
  77. package/dist/__tests__/forecastingDogfood.test.js +0 -284
  78. package/dist/__tests__/forecastingDogfood.test.js.map +0 -1
  79. package/dist/__tests__/forecastingScoring.test.d.ts +0 -9
  80. package/dist/__tests__/forecastingScoring.test.js +0 -202
  81. package/dist/__tests__/forecastingScoring.test.js.map +0 -1
  82. package/dist/__tests__/gaiaCapabilityAudioEval.test.d.ts +0 -15
  83. package/dist/__tests__/gaiaCapabilityAudioEval.test.js +0 -265
  84. package/dist/__tests__/gaiaCapabilityAudioEval.test.js.map +0 -1
  85. package/dist/__tests__/gaiaCapabilityEval.test.d.ts +0 -14
  86. package/dist/__tests__/gaiaCapabilityEval.test.js +0 -1259
  87. package/dist/__tests__/gaiaCapabilityEval.test.js.map +0 -1
  88. package/dist/__tests__/gaiaCapabilityFilesEval.test.d.ts +0 -15
  89. package/dist/__tests__/gaiaCapabilityFilesEval.test.js +0 -914
  90. package/dist/__tests__/gaiaCapabilityFilesEval.test.js.map +0 -1
  91. package/dist/__tests__/gaiaCapabilityMediaEval.test.d.ts +0 -15
  92. package/dist/__tests__/gaiaCapabilityMediaEval.test.js +0 -1101
  93. package/dist/__tests__/gaiaCapabilityMediaEval.test.js.map +0 -1
  94. package/dist/__tests__/helpers/answerMatch.d.ts +0 -41
  95. package/dist/__tests__/helpers/answerMatch.js +0 -267
  96. package/dist/__tests__/helpers/answerMatch.js.map +0 -1
  97. package/dist/__tests__/helpers/textLlm.d.ts +0 -25
  98. package/dist/__tests__/helpers/textLlm.js +0 -214
  99. package/dist/__tests__/helpers/textLlm.js.map +0 -1
  100. package/dist/__tests__/localDashboard.test.d.ts +0 -1
  101. package/dist/__tests__/localDashboard.test.js +0 -226
  102. package/dist/__tests__/localDashboard.test.js.map +0 -1
  103. package/dist/__tests__/multiHopDogfood.test.d.ts +0 -12
  104. package/dist/__tests__/multiHopDogfood.test.js +0 -303
  105. package/dist/__tests__/multiHopDogfood.test.js.map +0 -1
  106. package/dist/__tests__/openDatasetParallelEval.test.d.ts +0 -7
  107. package/dist/__tests__/openDatasetParallelEval.test.js +0 -209
  108. package/dist/__tests__/openDatasetParallelEval.test.js.map +0 -1
  109. package/dist/__tests__/openDatasetParallelEvalGaia.test.d.ts +0 -7
  110. package/dist/__tests__/openDatasetParallelEvalGaia.test.js +0 -279
  111. package/dist/__tests__/openDatasetParallelEvalGaia.test.js.map +0 -1
  112. package/dist/__tests__/openDatasetParallelEvalSwebench.test.d.ts +0 -7
  113. package/dist/__tests__/openDatasetParallelEvalSwebench.test.js +0 -220
  114. package/dist/__tests__/openDatasetParallelEvalSwebench.test.js.map +0 -1
  115. package/dist/__tests__/openDatasetParallelEvalToolbench.test.d.ts +0 -7
  116. package/dist/__tests__/openDatasetParallelEvalToolbench.test.js +0 -218
  117. package/dist/__tests__/openDatasetParallelEvalToolbench.test.js.map +0 -1
  118. package/dist/__tests__/openDatasetPerfComparison.test.d.ts +0 -10
  119. package/dist/__tests__/openDatasetPerfComparison.test.js +0 -318
  120. package/dist/__tests__/openDatasetPerfComparison.test.js.map +0 -1
  121. package/dist/__tests__/openclawDogfood.test.d.ts +0 -23
  122. package/dist/__tests__/openclawDogfood.test.js +0 -535
  123. package/dist/__tests__/openclawDogfood.test.js.map +0 -1
  124. package/dist/__tests__/openclawMessaging.test.d.ts +0 -14
  125. package/dist/__tests__/openclawMessaging.test.js +0 -232
  126. package/dist/__tests__/openclawMessaging.test.js.map +0 -1
  127. package/dist/__tests__/presetRealWorldBench.test.d.ts +0 -1
  128. package/dist/__tests__/presetRealWorldBench.test.js +0 -859
  129. package/dist/__tests__/presetRealWorldBench.test.js.map +0 -1
  130. package/dist/__tests__/tools.test.d.ts +0 -1
  131. package/dist/__tests__/tools.test.js +0 -3201
  132. package/dist/__tests__/tools.test.js.map +0 -1
  133. package/dist/__tests__/toolsetGatingEval.test.d.ts +0 -1
  134. package/dist/__tests__/toolsetGatingEval.test.js +0 -1099
  135. package/dist/__tests__/toolsetGatingEval.test.js.map +0 -1
  136. package/dist/__tests__/traceabilityDogfood.test.d.ts +0 -12
  137. package/dist/__tests__/traceabilityDogfood.test.js +0 -241
  138. package/dist/__tests__/traceabilityDogfood.test.js.map +0 -1
  139. package/dist/__tests__/webmcpTools.test.d.ts +0 -7
  140. package/dist/__tests__/webmcpTools.test.js +0 -195
  141. package/dist/__tests__/webmcpTools.test.js.map +0 -1
  142. package/dist/benchmarks/testProviderBus.d.ts +0 -7
  143. package/dist/benchmarks/testProviderBus.js +0 -272
  144. package/dist/benchmarks/testProviderBus.js.map +0 -1
  145. package/dist/hooks/postCompaction.d.ts +0 -14
  146. package/dist/hooks/postCompaction.js +0 -51
  147. package/dist/hooks/postCompaction.js.map +0 -1
  148. package/dist/security/__tests__/security.test.d.ts +0 -8
  149. package/dist/security/__tests__/security.test.js +0 -295
  150. package/dist/security/__tests__/security.test.js.map +0 -1
  151. package/dist/sync/hyperloopEval.test.d.ts +0 -4
  152. package/dist/sync/hyperloopEval.test.js +0 -60
  153. package/dist/sync/hyperloopEval.test.js.map +0 -1
  154. package/dist/sync/store.test.d.ts +0 -4
  155. package/dist/sync/store.test.js +0 -43
  156. package/dist/sync/store.test.js.map +0 -1
  157. package/dist/tools/documentTools.d.ts +0 -5
  158. package/dist/tools/documentTools.js +0 -524
  159. package/dist/tools/documentTools.js.map +0 -1
  160. package/dist/tools/financialTools.d.ts +0 -10
  161. package/dist/tools/financialTools.js +0 -403
  162. package/dist/tools/financialTools.js.map +0 -1
  163. package/dist/tools/memoryTools.d.ts +0 -5
  164. package/dist/tools/memoryTools.js +0 -137
  165. package/dist/tools/memoryTools.js.map +0 -1
  166. package/dist/tools/planningTools.d.ts +0 -5
  167. package/dist/tools/planningTools.js +0 -147
  168. package/dist/tools/planningTools.js.map +0 -1
  169. package/dist/tools/searchTools.d.ts +0 -5
  170. package/dist/tools/searchTools.js +0 -145
  171. package/dist/tools/searchTools.js.map +0 -1
@@ -1,220 +0,0 @@
1
- /**
2
- * Open-source dataset benchmark for long-running tasks (SWE-bench lane).
3
- *
4
- * This test uses SWE-bench Verified issue tasks and runs task workflows
5
- * through NodeBench MCP tools in parallel "subagent" workers.
6
- */
7
- import { describe, expect, it } from "vitest";
8
- import datasetFixture from "./fixtures/swebench_verified.sample.json";
9
- import { verificationTools } from "../tools/verificationTools.js";
10
- import { reconTools } from "../tools/reconTools.js";
11
- import { evalTools } from "../tools/evalTools.js";
12
- import { qualityGateTools } from "../tools/qualityGateTools.js";
13
- import { flywheelTools } from "../tools/flywheelTools.js";
14
- import { learningTools } from "../tools/learningTools.js";
15
- import { documentationTools } from "../tools/documentationTools.js";
16
- import { agentBootstrapTools } from "../tools/agentBootstrapTools.js";
17
- import { createMetaTools } from "../tools/metaTools.js";
18
- const fixture = datasetFixture;
19
- const domainTools = [
20
- ...verificationTools,
21
- ...evalTools,
22
- ...qualityGateTools,
23
- ...learningTools,
24
- ...flywheelTools,
25
- ...reconTools,
26
- ...documentationTools,
27
- ...agentBootstrapTools,
28
- ];
29
- const allTools = [...domainTools, ...createMetaTools(domainTools)];
30
- const openDatasetToolCallLog = [];
31
- function findTool(name) {
32
- const tool = allTools.find((candidate) => candidate.name === name);
33
- if (!tool)
34
- throw new Error(`Tool not found: ${name}`);
35
- return tool;
36
- }
37
- async function callTool(name, args, taskId, stage) {
38
- const tool = findTool(name);
39
- try {
40
- const result = await tool.handler(args);
41
- openDatasetToolCallLog.push({ taskId, tool: name, stage, success: true });
42
- return result;
43
- }
44
- catch (error) {
45
- openDatasetToolCallLog.push({ taskId, tool: name, stage, success: false });
46
- throw error;
47
- }
48
- }
49
- async function runDatasetTask(task, workerIndex) {
50
- const started = Date.now();
51
- const recon = (await callTool("run_recon", {
52
- target: `SWE-bench Verified task ${task.id}`,
53
- description: `Open-source long-running engineering benchmark (${task.repo}, ${task.difficulty}).`,
54
- projectContext: {
55
- techStack: "TypeScript, MCP, SQLite",
56
- architecture: "Parallel subagent MCP workflow benchmark",
57
- },
58
- }, task.id, "recon_start"));
59
- await callTool("log_recon_finding", {
60
- sessionId: recon.sessionId,
61
- category: "dataset",
62
- summary: `Ingested SWE-bench task ${task.id} (${task.repo}) with failToPass=${task.failToPassCount}.`,
63
- sourceUrl: `${fixture.sourceUrl}`,
64
- relevance: "Real-world software issue benchmark for MCP orchestration quality.",
65
- actionItems: "Run in parallel worker pool and enforce mandatory flywheel checks.",
66
- }, task.id, "recon_log");
67
- let discovered = (await callTool("findTools", {
68
- query: task.prompt.slice(0, 600),
69
- category: "verification",
70
- }, task.id, "find_tools"));
71
- if (!Array.isArray(discovered?.tools) || discovered.tools.length === 0) {
72
- discovered = (await callTool("findTools", { query: "software bugfix verification eval workflow", category: "eval" }, task.id, "find_tools_fallback"));
73
- }
74
- expect(Array.isArray(discovered.tools)).toBe(true);
75
- expect(discovered.tools.length).toBeGreaterThan(0);
76
- const difficultyLower = task.difficulty.toLowerCase();
77
- const methodologyTopic = difficultyLower.includes("1-4 hours") || task.complexityScore >= 140
78
- ? "mandatory_flywheel"
79
- : "verification";
80
- const methodology = (await callTool("getMethodology", { topic: methodologyTopic }, task.id, "get_methodology"));
81
- expect(methodology.title).toBeTruthy();
82
- expect(Array.isArray(methodology.steps)).toBe(true);
83
- expect(methodology.steps.length).toBeGreaterThan(0);
84
- const evalRun = (await callTool("start_eval_run", {
85
- name: `open-dataset-swebench-${task.id}-${Date.now()}`,
86
- description: `SWE-bench scenario (${task.repo}, worker ${workerIndex})`,
87
- cases: [
88
- {
89
- input: task.prompt,
90
- intent: `Coordinate long-running SWE-bench workflow for ${task.id}`,
91
- expected: "Discover strategy, run eval bookkeeping, and complete closed-loop plus mandatory flywheel checks.",
92
- },
93
- ],
94
- }, task.id, "start_eval_run"));
95
- await callTool("record_eval_result", {
96
- caseId: evalRun.caseIds[0],
97
- verdict: "pass",
98
- score: 1,
99
- actual: `Discovered ${discovered.tools.length} tools and completed SWE-bench workflow.`,
100
- telemetry: {
101
- dataset: fixture.dataset,
102
- split: fixture.split,
103
- taskId: task.id,
104
- repo: task.repo,
105
- difficulty: task.difficulty,
106
- workerIndex,
107
- statementLength: task.statementLength,
108
- hintLength: task.hintLength,
109
- failToPassCount: task.failToPassCount,
110
- passToPassCount: task.passToPassCount,
111
- complexityScore: task.complexityScore,
112
- },
113
- }, task.id, "record_eval_result");
114
- const evalSummary = (await callTool("complete_eval_run", { runId: evalRun.runId }, task.id, "complete_eval_run"));
115
- expect(evalSummary.status).toBe("completed");
116
- expect(evalSummary.summary.passed).toBe(1);
117
- const closedLoop = (await callTool("run_closed_loop", {
118
- steps: [
119
- { step: "compile", passed: true, output: `Compile checks for ${task.id}` },
120
- { step: "lint", passed: true, output: `Lint checks for ${task.id}` },
121
- { step: "test", passed: true, output: `Parallel benchmark checks for ${task.id}` },
122
- ],
123
- }, task.id, "run_closed_loop"));
124
- expect(closedLoop.allPassed).toBe(true);
125
- const flywheel = (await callTool("run_mandatory_flywheel", {
126
- target: `Open-source SWE-bench task ${task.id}`,
127
- steps: [
128
- { stepName: "static_analysis", passed: true, output: "Types and schemas validated." },
129
- { stepName: "happy_path_test", passed: true, output: "Dataset task completed end-to-end." },
130
- { stepName: "failure_path_test", passed: true, output: "Fallback discovery query validated." },
131
- { stepName: "gap_analysis", passed: true, output: "No blocking gaps for this task." },
132
- { stepName: "fix_and_reverify", passed: true, output: "No rework required after checks." },
133
- { stepName: "deploy_and_document", passed: true, output: "Benchmark result documented." },
134
- ],
135
- }, task.id, "run_mandatory_flywheel"));
136
- expect(flywheel.passed).toBe(true);
137
- const knowledge = (await callTool("search_all_knowledge", { query: task.id, limit: 10 }, task.id, "search_all_knowledge"));
138
- expect(typeof knowledge.totalResults).toBe("number");
139
- expect(knowledge.totalResults).toBeGreaterThan(0);
140
- return {
141
- taskId: task.id,
142
- workerIndex,
143
- ok: true,
144
- elapsedMs: Date.now() - started,
145
- discoveredTools: discovered.tools.length,
146
- knowledgeHits: knowledge.totalResults,
147
- };
148
- }
149
- async function runWorkerPool(tasks, concurrency) {
150
- const boundedConcurrency = Math.max(1, Math.min(concurrency, tasks.length));
151
- const results = new Array(tasks.length);
152
- let nextIndex = 0;
153
- const workers = Array.from({ length: boundedConcurrency }, (_, workerIndex) => (async () => {
154
- while (true) {
155
- const taskIndex = nextIndex++;
156
- if (taskIndex >= tasks.length)
157
- return;
158
- const task = tasks[taskIndex];
159
- try {
160
- results[taskIndex] = await runDatasetTask(task, workerIndex);
161
- }
162
- catch (error) {
163
- results[taskIndex] = {
164
- taskId: task.id,
165
- workerIndex,
166
- ok: false,
167
- elapsedMs: 0,
168
- discoveredTools: 0,
169
- knowledgeHits: 0,
170
- error: error instanceof Error ? error.message : String(error),
171
- };
172
- }
173
- }
174
- })());
175
- await Promise.all(workers);
176
- return results;
177
- }
178
- describe("Scenario: Open-Source Long-Running Dataset (SWE-bench Parallel Subagents)", () => {
179
- it("should execute SWE-bench tasks with parallel MCP subagent workflows", async () => {
180
- expect(Array.isArray(fixture.tasks)).toBe(true);
181
- expect(fixture.tasks.length).toBeGreaterThan(0);
182
- const requestedTaskLimit = Number.parseInt(process.env.NODEBENCH_SWEBENCH_TASK_LIMIT ?? "8", 10);
183
- const taskLimit = Math.max(1, Math.min(fixture.tasks.length, Number.isFinite(requestedTaskLimit) ? requestedTaskLimit : 8));
184
- const requestedConcurrency = Number.parseInt(process.env.NODEBENCH_SWEBENCH_CONCURRENCY ?? "4", 10);
185
- const concurrency = Math.max(1, Math.min(taskLimit, Number.isFinite(requestedConcurrency) ? requestedConcurrency : 4));
186
- const tasks = fixture.tasks.slice(0, taskLimit);
187
- const started = Date.now();
188
- const results = await runWorkerPool(tasks, concurrency);
189
- const elapsedMs = Date.now() - started;
190
- const failed = results.filter((result) => !result.ok);
191
- const passed = results.filter((result) => result.ok);
192
- const calledTools = new Set(openDatasetToolCallLog.map((entry) => entry.tool));
193
- const requiredTools = [
194
- "run_recon",
195
- "log_recon_finding",
196
- "findTools",
197
- "getMethodology",
198
- "start_eval_run",
199
- "record_eval_result",
200
- "complete_eval_run",
201
- "run_closed_loop",
202
- "run_mandatory_flywheel",
203
- "search_all_knowledge",
204
- ];
205
- console.log(`[open-dataset-swebench] dataset=${fixture.dataset} split=${fixture.split} tasks=${taskLimit} concurrency=${concurrency} pass=${passed.length}/${results.length} elapsedMs=${elapsedMs} toolCalls=${openDatasetToolCallLog.length}`);
206
- if (failed.length > 0) {
207
- console.error("[open-dataset-swebench] failures:", failed.map((result) => ({
208
- taskId: result.taskId,
209
- workerIndex: result.workerIndex,
210
- error: result.error,
211
- })));
212
- }
213
- expect(failed.length).toBe(0);
214
- expect(passed.length).toBe(taskLimit);
215
- for (const requiredTool of requiredTools) {
216
- expect(calledTools.has(requiredTool)).toBe(true);
217
- }
218
- });
219
- });
220
- //# sourceMappingURL=openDatasetParallelEvalSwebench.test.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"openDatasetParallelEvalSwebench.test.js","sourceRoot":"","sources":["../../src/__tests__/openDatasetParallelEvalSwebench.test.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,QAAQ,CAAC;AAC9C,OAAO,cAAc,MAAM,0CAA0C,CAAC;AACtE,OAAO,EAAE,iBAAiB,EAAE,MAAM,+BAA+B,CAAC;AAClE,OAAO,EAAE,UAAU,EAAE,MAAM,wBAAwB,CAAC;AACpD,OAAO,EAAE,SAAS,EAAE,MAAM,uBAAuB,CAAC;AAClD,OAAO,EAAE,gBAAgB,EAAE,MAAM,8BAA8B,CAAC;AAChE,OAAO,EAAE,aAAa,EAAE,MAAM,2BAA2B,CAAC;AAC1D,OAAO,EAAE,aAAa,EAAE,MAAM,2BAA2B,CAAC;AAC1D,OAAO,EAAE,kBAAkB,EAAE,MAAM,gCAAgC,CAAC;AACpE,OAAO,EAAE,mBAAmB,EAAE,MAAM,iCAAiC,CAAC;AACtE,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AA4CxD,MAAM,OAAO,GAAG,cAAgC,CAAC;AAEjD,MAAM,WAAW,GAAc;IAC7B,GAAG,iBAAiB;IACpB,GAAG,SAAS;IACZ,GAAG,gBAAgB;IACnB,GAAG,aAAa;IAChB,GAAG,aAAa;IAChB,GAAG,UAAU;IACb,GAAG,kBAAkB;IACrB,GAAG,mBAAmB;CACvB,CAAC;AACF,MAAM,QAAQ,GAAG,CAAC,GAAG,WAAW,EAAE,GAAG,eAAe,CAAC,WAAW,CAAC,CAAC,CAAC;AAEnE,MAAM,sBAAsB,GAKvB,EAAE,CAAC;AAER,SAAS,QAAQ,CAAC,IAAY;IAC5B,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC,SAAS,EAAE,EAAE,CAAC,SAAS,CAAC,IAAI,KAAK,IAAI,CAAC,CAAC;IACnE,IAAI,CAAC,IAAI;QAAE,MAAM,IAAI,KAAK,CAAC,mBAAmB,IAAI,EAAE,CAAC,CAAC;IACtD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,KAAK,UAAU,QAAQ,CACrB,IAAY,EACZ,IAA6B,EAC7B,MAAc,EACd,KAAa;IAEb,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC;IAC5B,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;QACxC,sBAAsB,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC;QAC1E,OAAO,MAAM,CAAC;IAChB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,sBAAsB,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC,CAAC;QAC3E,MAAM,KAAK,CAAC;IACd,CAAC;AACH,CAAC;AAED,KAAK,UAAU,cAAc,CAAC,IAAiB,EAAE,WAAmB;IAClE,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAE3B,MAAM,KAAK,GAAG,CAAC,MAAM,QAAQ,CAC3B,WAAW,EACX;QACE,MAAM,EAAE,2BAA2B,IAAI,CAAC,EAAE,EAAE;QAC5C,WAAW,EAAE,mDAAmD,IAAI,CAAC,IAAI,KAAK,IAAI,CAAC,UAAU,IAAI;QACjG,cAAc,EAAE;YACd,SAAS,EAAE,yBAAyB;YACpC,YAAY,EAAE,0CAA0C;SACzD;KACF,EACD,IAAI,CAAC,EAAE,EACP,aAAa,CACd,CAAQ,CAAC;IAEV,MAAM,QAAQ,CACZ,mBAAmB,EACnB;QACE,SAAS,EAAE,KAAK,CAAC,SAAS;QAC1B,QAAQ,EAAE,SAAS;QACnB,OAAO,EAAE,2BAA2B,IAAI,CAAC,EAAE,KAAK,IAAI,CAAC,IAAI,qBAAqB,IAAI,CAAC,eAAe,GAAG;QACrG,SAAS,EAAE,GAAG,OAAO,CAAC,SAAS,EAAE;QACjC,SAAS,EAAE,oEAAoE;QAC/E,WAAW,EAAE,oEAAoE;KAClF,EACD,IAAI,CAAC,EAAE,EACP,WAAW,CACZ,CAAC;IAEF,IAAI,UAAU,GAAG,CAAC,MAAM,QAAQ,CAC9B,WAAW,EACX;QACE,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC;QAChC,QAAQ,EAAE,cAAc;KACzB,EACD,IAAI,CAAC,EAAE,EACP,YAAY,CACb,CAAQ,CAAC;IAEV,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,UAAU,EAAE,KAAK,CAAC,IAAI,UAAU,CAAC,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvE,UAAU,GAAG,CAAC,MAAM,QAAQ,CAC1B,WAAW,EACX,EAAE,KAAK,EAAE,4CAA4C,EAAE,QAAQ,EAAE,MAAM,EAAE,EACzE,IAAI,CAAC,EAAE,EACP,qBAAqB,CACtB,CAAQ,CAAC;IACZ,CAAC;IACD,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACnD,MAAM,CAAC,UAAU,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;IAEnD,MAAM,eAAe,GAAG,IAAI,CAAC,UAAU,CAAC,WAAW,EAAE,CAAC;IACtD,MAAM,gBAAgB,GACpB,eAAe,CAAC,QAAQ,CAAC,WAAW,CAAC,IAAI,IAAI,CAAC,eAAe,IAAI,GAAG;QAClE,CAAC,CAAC,oBAAoB;QACtB,CAAC,CAAC,cAAc,CAAC;IACrB,MAAM,WAAW,GAAG,CAAC,MAAM,QAAQ,CACjC,gBAAgB,EAChB,EAAE,KAAK,EAAE,gBAAgB,EAAE,EAC3B,IAAI,CAAC,EAAE,EACP,iBAAiB,CAClB,CAAQ,CAAC;IACV,MAAM,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC,UAAU,EAAE,CAAC;IACvC,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACpD,MAAM,CAAC,WAAW,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;IAEpD,MAAM,OAAO,GAAG,CAAC,MAAM,QAAQ,CAC7B,gBAAgB,EAChB;QACE,IAAI,EAAE,yBAAyB,IAAI,CAAC,EAAE,IAAI,IAAI,CAAC,GAAG,EAAE,EAAE;QACtD,WAAW,EAAE,uBAAuB,IAAI,CAAC,IAAI,YAAY,WAAW,GAAG;QACvE,KAAK,EAAE;YACL;gBACE,KAAK,EAAE,IAAI,CAAC,MAAM;gBAClB,MAAM,EAAE,kDAAkD,IAAI,CAAC,EAAE,EAAE;gBACnE,QAAQ,EACN,mGAAmG;aACtG;SACF;KACF,EACD,IAAI,CAAC,EAAE,EACP,gBAAgB,CACjB,CAAQ,CAAC;IAEV,MAAM,QAAQ,CACZ,oBAAoB,EACpB;QACE,MAAM,EAAE,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC;QAC1B,OAAO,EAAE,MAAM;QACf,KAAK,EAAE,CAAC;QACR,MAAM,EAAE,cAAc,UAAU,CAAC,KAAK,CAAC,MAAM,0CAA0C;QACvF,SAAS,EAAE;YACT,OAAO,EAAE,OAAO,CAAC,OAAO;YACxB,KAAK,EAAE,OAAO,CAAC,KAAK;YACpB,MAAM,EAAE,IAAI,CAAC,EAAE;YACf,IAAI,EAAE,IAAI,CAAC,IAAI;YACf,UAAU,EAAE,IAAI,CAAC,UAAU;YAC3B,WAAW;YACX,eAAe,EAAE,IAAI,CAAC,eAAe;YACrC,UAAU,EAAE,IAAI,CAAC,UAAU;YAC3B,eAAe,EAAE,IAAI,CAAC,eAAe;YACrC,eAAe,EAAE,IAAI,CAAC,eAAe;YACrC,eAAe,EAAE,IAAI,CAAC,eAAe;SACtC;KACF,EACD,IAAI,CAAC,EAAE,EACP,oBAAoB,CACrB,CAAC;IAEF,MAAM,WAAW,GAAG,CAAC,MAAM,QAAQ,CACjC,mBAAmB,EACnB,EAAE,KAAK,EAAE,OAAO,CAAC,KAAK,EAAE,EACxB,IAAI,CAAC,EAAE,EACP,mBAAmB,CACpB,CAAQ,CAAC;IACV,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;IAC7C,MAAM,CAAC,WAAW,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAE3C,MAAM,UAAU,GAAG,CAAC,MAAM,QAAQ,CAChC,iBAAiB,EACjB;QACE,KAAK,EAAE;YACL,EAAE,IAAI,EAAE,SAAS,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,sBAAsB,IAAI,CAAC,EAAE,EAAE,EAAE;YAC1E,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,mBAAmB,IAAI,CAAC,EAAE,EAAE,EAAE;YACpE,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,iCAAiC,IAAI,CAAC,EAAE,EAAE,EAAE;SACnF;KACF,EACD,IAAI,CAAC,EAAE,EACP,iBAAiB,CAClB,CAAQ,CAAC;IACV,MAAM,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAExC,MAAM,QAAQ,GAAG,CAAC,MAAM,QAAQ,CAC9B,wBAAwB,EACxB;QACE,MAAM,EAAE,8BAA8B,IAAI,CAAC,EAAE,EAAE;QAC/C,KAAK,EAAE;YACL,EAAE,QAAQ,EAAE,iBAAiB,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,8BAA8B,EAAE;YACrF,EAAE,QAAQ,EAAE,iBAAiB,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,oCAAoC,EAAE;YAC3F,EAAE,QAAQ,EAAE,mBAAmB,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,qCAAqC,EAAE;YAC9F,EAAE,QAAQ,EAAE,cAAc,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,iCAAiC,EAAE;YACrF,EAAE,QAAQ,EAAE,kBAAkB,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,kCAAkC,EAAE;YAC1F,EAAE,QAAQ,EAAE,qBAAqB,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,8BAA8B,EAAE;SAC1F;KACF,EACD,IAAI,CAAC,EAAE,EACP,wBAAwB,CACzB,CAAQ,CAAC;IACV,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAEnC,MAAM,SAAS,GAAG,CAAC,MAAM,QAAQ,CAC/B,sBAAsB,EACtB,EAAE,KAAK,EAAE,IAAI,CAAC,EAAE,EAAE,KAAK,EAAE,EAAE,EAAE,EAC7B,IAAI,CAAC,EAAE,EACP,sBAAsB,CACvB,CAAQ,CAAC;IACV,MAAM,CAAC,OAAO,SAAS,CAAC,YAAY,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IACrD,MAAM,CAAC,SAAS,CAAC,YAAY,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;IAElD,OAAO;QACL,MAAM,EAAE,IAAI,CAAC,EAAE;QACf,WAAW;QACX,EAAE,EAAE,IAAI;QACR,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,OAAO;QAC/B,eAAe,EAAE,UAAU,CAAC,KAAK,CAAC,MAAM;QACxC,aAAa,EAAE,SAAS,CAAC,YAAY;KACtC,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,aAAa,CAAC,KAAoB,EAAE,WAAmB;IACpE,MAAM,kBAAkB,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,WAAW,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC;IAC5E,MAAM,OAAO,GAAmB,IAAI,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;IACxD,IAAI,SAAS,GAAG,CAAC,CAAC;IAElB,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,kBAAkB,EAAE,EAAE,CAAC,CAAC,EAAE,WAAW,EAAE,EAAE,CAC5E,CAAC,KAAK,IAAI,EAAE;QACV,OAAO,IAAI,EAAE,CAAC;YACZ,MAAM,SAAS,GAAG,SAAS,EAAE,CAAC;YAC9B,IAAI,SAAS,IAAI,KAAK,CAAC,MAAM;gBAAE,OAAO;YAEtC,MAAM,IAAI,GAAG,KAAK,CAAC,SAAS,CAAC,CAAC;YAC9B,IAAI,CAAC;gBACH,OAAO,CAAC,SAAS,CAAC,GAAG,MAAM,cAAc,CAAC,IAAI,EAAE,WAAW,CAAC,CAAC;YAC/D,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,OAAO,CAAC,SAAS,CAAC,GAAG;oBACnB,MAAM,EAAE,IAAI,CAAC,EAAE;oBACf,WAAW;oBACX,EAAE,EAAE,KAAK;oBACT,SAAS,EAAE,CAAC;oBACZ,eAAe,EAAE,CAAC;oBAClB,aAAa,EAAE,CAAC;oBAChB,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;iBAC9D,CAAC;YACJ,CAAC;QACH,CAAC;IACH,CAAC,CAAC,EAAE,CACL,CAAC;IAEF,MAAM,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;IAC3B,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,QAAQ,CAAC,2EAA2E,EAAE,GAAG,EAAE;IACzF,EAAE,CAAC,qEAAqE,EAAE,KAAK,IAAI,EAAE;QACnF,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAChD,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QAEhD,MAAM,kBAAkB,GAAG,MAAM,CAAC,QAAQ,CACxC,OAAO,CAAC,GAAG,CAAC,6BAA6B,IAAI,GAAG,EAChD,EAAE,CACH,CAAC;QACF,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,CACxB,CAAC,EACD,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,KAAK,CAAC,MAAM,EAAE,MAAM,CAAC,QAAQ,CAAC,kBAAkB,CAAC,CAAC,CAAC,CAAC,kBAAkB,CAAC,CAAC,CAAC,CAAC,CAAC,CAC7F,CAAC;QAEF,MAAM,oBAAoB,GAAG,MAAM,CAAC,QAAQ,CAC1C,OAAO,CAAC,GAAG,CAAC,8BAA8B,IAAI,GAAG,EACjD,EAAE,CACH,CAAC;QACF,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAC1B,CAAC,EACD,IAAI,CAAC,GAAG,CAAC,SAAS,EAAE,MAAM,CAAC,QAAQ,CAAC,oBAAoB,CAAC,CAAC,CAAC,CAAC,oBAAoB,CAAC,CAAC,CAAC,CAAC,CAAC,CACtF,CAAC;QAEF,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC;QAChD,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC3B,MAAM,OAAO,GAAG,MAAM,aAAa,CAAC,KAAK,EAAE,WAAW,CAAC,CAAC;QACxD,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,OAAO,CAAC;QAEvC,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QACtD,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QAErD,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,sBAAsB,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC;QAC/E,MAAM,aAAa,GAAG;YACpB,WAAW;YACX,mBAAmB;YACnB,WAAW;YACX,gBAAgB;YAChB,gBAAgB;YAChB,oBAAoB;YACpB,mBAAmB;YACnB,iBAAiB;YACjB,wBAAwB;YACxB,sBAAsB;SACvB,CAAC;QAEF,OAAO,CAAC,GAAG,CACT,mCAAmC,OAAO,CAAC,OAAO,UAAU,OAAO,CAAC,KAAK,UAAU,SAAS,gBAAgB,WAAW,SAAS,MAAM,CAAC,MAAM,IAAI,OAAO,CAAC,MAAM,cAAc,SAAS,cAAc,sBAAsB,CAAC,MAAM,EAAE,CACpO,CAAC;QAEF,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACtB,OAAO,CAAC,KAAK,CACX,mCAAmC,EACnC,MAAM,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;gBACtB,MAAM,EAAE,MAAM,CAAC,MAAM;gBACrB,WAAW,EAAE,MAAM,CAAC,WAAW;gBAC/B,KAAK,EAAE,MAAM,CAAC,KAAK;aACpB,CAAC,CAAC,CACJ,CAAC;QACJ,CAAC;QAED,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC9B,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QAEtC,KAAK,MAAM,YAAY,IAAI,aAAa,EAAE,CAAC;YACzC,MAAM,CAAC,WAAW,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACnD,CAAC;IACH,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
@@ -1,7 +0,0 @@
1
- /**
2
- * Open-source dataset benchmark for long-running tasks (ToolBench lane).
3
- *
4
- * This test uses ToolBench multi-tool instructions and runs task workflows
5
- * through NodeBench MCP tools in parallel "subagent" workers.
6
- */
7
- export {};
@@ -1,218 +0,0 @@
1
- /**
2
- * Open-source dataset benchmark for long-running tasks (ToolBench lane).
3
- *
4
- * This test uses ToolBench multi-tool instructions and runs task workflows
5
- * through NodeBench MCP tools in parallel "subagent" workers.
6
- */
7
- import { describe, expect, it } from "vitest";
8
- import datasetFixture from "./fixtures/toolbench_instruction.sample.json";
9
- import { verificationTools } from "../tools/verificationTools.js";
10
- import { reconTools } from "../tools/reconTools.js";
11
- import { evalTools } from "../tools/evalTools.js";
12
- import { qualityGateTools } from "../tools/qualityGateTools.js";
13
- import { flywheelTools } from "../tools/flywheelTools.js";
14
- import { learningTools } from "../tools/learningTools.js";
15
- import { documentationTools } from "../tools/documentationTools.js";
16
- import { agentBootstrapTools } from "../tools/agentBootstrapTools.js";
17
- import { createMetaTools } from "../tools/metaTools.js";
18
- const fixture = datasetFixture;
19
- const domainTools = [
20
- ...verificationTools,
21
- ...evalTools,
22
- ...qualityGateTools,
23
- ...learningTools,
24
- ...flywheelTools,
25
- ...reconTools,
26
- ...documentationTools,
27
- ...agentBootstrapTools,
28
- ];
29
- const allTools = [...domainTools, ...createMetaTools(domainTools)];
30
- const openDatasetToolCallLog = [];
31
- function findTool(name) {
32
- const tool = allTools.find((candidate) => candidate.name === name);
33
- if (!tool)
34
- throw new Error(`Tool not found: ${name}`);
35
- return tool;
36
- }
37
- async function callTool(name, args, taskId, stage) {
38
- const tool = findTool(name);
39
- try {
40
- const result = await tool.handler(args);
41
- openDatasetToolCallLog.push({ taskId, tool: name, stage, success: true });
42
- return result;
43
- }
44
- catch (error) {
45
- openDatasetToolCallLog.push({ taskId, tool: name, stage, success: false });
46
- throw error;
47
- }
48
- }
49
- async function runDatasetTask(task, workerIndex) {
50
- const started = Date.now();
51
- const datasetSource = fixture.sourceUrls[task.group] ?? fixture.sourceUrls.G1 ?? "";
52
- const recon = (await callTool("run_recon", {
53
- target: `ToolBench multi-tool task ${task.id}`,
54
- description: `Open-source long-running benchmark (${task.apiCount} APIs in task context).`,
55
- projectContext: {
56
- techStack: "TypeScript, MCP, SQLite",
57
- architecture: "MCP orchestration benchmark with parallel subagent workers",
58
- },
59
- }, task.id, "recon_start"));
60
- await callTool("log_recon_finding", {
61
- sessionId: recon.sessionId,
62
- category: "dataset",
63
- summary: `Ingested ToolBench task ${task.id} with ${task.apiCount} APIs and complexity ${task.complexityScore}.`,
64
- sourceUrl: datasetSource,
65
- relevance: "Long-running multi-tool benchmark for MCP orchestration quality.",
66
- actionItems: "Run in parallel worker pool and enforce mandatory flywheel checks.",
67
- }, task.id, "recon_log");
68
- let discovered = (await callTool("findTools", {
69
- query: task.prompt.slice(0, 600),
70
- category: "verification",
71
- }, task.id, "find_tools"));
72
- if (!Array.isArray(discovered?.tools) || discovered.tools.length === 0) {
73
- discovered = (await callTool("findTools", { query: "multi-step tool orchestration verification", category: "bootstrap" }, task.id, "find_tools_fallback"));
74
- }
75
- expect(Array.isArray(discovered.tools)).toBe(true);
76
- expect(discovered.tools.length).toBeGreaterThan(0);
77
- const methodologyTopic = task.apiCount >= 9 ? "mandatory_flywheel" : "closed_loop";
78
- const methodology = (await callTool("getMethodology", { topic: methodologyTopic }, task.id, "get_methodology"));
79
- expect(methodology.title).toBeTruthy();
80
- expect(Array.isArray(methodology.steps)).toBe(true);
81
- expect(methodology.steps.length).toBeGreaterThan(0);
82
- const evalRun = (await callTool("start_eval_run", {
83
- name: `open-dataset-toolbench-${task.id}-${Date.now()}`,
84
- description: `ToolBench multi-tool scenario (${task.apiCount} APIs, worker ${workerIndex})`,
85
- cases: [
86
- {
87
- input: task.prompt,
88
- intent: `Coordinate long-running ToolBench workflow for ${task.id}`,
89
- expected: "Discover strategy, run eval bookkeeping, and complete closed-loop plus mandatory flywheel checks.",
90
- },
91
- ],
92
- }, task.id, "start_eval_run"));
93
- await callTool("record_eval_result", {
94
- caseId: evalRun.caseIds[0],
95
- verdict: "pass",
96
- score: 1,
97
- actual: `Discovered ${discovered.tools.length} tools and completed ToolBench workflow.`,
98
- telemetry: {
99
- dataset: fixture.dataset,
100
- split: fixture.split,
101
- taskId: task.id,
102
- workerIndex,
103
- group: task.group,
104
- apiCount: task.apiCount,
105
- relevantApiCount: task.relevantApiCount,
106
- requiredParameterCount: task.requiredParameterCount,
107
- optionalParameterCount: task.optionalParameterCount,
108
- apiCategories: task.apiCategories,
109
- },
110
- }, task.id, "record_eval_result");
111
- const evalSummary = (await callTool("complete_eval_run", { runId: evalRun.runId }, task.id, "complete_eval_run"));
112
- expect(evalSummary.status).toBe("completed");
113
- expect(evalSummary.summary.passed).toBe(1);
114
- const closedLoop = (await callTool("run_closed_loop", {
115
- steps: [
116
- { step: "compile", passed: true, output: `Compile checks for ${task.id}` },
117
- { step: "lint", passed: true, output: `Lint checks for ${task.id}` },
118
- { step: "test", passed: true, output: `Parallel benchmark checks for ${task.id}` },
119
- ],
120
- }, task.id, "run_closed_loop"));
121
- expect(closedLoop.allPassed).toBe(true);
122
- const flywheel = (await callTool("run_mandatory_flywheel", {
123
- target: `Open-source ToolBench task ${task.id}`,
124
- steps: [
125
- { stepName: "static_analysis", passed: true, output: "Types and schemas validated." },
126
- { stepName: "happy_path_test", passed: true, output: "Dataset task completed end-to-end." },
127
- { stepName: "failure_path_test", passed: true, output: "Fallback discovery query validated." },
128
- { stepName: "gap_analysis", passed: true, output: "No blocking gaps for this task." },
129
- { stepName: "fix_and_reverify", passed: true, output: "No rework required after checks." },
130
- { stepName: "deploy_and_document", passed: true, output: "Benchmark result documented." },
131
- ],
132
- }, task.id, "run_mandatory_flywheel"));
133
- expect(flywheel.passed).toBe(true);
134
- const knowledgeQuery = task.id;
135
- const knowledge = (await callTool("search_all_knowledge", { query: knowledgeQuery, limit: 10 }, task.id, "search_all_knowledge"));
136
- expect(typeof knowledge.totalResults).toBe("number");
137
- expect(knowledge.totalResults).toBeGreaterThan(0);
138
- return {
139
- taskId: task.id,
140
- workerIndex,
141
- ok: true,
142
- elapsedMs: Date.now() - started,
143
- discoveredTools: discovered.tools.length,
144
- knowledgeHits: knowledge.totalResults,
145
- };
146
- }
147
- async function runWorkerPool(tasks, concurrency) {
148
- const boundedConcurrency = Math.max(1, Math.min(concurrency, tasks.length));
149
- const results = new Array(tasks.length);
150
- let nextIndex = 0;
151
- const workers = Array.from({ length: boundedConcurrency }, (_, workerIndex) => (async () => {
152
- while (true) {
153
- const taskIndex = nextIndex++;
154
- if (taskIndex >= tasks.length)
155
- return;
156
- const task = tasks[taskIndex];
157
- try {
158
- results[taskIndex] = await runDatasetTask(task, workerIndex);
159
- }
160
- catch (error) {
161
- results[taskIndex] = {
162
- taskId: task.id,
163
- workerIndex,
164
- ok: false,
165
- elapsedMs: 0,
166
- discoveredTools: 0,
167
- knowledgeHits: 0,
168
- error: error instanceof Error ? error.message : String(error),
169
- };
170
- }
171
- }
172
- })());
173
- await Promise.all(workers);
174
- return results;
175
- }
176
- describe("Scenario: Open-Source Long-Running Dataset (ToolBench Parallel Subagents)", () => {
177
- it("should execute ToolBench tasks with parallel MCP subagent workflows", async () => {
178
- expect(Array.isArray(fixture.tasks)).toBe(true);
179
- expect(fixture.tasks.length).toBeGreaterThan(0);
180
- const requestedTaskLimit = Number.parseInt(process.env.NODEBENCH_TOOLBENCH_TASK_LIMIT ?? "8", 10);
181
- const taskLimit = Math.max(1, Math.min(fixture.tasks.length, Number.isFinite(requestedTaskLimit) ? requestedTaskLimit : 8));
182
- const requestedConcurrency = Number.parseInt(process.env.NODEBENCH_TOOLBENCH_CONCURRENCY ?? "4", 10);
183
- const concurrency = Math.max(1, Math.min(taskLimit, Number.isFinite(requestedConcurrency) ? requestedConcurrency : 4));
184
- const tasks = fixture.tasks.slice(0, taskLimit);
185
- const started = Date.now();
186
- const results = await runWorkerPool(tasks, concurrency);
187
- const elapsedMs = Date.now() - started;
188
- const failed = results.filter((result) => !result.ok);
189
- const passed = results.filter((result) => result.ok);
190
- const calledTools = new Set(openDatasetToolCallLog.map((entry) => entry.tool));
191
- const requiredTools = [
192
- "run_recon",
193
- "log_recon_finding",
194
- "findTools",
195
- "getMethodology",
196
- "start_eval_run",
197
- "record_eval_result",
198
- "complete_eval_run",
199
- "run_closed_loop",
200
- "run_mandatory_flywheel",
201
- "search_all_knowledge",
202
- ];
203
- console.log(`[open-dataset-toolbench] dataset=${fixture.dataset} split=${fixture.split} tasks=${taskLimit} concurrency=${concurrency} pass=${passed.length}/${results.length} elapsedMs=${elapsedMs} toolCalls=${openDatasetToolCallLog.length}`);
204
- if (failed.length > 0) {
205
- console.error("[open-dataset-toolbench] failures:", failed.map((result) => ({
206
- taskId: result.taskId,
207
- workerIndex: result.workerIndex,
208
- error: result.error,
209
- })));
210
- }
211
- expect(failed.length).toBe(0);
212
- expect(passed.length).toBe(taskLimit);
213
- for (const requiredTool of requiredTools) {
214
- expect(calledTools.has(requiredTool)).toBe(true);
215
- }
216
- });
217
- });
218
- //# sourceMappingURL=openDatasetParallelEvalToolbench.test.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"openDatasetParallelEvalToolbench.test.js","sourceRoot":"","sources":["../../src/__tests__/openDatasetParallelEvalToolbench.test.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,QAAQ,CAAC;AAC9C,OAAO,cAAc,MAAM,8CAA8C,CAAC;AAC1E,OAAO,EAAE,iBAAiB,EAAE,MAAM,+BAA+B,CAAC;AAClE,OAAO,EAAE,UAAU,EAAE,MAAM,wBAAwB,CAAC;AACpD,OAAO,EAAE,SAAS,EAAE,MAAM,uBAAuB,CAAC;AAClD,OAAO,EAAE,gBAAgB,EAAE,MAAM,8BAA8B,CAAC;AAChE,OAAO,EAAE,aAAa,EAAE,MAAM,2BAA2B,CAAC;AAC1D,OAAO,EAAE,aAAa,EAAE,MAAM,2BAA2B,CAAC;AAC1D,OAAO,EAAE,kBAAkB,EAAE,MAAM,gCAAgC,CAAC;AACpE,OAAO,EAAE,mBAAmB,EAAE,MAAM,iCAAiC,CAAC;AACtE,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AAyCxD,MAAM,OAAO,GAAG,cAAgC,CAAC;AAEjD,MAAM,WAAW,GAAc;IAC7B,GAAG,iBAAiB;IACpB,GAAG,SAAS;IACZ,GAAG,gBAAgB;IACnB,GAAG,aAAa;IAChB,GAAG,aAAa;IAChB,GAAG,UAAU;IACb,GAAG,kBAAkB;IACrB,GAAG,mBAAmB;CACvB,CAAC;AACF,MAAM,QAAQ,GAAG,CAAC,GAAG,WAAW,EAAE,GAAG,eAAe,CAAC,WAAW,CAAC,CAAC,CAAC;AAEnE,MAAM,sBAAsB,GAKvB,EAAE,CAAC;AAER,SAAS,QAAQ,CAAC,IAAY;IAC5B,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC,SAAS,EAAE,EAAE,CAAC,SAAS,CAAC,IAAI,KAAK,IAAI,CAAC,CAAC;IACnE,IAAI,CAAC,IAAI;QAAE,MAAM,IAAI,KAAK,CAAC,mBAAmB,IAAI,EAAE,CAAC,CAAC;IACtD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,KAAK,UAAU,QAAQ,CACrB,IAAY,EACZ,IAA6B,EAC7B,MAAc,EACd,KAAa;IAEb,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC;IAC5B,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;QACxC,sBAAsB,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC;QAC1E,OAAO,MAAM,CAAC;IAChB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,sBAAsB,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC,CAAC;QAC3E,MAAM,KAAK,CAAC;IACd,CAAC;AACH,CAAC;AAED,KAAK,UAAU,cAAc,CAAC,IAAiB,EAAE,WAAmB;IAClE,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAC3B,MAAM,aAAa,GAAG,OAAO,CAAC,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,OAAO,CAAC,UAAU,CAAC,EAAE,IAAI,EAAE,CAAC;IAEpF,MAAM,KAAK,GAAG,CAAC,MAAM,QAAQ,CAC3B,WAAW,EACX;QACE,MAAM,EAAE,6BAA6B,IAAI,CAAC,EAAE,EAAE;QAC9C,WAAW,EAAE,uCAAuC,IAAI,CAAC,QAAQ,yBAAyB;QAC1F,cAAc,EAAE;YACd,SAAS,EAAE,yBAAyB;YACpC,YAAY,EAAE,4DAA4D;SAC3E;KACF,EACD,IAAI,CAAC,EAAE,EACP,aAAa,CACd,CAAQ,CAAC;IAEV,MAAM,QAAQ,CACZ,mBAAmB,EACnB;QACE,SAAS,EAAE,KAAK,CAAC,SAAS;QAC1B,QAAQ,EAAE,SAAS;QACnB,OAAO,EAAE,2BAA2B,IAAI,CAAC,EAAE,SAAS,IAAI,CAAC,QAAQ,wBAAwB,IAAI,CAAC,eAAe,GAAG;QAChH,SAAS,EAAE,aAAa;QACxB,SAAS,EAAE,kEAAkE;QAC7E,WAAW,EAAE,oEAAoE;KAClF,EACD,IAAI,CAAC,EAAE,EACP,WAAW,CACZ,CAAC;IAEF,IAAI,UAAU,GAAG,CAAC,MAAM,QAAQ,CAC9B,WAAW,EACX;QACE,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC;QAChC,QAAQ,EAAE,cAAc;KACzB,EACD,IAAI,CAAC,EAAE,EACP,YAAY,CACb,CAAQ,CAAC;IAEV,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,UAAU,EAAE,KAAK,CAAC,IAAI,UAAU,CAAC,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvE,UAAU,GAAG,CAAC,MAAM,QAAQ,CAC1B,WAAW,EACX,EAAE,KAAK,EAAE,4CAA4C,EAAE,QAAQ,EAAE,WAAW,EAAE,EAC9E,IAAI,CAAC,EAAE,EACP,qBAAqB,CACtB,CAAQ,CAAC;IACZ,CAAC;IACD,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACnD,MAAM,CAAC,UAAU,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;IAEnD,MAAM,gBAAgB,GAAG,IAAI,CAAC,QAAQ,IAAI,CAAC,CAAC,CAAC,CAAC,oBAAoB,CAAC,CAAC,CAAC,aAAa,CAAC;IACnF,MAAM,WAAW,GAAG,CAAC,MAAM,QAAQ,CACjC,gBAAgB,EAChB,EAAE,KAAK,EAAE,gBAAgB,EAAE,EAC3B,IAAI,CAAC,EAAE,EACP,iBAAiB,CAClB,CAAQ,CAAC;IACV,MAAM,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC,UAAU,EAAE,CAAC;IACvC,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACpD,MAAM,CAAC,WAAW,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;IAEpD,MAAM,OAAO,GAAG,CAAC,MAAM,QAAQ,CAC7B,gBAAgB,EAChB;QACE,IAAI,EAAE,0BAA0B,IAAI,CAAC,EAAE,IAAI,IAAI,CAAC,GAAG,EAAE,EAAE;QACvD,WAAW,EAAE,kCAAkC,IAAI,CAAC,QAAQ,iBAAiB,WAAW,GAAG;QAC3F,KAAK,EAAE;YACL;gBACE,KAAK,EAAE,IAAI,CAAC,MAAM;gBAClB,MAAM,EAAE,kDAAkD,IAAI,CAAC,EAAE,EAAE;gBACnE,QAAQ,EACN,mGAAmG;aACtG;SACF;KACF,EACD,IAAI,CAAC,EAAE,EACP,gBAAgB,CACjB,CAAQ,CAAC;IAEV,MAAM,QAAQ,CACZ,oBAAoB,EACpB;QACE,MAAM,EAAE,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC;QAC1B,OAAO,EAAE,MAAM;QACf,KAAK,EAAE,CAAC;QACR,MAAM,EAAE,cAAc,UAAU,CAAC,KAAK,CAAC,MAAM,0CAA0C;QACvF,SAAS,EAAE;YACT,OAAO,EAAE,OAAO,CAAC,OAAO;YACxB,KAAK,EAAE,OAAO,CAAC,KAAK;YACpB,MAAM,EAAE,IAAI,CAAC,EAAE;YACf,WAAW;YACX,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,QAAQ,EAAE,IAAI,CAAC,QAAQ;YACvB,gBAAgB,EAAE,IAAI,CAAC,gBAAgB;YACvC,sBAAsB,EAAE,IAAI,CAAC,sBAAsB;YACnD,sBAAsB,EAAE,IAAI,CAAC,sBAAsB;YACnD,aAAa,EAAE,IAAI,CAAC,aAAa;SAClC;KACF,EACD,IAAI,CAAC,EAAE,EACP,oBAAoB,CACrB,CAAC;IAEF,MAAM,WAAW,GAAG,CAAC,MAAM,QAAQ,CACjC,mBAAmB,EACnB,EAAE,KAAK,EAAE,OAAO,CAAC,KAAK,EAAE,EACxB,IAAI,CAAC,EAAE,EACP,mBAAmB,CACpB,CAAQ,CAAC;IACV,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;IAC7C,MAAM,CAAC,WAAW,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAE3C,MAAM,UAAU,GAAG,CAAC,MAAM,QAAQ,CAChC,iBAAiB,EACjB;QACE,KAAK,EAAE;YACL,EAAE,IAAI,EAAE,SAAS,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,sBAAsB,IAAI,CAAC,EAAE,EAAE,EAAE;YAC1E,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,mBAAmB,IAAI,CAAC,EAAE,EAAE,EAAE;YACpE,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,iCAAiC,IAAI,CAAC,EAAE,EAAE,EAAE;SACnF;KACF,EACD,IAAI,CAAC,EAAE,EACP,iBAAiB,CAClB,CAAQ,CAAC;IACV,MAAM,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAExC,MAAM,QAAQ,GAAG,CAAC,MAAM,QAAQ,CAC9B,wBAAwB,EACxB;QACE,MAAM,EAAE,8BAA8B,IAAI,CAAC,EAAE,EAAE;QAC/C,KAAK,EAAE;YACL,EAAE,QAAQ,EAAE,iBAAiB,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,8BAA8B,EAAE;YACrF,EAAE,QAAQ,EAAE,iBAAiB,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,oCAAoC,EAAE;YAC3F,EAAE,QAAQ,EAAE,mBAAmB,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,qCAAqC,EAAE;YAC9F,EAAE,QAAQ,EAAE,cAAc,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,iCAAiC,EAAE;YACrF,EAAE,QAAQ,EAAE,kBAAkB,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,kCAAkC,EAAE;YAC1F,EAAE,QAAQ,EAAE,qBAAqB,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,8BAA8B,EAAE;SAC1F;KACF,EACD,IAAI,CAAC,EAAE,EACP,wBAAwB,CACzB,CAAQ,CAAC;IACV,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAEnC,MAAM,cAAc,GAAG,IAAI,CAAC,EAAE,CAAC;IAC/B,MAAM,SAAS,GAAG,CAAC,MAAM,QAAQ,CAC/B,sBAAsB,EACtB,EAAE,KAAK,EAAE,cAAc,EAAE,KAAK,EAAE,EAAE,EAAE,EACpC,IAAI,CAAC,EAAE,EACP,sBAAsB,CACvB,CAAQ,CAAC;IACV,MAAM,CAAC,OAAO,SAAS,CAAC,YAAY,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IACrD,MAAM,CAAC,SAAS,CAAC,YAAY,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;IAElD,OAAO;QACL,MAAM,EAAE,IAAI,CAAC,EAAE;QACf,WAAW;QACX,EAAE,EAAE,IAAI;QACR,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,OAAO;QAC/B,eAAe,EAAE,UAAU,CAAC,KAAK,CAAC,MAAM;QACxC,aAAa,EAAE,SAAS,CAAC,YAAY;KACtC,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,aAAa,CAAC,KAAoB,EAAE,WAAmB;IACpE,MAAM,kBAAkB,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,WAAW,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC;IAC5E,MAAM,OAAO,GAAmB,IAAI,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;IACxD,IAAI,SAAS,GAAG,CAAC,CAAC;IAElB,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,kBAAkB,EAAE,EAAE,CAAC,CAAC,EAAE,WAAW,EAAE,EAAE,CAC5E,CAAC,KAAK,IAAI,EAAE;QACV,OAAO,IAAI,EAAE,CAAC;YACZ,MAAM,SAAS,GAAG,SAAS,EAAE,CAAC;YAC9B,IAAI,SAAS,IAAI,KAAK,CAAC,MAAM;gBAAE,OAAO;YAEtC,MAAM,IAAI,GAAG,KAAK,CAAC,SAAS,CAAC,CAAC;YAC9B,IAAI,CAAC;gBACH,OAAO,CAAC,SAAS,CAAC,GAAG,MAAM,cAAc,CAAC,IAAI,EAAE,WAAW,CAAC,CAAC;YAC/D,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,OAAO,CAAC,SAAS,CAAC,GAAG;oBACnB,MAAM,EAAE,IAAI,CAAC,EAAE;oBACf,WAAW;oBACX,EAAE,EAAE,KAAK;oBACT,SAAS,EAAE,CAAC;oBACZ,eAAe,EAAE,CAAC;oBAClB,aAAa,EAAE,CAAC;oBAChB,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;iBAC9D,CAAC;YACJ,CAAC;QACH,CAAC;IACH,CAAC,CAAC,EAAE,CACL,CAAC;IAEF,MAAM,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;IAC3B,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,QAAQ,CAAC,2EAA2E,EAAE,GAAG,EAAE;IACzF,EAAE,CAAC,qEAAqE,EAAE,KAAK,IAAI,EAAE;QACnF,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAChD,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QAEhD,MAAM,kBAAkB,GAAG,MAAM,CAAC,QAAQ,CACxC,OAAO,CAAC,GAAG,CAAC,8BAA8B,IAAI,GAAG,EACjD,EAAE,CACH,CAAC;QACF,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,CACxB,CAAC,EACD,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,KAAK,CAAC,MAAM,EAAE,MAAM,CAAC,QAAQ,CAAC,kBAAkB,CAAC,CAAC,CAAC,CAAC,kBAAkB,CAAC,CAAC,CAAC,CAAC,CAAC,CAC7F,CAAC;QAEF,MAAM,oBAAoB,GAAG,MAAM,CAAC,QAAQ,CAC1C,OAAO,CAAC,GAAG,CAAC,+BAA+B,IAAI,GAAG,EAClD,EAAE,CACH,CAAC;QACF,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAC1B,CAAC,EACD,IAAI,CAAC,GAAG,CAAC,SAAS,EAAE,MAAM,CAAC,QAAQ,CAAC,oBAAoB,CAAC,CAAC,CAAC,CAAC,oBAAoB,CAAC,CAAC,CAAC,CAAC,CAAC,CACtF,CAAC;QAEF,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC;QAChD,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC3B,MAAM,OAAO,GAAG,MAAM,aAAa,CAAC,KAAK,EAAE,WAAW,CAAC,CAAC;QACxD,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,OAAO,CAAC;QAEvC,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QACtD,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QAErD,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,sBAAsB,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC;QAC/E,MAAM,aAAa,GAAG;YACpB,WAAW;YACX,mBAAmB;YACnB,WAAW;YACX,gBAAgB;YAChB,gBAAgB;YAChB,oBAAoB;YACpB,mBAAmB;YACnB,iBAAiB;YACjB,wBAAwB;YACxB,sBAAsB;SACvB,CAAC;QAEF,OAAO,CAAC,GAAG,CACT,oCAAoC,OAAO,CAAC,OAAO,UAAU,OAAO,CAAC,KAAK,UAAU,SAAS,gBAAgB,WAAW,SAAS,MAAM,CAAC,MAAM,IAAI,OAAO,CAAC,MAAM,cAAc,SAAS,cAAc,sBAAsB,CAAC,MAAM,EAAE,CACrO,CAAC;QAEF,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACtB,OAAO,CAAC,KAAK,CACX,oCAAoC,EACpC,MAAM,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;gBACtB,MAAM,EAAE,MAAM,CAAC,MAAM;gBACrB,WAAW,EAAE,MAAM,CAAC,WAAW;gBAC/B,KAAK,EAAE,MAAM,CAAC,KAAK;aACpB,CAAC,CAAC,CACJ,CAAC;QACJ,CAAC;QAED,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC9B,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QAEtC,KAAK,MAAM,YAAY,IAAI,aAAa,EAAE,CAAC;YACzC,MAAM,CAAC,WAAW,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACnD,CAAC;IACH,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
@@ -1,10 +0,0 @@
1
- /**
2
- * Performance comparison: "no tools" (noop handlers) vs NodeBench MCP toolchain.
3
- *
4
- * Notes:
5
- * - This is a wall-clock micro-benchmark for local harness overhead, not capability/accuracy.
6
- * - Disabled by default to avoid noisy perf output in normal test runs.
7
- * Set NODEBENCH_RUN_PERF_COMPARE=1 to run.
8
- * - GAIA is gated; if a local GAIA fixture exists in `.cache/gaia`, it will be included.
9
- */
10
- export {};