nodebench-mcp 3.0.0 → 3.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (168) hide show
  1. package/dist/dashboard/operatingDashboardHtml.js +2 -1
  2. package/dist/dashboard/operatingDashboardHtml.js.map +1 -1
  3. package/dist/dashboard/operatingServer.js +3 -2
  4. package/dist/dashboard/operatingServer.js.map +1 -1
  5. package/dist/db.js +51 -3
  6. package/dist/db.js.map +1 -1
  7. package/dist/index.js +13 -16
  8. package/dist/index.js.map +1 -1
  9. package/dist/packageInfo.d.ts +3 -0
  10. package/dist/packageInfo.js +32 -0
  11. package/dist/packageInfo.js.map +1 -0
  12. package/dist/sandboxApi.js +2 -1
  13. package/dist/sandboxApi.js.map +1 -1
  14. package/dist/tools/boilerplateTools.js +10 -9
  15. package/dist/tools/boilerplateTools.js.map +1 -1
  16. package/dist/tools/documentationTools.js +2 -1
  17. package/dist/tools/documentationTools.js.map +1 -1
  18. package/dist/tools/progressiveDiscoveryTools.js +2 -1
  19. package/dist/tools/progressiveDiscoveryTools.js.map +1 -1
  20. package/dist/tools/toolRegistry.js +11 -0
  21. package/dist/tools/toolRegistry.js.map +1 -1
  22. package/dist/toolsetRegistry.js +74 -1
  23. package/dist/toolsetRegistry.js.map +1 -1
  24. package/package.json +4 -3
  25. package/dist/__tests__/analytics.test.d.ts +0 -11
  26. package/dist/__tests__/analytics.test.js +0 -546
  27. package/dist/__tests__/analytics.test.js.map +0 -1
  28. package/dist/__tests__/architectComplex.test.d.ts +0 -1
  29. package/dist/__tests__/architectComplex.test.js +0 -373
  30. package/dist/__tests__/architectComplex.test.js.map +0 -1
  31. package/dist/__tests__/architectSmoke.test.d.ts +0 -1
  32. package/dist/__tests__/architectSmoke.test.js +0 -92
  33. package/dist/__tests__/architectSmoke.test.js.map +0 -1
  34. package/dist/__tests__/audit-registry.d.ts +0 -1
  35. package/dist/__tests__/audit-registry.js +0 -60
  36. package/dist/__tests__/audit-registry.js.map +0 -1
  37. package/dist/__tests__/batchAutopilot.test.d.ts +0 -8
  38. package/dist/__tests__/batchAutopilot.test.js +0 -218
  39. package/dist/__tests__/batchAutopilot.test.js.map +0 -1
  40. package/dist/__tests__/cliSubcommands.test.d.ts +0 -1
  41. package/dist/__tests__/cliSubcommands.test.js +0 -138
  42. package/dist/__tests__/cliSubcommands.test.js.map +0 -1
  43. package/dist/__tests__/comparativeBench.test.d.ts +0 -1
  44. package/dist/__tests__/comparativeBench.test.js +0 -722
  45. package/dist/__tests__/comparativeBench.test.js.map +0 -1
  46. package/dist/__tests__/critterCalibrationEval.d.ts +0 -8
  47. package/dist/__tests__/critterCalibrationEval.js +0 -370
  48. package/dist/__tests__/critterCalibrationEval.js.map +0 -1
  49. package/dist/__tests__/dynamicLoading.test.d.ts +0 -1
  50. package/dist/__tests__/dynamicLoading.test.js +0 -280
  51. package/dist/__tests__/dynamicLoading.test.js.map +0 -1
  52. package/dist/__tests__/embeddingProvider.test.d.ts +0 -1
  53. package/dist/__tests__/embeddingProvider.test.js +0 -86
  54. package/dist/__tests__/embeddingProvider.test.js.map +0 -1
  55. package/dist/__tests__/evalDatasetBench.test.d.ts +0 -1
  56. package/dist/__tests__/evalDatasetBench.test.js +0 -738
  57. package/dist/__tests__/evalDatasetBench.test.js.map +0 -1
  58. package/dist/__tests__/evalHarness.test.d.ts +0 -1
  59. package/dist/__tests__/evalHarness.test.js +0 -1107
  60. package/dist/__tests__/evalHarness.test.js.map +0 -1
  61. package/dist/__tests__/fixtures/bfcl_v3_long_context.sample.json +0 -264
  62. package/dist/__tests__/fixtures/generateBfclLongContextFixture.d.ts +0 -10
  63. package/dist/__tests__/fixtures/generateBfclLongContextFixture.js +0 -135
  64. package/dist/__tests__/fixtures/generateBfclLongContextFixture.js.map +0 -1
  65. package/dist/__tests__/fixtures/generateSwebenchVerifiedFixture.d.ts +0 -14
  66. package/dist/__tests__/fixtures/generateSwebenchVerifiedFixture.js +0 -189
  67. package/dist/__tests__/fixtures/generateSwebenchVerifiedFixture.js.map +0 -1
  68. package/dist/__tests__/fixtures/generateToolbenchInstructionFixture.d.ts +0 -16
  69. package/dist/__tests__/fixtures/generateToolbenchInstructionFixture.js +0 -154
  70. package/dist/__tests__/fixtures/generateToolbenchInstructionFixture.js.map +0 -1
  71. package/dist/__tests__/fixtures/swebench_verified.sample.json +0 -162
  72. package/dist/__tests__/fixtures/toolbench_instruction.sample.json +0 -109
  73. package/dist/__tests__/forecastingDogfood.test.d.ts +0 -9
  74. package/dist/__tests__/forecastingDogfood.test.js +0 -284
  75. package/dist/__tests__/forecastingDogfood.test.js.map +0 -1
  76. package/dist/__tests__/forecastingScoring.test.d.ts +0 -9
  77. package/dist/__tests__/forecastingScoring.test.js +0 -202
  78. package/dist/__tests__/forecastingScoring.test.js.map +0 -1
  79. package/dist/__tests__/gaiaCapabilityAudioEval.test.d.ts +0 -15
  80. package/dist/__tests__/gaiaCapabilityAudioEval.test.js +0 -265
  81. package/dist/__tests__/gaiaCapabilityAudioEval.test.js.map +0 -1
  82. package/dist/__tests__/gaiaCapabilityEval.test.d.ts +0 -14
  83. package/dist/__tests__/gaiaCapabilityEval.test.js +0 -1259
  84. package/dist/__tests__/gaiaCapabilityEval.test.js.map +0 -1
  85. package/dist/__tests__/gaiaCapabilityFilesEval.test.d.ts +0 -15
  86. package/dist/__tests__/gaiaCapabilityFilesEval.test.js +0 -914
  87. package/dist/__tests__/gaiaCapabilityFilesEval.test.js.map +0 -1
  88. package/dist/__tests__/gaiaCapabilityMediaEval.test.d.ts +0 -15
  89. package/dist/__tests__/gaiaCapabilityMediaEval.test.js +0 -1101
  90. package/dist/__tests__/gaiaCapabilityMediaEval.test.js.map +0 -1
  91. package/dist/__tests__/helpers/answerMatch.d.ts +0 -41
  92. package/dist/__tests__/helpers/answerMatch.js +0 -267
  93. package/dist/__tests__/helpers/answerMatch.js.map +0 -1
  94. package/dist/__tests__/helpers/textLlm.d.ts +0 -25
  95. package/dist/__tests__/helpers/textLlm.js +0 -214
  96. package/dist/__tests__/helpers/textLlm.js.map +0 -1
  97. package/dist/__tests__/localDashboard.test.d.ts +0 -1
  98. package/dist/__tests__/localDashboard.test.js +0 -226
  99. package/dist/__tests__/localDashboard.test.js.map +0 -1
  100. package/dist/__tests__/multiHopDogfood.test.d.ts +0 -12
  101. package/dist/__tests__/multiHopDogfood.test.js +0 -303
  102. package/dist/__tests__/multiHopDogfood.test.js.map +0 -1
  103. package/dist/__tests__/openDatasetParallelEval.test.d.ts +0 -7
  104. package/dist/__tests__/openDatasetParallelEval.test.js +0 -209
  105. package/dist/__tests__/openDatasetParallelEval.test.js.map +0 -1
  106. package/dist/__tests__/openDatasetParallelEvalGaia.test.d.ts +0 -7
  107. package/dist/__tests__/openDatasetParallelEvalGaia.test.js +0 -279
  108. package/dist/__tests__/openDatasetParallelEvalGaia.test.js.map +0 -1
  109. package/dist/__tests__/openDatasetParallelEvalSwebench.test.d.ts +0 -7
  110. package/dist/__tests__/openDatasetParallelEvalSwebench.test.js +0 -220
  111. package/dist/__tests__/openDatasetParallelEvalSwebench.test.js.map +0 -1
  112. package/dist/__tests__/openDatasetParallelEvalToolbench.test.d.ts +0 -7
  113. package/dist/__tests__/openDatasetParallelEvalToolbench.test.js +0 -218
  114. package/dist/__tests__/openDatasetParallelEvalToolbench.test.js.map +0 -1
  115. package/dist/__tests__/openDatasetPerfComparison.test.d.ts +0 -10
  116. package/dist/__tests__/openDatasetPerfComparison.test.js +0 -318
  117. package/dist/__tests__/openDatasetPerfComparison.test.js.map +0 -1
  118. package/dist/__tests__/openclawDogfood.test.d.ts +0 -23
  119. package/dist/__tests__/openclawDogfood.test.js +0 -535
  120. package/dist/__tests__/openclawDogfood.test.js.map +0 -1
  121. package/dist/__tests__/openclawMessaging.test.d.ts +0 -14
  122. package/dist/__tests__/openclawMessaging.test.js +0 -232
  123. package/dist/__tests__/openclawMessaging.test.js.map +0 -1
  124. package/dist/__tests__/presetRealWorldBench.test.d.ts +0 -1
  125. package/dist/__tests__/presetRealWorldBench.test.js +0 -859
  126. package/dist/__tests__/presetRealWorldBench.test.js.map +0 -1
  127. package/dist/__tests__/tools.test.d.ts +0 -1
  128. package/dist/__tests__/tools.test.js +0 -3201
  129. package/dist/__tests__/tools.test.js.map +0 -1
  130. package/dist/__tests__/toolsetGatingEval.test.d.ts +0 -1
  131. package/dist/__tests__/toolsetGatingEval.test.js +0 -1099
  132. package/dist/__tests__/toolsetGatingEval.test.js.map +0 -1
  133. package/dist/__tests__/traceabilityDogfood.test.d.ts +0 -12
  134. package/dist/__tests__/traceabilityDogfood.test.js +0 -241
  135. package/dist/__tests__/traceabilityDogfood.test.js.map +0 -1
  136. package/dist/__tests__/webmcpTools.test.d.ts +0 -7
  137. package/dist/__tests__/webmcpTools.test.js +0 -195
  138. package/dist/__tests__/webmcpTools.test.js.map +0 -1
  139. package/dist/benchmarks/testProviderBus.d.ts +0 -7
  140. package/dist/benchmarks/testProviderBus.js +0 -272
  141. package/dist/benchmarks/testProviderBus.js.map +0 -1
  142. package/dist/hooks/postCompaction.d.ts +0 -14
  143. package/dist/hooks/postCompaction.js +0 -51
  144. package/dist/hooks/postCompaction.js.map +0 -1
  145. package/dist/security/__tests__/security.test.d.ts +0 -8
  146. package/dist/security/__tests__/security.test.js +0 -295
  147. package/dist/security/__tests__/security.test.js.map +0 -1
  148. package/dist/sync/hyperloopEval.test.d.ts +0 -4
  149. package/dist/sync/hyperloopEval.test.js +0 -60
  150. package/dist/sync/hyperloopEval.test.js.map +0 -1
  151. package/dist/sync/store.test.d.ts +0 -4
  152. package/dist/sync/store.test.js +0 -43
  153. package/dist/sync/store.test.js.map +0 -1
  154. package/dist/tools/documentTools.d.ts +0 -5
  155. package/dist/tools/documentTools.js +0 -524
  156. package/dist/tools/documentTools.js.map +0 -1
  157. package/dist/tools/financialTools.d.ts +0 -10
  158. package/dist/tools/financialTools.js +0 -403
  159. package/dist/tools/financialTools.js.map +0 -1
  160. package/dist/tools/memoryTools.d.ts +0 -5
  161. package/dist/tools/memoryTools.js +0 -137
  162. package/dist/tools/memoryTools.js.map +0 -1
  163. package/dist/tools/planningTools.d.ts +0 -5
  164. package/dist/tools/planningTools.js +0 -147
  165. package/dist/tools/planningTools.js.map +0 -1
  166. package/dist/tools/searchTools.d.ts +0 -5
  167. package/dist/tools/searchTools.js +0 -145
  168. package/dist/tools/searchTools.js.map +0 -1
@@ -1,220 +0,0 @@
1
- /**
2
- * Open-source dataset benchmark for long-running tasks (SWE-bench lane).
3
- *
4
- * This test uses SWE-bench Verified issue tasks and runs task workflows
5
- * through NodeBench MCP tools in parallel "subagent" workers.
6
- */
7
- import { describe, expect, it } from "vitest";
8
- import datasetFixture from "./fixtures/swebench_verified.sample.json";
9
- import { verificationTools } from "../tools/verificationTools.js";
10
- import { reconTools } from "../tools/reconTools.js";
11
- import { evalTools } from "../tools/evalTools.js";
12
- import { qualityGateTools } from "../tools/qualityGateTools.js";
13
- import { flywheelTools } from "../tools/flywheelTools.js";
14
- import { learningTools } from "../tools/learningTools.js";
15
- import { documentationTools } from "../tools/documentationTools.js";
16
- import { agentBootstrapTools } from "../tools/agentBootstrapTools.js";
17
- import { createMetaTools } from "../tools/metaTools.js";
18
- const fixture = datasetFixture;
19
- const domainTools = [
20
- ...verificationTools,
21
- ...evalTools,
22
- ...qualityGateTools,
23
- ...learningTools,
24
- ...flywheelTools,
25
- ...reconTools,
26
- ...documentationTools,
27
- ...agentBootstrapTools,
28
- ];
29
- const allTools = [...domainTools, ...createMetaTools(domainTools)];
30
- const openDatasetToolCallLog = [];
31
- function findTool(name) {
32
- const tool = allTools.find((candidate) => candidate.name === name);
33
- if (!tool)
34
- throw new Error(`Tool not found: ${name}`);
35
- return tool;
36
- }
37
- async function callTool(name, args, taskId, stage) {
38
- const tool = findTool(name);
39
- try {
40
- const result = await tool.handler(args);
41
- openDatasetToolCallLog.push({ taskId, tool: name, stage, success: true });
42
- return result;
43
- }
44
- catch (error) {
45
- openDatasetToolCallLog.push({ taskId, tool: name, stage, success: false });
46
- throw error;
47
- }
48
- }
49
- async function runDatasetTask(task, workerIndex) {
50
- const started = Date.now();
51
- const recon = (await callTool("run_recon", {
52
- target: `SWE-bench Verified task ${task.id}`,
53
- description: `Open-source long-running engineering benchmark (${task.repo}, ${task.difficulty}).`,
54
- projectContext: {
55
- techStack: "TypeScript, MCP, SQLite",
56
- architecture: "Parallel subagent MCP workflow benchmark",
57
- },
58
- }, task.id, "recon_start"));
59
- await callTool("log_recon_finding", {
60
- sessionId: recon.sessionId,
61
- category: "dataset",
62
- summary: `Ingested SWE-bench task ${task.id} (${task.repo}) with failToPass=${task.failToPassCount}.`,
63
- sourceUrl: `${fixture.sourceUrl}`,
64
- relevance: "Real-world software issue benchmark for MCP orchestration quality.",
65
- actionItems: "Run in parallel worker pool and enforce mandatory flywheel checks.",
66
- }, task.id, "recon_log");
67
- let discovered = (await callTool("findTools", {
68
- query: task.prompt.slice(0, 600),
69
- category: "verification",
70
- }, task.id, "find_tools"));
71
- if (!Array.isArray(discovered?.tools) || discovered.tools.length === 0) {
72
- discovered = (await callTool("findTools", { query: "software bugfix verification eval workflow", category: "eval" }, task.id, "find_tools_fallback"));
73
- }
74
- expect(Array.isArray(discovered.tools)).toBe(true);
75
- expect(discovered.tools.length).toBeGreaterThan(0);
76
- const difficultyLower = task.difficulty.toLowerCase();
77
- const methodologyTopic = difficultyLower.includes("1-4 hours") || task.complexityScore >= 140
78
- ? "mandatory_flywheel"
79
- : "verification";
80
- const methodology = (await callTool("getMethodology", { topic: methodologyTopic }, task.id, "get_methodology"));
81
- expect(methodology.title).toBeTruthy();
82
- expect(Array.isArray(methodology.steps)).toBe(true);
83
- expect(methodology.steps.length).toBeGreaterThan(0);
84
- const evalRun = (await callTool("start_eval_run", {
85
- name: `open-dataset-swebench-${task.id}-${Date.now()}`,
86
- description: `SWE-bench scenario (${task.repo}, worker ${workerIndex})`,
87
- cases: [
88
- {
89
- input: task.prompt,
90
- intent: `Coordinate long-running SWE-bench workflow for ${task.id}`,
91
- expected: "Discover strategy, run eval bookkeeping, and complete closed-loop plus mandatory flywheel checks.",
92
- },
93
- ],
94
- }, task.id, "start_eval_run"));
95
- await callTool("record_eval_result", {
96
- caseId: evalRun.caseIds[0],
97
- verdict: "pass",
98
- score: 1,
99
- actual: `Discovered ${discovered.tools.length} tools and completed SWE-bench workflow.`,
100
- telemetry: {
101
- dataset: fixture.dataset,
102
- split: fixture.split,
103
- taskId: task.id,
104
- repo: task.repo,
105
- difficulty: task.difficulty,
106
- workerIndex,
107
- statementLength: task.statementLength,
108
- hintLength: task.hintLength,
109
- failToPassCount: task.failToPassCount,
110
- passToPassCount: task.passToPassCount,
111
- complexityScore: task.complexityScore,
112
- },
113
- }, task.id, "record_eval_result");
114
- const evalSummary = (await callTool("complete_eval_run", { runId: evalRun.runId }, task.id, "complete_eval_run"));
115
- expect(evalSummary.status).toBe("completed");
116
- expect(evalSummary.summary.passed).toBe(1);
117
- const closedLoop = (await callTool("run_closed_loop", {
118
- steps: [
119
- { step: "compile", passed: true, output: `Compile checks for ${task.id}` },
120
- { step: "lint", passed: true, output: `Lint checks for ${task.id}` },
121
- { step: "test", passed: true, output: `Parallel benchmark checks for ${task.id}` },
122
- ],
123
- }, task.id, "run_closed_loop"));
124
- expect(closedLoop.allPassed).toBe(true);
125
- const flywheel = (await callTool("run_mandatory_flywheel", {
126
- target: `Open-source SWE-bench task ${task.id}`,
127
- steps: [
128
- { stepName: "static_analysis", passed: true, output: "Types and schemas validated." },
129
- { stepName: "happy_path_test", passed: true, output: "Dataset task completed end-to-end." },
130
- { stepName: "failure_path_test", passed: true, output: "Fallback discovery query validated." },
131
- { stepName: "gap_analysis", passed: true, output: "No blocking gaps for this task." },
132
- { stepName: "fix_and_reverify", passed: true, output: "No rework required after checks." },
133
- { stepName: "deploy_and_document", passed: true, output: "Benchmark result documented." },
134
- ],
135
- }, task.id, "run_mandatory_flywheel"));
136
- expect(flywheel.passed).toBe(true);
137
- const knowledge = (await callTool("search_all_knowledge", { query: task.id, limit: 10 }, task.id, "search_all_knowledge"));
138
- expect(typeof knowledge.totalResults).toBe("number");
139
- expect(knowledge.totalResults).toBeGreaterThan(0);
140
- return {
141
- taskId: task.id,
142
- workerIndex,
143
- ok: true,
144
- elapsedMs: Date.now() - started,
145
- discoveredTools: discovered.tools.length,
146
- knowledgeHits: knowledge.totalResults,
147
- };
148
- }
149
- async function runWorkerPool(tasks, concurrency) {
150
- const boundedConcurrency = Math.max(1, Math.min(concurrency, tasks.length));
151
- const results = new Array(tasks.length);
152
- let nextIndex = 0;
153
- const workers = Array.from({ length: boundedConcurrency }, (_, workerIndex) => (async () => {
154
- while (true) {
155
- const taskIndex = nextIndex++;
156
- if (taskIndex >= tasks.length)
157
- return;
158
- const task = tasks[taskIndex];
159
- try {
160
- results[taskIndex] = await runDatasetTask(task, workerIndex);
161
- }
162
- catch (error) {
163
- results[taskIndex] = {
164
- taskId: task.id,
165
- workerIndex,
166
- ok: false,
167
- elapsedMs: 0,
168
- discoveredTools: 0,
169
- knowledgeHits: 0,
170
- error: error instanceof Error ? error.message : String(error),
171
- };
172
- }
173
- }
174
- })());
175
- await Promise.all(workers);
176
- return results;
177
- }
178
- describe("Scenario: Open-Source Long-Running Dataset (SWE-bench Parallel Subagents)", () => {
179
- it("should execute SWE-bench tasks with parallel MCP subagent workflows", async () => {
180
- expect(Array.isArray(fixture.tasks)).toBe(true);
181
- expect(fixture.tasks.length).toBeGreaterThan(0);
182
- const requestedTaskLimit = Number.parseInt(process.env.NODEBENCH_SWEBENCH_TASK_LIMIT ?? "8", 10);
183
- const taskLimit = Math.max(1, Math.min(fixture.tasks.length, Number.isFinite(requestedTaskLimit) ? requestedTaskLimit : 8));
184
- const requestedConcurrency = Number.parseInt(process.env.NODEBENCH_SWEBENCH_CONCURRENCY ?? "4", 10);
185
- const concurrency = Math.max(1, Math.min(taskLimit, Number.isFinite(requestedConcurrency) ? requestedConcurrency : 4));
186
- const tasks = fixture.tasks.slice(0, taskLimit);
187
- const started = Date.now();
188
- const results = await runWorkerPool(tasks, concurrency);
189
- const elapsedMs = Date.now() - started;
190
- const failed = results.filter((result) => !result.ok);
191
- const passed = results.filter((result) => result.ok);
192
- const calledTools = new Set(openDatasetToolCallLog.map((entry) => entry.tool));
193
- const requiredTools = [
194
- "run_recon",
195
- "log_recon_finding",
196
- "findTools",
197
- "getMethodology",
198
- "start_eval_run",
199
- "record_eval_result",
200
- "complete_eval_run",
201
- "run_closed_loop",
202
- "run_mandatory_flywheel",
203
- "search_all_knowledge",
204
- ];
205
- console.log(`[open-dataset-swebench] dataset=${fixture.dataset} split=${fixture.split} tasks=${taskLimit} concurrency=${concurrency} pass=${passed.length}/${results.length} elapsedMs=${elapsedMs} toolCalls=${openDatasetToolCallLog.length}`);
206
- if (failed.length > 0) {
207
- console.error("[open-dataset-swebench] failures:", failed.map((result) => ({
208
- taskId: result.taskId,
209
- workerIndex: result.workerIndex,
210
- error: result.error,
211
- })));
212
- }
213
- expect(failed.length).toBe(0);
214
- expect(passed.length).toBe(taskLimit);
215
- for (const requiredTool of requiredTools) {
216
- expect(calledTools.has(requiredTool)).toBe(true);
217
- }
218
- });
219
- });
220
- //# sourceMappingURL=openDatasetParallelEvalSwebench.test.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"openDatasetParallelEvalSwebench.test.js","sourceRoot":"","sources":["../../src/__tests__/openDatasetParallelEvalSwebench.test.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,QAAQ,CAAC;AAC9C,OAAO,cAAc,MAAM,0CAA0C,CAAC;AACtE,OAAO,EAAE,iBAAiB,EAAE,MAAM,+BAA+B,CAAC;AAClE,OAAO,EAAE,UAAU,EAAE,MAAM,wBAAwB,CAAC;AACpD,OAAO,EAAE,SAAS,EAAE,MAAM,uBAAuB,CAAC;AAClD,OAAO,EAAE,gBAAgB,EAAE,MAAM,8BAA8B,CAAC;AAChE,OAAO,EAAE,aAAa,EAAE,MAAM,2BAA2B,CAAC;AAC1D,OAAO,EAAE,aAAa,EAAE,MAAM,2BAA2B,CAAC;AAC1D,OAAO,EAAE,kBAAkB,EAAE,MAAM,gCAAgC,CAAC;AACpE,OAAO,EAAE,mBAAmB,EAAE,MAAM,iCAAiC,CAAC;AACtE,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AA4CxD,MAAM,OAAO,GAAG,cAAgC,CAAC;AAEjD,MAAM,WAAW,GAAc;IAC7B,GAAG,iBAAiB;IACpB,GAAG,SAAS;IACZ,GAAG,gBAAgB;IACnB,GAAG,aAAa;IAChB,GAAG,aAAa;IAChB,GAAG,UAAU;IACb,GAAG,kBAAkB;IACrB,GAAG,mBAAmB;CACvB,CAAC;AACF,MAAM,QAAQ,GAAG,CAAC,GAAG,WAAW,EAAE,GAAG,eAAe,CAAC,WAAW,CAAC,CAAC,CAAC;AAEnE,MAAM,sBAAsB,GAKvB,EAAE,CAAC;AAER,SAAS,QAAQ,CAAC,IAAY;IAC5B,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC,SAAS,EAAE,EAAE,CAAC,SAAS,CAAC,IAAI,KAAK,IAAI,CAAC,CAAC;IACnE,IAAI,CAAC,IAAI;QAAE,MAAM,IAAI,KAAK,CAAC,mBAAmB,IAAI,EAAE,CAAC,CAAC;IACtD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,KAAK,UAAU,QAAQ,CACrB,IAAY,EACZ,IAA6B,EAC7B,MAAc,EACd,KAAa;IAEb,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC;IAC5B,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;QACxC,sBAAsB,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC;QAC1E,OAAO,MAAM,CAAC;IAChB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,sBAAsB,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC,CAAC;QAC3E,MAAM,KAAK,CAAC;IACd,CAAC;AACH,CAAC;AAED,KAAK,UAAU,cAAc,CAAC,IAAiB,EAAE,WAAmB;IAClE,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAE3B,MAAM,KAAK,GAAG,CAAC,MAAM,QAAQ,CAC3B,WAAW,EACX;QACE,MAAM,EAAE,2BAA2B,IAAI,CAAC,EAAE,EAAE;QAC5C,WAAW,EAAE,mDAAmD,IAAI,CAAC,IAAI,KAAK,IAAI,CAAC,UAAU,IAAI;QACjG,cAAc,EAAE;YACd,SAAS,EAAE,yBAAyB;YACpC,YAAY,EAAE,0CAA0C;SACzD;KACF,EACD,IAAI,CAAC,EAAE,EACP,aAAa,CACd,CAAQ,CAAC;IAEV,MAAM,QAAQ,CACZ,mBAAmB,EACnB;QACE,SAAS,EAAE,KAAK,CAAC,SAAS;QAC1B,QAAQ,EAAE,SAAS;QACnB,OAAO,EAAE,2BAA2B,IAAI,CAAC,EAAE,KAAK,IAAI,CAAC,IAAI,qBAAqB,IAAI,CAAC,eAAe,GAAG;QACrG,SAAS,EAAE,GAAG,OAAO,CAAC,SAAS,EAAE;QACjC,SAAS,EAAE,oEAAoE;QAC/E,WAAW,EAAE,oEAAoE;KAClF,EACD,IAAI,CAAC,EAAE,EACP,WAAW,CACZ,CAAC;IAEF,IAAI,UAAU,GAAG,CAAC,MAAM,QAAQ,CAC9B,WAAW,EACX;QACE,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC;QAChC,QAAQ,EAAE,cAAc;KACzB,EACD,IAAI,CAAC,EAAE,EACP,YAAY,CACb,CAAQ,CAAC;IAEV,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,UAAU,EAAE,KAAK,CAAC,IAAI,UAAU,CAAC,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvE,UAAU,GAAG,CAAC,MAAM,QAAQ,CAC1B,WAAW,EACX,EAAE,KAAK,EAAE,4CAA4C,EAAE,QAAQ,EAAE,MAAM,EAAE,EACzE,IAAI,CAAC,EAAE,EACP,qBAAqB,CACtB,CAAQ,CAAC;IACZ,CAAC;IACD,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACnD,MAAM,CAAC,UAAU,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;IAEnD,MAAM,eAAe,GAAG,IAAI,CAAC,UAAU,CAAC,WAAW,EAAE,CAAC;IACtD,MAAM,gBAAgB,GACpB,eAAe,CAAC,QAAQ,CAAC,WAAW,CAAC,IAAI,IAAI,CAAC,eAAe,IAAI,GAAG;QAClE,CAAC,CAAC,oBAAoB;QACtB,CAAC,CAAC,cAAc,CAAC;IACrB,MAAM,WAAW,GAAG,CAAC,MAAM,QAAQ,CACjC,gBAAgB,EAChB,EAAE,KAAK,EAAE,gBAAgB,EAAE,EAC3B,IAAI,CAAC,EAAE,EACP,iBAAiB,CAClB,CAAQ,CAAC;IACV,MAAM,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC,UAAU,EAAE,CAAC;IACvC,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACpD,MAAM,CAAC,WAAW,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;IAEpD,MAAM,OAAO,GAAG,CAAC,MAAM,QAAQ,CAC7B,gBAAgB,EAChB;QACE,IAAI,EAAE,yBAAyB,IAAI,CAAC,EAAE,IAAI,IAAI,CAAC,GAAG,EAAE,EAAE;QACtD,WAAW,EAAE,uBAAuB,IAAI,CAAC,IAAI,YAAY,WAAW,GAAG;QACvE,KAAK,EAAE;YACL;gBACE,KAAK,EAAE,IAAI,CAAC,MAAM;gBAClB,MAAM,EAAE,kDAAkD,IAAI,CAAC,EAAE,EAAE;gBACnE,QAAQ,EACN,mGAAmG;aACtG;SACF;KACF,EACD,IAAI,CAAC,EAAE,EACP,gBAAgB,CACjB,CAAQ,CAAC;IAEV,MAAM,QAAQ,CACZ,oBAAoB,EACpB;QACE,MAAM,EAAE,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC;QAC1B,OAAO,EAAE,MAAM;QACf,KAAK,EAAE,CAAC;QACR,MAAM,EAAE,cAAc,UAAU,CAAC,KAAK,CAAC,MAAM,0CAA0C;QACvF,SAAS,EAAE;YACT,OAAO,EAAE,OAAO,CAAC,OAAO;YACxB,KAAK,EAAE,OAAO,CAAC,KAAK;YACpB,MAAM,EAAE,IAAI,CAAC,EAAE;YACf,IAAI,EAAE,IAAI,CAAC,IAAI;YACf,UAAU,EAAE,IAAI,CAAC,UAAU;YAC3B,WAAW;YACX,eAAe,EAAE,IAAI,CAAC,eAAe;YACrC,UAAU,EAAE,IAAI,CAAC,UAAU;YAC3B,eAAe,EAAE,IAAI,CAAC,eAAe;YACrC,eAAe,EAAE,IAAI,CAAC,eAAe;YACrC,eAAe,EAAE,IAAI,CAAC,eAAe;SACtC;KACF,EACD,IAAI,CAAC,EAAE,EACP,oBAAoB,CACrB,CAAC;IAEF,MAAM,WAAW,GAAG,CAAC,MAAM,QAAQ,CACjC,mBAAmB,EACnB,EAAE,KAAK,EAAE,OAAO,CAAC,KAAK,EAAE,EACxB,IAAI,CAAC,EAAE,EACP,mBAAmB,CACpB,CAAQ,CAAC;IACV,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;IAC7C,MAAM,CAAC,WAAW,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAE3C,MAAM,UAAU,GAAG,CAAC,MAAM,QAAQ,CAChC,iBAAiB,EACjB;QACE,KAAK,EAAE;YACL,EAAE,IAAI,EAAE,SAAS,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,sBAAsB,IAAI,CAAC,EAAE,EAAE,EAAE;YAC1E,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,mBAAmB,IAAI,CAAC,EAAE,EAAE,EAAE;YACpE,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,iCAAiC,IAAI,CAAC,EAAE,EAAE,EAAE;SACnF;KACF,EACD,IAAI,CAAC,EAAE,EACP,iBAAiB,CAClB,CAAQ,CAAC;IACV,MAAM,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAExC,MAAM,QAAQ,GAAG,CAAC,MAAM,QAAQ,CAC9B,wBAAwB,EACxB;QACE,MAAM,EAAE,8BAA8B,IAAI,CAAC,EAAE,EAAE;QAC/C,KAAK,EAAE;YACL,EAAE,QAAQ,EAAE,iBAAiB,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,8BAA8B,EAAE;YACrF,EAAE,QAAQ,EAAE,iBAAiB,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,oCAAoC,EAAE;YAC3F,EAAE,QAAQ,EAAE,mBAAmB,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,qCAAqC,EAAE;YAC9F,EAAE,QAAQ,EAAE,cAAc,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,iCAAiC,EAAE;YACrF,EAAE,QAAQ,EAAE,kBAAkB,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,kCAAkC,EAAE;YAC1F,EAAE,QAAQ,EAAE,qBAAqB,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,8BAA8B,EAAE;SAC1F;KACF,EACD,IAAI,CAAC,EAAE,EACP,wBAAwB,CACzB,CAAQ,CAAC;IACV,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAEnC,MAAM,SAAS,GAAG,CAAC,MAAM,QAAQ,CAC/B,sBAAsB,EACtB,EAAE,KAAK,EAAE,IAAI,CAAC,EAAE,EAAE,KAAK,EAAE,EAAE,EAAE,EAC7B,IAAI,CAAC,EAAE,EACP,sBAAsB,CACvB,CAAQ,CAAC;IACV,MAAM,CAAC,OAAO,SAAS,CAAC,YAAY,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IACrD,MAAM,CAAC,SAAS,CAAC,YAAY,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;IAElD,OAAO;QACL,MAAM,EAAE,IAAI,CAAC,EAAE;QACf,WAAW;QACX,EAAE,EAAE,IAAI;QACR,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,OAAO;QAC/B,eAAe,EAAE,UAAU,CAAC,KAAK,CAAC,MAAM;QACxC,aAAa,EAAE,SAAS,CAAC,YAAY;KACtC,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,aAAa,CAAC,KAAoB,EAAE,WAAmB;IACpE,MAAM,kBAAkB,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,WAAW,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC;IAC5E,MAAM,OAAO,GAAmB,IAAI,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;IACxD,IAAI,SAAS,GAAG,CAAC,CAAC;IAElB,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,kBAAkB,EAAE,EAAE,CAAC,CAAC,EAAE,WAAW,EAAE,EAAE,CAC5E,CAAC,KAAK,IAAI,EAAE;QACV,OAAO,IAAI,EAAE,CAAC;YACZ,MAAM,SAAS,GAAG,SAAS,EAAE,CAAC;YAC9B,IAAI,SAAS,IAAI,KAAK,CAAC,MAAM;gBAAE,OAAO;YAEtC,MAAM,IAAI,GAAG,KAAK,CAAC,SAAS,CAAC,CAAC;YAC9B,IAAI,CAAC;gBACH,OAAO,CAAC,SAAS,CAAC,GAAG,MAAM,cAAc,CAAC,IAAI,EAAE,WAAW,CAAC,CAAC;YAC/D,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,OAAO,CAAC,SAAS,CAAC,GAAG;oBACnB,MAAM,EAAE,IAAI,CAAC,EAAE;oBACf,WAAW;oBACX,EAAE,EAAE,KAAK;oBACT,SAAS,EAAE,CAAC;oBACZ,eAAe,EAAE,CAAC;oBAClB,aAAa,EAAE,CAAC;oBAChB,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;iBAC9D,CAAC;YACJ,CAAC;QACH,CAAC;IACH,CAAC,CAAC,EAAE,CACL,CAAC;IAEF,MAAM,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;IAC3B,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,QAAQ,CAAC,2EAA2E,EAAE,GAAG,EAAE;IACzF,EAAE,CAAC,qEAAqE,EAAE,KAAK,IAAI,EAAE;QACnF,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAChD,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QAEhD,MAAM,kBAAkB,GAAG,MAAM,CAAC,QAAQ,CACxC,OAAO,CAAC,GAAG,CAAC,6BAA6B,IAAI,GAAG,EAChD,EAAE,CACH,CAAC;QACF,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,CACxB,CAAC,EACD,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,KAAK,CAAC,MAAM,EAAE,MAAM,CAAC,QAAQ,CAAC,kBAAkB,CAAC,CAAC,CAAC,CAAC,kBAAkB,CAAC,CAAC,CAAC,CAAC,CAAC,CAC7F,CAAC;QAEF,MAAM,oBAAoB,GAAG,MAAM,CAAC,QAAQ,CAC1C,OAAO,CAAC,GAAG,CAAC,8BAA8B,IAAI,GAAG,EACjD,EAAE,CACH,CAAC;QACF,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAC1B,CAAC,EACD,IAAI,CAAC,GAAG,CAAC,SAAS,EAAE,MAAM,CAAC,QAAQ,CAAC,oBAAoB,CAAC,CAAC,CAAC,CAAC,oBAAoB,CAAC,CAAC,CAAC,CAAC,CAAC,CACtF,CAAC;QAEF,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC;QAChD,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC3B,MAAM,OAAO,GAAG,MAAM,aAAa,CAAC,KAAK,EAAE,WAAW,CAAC,CAAC;QACxD,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,OAAO,CAAC;QAEvC,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QACtD,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QAErD,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,sBAAsB,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC;QAC/E,MAAM,aAAa,GAAG;YACpB,WAAW;YACX,mBAAmB;YACnB,WAAW;YACX,gBAAgB;YAChB,gBAAgB;YAChB,oBAAoB;YACpB,mBAAmB;YACnB,iBAAiB;YACjB,wBAAwB;YACxB,sBAAsB;SACvB,CAAC;QAEF,OAAO,CAAC,GAAG,CACT,mCAAmC,OAAO,CAAC,OAAO,UAAU,OAAO,CAAC,KAAK,UAAU,SAAS,gBAAgB,WAAW,SAAS,MAAM,CAAC,MAAM,IAAI,OAAO,CAAC,MAAM,cAAc,SAAS,cAAc,sBAAsB,CAAC,MAAM,EAAE,CACpO,CAAC;QAEF,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACtB,OAAO,CAAC,KAAK,CACX,mCAAmC,EACnC,MAAM,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;gBACtB,MAAM,EAAE,MAAM,CAAC,MAAM;gBACrB,WAAW,EAAE,MAAM,CAAC,WAAW;gBAC/B,KAAK,EAAE,MAAM,CAAC,KAAK;aACpB,CAAC,CAAC,CACJ,CAAC;QACJ,CAAC;QAED,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC9B,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QAEtC,KAAK,MAAM,YAAY,IAAI,aAAa,EAAE,CAAC;YACzC,MAAM,CAAC,WAAW,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACnD,CAAC;IACH,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
@@ -1,7 +0,0 @@
1
- /**
2
- * Open-source dataset benchmark for long-running tasks (ToolBench lane).
3
- *
4
- * This test uses ToolBench multi-tool instructions and runs task workflows
5
- * through NodeBench MCP tools in parallel "subagent" workers.
6
- */
7
- export {};
@@ -1,218 +0,0 @@
1
- /**
2
- * Open-source dataset benchmark for long-running tasks (ToolBench lane).
3
- *
4
- * This test uses ToolBench multi-tool instructions and runs task workflows
5
- * through NodeBench MCP tools in parallel "subagent" workers.
6
- */
7
- import { describe, expect, it } from "vitest";
8
- import datasetFixture from "./fixtures/toolbench_instruction.sample.json";
9
- import { verificationTools } from "../tools/verificationTools.js";
10
- import { reconTools } from "../tools/reconTools.js";
11
- import { evalTools } from "../tools/evalTools.js";
12
- import { qualityGateTools } from "../tools/qualityGateTools.js";
13
- import { flywheelTools } from "../tools/flywheelTools.js";
14
- import { learningTools } from "../tools/learningTools.js";
15
- import { documentationTools } from "../tools/documentationTools.js";
16
- import { agentBootstrapTools } from "../tools/agentBootstrapTools.js";
17
- import { createMetaTools } from "../tools/metaTools.js";
18
- const fixture = datasetFixture;
19
- const domainTools = [
20
- ...verificationTools,
21
- ...evalTools,
22
- ...qualityGateTools,
23
- ...learningTools,
24
- ...flywheelTools,
25
- ...reconTools,
26
- ...documentationTools,
27
- ...agentBootstrapTools,
28
- ];
29
- const allTools = [...domainTools, ...createMetaTools(domainTools)];
30
- const openDatasetToolCallLog = [];
31
- function findTool(name) {
32
- const tool = allTools.find((candidate) => candidate.name === name);
33
- if (!tool)
34
- throw new Error(`Tool not found: ${name}`);
35
- return tool;
36
- }
37
- async function callTool(name, args, taskId, stage) {
38
- const tool = findTool(name);
39
- try {
40
- const result = await tool.handler(args);
41
- openDatasetToolCallLog.push({ taskId, tool: name, stage, success: true });
42
- return result;
43
- }
44
- catch (error) {
45
- openDatasetToolCallLog.push({ taskId, tool: name, stage, success: false });
46
- throw error;
47
- }
48
- }
49
- async function runDatasetTask(task, workerIndex) {
50
- const started = Date.now();
51
- const datasetSource = fixture.sourceUrls[task.group] ?? fixture.sourceUrls.G1 ?? "";
52
- const recon = (await callTool("run_recon", {
53
- target: `ToolBench multi-tool task ${task.id}`,
54
- description: `Open-source long-running benchmark (${task.apiCount} APIs in task context).`,
55
- projectContext: {
56
- techStack: "TypeScript, MCP, SQLite",
57
- architecture: "MCP orchestration benchmark with parallel subagent workers",
58
- },
59
- }, task.id, "recon_start"));
60
- await callTool("log_recon_finding", {
61
- sessionId: recon.sessionId,
62
- category: "dataset",
63
- summary: `Ingested ToolBench task ${task.id} with ${task.apiCount} APIs and complexity ${task.complexityScore}.`,
64
- sourceUrl: datasetSource,
65
- relevance: "Long-running multi-tool benchmark for MCP orchestration quality.",
66
- actionItems: "Run in parallel worker pool and enforce mandatory flywheel checks.",
67
- }, task.id, "recon_log");
68
- let discovered = (await callTool("findTools", {
69
- query: task.prompt.slice(0, 600),
70
- category: "verification",
71
- }, task.id, "find_tools"));
72
- if (!Array.isArray(discovered?.tools) || discovered.tools.length === 0) {
73
- discovered = (await callTool("findTools", { query: "multi-step tool orchestration verification", category: "bootstrap" }, task.id, "find_tools_fallback"));
74
- }
75
- expect(Array.isArray(discovered.tools)).toBe(true);
76
- expect(discovered.tools.length).toBeGreaterThan(0);
77
- const methodologyTopic = task.apiCount >= 9 ? "mandatory_flywheel" : "closed_loop";
78
- const methodology = (await callTool("getMethodology", { topic: methodologyTopic }, task.id, "get_methodology"));
79
- expect(methodology.title).toBeTruthy();
80
- expect(Array.isArray(methodology.steps)).toBe(true);
81
- expect(methodology.steps.length).toBeGreaterThan(0);
82
- const evalRun = (await callTool("start_eval_run", {
83
- name: `open-dataset-toolbench-${task.id}-${Date.now()}`,
84
- description: `ToolBench multi-tool scenario (${task.apiCount} APIs, worker ${workerIndex})`,
85
- cases: [
86
- {
87
- input: task.prompt,
88
- intent: `Coordinate long-running ToolBench workflow for ${task.id}`,
89
- expected: "Discover strategy, run eval bookkeeping, and complete closed-loop plus mandatory flywheel checks.",
90
- },
91
- ],
92
- }, task.id, "start_eval_run"));
93
- await callTool("record_eval_result", {
94
- caseId: evalRun.caseIds[0],
95
- verdict: "pass",
96
- score: 1,
97
- actual: `Discovered ${discovered.tools.length} tools and completed ToolBench workflow.`,
98
- telemetry: {
99
- dataset: fixture.dataset,
100
- split: fixture.split,
101
- taskId: task.id,
102
- workerIndex,
103
- group: task.group,
104
- apiCount: task.apiCount,
105
- relevantApiCount: task.relevantApiCount,
106
- requiredParameterCount: task.requiredParameterCount,
107
- optionalParameterCount: task.optionalParameterCount,
108
- apiCategories: task.apiCategories,
109
- },
110
- }, task.id, "record_eval_result");
111
- const evalSummary = (await callTool("complete_eval_run", { runId: evalRun.runId }, task.id, "complete_eval_run"));
112
- expect(evalSummary.status).toBe("completed");
113
- expect(evalSummary.summary.passed).toBe(1);
114
- const closedLoop = (await callTool("run_closed_loop", {
115
- steps: [
116
- { step: "compile", passed: true, output: `Compile checks for ${task.id}` },
117
- { step: "lint", passed: true, output: `Lint checks for ${task.id}` },
118
- { step: "test", passed: true, output: `Parallel benchmark checks for ${task.id}` },
119
- ],
120
- }, task.id, "run_closed_loop"));
121
- expect(closedLoop.allPassed).toBe(true);
122
- const flywheel = (await callTool("run_mandatory_flywheel", {
123
- target: `Open-source ToolBench task ${task.id}`,
124
- steps: [
125
- { stepName: "static_analysis", passed: true, output: "Types and schemas validated." },
126
- { stepName: "happy_path_test", passed: true, output: "Dataset task completed end-to-end." },
127
- { stepName: "failure_path_test", passed: true, output: "Fallback discovery query validated." },
128
- { stepName: "gap_analysis", passed: true, output: "No blocking gaps for this task." },
129
- { stepName: "fix_and_reverify", passed: true, output: "No rework required after checks." },
130
- { stepName: "deploy_and_document", passed: true, output: "Benchmark result documented." },
131
- ],
132
- }, task.id, "run_mandatory_flywheel"));
133
- expect(flywheel.passed).toBe(true);
134
- const knowledgeQuery = task.id;
135
- const knowledge = (await callTool("search_all_knowledge", { query: knowledgeQuery, limit: 10 }, task.id, "search_all_knowledge"));
136
- expect(typeof knowledge.totalResults).toBe("number");
137
- expect(knowledge.totalResults).toBeGreaterThan(0);
138
- return {
139
- taskId: task.id,
140
- workerIndex,
141
- ok: true,
142
- elapsedMs: Date.now() - started,
143
- discoveredTools: discovered.tools.length,
144
- knowledgeHits: knowledge.totalResults,
145
- };
146
- }
147
- async function runWorkerPool(tasks, concurrency) {
148
- const boundedConcurrency = Math.max(1, Math.min(concurrency, tasks.length));
149
- const results = new Array(tasks.length);
150
- let nextIndex = 0;
151
- const workers = Array.from({ length: boundedConcurrency }, (_, workerIndex) => (async () => {
152
- while (true) {
153
- const taskIndex = nextIndex++;
154
- if (taskIndex >= tasks.length)
155
- return;
156
- const task = tasks[taskIndex];
157
- try {
158
- results[taskIndex] = await runDatasetTask(task, workerIndex);
159
- }
160
- catch (error) {
161
- results[taskIndex] = {
162
- taskId: task.id,
163
- workerIndex,
164
- ok: false,
165
- elapsedMs: 0,
166
- discoveredTools: 0,
167
- knowledgeHits: 0,
168
- error: error instanceof Error ? error.message : String(error),
169
- };
170
- }
171
- }
172
- })());
173
- await Promise.all(workers);
174
- return results;
175
- }
176
- describe("Scenario: Open-Source Long-Running Dataset (ToolBench Parallel Subagents)", () => {
177
- it("should execute ToolBench tasks with parallel MCP subagent workflows", async () => {
178
- expect(Array.isArray(fixture.tasks)).toBe(true);
179
- expect(fixture.tasks.length).toBeGreaterThan(0);
180
- const requestedTaskLimit = Number.parseInt(process.env.NODEBENCH_TOOLBENCH_TASK_LIMIT ?? "8", 10);
181
- const taskLimit = Math.max(1, Math.min(fixture.tasks.length, Number.isFinite(requestedTaskLimit) ? requestedTaskLimit : 8));
182
- const requestedConcurrency = Number.parseInt(process.env.NODEBENCH_TOOLBENCH_CONCURRENCY ?? "4", 10);
183
- const concurrency = Math.max(1, Math.min(taskLimit, Number.isFinite(requestedConcurrency) ? requestedConcurrency : 4));
184
- const tasks = fixture.tasks.slice(0, taskLimit);
185
- const started = Date.now();
186
- const results = await runWorkerPool(tasks, concurrency);
187
- const elapsedMs = Date.now() - started;
188
- const failed = results.filter((result) => !result.ok);
189
- const passed = results.filter((result) => result.ok);
190
- const calledTools = new Set(openDatasetToolCallLog.map((entry) => entry.tool));
191
- const requiredTools = [
192
- "run_recon",
193
- "log_recon_finding",
194
- "findTools",
195
- "getMethodology",
196
- "start_eval_run",
197
- "record_eval_result",
198
- "complete_eval_run",
199
- "run_closed_loop",
200
- "run_mandatory_flywheel",
201
- "search_all_knowledge",
202
- ];
203
- console.log(`[open-dataset-toolbench] dataset=${fixture.dataset} split=${fixture.split} tasks=${taskLimit} concurrency=${concurrency} pass=${passed.length}/${results.length} elapsedMs=${elapsedMs} toolCalls=${openDatasetToolCallLog.length}`);
204
- if (failed.length > 0) {
205
- console.error("[open-dataset-toolbench] failures:", failed.map((result) => ({
206
- taskId: result.taskId,
207
- workerIndex: result.workerIndex,
208
- error: result.error,
209
- })));
210
- }
211
- expect(failed.length).toBe(0);
212
- expect(passed.length).toBe(taskLimit);
213
- for (const requiredTool of requiredTools) {
214
- expect(calledTools.has(requiredTool)).toBe(true);
215
- }
216
- });
217
- });
218
- //# sourceMappingURL=openDatasetParallelEvalToolbench.test.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"openDatasetParallelEvalToolbench.test.js","sourceRoot":"","sources":["../../src/__tests__/openDatasetParallelEvalToolbench.test.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,QAAQ,CAAC;AAC9C,OAAO,cAAc,MAAM,8CAA8C,CAAC;AAC1E,OAAO,EAAE,iBAAiB,EAAE,MAAM,+BAA+B,CAAC;AAClE,OAAO,EAAE,UAAU,EAAE,MAAM,wBAAwB,CAAC;AACpD,OAAO,EAAE,SAAS,EAAE,MAAM,uBAAuB,CAAC;AAClD,OAAO,EAAE,gBAAgB,EAAE,MAAM,8BAA8B,CAAC;AAChE,OAAO,EAAE,aAAa,EAAE,MAAM,2BAA2B,CAAC;AAC1D,OAAO,EAAE,aAAa,EAAE,MAAM,2BAA2B,CAAC;AAC1D,OAAO,EAAE,kBAAkB,EAAE,MAAM,gCAAgC,CAAC;AACpE,OAAO,EAAE,mBAAmB,EAAE,MAAM,iCAAiC,CAAC;AACtE,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AAyCxD,MAAM,OAAO,GAAG,cAAgC,CAAC;AAEjD,MAAM,WAAW,GAAc;IAC7B,GAAG,iBAAiB;IACpB,GAAG,SAAS;IACZ,GAAG,gBAAgB;IACnB,GAAG,aAAa;IAChB,GAAG,aAAa;IAChB,GAAG,UAAU;IACb,GAAG,kBAAkB;IACrB,GAAG,mBAAmB;CACvB,CAAC;AACF,MAAM,QAAQ,GAAG,CAAC,GAAG,WAAW,EAAE,GAAG,eAAe,CAAC,WAAW,CAAC,CAAC,CAAC;AAEnE,MAAM,sBAAsB,GAKvB,EAAE,CAAC;AAER,SAAS,QAAQ,CAAC,IAAY;IAC5B,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC,SAAS,EAAE,EAAE,CAAC,SAAS,CAAC,IAAI,KAAK,IAAI,CAAC,CAAC;IACnE,IAAI,CAAC,IAAI;QAAE,MAAM,IAAI,KAAK,CAAC,mBAAmB,IAAI,EAAE,CAAC,CAAC;IACtD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,KAAK,UAAU,QAAQ,CACrB,IAAY,EACZ,IAA6B,EAC7B,MAAc,EACd,KAAa;IAEb,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC;IAC5B,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;QACxC,sBAAsB,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC;QAC1E,OAAO,MAAM,CAAC;IAChB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,sBAAsB,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC,CAAC;QAC3E,MAAM,KAAK,CAAC;IACd,CAAC;AACH,CAAC;AAED,KAAK,UAAU,cAAc,CAAC,IAAiB,EAAE,WAAmB;IAClE,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAC3B,MAAM,aAAa,GAAG,OAAO,CAAC,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,OAAO,CAAC,UAAU,CAAC,EAAE,IAAI,EAAE,CAAC;IAEpF,MAAM,KAAK,GAAG,CAAC,MAAM,QAAQ,CAC3B,WAAW,EACX;QACE,MAAM,EAAE,6BAA6B,IAAI,CAAC,EAAE,EAAE;QAC9C,WAAW,EAAE,uCAAuC,IAAI,CAAC,QAAQ,yBAAyB;QAC1F,cAAc,EAAE;YACd,SAAS,EAAE,yBAAyB;YACpC,YAAY,EAAE,4DAA4D;SAC3E;KACF,EACD,IAAI,CAAC,EAAE,EACP,aAAa,CACd,CAAQ,CAAC;IAEV,MAAM,QAAQ,CACZ,mBAAmB,EACnB;QACE,SAAS,EAAE,KAAK,CAAC,SAAS;QAC1B,QAAQ,EAAE,SAAS;QACnB,OAAO,EAAE,2BAA2B,IAAI,CAAC,EAAE,SAAS,IAAI,CAAC,QAAQ,wBAAwB,IAAI,CAAC,eAAe,GAAG;QAChH,SAAS,EAAE,aAAa;QACxB,SAAS,EAAE,kEAAkE;QAC7E,WAAW,EAAE,oEAAoE;KAClF,EACD,IAAI,CAAC,EAAE,EACP,WAAW,CACZ,CAAC;IAEF,IAAI,UAAU,GAAG,CAAC,MAAM,QAAQ,CAC9B,WAAW,EACX;QACE,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC;QAChC,QAAQ,EAAE,cAAc;KACzB,EACD,IAAI,CAAC,EAAE,EACP,YAAY,CACb,CAAQ,CAAC;IAEV,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,UAAU,EAAE,KAAK,CAAC,IAAI,UAAU,CAAC,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvE,UAAU,GAAG,CAAC,MAAM,QAAQ,CAC1B,WAAW,EACX,EAAE,KAAK,EAAE,4CAA4C,EAAE,QAAQ,EAAE,WAAW,EAAE,EAC9E,IAAI,CAAC,EAAE,EACP,qBAAqB,CACtB,CAAQ,CAAC;IACZ,CAAC;IACD,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACnD,MAAM,CAAC,UAAU,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;IAEnD,MAAM,gBAAgB,GAAG,IAAI,CAAC,QAAQ,IAAI,CAAC,CAAC,CAAC,CAAC,oBAAoB,CAAC,CAAC,CAAC,aAAa,CAAC;IACnF,MAAM,WAAW,GAAG,CAAC,MAAM,QAAQ,CACjC,gBAAgB,EAChB,EAAE,KAAK,EAAE,gBAAgB,EAAE,EAC3B,IAAI,CAAC,EAAE,EACP,iBAAiB,CAClB,CAAQ,CAAC;IACV,MAAM,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC,UAAU,EAAE,CAAC;IACvC,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACpD,MAAM,CAAC,WAAW,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;IAEpD,MAAM,OAAO,GAAG,CAAC,MAAM,QAAQ,CAC7B,gBAAgB,EAChB;QACE,IAAI,EAAE,0BAA0B,IAAI,CAAC,EAAE,IAAI,IAAI,CAAC,GAAG,EAAE,EAAE;QACvD,WAAW,EAAE,kCAAkC,IAAI,CAAC,QAAQ,iBAAiB,WAAW,GAAG;QAC3F,KAAK,EAAE;YACL;gBACE,KAAK,EAAE,IAAI,CAAC,MAAM;gBAClB,MAAM,EAAE,kDAAkD,IAAI,CAAC,EAAE,EAAE;gBACnE,QAAQ,EACN,mGAAmG;aACtG;SACF;KACF,EACD,IAAI,CAAC,EAAE,EACP,gBAAgB,CACjB,CAAQ,CAAC;IAEV,MAAM,QAAQ,CACZ,oBAAoB,EACpB;QACE,MAAM,EAAE,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC;QAC1B,OAAO,EAAE,MAAM;QACf,KAAK,EAAE,CAAC;QACR,MAAM,EAAE,cAAc,UAAU,CAAC,KAAK,CAAC,MAAM,0CAA0C;QACvF,SAAS,EAAE;YACT,OAAO,EAAE,OAAO,CAAC,OAAO;YACxB,KAAK,EAAE,OAAO,CAAC,KAAK;YACpB,MAAM,EAAE,IAAI,CAAC,EAAE;YACf,WAAW;YACX,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,QAAQ,EAAE,IAAI,CAAC,QAAQ;YACvB,gBAAgB,EAAE,IAAI,CAAC,gBAAgB;YACvC,sBAAsB,EAAE,IAAI,CAAC,sBAAsB;YACnD,sBAAsB,EAAE,IAAI,CAAC,sBAAsB;YACnD,aAAa,EAAE,IAAI,CAAC,aAAa;SAClC;KACF,EACD,IAAI,CAAC,EAAE,EACP,oBAAoB,CACrB,CAAC;IAEF,MAAM,WAAW,GAAG,CAAC,MAAM,QAAQ,CACjC,mBAAmB,EACnB,EAAE,KAAK,EAAE,OAAO,CAAC,KAAK,EAAE,EACxB,IAAI,CAAC,EAAE,EACP,mBAAmB,CACpB,CAAQ,CAAC;IACV,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;IAC7C,MAAM,CAAC,WAAW,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAE3C,MAAM,UAAU,GAAG,CAAC,MAAM,QAAQ,CAChC,iBAAiB,EACjB;QACE,KAAK,EAAE;YACL,EAAE,IAAI,EAAE,SAAS,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,sBAAsB,IAAI,CAAC,EAAE,EAAE,EAAE;YAC1E,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,mBAAmB,IAAI,CAAC,EAAE,EAAE,EAAE;YACpE,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,iCAAiC,IAAI,CAAC,EAAE,EAAE,EAAE;SACnF;KACF,EACD,IAAI,CAAC,EAAE,EACP,iBAAiB,CAClB,CAAQ,CAAC;IACV,MAAM,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAExC,MAAM,QAAQ,GAAG,CAAC,MAAM,QAAQ,CAC9B,wBAAwB,EACxB;QACE,MAAM,EAAE,8BAA8B,IAAI,CAAC,EAAE,EAAE;QAC/C,KAAK,EAAE;YACL,EAAE,QAAQ,EAAE,iBAAiB,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,8BAA8B,EAAE;YACrF,EAAE,QAAQ,EAAE,iBAAiB,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,oCAAoC,EAAE;YAC3F,EAAE,QAAQ,EAAE,mBAAmB,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,qCAAqC,EAAE;YAC9F,EAAE,QAAQ,EAAE,cAAc,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,iCAAiC,EAAE;YACrF,EAAE,QAAQ,EAAE,kBAAkB,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,kCAAkC,EAAE;YAC1F,EAAE,QAAQ,EAAE,qBAAqB,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,8BAA8B,EAAE;SAC1F;KACF,EACD,IAAI,CAAC,EAAE,EACP,wBAAwB,CACzB,CAAQ,CAAC;IACV,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAEnC,MAAM,cAAc,GAAG,IAAI,CAAC,EAAE,CAAC;IAC/B,MAAM,SAAS,GAAG,CAAC,MAAM,QAAQ,CAC/B,sBAAsB,EACtB,EAAE,KAAK,EAAE,cAAc,EAAE,KAAK,EAAE,EAAE,EAAE,EACpC,IAAI,CAAC,EAAE,EACP,sBAAsB,CACvB,CAAQ,CAAC;IACV,MAAM,CAAC,OAAO,SAAS,CAAC,YAAY,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IACrD,MAAM,CAAC,SAAS,CAAC,YAAY,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;IAElD,OAAO;QACL,MAAM,EAAE,IAAI,CAAC,EAAE;QACf,WAAW;QACX,EAAE,EAAE,IAAI;QACR,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,OAAO;QAC/B,eAAe,EAAE,UAAU,CAAC,KAAK,CAAC,MAAM;QACxC,aAAa,EAAE,SAAS,CAAC,YAAY;KACtC,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,aAAa,CAAC,KAAoB,EAAE,WAAmB;IACpE,MAAM,kBAAkB,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,WAAW,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC;IAC5E,MAAM,OAAO,GAAmB,IAAI,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;IACxD,IAAI,SAAS,GAAG,CAAC,CAAC;IAElB,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,kBAAkB,EAAE,EAAE,CAAC,CAAC,EAAE,WAAW,EAAE,EAAE,CAC5E,CAAC,KAAK,IAAI,EAAE;QACV,OAAO,IAAI,EAAE,CAAC;YACZ,MAAM,SAAS,GAAG,SAAS,EAAE,CAAC;YAC9B,IAAI,SAAS,IAAI,KAAK,CAAC,MAAM;gBAAE,OAAO;YAEtC,MAAM,IAAI,GAAG,KAAK,CAAC,SAAS,CAAC,CAAC;YAC9B,IAAI,CAAC;gBACH,OAAO,CAAC,SAAS,CAAC,GAAG,MAAM,cAAc,CAAC,IAAI,EAAE,WAAW,CAAC,CAAC;YAC/D,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,OAAO,CAAC,SAAS,CAAC,GAAG;oBACnB,MAAM,EAAE,IAAI,CAAC,EAAE;oBACf,WAAW;oBACX,EAAE,EAAE,KAAK;oBACT,SAAS,EAAE,CAAC;oBACZ,eAAe,EAAE,CAAC;oBAClB,aAAa,EAAE,CAAC;oBAChB,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;iBAC9D,CAAC;YACJ,CAAC;QACH,CAAC;IACH,CAAC,CAAC,EAAE,CACL,CAAC;IAEF,MAAM,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;IAC3B,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,QAAQ,CAAC,2EAA2E,EAAE,GAAG,EAAE;IACzF,EAAE,CAAC,qEAAqE,EAAE,KAAK,IAAI,EAAE;QACnF,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAChD,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QAEhD,MAAM,kBAAkB,GAAG,MAAM,CAAC,QAAQ,CACxC,OAAO,CAAC,GAAG,CAAC,8BAA8B,IAAI,GAAG,EACjD,EAAE,CACH,CAAC;QACF,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,CACxB,CAAC,EACD,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,KAAK,CAAC,MAAM,EAAE,MAAM,CAAC,QAAQ,CAAC,kBAAkB,CAAC,CAAC,CAAC,CAAC,kBAAkB,CAAC,CAAC,CAAC,CAAC,CAAC,CAC7F,CAAC;QAEF,MAAM,oBAAoB,GAAG,MAAM,CAAC,QAAQ,CAC1C,OAAO,CAAC,GAAG,CAAC,+BAA+B,IAAI,GAAG,EAClD,EAAE,CACH,CAAC;QACF,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAC1B,CAAC,EACD,IAAI,CAAC,GAAG,CAAC,SAAS,EAAE,MAAM,CAAC,QAAQ,CAAC,oBAAoB,CAAC,CAAC,CAAC,CAAC,oBAAoB,CAAC,CAAC,CAAC,CAAC,CAAC,CACtF,CAAC;QAEF,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC;QAChD,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC3B,MAAM,OAAO,GAAG,MAAM,aAAa,CAAC,KAAK,EAAE,WAAW,CAAC,CAAC;QACxD,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,OAAO,CAAC;QAEvC,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QACtD,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QAErD,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,sBAAsB,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC;QAC/E,MAAM,aAAa,GAAG;YACpB,WAAW;YACX,mBAAmB;YACnB,WAAW;YACX,gBAAgB;YAChB,gBAAgB;YAChB,oBAAoB;YACpB,mBAAmB;YACnB,iBAAiB;YACjB,wBAAwB;YACxB,sBAAsB;SACvB,CAAC;QAEF,OAAO,CAAC,GAAG,CACT,oCAAoC,OAAO,CAAC,OAAO,UAAU,OAAO,CAAC,KAAK,UAAU,SAAS,gBAAgB,WAAW,SAAS,MAAM,CAAC,MAAM,IAAI,OAAO,CAAC,MAAM,cAAc,SAAS,cAAc,sBAAsB,CAAC,MAAM,EAAE,CACrO,CAAC;QAEF,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACtB,OAAO,CAAC,KAAK,CACX,oCAAoC,EACpC,MAAM,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;gBACtB,MAAM,EAAE,MAAM,CAAC,MAAM;gBACrB,WAAW,EAAE,MAAM,CAAC,WAAW;gBAC/B,KAAK,EAAE,MAAM,CAAC,KAAK;aACpB,CAAC,CAAC,CACJ,CAAC;QACJ,CAAC;QAED,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC9B,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QAEtC,KAAK,MAAM,YAAY,IAAI,aAAa,EAAE,CAAC;YACzC,MAAM,CAAC,WAAW,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACnD,CAAC;IACH,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
@@ -1,10 +0,0 @@
1
- /**
2
- * Performance comparison: "no tools" (noop handlers) vs NodeBench MCP toolchain.
3
- *
4
- * Notes:
5
- * - This is a wall-clock micro-benchmark for local harness overhead, not capability/accuracy.
6
- * - Disabled by default to avoid noisy perf output in normal test runs.
7
- * Set NODEBENCH_RUN_PERF_COMPARE=1 to run.
8
- * - GAIA is gated; if a local GAIA fixture exists in `.cache/gaia`, it will be included.
9
- */
10
- export {};