nodebench-mcp 3.0.0 → 3.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (168) hide show
  1. package/dist/dashboard/operatingDashboardHtml.js +2 -1
  2. package/dist/dashboard/operatingDashboardHtml.js.map +1 -1
  3. package/dist/dashboard/operatingServer.js +3 -2
  4. package/dist/dashboard/operatingServer.js.map +1 -1
  5. package/dist/db.js +51 -3
  6. package/dist/db.js.map +1 -1
  7. package/dist/index.js +13 -16
  8. package/dist/index.js.map +1 -1
  9. package/dist/packageInfo.d.ts +3 -0
  10. package/dist/packageInfo.js +32 -0
  11. package/dist/packageInfo.js.map +1 -0
  12. package/dist/sandboxApi.js +2 -1
  13. package/dist/sandboxApi.js.map +1 -1
  14. package/dist/tools/boilerplateTools.js +10 -9
  15. package/dist/tools/boilerplateTools.js.map +1 -1
  16. package/dist/tools/documentationTools.js +2 -1
  17. package/dist/tools/documentationTools.js.map +1 -1
  18. package/dist/tools/progressiveDiscoveryTools.js +2 -1
  19. package/dist/tools/progressiveDiscoveryTools.js.map +1 -1
  20. package/dist/tools/toolRegistry.js +11 -0
  21. package/dist/tools/toolRegistry.js.map +1 -1
  22. package/dist/toolsetRegistry.js +74 -1
  23. package/dist/toolsetRegistry.js.map +1 -1
  24. package/package.json +4 -3
  25. package/dist/__tests__/analytics.test.d.ts +0 -11
  26. package/dist/__tests__/analytics.test.js +0 -546
  27. package/dist/__tests__/analytics.test.js.map +0 -1
  28. package/dist/__tests__/architectComplex.test.d.ts +0 -1
  29. package/dist/__tests__/architectComplex.test.js +0 -373
  30. package/dist/__tests__/architectComplex.test.js.map +0 -1
  31. package/dist/__tests__/architectSmoke.test.d.ts +0 -1
  32. package/dist/__tests__/architectSmoke.test.js +0 -92
  33. package/dist/__tests__/architectSmoke.test.js.map +0 -1
  34. package/dist/__tests__/audit-registry.d.ts +0 -1
  35. package/dist/__tests__/audit-registry.js +0 -60
  36. package/dist/__tests__/audit-registry.js.map +0 -1
  37. package/dist/__tests__/batchAutopilot.test.d.ts +0 -8
  38. package/dist/__tests__/batchAutopilot.test.js +0 -218
  39. package/dist/__tests__/batchAutopilot.test.js.map +0 -1
  40. package/dist/__tests__/cliSubcommands.test.d.ts +0 -1
  41. package/dist/__tests__/cliSubcommands.test.js +0 -138
  42. package/dist/__tests__/cliSubcommands.test.js.map +0 -1
  43. package/dist/__tests__/comparativeBench.test.d.ts +0 -1
  44. package/dist/__tests__/comparativeBench.test.js +0 -722
  45. package/dist/__tests__/comparativeBench.test.js.map +0 -1
  46. package/dist/__tests__/critterCalibrationEval.d.ts +0 -8
  47. package/dist/__tests__/critterCalibrationEval.js +0 -370
  48. package/dist/__tests__/critterCalibrationEval.js.map +0 -1
  49. package/dist/__tests__/dynamicLoading.test.d.ts +0 -1
  50. package/dist/__tests__/dynamicLoading.test.js +0 -280
  51. package/dist/__tests__/dynamicLoading.test.js.map +0 -1
  52. package/dist/__tests__/embeddingProvider.test.d.ts +0 -1
  53. package/dist/__tests__/embeddingProvider.test.js +0 -86
  54. package/dist/__tests__/embeddingProvider.test.js.map +0 -1
  55. package/dist/__tests__/evalDatasetBench.test.d.ts +0 -1
  56. package/dist/__tests__/evalDatasetBench.test.js +0 -738
  57. package/dist/__tests__/evalDatasetBench.test.js.map +0 -1
  58. package/dist/__tests__/evalHarness.test.d.ts +0 -1
  59. package/dist/__tests__/evalHarness.test.js +0 -1107
  60. package/dist/__tests__/evalHarness.test.js.map +0 -1
  61. package/dist/__tests__/fixtures/bfcl_v3_long_context.sample.json +0 -264
  62. package/dist/__tests__/fixtures/generateBfclLongContextFixture.d.ts +0 -10
  63. package/dist/__tests__/fixtures/generateBfclLongContextFixture.js +0 -135
  64. package/dist/__tests__/fixtures/generateBfclLongContextFixture.js.map +0 -1
  65. package/dist/__tests__/fixtures/generateSwebenchVerifiedFixture.d.ts +0 -14
  66. package/dist/__tests__/fixtures/generateSwebenchVerifiedFixture.js +0 -189
  67. package/dist/__tests__/fixtures/generateSwebenchVerifiedFixture.js.map +0 -1
  68. package/dist/__tests__/fixtures/generateToolbenchInstructionFixture.d.ts +0 -16
  69. package/dist/__tests__/fixtures/generateToolbenchInstructionFixture.js +0 -154
  70. package/dist/__tests__/fixtures/generateToolbenchInstructionFixture.js.map +0 -1
  71. package/dist/__tests__/fixtures/swebench_verified.sample.json +0 -162
  72. package/dist/__tests__/fixtures/toolbench_instruction.sample.json +0 -109
  73. package/dist/__tests__/forecastingDogfood.test.d.ts +0 -9
  74. package/dist/__tests__/forecastingDogfood.test.js +0 -284
  75. package/dist/__tests__/forecastingDogfood.test.js.map +0 -1
  76. package/dist/__tests__/forecastingScoring.test.d.ts +0 -9
  77. package/dist/__tests__/forecastingScoring.test.js +0 -202
  78. package/dist/__tests__/forecastingScoring.test.js.map +0 -1
  79. package/dist/__tests__/gaiaCapabilityAudioEval.test.d.ts +0 -15
  80. package/dist/__tests__/gaiaCapabilityAudioEval.test.js +0 -265
  81. package/dist/__tests__/gaiaCapabilityAudioEval.test.js.map +0 -1
  82. package/dist/__tests__/gaiaCapabilityEval.test.d.ts +0 -14
  83. package/dist/__tests__/gaiaCapabilityEval.test.js +0 -1259
  84. package/dist/__tests__/gaiaCapabilityEval.test.js.map +0 -1
  85. package/dist/__tests__/gaiaCapabilityFilesEval.test.d.ts +0 -15
  86. package/dist/__tests__/gaiaCapabilityFilesEval.test.js +0 -914
  87. package/dist/__tests__/gaiaCapabilityFilesEval.test.js.map +0 -1
  88. package/dist/__tests__/gaiaCapabilityMediaEval.test.d.ts +0 -15
  89. package/dist/__tests__/gaiaCapabilityMediaEval.test.js +0 -1101
  90. package/dist/__tests__/gaiaCapabilityMediaEval.test.js.map +0 -1
  91. package/dist/__tests__/helpers/answerMatch.d.ts +0 -41
  92. package/dist/__tests__/helpers/answerMatch.js +0 -267
  93. package/dist/__tests__/helpers/answerMatch.js.map +0 -1
  94. package/dist/__tests__/helpers/textLlm.d.ts +0 -25
  95. package/dist/__tests__/helpers/textLlm.js +0 -214
  96. package/dist/__tests__/helpers/textLlm.js.map +0 -1
  97. package/dist/__tests__/localDashboard.test.d.ts +0 -1
  98. package/dist/__tests__/localDashboard.test.js +0 -226
  99. package/dist/__tests__/localDashboard.test.js.map +0 -1
  100. package/dist/__tests__/multiHopDogfood.test.d.ts +0 -12
  101. package/dist/__tests__/multiHopDogfood.test.js +0 -303
  102. package/dist/__tests__/multiHopDogfood.test.js.map +0 -1
  103. package/dist/__tests__/openDatasetParallelEval.test.d.ts +0 -7
  104. package/dist/__tests__/openDatasetParallelEval.test.js +0 -209
  105. package/dist/__tests__/openDatasetParallelEval.test.js.map +0 -1
  106. package/dist/__tests__/openDatasetParallelEvalGaia.test.d.ts +0 -7
  107. package/dist/__tests__/openDatasetParallelEvalGaia.test.js +0 -279
  108. package/dist/__tests__/openDatasetParallelEvalGaia.test.js.map +0 -1
  109. package/dist/__tests__/openDatasetParallelEvalSwebench.test.d.ts +0 -7
  110. package/dist/__tests__/openDatasetParallelEvalSwebench.test.js +0 -220
  111. package/dist/__tests__/openDatasetParallelEvalSwebench.test.js.map +0 -1
  112. package/dist/__tests__/openDatasetParallelEvalToolbench.test.d.ts +0 -7
  113. package/dist/__tests__/openDatasetParallelEvalToolbench.test.js +0 -218
  114. package/dist/__tests__/openDatasetParallelEvalToolbench.test.js.map +0 -1
  115. package/dist/__tests__/openDatasetPerfComparison.test.d.ts +0 -10
  116. package/dist/__tests__/openDatasetPerfComparison.test.js +0 -318
  117. package/dist/__tests__/openDatasetPerfComparison.test.js.map +0 -1
  118. package/dist/__tests__/openclawDogfood.test.d.ts +0 -23
  119. package/dist/__tests__/openclawDogfood.test.js +0 -535
  120. package/dist/__tests__/openclawDogfood.test.js.map +0 -1
  121. package/dist/__tests__/openclawMessaging.test.d.ts +0 -14
  122. package/dist/__tests__/openclawMessaging.test.js +0 -232
  123. package/dist/__tests__/openclawMessaging.test.js.map +0 -1
  124. package/dist/__tests__/presetRealWorldBench.test.d.ts +0 -1
  125. package/dist/__tests__/presetRealWorldBench.test.js +0 -859
  126. package/dist/__tests__/presetRealWorldBench.test.js.map +0 -1
  127. package/dist/__tests__/tools.test.d.ts +0 -1
  128. package/dist/__tests__/tools.test.js +0 -3201
  129. package/dist/__tests__/tools.test.js.map +0 -1
  130. package/dist/__tests__/toolsetGatingEval.test.d.ts +0 -1
  131. package/dist/__tests__/toolsetGatingEval.test.js +0 -1099
  132. package/dist/__tests__/toolsetGatingEval.test.js.map +0 -1
  133. package/dist/__tests__/traceabilityDogfood.test.d.ts +0 -12
  134. package/dist/__tests__/traceabilityDogfood.test.js +0 -241
  135. package/dist/__tests__/traceabilityDogfood.test.js.map +0 -1
  136. package/dist/__tests__/webmcpTools.test.d.ts +0 -7
  137. package/dist/__tests__/webmcpTools.test.js +0 -195
  138. package/dist/__tests__/webmcpTools.test.js.map +0 -1
  139. package/dist/benchmarks/testProviderBus.d.ts +0 -7
  140. package/dist/benchmarks/testProviderBus.js +0 -272
  141. package/dist/benchmarks/testProviderBus.js.map +0 -1
  142. package/dist/hooks/postCompaction.d.ts +0 -14
  143. package/dist/hooks/postCompaction.js +0 -51
  144. package/dist/hooks/postCompaction.js.map +0 -1
  145. package/dist/security/__tests__/security.test.d.ts +0 -8
  146. package/dist/security/__tests__/security.test.js +0 -295
  147. package/dist/security/__tests__/security.test.js.map +0 -1
  148. package/dist/sync/hyperloopEval.test.d.ts +0 -4
  149. package/dist/sync/hyperloopEval.test.js +0 -60
  150. package/dist/sync/hyperloopEval.test.js.map +0 -1
  151. package/dist/sync/store.test.d.ts +0 -4
  152. package/dist/sync/store.test.js +0 -43
  153. package/dist/sync/store.test.js.map +0 -1
  154. package/dist/tools/documentTools.d.ts +0 -5
  155. package/dist/tools/documentTools.js +0 -524
  156. package/dist/tools/documentTools.js.map +0 -1
  157. package/dist/tools/financialTools.d.ts +0 -10
  158. package/dist/tools/financialTools.js +0 -403
  159. package/dist/tools/financialTools.js.map +0 -1
  160. package/dist/tools/memoryTools.d.ts +0 -5
  161. package/dist/tools/memoryTools.js +0 -137
  162. package/dist/tools/memoryTools.js.map +0 -1
  163. package/dist/tools/planningTools.d.ts +0 -5
  164. package/dist/tools/planningTools.js +0 -147
  165. package/dist/tools/planningTools.js.map +0 -1
  166. package/dist/tools/searchTools.d.ts +0 -5
  167. package/dist/tools/searchTools.js +0 -145
  168. package/dist/tools/searchTools.js.map +0 -1
@@ -1,209 +0,0 @@
1
- /**
2
- * Open-source dataset benchmark for long-running tasks.
3
- *
4
- * This test uses BFCL v3 long-context scenarios and runs task workflows
5
- * through NodeBench MCP tools in parallel "subagent" workers.
6
- */
7
- import { describe, expect, it } from "vitest";
8
- import datasetFixture from "./fixtures/bfcl_v3_long_context.sample.json";
9
- import { verificationTools } from "../tools/verificationTools.js";
10
- import { reconTools } from "../tools/reconTools.js";
11
- import { evalTools } from "../tools/evalTools.js";
12
- import { qualityGateTools } from "../tools/qualityGateTools.js";
13
- import { flywheelTools } from "../tools/flywheelTools.js";
14
- import { learningTools } from "../tools/learningTools.js";
15
- import { documentationTools } from "../tools/documentationTools.js";
16
- import { agentBootstrapTools } from "../tools/agentBootstrapTools.js";
17
- import { createMetaTools } from "../tools/metaTools.js";
18
- const fixture = datasetFixture;
19
- const domainTools = [
20
- ...verificationTools,
21
- ...evalTools,
22
- ...qualityGateTools,
23
- ...learningTools,
24
- ...flywheelTools,
25
- ...reconTools,
26
- ...documentationTools,
27
- ...agentBootstrapTools,
28
- ];
29
- const allTools = [...domainTools, ...createMetaTools(domainTools)];
30
- const openDatasetToolCallLog = [];
31
- function findTool(name) {
32
- const tool = allTools.find((candidate) => candidate.name === name);
33
- if (!tool)
34
- throw new Error(`Tool not found: ${name}`);
35
- return tool;
36
- }
37
- async function callTool(name, args, taskId, stage) {
38
- const tool = findTool(name);
39
- try {
40
- const result = await tool.handler(args);
41
- openDatasetToolCallLog.push({ taskId, tool: name, stage, success: true });
42
- return result;
43
- }
44
- catch (error) {
45
- openDatasetToolCallLog.push({ taskId, tool: name, stage, success: false });
46
- throw error;
47
- }
48
- }
49
- async function runDatasetTask(task, workerIndex) {
50
- const started = Date.now();
51
- const recon = (await callTool("run_recon", {
52
- target: `BFCL long-context task ${task.id}`,
53
- description: `Open-source long-running benchmark task (${task.turnCount} turns).`,
54
- projectContext: {
55
- techStack: "TypeScript, MCP, SQLite",
56
- architecture: "MCP tool orchestration benchmark",
57
- },
58
- }, task.id, "recon_start"));
59
- await callTool("log_recon_finding", {
60
- sessionId: recon.sessionId,
61
- category: "dataset",
62
- summary: `Ingested BFCL task ${task.id} with ${task.turnCount} turns and ${task.expectedPathLength} expected calls.`,
63
- sourceUrl: `${fixture.sourceUrl}#${task.id}`,
64
- relevance: "Long-running multi-turn benchmark for MCP tool orchestration.",
65
- actionItems: "Run in parallel subagents and track flywheel compliance.",
66
- }, task.id, "recon_log");
67
- let discovered = (await callTool("findTools", {
68
- query: task.prompt.slice(0, 600),
69
- category: "bootstrap",
70
- }, task.id, "find_tools"));
71
- if (!Array.isArray(discovered?.tools) || discovered.tools.length === 0) {
72
- discovered = (await callTool("findTools", { query: "verification flywheel methodology", category: "verification" }, task.id, "find_tools_fallback"));
73
- }
74
- expect(Array.isArray(discovered.tools)).toBe(true);
75
- expect(discovered.tools.length).toBeGreaterThan(0);
76
- const methodology = (await callTool("getMethodology", { topic: "mandatory_flywheel" }, task.id, "get_methodology"));
77
- expect(methodology.title).toBeTruthy();
78
- expect(Array.isArray(methodology.steps)).toBe(true);
79
- expect(methodology.steps.length).toBeGreaterThan(0);
80
- const evalRun = (await callTool("start_eval_run", {
81
- name: `open-dataset-${task.id}-${Date.now()}`,
82
- description: `BFCL long-context scenario (${task.turnCount} turns, worker ${workerIndex})`,
83
- cases: [
84
- {
85
- input: task.prompt,
86
- intent: `Coordinate long-running workflow for ${task.id}`,
87
- expected: "Discover tool strategy, run eval bookkeeping, and enforce mandatory flywheel checks.",
88
- },
89
- ],
90
- }, task.id, "start_eval_run"));
91
- await callTool("record_eval_result", {
92
- caseId: evalRun.caseIds[0],
93
- verdict: "pass",
94
- score: 1,
95
- actual: `Discovered ${discovered.tools.length} candidate tools and completed the workflow.`,
96
- telemetry: {
97
- dataset: fixture.dataset,
98
- split: fixture.split,
99
- taskId: task.id,
100
- workerIndex,
101
- turnCount: task.turnCount,
102
- expectedPathLength: task.expectedPathLength,
103
- expectedPath: task.expectedPath,
104
- involvedClasses: task.involvedClasses,
105
- },
106
- }, task.id, "record_eval_result");
107
- const evalSummary = (await callTool("complete_eval_run", { runId: evalRun.runId }, task.id, "complete_eval_run"));
108
- expect(evalSummary.status).toBe("completed");
109
- expect(evalSummary.summary.passed).toBe(1);
110
- const closedLoop = (await callTool("run_closed_loop", {
111
- steps: [
112
- { step: "compile", passed: true, output: `Compile checks for ${task.id}` },
113
- { step: "lint", passed: true, output: `Lint checks for ${task.id}` },
114
- { step: "test", passed: true, output: `Benchmark assertions for ${task.id}` },
115
- ],
116
- }, task.id, "run_closed_loop"));
117
- expect(closedLoop.allPassed).toBe(true);
118
- const flywheel = (await callTool("run_mandatory_flywheel", {
119
- target: `Open-source BFCL long-context task ${task.id}`,
120
- steps: [
121
- { stepName: "static_analysis", passed: true, output: "Types and schemas validated." },
122
- { stepName: "happy_path_test", passed: true, output: "Dataset task completed end-to-end." },
123
- { stepName: "failure_path_test", passed: true, output: "Fallback discovery query validated." },
124
- { stepName: "gap_analysis", passed: true, output: "No blocking gaps for this task." },
125
- { stepName: "fix_and_reverify", passed: true, output: "No rework required after checks." },
126
- { stepName: "deploy_and_document", passed: true, output: "Benchmark result documented." },
127
- ],
128
- }, task.id, "run_mandatory_flywheel"));
129
- expect(flywheel.passed).toBe(true);
130
- const knowledge = (await callTool("search_all_knowledge", { query: task.id, limit: 10 }, task.id, "search_all_knowledge"));
131
- expect(typeof knowledge.totalResults).toBe("number");
132
- expect(knowledge.totalResults).toBeGreaterThan(0);
133
- return {
134
- taskId: task.id,
135
- workerIndex,
136
- ok: true,
137
- elapsedMs: Date.now() - started,
138
- discoveredTools: discovered.tools.length,
139
- knowledgeHits: knowledge.totalResults,
140
- };
141
- }
142
- async function runWorkerPool(tasks, concurrency) {
143
- const boundedConcurrency = Math.max(1, Math.min(concurrency, tasks.length));
144
- const results = new Array(tasks.length);
145
- let nextIndex = 0;
146
- const workers = Array.from({ length: boundedConcurrency }, (_, workerIndex) => (async () => {
147
- while (true) {
148
- const taskIndex = nextIndex++;
149
- if (taskIndex >= tasks.length)
150
- return;
151
- const task = tasks[taskIndex];
152
- try {
153
- results[taskIndex] = await runDatasetTask(task, workerIndex);
154
- }
155
- catch (error) {
156
- results[taskIndex] = {
157
- taskId: task.id,
158
- workerIndex,
159
- ok: false,
160
- elapsedMs: 0,
161
- discoveredTools: 0,
162
- knowledgeHits: 0,
163
- error: error instanceof Error ? error.message : String(error),
164
- };
165
- }
166
- }
167
- })());
168
- await Promise.all(workers);
169
- return results;
170
- }
171
- describe("Scenario: Open-Source Long-Running Dataset (Parallel Subagents)", () => {
172
- it("should execute BFCL long-context tasks with parallel MCP subagent workflows", async () => {
173
- expect(Array.isArray(fixture.tasks)).toBe(true);
174
- expect(fixture.tasks.length).toBeGreaterThan(0);
175
- const requestedTaskLimit = Number.parseInt(process.env.NODEBENCH_OPEN_DATASET_TASK_LIMIT ?? "8", 10);
176
- const taskLimit = Math.max(1, Math.min(fixture.tasks.length, Number.isFinite(requestedTaskLimit) ? requestedTaskLimit : 8));
177
- const requestedConcurrency = Number.parseInt(process.env.NODEBENCH_OPEN_DATASET_CONCURRENCY ?? "4", 10);
178
- const concurrency = Math.max(1, Math.min(taskLimit, Number.isFinite(requestedConcurrency) ? requestedConcurrency : 4));
179
- const tasks = fixture.tasks.slice(0, taskLimit);
180
- const started = Date.now();
181
- const results = await runWorkerPool(tasks, concurrency);
182
- const elapsedMs = Date.now() - started;
183
- const failed = results.filter((result) => !result.ok);
184
- const passed = results.filter((result) => result.ok);
185
- const calledTools = new Set(openDatasetToolCallLog.map((entry) => entry.tool));
186
- const requiredTools = [
187
- "run_recon",
188
- "log_recon_finding",
189
- "findTools",
190
- "getMethodology",
191
- "start_eval_run",
192
- "record_eval_result",
193
- "complete_eval_run",
194
- "run_closed_loop",
195
- "run_mandatory_flywheel",
196
- "search_all_knowledge",
197
- ];
198
- console.log(`[open-dataset] dataset=${fixture.dataset} split=${fixture.split} tasks=${taskLimit} concurrency=${concurrency} pass=${passed.length}/${results.length} elapsedMs=${elapsedMs} toolCalls=${openDatasetToolCallLog.length}`);
199
- if (failed.length > 0) {
200
- console.error("[open-dataset] failures:", failed.map((result) => ({ taskId: result.taskId, workerIndex: result.workerIndex, error: result.error })));
201
- }
202
- expect(failed.length).toBe(0);
203
- expect(passed.length).toBe(taskLimit);
204
- for (const requiredTool of requiredTools) {
205
- expect(calledTools.has(requiredTool)).toBe(true);
206
- }
207
- });
208
- });
209
- //# sourceMappingURL=openDatasetParallelEval.test.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"openDatasetParallelEval.test.js","sourceRoot":"","sources":["../../src/__tests__/openDatasetParallelEval.test.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,QAAQ,CAAC;AAC9C,OAAO,cAAc,MAAM,6CAA6C,CAAC;AACzE,OAAO,EAAE,iBAAiB,EAAE,MAAM,+BAA+B,CAAC;AAClE,OAAO,EAAE,UAAU,EAAE,MAAM,wBAAwB,CAAC;AACpD,OAAO,EAAE,SAAS,EAAE,MAAM,uBAAuB,CAAC;AAClD,OAAO,EAAE,gBAAgB,EAAE,MAAM,8BAA8B,CAAC;AAChE,OAAO,EAAE,aAAa,EAAE,MAAM,2BAA2B,CAAC;AAC1D,OAAO,EAAE,aAAa,EAAE,MAAM,2BAA2B,CAAC;AAC1D,OAAO,EAAE,kBAAkB,EAAE,MAAM,gCAAgC,CAAC;AACpE,OAAO,EAAE,mBAAmB,EAAE,MAAM,iCAAiC,CAAC;AACtE,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AAuCxD,MAAM,OAAO,GAAG,cAAgC,CAAC;AAEjD,MAAM,WAAW,GAAc;IAC7B,GAAG,iBAAiB;IACpB,GAAG,SAAS;IACZ,GAAG,gBAAgB;IACnB,GAAG,aAAa;IAChB,GAAG,aAAa;IAChB,GAAG,UAAU;IACb,GAAG,kBAAkB;IACrB,GAAG,mBAAmB;CACvB,CAAC;AACF,MAAM,QAAQ,GAAG,CAAC,GAAG,WAAW,EAAE,GAAG,eAAe,CAAC,WAAW,CAAC,CAAC,CAAC;AAEnE,MAAM,sBAAsB,GAKvB,EAAE,CAAC;AAER,SAAS,QAAQ,CAAC,IAAY;IAC5B,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC,SAAS,EAAE,EAAE,CAAC,SAAS,CAAC,IAAI,KAAK,IAAI,CAAC,CAAC;IACnE,IAAI,CAAC,IAAI;QAAE,MAAM,IAAI,KAAK,CAAC,mBAAmB,IAAI,EAAE,CAAC,CAAC;IACtD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,KAAK,UAAU,QAAQ,CACrB,IAAY,EACZ,IAA6B,EAC7B,MAAc,EACd,KAAa;IAEb,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC;IAC5B,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;QACxC,sBAAsB,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC;QAC1E,OAAO,MAAM,CAAC;IAChB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,sBAAsB,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC,CAAC;QAC3E,MAAM,KAAK,CAAC;IACd,CAAC;AACH,CAAC;AAED,KAAK,UAAU,cAAc,CAAC,IAAiB,EAAE,WAAmB;IAClE,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAE3B,MAAM,KAAK,GAAG,CAAC,MAAM,QAAQ,CAC3B,WAAW,EACX;QACE,MAAM,EAAE,0BAA0B,IAAI,CAAC,EAAE,EAAE;QAC3C,WAAW,EAAE,4CAA4C,IAAI,CAAC,SAAS,UAAU;QACjF,cAAc,EAAE;YACd,SAAS,EAAE,yBAAyB;YACpC,YAAY,EAAE,kCAAkC;SACjD;KACF,EACD,IAAI,CAAC,EAAE,EACP,aAAa,CACd,CAAQ,CAAC;IAEV,MAAM,QAAQ,CACZ,mBAAmB,EACnB;QACE,SAAS,EAAE,KAAK,CAAC,SAAS;QAC1B,QAAQ,EAAE,SAAS;QACnB,OAAO,EAAE,sBAAsB,IAAI,CAAC,EAAE,SAAS,IAAI,CAAC,SAAS,cAAc,IAAI,CAAC,kBAAkB,kBAAkB;QACpH,SAAS,EAAE,GAAG,OAAO,CAAC,SAAS,IAAI,IAAI,CAAC,EAAE,EAAE;QAC5C,SAAS,EAAE,+DAA+D;QAC1E,WAAW,EAAE,0DAA0D;KACxE,EACD,IAAI,CAAC,EAAE,EACP,WAAW,CACZ,CAAC;IAEF,IAAI,UAAU,GAAG,CAAC,MAAM,QAAQ,CAC9B,WAAW,EACX;QACE,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC;QAChC,QAAQ,EAAE,WAAW;KACtB,EACD,IAAI,CAAC,EAAE,EACP,YAAY,CACb,CAAQ,CAAC;IAEV,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,UAAU,EAAE,KAAK,CAAC,IAAI,UAAU,CAAC,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvE,UAAU,GAAG,CAAC,MAAM,QAAQ,CAC1B,WAAW,EACX,EAAE,KAAK,EAAE,mCAAmC,EAAE,QAAQ,EAAE,cAAc,EAAE,EACxE,IAAI,CAAC,EAAE,EACP,qBAAqB,CACtB,CAAQ,CAAC;IACZ,CAAC;IACD,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACnD,MAAM,CAAC,UAAU,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;IAEnD,MAAM,WAAW,GAAG,CAAC,MAAM,QAAQ,CACjC,gBAAgB,EAChB,EAAE,KAAK,EAAE,oBAAoB,EAAE,EAC/B,IAAI,CAAC,EAAE,EACP,iBAAiB,CAClB,CAAQ,CAAC;IACV,MAAM,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC,UAAU,EAAE,CAAC;IACvC,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACpD,MAAM,CAAC,WAAW,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;IAEpD,MAAM,OAAO,GAAG,CAAC,MAAM,QAAQ,CAC7B,gBAAgB,EAChB;QACE,IAAI,EAAE,gBAAgB,IAAI,CAAC,EAAE,IAAI,IAAI,CAAC,GAAG,EAAE,EAAE;QAC7C,WAAW,EAAE,+BAA+B,IAAI,CAAC,SAAS,kBAAkB,WAAW,GAAG;QAC1F,KAAK,EAAE;YACL;gBACE,KAAK,EAAE,IAAI,CAAC,MAAM;gBAClB,MAAM,EAAE,wCAAwC,IAAI,CAAC,EAAE,EAAE;gBACzD,QAAQ,EACN,sFAAsF;aACzF;SACF;KACF,EACD,IAAI,CAAC,EAAE,EACP,gBAAgB,CACjB,CAAQ,CAAC;IAEV,MAAM,QAAQ,CACZ,oBAAoB,EACpB;QACE,MAAM,EAAE,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC;QAC1B,OAAO,EAAE,MAAM;QACf,KAAK,EAAE,CAAC;QACR,MAAM,EAAE,cAAc,UAAU,CAAC,KAAK,CAAC,MAAM,8CAA8C;QAC3F,SAAS,EAAE;YACT,OAAO,EAAE,OAAO,CAAC,OAAO;YACxB,KAAK,EAAE,OAAO,CAAC,KAAK;YACpB,MAAM,EAAE,IAAI,CAAC,EAAE;YACf,WAAW;YACX,SAAS,EAAE,IAAI,CAAC,SAAS;YACzB,kBAAkB,EAAE,IAAI,CAAC,kBAAkB;YAC3C,YAAY,EAAE,IAAI,CAAC,YAAY;YAC/B,eAAe,EAAE,IAAI,CAAC,eAAe;SACtC;KACF,EACD,IAAI,CAAC,EAAE,EACP,oBAAoB,CACrB,CAAC;IAEF,MAAM,WAAW,GAAG,CAAC,MAAM,QAAQ,CACjC,mBAAmB,EACnB,EAAE,KAAK,EAAE,OAAO,CAAC,KAAK,EAAE,EACxB,IAAI,CAAC,EAAE,EACP,mBAAmB,CACpB,CAAQ,CAAC;IACV,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;IAC7C,MAAM,CAAC,WAAW,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAE3C,MAAM,UAAU,GAAG,CAAC,MAAM,QAAQ,CAChC,iBAAiB,EACjB;QACE,KAAK,EAAE;YACL,EAAE,IAAI,EAAE,SAAS,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,sBAAsB,IAAI,CAAC,EAAE,EAAE,EAAE;YAC1E,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,mBAAmB,IAAI,CAAC,EAAE,EAAE,EAAE;YACpE,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,4BAA4B,IAAI,CAAC,EAAE,EAAE,EAAE;SAC9E;KACF,EACD,IAAI,CAAC,EAAE,EACP,iBAAiB,CAClB,CAAQ,CAAC;IACV,MAAM,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAExC,MAAM,QAAQ,GAAG,CAAC,MAAM,QAAQ,CAC9B,wBAAwB,EACxB;QACE,MAAM,EAAE,sCAAsC,IAAI,CAAC,EAAE,EAAE;QACvD,KAAK,EAAE;YACL,EAAE,QAAQ,EAAE,iBAAiB,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,8BAA8B,EAAE;YACrF,EAAE,QAAQ,EAAE,iBAAiB,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,oCAAoC,EAAE;YAC3F,EAAE,QAAQ,EAAE,mBAAmB,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,qCAAqC,EAAE;YAC9F,EAAE,QAAQ,EAAE,cAAc,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,iCAAiC,EAAE;YACrF,EAAE,QAAQ,EAAE,kBAAkB,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,kCAAkC,EAAE;YAC1F,EAAE,QAAQ,EAAE,qBAAqB,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,8BAA8B,EAAE;SAC1F;KACF,EACD,IAAI,CAAC,EAAE,EACP,wBAAwB,CACzB,CAAQ,CAAC;IACV,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAEnC,MAAM,SAAS,GAAG,CAAC,MAAM,QAAQ,CAC/B,sBAAsB,EACtB,EAAE,KAAK,EAAE,IAAI,CAAC,EAAE,EAAE,KAAK,EAAE,EAAE,EAAE,EAC7B,IAAI,CAAC,EAAE,EACP,sBAAsB,CACvB,CAAQ,CAAC;IACV,MAAM,CAAC,OAAO,SAAS,CAAC,YAAY,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IACrD,MAAM,CAAC,SAAS,CAAC,YAAY,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;IAElD,OAAO;QACL,MAAM,EAAE,IAAI,CAAC,EAAE;QACf,WAAW;QACX,EAAE,EAAE,IAAI;QACR,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,OAAO;QAC/B,eAAe,EAAE,UAAU,CAAC,KAAK,CAAC,MAAM;QACxC,aAAa,EAAE,SAAS,CAAC,YAAY;KACtC,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,aAAa,CAAC,KAAoB,EAAE,WAAmB;IACpE,MAAM,kBAAkB,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,WAAW,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC;IAC5E,MAAM,OAAO,GAAmB,IAAI,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;IACxD,IAAI,SAAS,GAAG,CAAC,CAAC;IAElB,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,kBAAkB,EAAE,EAAE,CAAC,CAAC,EAAE,WAAW,EAAE,EAAE,CAC5E,CAAC,KAAK,IAAI,EAAE;QACV,OAAO,IAAI,EAAE,CAAC;YACZ,MAAM,SAAS,GAAG,SAAS,EAAE,CAAC;YAC9B,IAAI,SAAS,IAAI,KAAK,CAAC,MAAM;gBAAE,OAAO;YAEtC,MAAM,IAAI,GAAG,KAAK,CAAC,SAAS,CAAC,CAAC;YAC9B,IAAI,CAAC;gBACH,OAAO,CAAC,SAAS,CAAC,GAAG,MAAM,cAAc,CAAC,IAAI,EAAE,WAAW,CAAC,CAAC;YAC/D,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,OAAO,CAAC,SAAS,CAAC,GAAG;oBACnB,MAAM,EAAE,IAAI,CAAC,EAAE;oBACf,WAAW;oBACX,EAAE,EAAE,KAAK;oBACT,SAAS,EAAE,CAAC;oBACZ,eAAe,EAAE,CAAC;oBAClB,aAAa,EAAE,CAAC;oBAChB,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;iBAC9D,CAAC;YACJ,CAAC;QACH,CAAC;IACH,CAAC,CAAC,EAAE,CACL,CAAC;IAEF,MAAM,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;IAC3B,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,QAAQ,CAAC,iEAAiE,EAAE,GAAG,EAAE;IAC/E,EAAE,CAAC,6EAA6E,EAAE,KAAK,IAAI,EAAE;QAC3F,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAChD,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QAEhD,MAAM,kBAAkB,GAAG,MAAM,CAAC,QAAQ,CACxC,OAAO,CAAC,GAAG,CAAC,iCAAiC,IAAI,GAAG,EACpD,EAAE,CACH,CAAC;QACF,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,CACxB,CAAC,EACD,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,KAAK,CAAC,MAAM,EAAE,MAAM,CAAC,QAAQ,CAAC,kBAAkB,CAAC,CAAC,CAAC,CAAC,kBAAkB,CAAC,CAAC,CAAC,CAAC,CAAC,CAC7F,CAAC;QAEF,MAAM,oBAAoB,GAAG,MAAM,CAAC,QAAQ,CAC1C,OAAO,CAAC,GAAG,CAAC,kCAAkC,IAAI,GAAG,EACrD,EAAE,CACH,CAAC;QACF,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAC1B,CAAC,EACD,IAAI,CAAC,GAAG,CAAC,SAAS,EAAE,MAAM,CAAC,QAAQ,CAAC,oBAAoB,CAAC,CAAC,CAAC,CAAC,oBAAoB,CAAC,CAAC,CAAC,CAAC,CAAC,CACtF,CAAC;QAEF,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC;QAChD,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC3B,MAAM,OAAO,GAAG,MAAM,aAAa,CAAC,KAAK,EAAE,WAAW,CAAC,CAAC;QACxD,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,OAAO,CAAC;QAEvC,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QACtD,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QAErD,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,sBAAsB,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC;QAC/E,MAAM,aAAa,GAAG;YACpB,WAAW;YACX,mBAAmB;YACnB,WAAW;YACX,gBAAgB;YAChB,gBAAgB;YAChB,oBAAoB;YACpB,mBAAmB;YACnB,iBAAiB;YACjB,wBAAwB;YACxB,sBAAsB;SACvB,CAAC;QAEF,OAAO,CAAC,GAAG,CACT,0BAA0B,OAAO,CAAC,OAAO,UAAU,OAAO,CAAC,KAAK,UAAU,SAAS,gBAAgB,WAAW,SAAS,MAAM,CAAC,MAAM,IAAI,OAAO,CAAC,MAAM,cAAc,SAAS,cAAc,sBAAsB,CAAC,MAAM,EAAE,CAC3N,CAAC;QAEF,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACtB,OAAO,CAAC,KAAK,CACX,0BAA0B,EAC1B,MAAM,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,EAAE,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,WAAW,EAAE,MAAM,CAAC,WAAW,EAAE,KAAK,EAAE,MAAM,CAAC,KAAK,EAAE,CAAC,CAAC,CAC1G,CAAC;QACJ,CAAC;QAED,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC9B,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QAEtC,KAAK,MAAM,YAAY,IAAI,aAAa,EAAE,CAAC;YACzC,MAAM,CAAC,WAAW,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACnD,CAAC;IACH,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
@@ -1,7 +0,0 @@
1
- /**
2
- * Gated dataset benchmark for long-running tool-augmented tasks (GAIA lane).
3
- *
4
- * GAIA is a gated dataset. Fixtures are generated into `.cache/gaia` (gitignored)
5
- * and this test intentionally avoids logging the raw question text to stdout.
6
- */
7
- export {};
@@ -1,279 +0,0 @@
1
- /**
2
- * Gated dataset benchmark for long-running tool-augmented tasks (GAIA lane).
3
- *
4
- * GAIA is a gated dataset. Fixtures are generated into `.cache/gaia` (gitignored)
5
- * and this test intentionally avoids logging the raw question text to stdout.
6
- */
7
- import { describe, expect, it } from "vitest";
8
- import { existsSync } from "node:fs";
9
- import { readFile } from "node:fs/promises";
10
- import path from "node:path";
11
- import { fileURLToPath } from "node:url";
12
- import { verificationTools } from "../tools/verificationTools.js";
13
- import { reconTools } from "../tools/reconTools.js";
14
- import { evalTools } from "../tools/evalTools.js";
15
- import { qualityGateTools } from "../tools/qualityGateTools.js";
16
- import { flywheelTools } from "../tools/flywheelTools.js";
17
- import { learningTools } from "../tools/learningTools.js";
18
- import { documentationTools } from "../tools/documentationTools.js";
19
- import { agentBootstrapTools } from "../tools/agentBootstrapTools.js";
20
- import { createMetaTools } from "../tools/metaTools.js";
21
- const domainTools = [
22
- ...verificationTools,
23
- ...evalTools,
24
- ...qualityGateTools,
25
- ...learningTools,
26
- ...flywheelTools,
27
- ...reconTools,
28
- ...documentationTools,
29
- ...agentBootstrapTools,
30
- ];
31
- const allTools = [...domainTools, ...createMetaTools(domainTools)];
32
- const openDatasetToolCallLog = [];
33
- function findTool(name) {
34
- const tool = allTools.find((candidate) => candidate.name === name);
35
- if (!tool)
36
- throw new Error(`Tool not found: ${name}`);
37
- return tool;
38
- }
39
- async function callTool(name, args, taskId, stage) {
40
- const tool = findTool(name);
41
- try {
42
- const result = await tool.handler(args);
43
- openDatasetToolCallLog.push({ taskId, tool: name, stage, success: true });
44
- return result;
45
- }
46
- catch (error) {
47
- openDatasetToolCallLog.push({ taskId, tool: name, stage, success: false });
48
- throw error;
49
- }
50
- }
51
- function buildDiscoveryQuery(task) {
52
- const promptLower = task.prompt.toLowerCase();
53
- const tags = [];
54
- // These tags are intentionally high-level to avoid accidentally printing or persisting
55
- // the full gated question text while still exercising tool discovery paths.
56
- if (task.hasFile)
57
- tags.push(`${task.fileExt || "file"} attachment`);
58
- if (promptLower.includes("wikipedia"))
59
- tags.push("wikipedia lookup");
60
- if (promptLower.includes("github"))
61
- tags.push("github lookup");
62
- if (promptLower.includes("youtube") || promptLower.includes("video"))
63
- tags.push("video analysis");
64
- if (promptLower.includes("spreadsheet") || task.fileExt === "xlsx")
65
- tags.push("spreadsheet analysis");
66
- if (promptLower.includes("pdf") || task.fileExt === "pdf")
67
- tags.push("pdf parsing");
68
- if (promptLower.includes("image") || ["png", "jpg", "jpeg", "webp"].includes(task.fileExt))
69
- tags.push("image ocr");
70
- if (promptLower.includes("zip code") || promptLower.includes("zipcode"))
71
- tags.push("zip code lookup");
72
- if (promptLower.includes("calculate") || promptLower.includes("round"))
73
- tags.push("calculation");
74
- const tagText = tags.length > 0 ? tags.join(", ") : "general tool-augmented reasoning";
75
- return `GAIA task ${task.id} (level ${task.level || "?"}): ${tagText}`;
76
- }
77
- function redactedEvalInput(task) {
78
- const bits = [
79
- `GAIA task ${task.id} (prompt redacted)`,
80
- `level=${task.level || "?"}`,
81
- `questionLength=${task.questionLength}`,
82
- `hasFile=${task.hasFile}`,
83
- `fileExt=${task.fileExt || ""}`,
84
- `steps=${task.annotator?.numberOfSteps ?? 0}`,
85
- `tools=${task.annotator?.numberOfTools ?? 0}`,
86
- ];
87
- return bits.join(" | ");
88
- }
89
- async function loadGaiaFixture(fixturePath) {
90
- const raw = await readFile(fixturePath, "utf8");
91
- const parsed = JSON.parse(raw);
92
- if (!parsed || !Array.isArray(parsed.tasks)) {
93
- throw new Error("Invalid GAIA fixture payload");
94
- }
95
- return parsed;
96
- }
97
- async function runDatasetTask(fixture, task, workerIndex) {
98
- const started = Date.now();
99
- const recon = (await callTool("run_recon", {
100
- target: `GAIA task ${task.id}`,
101
- description: `Gated long-running benchmark task (${fixture.config}/${fixture.split}).`,
102
- projectContext: {
103
- techStack: "TypeScript, MCP, SQLite",
104
- architecture: "MCP orchestration benchmark with parallel subagent workers",
105
- },
106
- }, task.id, "recon_start"));
107
- await callTool("log_recon_finding", {
108
- sessionId: recon.sessionId,
109
- category: "dataset",
110
- summary: `Ingested GAIA task ${task.id} (level=${task.level}, questionLength=${task.questionLength}, hasFile=${task.hasFile}, ext=${task.fileExt}).`,
111
- sourceUrl: fixture.sourceUrl,
112
- relevance: "Tool-augmented multi-step benchmark lane (GAIA).",
113
- actionItems: "Run in parallel worker pool and enforce mandatory flywheel checks.",
114
- }, task.id, "recon_log");
115
- let discovered = (await callTool("findTools", {
116
- query: buildDiscoveryQuery(task),
117
- category: "bootstrap",
118
- }, task.id, "find_tools"));
119
- if (!Array.isArray(discovered?.tools) || discovered.tools.length === 0) {
120
- discovered = (await callTool("findTools", { query: "tool-augmented reasoning with files web and computation", category: "verification" }, task.id, "find_tools_fallback"));
121
- }
122
- expect(Array.isArray(discovered.tools)).toBe(true);
123
- expect(discovered.tools.length).toBeGreaterThan(0);
124
- const methodology = (await callTool("getMethodology", { topic: "mandatory_flywheel" }, task.id, "get_methodology"));
125
- expect(methodology.title).toBeTruthy();
126
- expect(Array.isArray(methodology.steps)).toBe(true);
127
- expect(methodology.steps.length).toBeGreaterThan(0);
128
- const evalRun = (await callTool("start_eval_run", {
129
- name: `open-dataset-gaia-${task.id}-${Date.now()}`,
130
- description: `GAIA scenario (level=${task.level}, worker ${workerIndex})`,
131
- cases: [
132
- {
133
- input: redactedEvalInput(task),
134
- intent: `Coordinate long-running GAIA workflow for ${task.id}`,
135
- expected: "Discover tool strategy, run eval bookkeeping, and enforce mandatory flywheel checks.",
136
- },
137
- ],
138
- }, task.id, "start_eval_run"));
139
- await callTool("record_eval_result", {
140
- caseId: evalRun.caseIds[0],
141
- verdict: "pass",
142
- score: 1,
143
- actual: `Discovered ${discovered.tools.length} tools and completed GAIA workflow bookkeeping.`,
144
- telemetry: {
145
- dataset: fixture.dataset,
146
- config: fixture.config,
147
- split: fixture.split,
148
- taskId: task.id,
149
- level: task.level,
150
- workerIndex,
151
- questionLength: task.questionLength,
152
- hasFile: task.hasFile,
153
- fileExt: task.fileExt,
154
- numberOfSteps: task.annotator?.numberOfSteps ?? 0,
155
- numberOfTools: task.annotator?.numberOfTools ?? 0,
156
- complexityScore: task.complexityScore,
157
- },
158
- }, task.id, "record_eval_result");
159
- const evalSummary = (await callTool("complete_eval_run", { runId: evalRun.runId }, task.id, "complete_eval_run"));
160
- expect(evalSummary.status).toBe("completed");
161
- expect(evalSummary.summary.passed).toBe(1);
162
- const closedLoop = (await callTool("run_closed_loop", {
163
- steps: [
164
- { step: "compile", passed: true, output: `Compile checks for ${task.id}` },
165
- { step: "lint", passed: true, output: `Lint checks for ${task.id}` },
166
- { step: "test", passed: true, output: `Parallel benchmark checks for ${task.id}` },
167
- ],
168
- }, task.id, "run_closed_loop"));
169
- expect(closedLoop.allPassed).toBe(true);
170
- const flywheel = (await callTool("run_mandatory_flywheel", {
171
- target: `Gated GAIA task ${task.id}`,
172
- steps: [
173
- { stepName: "static_analysis", passed: true, output: "Types and schemas validated." },
174
- { stepName: "happy_path_test", passed: true, output: "Benchmark workflow completed." },
175
- { stepName: "failure_path_test", passed: true, output: "Fallback discovery query validated." },
176
- { stepName: "gap_analysis", passed: true, output: "No blocking gaps for this task." },
177
- { stepName: "fix_and_reverify", passed: true, output: "No rework required after checks." },
178
- { stepName: "deploy_and_document", passed: true, output: "Benchmark result documented." },
179
- ],
180
- }, task.id, "run_mandatory_flywheel"));
181
- expect(flywheel.passed).toBe(true);
182
- const knowledge = (await callTool("search_all_knowledge", { query: task.id, limit: 10 }, task.id, "search_all_knowledge"));
183
- expect(typeof knowledge.totalResults).toBe("number");
184
- expect(knowledge.totalResults).toBeGreaterThan(0);
185
- return {
186
- taskId: task.id,
187
- workerIndex,
188
- ok: true,
189
- elapsedMs: Date.now() - started,
190
- discoveredTools: discovered.tools.length,
191
- knowledgeHits: knowledge.totalResults,
192
- };
193
- }
194
- async function runWorkerPool(fixture, tasks, concurrency) {
195
- const boundedConcurrency = Math.max(1, Math.min(concurrency, tasks.length));
196
- const results = new Array(tasks.length);
197
- let nextIndex = 0;
198
- const workers = Array.from({ length: boundedConcurrency }, (_, workerIndex) => (async () => {
199
- while (true) {
200
- const taskIndex = nextIndex++;
201
- if (taskIndex >= tasks.length)
202
- return;
203
- const task = tasks[taskIndex];
204
- try {
205
- results[taskIndex] = await runDatasetTask(fixture, task, workerIndex);
206
- }
207
- catch (error) {
208
- results[taskIndex] = {
209
- taskId: task.id,
210
- workerIndex,
211
- ok: false,
212
- elapsedMs: 0,
213
- discoveredTools: 0,
214
- knowledgeHits: 0,
215
- error: error instanceof Error ? error.message : String(error),
216
- };
217
- }
218
- }
219
- })());
220
- await Promise.all(workers);
221
- return results;
222
- }
223
- function resolveGaiaFixturePath() {
224
- const fixtureOverride = process.env.NODEBENCH_GAIA_FIXTURE_PATH;
225
- if (fixtureOverride)
226
- return fixtureOverride;
227
- const config = process.env.NODEBENCH_GAIA_CONFIG ?? "2023_level3";
228
- const split = process.env.NODEBENCH_GAIA_SPLIT ?? "validation";
229
- const testDir = path.dirname(fileURLToPath(import.meta.url));
230
- const repoRoot = path.resolve(testDir, "../../../..");
231
- return path.join(repoRoot, ".cache", "gaia", `gaia_${config}_${split}.sample.json`);
232
- }
233
- const gaiaFixturePath = resolveGaiaFixturePath();
234
- const hasGaiaFixture = existsSync(gaiaFixturePath);
235
- describe("Scenario: GAIA (Gated) Long-Running Dataset (Parallel Subagents)", () => {
236
- const testFn = hasGaiaFixture ? it : it.skip;
237
- testFn("should execute GAIA tasks with parallel MCP subagent workflows", async () => {
238
- const fixture = await loadGaiaFixture(gaiaFixturePath);
239
- expect(Array.isArray(fixture.tasks)).toBe(true);
240
- expect(fixture.tasks.length).toBeGreaterThan(0);
241
- const requestedTaskLimit = Number.parseInt(process.env.NODEBENCH_GAIA_TASK_LIMIT ?? "8", 10);
242
- const taskLimit = Math.max(1, Math.min(fixture.tasks.length, Number.isFinite(requestedTaskLimit) ? requestedTaskLimit : 8));
243
- const requestedConcurrency = Number.parseInt(process.env.NODEBENCH_GAIA_CONCURRENCY ?? "4", 10);
244
- const concurrency = Math.max(1, Math.min(taskLimit, Number.isFinite(requestedConcurrency) ? requestedConcurrency : 4));
245
- const tasks = fixture.tasks.slice(0, taskLimit);
246
- const started = Date.now();
247
- const results = await runWorkerPool(fixture, tasks, concurrency);
248
- const elapsedMs = Date.now() - started;
249
- const failed = results.filter((result) => !result.ok);
250
- const passed = results.filter((result) => result.ok);
251
- const calledTools = new Set(openDatasetToolCallLog.map((entry) => entry.tool));
252
- const requiredTools = [
253
- "run_recon",
254
- "log_recon_finding",
255
- "findTools",
256
- "getMethodology",
257
- "start_eval_run",
258
- "record_eval_result",
259
- "complete_eval_run",
260
- "run_closed_loop",
261
- "run_mandatory_flywheel",
262
- "search_all_knowledge",
263
- ];
264
- console.log(`[open-dataset] dataset=${fixture.dataset} config=${fixture.config} split=${fixture.split} tasks=${taskLimit} concurrency=${concurrency} pass=${passed.length}/${results.length} elapsedMs=${elapsedMs} toolCalls=${openDatasetToolCallLog.length}`);
265
- if (failed.length > 0) {
266
- console.error("[open-dataset] failures:", failed.map((result) => ({
267
- taskId: result.taskId,
268
- workerIndex: result.workerIndex,
269
- error: result.error,
270
- })));
271
- }
272
- expect(failed.length).toBe(0);
273
- expect(passed.length).toBe(taskLimit);
274
- for (const requiredTool of requiredTools) {
275
- expect(calledTools.has(requiredTool)).toBe(true);
276
- }
277
- });
278
- });
279
- //# sourceMappingURL=openDatasetParallelEvalGaia.test.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"openDatasetParallelEvalGaia.test.js","sourceRoot":"","sources":["../../src/__tests__/openDatasetParallelEvalGaia.test.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,QAAQ,CAAC;AAC9C,OAAO,EAAE,UAAU,EAAE,MAAM,SAAS,CAAC;AACrC,OAAO,EAAE,QAAQ,EAAE,MAAM,kBAAkB,CAAC;AAC5C,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AACzC,OAAO,EAAE,iBAAiB,EAAE,MAAM,+BAA+B,CAAC;AAClE,OAAO,EAAE,UAAU,EAAE,MAAM,wBAAwB,CAAC;AACpD,OAAO,EAAE,SAAS,EAAE,MAAM,uBAAuB,CAAC;AAClD,OAAO,EAAE,gBAAgB,EAAE,MAAM,8BAA8B,CAAC;AAChE,OAAO,EAAE,aAAa,EAAE,MAAM,2BAA2B,CAAC;AAC1D,OAAO,EAAE,aAAa,EAAE,MAAM,2BAA2B,CAAC;AAC1D,OAAO,EAAE,kBAAkB,EAAE,MAAM,gCAAgC,CAAC;AACpE,OAAO,EAAE,mBAAmB,EAAE,MAAM,iCAAiC,CAAC;AACtE,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AA+CxD,MAAM,WAAW,GAAc;IAC7B,GAAG,iBAAiB;IACpB,GAAG,SAAS;IACZ,GAAG,gBAAgB;IACnB,GAAG,aAAa;IAChB,GAAG,aAAa;IAChB,GAAG,UAAU;IACb,GAAG,kBAAkB;IACrB,GAAG,mBAAmB;CACvB,CAAC;AACF,MAAM,QAAQ,GAAG,CAAC,GAAG,WAAW,EAAE,GAAG,eAAe,CAAC,WAAW,CAAC,CAAC,CAAC;AAEnE,MAAM,sBAAsB,GAKvB,EAAE,CAAC;AAER,SAAS,QAAQ,CAAC,IAAY;IAC5B,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC,SAAS,EAAE,EAAE,CAAC,SAAS,CAAC,IAAI,KAAK,IAAI,CAAC,CAAC;IACnE,IAAI,CAAC,IAAI;QAAE,MAAM,IAAI,KAAK,CAAC,mBAAmB,IAAI,EAAE,CAAC,CAAC;IACtD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,KAAK,UAAU,QAAQ,CACrB,IAAY,EACZ,IAA6B,EAC7B,MAAc,EACd,KAAa;IAEb,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC;IAC5B,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;QACxC,sBAAsB,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC;QAC1E,OAAO,MAAM,CAAC;IAChB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,sBAAsB,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC,CAAC;QAC3E,MAAM,KAAK,CAAC;IACd,CAAC;AACH,CAAC;AAED,SAAS,mBAAmB,CAAC,IAAiB;IAC5C,MAAM,WAAW,GAAG,IAAI,CAAC,MAAM,CAAC,WAAW,EAAE,CAAC;IAC9C,MAAM,IAAI,GAAa,EAAE,CAAC;IAE1B,uFAAuF;IACvF,4EAA4E;IAC5E,IAAI,IAAI,CAAC,OAAO;QAAE,IAAI,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,OAAO,IAAI,MAAM,aAAa,CAAC,CAAC;IACpE,IAAI,WAAW,CAAC,QAAQ,CAAC,WAAW,CAAC;QAAE,IAAI,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC;IACrE,IAAI,WAAW,CAAC,QAAQ,CAAC,QAAQ,CAAC;QAAE,IAAI,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;IAC/D,IAAI,WAAW,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,WAAW,CAAC,QAAQ,CAAC,OAAO,CAAC;QAAE,IAAI,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;IAClG,IAAI,WAAW,CAAC,QAAQ,CAAC,aAAa,CAAC,IAAI,IAAI,CAAC,OAAO,KAAK,MAAM;QAAE,IAAI,CAAC,IAAI,CAAC,sBAAsB,CAAC,CAAC;IACtG,IAAI,WAAW,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,IAAI,CAAC,OAAO,KAAK,KAAK;QAAE,IAAI,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;IACpF,IAAI,WAAW,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,CAAC,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC;QACxF,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;IACzB,IAAI,WAAW,CAAC,QAAQ,CAAC,UAAU,CAAC,IAAI,WAAW,CAAC,QAAQ,CAAC,SAAS,CAAC;QAAE,IAAI,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAC;IACtG,IAAI,WAAW,CAAC,QAAQ,CAAC,WAAW,CAAC,IAAI,WAAW,CAAC,QAAQ,CAAC,OAAO,CAAC;QAAE,IAAI,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;IAEjG,MAAM,OAAO,GAAG,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,kCAAkC,CAAC;IACvF,OAAO,aAAa,IAAI,CAAC,EAAE,WAAW,IAAI,CAAC,KAAK,IAAI,GAAG,MAAM,OAAO,EAAE,CAAC;AACzE,CAAC;AAED,SAAS,iBAAiB,CAAC,IAAiB;IAC1C,MAAM,IAAI,GAAG;QACX,aAAa,IAAI,CAAC,EAAE,oBAAoB;QACxC,SAAS,IAAI,CAAC,KAAK,IAAI,GAAG,EAAE;QAC5B,kBAAkB,IAAI,CAAC,cAAc,EAAE;QACvC,WAAW,IAAI,CAAC,OAAO,EAAE;QACzB,WAAW,IAAI,CAAC,OAAO,IAAI,EAAE,EAAE;QAC/B,SAAS,IAAI,CAAC,SAAS,EAAE,aAAa,IAAI,CAAC,EAAE;QAC7C,SAAS,IAAI,CAAC,SAAS,EAAE,aAAa,IAAI,CAAC,EAAE;KAC9C,CAAC;IACF,OAAO,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;AAC1B,CAAC;AAED,KAAK,UAAU,eAAe,CAAC,WAAmB;IAChD,MAAM,GAAG,GAAG,MAAM,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC,CAAC;IAChD,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAmB,CAAC;IACjD,IAAI,CAAC,MAAM,IAAI,CAAC,KAAK,CAAC,OAAO,CAAE,MAAc,CAAC,KAAK,CAAC,EAAE,CAAC;QACrD,MAAM,IAAI,KAAK,CAAC,8BAA8B,CAAC,CAAC;IAClD,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,KAAK,UAAU,cAAc,CAC3B,OAAuB,EACvB,IAAiB,EACjB,WAAmB;IAEnB,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAE3B,MAAM,KAAK,GAAG,CAAC,MAAM,QAAQ,CAC3B,WAAW,EACX;QACE,MAAM,EAAE,aAAa,IAAI,CAAC,EAAE,EAAE;QAC9B,WAAW,EAAE,sCAAsC,OAAO,CAAC,MAAM,IAAI,OAAO,CAAC,KAAK,IAAI;QACtF,cAAc,EAAE;YACd,SAAS,EAAE,yBAAyB;YACpC,YAAY,EAAE,4DAA4D;SAC3E;KACF,EACD,IAAI,CAAC,EAAE,EACP,aAAa,CACd,CAAQ,CAAC;IAEV,MAAM,QAAQ,CACZ,mBAAmB,EACnB;QACE,SAAS,EAAE,KAAK,CAAC,SAAS;QAC1B,QAAQ,EAAE,SAAS;QACnB,OAAO,EAAE,sBAAsB,IAAI,CAAC,EAAE,WAAW,IAAI,CAAC,KAAK,oBAAoB,IAAI,CAAC,cAAc,aAAa,IAAI,CAAC,OAAO,SAAS,IAAI,CAAC,OAAO,IAAI;QACpJ,SAAS,EAAE,OAAO,CAAC,SAAS;QAC5B,SAAS,EAAE,kDAAkD;QAC7D,WAAW,EAAE,oEAAoE;KAClF,EACD,IAAI,CAAC,EAAE,EACP,WAAW,CACZ,CAAC;IAEF,IAAI,UAAU,GAAG,CAAC,MAAM,QAAQ,CAC9B,WAAW,EACX;QACE,KAAK,EAAE,mBAAmB,CAAC,IAAI,CAAC;QAChC,QAAQ,EAAE,WAAW;KACtB,EACD,IAAI,CAAC,EAAE,EACP,YAAY,CACb,CAAQ,CAAC;IAEV,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,UAAU,EAAE,KAAK,CAAC,IAAI,UAAU,CAAC,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvE,UAAU,GAAG,CAAC,MAAM,QAAQ,CAC1B,WAAW,EACX,EAAE,KAAK,EAAE,yDAAyD,EAAE,QAAQ,EAAE,cAAc,EAAE,EAC9F,IAAI,CAAC,EAAE,EACP,qBAAqB,CACtB,CAAQ,CAAC;IACZ,CAAC;IACD,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACnD,MAAM,CAAC,UAAU,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;IAEnD,MAAM,WAAW,GAAG,CAAC,MAAM,QAAQ,CACjC,gBAAgB,EAChB,EAAE,KAAK,EAAE,oBAAoB,EAAE,EAC/B,IAAI,CAAC,EAAE,EACP,iBAAiB,CAClB,CAAQ,CAAC;IACV,MAAM,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC,UAAU,EAAE,CAAC;IACvC,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACpD,MAAM,CAAC,WAAW,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;IAEpD,MAAM,OAAO,GAAG,CAAC,MAAM,QAAQ,CAC7B,gBAAgB,EAChB;QACE,IAAI,EAAE,qBAAqB,IAAI,CAAC,EAAE,IAAI,IAAI,CAAC,GAAG,EAAE,EAAE;QAClD,WAAW,EAAE,wBAAwB,IAAI,CAAC,KAAK,YAAY,WAAW,GAAG;QACzE,KAAK,EAAE;YACL;gBACE,KAAK,EAAE,iBAAiB,CAAC,IAAI,CAAC;gBAC9B,MAAM,EAAE,6CAA6C,IAAI,CAAC,EAAE,EAAE;gBAC9D,QAAQ,EACN,sFAAsF;aACzF;SACF;KACF,EACD,IAAI,CAAC,EAAE,EACP,gBAAgB,CACjB,CAAQ,CAAC;IAEV,MAAM,QAAQ,CACZ,oBAAoB,EACpB;QACE,MAAM,EAAE,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC;QAC1B,OAAO,EAAE,MAAM;QACf,KAAK,EAAE,CAAC;QACR,MAAM,EAAE,cAAc,UAAU,CAAC,KAAK,CAAC,MAAM,iDAAiD;QAC9F,SAAS,EAAE;YACT,OAAO,EAAE,OAAO,CAAC,OAAO;YACxB,MAAM,EAAE,OAAO,CAAC,MAAM;YACtB,KAAK,EAAE,OAAO,CAAC,KAAK;YACpB,MAAM,EAAE,IAAI,CAAC,EAAE;YACf,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,WAAW;YACX,cAAc,EAAE,IAAI,CAAC,cAAc;YACnC,OAAO,EAAE,IAAI,CAAC,OAAO;YACrB,OAAO,EAAE,IAAI,CAAC,OAAO;YACrB,aAAa,EAAE,IAAI,CAAC,SAAS,EAAE,aAAa,IAAI,CAAC;YACjD,aAAa,EAAE,IAAI,CAAC,SAAS,EAAE,aAAa,IAAI,CAAC;YACjD,eAAe,EAAE,IAAI,CAAC,eAAe;SACtC;KACF,EACD,IAAI,CAAC,EAAE,EACP,oBAAoB,CACrB,CAAC;IAEF,MAAM,WAAW,GAAG,CAAC,MAAM,QAAQ,CACjC,mBAAmB,EACnB,EAAE,KAAK,EAAE,OAAO,CAAC,KAAK,EAAE,EACxB,IAAI,CAAC,EAAE,EACP,mBAAmB,CACpB,CAAQ,CAAC;IACV,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;IAC7C,MAAM,CAAC,WAAW,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAE3C,MAAM,UAAU,GAAG,CAAC,MAAM,QAAQ,CAChC,iBAAiB,EACjB;QACE,KAAK,EAAE;YACL,EAAE,IAAI,EAAE,SAAS,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,sBAAsB,IAAI,CAAC,EAAE,EAAE,EAAE;YAC1E,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,mBAAmB,IAAI,CAAC,EAAE,EAAE,EAAE;YACpE,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,iCAAiC,IAAI,CAAC,EAAE,EAAE,EAAE;SACnF;KACF,EACD,IAAI,CAAC,EAAE,EACP,iBAAiB,CAClB,CAAQ,CAAC;IACV,MAAM,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAExC,MAAM,QAAQ,GAAG,CAAC,MAAM,QAAQ,CAC9B,wBAAwB,EACxB;QACE,MAAM,EAAE,mBAAmB,IAAI,CAAC,EAAE,EAAE;QACpC,KAAK,EAAE;YACL,EAAE,QAAQ,EAAE,iBAAiB,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,8BAA8B,EAAE;YACrF,EAAE,QAAQ,EAAE,iBAAiB,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,+BAA+B,EAAE;YACtF,EAAE,QAAQ,EAAE,mBAAmB,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,qCAAqC,EAAE;YAC9F,EAAE,QAAQ,EAAE,cAAc,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,iCAAiC,EAAE;YACrF,EAAE,QAAQ,EAAE,kBAAkB,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,kCAAkC,EAAE;YAC1F,EAAE,QAAQ,EAAE,qBAAqB,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,8BAA8B,EAAE;SAC1F;KACF,EACD,IAAI,CAAC,EAAE,EACP,wBAAwB,CACzB,CAAQ,CAAC;IACV,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAEnC,MAAM,SAAS,GAAG,CAAC,MAAM,QAAQ,CAC/B,sBAAsB,EACtB,EAAE,KAAK,EAAE,IAAI,CAAC,EAAE,EAAE,KAAK,EAAE,EAAE,EAAE,EAC7B,IAAI,CAAC,EAAE,EACP,sBAAsB,CACvB,CAAQ,CAAC;IACV,MAAM,CAAC,OAAO,SAAS,CAAC,YAAY,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IACrD,MAAM,CAAC,SAAS,CAAC,YAAY,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;IAElD,OAAO;QACL,MAAM,EAAE,IAAI,CAAC,EAAE;QACf,WAAW;QACX,EAAE,EAAE,IAAI;QACR,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,OAAO;QAC/B,eAAe,EAAE,UAAU,CAAC,KAAK,CAAC,MAAM;QACxC,aAAa,EAAE,SAAS,CAAC,YAAY;KACtC,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,aAAa,CAC1B,OAAuB,EACvB,KAAoB,EACpB,WAAmB;IAEnB,MAAM,kBAAkB,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,WAAW,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC;IAC5E,MAAM,OAAO,GAAmB,IAAI,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;IACxD,IAAI,SAAS,GAAG,CAAC,CAAC;IAElB,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,kBAAkB,EAAE,EAAE,CAAC,CAAC,EAAE,WAAW,EAAE,EAAE,CAC5E,CAAC,KAAK,IAAI,EAAE;QACV,OAAO,IAAI,EAAE,CAAC;YACZ,MAAM,SAAS,GAAG,SAAS,EAAE,CAAC;YAC9B,IAAI,SAAS,IAAI,KAAK,CAAC,MAAM;gBAAE,OAAO;YAEtC,MAAM,IAAI,GAAG,KAAK,CAAC,SAAS,CAAC,CAAC;YAC9B,IAAI,CAAC;gBACH,OAAO,CAAC,SAAS,CAAC,GAAG,MAAM,cAAc,CAAC,OAAO,EAAE,IAAI,EAAE,WAAW,CAAC,CAAC;YACxE,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,OAAO,CAAC,SAAS,CAAC,GAAG;oBACnB,MAAM,EAAE,IAAI,CAAC,EAAE;oBACf,WAAW;oBACX,EAAE,EAAE,KAAK;oBACT,SAAS,EAAE,CAAC;oBACZ,eAAe,EAAE,CAAC;oBAClB,aAAa,EAAE,CAAC;oBAChB,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;iBAC9D,CAAC;YACJ,CAAC;QACH,CAAC;IACH,CAAC,CAAC,EAAE,CACL,CAAC;IAEF,MAAM,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;IAC3B,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,SAAS,sBAAsB;IAC7B,MAAM,eAAe,GAAG,OAAO,CAAC,GAAG,CAAC,2BAA2B,CAAC;IAChE,IAAI,eAAe;QAAE,OAAO,eAAe,CAAC;IAE5C,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,qBAAqB,IAAI,aAAa,CAAC;IAClE,MAAM,KAAK,GAAG,OAAO,CAAC,GAAG,CAAC,oBAAoB,IAAI,YAAY,CAAC;IAC/D,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;IAC7D,MAAM,QAAQ,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE,aAAa,CAAC,CAAC;IACtD,OAAO,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,QAAQ,EAAE,MAAM,EAAE,QAAQ,MAAM,IAAI,KAAK,cAAc,CAAC,CAAC;AACtF,CAAC;AAED,MAAM,eAAe,GAAG,sBAAsB,EAAE,CAAC;AACjD,MAAM,cAAc,GAAG,UAAU,CAAC,eAAe,CAAC,CAAC;AAEnD,QAAQ,CAAC,kEAAkE,EAAE,GAAG,EAAE;IAChF,MAAM,MAAM,GAAG,cAAc,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC;IAE7C,MAAM,CAAC,gEAAgE,EAAE,KAAK,IAAI,EAAE;QAClF,MAAM,OAAO,GAAG,MAAM,eAAe,CAAC,eAAe,CAAC,CAAC;QACvD,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAChD,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QAEhD,MAAM,kBAAkB,GAAG,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,yBAAyB,IAAI,GAAG,EAAE,EAAE,CAAC,CAAC;QAC7F,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,CACxB,CAAC,EACD,IAAI,CAAC,GAAG,CACN,OAAO,CAAC,KAAK,CAAC,MAAM,EACpB,MAAM,CAAC,QAAQ,CAAC,kBAAkB,CAAC,CAAC,CAAC,CAAC,kBAAkB,CAAC,CAAC,CAAC,CAAC,CAC7D,CACF,CAAC;QAEF,MAAM,oBAAoB,GAAG,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,0BAA0B,IAAI,GAAG,EAAE,EAAE,CAAC,CAAC;QAChG,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAC1B,CAAC,EACD,IAAI,CAAC,GAAG,CAAC,SAAS,EAAE,MAAM,CAAC,QAAQ,CAAC,oBAAoB,CAAC,CAAC,CAAC,CAAC,oBAAoB,CAAC,CAAC,CAAC,CAAC,CAAC,CACtF,CAAC;QAEF,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC;QAChD,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC3B,MAAM,OAAO,GAAG,MAAM,aAAa,CAAC,OAAO,EAAE,KAAK,EAAE,WAAW,CAAC,CAAC;QACjE,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,OAAO,CAAC;QAEvC,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QACtD,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QAErD,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,sBAAsB,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC;QAC/E,MAAM,aAAa,GAAG;YACpB,WAAW;YACX,mBAAmB;YACnB,WAAW;YACX,gBAAgB;YAChB,gBAAgB;YAChB,oBAAoB;YACpB,mBAAmB;YACnB,iBAAiB;YACjB,wBAAwB;YACxB,sBAAsB;SACvB,CAAC;QAEF,OAAO,CAAC,GAAG,CACT,0BAA0B,OAAO,CAAC,OAAO,WAAW,OAAO,CAAC,MAAM,UAAU,OAAO,CAAC,KAAK,UAAU,SAAS,gBAAgB,WAAW,SAAS,MAAM,CAAC,MAAM,IAAI,OAAO,CAAC,MAAM,cAAc,SAAS,cAAc,sBAAsB,CAAC,MAAM,EAAE,CACpP,CAAC;QAEF,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACtB,OAAO,CAAC,KAAK,CACX,0BAA0B,EAC1B,MAAM,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;gBACtB,MAAM,EAAE,MAAM,CAAC,MAAM;gBACrB,WAAW,EAAE,MAAM,CAAC,WAAW;gBAC/B,KAAK,EAAE,MAAM,CAAC,KAAK;aACpB,CAAC,CAAC,CACJ,CAAC;QACJ,CAAC;QAED,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC9B,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QAEtC,KAAK,MAAM,YAAY,IAAI,aAAa,EAAE,CAAC;YACzC,MAAM,CAAC,WAAW,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACnD,CAAC;IACH,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
@@ -1,7 +0,0 @@
1
- /**
2
- * Open-source dataset benchmark for long-running tasks (SWE-bench lane).
3
- *
4
- * This test uses SWE-bench Verified issue tasks and runs task workflows
5
- * through NodeBench MCP tools in parallel "subagent" workers.
6
- */
7
- export {};