nodebench-mcp 3.0.0 → 3.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171) hide show
  1. package/NODEBENCH_AGENTS.md +74 -67
  2. package/README.md +36 -34
  3. package/dist/dashboard/operatingDashboardHtml.js +2 -1
  4. package/dist/dashboard/operatingDashboardHtml.js.map +1 -1
  5. package/dist/dashboard/operatingServer.js +3 -2
  6. package/dist/dashboard/operatingServer.js.map +1 -1
  7. package/dist/db.js +51 -3
  8. package/dist/db.js.map +1 -1
  9. package/dist/index.js +19 -18
  10. package/dist/index.js.map +1 -1
  11. package/dist/packageInfo.d.ts +3 -0
  12. package/dist/packageInfo.js +32 -0
  13. package/dist/packageInfo.js.map +1 -0
  14. package/dist/sandboxApi.js +2 -1
  15. package/dist/sandboxApi.js.map +1 -1
  16. package/dist/tools/boilerplateTools.js +10 -9
  17. package/dist/tools/boilerplateTools.js.map +1 -1
  18. package/dist/tools/documentationTools.js +2 -1
  19. package/dist/tools/documentationTools.js.map +1 -1
  20. package/dist/tools/progressiveDiscoveryTools.js +2 -1
  21. package/dist/tools/progressiveDiscoveryTools.js.map +1 -1
  22. package/dist/tools/toolRegistry.js +11 -0
  23. package/dist/tools/toolRegistry.js.map +1 -1
  24. package/dist/toolsetRegistry.js +74 -1
  25. package/dist/toolsetRegistry.js.map +1 -1
  26. package/package.json +7 -6
  27. package/scripts/install.sh +14 -14
  28. package/dist/__tests__/analytics.test.d.ts +0 -11
  29. package/dist/__tests__/analytics.test.js +0 -546
  30. package/dist/__tests__/analytics.test.js.map +0 -1
  31. package/dist/__tests__/architectComplex.test.d.ts +0 -1
  32. package/dist/__tests__/architectComplex.test.js +0 -373
  33. package/dist/__tests__/architectComplex.test.js.map +0 -1
  34. package/dist/__tests__/architectSmoke.test.d.ts +0 -1
  35. package/dist/__tests__/architectSmoke.test.js +0 -92
  36. package/dist/__tests__/architectSmoke.test.js.map +0 -1
  37. package/dist/__tests__/audit-registry.d.ts +0 -1
  38. package/dist/__tests__/audit-registry.js +0 -60
  39. package/dist/__tests__/audit-registry.js.map +0 -1
  40. package/dist/__tests__/batchAutopilot.test.d.ts +0 -8
  41. package/dist/__tests__/batchAutopilot.test.js +0 -218
  42. package/dist/__tests__/batchAutopilot.test.js.map +0 -1
  43. package/dist/__tests__/cliSubcommands.test.d.ts +0 -1
  44. package/dist/__tests__/cliSubcommands.test.js +0 -138
  45. package/dist/__tests__/cliSubcommands.test.js.map +0 -1
  46. package/dist/__tests__/comparativeBench.test.d.ts +0 -1
  47. package/dist/__tests__/comparativeBench.test.js +0 -722
  48. package/dist/__tests__/comparativeBench.test.js.map +0 -1
  49. package/dist/__tests__/critterCalibrationEval.d.ts +0 -8
  50. package/dist/__tests__/critterCalibrationEval.js +0 -370
  51. package/dist/__tests__/critterCalibrationEval.js.map +0 -1
  52. package/dist/__tests__/dynamicLoading.test.d.ts +0 -1
  53. package/dist/__tests__/dynamicLoading.test.js +0 -280
  54. package/dist/__tests__/dynamicLoading.test.js.map +0 -1
  55. package/dist/__tests__/embeddingProvider.test.d.ts +0 -1
  56. package/dist/__tests__/embeddingProvider.test.js +0 -86
  57. package/dist/__tests__/embeddingProvider.test.js.map +0 -1
  58. package/dist/__tests__/evalDatasetBench.test.d.ts +0 -1
  59. package/dist/__tests__/evalDatasetBench.test.js +0 -738
  60. package/dist/__tests__/evalDatasetBench.test.js.map +0 -1
  61. package/dist/__tests__/evalHarness.test.d.ts +0 -1
  62. package/dist/__tests__/evalHarness.test.js +0 -1107
  63. package/dist/__tests__/evalHarness.test.js.map +0 -1
  64. package/dist/__tests__/fixtures/bfcl_v3_long_context.sample.json +0 -264
  65. package/dist/__tests__/fixtures/generateBfclLongContextFixture.d.ts +0 -10
  66. package/dist/__tests__/fixtures/generateBfclLongContextFixture.js +0 -135
  67. package/dist/__tests__/fixtures/generateBfclLongContextFixture.js.map +0 -1
  68. package/dist/__tests__/fixtures/generateSwebenchVerifiedFixture.d.ts +0 -14
  69. package/dist/__tests__/fixtures/generateSwebenchVerifiedFixture.js +0 -189
  70. package/dist/__tests__/fixtures/generateSwebenchVerifiedFixture.js.map +0 -1
  71. package/dist/__tests__/fixtures/generateToolbenchInstructionFixture.d.ts +0 -16
  72. package/dist/__tests__/fixtures/generateToolbenchInstructionFixture.js +0 -154
  73. package/dist/__tests__/fixtures/generateToolbenchInstructionFixture.js.map +0 -1
  74. package/dist/__tests__/fixtures/swebench_verified.sample.json +0 -162
  75. package/dist/__tests__/fixtures/toolbench_instruction.sample.json +0 -109
  76. package/dist/__tests__/forecastingDogfood.test.d.ts +0 -9
  77. package/dist/__tests__/forecastingDogfood.test.js +0 -284
  78. package/dist/__tests__/forecastingDogfood.test.js.map +0 -1
  79. package/dist/__tests__/forecastingScoring.test.d.ts +0 -9
  80. package/dist/__tests__/forecastingScoring.test.js +0 -202
  81. package/dist/__tests__/forecastingScoring.test.js.map +0 -1
  82. package/dist/__tests__/gaiaCapabilityAudioEval.test.d.ts +0 -15
  83. package/dist/__tests__/gaiaCapabilityAudioEval.test.js +0 -265
  84. package/dist/__tests__/gaiaCapabilityAudioEval.test.js.map +0 -1
  85. package/dist/__tests__/gaiaCapabilityEval.test.d.ts +0 -14
  86. package/dist/__tests__/gaiaCapabilityEval.test.js +0 -1259
  87. package/dist/__tests__/gaiaCapabilityEval.test.js.map +0 -1
  88. package/dist/__tests__/gaiaCapabilityFilesEval.test.d.ts +0 -15
  89. package/dist/__tests__/gaiaCapabilityFilesEval.test.js +0 -914
  90. package/dist/__tests__/gaiaCapabilityFilesEval.test.js.map +0 -1
  91. package/dist/__tests__/gaiaCapabilityMediaEval.test.d.ts +0 -15
  92. package/dist/__tests__/gaiaCapabilityMediaEval.test.js +0 -1101
  93. package/dist/__tests__/gaiaCapabilityMediaEval.test.js.map +0 -1
  94. package/dist/__tests__/helpers/answerMatch.d.ts +0 -41
  95. package/dist/__tests__/helpers/answerMatch.js +0 -267
  96. package/dist/__tests__/helpers/answerMatch.js.map +0 -1
  97. package/dist/__tests__/helpers/textLlm.d.ts +0 -25
  98. package/dist/__tests__/helpers/textLlm.js +0 -214
  99. package/dist/__tests__/helpers/textLlm.js.map +0 -1
  100. package/dist/__tests__/localDashboard.test.d.ts +0 -1
  101. package/dist/__tests__/localDashboard.test.js +0 -226
  102. package/dist/__tests__/localDashboard.test.js.map +0 -1
  103. package/dist/__tests__/multiHopDogfood.test.d.ts +0 -12
  104. package/dist/__tests__/multiHopDogfood.test.js +0 -303
  105. package/dist/__tests__/multiHopDogfood.test.js.map +0 -1
  106. package/dist/__tests__/openDatasetParallelEval.test.d.ts +0 -7
  107. package/dist/__tests__/openDatasetParallelEval.test.js +0 -209
  108. package/dist/__tests__/openDatasetParallelEval.test.js.map +0 -1
  109. package/dist/__tests__/openDatasetParallelEvalGaia.test.d.ts +0 -7
  110. package/dist/__tests__/openDatasetParallelEvalGaia.test.js +0 -279
  111. package/dist/__tests__/openDatasetParallelEvalGaia.test.js.map +0 -1
  112. package/dist/__tests__/openDatasetParallelEvalSwebench.test.d.ts +0 -7
  113. package/dist/__tests__/openDatasetParallelEvalSwebench.test.js +0 -220
  114. package/dist/__tests__/openDatasetParallelEvalSwebench.test.js.map +0 -1
  115. package/dist/__tests__/openDatasetParallelEvalToolbench.test.d.ts +0 -7
  116. package/dist/__tests__/openDatasetParallelEvalToolbench.test.js +0 -218
  117. package/dist/__tests__/openDatasetParallelEvalToolbench.test.js.map +0 -1
  118. package/dist/__tests__/openDatasetPerfComparison.test.d.ts +0 -10
  119. package/dist/__tests__/openDatasetPerfComparison.test.js +0 -318
  120. package/dist/__tests__/openDatasetPerfComparison.test.js.map +0 -1
  121. package/dist/__tests__/openclawDogfood.test.d.ts +0 -23
  122. package/dist/__tests__/openclawDogfood.test.js +0 -535
  123. package/dist/__tests__/openclawDogfood.test.js.map +0 -1
  124. package/dist/__tests__/openclawMessaging.test.d.ts +0 -14
  125. package/dist/__tests__/openclawMessaging.test.js +0 -232
  126. package/dist/__tests__/openclawMessaging.test.js.map +0 -1
  127. package/dist/__tests__/presetRealWorldBench.test.d.ts +0 -1
  128. package/dist/__tests__/presetRealWorldBench.test.js +0 -859
  129. package/dist/__tests__/presetRealWorldBench.test.js.map +0 -1
  130. package/dist/__tests__/tools.test.d.ts +0 -1
  131. package/dist/__tests__/tools.test.js +0 -3201
  132. package/dist/__tests__/tools.test.js.map +0 -1
  133. package/dist/__tests__/toolsetGatingEval.test.d.ts +0 -1
  134. package/dist/__tests__/toolsetGatingEval.test.js +0 -1099
  135. package/dist/__tests__/toolsetGatingEval.test.js.map +0 -1
  136. package/dist/__tests__/traceabilityDogfood.test.d.ts +0 -12
  137. package/dist/__tests__/traceabilityDogfood.test.js +0 -241
  138. package/dist/__tests__/traceabilityDogfood.test.js.map +0 -1
  139. package/dist/__tests__/webmcpTools.test.d.ts +0 -7
  140. package/dist/__tests__/webmcpTools.test.js +0 -195
  141. package/dist/__tests__/webmcpTools.test.js.map +0 -1
  142. package/dist/benchmarks/testProviderBus.d.ts +0 -7
  143. package/dist/benchmarks/testProviderBus.js +0 -272
  144. package/dist/benchmarks/testProviderBus.js.map +0 -1
  145. package/dist/hooks/postCompaction.d.ts +0 -14
  146. package/dist/hooks/postCompaction.js +0 -51
  147. package/dist/hooks/postCompaction.js.map +0 -1
  148. package/dist/security/__tests__/security.test.d.ts +0 -8
  149. package/dist/security/__tests__/security.test.js +0 -295
  150. package/dist/security/__tests__/security.test.js.map +0 -1
  151. package/dist/sync/hyperloopEval.test.d.ts +0 -4
  152. package/dist/sync/hyperloopEval.test.js +0 -60
  153. package/dist/sync/hyperloopEval.test.js.map +0 -1
  154. package/dist/sync/store.test.d.ts +0 -4
  155. package/dist/sync/store.test.js +0 -43
  156. package/dist/sync/store.test.js.map +0 -1
  157. package/dist/tools/documentTools.d.ts +0 -5
  158. package/dist/tools/documentTools.js +0 -524
  159. package/dist/tools/documentTools.js.map +0 -1
  160. package/dist/tools/financialTools.d.ts +0 -10
  161. package/dist/tools/financialTools.js +0 -403
  162. package/dist/tools/financialTools.js.map +0 -1
  163. package/dist/tools/memoryTools.d.ts +0 -5
  164. package/dist/tools/memoryTools.js +0 -137
  165. package/dist/tools/memoryTools.js.map +0 -1
  166. package/dist/tools/planningTools.d.ts +0 -5
  167. package/dist/tools/planningTools.js +0 -147
  168. package/dist/tools/planningTools.js.map +0 -1
  169. package/dist/tools/searchTools.d.ts +0 -5
  170. package/dist/tools/searchTools.js +0 -145
  171. package/dist/tools/searchTools.js.map +0 -1
@@ -1,318 +0,0 @@
1
- /**
2
- * Performance comparison: "no tools" (noop handlers) vs NodeBench MCP toolchain.
3
- *
4
- * Notes:
5
- * - This is a wall-clock micro-benchmark for local harness overhead, not capability/accuracy.
6
- * - Disabled by default to avoid noisy perf output in normal test runs.
7
- * Set NODEBENCH_RUN_PERF_COMPARE=1 to run.
8
- * - GAIA is gated; if a local GAIA fixture exists in `.cache/gaia`, it will be included.
9
- */
10
- import { describe, expect, it } from "vitest";
11
- import { existsSync } from "node:fs";
12
- import { readFile } from "node:fs/promises";
13
- import path from "node:path";
14
- import { fileURLToPath } from "node:url";
15
- import { performance } from "node:perf_hooks";
16
- import bfclFixture from "./fixtures/bfcl_v3_long_context.sample.json";
17
- import toolbenchFixture from "./fixtures/toolbench_instruction.sample.json";
18
- import swebenchFixture from "./fixtures/swebench_verified.sample.json";
19
- import { verificationTools } from "../tools/verificationTools.js";
20
- import { reconTools } from "../tools/reconTools.js";
21
- import { evalTools } from "../tools/evalTools.js";
22
- import { qualityGateTools } from "../tools/qualityGateTools.js";
23
- import { flywheelTools } from "../tools/flywheelTools.js";
24
- import { learningTools } from "../tools/learningTools.js";
25
- import { documentationTools } from "../tools/documentationTools.js";
26
- import { agentBootstrapTools } from "../tools/agentBootstrapTools.js";
27
- import { createMetaTools } from "../tools/metaTools.js";
28
- const shouldRun = process.env.NODEBENCH_RUN_PERF_COMPARE === "1";
29
- const domainTools = [
30
- ...verificationTools,
31
- ...evalTools,
32
- ...qualityGateTools,
33
- ...learningTools,
34
- ...flywheelTools,
35
- ...reconTools,
36
- ...documentationTools,
37
- ...agentBootstrapTools,
38
- ];
39
- const allTools = [...domainTools, ...createMetaTools(domainTools)];
40
- function findTool(name) {
41
- const tool = allTools.find((candidate) => candidate.name === name);
42
- if (!tool)
43
- throw new Error(`Tool not found: ${name}`);
44
- return tool;
45
- }
46
- function buildNoopToolCaller() {
47
- return async (name, _args, taskId, _stage) => {
48
- switch (name) {
49
- case "run_recon":
50
- return { sessionId: `noop_${taskId}` };
51
- case "log_recon_finding":
52
- return { ok: true };
53
- case "findTools":
54
- return { tools: [{ name: "noop_tool" }] };
55
- case "getMethodology":
56
- return { title: "noop", steps: ["noop-step"] };
57
- case "start_eval_run":
58
- return { runId: `noop_run_${taskId}`, caseIds: [`noop_case_${taskId}`] };
59
- case "record_eval_result":
60
- return { ok: true };
61
- case "complete_eval_run":
62
- return { status: "completed", summary: { passed: 1 } };
63
- case "run_closed_loop":
64
- return { allPassed: true };
65
- case "run_mandatory_flywheel":
66
- return { passed: true };
67
- case "search_all_knowledge":
68
- return { totalResults: 1 };
69
- default:
70
- throw new Error(`noop tool not implemented: ${name}`);
71
- }
72
- };
73
- }
74
- function buildNodebenchToolCaller(toolCallLog) {
75
- return async (name, args, taskId, stage) => {
76
- const tool = findTool(name);
77
- try {
78
- const result = await tool.handler(args);
79
- toolCallLog.push({ taskId, tool: name, stage, success: true });
80
- return result;
81
- }
82
- catch (error) {
83
- toolCallLog.push({ taskId, tool: name, stage, success: false });
84
- throw error;
85
- }
86
- };
87
- }
88
- function clampInt(value, fallback, min, max) {
89
- const parsed = Number.parseInt(String(value ?? ""), 10);
90
- const asNum = Number.isFinite(parsed) ? parsed : fallback;
91
- return Math.max(min, Math.min(max, asNum));
92
- }
93
- async function runPipelineForTask(task, datasetId, callTool, workerIndex, opts) {
94
- const started = performance.now();
95
- const recon = (await callTool("run_recon", {
96
- target: `${datasetId} task ${task.id}`,
97
- description: "Perf comparison pipeline run.",
98
- projectContext: {
99
- techStack: "TypeScript, MCP, SQLite",
100
- architecture: "Perf comparison harness (noop vs NodeBench tools)",
101
- },
102
- }, task.id, "recon_start"));
103
- await callTool("log_recon_finding", {
104
- sessionId: recon.sessionId,
105
- category: "dataset",
106
- summary: `Perf comparison task ${task.id} (dataset=${datasetId}).`,
107
- relevance: "Measure overhead of toolchain vs noop baseline.",
108
- }, task.id, "recon_log");
109
- const findToolsQuery = opts?.redactedFindToolsQuery?.(task) ?? task.prompt.slice(0, 600) ?? task.id;
110
- const discovered = (await callTool("findTools", { query: findToolsQuery, category: "bootstrap" }, task.id, "find_tools"));
111
- expect(Array.isArray(discovered?.tools)).toBe(true);
112
- const methodology = (await callTool("getMethodology", { topic: "mandatory_flywheel" }, task.id, "get_methodology"));
113
- expect(methodology.title).toBeTruthy();
114
- const evalInput = opts?.redactedEvalInput?.(task) ?? task.prompt;
115
- const evalRun = (await callTool("start_eval_run", {
116
- name: `perf-${datasetId}-${task.id}-${Date.now()}`,
117
- description: `Perf comparison (${datasetId}, worker ${workerIndex})`,
118
- cases: [
119
- {
120
- input: evalInput,
121
- intent: `Perf compare pipeline for ${task.id}`,
122
- expected: "Complete bookkeeping toolchain steps without errors.",
123
- },
124
- ],
125
- }, task.id, "start_eval_run"));
126
- await callTool("record_eval_result", {
127
- caseId: evalRun.caseIds[0],
128
- verdict: "pass",
129
- score: 1,
130
- actual: "Perf comparison pass.",
131
- telemetry: { dataset: datasetId, taskId: task.id, workerIndex },
132
- }, task.id, "record_eval_result");
133
- const evalSummary = (await callTool("complete_eval_run", { runId: evalRun.runId }, task.id, "complete_eval_run"));
134
- expect(evalSummary.status).toBe("completed");
135
- const closedLoop = (await callTool("run_closed_loop", {
136
- steps: [
137
- { step: "compile", passed: true, output: "ok" },
138
- { step: "lint", passed: true, output: "ok" },
139
- { step: "test", passed: true, output: "ok" },
140
- ],
141
- }, task.id, "run_closed_loop"));
142
- expect(closedLoop.allPassed).toBe(true);
143
- const flywheel = (await callTool("run_mandatory_flywheel", {
144
- target: `Perf compare ${datasetId} ${task.id}`,
145
- steps: [
146
- { stepName: "static_analysis", passed: true, output: "ok" },
147
- { stepName: "happy_path_test", passed: true, output: "ok" },
148
- { stepName: "failure_path_test", passed: true, output: "ok" },
149
- { stepName: "gap_analysis", passed: true, output: "ok" },
150
- { stepName: "fix_and_reverify", passed: true, output: "ok" },
151
- { stepName: "deploy_and_document", passed: true, output: "ok" },
152
- ],
153
- }, task.id, "run_mandatory_flywheel"));
154
- expect(flywheel.passed).toBe(true);
155
- const knowledge = (await callTool("search_all_knowledge", { query: task.id, limit: 5 }, task.id, "search_all_knowledge"));
156
- expect(typeof knowledge.totalResults).toBe("number");
157
- return {
158
- taskId: task.id,
159
- workerIndex,
160
- ok: true,
161
- elapsedMs: performance.now() - started,
162
- };
163
- }
164
- async function runWorkerPool(datasetId, tasks, concurrency, callTool, opts) {
165
- const boundedConcurrency = Math.max(1, Math.min(concurrency, tasks.length));
166
- const results = new Array(tasks.length);
167
- let nextIndex = 0;
168
- const workers = Array.from({ length: boundedConcurrency }, (_, workerIndex) => (async () => {
169
- while (true) {
170
- const taskIndex = nextIndex++;
171
- if (taskIndex >= tasks.length)
172
- return;
173
- const task = tasks[taskIndex];
174
- try {
175
- results[taskIndex] = await runPipelineForTask(task, datasetId, callTool, workerIndex, opts);
176
- }
177
- catch (error) {
178
- results[taskIndex] = {
179
- taskId: task.id,
180
- workerIndex,
181
- ok: false,
182
- elapsedMs: 0,
183
- error: error instanceof Error ? error.message : String(error),
184
- };
185
- }
186
- }
187
- })());
188
- await Promise.all(workers);
189
- return results;
190
- }
191
- function toGenericTasks(fixture) {
192
- const tasks = Array.isArray(fixture?.tasks) ? fixture.tasks : [];
193
- return tasks
194
- .map((t) => ({
195
- id: String(t?.id ?? "").trim(),
196
- prompt: String(t?.prompt ?? "").trim(),
197
- meta: typeof t === "object" && t ? t : undefined,
198
- }))
199
- .filter((t) => t.id && t.prompt);
200
- }
201
- function resolveGaiaFixturePath() {
202
- const config = process.env.NODEBENCH_GAIA_CONFIG ?? "2023_level3";
203
- const split = process.env.NODEBENCH_GAIA_SPLIT ?? "validation";
204
- const testDir = path.dirname(fileURLToPath(import.meta.url));
205
- const repoRoot = path.resolve(testDir, "../../../..");
206
- return path.join(repoRoot, ".cache", "gaia", `gaia_${config}_${split}.sample.json`);
207
- }
208
- async function tryLoadGaiaFixture() {
209
- const gaiaPath = resolveGaiaFixturePath();
210
- if (!existsSync(gaiaPath))
211
- return null;
212
- const raw = await readFile(gaiaPath, "utf8");
213
- return JSON.parse(raw);
214
- }
215
- function gaiaRedactedEvalInput(task) {
216
- const meta = (task.meta ?? {});
217
- return [
218
- `GAIA task ${task.id} (prompt redacted)`,
219
- `level=${meta.level ?? "?"}`,
220
- `questionLength=${meta.questionLength ?? ""}`,
221
- `hasFile=${meta.hasFile ?? ""}`,
222
- `fileExt=${meta.fileExt ?? ""}`,
223
- ].join(" | ");
224
- }
225
- function gaiaRedactedFindToolsQuery(task) {
226
- const meta = (task.meta ?? {});
227
- const ext = String(meta.fileExt ?? "");
228
- return `GAIA task ${task.id} (level ${meta.level ?? "?"}) | hasFile=${meta.hasFile ?? false} | ext=${ext}`;
229
- }
230
- const datasets = [
231
- {
232
- id: "bfcl",
233
- tasks: toGenericTasks(bfclFixture),
234
- limitEnv: "NODEBENCH_OPEN_DATASET_TASK_LIMIT",
235
- concurrencyEnv: "NODEBENCH_OPEN_DATASET_CONCURRENCY",
236
- },
237
- {
238
- id: "toolbench",
239
- tasks: toGenericTasks(toolbenchFixture),
240
- limitEnv: "NODEBENCH_TOOLBENCH_TASK_LIMIT",
241
- concurrencyEnv: "NODEBENCH_TOOLBENCH_CONCURRENCY",
242
- },
243
- {
244
- id: "swebench",
245
- tasks: toGenericTasks(swebenchFixture),
246
- limitEnv: "NODEBENCH_SWEBENCH_TASK_LIMIT",
247
- concurrencyEnv: "NODEBENCH_SWEBENCH_CONCURRENCY",
248
- },
249
- ];
250
- describe("Benchmark: No-Tools vs NodeBench MCP Toolchain (Perf Compare)", () => {
251
- const testFn = shouldRun ? it : it.skip;
252
- testFn("should compare wall-clock runtime for noop vs NodeBench toolchain", async () => {
253
- const gaia = await tryLoadGaiaFixture();
254
- if (gaia) {
255
- datasets.push({
256
- id: "gaia",
257
- tasks: toGenericTasks(gaia),
258
- limitEnv: "NODEBENCH_GAIA_TASK_LIMIT",
259
- concurrencyEnv: "NODEBENCH_GAIA_CONCURRENCY",
260
- redactedEvalInput: gaiaRedactedEvalInput,
261
- redactedFindToolsQuery: gaiaRedactedFindToolsQuery,
262
- });
263
- }
264
- const noopCaller = buildNoopToolCaller();
265
- const nodebenchToolCallLog = [];
266
- const nodebenchCaller = buildNodebenchToolCaller(nodebenchToolCallLog);
267
- for (const dataset of datasets) {
268
- expect(dataset.tasks.length).toBeGreaterThan(0);
269
- const taskLimit = clampInt(process.env[dataset.limitEnv], 8, 1, dataset.tasks.length);
270
- const concurrency = clampInt(process.env[dataset.concurrencyEnv], 4, 1, taskLimit);
271
- const tasks = dataset.tasks.slice(0, taskLimit);
272
- const noopStarted = performance.now();
273
- const noopResults = await runWorkerPool(dataset.id, tasks, concurrency, noopCaller, {
274
- redactedEvalInput: dataset.redactedEvalInput,
275
- redactedFindToolsQuery: dataset.redactedFindToolsQuery,
276
- });
277
- const noopElapsed = performance.now() - noopStarted;
278
- const nodebenchStarted = performance.now();
279
- const nodebenchResults = await runWorkerPool(dataset.id, tasks, concurrency, nodebenchCaller, {
280
- redactedEvalInput: dataset.redactedEvalInput,
281
- redactedFindToolsQuery: dataset.redactedFindToolsQuery,
282
- });
283
- const nodebenchElapsed = performance.now() - nodebenchStarted;
284
- const noopFailed = noopResults.filter((r) => !r.ok);
285
- const nodebenchFailed = nodebenchResults.filter((r) => !r.ok);
286
- if (noopFailed.length > 0 || nodebenchFailed.length > 0) {
287
- console.error("[perf-compare] failures", {
288
- dataset: dataset.id,
289
- noopFailed,
290
- nodebenchFailed,
291
- });
292
- }
293
- expect(noopFailed.length).toBe(0);
294
- expect(nodebenchFailed.length).toBe(0);
295
- const overhead = nodebenchElapsed - noopElapsed;
296
- const ratio = noopElapsed > 0 ? nodebenchElapsed / noopElapsed : Number.POSITIVE_INFINITY;
297
- const overheadPerTask = overhead / Math.max(1, tasks.length);
298
- console.log(`[perf-compare] dataset=${dataset.id} tasks=${tasks.length} concurrency=${concurrency} noopMs=${noopElapsed.toFixed(2)} nodebenchMs=${nodebenchElapsed.toFixed(2)} overheadMs=${overhead.toFixed(2)} overheadPerTaskMs=${overheadPerTask.toFixed(2)} ratio=${ratio.toFixed(2)}`);
299
- }
300
- const requiredTools = new Set([
301
- "run_recon",
302
- "log_recon_finding",
303
- "findTools",
304
- "getMethodology",
305
- "start_eval_run",
306
- "record_eval_result",
307
- "complete_eval_run",
308
- "run_closed_loop",
309
- "run_mandatory_flywheel",
310
- "search_all_knowledge",
311
- ]);
312
- const calledTools = new Set(nodebenchToolCallLog.map((e) => e.tool));
313
- for (const name of requiredTools) {
314
- expect(calledTools.has(name)).toBe(true);
315
- }
316
- });
317
- });
318
- //# sourceMappingURL=openDatasetPerfComparison.test.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"openDatasetPerfComparison.test.js","sourceRoot":"","sources":["../../src/__tests__/openDatasetPerfComparison.test.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,QAAQ,CAAC;AAC9C,OAAO,EAAE,UAAU,EAAE,MAAM,SAAS,CAAC;AACrC,OAAO,EAAE,QAAQ,EAAE,MAAM,kBAAkB,CAAC;AAC5C,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AACzC,OAAO,EAAE,WAAW,EAAE,MAAM,iBAAiB,CAAC;AAE9C,OAAO,WAAW,MAAM,6CAA6C,CAAC;AACtE,OAAO,gBAAgB,MAAM,8CAA8C,CAAC;AAC5E,OAAO,eAAe,MAAM,0CAA0C,CAAC;AAEvE,OAAO,EAAE,iBAAiB,EAAE,MAAM,+BAA+B,CAAC;AAClE,OAAO,EAAE,UAAU,EAAE,MAAM,wBAAwB,CAAC;AACpD,OAAO,EAAE,SAAS,EAAE,MAAM,uBAAuB,CAAC;AAClD,OAAO,EAAE,gBAAgB,EAAE,MAAM,8BAA8B,CAAC;AAChE,OAAO,EAAE,aAAa,EAAE,MAAM,2BAA2B,CAAC;AAC1D,OAAO,EAAE,aAAa,EAAE,MAAM,2BAA2B,CAAC;AAC1D,OAAO,EAAE,kBAAkB,EAAE,MAAM,gCAAgC,CAAC;AACpE,OAAO,EAAE,mBAAmB,EAAE,MAAM,iCAAiC,CAAC;AACtE,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AAmCxD,MAAM,SAAS,GAAG,OAAO,CAAC,GAAG,CAAC,0BAA0B,KAAK,GAAG,CAAC;AAEjE,MAAM,WAAW,GAAc;IAC7B,GAAG,iBAAiB;IACpB,GAAG,SAAS;IACZ,GAAG,gBAAgB;IACnB,GAAG,aAAa;IAChB,GAAG,aAAa;IAChB,GAAG,UAAU;IACb,GAAG,kBAAkB;IACrB,GAAG,mBAAmB;CACvB,CAAC;AACF,MAAM,QAAQ,GAAG,CAAC,GAAG,WAAW,EAAE,GAAG,eAAe,CAAC,WAAW,CAAC,CAAC,CAAC;AAEnE,SAAS,QAAQ,CAAC,IAAY;IAC5B,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC,SAAS,EAAE,EAAE,CAAC,SAAS,CAAC,IAAI,KAAK,IAAI,CAAC,CAAC;IACnE,IAAI,CAAC,IAAI;QAAE,MAAM,IAAI,KAAK,CAAC,mBAAmB,IAAI,EAAE,CAAC,CAAC;IACtD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,mBAAmB;IAC1B,OAAO,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,EAAE;QAC3C,QAAQ,IAAI,EAAE,CAAC;YACb,KAAK,WAAW;gBACd,OAAO,EAAE,SAAS,EAAE,QAAQ,MAAM,EAAE,EAAE,CAAC;YACzC,KAAK,mBAAmB;gBACtB,OAAO,EAAE,EAAE,EAAE,IAAI,EAAE,CAAC;YACtB,KAAK,WAAW;gBACd,OAAO,EAAE,KAAK,EAAE,CAAC,EAAE,IAAI,EAAE,WAAW,EAAE,CAAC,EAAE,CAAC;YAC5C,KAAK,gBAAgB;gBACnB,OAAO,EAAE,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC,WAAW,CAAC,EAAE,CAAC;YACjD,KAAK,gBAAgB;gBACnB,OAAO,EAAE,KAAK,EAAE,YAAY,MAAM,EAAE,EAAE,OAAO,EAAE,CAAC,aAAa,MAAM,EAAE,CAAC,EAAE,CAAC;YAC3E,KAAK,oBAAoB;gBACvB,OAAO,EAAE,EAAE,EAAE,IAAI,EAAE,CAAC;YACtB,KAAK,mBAAmB;gBACtB,OAAO,EAAE,MAAM,EAAE,WAAW,EAAE,OAAO,EAAE,EAAE,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACzD,KAAK,iBAAiB;gBACpB,OAAO,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC;YAC7B,KAAK,wBAAwB;gBAC3B,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC;YAC1B,KAAK,sBAAsB;gBACzB,OAAO,EAAE,YAAY,EAAE,CAAC,EAAE,CAAC;YAC7B;gBACE,MAAM,IAAI,KAAK,CAAC,8BAA8B,IAAI,EAAE,CAAC,CAAC;QAC1D,CAAC;IACH,CAAC,CAAC;AACJ,CAAC;AAED,SAAS,wBAAwB,CAC/B,WAAqF;IAErF,OAAO,KAAK,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,EAAE;QACzC,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC;QAC5B,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;YACxC,WAAW,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC;YAC/D,OAAO,MAAM,CAAC;QAChB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,WAAW,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC,CAAC;YAChE,MAAM,KAAK,CAAC;QACd,CAAC;IACH,CAAC,CAAC;AACJ,CAAC;AAED,SAAS,QAAQ,CAAC,KAAc,EAAE,QAAgB,EAAE,GAAW,EAAE,GAAW;IAC1E,MAAM,MAAM,GAAG,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,KAAK,IAAI,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC;IACxD,MAAM,KAAK,GAAG,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC;IAC1D,OAAO,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC,CAAC;AAC7C,CAAC;AAED,KAAK,UAAU,kBAAkB,CAC/B,IAAiB,EACjB,SAAiB,EACjB,QAAoB,EACpB,WAAmB,EACnB,IAGC;IAED,MAAM,OAAO,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;IAElC,MAAM,KAAK,GAAG,CAAC,MAAM,QAAQ,CAC3B,WAAW,EACX;QACE,MAAM,EAAE,GAAG,SAAS,SAAS,IAAI,CAAC,EAAE,EAAE;QACtC,WAAW,EAAE,+BAA+B;QAC5C,cAAc,EAAE;YACd,SAAS,EAAE,yBAAyB;YACpC,YAAY,EAAE,mDAAmD;SAClE;KACF,EACD,IAAI,CAAC,EAAE,EACP,aAAa,CACd,CAAQ,CAAC;IAEV,MAAM,QAAQ,CACZ,mBAAmB,EACnB;QACE,SAAS,EAAE,KAAK,CAAC,SAAS;QAC1B,QAAQ,EAAE,SAAS;QACnB,OAAO,EAAE,wBAAwB,IAAI,CAAC,EAAE,aAAa,SAAS,IAAI;QAClE,SAAS,EAAE,iDAAiD;KAC7D,EACD,IAAI,CAAC,EAAE,EACP,WAAW,CACZ,CAAC;IAEF,MAAM,cAAc,GAClB,IAAI,EAAE,sBAAsB,EAAE,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,IAAI,IAAI,CAAC,EAAE,CAAC;IAC/E,MAAM,UAAU,GAAG,CAAC,MAAM,QAAQ,CAChC,WAAW,EACX,EAAE,KAAK,EAAE,cAAc,EAAE,QAAQ,EAAE,WAAW,EAAE,EAChD,IAAI,CAAC,EAAE,EACP,YAAY,CACb,CAAQ,CAAC;IACV,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,UAAU,EAAE,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAEpD,MAAM,WAAW,GAAG,CAAC,MAAM,QAAQ,CACjC,gBAAgB,EAChB,EAAE,KAAK,EAAE,oBAAoB,EAAE,EAC/B,IAAI,CAAC,EAAE,EACP,iBAAiB,CAClB,CAAQ,CAAC;IACV,MAAM,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC,UAAU,EAAE,CAAC;IAEvC,MAAM,SAAS,GAAG,IAAI,EAAE,iBAAiB,EAAE,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC,MAAM,CAAC;IACjE,MAAM,OAAO,GAAG,CAAC,MAAM,QAAQ,CAC7B,gBAAgB,EAChB;QACE,IAAI,EAAE,QAAQ,SAAS,IAAI,IAAI,CAAC,EAAE,IAAI,IAAI,CAAC,GAAG,EAAE,EAAE;QAClD,WAAW,EAAE,oBAAoB,SAAS,YAAY,WAAW,GAAG;QACpE,KAAK,EAAE;YACL;gBACE,KAAK,EAAE,SAAS;gBAChB,MAAM,EAAE,6BAA6B,IAAI,CAAC,EAAE,EAAE;gBAC9C,QAAQ,EAAE,sDAAsD;aACjE;SACF;KACF,EACD,IAAI,CAAC,EAAE,EACP,gBAAgB,CACjB,CAAQ,CAAC;IAEV,MAAM,QAAQ,CACZ,oBAAoB,EACpB;QACE,MAAM,EAAE,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC;QAC1B,OAAO,EAAE,MAAM;QACf,KAAK,EAAE,CAAC;QACR,MAAM,EAAE,uBAAuB;QAC/B,SAAS,EAAE,EAAE,OAAO,EAAE,SAAS,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,EAAE,WAAW,EAAE;KAChE,EACD,IAAI,CAAC,EAAE,EACP,oBAAoB,CACrB,CAAC;IAEF,MAAM,WAAW,GAAG,CAAC,MAAM,QAAQ,CACjC,mBAAmB,EACnB,EAAE,KAAK,EAAE,OAAO,CAAC,KAAK,EAAE,EACxB,IAAI,CAAC,EAAE,EACP,mBAAmB,CACpB,CAAQ,CAAC;IACV,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;IAE7C,MAAM,UAAU,GAAG,CAAC,MAAM,QAAQ,CAChC,iBAAiB,EACjB;QACE,KAAK,EAAE;YACL,EAAE,IAAI,EAAE,SAAS,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE;YAC/C,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE;YAC5C,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE;SAC7C;KACF,EACD,IAAI,CAAC,EAAE,EACP,iBAAiB,CAClB,CAAQ,CAAC;IACV,MAAM,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAExC,MAAM,QAAQ,GAAG,CAAC,MAAM,QAAQ,CAC9B,wBAAwB,EACxB;QACE,MAAM,EAAE,gBAAgB,SAAS,IAAI,IAAI,CAAC,EAAE,EAAE;QAC9C,KAAK,EAAE;YACL,EAAE,QAAQ,EAAE,iBAAiB,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE;YAC3D,EAAE,QAAQ,EAAE,iBAAiB,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE;YAC3D,EAAE,QAAQ,EAAE,mBAAmB,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE;YAC7D,EAAE,QAAQ,EAAE,cAAc,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE;YACxD,EAAE,QAAQ,EAAE,kBAAkB,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE;YAC5D,EAAE,QAAQ,EAAE,qBAAqB,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE;SAChE;KACF,EACD,IAAI,CAAC,EAAE,EACP,wBAAwB,CACzB,CAAQ,CAAC;IACV,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAEnC,MAAM,SAAS,GAAG,CAAC,MAAM,QAAQ,CAC/B,sBAAsB,EACtB,EAAE,KAAK,EAAE,IAAI,CAAC,EAAE,EAAE,KAAK,EAAE,CAAC,EAAE,EAC5B,IAAI,CAAC,EAAE,EACP,sBAAsB,CACvB,CAAQ,CAAC;IACV,MAAM,CAAC,OAAO,SAAS,CAAC,YAAY,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IAErD,OAAO;QACL,MAAM,EAAE,IAAI,CAAC,EAAE;QACf,WAAW;QACX,EAAE,EAAE,IAAI;QACR,SAAS,EAAE,WAAW,CAAC,GAAG,EAAE,GAAG,OAAO;KACvC,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,aAAa,CAC1B,SAAiB,EACjB,KAAoB,EACpB,WAAmB,EACnB,QAAoB,EACpB,IAGC;IAED,MAAM,kBAAkB,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,WAAW,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC;IAC5E,MAAM,OAAO,GAAmB,IAAI,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;IACxD,IAAI,SAAS,GAAG,CAAC,CAAC;IAElB,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,kBAAkB,EAAE,EAAE,CAAC,CAAC,EAAE,WAAW,EAAE,EAAE,CAC5E,CAAC,KAAK,IAAI,EAAE;QACV,OAAO,IAAI,EAAE,CAAC;YACZ,MAAM,SAAS,GAAG,SAAS,EAAE,CAAC;YAC9B,IAAI,SAAS,IAAI,KAAK,CAAC,MAAM;gBAAE,OAAO;YAEtC,MAAM,IAAI,GAAG,KAAK,CAAC,SAAS,CAAC,CAAC;YAC9B,IAAI,CAAC;gBACH,OAAO,CAAC,SAAS,CAAC,GAAG,MAAM,kBAAkB,CAC3C,IAAI,EACJ,SAAS,EACT,QAAQ,EACR,WAAW,EACX,IAAI,CACL,CAAC;YACJ,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,OAAO,CAAC,SAAS,CAAC,GAAG;oBACnB,MAAM,EAAE,IAAI,CAAC,EAAE;oBACf,WAAW;oBACX,EAAE,EAAE,KAAK;oBACT,SAAS,EAAE,CAAC;oBACZ,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;iBAC9D,CAAC;YACJ,CAAC;QACH,CAAC;IACH,CAAC,CAAC,EAAE,CACL,CAAC;IAEF,MAAM,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;IAC3B,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,SAAS,cAAc,CAAC,OAAY;IAClC,MAAM,KAAK,GAAG,KAAK,CAAC,OAAO,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC;IACjE,OAAO,KAAK;SACT,GAAG,CAAC,CAAC,CAAM,EAAE,EAAE,CAAC,CAAC;QAChB,EAAE,EAAE,MAAM,CAAC,CAAC,EAAE,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE;QAC9B,MAAM,EAAE,MAAM,CAAC,CAAC,EAAE,MAAM,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE;QACtC,IAAI,EAAE,OAAO,CAAC,KAAK,QAAQ,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS;KACjD,CAAC,CAAC;SACF,MAAM,CAAC,CAAC,CAAM,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,MAAM,CAAC,CAAC;AAC1C,CAAC;AAED,SAAS,sBAAsB;IAC7B,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,qBAAqB,IAAI,aAAa,CAAC;IAClE,MAAM,KAAK,GAAG,OAAO,CAAC,GAAG,CAAC,oBAAoB,IAAI,YAAY,CAAC;IAC/D,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;IAC7D,MAAM,QAAQ,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE,aAAa,CAAC,CAAC;IACtD,OAAO,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,QAAQ,EAAE,MAAM,EAAE,QAAQ,MAAM,IAAI,KAAK,cAAc,CAAC,CAAC;AACtF,CAAC;AAED,KAAK,UAAU,kBAAkB;IAC/B,MAAM,QAAQ,GAAG,sBAAsB,EAAE,CAAC;IAC1C,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC;QAAE,OAAO,IAAI,CAAC;IACvC,MAAM,GAAG,GAAG,MAAM,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;IAC7C,OAAO,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;AACzB,CAAC;AAED,SAAS,qBAAqB,CAAC,IAAiB;IAC9C,MAAM,IAAI,GAAG,CAAC,IAAI,CAAC,IAAI,IAAI,EAAE,CAAQ,CAAC;IACtC,OAAO;QACL,aAAa,IAAI,CAAC,EAAE,oBAAoB;QACxC,SAAS,IAAI,CAAC,KAAK,IAAI,GAAG,EAAE;QAC5B,kBAAkB,IAAI,CAAC,cAAc,IAAI,EAAE,EAAE;QAC7C,WAAW,IAAI,CAAC,OAAO,IAAI,EAAE,EAAE;QAC/B,WAAW,IAAI,CAAC,OAAO,IAAI,EAAE,EAAE;KAChC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;AAChB,CAAC;AAED,SAAS,0BAA0B,CAAC,IAAiB;IACnD,MAAM,IAAI,GAAG,CAAC,IAAI,CAAC,IAAI,IAAI,EAAE,CAAQ,CAAC;IACtC,MAAM,GAAG,GAAG,MAAM,CAAC,IAAI,CAAC,OAAO,IAAI,EAAE,CAAC,CAAC;IACvC,OAAO,aAAa,IAAI,CAAC,EAAE,WAAW,IAAI,CAAC,KAAK,IAAI,GAAG,eAAe,IAAI,CAAC,OAAO,IAAI,KAAK,UAAU,GAAG,EAAE,CAAC;AAC7G,CAAC;AAED,MAAM,QAAQ,GAAuB;IACnC;QACE,EAAE,EAAE,MAAM;QACV,KAAK,EAAE,cAAc,CAAC,WAAW,CAAC;QAClC,QAAQ,EAAE,mCAAmC;QAC7C,cAAc,EAAE,oCAAoC;KACrD;IACD;QACE,EAAE,EAAE,WAAW;QACf,KAAK,EAAE,cAAc,CAAC,gBAAgB,CAAC;QACvC,QAAQ,EAAE,gCAAgC;QAC1C,cAAc,EAAE,iCAAiC;KAClD;IACD;QACE,EAAE,EAAE,UAAU;QACd,KAAK,EAAE,cAAc,CAAC,eAAe,CAAC;QACtC,QAAQ,EAAE,+BAA+B;QACzC,cAAc,EAAE,gCAAgC;KACjD;CACF,CAAC;AAEF,QAAQ,CAAC,+DAA+D,EAAE,GAAG,EAAE;IAC7E,MAAM,MAAM,GAAG,SAAS,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC;IAExC,MAAM,CAAC,mEAAmE,EAAE,KAAK,IAAI,EAAE;QACrF,MAAM,IAAI,GAAG,MAAM,kBAAkB,EAAE,CAAC;QACxC,IAAI,IAAI,EAAE,CAAC;YACT,QAAQ,CAAC,IAAI,CAAC;gBACZ,EAAE,EAAE,MAAM;gBACV,KAAK,EAAE,cAAc,CAAC,IAAI,CAAC;gBAC3B,QAAQ,EAAE,2BAA2B;gBACrC,cAAc,EAAE,4BAA4B;gBAC5C,iBAAiB,EAAE,qBAAqB;gBACxC,sBAAsB,EAAE,0BAA0B;aACnD,CAAC,CAAC;QACL,CAAC;QAED,MAAM,UAAU,GAAG,mBAAmB,EAAE,CAAC;QACzC,MAAM,oBAAoB,GAKrB,EAAE,CAAC;QACR,MAAM,eAAe,GAAG,wBAAwB,CAAC,oBAAoB,CAAC,CAAC;QAEvE,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;YAC/B,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;YAEhD,MAAM,SAAS,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,QAAQ,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,OAAO,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;YACtF,MAAM,WAAW,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,cAAc,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,SAAS,CAAC,CAAC;YACnF,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC;YAEhD,MAAM,WAAW,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;YACtC,MAAM,WAAW,GAAG,MAAM,aAAa,CAAC,OAAO,CAAC,EAAE,EAAE,KAAK,EAAE,WAAW,EAAE,UAAU,EAAE;gBAClF,iBAAiB,EAAE,OAAO,CAAC,iBAAiB;gBAC5C,sBAAsB,EAAE,OAAO,CAAC,sBAAsB;aACvD,CAAC,CAAC;YACH,MAAM,WAAW,GAAG,WAAW,CAAC,GAAG,EAAE,GAAG,WAAW,CAAC;YAEpD,MAAM,gBAAgB,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;YAC3C,MAAM,gBAAgB,GAAG,MAAM,aAAa,CAAC,OAAO,CAAC,EAAE,EAAE,KAAK,EAAE,WAAW,EAAE,eAAe,EAAE;gBAC5F,iBAAiB,EAAE,OAAO,CAAC,iBAAiB;gBAC5C,sBAAsB,EAAE,OAAO,CAAC,sBAAsB;aACvD,CAAC,CAAC;YACH,MAAM,gBAAgB,GAAG,WAAW,CAAC,GAAG,EAAE,GAAG,gBAAgB,CAAC;YAE9D,MAAM,UAAU,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;YACpD,MAAM,eAAe,GAAG,gBAAgB,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;YAC9D,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,IAAI,eAAe,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACxD,OAAO,CAAC,KAAK,CAAC,yBAAyB,EAAE;oBACvC,OAAO,EAAE,OAAO,CAAC,EAAE;oBACnB,UAAU;oBACV,eAAe;iBAChB,CAAC,CAAC;YACL,CAAC;YACD,MAAM,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAClC,MAAM,CAAC,eAAe,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAEvC,MAAM,QAAQ,GAAG,gBAAgB,GAAG,WAAW,CAAC;YAChD,MAAM,KAAK,GAAG,WAAW,GAAG,CAAC,CAAC,CAAC,CAAC,gBAAgB,GAAG,WAAW,CAAC,CAAC,CAAC,MAAM,CAAC,iBAAiB,CAAC;YAC1F,MAAM,eAAe,GAAG,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;YAE7D,OAAO,CAAC,GAAG,CACT,0BAA0B,OAAO,CAAC,EAAE,UAAU,KAAK,CAAC,MAAM,gBAAgB,WAAW,WAAW,WAAW,CAAC,OAAO,CACjH,CAAC,CACF,gBAAgB,gBAAgB,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,QAAQ,CAAC,OAAO,CACzE,CAAC,CACF,sBAAsB,eAAe,CAAC,OAAO,CAAC,CAAC,CAAC,UAAU,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAC9E,CAAC;QACJ,CAAC;QAED,MAAM,aAAa,GAAG,IAAI,GAAG,CAAC;YAC5B,WAAW;YACX,mBAAmB;YACnB,WAAW;YACX,gBAAgB;YAChB,gBAAgB;YAChB,oBAAoB;YACpB,mBAAmB;YACnB,iBAAiB;YACjB,wBAAwB;YACxB,sBAAsB;SACvB,CAAC,CAAC;QACH,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,oBAAoB,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;QACrE,KAAK,MAAM,IAAI,IAAI,aAAa,EAAE,CAAC;YACjC,MAAM,CAAC,WAAW,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC3C,CAAC;IACH,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
@@ -1,23 +0,0 @@
1
- /**
2
- * OpenClaw Dogfood Test — End-to-end integration across Tier A + Tier C
3
- *
4
- * Exercises the full lifecycle:
5
- * 1. Setup check → system readiness
6
- * 2. Configure sandbox policy → allowlist + blocklist
7
- * 3. Connect session → spawn with policy enforcement
8
- * 4. Call allowed skill → passes enforcement
9
- * 5. Call blocked skill → blocked by allowlist
10
- * 6. Call with suspicious args → blocked by pattern scanning
11
- * 7. Audit trail → violations logged
12
- * 8. Compliance scoring → grade reflects violations
13
- * 9. Export audit report → markdown output
14
- * 10. Disconnect → compliance summary
15
- * 11. Gotcha lifecycle → record + search
16
- * 12. Scaffold dry-run → project generation preview
17
- * 13. Workflow audit → risk scoring
18
- * 14. Skill risk profiling → trust scoring
19
- * 15. Tier C bridge tools → mcp-local openclawTools echo
20
- *
21
- * Vitest: `npx vitest run src/__tests__/openclawDogfood.test.ts`
22
- */
23
- export {};