npm - nodebench-mcp - Versions diffs - 2.6.0 → 2.8.1 - Mend

nodebench-mcp 2.6.0 → 2.8.1

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37)

package/NODEBENCH_AGENTS.md +1 -1
package/README.md +21 -12
package/dist/__tests__/audit-registry.d.ts +1 -0
package/dist/__tests__/audit-registry.js +60 -0
package/dist/__tests__/audit-registry.js.map +1 -0
package/dist/__tests__/gaiaCapabilityEval.test.js +59 -1
package/dist/__tests__/gaiaCapabilityEval.test.js.map +1 -1
package/dist/__tests__/gaiaCapabilityFilesEval.test.js +388 -9
package/dist/__tests__/gaiaCapabilityFilesEval.test.js.map +1 -1
package/dist/__tests__/tools.test.js +551 -4
package/dist/__tests__/tools.test.js.map +1 -1
package/dist/index.js +28 -6
package/dist/index.js.map +1 -1
package/dist/tools/boilerplateTools.d.ts +11 -0
package/dist/tools/boilerplateTools.js +500 -0
package/dist/tools/boilerplateTools.js.map +1 -0
package/dist/tools/cCompilerBenchmarkTools.d.ts +14 -0
package/dist/tools/cCompilerBenchmarkTools.js +453 -0
package/dist/tools/cCompilerBenchmarkTools.js.map +1 -0
package/dist/tools/figmaFlowTools.d.ts +13 -0
package/dist/tools/figmaFlowTools.js +183 -0
package/dist/tools/figmaFlowTools.js.map +1 -0
package/dist/tools/flickerDetectionTools.d.ts +14 -0
package/dist/tools/flickerDetectionTools.js +231 -0
package/dist/tools/flickerDetectionTools.js.map +1 -0
package/dist/tools/localFileTools.d.ts +1 -0
package/dist/tools/localFileTools.js +1926 -27
package/dist/tools/localFileTools.js.map +1 -1
package/dist/tools/metaTools.js +17 -0
package/dist/tools/metaTools.js.map +1 -1
package/dist/tools/progressiveDiscoveryTools.d.ts +14 -0
package/dist/tools/progressiveDiscoveryTools.js +239 -0
package/dist/tools/progressiveDiscoveryTools.js.map +1 -0
package/dist/tools/toolRegistry.d.ts +88 -0
package/dist/tools/toolRegistry.js +1926 -0
package/dist/tools/toolRegistry.js.map +1 -0
package/package.json +3 -2

package/dist/__tests__/tools.test.js CHANGED

@@ -7,6 +7,7 @@ import { describe, it, expect } from "vitest";
 import os from "node:os";
 import path from "node:path";
 import { mkdtemp, writeFile } from "node:fs/promises";
+import { existsSync } from "node:fs";
 import { verificationTools } from "../tools/verificationTools.js";
 import { reconTools } from "../tools/reconTools.js";
 import { uiCaptureTools } from "../tools/uiCaptureTools.js";
@@ -27,6 +28,12 @@ import { securityTools } from "../tools/securityTools.js";
 import { platformTools } from "../tools/platformTools.js";
 import { localFileTools } from "../tools/localFileTools.js";
 import { researchWritingTools } from "../tools/researchWritingTools.js";
+import { flickerDetectionTools } from "../tools/flickerDetectionTools.js";
+import { figmaFlowTools } from "../tools/figmaFlowTools.js";
+import { createProgressiveDiscoveryTools } from "../tools/progressiveDiscoveryTools.js";
+import { boilerplateTools } from "../tools/boilerplateTools.js";
+import { cCompilerBenchmarkTools } from "../tools/cCompilerBenchmarkTools.js";
+import { getQuickRef } from "../tools/toolRegistry.js";
 // Assemble all tools like index.ts does
 const domainTools = [
     ...verificationTools,
@@ -48,15 +55,22 @@ const domainTools = [
     ...securityTools,
     ...platformTools,
     ...researchWritingTools,
+    ...flickerDetectionTools,
+    ...figmaFlowTools,
+    ...boilerplateTools,
+    ...cCompilerBenchmarkTools,
 ];
-const allTools = [...domainTools, ...createMetaTools(domainTools)];
+const metaTools = createMetaTools(domainTools);
+const allToolsWithoutDiscovery = [...domainTools, ...metaTools];
+const discoveryTools = createProgressiveDiscoveryTools(allToolsWithoutDiscovery.map((t) => ({ name: t.name, description: t.description })));
+const allTools = [...allToolsWithoutDiscovery, ...discoveryTools];
 // ═══════════════════════════════════════════════════════════════════════════
 // STATIC LAYER — structure validation
 // ═══════════════════════════════════════════════════════════════════════════
 describe("Static: tool structure", () => {
-    it("should have 98 tools total", () => {
-        // 96 domain tools + 2 meta tools (findTools, getMethodology)
-        expect(allTools.length).toBe(98);
+    it("should have 129 tools total", () => {
+        // 124 domain tools + 2 meta tools (findTools, getMethodology) + 3 progressive discovery tools
+        expect(allTools.length).toBe(129);
     });
     it("every tool has name, description, inputSchema, handler", () => {
         for (const tool of allTools) {
@@ -458,6 +472,25 @@ describe("Static: self_reinforced_learning methodology", () => {
 // ═══════════════════════════════════════════════════════════════════════════
 const findTool = (name) => allTools.find((t) => t.name === name);
 describe("Unit: local file tools", () => {
+    const findRepoFile = (relPath) => {
+        let dir = process.cwd();
+        for (let i = 0; i < 10; i++) {
+            const candidate = path.join(dir, relPath);
+            if (existsSync(candidate))
+                return candidate;
+            const parent = path.dirname(dir);
+            if (parent === dir)
+                break;
+            dir = parent;
+        }
+        throw new Error(`Fixture not found: ${relPath}`);
+    };
+    it("tool registry should include quickRefs for all local_file tools", () => {
+        const missing = localFileTools
+            .map((t) => t.name)
+            .filter((name) => !getQuickRef(name));
+        expect(missing).toEqual([]);
+    });
     it("read_csv_file should parse a bounded table", async () => {
         const tmpDir = await mkdtemp(path.join(os.tmpdir(), "nodebench-mcp-"));
         const csvPath = path.join(tmpDir, "sample.csv");
@@ -501,6 +534,218 @@ describe("Unit: local file tools", () => {
         expect(result.rows[0][0]).toBe("Movie A");
         expect(result.rows[0][1]).toBe(2009);
     });
+    it("csv_select_rows should filter rows and select columns", async () => {
+        const tmpDir = await mkdtemp(path.join(os.tmpdir(), "nodebench-mcp-"));
+        const csvPath = path.join(tmpDir, "sample.csv");
+        await writeFile(csvPath, "name,age\nAlice,30\nBob,25\nCara,40\n", "utf8");
+        const tool = findTool("csv_select_rows");
+        const result = (await tool.handler({
+            path: csvPath,
+            hasHeader: true,
+            where: [{ column: "age", op: "gt", value: 25 }],
+            returnColumns: ["name"],
+            limit: 10,
+        }));
+        expect(result.headers).toEqual(["name"]);
+        expect(result.rows.length).toBe(2);
+        expect(result.rows[0].row[0]).toBe("Alice");
+        expect(result.rows[1].row[0]).toBe("Cara");
+    });
+    it("csv_aggregate should compute min and return bestRow", async () => {
+        const tmpDir = await mkdtemp(path.join(os.tmpdir(), "nodebench-mcp-"));
+        const csvPath = path.join(tmpDir, "sample.csv");
+        await writeFile(csvPath, "name,age\nAlice,30\nBob,25\nCara,40\n", "utf8");
+        const tool = findTool("csv_aggregate");
+        const result = (await tool.handler({
+            path: csvPath,
+            hasHeader: true,
+            operation: "min",
+            value: { type: "column", column: "age" },
+            returnColumns: ["name", "age"],
+        }));
+        expect(result.result).toBe(25);
+        expect(result.bestRow.headers).toEqual(["name", "age"]);
+        expect(result.bestRow.row[0]).toBe("Bob");
+    });
+    it("xlsx_select_rows should filter rows and select columns", async () => {
+        const tmpDir = await mkdtemp(path.join(os.tmpdir(), "nodebench-mcp-"));
+        const xlsxPath = path.join(tmpDir, "sample.xlsx");
+        const mod = await import("xlsx");
+        const XLSX = mod.default ?? mod;
+        const wb = XLSX.utils.book_new();
+        const sheet = XLSX.utils.aoa_to_sheet([
+            ["Title", "Year"],
+            ["Movie A", 2009],
+            ["Movie B", 2011],
+        ]);
+        XLSX.utils.book_append_sheet(wb, sheet, "Sheet1");
+        XLSX.writeFile(wb, xlsxPath);
+        const tool = findTool("xlsx_select_rows");
+        const result = (await tool.handler({
+            path: xlsxPath,
+            sheetName: "Sheet1",
+            headerRow: 1,
+            where: [{ column: "Year", op: "eq", value: 2009 }],
+            returnColumns: ["Title"],
+            limit: 10,
+        }));
+        expect(result.headers).toEqual(["Title"]);
+        expect(result.rows.length).toBe(1);
+        expect(result.rows[0].row[0]).toBe("Movie A");
+    });
+    it("xlsx_aggregate should compute min and return bestRow", async () => {
+        const tmpDir = await mkdtemp(path.join(os.tmpdir(), "nodebench-mcp-"));
+        const xlsxPath = path.join(tmpDir, "sample.xlsx");
+        const mod = await import("xlsx");
+        const XLSX = mod.default ?? mod;
+        const wb = XLSX.utils.book_new();
+        const sheet = XLSX.utils.aoa_to_sheet([
+            ["Title", "Year"],
+            ["Movie A", 2009],
+            ["Movie B", 2011],
+        ]);
+        XLSX.utils.book_append_sheet(wb, sheet, "Sheet1");
+        XLSX.writeFile(wb, xlsxPath);
+        const tool = findTool("xlsx_aggregate");
+        const result = (await tool.handler({
+            path: xlsxPath,
+            sheetName: "Sheet1",
+            headerRow: 1,
+            operation: "min",
+            value: { type: "column", column: "Year" },
+            returnColumns: ["Title", "Year"],
+        }));
+        expect(result.result).toBe(2009);
+        expect(result.bestRow.headers).toEqual(["Title", "Year"]);
+        expect(result.bestRow.row[0]).toBe("Movie A");
+    });
+    it("read_pdf_text should extract page text", async () => {
+        const pdfPath = findRepoFile(path.join("test_assets", "Report_2025-12-25.pdf"));
+        const tool = findTool("read_pdf_text");
+        const result = (await tool.handler({
+            path: pdfPath,
+            pageStart: 1,
+            pageEnd: 1,
+            maxChars: 2000,
+        }));
+        expect(result.pagesIncluded).toEqual([1]);
+        expect(String(result.text)).toContain("Hello World");
+    });
+    it("pdf_search_text should find matches with snippets", async () => {
+        const pdfPath = findRepoFile(path.join("test_assets", "Report_2025-12-25.pdf"));
+        const tool = findTool("pdf_search_text");
+        const result = (await tool.handler({
+            path: pdfPath,
+            query: "Hello",
+            maxMatches: 5,
+        }));
+        expect(result.matchCount).toBeGreaterThan(0);
+        expect(result.matches[0].page).toBe(1);
+        expect(String(result.matches[0].snippet)).toContain("Hello");
+    });
+    it("read_text_file should return bounded text slices", async () => {
+        const tmpDir = await mkdtemp(path.join(os.tmpdir(), "nodebench-mcp-"));
+        const filePath = path.join(tmpDir, "notes.txt");
+        await writeFile(filePath, "Line1\nLine2\nLine3\n", "utf8");
+        const tool = findTool("read_text_file");
+        const result = (await tool.handler({
+            path: filePath,
+            startChar: 0,
+            maxChars: 10,
+        }));
+        expect(result.truncated).toBe(true);
+        expect(String(result.text)).toContain("Line1");
+    });
+    it("read_json_file and json_select should parse and select values", async () => {
+        const tmpDir = await mkdtemp(path.join(os.tmpdir(), "nodebench-mcp-"));
+        const filePath = path.join(tmpDir, "data.json");
+        await writeFile(filePath, JSON.stringify({ a: { b: [{ name: "alpha" }, { name: "beta" }] } }), "utf8");
+        const readTool = findTool("read_json_file");
+        const readResult = (await readTool.handler({
+            path: filePath,
+            maxDepth: 6,
+            maxItems: 50,
+            maxStringChars: 1000,
+        }));
+        expect(readResult.rootType).toBe("object");
+        expect(readResult.value.a.b.length).toBe(2);
+        const selectTool = findTool("json_select");
+        const selectResult = (await selectTool.handler({
+            path: filePath,
+            pointer: "/a/b/1/name",
+            maxDepth: 3,
+            maxItems: 10,
+            maxStringChars: 100,
+        }));
+        expect(selectResult.found).toBe(true);
+        expect(selectResult.value).toBe("beta");
+    });
+    it("read_jsonl_file should parse lines and report errors", async () => {
+        const tmpDir = await mkdtemp(path.join(os.tmpdir(), "nodebench-mcp-"));
+        const filePath = path.join(tmpDir, "data.jsonl");
+        await writeFile(filePath, '{"ok":1}\nnot-json\n{"ok":2}\n', "utf8");
+        const tool = findTool("read_jsonl_file");
+        const result = (await tool.handler({
+            path: filePath,
+            limitLines: 10,
+            parseJson: true,
+            maxDepth: 4,
+            maxItems: 20,
+            maxStringChars: 100,
+        }));
+        expect(result.returnedLines).toBe(2);
+        expect(result.errorCount).toBe(1);
+        expect(result.lines[0].value.ok).toBe(1);
+        expect(result.lines[1].value.ok).toBe(2);
+    });
+    it("zip_list_files and zip_read_text_file should read entries", async () => {
+        const zipPath = findRepoFile(path.join("test_assets", "zip_fixture.zip"));
+        const listTool = findTool("zip_list_files");
+        const listResult = (await listTool.handler({ path: zipPath, maxEntries: 50 }));
+        const names = (listResult.entries ?? []).map((e) => e.fileName);
+        expect(names).toContain("hello.txt");
+        expect(names).toContain("folder/data.csv");
+        const readTool = findTool("zip_read_text_file");
+        const readResult = (await readTool.handler({
+            path: zipPath,
+            innerPath: "hello.txt",
+            maxChars: 2000,
+        }));
+        expect(String(readResult.text)).toContain("Hello from zip fixture");
+    });
+    it("zip_extract_file should safely extract to outputDir", async () => {
+        const zipPath = findRepoFile(path.join("test_assets", "zip_fixture.zip"));
+        const tmpDir = await mkdtemp(path.join(os.tmpdir(), "nodebench-mcp-zip-"));
+        const extractTool = findTool("zip_extract_file");
+        const extracted = (await extractTool.handler({
+            path: zipPath,
+            innerPath: "folder/data.csv",
+            outputDir: tmpDir,
+            overwrite: true,
+        }));
+        expect(typeof extracted.extractedPath).toBe("string");
+        expect(existsSync(extracted.extractedPath)).toBe(true);
+        const readTool = findTool("read_text_file");
+        const text = (await readTool.handler({ path: extracted.extractedPath, maxChars: 2000 }));
+        expect(String(text.text)).toContain("alpha,1");
+    });
+    it("read_docx_text should extract document text", async () => {
+        const docxPath = findRepoFile(path.join("test_assets", "docx_fixture.docx"));
+        const tool = findTool("read_docx_text");
+        const result = (await tool.handler({ path: docxPath, maxChars: 5000 }));
+        expect(String(result.text)).toContain("Hello DOCX");
+        expect(String(result.text)).toContain("Second paragraph");
+    });
+    it("read_pptx_text should extract slide text with markers", async () => {
+        const pptxPath = findRepoFile(path.join("test_assets", "pptx_fixture.pptx"));
+        const tool = findTool("read_pptx_text");
+        const result = (await tool.handler({ path: pptxPath, maxChars: 10000 }));
+        expect(result.slideCount).toBe(2);
+        expect(String(result.text)).toContain("[SLIDE 1]");
+        expect(String(result.text)).toContain("Hello PPTX Slide1");
+        expect(String(result.text)).toContain("[SLIDE 2]");
+        expect(String(result.text)).toContain("Slide2 Text");
+    });
 });
 describe("Unit: abandon_cycle", () => {
     it("should abandon an active cycle", async () => {
@@ -1190,4 +1435,306 @@ describe("Static: scan_terminal_security tool", () => {
         expect(props).toHaveProperty("verbose");
     });
 });
+// ═══════════════════════════════════════════════════════════════════════════
+// v2.8.0 — Progressive Discovery, Boilerplate, Benchmark tools
+// ═══════════════════════════════════════════════════════════════════════════
+describe("Static: progressive discovery tools", () => {
+    it("should include discover_tools, get_tool_quick_ref, get_workflow_chain", () => {
+        const names = allTools.map((t) => t.name);
+        expect(names).toContain("discover_tools");
+        expect(names).toContain("get_tool_quick_ref");
+        expect(names).toContain("get_workflow_chain");
+    });
+    it("discover_tools requires query parameter", () => {
+        const tool = findTool("discover_tools");
+        expect(tool.inputSchema.required).toContain("query");
+        expect(tool.inputSchema.properties).toHaveProperty("category");
+        expect(tool.inputSchema.properties).toHaveProperty("phase");
+        expect(tool.inputSchema.properties).toHaveProperty("limit");
+    });
+    it("get_tool_quick_ref requires toolName parameter", () => {
+        const tool = findTool("get_tool_quick_ref");
+        expect(tool.inputSchema.required).toContain("toolName");
+    });
+    it("get_workflow_chain requires chain parameter", () => {
+        const tool = findTool("get_workflow_chain");
+        expect(tool.inputSchema.required).toContain("chain");
+    });
+});
+describe("Unit: discover_tools hybrid search", () => {
+    it("should return ranked results for verification query", async () => {
+        const tool = findTool("discover_tools");
+        const result = (await tool.handler({ query: "verify implementation" }));
+        expect(result.resultCount).toBeGreaterThan(0);
+        expect(result.results[0]).toHaveProperty("relevanceScore");
+        expect(result.results[0]).toHaveProperty("quickRef");
+        expect(result.results[0].relevanceScore).toBeGreaterThan(0);
+    });
+    it("should filter by category", async () => {
+        const tool = findTool("discover_tools");
+        const result = (await tool.handler({ query: "test", category: "eval" }));
+        for (const r of result.results) {
+            expect(r.category).toBe("eval");
+        }
+    });
+    it("should filter by phase", async () => {
+        const tool = findTool("discover_tools");
+        const result = (await tool.handler({ query: "search find", phase: "research" }));
+        for (const r of result.results) {
+            expect(r.phase).toBe("research");
+        }
+    });
+    it("should include matching workflow chains", async () => {
+        const tool = findTool("discover_tools");
+        const result = (await tool.handler({ query: "new feature build" }));
+        expect(result.matchingWorkflows.length).toBeGreaterThan(0);
+    });
+    it("should return progressive hint", async () => {
+        const tool = findTool("discover_tools");
+        const result = (await tool.handler({ query: "verify" }));
+        expect(result._progressiveHint).toBeTruthy();
+    });
+});
+describe("Unit: get_tool_quick_ref", () => {
+    it("should return quick ref for known tool", async () => {
+        const tool = findTool("get_tool_quick_ref");
+        const result = (await tool.handler({ toolName: "start_verification_cycle" }));
+        expect(result.tool).toBe("start_verification_cycle");
+        expect(result.category).toBe("verification");
+        expect(result.quickRef).toHaveProperty("nextAction");
+        expect(result.quickRef).toHaveProperty("nextTools");
+        expect(result.quickRef.nextTools.length).toBeGreaterThan(0);
+    });
+    it("should return error for unknown tool with suggestions", async () => {
+        const tool = findTool("get_tool_quick_ref");
+        const result = (await tool.handler({ toolName: "nonexistent_tool_xyz" }));
+        expect(result.error).toBe(true);
+        expect(result).toHaveProperty("didYouMean");
+    });
+    it("should include related tool details when requested", async () => {
+        const tool = findTool("get_tool_quick_ref");
+        const result = (await tool.handler({
+            toolName: "run_mandatory_flywheel",
+            includeRelatedDetails: true,
+        }));
+        expect(result).toHaveProperty("relatedToolDetails");
+        expect(Object.keys(result.relatedToolDetails).length).toBeGreaterThan(0);
+    });
+});
+describe("Unit: get_workflow_chain", () => {
+    it("should list all available chains", async () => {
+        const tool = findTool("get_workflow_chain");
+        const result = (await tool.handler({ chain: "list" }));
+        expect(result.availableChains.length).toBeGreaterThan(0);
+        const keys = result.availableChains.map((c) => c.key);
+        expect(keys).toContain("new_feature");
+        expect(keys).toContain("fix_bug");
+        expect(keys).toContain("c_compiler_benchmark");
+    });
+    it("should return enriched chain steps", async () => {
+        const tool = findTool("get_workflow_chain");
+        const result = (await tool.handler({ chain: "new_feature" }));
+        expect(result.name).toBe("Build a New Feature");
+        expect(result.totalSteps).toBeGreaterThan(5);
+        expect(result.steps[0]).toHaveProperty("tool");
+        expect(result.steps[0]).toHaveProperty("action");
+        expect(result.steps[0]).toHaveProperty("quickRef");
+    });
+    it("should return error for unknown chain", async () => {
+        const tool = findTool("get_workflow_chain");
+        const result = (await tool.handler({ chain: "nonexistent_chain" }));
+        expect(result.error).toBe(true);
+    });
+});
+describe("Static: boilerplate tools", () => {
+    it("should include scaffold_nodebench_project and get_boilerplate_status", () => {
+        const names = allTools.map((t) => t.name);
+        expect(names).toContain("scaffold_nodebench_project");
+        expect(names).toContain("get_boilerplate_status");
+    });
+    it("scaffold_nodebench_project requires projectPath, projectName, techStack", () => {
+        const tool = findTool("scaffold_nodebench_project");
+        expect(tool.inputSchema.required).toContain("projectPath");
+        expect(tool.inputSchema.required).toContain("projectName");
+        expect(tool.inputSchema.required).toContain("techStack");
+    });
+    it("get_boilerplate_status requires projectPath", () => {
+        const tool = findTool("get_boilerplate_status");
+        expect(tool.inputSchema.required).toContain("projectPath");
+    });
+});
+describe("Unit: scaffold_nodebench_project dry run", () => {
+    it("should preview files without creating them", async () => {
+        const tmpDir = await mkdtemp(path.join(os.tmpdir(), "nodebench-scaffold-"));
+        const tool = findTool("scaffold_nodebench_project");
+        const result = (await tool.handler({
+            projectPath: tmpDir,
+            projectName: "test-project",
+            techStack: "TypeScript, Node.js",
+            dryRun: true,
+        }));
+        expect(result.dryRun).toBe(true);
+        expect(result.summary.totalFiles).toBeGreaterThan(5);
+        expect(result.willCreate.length).toBeGreaterThan(0);
+        expect(result.willCreate).toContain("AGENTS.md");
+        expect(result.willCreate).toContain("package.json");
+        expect(result.willCreate).toContain(".mcp.json");
+        expect(result._quickRef).toBeDefined();
+    });
+});
+describe("Unit: scaffold_nodebench_project actual creation", () => {
+    it("should create all project files", async () => {
+        const tmpDir = await mkdtemp(path.join(os.tmpdir(), "nodebench-scaffold-"));
+        const tool = findTool("scaffold_nodebench_project");
+        const result = (await tool.handler({
+            projectPath: tmpDir,
+            projectName: "real-project",
+            techStack: "TypeScript, React",
+            dryRun: false,
+            includeParallelAgents: true,
+            includeGithubActions: true,
+        }));
+        expect(result.dryRun).toBe(false);
+        expect(result.summary.created).toBeGreaterThan(5);
+        // Verify key files exist
+        const { existsSync } = await import("node:fs");
+        expect(existsSync(path.join(tmpDir, "AGENTS.md"))).toBe(true);
+        expect(existsSync(path.join(tmpDir, "package.json"))).toBe(true);
+        expect(existsSync(path.join(tmpDir, ".mcp.json"))).toBe(true);
+        expect(existsSync(path.join(tmpDir, ".parallel-agents"))).toBe(true);
+        expect(existsSync(path.join(tmpDir, ".github", "workflows"))).toBe(true);
+    });
+});
+describe("Unit: get_boilerplate_status", () => {
+    it("should scan an empty directory and find everything missing", async () => {
+        const tmpDir = await mkdtemp(path.join(os.tmpdir(), "nodebench-status-"));
+        const tool = findTool("get_boilerplate_status");
+        const result = (await tool.handler({ projectPath: tmpDir }));
+        expect(result.completionPercentage).toBe(0);
+        expect(result.missing).toBeGreaterThan(0);
+        expect(result.missingFiles).toContain("AGENTS.md");
+        expect(result.recommendations.length).toBeGreaterThan(0);
+    });
+    it("should detect existing files after scaffolding", async () => {
+        const tmpDir = await mkdtemp(path.join(os.tmpdir(), "nodebench-status-"));
+        // Scaffold first
+        await findTool("scaffold_nodebench_project").handler({
+            projectPath: tmpDir,
+            projectName: "status-test",
+            techStack: "TypeScript",
+            dryRun: false,
+        });
+        // Then check status
+        const tool = findTool("get_boilerplate_status");
+        const result = (await tool.handler({ projectPath: tmpDir }));
+        expect(result.completionPercentage).toBeGreaterThan(50);
+        expect(result.found).toBeGreaterThan(5);
+    });
+    it("should throw for nonexistent path", async () => {
+        const tool = findTool("get_boilerplate_status");
+        await expect(tool.handler({ projectPath: "/nonexistent/path/xyz123" })).rejects.toThrow("does not exist");
+    });
+});
+describe("Static: C-compiler benchmark tools", () => {
+    it("should include all 3 benchmark tools", () => {
+        const names = allTools.map((t) => t.name);
+        expect(names).toContain("start_autonomy_benchmark");
+        expect(names).toContain("log_benchmark_milestone");
+        expect(names).toContain("complete_autonomy_benchmark");
+    });
+    it("start_autonomy_benchmark requires challenge parameter", () => {
+        const tool = findTool("start_autonomy_benchmark");
+        expect(tool.inputSchema.required).toContain("challenge");
+        const challengeProp = tool.inputSchema.properties.challenge;
+        expect(challengeProp.enum).toContain("c_compiler");
+        expect(challengeProp.enum).toContain("rest_api");
+        expect(challengeProp.enum).toContain("fullstack_app");
+        expect(challengeProp.enum).toContain("list");
+    });
+    it("log_benchmark_milestone requires benchmarkId, milestoneId, verificationPassed", () => {
+        const tool = findTool("log_benchmark_milestone");
+        expect(tool.inputSchema.required).toContain("benchmarkId");
+        expect(tool.inputSchema.required).toContain("milestoneId");
+        expect(tool.inputSchema.required).toContain("verificationPassed");
+    });
+    it("complete_autonomy_benchmark requires benchmarkId and reason", () => {
+        const tool = findTool("complete_autonomy_benchmark");
+        expect(tool.inputSchema.required).toContain("benchmarkId");
+        expect(tool.inputSchema.required).toContain("reason");
+    });
+});
+describe("Unit: start_autonomy_benchmark", () => {
+    it("should list all available challenges", async () => {
+        const tool = findTool("start_autonomy_benchmark");
+        const result = (await tool.handler({ challenge: "list" }));
+        expect(result.availableChallenges.length).toBe(5);
+        const keys = result.availableChallenges.map((c) => c.key);
+        expect(keys).toContain("c_compiler");
+        expect(keys).toContain("rest_api");
+        expect(keys).toContain("fullstack_app");
+        expect(keys).toContain("cli_tool");
+        expect(keys).toContain("data_pipeline");
+    });
+    it("should start a cli_tool benchmark", async () => {
+        const tool = findTool("start_autonomy_benchmark");
+        const result = (await tool.handler({
+            challenge: "cli_tool",
+            notes: "test benchmark",
+        }));
+        expect(result.benchmarkId).toBeTruthy();
+        expect(result.challenge).toBe("cli_tool");
+        expect(result.difficulty).toBe("easy");
+        expect(result.totalPoints).toBe(100);
+        expect(result.milestones.length).toBe(8);
+        expect(result._quickRef).toBeDefined();
+    });
+    it("should throw for unknown challenge", async () => {
+        const tool = findTool("start_autonomy_benchmark");
+        await expect(tool.handler({ challenge: "nonexistent_challenge" })).rejects.toThrow("Unknown challenge");
+    });
+});
+describe("Integration: full benchmark lifecycle", () => {
+    it("start → log milestone → complete", async () => {
+        // 1. Start benchmark
+        const benchmark = (await findTool("start_autonomy_benchmark").handler({
+            challenge: "cli_tool",
+            notes: "integration test",
+        }));
+        expect(benchmark.benchmarkId).toBeTruthy();
+        // 2. Log a milestone
+        const milestone = (await findTool("log_benchmark_milestone").handler({
+            benchmarkId: benchmark.benchmarkId,
+            milestoneId: "project_setup",
+            verificationPassed: true,
+            toolsUsed: ["run_closed_loop", "bootstrap_project"],
+            notes: "Project initialized",
+        }));
+        expect(milestone.points).toBe(15);
+        expect(milestone.progress.earnedPoints).toBe(15);
+        expect(milestone.progress.milestonesCompleted).toBe(1);
+        // 3. Log another milestone (failed)
+        const milestone2 = (await findTool("log_benchmark_milestone").handler({
+            benchmarkId: benchmark.benchmarkId,
+            milestoneId: "arg_parsing",
+            verificationPassed: false,
+            notes: "Arg parsing failed tests",
+        }));
+        expect(milestone2.points).toBe(0);
+        expect(milestone2.progress.earnedPoints).toBe(15); // unchanged
+        // 4. Complete benchmark
+        const completed = (await findTool("complete_autonomy_benchmark").handler({
+            benchmarkId: benchmark.benchmarkId,
+            reason: "stuck",
+            notes: "Integration test complete",
+        }));
+        expect(completed.score.earnedPoints).toBe(15);
+        expect(completed.score.percentage).toBe(15);
+        expect(completed.score.grade).toContain("F");
+        expect(completed.milestones.completed).toBe(1);
+        expect(completed.milestones.failed).toBe(1);
+        expect(completed.milestones.pending).toBe(6);
+        expect(completed.analysis.strengths).toContain("Project Setup");
+        expect(completed._quickRef).toBeDefined();
+    });
+});
 //# sourceMappingURL=tools.test.js.map