nodebench-mcp 2.6.0 → 2.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. package/NODEBENCH_AGENTS.md +1 -1
  2. package/README.md +21 -12
  3. package/dist/__tests__/audit-registry.d.ts +1 -0
  4. package/dist/__tests__/audit-registry.js +60 -0
  5. package/dist/__tests__/audit-registry.js.map +1 -0
  6. package/dist/__tests__/gaiaCapabilityEval.test.js +59 -1
  7. package/dist/__tests__/gaiaCapabilityEval.test.js.map +1 -1
  8. package/dist/__tests__/gaiaCapabilityFilesEval.test.js +388 -9
  9. package/dist/__tests__/gaiaCapabilityFilesEval.test.js.map +1 -1
  10. package/dist/__tests__/tools.test.js +551 -4
  11. package/dist/__tests__/tools.test.js.map +1 -1
  12. package/dist/index.js +28 -6
  13. package/dist/index.js.map +1 -1
  14. package/dist/tools/boilerplateTools.d.ts +11 -0
  15. package/dist/tools/boilerplateTools.js +500 -0
  16. package/dist/tools/boilerplateTools.js.map +1 -0
  17. package/dist/tools/cCompilerBenchmarkTools.d.ts +14 -0
  18. package/dist/tools/cCompilerBenchmarkTools.js +453 -0
  19. package/dist/tools/cCompilerBenchmarkTools.js.map +1 -0
  20. package/dist/tools/figmaFlowTools.d.ts +13 -0
  21. package/dist/tools/figmaFlowTools.js +183 -0
  22. package/dist/tools/figmaFlowTools.js.map +1 -0
  23. package/dist/tools/flickerDetectionTools.d.ts +14 -0
  24. package/dist/tools/flickerDetectionTools.js +231 -0
  25. package/dist/tools/flickerDetectionTools.js.map +1 -0
  26. package/dist/tools/localFileTools.d.ts +1 -0
  27. package/dist/tools/localFileTools.js +1926 -27
  28. package/dist/tools/localFileTools.js.map +1 -1
  29. package/dist/tools/metaTools.js +17 -0
  30. package/dist/tools/metaTools.js.map +1 -1
  31. package/dist/tools/progressiveDiscoveryTools.d.ts +14 -0
  32. package/dist/tools/progressiveDiscoveryTools.js +239 -0
  33. package/dist/tools/progressiveDiscoveryTools.js.map +1 -0
  34. package/dist/tools/toolRegistry.d.ts +88 -0
  35. package/dist/tools/toolRegistry.js +1926 -0
  36. package/dist/tools/toolRegistry.js.map +1 -0
  37. package/package.json +3 -2
@@ -7,6 +7,7 @@ import { describe, it, expect } from "vitest";
7
7
  import os from "node:os";
8
8
  import path from "node:path";
9
9
  import { mkdtemp, writeFile } from "node:fs/promises";
10
+ import { existsSync } from "node:fs";
10
11
  import { verificationTools } from "../tools/verificationTools.js";
11
12
  import { reconTools } from "../tools/reconTools.js";
12
13
  import { uiCaptureTools } from "../tools/uiCaptureTools.js";
@@ -27,6 +28,12 @@ import { securityTools } from "../tools/securityTools.js";
27
28
  import { platformTools } from "../tools/platformTools.js";
28
29
  import { localFileTools } from "../tools/localFileTools.js";
29
30
  import { researchWritingTools } from "../tools/researchWritingTools.js";
31
+ import { flickerDetectionTools } from "../tools/flickerDetectionTools.js";
32
+ import { figmaFlowTools } from "../tools/figmaFlowTools.js";
33
+ import { createProgressiveDiscoveryTools } from "../tools/progressiveDiscoveryTools.js";
34
+ import { boilerplateTools } from "../tools/boilerplateTools.js";
35
+ import { cCompilerBenchmarkTools } from "../tools/cCompilerBenchmarkTools.js";
36
+ import { getQuickRef } from "../tools/toolRegistry.js";
30
37
  // Assemble all tools like index.ts does
31
38
  const domainTools = [
32
39
  ...verificationTools,
@@ -48,15 +55,22 @@ const domainTools = [
48
55
  ...securityTools,
49
56
  ...platformTools,
50
57
  ...researchWritingTools,
58
+ ...flickerDetectionTools,
59
+ ...figmaFlowTools,
60
+ ...boilerplateTools,
61
+ ...cCompilerBenchmarkTools,
51
62
  ];
52
- const allTools = [...domainTools, ...createMetaTools(domainTools)];
63
+ const metaTools = createMetaTools(domainTools);
64
+ const allToolsWithoutDiscovery = [...domainTools, ...metaTools];
65
+ const discoveryTools = createProgressiveDiscoveryTools(allToolsWithoutDiscovery.map((t) => ({ name: t.name, description: t.description })));
66
+ const allTools = [...allToolsWithoutDiscovery, ...discoveryTools];
53
67
  // ═══════════════════════════════════════════════════════════════════════════
54
68
  // STATIC LAYER — structure validation
55
69
  // ═══════════════════════════════════════════════════════════════════════════
56
70
  describe("Static: tool structure", () => {
57
- it("should have 98 tools total", () => {
58
- // 96 domain tools + 2 meta tools (findTools, getMethodology)
59
- expect(allTools.length).toBe(98);
71
+ it("should have 129 tools total", () => {
72
+ // 124 domain tools + 2 meta tools (findTools, getMethodology) + 3 progressive discovery tools
73
+ expect(allTools.length).toBe(129);
60
74
  });
61
75
  it("every tool has name, description, inputSchema, handler", () => {
62
76
  for (const tool of allTools) {
@@ -458,6 +472,25 @@ describe("Static: self_reinforced_learning methodology", () => {
458
472
  // ═══════════════════════════════════════════════════════════════════════════
459
473
  const findTool = (name) => allTools.find((t) => t.name === name);
460
474
  describe("Unit: local file tools", () => {
475
+ const findRepoFile = (relPath) => {
476
+ let dir = process.cwd();
477
+ for (let i = 0; i < 10; i++) {
478
+ const candidate = path.join(dir, relPath);
479
+ if (existsSync(candidate))
480
+ return candidate;
481
+ const parent = path.dirname(dir);
482
+ if (parent === dir)
483
+ break;
484
+ dir = parent;
485
+ }
486
+ throw new Error(`Fixture not found: ${relPath}`);
487
+ };
488
+ it("tool registry should include quickRefs for all local_file tools", () => {
489
+ const missing = localFileTools
490
+ .map((t) => t.name)
491
+ .filter((name) => !getQuickRef(name));
492
+ expect(missing).toEqual([]);
493
+ });
461
494
  it("read_csv_file should parse a bounded table", async () => {
462
495
  const tmpDir = await mkdtemp(path.join(os.tmpdir(), "nodebench-mcp-"));
463
496
  const csvPath = path.join(tmpDir, "sample.csv");
@@ -501,6 +534,218 @@ describe("Unit: local file tools", () => {
501
534
  expect(result.rows[0][0]).toBe("Movie A");
502
535
  expect(result.rows[0][1]).toBe(2009);
503
536
  });
537
+ it("csv_select_rows should filter rows and select columns", async () => {
538
+ const tmpDir = await mkdtemp(path.join(os.tmpdir(), "nodebench-mcp-"));
539
+ const csvPath = path.join(tmpDir, "sample.csv");
540
+ await writeFile(csvPath, "name,age\nAlice,30\nBob,25\nCara,40\n", "utf8");
541
+ const tool = findTool("csv_select_rows");
542
+ const result = (await tool.handler({
543
+ path: csvPath,
544
+ hasHeader: true,
545
+ where: [{ column: "age", op: "gt", value: 25 }],
546
+ returnColumns: ["name"],
547
+ limit: 10,
548
+ }));
549
+ expect(result.headers).toEqual(["name"]);
550
+ expect(result.rows.length).toBe(2);
551
+ expect(result.rows[0].row[0]).toBe("Alice");
552
+ expect(result.rows[1].row[0]).toBe("Cara");
553
+ });
554
+ it("csv_aggregate should compute min and return bestRow", async () => {
555
+ const tmpDir = await mkdtemp(path.join(os.tmpdir(), "nodebench-mcp-"));
556
+ const csvPath = path.join(tmpDir, "sample.csv");
557
+ await writeFile(csvPath, "name,age\nAlice,30\nBob,25\nCara,40\n", "utf8");
558
+ const tool = findTool("csv_aggregate");
559
+ const result = (await tool.handler({
560
+ path: csvPath,
561
+ hasHeader: true,
562
+ operation: "min",
563
+ value: { type: "column", column: "age" },
564
+ returnColumns: ["name", "age"],
565
+ }));
566
+ expect(result.result).toBe(25);
567
+ expect(result.bestRow.headers).toEqual(["name", "age"]);
568
+ expect(result.bestRow.row[0]).toBe("Bob");
569
+ });
570
+ it("xlsx_select_rows should filter rows and select columns", async () => {
571
+ const tmpDir = await mkdtemp(path.join(os.tmpdir(), "nodebench-mcp-"));
572
+ const xlsxPath = path.join(tmpDir, "sample.xlsx");
573
+ const mod = await import("xlsx");
574
+ const XLSX = mod.default ?? mod;
575
+ const wb = XLSX.utils.book_new();
576
+ const sheet = XLSX.utils.aoa_to_sheet([
577
+ ["Title", "Year"],
578
+ ["Movie A", 2009],
579
+ ["Movie B", 2011],
580
+ ]);
581
+ XLSX.utils.book_append_sheet(wb, sheet, "Sheet1");
582
+ XLSX.writeFile(wb, xlsxPath);
583
+ const tool = findTool("xlsx_select_rows");
584
+ const result = (await tool.handler({
585
+ path: xlsxPath,
586
+ sheetName: "Sheet1",
587
+ headerRow: 1,
588
+ where: [{ column: "Year", op: "eq", value: 2009 }],
589
+ returnColumns: ["Title"],
590
+ limit: 10,
591
+ }));
592
+ expect(result.headers).toEqual(["Title"]);
593
+ expect(result.rows.length).toBe(1);
594
+ expect(result.rows[0].row[0]).toBe("Movie A");
595
+ });
596
+ it("xlsx_aggregate should compute min and return bestRow", async () => {
597
+ const tmpDir = await mkdtemp(path.join(os.tmpdir(), "nodebench-mcp-"));
598
+ const xlsxPath = path.join(tmpDir, "sample.xlsx");
599
+ const mod = await import("xlsx");
600
+ const XLSX = mod.default ?? mod;
601
+ const wb = XLSX.utils.book_new();
602
+ const sheet = XLSX.utils.aoa_to_sheet([
603
+ ["Title", "Year"],
604
+ ["Movie A", 2009],
605
+ ["Movie B", 2011],
606
+ ]);
607
+ XLSX.utils.book_append_sheet(wb, sheet, "Sheet1");
608
+ XLSX.writeFile(wb, xlsxPath);
609
+ const tool = findTool("xlsx_aggregate");
610
+ const result = (await tool.handler({
611
+ path: xlsxPath,
612
+ sheetName: "Sheet1",
613
+ headerRow: 1,
614
+ operation: "min",
615
+ value: { type: "column", column: "Year" },
616
+ returnColumns: ["Title", "Year"],
617
+ }));
618
+ expect(result.result).toBe(2009);
619
+ expect(result.bestRow.headers).toEqual(["Title", "Year"]);
620
+ expect(result.bestRow.row[0]).toBe("Movie A");
621
+ });
622
+ it("read_pdf_text should extract page text", async () => {
623
+ const pdfPath = findRepoFile(path.join("test_assets", "Report_2025-12-25.pdf"));
624
+ const tool = findTool("read_pdf_text");
625
+ const result = (await tool.handler({
626
+ path: pdfPath,
627
+ pageStart: 1,
628
+ pageEnd: 1,
629
+ maxChars: 2000,
630
+ }));
631
+ expect(result.pagesIncluded).toEqual([1]);
632
+ expect(String(result.text)).toContain("Hello World");
633
+ });
634
+ it("pdf_search_text should find matches with snippets", async () => {
635
+ const pdfPath = findRepoFile(path.join("test_assets", "Report_2025-12-25.pdf"));
636
+ const tool = findTool("pdf_search_text");
637
+ const result = (await tool.handler({
638
+ path: pdfPath,
639
+ query: "Hello",
640
+ maxMatches: 5,
641
+ }));
642
+ expect(result.matchCount).toBeGreaterThan(0);
643
+ expect(result.matches[0].page).toBe(1);
644
+ expect(String(result.matches[0].snippet)).toContain("Hello");
645
+ });
646
+ it("read_text_file should return bounded text slices", async () => {
647
+ const tmpDir = await mkdtemp(path.join(os.tmpdir(), "nodebench-mcp-"));
648
+ const filePath = path.join(tmpDir, "notes.txt");
649
+ await writeFile(filePath, "Line1\nLine2\nLine3\n", "utf8");
650
+ const tool = findTool("read_text_file");
651
+ const result = (await tool.handler({
652
+ path: filePath,
653
+ startChar: 0,
654
+ maxChars: 10,
655
+ }));
656
+ expect(result.truncated).toBe(true);
657
+ expect(String(result.text)).toContain("Line1");
658
+ });
659
+ it("read_json_file and json_select should parse and select values", async () => {
660
+ const tmpDir = await mkdtemp(path.join(os.tmpdir(), "nodebench-mcp-"));
661
+ const filePath = path.join(tmpDir, "data.json");
662
+ await writeFile(filePath, JSON.stringify({ a: { b: [{ name: "alpha" }, { name: "beta" }] } }), "utf8");
663
+ const readTool = findTool("read_json_file");
664
+ const readResult = (await readTool.handler({
665
+ path: filePath,
666
+ maxDepth: 6,
667
+ maxItems: 50,
668
+ maxStringChars: 1000,
669
+ }));
670
+ expect(readResult.rootType).toBe("object");
671
+ expect(readResult.value.a.b.length).toBe(2);
672
+ const selectTool = findTool("json_select");
673
+ const selectResult = (await selectTool.handler({
674
+ path: filePath,
675
+ pointer: "/a/b/1/name",
676
+ maxDepth: 3,
677
+ maxItems: 10,
678
+ maxStringChars: 100,
679
+ }));
680
+ expect(selectResult.found).toBe(true);
681
+ expect(selectResult.value).toBe("beta");
682
+ });
683
+ it("read_jsonl_file should parse lines and report errors", async () => {
684
+ const tmpDir = await mkdtemp(path.join(os.tmpdir(), "nodebench-mcp-"));
685
+ const filePath = path.join(tmpDir, "data.jsonl");
686
+ await writeFile(filePath, '{"ok":1}\nnot-json\n{"ok":2}\n', "utf8");
687
+ const tool = findTool("read_jsonl_file");
688
+ const result = (await tool.handler({
689
+ path: filePath,
690
+ limitLines: 10,
691
+ parseJson: true,
692
+ maxDepth: 4,
693
+ maxItems: 20,
694
+ maxStringChars: 100,
695
+ }));
696
+ expect(result.returnedLines).toBe(2);
697
+ expect(result.errorCount).toBe(1);
698
+ expect(result.lines[0].value.ok).toBe(1);
699
+ expect(result.lines[1].value.ok).toBe(2);
700
+ });
701
+ it("zip_list_files and zip_read_text_file should read entries", async () => {
702
+ const zipPath = findRepoFile(path.join("test_assets", "zip_fixture.zip"));
703
+ const listTool = findTool("zip_list_files");
704
+ const listResult = (await listTool.handler({ path: zipPath, maxEntries: 50 }));
705
+ const names = (listResult.entries ?? []).map((e) => e.fileName);
706
+ expect(names).toContain("hello.txt");
707
+ expect(names).toContain("folder/data.csv");
708
+ const readTool = findTool("zip_read_text_file");
709
+ const readResult = (await readTool.handler({
710
+ path: zipPath,
711
+ innerPath: "hello.txt",
712
+ maxChars: 2000,
713
+ }));
714
+ expect(String(readResult.text)).toContain("Hello from zip fixture");
715
+ });
716
+ it("zip_extract_file should safely extract to outputDir", async () => {
717
+ const zipPath = findRepoFile(path.join("test_assets", "zip_fixture.zip"));
718
+ const tmpDir = await mkdtemp(path.join(os.tmpdir(), "nodebench-mcp-zip-"));
719
+ const extractTool = findTool("zip_extract_file");
720
+ const extracted = (await extractTool.handler({
721
+ path: zipPath,
722
+ innerPath: "folder/data.csv",
723
+ outputDir: tmpDir,
724
+ overwrite: true,
725
+ }));
726
+ expect(typeof extracted.extractedPath).toBe("string");
727
+ expect(existsSync(extracted.extractedPath)).toBe(true);
728
+ const readTool = findTool("read_text_file");
729
+ const text = (await readTool.handler({ path: extracted.extractedPath, maxChars: 2000 }));
730
+ expect(String(text.text)).toContain("alpha,1");
731
+ });
732
+ it("read_docx_text should extract document text", async () => {
733
+ const docxPath = findRepoFile(path.join("test_assets", "docx_fixture.docx"));
734
+ const tool = findTool("read_docx_text");
735
+ const result = (await tool.handler({ path: docxPath, maxChars: 5000 }));
736
+ expect(String(result.text)).toContain("Hello DOCX");
737
+ expect(String(result.text)).toContain("Second paragraph");
738
+ });
739
+ it("read_pptx_text should extract slide text with markers", async () => {
740
+ const pptxPath = findRepoFile(path.join("test_assets", "pptx_fixture.pptx"));
741
+ const tool = findTool("read_pptx_text");
742
+ const result = (await tool.handler({ path: pptxPath, maxChars: 10000 }));
743
+ expect(result.slideCount).toBe(2);
744
+ expect(String(result.text)).toContain("[SLIDE 1]");
745
+ expect(String(result.text)).toContain("Hello PPTX Slide1");
746
+ expect(String(result.text)).toContain("[SLIDE 2]");
747
+ expect(String(result.text)).toContain("Slide2 Text");
748
+ });
504
749
  });
505
750
  describe("Unit: abandon_cycle", () => {
506
751
  it("should abandon an active cycle", async () => {
@@ -1190,4 +1435,306 @@ describe("Static: scan_terminal_security tool", () => {
1190
1435
  expect(props).toHaveProperty("verbose");
1191
1436
  });
1192
1437
  });
1438
+ // ═══════════════════════════════════════════════════════════════════════════
1439
+ // v2.8.0 — Progressive Discovery, Boilerplate, Benchmark tools
1440
+ // ═══════════════════════════════════════════════════════════════════════════
1441
+ describe("Static: progressive discovery tools", () => {
1442
+ it("should include discover_tools, get_tool_quick_ref, get_workflow_chain", () => {
1443
+ const names = allTools.map((t) => t.name);
1444
+ expect(names).toContain("discover_tools");
1445
+ expect(names).toContain("get_tool_quick_ref");
1446
+ expect(names).toContain("get_workflow_chain");
1447
+ });
1448
+ it("discover_tools requires query parameter", () => {
1449
+ const tool = findTool("discover_tools");
1450
+ expect(tool.inputSchema.required).toContain("query");
1451
+ expect(tool.inputSchema.properties).toHaveProperty("category");
1452
+ expect(tool.inputSchema.properties).toHaveProperty("phase");
1453
+ expect(tool.inputSchema.properties).toHaveProperty("limit");
1454
+ });
1455
+ it("get_tool_quick_ref requires toolName parameter", () => {
1456
+ const tool = findTool("get_tool_quick_ref");
1457
+ expect(tool.inputSchema.required).toContain("toolName");
1458
+ });
1459
+ it("get_workflow_chain requires chain parameter", () => {
1460
+ const tool = findTool("get_workflow_chain");
1461
+ expect(tool.inputSchema.required).toContain("chain");
1462
+ });
1463
+ });
1464
+ describe("Unit: discover_tools hybrid search", () => {
1465
+ it("should return ranked results for verification query", async () => {
1466
+ const tool = findTool("discover_tools");
1467
+ const result = (await tool.handler({ query: "verify implementation" }));
1468
+ expect(result.resultCount).toBeGreaterThan(0);
1469
+ expect(result.results[0]).toHaveProperty("relevanceScore");
1470
+ expect(result.results[0]).toHaveProperty("quickRef");
1471
+ expect(result.results[0].relevanceScore).toBeGreaterThan(0);
1472
+ });
1473
+ it("should filter by category", async () => {
1474
+ const tool = findTool("discover_tools");
1475
+ const result = (await tool.handler({ query: "test", category: "eval" }));
1476
+ for (const r of result.results) {
1477
+ expect(r.category).toBe("eval");
1478
+ }
1479
+ });
1480
+ it("should filter by phase", async () => {
1481
+ const tool = findTool("discover_tools");
1482
+ const result = (await tool.handler({ query: "search find", phase: "research" }));
1483
+ for (const r of result.results) {
1484
+ expect(r.phase).toBe("research");
1485
+ }
1486
+ });
1487
+ it("should include matching workflow chains", async () => {
1488
+ const tool = findTool("discover_tools");
1489
+ const result = (await tool.handler({ query: "new feature build" }));
1490
+ expect(result.matchingWorkflows.length).toBeGreaterThan(0);
1491
+ });
1492
+ it("should return progressive hint", async () => {
1493
+ const tool = findTool("discover_tools");
1494
+ const result = (await tool.handler({ query: "verify" }));
1495
+ expect(result._progressiveHint).toBeTruthy();
1496
+ });
1497
+ });
1498
+ describe("Unit: get_tool_quick_ref", () => {
1499
+ it("should return quick ref for known tool", async () => {
1500
+ const tool = findTool("get_tool_quick_ref");
1501
+ const result = (await tool.handler({ toolName: "start_verification_cycle" }));
1502
+ expect(result.tool).toBe("start_verification_cycle");
1503
+ expect(result.category).toBe("verification");
1504
+ expect(result.quickRef).toHaveProperty("nextAction");
1505
+ expect(result.quickRef).toHaveProperty("nextTools");
1506
+ expect(result.quickRef.nextTools.length).toBeGreaterThan(0);
1507
+ });
1508
+ it("should return error for unknown tool with suggestions", async () => {
1509
+ const tool = findTool("get_tool_quick_ref");
1510
+ const result = (await tool.handler({ toolName: "nonexistent_tool_xyz" }));
1511
+ expect(result.error).toBe(true);
1512
+ expect(result).toHaveProperty("didYouMean");
1513
+ });
1514
+ it("should include related tool details when requested", async () => {
1515
+ const tool = findTool("get_tool_quick_ref");
1516
+ const result = (await tool.handler({
1517
+ toolName: "run_mandatory_flywheel",
1518
+ includeRelatedDetails: true,
1519
+ }));
1520
+ expect(result).toHaveProperty("relatedToolDetails");
1521
+ expect(Object.keys(result.relatedToolDetails).length).toBeGreaterThan(0);
1522
+ });
1523
+ });
1524
+ describe("Unit: get_workflow_chain", () => {
1525
+ it("should list all available chains", async () => {
1526
+ const tool = findTool("get_workflow_chain");
1527
+ const result = (await tool.handler({ chain: "list" }));
1528
+ expect(result.availableChains.length).toBeGreaterThan(0);
1529
+ const keys = result.availableChains.map((c) => c.key);
1530
+ expect(keys).toContain("new_feature");
1531
+ expect(keys).toContain("fix_bug");
1532
+ expect(keys).toContain("c_compiler_benchmark");
1533
+ });
1534
+ it("should return enriched chain steps", async () => {
1535
+ const tool = findTool("get_workflow_chain");
1536
+ const result = (await tool.handler({ chain: "new_feature" }));
1537
+ expect(result.name).toBe("Build a New Feature");
1538
+ expect(result.totalSteps).toBeGreaterThan(5);
1539
+ expect(result.steps[0]).toHaveProperty("tool");
1540
+ expect(result.steps[0]).toHaveProperty("action");
1541
+ expect(result.steps[0]).toHaveProperty("quickRef");
1542
+ });
1543
+ it("should return error for unknown chain", async () => {
1544
+ const tool = findTool("get_workflow_chain");
1545
+ const result = (await tool.handler({ chain: "nonexistent_chain" }));
1546
+ expect(result.error).toBe(true);
1547
+ });
1548
+ });
1549
+ describe("Static: boilerplate tools", () => {
1550
+ it("should include scaffold_nodebench_project and get_boilerplate_status", () => {
1551
+ const names = allTools.map((t) => t.name);
1552
+ expect(names).toContain("scaffold_nodebench_project");
1553
+ expect(names).toContain("get_boilerplate_status");
1554
+ });
1555
+ it("scaffold_nodebench_project requires projectPath, projectName, techStack", () => {
1556
+ const tool = findTool("scaffold_nodebench_project");
1557
+ expect(tool.inputSchema.required).toContain("projectPath");
1558
+ expect(tool.inputSchema.required).toContain("projectName");
1559
+ expect(tool.inputSchema.required).toContain("techStack");
1560
+ });
1561
+ it("get_boilerplate_status requires projectPath", () => {
1562
+ const tool = findTool("get_boilerplate_status");
1563
+ expect(tool.inputSchema.required).toContain("projectPath");
1564
+ });
1565
+ });
1566
+ describe("Unit: scaffold_nodebench_project dry run", () => {
1567
+ it("should preview files without creating them", async () => {
1568
+ const tmpDir = await mkdtemp(path.join(os.tmpdir(), "nodebench-scaffold-"));
1569
+ const tool = findTool("scaffold_nodebench_project");
1570
+ const result = (await tool.handler({
1571
+ projectPath: tmpDir,
1572
+ projectName: "test-project",
1573
+ techStack: "TypeScript, Node.js",
1574
+ dryRun: true,
1575
+ }));
1576
+ expect(result.dryRun).toBe(true);
1577
+ expect(result.summary.totalFiles).toBeGreaterThan(5);
1578
+ expect(result.willCreate.length).toBeGreaterThan(0);
1579
+ expect(result.willCreate).toContain("AGENTS.md");
1580
+ expect(result.willCreate).toContain("package.json");
1581
+ expect(result.willCreate).toContain(".mcp.json");
1582
+ expect(result._quickRef).toBeDefined();
1583
+ });
1584
+ });
1585
+ describe("Unit: scaffold_nodebench_project actual creation", () => {
1586
+ it("should create all project files", async () => {
1587
+ const tmpDir = await mkdtemp(path.join(os.tmpdir(), "nodebench-scaffold-"));
1588
+ const tool = findTool("scaffold_nodebench_project");
1589
+ const result = (await tool.handler({
1590
+ projectPath: tmpDir,
1591
+ projectName: "real-project",
1592
+ techStack: "TypeScript, React",
1593
+ dryRun: false,
1594
+ includeParallelAgents: true,
1595
+ includeGithubActions: true,
1596
+ }));
1597
+ expect(result.dryRun).toBe(false);
1598
+ expect(result.summary.created).toBeGreaterThan(5);
1599
+ // Verify key files exist
1600
+ const { existsSync } = await import("node:fs");
1601
+ expect(existsSync(path.join(tmpDir, "AGENTS.md"))).toBe(true);
1602
+ expect(existsSync(path.join(tmpDir, "package.json"))).toBe(true);
1603
+ expect(existsSync(path.join(tmpDir, ".mcp.json"))).toBe(true);
1604
+ expect(existsSync(path.join(tmpDir, ".parallel-agents"))).toBe(true);
1605
+ expect(existsSync(path.join(tmpDir, ".github", "workflows"))).toBe(true);
1606
+ });
1607
+ });
1608
+ describe("Unit: get_boilerplate_status", () => {
1609
+ it("should scan an empty directory and find everything missing", async () => {
1610
+ const tmpDir = await mkdtemp(path.join(os.tmpdir(), "nodebench-status-"));
1611
+ const tool = findTool("get_boilerplate_status");
1612
+ const result = (await tool.handler({ projectPath: tmpDir }));
1613
+ expect(result.completionPercentage).toBe(0);
1614
+ expect(result.missing).toBeGreaterThan(0);
1615
+ expect(result.missingFiles).toContain("AGENTS.md");
1616
+ expect(result.recommendations.length).toBeGreaterThan(0);
1617
+ });
1618
+ it("should detect existing files after scaffolding", async () => {
1619
+ const tmpDir = await mkdtemp(path.join(os.tmpdir(), "nodebench-status-"));
1620
+ // Scaffold first
1621
+ await findTool("scaffold_nodebench_project").handler({
1622
+ projectPath: tmpDir,
1623
+ projectName: "status-test",
1624
+ techStack: "TypeScript",
1625
+ dryRun: false,
1626
+ });
1627
+ // Then check status
1628
+ const tool = findTool("get_boilerplate_status");
1629
+ const result = (await tool.handler({ projectPath: tmpDir }));
1630
+ expect(result.completionPercentage).toBeGreaterThan(50);
1631
+ expect(result.found).toBeGreaterThan(5);
1632
+ });
1633
+ it("should throw for nonexistent path", async () => {
1634
+ const tool = findTool("get_boilerplate_status");
1635
+ await expect(tool.handler({ projectPath: "/nonexistent/path/xyz123" })).rejects.toThrow("does not exist");
1636
+ });
1637
+ });
1638
+ describe("Static: C-compiler benchmark tools", () => {
1639
+ it("should include all 3 benchmark tools", () => {
1640
+ const names = allTools.map((t) => t.name);
1641
+ expect(names).toContain("start_autonomy_benchmark");
1642
+ expect(names).toContain("log_benchmark_milestone");
1643
+ expect(names).toContain("complete_autonomy_benchmark");
1644
+ });
1645
+ it("start_autonomy_benchmark requires challenge parameter", () => {
1646
+ const tool = findTool("start_autonomy_benchmark");
1647
+ expect(tool.inputSchema.required).toContain("challenge");
1648
+ const challengeProp = tool.inputSchema.properties.challenge;
1649
+ expect(challengeProp.enum).toContain("c_compiler");
1650
+ expect(challengeProp.enum).toContain("rest_api");
1651
+ expect(challengeProp.enum).toContain("fullstack_app");
1652
+ expect(challengeProp.enum).toContain("list");
1653
+ });
1654
+ it("log_benchmark_milestone requires benchmarkId, milestoneId, verificationPassed", () => {
1655
+ const tool = findTool("log_benchmark_milestone");
1656
+ expect(tool.inputSchema.required).toContain("benchmarkId");
1657
+ expect(tool.inputSchema.required).toContain("milestoneId");
1658
+ expect(tool.inputSchema.required).toContain("verificationPassed");
1659
+ });
1660
+ it("complete_autonomy_benchmark requires benchmarkId and reason", () => {
1661
+ const tool = findTool("complete_autonomy_benchmark");
1662
+ expect(tool.inputSchema.required).toContain("benchmarkId");
1663
+ expect(tool.inputSchema.required).toContain("reason");
1664
+ });
1665
+ });
1666
+ describe("Unit: start_autonomy_benchmark", () => {
1667
+ it("should list all available challenges", async () => {
1668
+ const tool = findTool("start_autonomy_benchmark");
1669
+ const result = (await tool.handler({ challenge: "list" }));
1670
+ expect(result.availableChallenges.length).toBe(5);
1671
+ const keys = result.availableChallenges.map((c) => c.key);
1672
+ expect(keys).toContain("c_compiler");
1673
+ expect(keys).toContain("rest_api");
1674
+ expect(keys).toContain("fullstack_app");
1675
+ expect(keys).toContain("cli_tool");
1676
+ expect(keys).toContain("data_pipeline");
1677
+ });
1678
+ it("should start a cli_tool benchmark", async () => {
1679
+ const tool = findTool("start_autonomy_benchmark");
1680
+ const result = (await tool.handler({
1681
+ challenge: "cli_tool",
1682
+ notes: "test benchmark",
1683
+ }));
1684
+ expect(result.benchmarkId).toBeTruthy();
1685
+ expect(result.challenge).toBe("cli_tool");
1686
+ expect(result.difficulty).toBe("easy");
1687
+ expect(result.totalPoints).toBe(100);
1688
+ expect(result.milestones.length).toBe(8);
1689
+ expect(result._quickRef).toBeDefined();
1690
+ });
1691
+ it("should throw for unknown challenge", async () => {
1692
+ const tool = findTool("start_autonomy_benchmark");
1693
+ await expect(tool.handler({ challenge: "nonexistent_challenge" })).rejects.toThrow("Unknown challenge");
1694
+ });
1695
+ });
1696
+ describe("Integration: full benchmark lifecycle", () => {
1697
+ it("start → log milestone → complete", async () => {
1698
+ // 1. Start benchmark
1699
+ const benchmark = (await findTool("start_autonomy_benchmark").handler({
1700
+ challenge: "cli_tool",
1701
+ notes: "integration test",
1702
+ }));
1703
+ expect(benchmark.benchmarkId).toBeTruthy();
1704
+ // 2. Log a milestone
1705
+ const milestone = (await findTool("log_benchmark_milestone").handler({
1706
+ benchmarkId: benchmark.benchmarkId,
1707
+ milestoneId: "project_setup",
1708
+ verificationPassed: true,
1709
+ toolsUsed: ["run_closed_loop", "bootstrap_project"],
1710
+ notes: "Project initialized",
1711
+ }));
1712
+ expect(milestone.points).toBe(15);
1713
+ expect(milestone.progress.earnedPoints).toBe(15);
1714
+ expect(milestone.progress.milestonesCompleted).toBe(1);
1715
+ // 3. Log another milestone (failed)
1716
+ const milestone2 = (await findTool("log_benchmark_milestone").handler({
1717
+ benchmarkId: benchmark.benchmarkId,
1718
+ milestoneId: "arg_parsing",
1719
+ verificationPassed: false,
1720
+ notes: "Arg parsing failed tests",
1721
+ }));
1722
+ expect(milestone2.points).toBe(0);
1723
+ expect(milestone2.progress.earnedPoints).toBe(15); // unchanged
1724
+ // 4. Complete benchmark
1725
+ const completed = (await findTool("complete_autonomy_benchmark").handler({
1726
+ benchmarkId: benchmark.benchmarkId,
1727
+ reason: "stuck",
1728
+ notes: "Integration test complete",
1729
+ }));
1730
+ expect(completed.score.earnedPoints).toBe(15);
1731
+ expect(completed.score.percentage).toBe(15);
1732
+ expect(completed.score.grade).toContain("F");
1733
+ expect(completed.milestones.completed).toBe(1);
1734
+ expect(completed.milestones.failed).toBe(1);
1735
+ expect(completed.milestones.pending).toBe(6);
1736
+ expect(completed.analysis.strengths).toContain("Project Setup");
1737
+ expect(completed._quickRef).toBeDefined();
1738
+ });
1739
+ });
1193
1740
  //# sourceMappingURL=tools.test.js.map