nodebench-mcp 2.6.0 → 2.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/NODEBENCH_AGENTS.md +1 -1
- package/README.md +21 -12
- package/dist/__tests__/audit-registry.d.ts +1 -0
- package/dist/__tests__/audit-registry.js +60 -0
- package/dist/__tests__/audit-registry.js.map +1 -0
- package/dist/__tests__/gaiaCapabilityEval.test.js +59 -1
- package/dist/__tests__/gaiaCapabilityEval.test.js.map +1 -1
- package/dist/__tests__/gaiaCapabilityFilesEval.test.js +388 -9
- package/dist/__tests__/gaiaCapabilityFilesEval.test.js.map +1 -1
- package/dist/__tests__/tools.test.js +551 -4
- package/dist/__tests__/tools.test.js.map +1 -1
- package/dist/index.js +28 -6
- package/dist/index.js.map +1 -1
- package/dist/tools/boilerplateTools.d.ts +11 -0
- package/dist/tools/boilerplateTools.js +500 -0
- package/dist/tools/boilerplateTools.js.map +1 -0
- package/dist/tools/cCompilerBenchmarkTools.d.ts +14 -0
- package/dist/tools/cCompilerBenchmarkTools.js +453 -0
- package/dist/tools/cCompilerBenchmarkTools.js.map +1 -0
- package/dist/tools/figmaFlowTools.d.ts +13 -0
- package/dist/tools/figmaFlowTools.js +183 -0
- package/dist/tools/figmaFlowTools.js.map +1 -0
- package/dist/tools/flickerDetectionTools.d.ts +14 -0
- package/dist/tools/flickerDetectionTools.js +231 -0
- package/dist/tools/flickerDetectionTools.js.map +1 -0
- package/dist/tools/localFileTools.d.ts +1 -0
- package/dist/tools/localFileTools.js +1926 -27
- package/dist/tools/localFileTools.js.map +1 -1
- package/dist/tools/metaTools.js +17 -0
- package/dist/tools/metaTools.js.map +1 -1
- package/dist/tools/progressiveDiscoveryTools.d.ts +14 -0
- package/dist/tools/progressiveDiscoveryTools.js +239 -0
- package/dist/tools/progressiveDiscoveryTools.js.map +1 -0
- package/dist/tools/toolRegistry.d.ts +88 -0
- package/dist/tools/toolRegistry.js +1926 -0
- package/dist/tools/toolRegistry.js.map +1 -0
- package/package.json +3 -2
|
@@ -7,6 +7,7 @@ import { describe, it, expect } from "vitest";
|
|
|
7
7
|
import os from "node:os";
|
|
8
8
|
import path from "node:path";
|
|
9
9
|
import { mkdtemp, writeFile } from "node:fs/promises";
|
|
10
|
+
import { existsSync } from "node:fs";
|
|
10
11
|
import { verificationTools } from "../tools/verificationTools.js";
|
|
11
12
|
import { reconTools } from "../tools/reconTools.js";
|
|
12
13
|
import { uiCaptureTools } from "../tools/uiCaptureTools.js";
|
|
@@ -27,6 +28,12 @@ import { securityTools } from "../tools/securityTools.js";
|
|
|
27
28
|
import { platformTools } from "../tools/platformTools.js";
|
|
28
29
|
import { localFileTools } from "../tools/localFileTools.js";
|
|
29
30
|
import { researchWritingTools } from "../tools/researchWritingTools.js";
|
|
31
|
+
import { flickerDetectionTools } from "../tools/flickerDetectionTools.js";
|
|
32
|
+
import { figmaFlowTools } from "../tools/figmaFlowTools.js";
|
|
33
|
+
import { createProgressiveDiscoveryTools } from "../tools/progressiveDiscoveryTools.js";
|
|
34
|
+
import { boilerplateTools } from "../tools/boilerplateTools.js";
|
|
35
|
+
import { cCompilerBenchmarkTools } from "../tools/cCompilerBenchmarkTools.js";
|
|
36
|
+
import { getQuickRef } from "../tools/toolRegistry.js";
|
|
30
37
|
// Assemble all tools like index.ts does
|
|
31
38
|
const domainTools = [
|
|
32
39
|
...verificationTools,
|
|
@@ -48,15 +55,22 @@ const domainTools = [
|
|
|
48
55
|
...securityTools,
|
|
49
56
|
...platformTools,
|
|
50
57
|
...researchWritingTools,
|
|
58
|
+
...flickerDetectionTools,
|
|
59
|
+
...figmaFlowTools,
|
|
60
|
+
...boilerplateTools,
|
|
61
|
+
...cCompilerBenchmarkTools,
|
|
51
62
|
];
|
|
52
|
-
const
|
|
63
|
+
const metaTools = createMetaTools(domainTools);
|
|
64
|
+
const allToolsWithoutDiscovery = [...domainTools, ...metaTools];
|
|
65
|
+
const discoveryTools = createProgressiveDiscoveryTools(allToolsWithoutDiscovery.map((t) => ({ name: t.name, description: t.description })));
|
|
66
|
+
const allTools = [...allToolsWithoutDiscovery, ...discoveryTools];
|
|
53
67
|
// ═══════════════════════════════════════════════════════════════════════════
|
|
54
68
|
// STATIC LAYER — structure validation
|
|
55
69
|
// ═══════════════════════════════════════════════════════════════════════════
|
|
56
70
|
describe("Static: tool structure", () => {
|
|
57
|
-
it("should have
|
|
58
|
-
//
|
|
59
|
-
expect(allTools.length).toBe(
|
|
71
|
+
it("should have 129 tools total", () => {
|
|
72
|
+
// 124 domain tools + 2 meta tools (findTools, getMethodology) + 3 progressive discovery tools
|
|
73
|
+
expect(allTools.length).toBe(129);
|
|
60
74
|
});
|
|
61
75
|
it("every tool has name, description, inputSchema, handler", () => {
|
|
62
76
|
for (const tool of allTools) {
|
|
@@ -458,6 +472,25 @@ describe("Static: self_reinforced_learning methodology", () => {
|
|
|
458
472
|
// ═══════════════════════════════════════════════════════════════════════════
|
|
459
473
|
const findTool = (name) => allTools.find((t) => t.name === name);
|
|
460
474
|
describe("Unit: local file tools", () => {
|
|
475
|
+
const findRepoFile = (relPath) => {
|
|
476
|
+
let dir = process.cwd();
|
|
477
|
+
for (let i = 0; i < 10; i++) {
|
|
478
|
+
const candidate = path.join(dir, relPath);
|
|
479
|
+
if (existsSync(candidate))
|
|
480
|
+
return candidate;
|
|
481
|
+
const parent = path.dirname(dir);
|
|
482
|
+
if (parent === dir)
|
|
483
|
+
break;
|
|
484
|
+
dir = parent;
|
|
485
|
+
}
|
|
486
|
+
throw new Error(`Fixture not found: ${relPath}`);
|
|
487
|
+
};
|
|
488
|
+
it("tool registry should include quickRefs for all local_file tools", () => {
|
|
489
|
+
const missing = localFileTools
|
|
490
|
+
.map((t) => t.name)
|
|
491
|
+
.filter((name) => !getQuickRef(name));
|
|
492
|
+
expect(missing).toEqual([]);
|
|
493
|
+
});
|
|
461
494
|
it("read_csv_file should parse a bounded table", async () => {
|
|
462
495
|
const tmpDir = await mkdtemp(path.join(os.tmpdir(), "nodebench-mcp-"));
|
|
463
496
|
const csvPath = path.join(tmpDir, "sample.csv");
|
|
@@ -501,6 +534,218 @@ describe("Unit: local file tools", () => {
|
|
|
501
534
|
expect(result.rows[0][0]).toBe("Movie A");
|
|
502
535
|
expect(result.rows[0][1]).toBe(2009);
|
|
503
536
|
});
|
|
537
|
+
it("csv_select_rows should filter rows and select columns", async () => {
|
|
538
|
+
const tmpDir = await mkdtemp(path.join(os.tmpdir(), "nodebench-mcp-"));
|
|
539
|
+
const csvPath = path.join(tmpDir, "sample.csv");
|
|
540
|
+
await writeFile(csvPath, "name,age\nAlice,30\nBob,25\nCara,40\n", "utf8");
|
|
541
|
+
const tool = findTool("csv_select_rows");
|
|
542
|
+
const result = (await tool.handler({
|
|
543
|
+
path: csvPath,
|
|
544
|
+
hasHeader: true,
|
|
545
|
+
where: [{ column: "age", op: "gt", value: 25 }],
|
|
546
|
+
returnColumns: ["name"],
|
|
547
|
+
limit: 10,
|
|
548
|
+
}));
|
|
549
|
+
expect(result.headers).toEqual(["name"]);
|
|
550
|
+
expect(result.rows.length).toBe(2);
|
|
551
|
+
expect(result.rows[0].row[0]).toBe("Alice");
|
|
552
|
+
expect(result.rows[1].row[0]).toBe("Cara");
|
|
553
|
+
});
|
|
554
|
+
it("csv_aggregate should compute min and return bestRow", async () => {
|
|
555
|
+
const tmpDir = await mkdtemp(path.join(os.tmpdir(), "nodebench-mcp-"));
|
|
556
|
+
const csvPath = path.join(tmpDir, "sample.csv");
|
|
557
|
+
await writeFile(csvPath, "name,age\nAlice,30\nBob,25\nCara,40\n", "utf8");
|
|
558
|
+
const tool = findTool("csv_aggregate");
|
|
559
|
+
const result = (await tool.handler({
|
|
560
|
+
path: csvPath,
|
|
561
|
+
hasHeader: true,
|
|
562
|
+
operation: "min",
|
|
563
|
+
value: { type: "column", column: "age" },
|
|
564
|
+
returnColumns: ["name", "age"],
|
|
565
|
+
}));
|
|
566
|
+
expect(result.result).toBe(25);
|
|
567
|
+
expect(result.bestRow.headers).toEqual(["name", "age"]);
|
|
568
|
+
expect(result.bestRow.row[0]).toBe("Bob");
|
|
569
|
+
});
|
|
570
|
+
it("xlsx_select_rows should filter rows and select columns", async () => {
|
|
571
|
+
const tmpDir = await mkdtemp(path.join(os.tmpdir(), "nodebench-mcp-"));
|
|
572
|
+
const xlsxPath = path.join(tmpDir, "sample.xlsx");
|
|
573
|
+
const mod = await import("xlsx");
|
|
574
|
+
const XLSX = mod.default ?? mod;
|
|
575
|
+
const wb = XLSX.utils.book_new();
|
|
576
|
+
const sheet = XLSX.utils.aoa_to_sheet([
|
|
577
|
+
["Title", "Year"],
|
|
578
|
+
["Movie A", 2009],
|
|
579
|
+
["Movie B", 2011],
|
|
580
|
+
]);
|
|
581
|
+
XLSX.utils.book_append_sheet(wb, sheet, "Sheet1");
|
|
582
|
+
XLSX.writeFile(wb, xlsxPath);
|
|
583
|
+
const tool = findTool("xlsx_select_rows");
|
|
584
|
+
const result = (await tool.handler({
|
|
585
|
+
path: xlsxPath,
|
|
586
|
+
sheetName: "Sheet1",
|
|
587
|
+
headerRow: 1,
|
|
588
|
+
where: [{ column: "Year", op: "eq", value: 2009 }],
|
|
589
|
+
returnColumns: ["Title"],
|
|
590
|
+
limit: 10,
|
|
591
|
+
}));
|
|
592
|
+
expect(result.headers).toEqual(["Title"]);
|
|
593
|
+
expect(result.rows.length).toBe(1);
|
|
594
|
+
expect(result.rows[0].row[0]).toBe("Movie A");
|
|
595
|
+
});
|
|
596
|
+
it("xlsx_aggregate should compute min and return bestRow", async () => {
|
|
597
|
+
const tmpDir = await mkdtemp(path.join(os.tmpdir(), "nodebench-mcp-"));
|
|
598
|
+
const xlsxPath = path.join(tmpDir, "sample.xlsx");
|
|
599
|
+
const mod = await import("xlsx");
|
|
600
|
+
const XLSX = mod.default ?? mod;
|
|
601
|
+
const wb = XLSX.utils.book_new();
|
|
602
|
+
const sheet = XLSX.utils.aoa_to_sheet([
|
|
603
|
+
["Title", "Year"],
|
|
604
|
+
["Movie A", 2009],
|
|
605
|
+
["Movie B", 2011],
|
|
606
|
+
]);
|
|
607
|
+
XLSX.utils.book_append_sheet(wb, sheet, "Sheet1");
|
|
608
|
+
XLSX.writeFile(wb, xlsxPath);
|
|
609
|
+
const tool = findTool("xlsx_aggregate");
|
|
610
|
+
const result = (await tool.handler({
|
|
611
|
+
path: xlsxPath,
|
|
612
|
+
sheetName: "Sheet1",
|
|
613
|
+
headerRow: 1,
|
|
614
|
+
operation: "min",
|
|
615
|
+
value: { type: "column", column: "Year" },
|
|
616
|
+
returnColumns: ["Title", "Year"],
|
|
617
|
+
}));
|
|
618
|
+
expect(result.result).toBe(2009);
|
|
619
|
+
expect(result.bestRow.headers).toEqual(["Title", "Year"]);
|
|
620
|
+
expect(result.bestRow.row[0]).toBe("Movie A");
|
|
621
|
+
});
|
|
622
|
+
it("read_pdf_text should extract page text", async () => {
|
|
623
|
+
const pdfPath = findRepoFile(path.join("test_assets", "Report_2025-12-25.pdf"));
|
|
624
|
+
const tool = findTool("read_pdf_text");
|
|
625
|
+
const result = (await tool.handler({
|
|
626
|
+
path: pdfPath,
|
|
627
|
+
pageStart: 1,
|
|
628
|
+
pageEnd: 1,
|
|
629
|
+
maxChars: 2000,
|
|
630
|
+
}));
|
|
631
|
+
expect(result.pagesIncluded).toEqual([1]);
|
|
632
|
+
expect(String(result.text)).toContain("Hello World");
|
|
633
|
+
});
|
|
634
|
+
it("pdf_search_text should find matches with snippets", async () => {
|
|
635
|
+
const pdfPath = findRepoFile(path.join("test_assets", "Report_2025-12-25.pdf"));
|
|
636
|
+
const tool = findTool("pdf_search_text");
|
|
637
|
+
const result = (await tool.handler({
|
|
638
|
+
path: pdfPath,
|
|
639
|
+
query: "Hello",
|
|
640
|
+
maxMatches: 5,
|
|
641
|
+
}));
|
|
642
|
+
expect(result.matchCount).toBeGreaterThan(0);
|
|
643
|
+
expect(result.matches[0].page).toBe(1);
|
|
644
|
+
expect(String(result.matches[0].snippet)).toContain("Hello");
|
|
645
|
+
});
|
|
646
|
+
it("read_text_file should return bounded text slices", async () => {
|
|
647
|
+
const tmpDir = await mkdtemp(path.join(os.tmpdir(), "nodebench-mcp-"));
|
|
648
|
+
const filePath = path.join(tmpDir, "notes.txt");
|
|
649
|
+
await writeFile(filePath, "Line1\nLine2\nLine3\n", "utf8");
|
|
650
|
+
const tool = findTool("read_text_file");
|
|
651
|
+
const result = (await tool.handler({
|
|
652
|
+
path: filePath,
|
|
653
|
+
startChar: 0,
|
|
654
|
+
maxChars: 10,
|
|
655
|
+
}));
|
|
656
|
+
expect(result.truncated).toBe(true);
|
|
657
|
+
expect(String(result.text)).toContain("Line1");
|
|
658
|
+
});
|
|
659
|
+
it("read_json_file and json_select should parse and select values", async () => {
|
|
660
|
+
const tmpDir = await mkdtemp(path.join(os.tmpdir(), "nodebench-mcp-"));
|
|
661
|
+
const filePath = path.join(tmpDir, "data.json");
|
|
662
|
+
await writeFile(filePath, JSON.stringify({ a: { b: [{ name: "alpha" }, { name: "beta" }] } }), "utf8");
|
|
663
|
+
const readTool = findTool("read_json_file");
|
|
664
|
+
const readResult = (await readTool.handler({
|
|
665
|
+
path: filePath,
|
|
666
|
+
maxDepth: 6,
|
|
667
|
+
maxItems: 50,
|
|
668
|
+
maxStringChars: 1000,
|
|
669
|
+
}));
|
|
670
|
+
expect(readResult.rootType).toBe("object");
|
|
671
|
+
expect(readResult.value.a.b.length).toBe(2);
|
|
672
|
+
const selectTool = findTool("json_select");
|
|
673
|
+
const selectResult = (await selectTool.handler({
|
|
674
|
+
path: filePath,
|
|
675
|
+
pointer: "/a/b/1/name",
|
|
676
|
+
maxDepth: 3,
|
|
677
|
+
maxItems: 10,
|
|
678
|
+
maxStringChars: 100,
|
|
679
|
+
}));
|
|
680
|
+
expect(selectResult.found).toBe(true);
|
|
681
|
+
expect(selectResult.value).toBe("beta");
|
|
682
|
+
});
|
|
683
|
+
it("read_jsonl_file should parse lines and report errors", async () => {
|
|
684
|
+
const tmpDir = await mkdtemp(path.join(os.tmpdir(), "nodebench-mcp-"));
|
|
685
|
+
const filePath = path.join(tmpDir, "data.jsonl");
|
|
686
|
+
await writeFile(filePath, '{"ok":1}\nnot-json\n{"ok":2}\n', "utf8");
|
|
687
|
+
const tool = findTool("read_jsonl_file");
|
|
688
|
+
const result = (await tool.handler({
|
|
689
|
+
path: filePath,
|
|
690
|
+
limitLines: 10,
|
|
691
|
+
parseJson: true,
|
|
692
|
+
maxDepth: 4,
|
|
693
|
+
maxItems: 20,
|
|
694
|
+
maxStringChars: 100,
|
|
695
|
+
}));
|
|
696
|
+
expect(result.returnedLines).toBe(2);
|
|
697
|
+
expect(result.errorCount).toBe(1);
|
|
698
|
+
expect(result.lines[0].value.ok).toBe(1);
|
|
699
|
+
expect(result.lines[1].value.ok).toBe(2);
|
|
700
|
+
});
|
|
701
|
+
it("zip_list_files and zip_read_text_file should read entries", async () => {
|
|
702
|
+
const zipPath = findRepoFile(path.join("test_assets", "zip_fixture.zip"));
|
|
703
|
+
const listTool = findTool("zip_list_files");
|
|
704
|
+
const listResult = (await listTool.handler({ path: zipPath, maxEntries: 50 }));
|
|
705
|
+
const names = (listResult.entries ?? []).map((e) => e.fileName);
|
|
706
|
+
expect(names).toContain("hello.txt");
|
|
707
|
+
expect(names).toContain("folder/data.csv");
|
|
708
|
+
const readTool = findTool("zip_read_text_file");
|
|
709
|
+
const readResult = (await readTool.handler({
|
|
710
|
+
path: zipPath,
|
|
711
|
+
innerPath: "hello.txt",
|
|
712
|
+
maxChars: 2000,
|
|
713
|
+
}));
|
|
714
|
+
expect(String(readResult.text)).toContain("Hello from zip fixture");
|
|
715
|
+
});
|
|
716
|
+
it("zip_extract_file should safely extract to outputDir", async () => {
|
|
717
|
+
const zipPath = findRepoFile(path.join("test_assets", "zip_fixture.zip"));
|
|
718
|
+
const tmpDir = await mkdtemp(path.join(os.tmpdir(), "nodebench-mcp-zip-"));
|
|
719
|
+
const extractTool = findTool("zip_extract_file");
|
|
720
|
+
const extracted = (await extractTool.handler({
|
|
721
|
+
path: zipPath,
|
|
722
|
+
innerPath: "folder/data.csv",
|
|
723
|
+
outputDir: tmpDir,
|
|
724
|
+
overwrite: true,
|
|
725
|
+
}));
|
|
726
|
+
expect(typeof extracted.extractedPath).toBe("string");
|
|
727
|
+
expect(existsSync(extracted.extractedPath)).toBe(true);
|
|
728
|
+
const readTool = findTool("read_text_file");
|
|
729
|
+
const text = (await readTool.handler({ path: extracted.extractedPath, maxChars: 2000 }));
|
|
730
|
+
expect(String(text.text)).toContain("alpha,1");
|
|
731
|
+
});
|
|
732
|
+
it("read_docx_text should extract document text", async () => {
|
|
733
|
+
const docxPath = findRepoFile(path.join("test_assets", "docx_fixture.docx"));
|
|
734
|
+
const tool = findTool("read_docx_text");
|
|
735
|
+
const result = (await tool.handler({ path: docxPath, maxChars: 5000 }));
|
|
736
|
+
expect(String(result.text)).toContain("Hello DOCX");
|
|
737
|
+
expect(String(result.text)).toContain("Second paragraph");
|
|
738
|
+
});
|
|
739
|
+
it("read_pptx_text should extract slide text with markers", async () => {
|
|
740
|
+
const pptxPath = findRepoFile(path.join("test_assets", "pptx_fixture.pptx"));
|
|
741
|
+
const tool = findTool("read_pptx_text");
|
|
742
|
+
const result = (await tool.handler({ path: pptxPath, maxChars: 10000 }));
|
|
743
|
+
expect(result.slideCount).toBe(2);
|
|
744
|
+
expect(String(result.text)).toContain("[SLIDE 1]");
|
|
745
|
+
expect(String(result.text)).toContain("Hello PPTX Slide1");
|
|
746
|
+
expect(String(result.text)).toContain("[SLIDE 2]");
|
|
747
|
+
expect(String(result.text)).toContain("Slide2 Text");
|
|
748
|
+
});
|
|
504
749
|
});
|
|
505
750
|
describe("Unit: abandon_cycle", () => {
|
|
506
751
|
it("should abandon an active cycle", async () => {
|
|
@@ -1190,4 +1435,306 @@ describe("Static: scan_terminal_security tool", () => {
|
|
|
1190
1435
|
expect(props).toHaveProperty("verbose");
|
|
1191
1436
|
});
|
|
1192
1437
|
});
|
|
1438
|
+
// ═══════════════════════════════════════════════════════════════════════════
|
|
1439
|
+
// v2.8.0 — Progressive Discovery, Boilerplate, Benchmark tools
|
|
1440
|
+
// ═══════════════════════════════════════════════════════════════════════════
|
|
1441
|
+
describe("Static: progressive discovery tools", () => {
|
|
1442
|
+
it("should include discover_tools, get_tool_quick_ref, get_workflow_chain", () => {
|
|
1443
|
+
const names = allTools.map((t) => t.name);
|
|
1444
|
+
expect(names).toContain("discover_tools");
|
|
1445
|
+
expect(names).toContain("get_tool_quick_ref");
|
|
1446
|
+
expect(names).toContain("get_workflow_chain");
|
|
1447
|
+
});
|
|
1448
|
+
it("discover_tools requires query parameter", () => {
|
|
1449
|
+
const tool = findTool("discover_tools");
|
|
1450
|
+
expect(tool.inputSchema.required).toContain("query");
|
|
1451
|
+
expect(tool.inputSchema.properties).toHaveProperty("category");
|
|
1452
|
+
expect(tool.inputSchema.properties).toHaveProperty("phase");
|
|
1453
|
+
expect(tool.inputSchema.properties).toHaveProperty("limit");
|
|
1454
|
+
});
|
|
1455
|
+
it("get_tool_quick_ref requires toolName parameter", () => {
|
|
1456
|
+
const tool = findTool("get_tool_quick_ref");
|
|
1457
|
+
expect(tool.inputSchema.required).toContain("toolName");
|
|
1458
|
+
});
|
|
1459
|
+
it("get_workflow_chain requires chain parameter", () => {
|
|
1460
|
+
const tool = findTool("get_workflow_chain");
|
|
1461
|
+
expect(tool.inputSchema.required).toContain("chain");
|
|
1462
|
+
});
|
|
1463
|
+
});
|
|
1464
|
+
describe("Unit: discover_tools hybrid search", () => {
|
|
1465
|
+
it("should return ranked results for verification query", async () => {
|
|
1466
|
+
const tool = findTool("discover_tools");
|
|
1467
|
+
const result = (await tool.handler({ query: "verify implementation" }));
|
|
1468
|
+
expect(result.resultCount).toBeGreaterThan(0);
|
|
1469
|
+
expect(result.results[0]).toHaveProperty("relevanceScore");
|
|
1470
|
+
expect(result.results[0]).toHaveProperty("quickRef");
|
|
1471
|
+
expect(result.results[0].relevanceScore).toBeGreaterThan(0);
|
|
1472
|
+
});
|
|
1473
|
+
it("should filter by category", async () => {
|
|
1474
|
+
const tool = findTool("discover_tools");
|
|
1475
|
+
const result = (await tool.handler({ query: "test", category: "eval" }));
|
|
1476
|
+
for (const r of result.results) {
|
|
1477
|
+
expect(r.category).toBe("eval");
|
|
1478
|
+
}
|
|
1479
|
+
});
|
|
1480
|
+
it("should filter by phase", async () => {
|
|
1481
|
+
const tool = findTool("discover_tools");
|
|
1482
|
+
const result = (await tool.handler({ query: "search find", phase: "research" }));
|
|
1483
|
+
for (const r of result.results) {
|
|
1484
|
+
expect(r.phase).toBe("research");
|
|
1485
|
+
}
|
|
1486
|
+
});
|
|
1487
|
+
it("should include matching workflow chains", async () => {
|
|
1488
|
+
const tool = findTool("discover_tools");
|
|
1489
|
+
const result = (await tool.handler({ query: "new feature build" }));
|
|
1490
|
+
expect(result.matchingWorkflows.length).toBeGreaterThan(0);
|
|
1491
|
+
});
|
|
1492
|
+
it("should return progressive hint", async () => {
|
|
1493
|
+
const tool = findTool("discover_tools");
|
|
1494
|
+
const result = (await tool.handler({ query: "verify" }));
|
|
1495
|
+
expect(result._progressiveHint).toBeTruthy();
|
|
1496
|
+
});
|
|
1497
|
+
});
|
|
1498
|
+
describe("Unit: get_tool_quick_ref", () => {
|
|
1499
|
+
it("should return quick ref for known tool", async () => {
|
|
1500
|
+
const tool = findTool("get_tool_quick_ref");
|
|
1501
|
+
const result = (await tool.handler({ toolName: "start_verification_cycle" }));
|
|
1502
|
+
expect(result.tool).toBe("start_verification_cycle");
|
|
1503
|
+
expect(result.category).toBe("verification");
|
|
1504
|
+
expect(result.quickRef).toHaveProperty("nextAction");
|
|
1505
|
+
expect(result.quickRef).toHaveProperty("nextTools");
|
|
1506
|
+
expect(result.quickRef.nextTools.length).toBeGreaterThan(0);
|
|
1507
|
+
});
|
|
1508
|
+
it("should return error for unknown tool with suggestions", async () => {
|
|
1509
|
+
const tool = findTool("get_tool_quick_ref");
|
|
1510
|
+
const result = (await tool.handler({ toolName: "nonexistent_tool_xyz" }));
|
|
1511
|
+
expect(result.error).toBe(true);
|
|
1512
|
+
expect(result).toHaveProperty("didYouMean");
|
|
1513
|
+
});
|
|
1514
|
+
it("should include related tool details when requested", async () => {
|
|
1515
|
+
const tool = findTool("get_tool_quick_ref");
|
|
1516
|
+
const result = (await tool.handler({
|
|
1517
|
+
toolName: "run_mandatory_flywheel",
|
|
1518
|
+
includeRelatedDetails: true,
|
|
1519
|
+
}));
|
|
1520
|
+
expect(result).toHaveProperty("relatedToolDetails");
|
|
1521
|
+
expect(Object.keys(result.relatedToolDetails).length).toBeGreaterThan(0);
|
|
1522
|
+
});
|
|
1523
|
+
});
|
|
1524
|
+
describe("Unit: get_workflow_chain", () => {
|
|
1525
|
+
it("should list all available chains", async () => {
|
|
1526
|
+
const tool = findTool("get_workflow_chain");
|
|
1527
|
+
const result = (await tool.handler({ chain: "list" }));
|
|
1528
|
+
expect(result.availableChains.length).toBeGreaterThan(0);
|
|
1529
|
+
const keys = result.availableChains.map((c) => c.key);
|
|
1530
|
+
expect(keys).toContain("new_feature");
|
|
1531
|
+
expect(keys).toContain("fix_bug");
|
|
1532
|
+
expect(keys).toContain("c_compiler_benchmark");
|
|
1533
|
+
});
|
|
1534
|
+
it("should return enriched chain steps", async () => {
|
|
1535
|
+
const tool = findTool("get_workflow_chain");
|
|
1536
|
+
const result = (await tool.handler({ chain: "new_feature" }));
|
|
1537
|
+
expect(result.name).toBe("Build a New Feature");
|
|
1538
|
+
expect(result.totalSteps).toBeGreaterThan(5);
|
|
1539
|
+
expect(result.steps[0]).toHaveProperty("tool");
|
|
1540
|
+
expect(result.steps[0]).toHaveProperty("action");
|
|
1541
|
+
expect(result.steps[0]).toHaveProperty("quickRef");
|
|
1542
|
+
});
|
|
1543
|
+
it("should return error for unknown chain", async () => {
|
|
1544
|
+
const tool = findTool("get_workflow_chain");
|
|
1545
|
+
const result = (await tool.handler({ chain: "nonexistent_chain" }));
|
|
1546
|
+
expect(result.error).toBe(true);
|
|
1547
|
+
});
|
|
1548
|
+
});
|
|
1549
|
+
describe("Static: boilerplate tools", () => {
|
|
1550
|
+
it("should include scaffold_nodebench_project and get_boilerplate_status", () => {
|
|
1551
|
+
const names = allTools.map((t) => t.name);
|
|
1552
|
+
expect(names).toContain("scaffold_nodebench_project");
|
|
1553
|
+
expect(names).toContain("get_boilerplate_status");
|
|
1554
|
+
});
|
|
1555
|
+
it("scaffold_nodebench_project requires projectPath, projectName, techStack", () => {
|
|
1556
|
+
const tool = findTool("scaffold_nodebench_project");
|
|
1557
|
+
expect(tool.inputSchema.required).toContain("projectPath");
|
|
1558
|
+
expect(tool.inputSchema.required).toContain("projectName");
|
|
1559
|
+
expect(tool.inputSchema.required).toContain("techStack");
|
|
1560
|
+
});
|
|
1561
|
+
it("get_boilerplate_status requires projectPath", () => {
|
|
1562
|
+
const tool = findTool("get_boilerplate_status");
|
|
1563
|
+
expect(tool.inputSchema.required).toContain("projectPath");
|
|
1564
|
+
});
|
|
1565
|
+
});
|
|
1566
|
+
describe("Unit: scaffold_nodebench_project dry run", () => {
|
|
1567
|
+
it("should preview files without creating them", async () => {
|
|
1568
|
+
const tmpDir = await mkdtemp(path.join(os.tmpdir(), "nodebench-scaffold-"));
|
|
1569
|
+
const tool = findTool("scaffold_nodebench_project");
|
|
1570
|
+
const result = (await tool.handler({
|
|
1571
|
+
projectPath: tmpDir,
|
|
1572
|
+
projectName: "test-project",
|
|
1573
|
+
techStack: "TypeScript, Node.js",
|
|
1574
|
+
dryRun: true,
|
|
1575
|
+
}));
|
|
1576
|
+
expect(result.dryRun).toBe(true);
|
|
1577
|
+
expect(result.summary.totalFiles).toBeGreaterThan(5);
|
|
1578
|
+
expect(result.willCreate.length).toBeGreaterThan(0);
|
|
1579
|
+
expect(result.willCreate).toContain("AGENTS.md");
|
|
1580
|
+
expect(result.willCreate).toContain("package.json");
|
|
1581
|
+
expect(result.willCreate).toContain(".mcp.json");
|
|
1582
|
+
expect(result._quickRef).toBeDefined();
|
|
1583
|
+
});
|
|
1584
|
+
});
|
|
1585
|
+
describe("Unit: scaffold_nodebench_project actual creation", () => {
|
|
1586
|
+
it("should create all project files", async () => {
|
|
1587
|
+
const tmpDir = await mkdtemp(path.join(os.tmpdir(), "nodebench-scaffold-"));
|
|
1588
|
+
const tool = findTool("scaffold_nodebench_project");
|
|
1589
|
+
const result = (await tool.handler({
|
|
1590
|
+
projectPath: tmpDir,
|
|
1591
|
+
projectName: "real-project",
|
|
1592
|
+
techStack: "TypeScript, React",
|
|
1593
|
+
dryRun: false,
|
|
1594
|
+
includeParallelAgents: true,
|
|
1595
|
+
includeGithubActions: true,
|
|
1596
|
+
}));
|
|
1597
|
+
expect(result.dryRun).toBe(false);
|
|
1598
|
+
expect(result.summary.created).toBeGreaterThan(5);
|
|
1599
|
+
// Verify key files exist
|
|
1600
|
+
const { existsSync } = await import("node:fs");
|
|
1601
|
+
expect(existsSync(path.join(tmpDir, "AGENTS.md"))).toBe(true);
|
|
1602
|
+
expect(existsSync(path.join(tmpDir, "package.json"))).toBe(true);
|
|
1603
|
+
expect(existsSync(path.join(tmpDir, ".mcp.json"))).toBe(true);
|
|
1604
|
+
expect(existsSync(path.join(tmpDir, ".parallel-agents"))).toBe(true);
|
|
1605
|
+
expect(existsSync(path.join(tmpDir, ".github", "workflows"))).toBe(true);
|
|
1606
|
+
});
|
|
1607
|
+
});
|
|
1608
|
+
describe("Unit: get_boilerplate_status", () => {
|
|
1609
|
+
it("should scan an empty directory and find everything missing", async () => {
|
|
1610
|
+
const tmpDir = await mkdtemp(path.join(os.tmpdir(), "nodebench-status-"));
|
|
1611
|
+
const tool = findTool("get_boilerplate_status");
|
|
1612
|
+
const result = (await tool.handler({ projectPath: tmpDir }));
|
|
1613
|
+
expect(result.completionPercentage).toBe(0);
|
|
1614
|
+
expect(result.missing).toBeGreaterThan(0);
|
|
1615
|
+
expect(result.missingFiles).toContain("AGENTS.md");
|
|
1616
|
+
expect(result.recommendations.length).toBeGreaterThan(0);
|
|
1617
|
+
});
|
|
1618
|
+
it("should detect existing files after scaffolding", async () => {
|
|
1619
|
+
const tmpDir = await mkdtemp(path.join(os.tmpdir(), "nodebench-status-"));
|
|
1620
|
+
// Scaffold first
|
|
1621
|
+
await findTool("scaffold_nodebench_project").handler({
|
|
1622
|
+
projectPath: tmpDir,
|
|
1623
|
+
projectName: "status-test",
|
|
1624
|
+
techStack: "TypeScript",
|
|
1625
|
+
dryRun: false,
|
|
1626
|
+
});
|
|
1627
|
+
// Then check status
|
|
1628
|
+
const tool = findTool("get_boilerplate_status");
|
|
1629
|
+
const result = (await tool.handler({ projectPath: tmpDir }));
|
|
1630
|
+
expect(result.completionPercentage).toBeGreaterThan(50);
|
|
1631
|
+
expect(result.found).toBeGreaterThan(5);
|
|
1632
|
+
});
|
|
1633
|
+
it("should throw for nonexistent path", async () => {
|
|
1634
|
+
const tool = findTool("get_boilerplate_status");
|
|
1635
|
+
await expect(tool.handler({ projectPath: "/nonexistent/path/xyz123" })).rejects.toThrow("does not exist");
|
|
1636
|
+
});
|
|
1637
|
+
});
|
|
1638
|
+
describe("Static: C-compiler benchmark tools", () => {
|
|
1639
|
+
it("should include all 3 benchmark tools", () => {
|
|
1640
|
+
const names = allTools.map((t) => t.name);
|
|
1641
|
+
expect(names).toContain("start_autonomy_benchmark");
|
|
1642
|
+
expect(names).toContain("log_benchmark_milestone");
|
|
1643
|
+
expect(names).toContain("complete_autonomy_benchmark");
|
|
1644
|
+
});
|
|
1645
|
+
it("start_autonomy_benchmark requires challenge parameter", () => {
|
|
1646
|
+
const tool = findTool("start_autonomy_benchmark");
|
|
1647
|
+
expect(tool.inputSchema.required).toContain("challenge");
|
|
1648
|
+
const challengeProp = tool.inputSchema.properties.challenge;
|
|
1649
|
+
expect(challengeProp.enum).toContain("c_compiler");
|
|
1650
|
+
expect(challengeProp.enum).toContain("rest_api");
|
|
1651
|
+
expect(challengeProp.enum).toContain("fullstack_app");
|
|
1652
|
+
expect(challengeProp.enum).toContain("list");
|
|
1653
|
+
});
|
|
1654
|
+
it("log_benchmark_milestone requires benchmarkId, milestoneId, verificationPassed", () => {
|
|
1655
|
+
const tool = findTool("log_benchmark_milestone");
|
|
1656
|
+
expect(tool.inputSchema.required).toContain("benchmarkId");
|
|
1657
|
+
expect(tool.inputSchema.required).toContain("milestoneId");
|
|
1658
|
+
expect(tool.inputSchema.required).toContain("verificationPassed");
|
|
1659
|
+
});
|
|
1660
|
+
it("complete_autonomy_benchmark requires benchmarkId and reason", () => {
|
|
1661
|
+
const tool = findTool("complete_autonomy_benchmark");
|
|
1662
|
+
expect(tool.inputSchema.required).toContain("benchmarkId");
|
|
1663
|
+
expect(tool.inputSchema.required).toContain("reason");
|
|
1664
|
+
});
|
|
1665
|
+
});
|
|
1666
|
+
describe("Unit: start_autonomy_benchmark", () => {
|
|
1667
|
+
it("should list all available challenges", async () => {
|
|
1668
|
+
const tool = findTool("start_autonomy_benchmark");
|
|
1669
|
+
const result = (await tool.handler({ challenge: "list" }));
|
|
1670
|
+
expect(result.availableChallenges.length).toBe(5);
|
|
1671
|
+
const keys = result.availableChallenges.map((c) => c.key);
|
|
1672
|
+
expect(keys).toContain("c_compiler");
|
|
1673
|
+
expect(keys).toContain("rest_api");
|
|
1674
|
+
expect(keys).toContain("fullstack_app");
|
|
1675
|
+
expect(keys).toContain("cli_tool");
|
|
1676
|
+
expect(keys).toContain("data_pipeline");
|
|
1677
|
+
});
|
|
1678
|
+
it("should start a cli_tool benchmark", async () => {
|
|
1679
|
+
const tool = findTool("start_autonomy_benchmark");
|
|
1680
|
+
const result = (await tool.handler({
|
|
1681
|
+
challenge: "cli_tool",
|
|
1682
|
+
notes: "test benchmark",
|
|
1683
|
+
}));
|
|
1684
|
+
expect(result.benchmarkId).toBeTruthy();
|
|
1685
|
+
expect(result.challenge).toBe("cli_tool");
|
|
1686
|
+
expect(result.difficulty).toBe("easy");
|
|
1687
|
+
expect(result.totalPoints).toBe(100);
|
|
1688
|
+
expect(result.milestones.length).toBe(8);
|
|
1689
|
+
expect(result._quickRef).toBeDefined();
|
|
1690
|
+
});
|
|
1691
|
+
it("should throw for unknown challenge", async () => {
|
|
1692
|
+
const tool = findTool("start_autonomy_benchmark");
|
|
1693
|
+
await expect(tool.handler({ challenge: "nonexistent_challenge" })).rejects.toThrow("Unknown challenge");
|
|
1694
|
+
});
|
|
1695
|
+
});
|
|
1696
|
+
describe("Integration: full benchmark lifecycle", () => {
|
|
1697
|
+
it("start → log milestone → complete", async () => {
|
|
1698
|
+
// 1. Start benchmark
|
|
1699
|
+
const benchmark = (await findTool("start_autonomy_benchmark").handler({
|
|
1700
|
+
challenge: "cli_tool",
|
|
1701
|
+
notes: "integration test",
|
|
1702
|
+
}));
|
|
1703
|
+
expect(benchmark.benchmarkId).toBeTruthy();
|
|
1704
|
+
// 2. Log a milestone
|
|
1705
|
+
const milestone = (await findTool("log_benchmark_milestone").handler({
|
|
1706
|
+
benchmarkId: benchmark.benchmarkId,
|
|
1707
|
+
milestoneId: "project_setup",
|
|
1708
|
+
verificationPassed: true,
|
|
1709
|
+
toolsUsed: ["run_closed_loop", "bootstrap_project"],
|
|
1710
|
+
notes: "Project initialized",
|
|
1711
|
+
}));
|
|
1712
|
+
expect(milestone.points).toBe(15);
|
|
1713
|
+
expect(milestone.progress.earnedPoints).toBe(15);
|
|
1714
|
+
expect(milestone.progress.milestonesCompleted).toBe(1);
|
|
1715
|
+
// 3. Log another milestone (failed)
|
|
1716
|
+
const milestone2 = (await findTool("log_benchmark_milestone").handler({
|
|
1717
|
+
benchmarkId: benchmark.benchmarkId,
|
|
1718
|
+
milestoneId: "arg_parsing",
|
|
1719
|
+
verificationPassed: false,
|
|
1720
|
+
notes: "Arg parsing failed tests",
|
|
1721
|
+
}));
|
|
1722
|
+
expect(milestone2.points).toBe(0);
|
|
1723
|
+
expect(milestone2.progress.earnedPoints).toBe(15); // unchanged
|
|
1724
|
+
// 4. Complete benchmark
|
|
1725
|
+
const completed = (await findTool("complete_autonomy_benchmark").handler({
|
|
1726
|
+
benchmarkId: benchmark.benchmarkId,
|
|
1727
|
+
reason: "stuck",
|
|
1728
|
+
notes: "Integration test complete",
|
|
1729
|
+
}));
|
|
1730
|
+
expect(completed.score.earnedPoints).toBe(15);
|
|
1731
|
+
expect(completed.score.percentage).toBe(15);
|
|
1732
|
+
expect(completed.score.grade).toContain("F");
|
|
1733
|
+
expect(completed.milestones.completed).toBe(1);
|
|
1734
|
+
expect(completed.milestones.failed).toBe(1);
|
|
1735
|
+
expect(completed.milestones.pending).toBe(6);
|
|
1736
|
+
expect(completed.analysis.strengths).toContain("Project Setup");
|
|
1737
|
+
expect(completed._quickRef).toBeDefined();
|
|
1738
|
+
});
|
|
1739
|
+
});
|
|
1193
1740
|
//# sourceMappingURL=tools.test.js.map
|