nodebench-mcp 2.4.0 → 2.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/NODEBENCH_AGENTS.md +8 -4
- package/README.md +56 -19
- package/dist/__tests__/evalHarness.test.js +1 -1
- package/dist/__tests__/gaiaCapabilityFilesEval.test.js +543 -57
- package/dist/__tests__/gaiaCapabilityFilesEval.test.js.map +1 -1
- package/dist/__tests__/tools.test.js +664 -6
- package/dist/__tests__/tools.test.js.map +1 -1
- package/dist/index.js +30 -6
- package/dist/index.js.map +1 -1
- package/dist/tools/boilerplateTools.d.ts +11 -0
- package/dist/tools/boilerplateTools.js +500 -0
- package/dist/tools/boilerplateTools.js.map +1 -0
- package/dist/tools/cCompilerBenchmarkTools.d.ts +14 -0
- package/dist/tools/cCompilerBenchmarkTools.js +453 -0
- package/dist/tools/cCompilerBenchmarkTools.js.map +1 -0
- package/dist/tools/figmaFlowTools.d.ts +13 -0
- package/dist/tools/figmaFlowTools.js +183 -0
- package/dist/tools/figmaFlowTools.js.map +1 -0
- package/dist/tools/flickerDetectionTools.d.ts +14 -0
- package/dist/tools/flickerDetectionTools.js +231 -0
- package/dist/tools/flickerDetectionTools.js.map +1 -0
- package/dist/tools/localFileTools.d.ts +1 -0
- package/dist/tools/localFileTools.js +1926 -27
- package/dist/tools/localFileTools.js.map +1 -1
- package/dist/tools/metaTools.js +96 -2
- package/dist/tools/metaTools.js.map +1 -1
- package/dist/tools/progressiveDiscoveryTools.d.ts +14 -0
- package/dist/tools/progressiveDiscoveryTools.js +222 -0
- package/dist/tools/progressiveDiscoveryTools.js.map +1 -0
- package/dist/tools/researchWritingTools.d.ts +12 -0
- package/dist/tools/researchWritingTools.js +573 -0
- package/dist/tools/researchWritingTools.js.map +1 -0
- package/dist/tools/securityTools.js +128 -0
- package/dist/tools/securityTools.js.map +1 -1
- package/dist/tools/toolRegistry.d.ts +70 -0
- package/dist/tools/toolRegistry.js +1437 -0
- package/dist/tools/toolRegistry.js.map +1 -0
- package/package.json +6 -3
|
@@ -7,6 +7,7 @@ import { describe, it, expect } from "vitest";
|
|
|
7
7
|
import os from "node:os";
|
|
8
8
|
import path from "node:path";
|
|
9
9
|
import { mkdtemp, writeFile } from "node:fs/promises";
|
|
10
|
+
import { existsSync } from "node:fs";
|
|
10
11
|
import { verificationTools } from "../tools/verificationTools.js";
|
|
11
12
|
import { reconTools } from "../tools/reconTools.js";
|
|
12
13
|
import { uiCaptureTools } from "../tools/uiCaptureTools.js";
|
|
@@ -26,6 +27,12 @@ import { llmTools } from "../tools/llmTools.js";
|
|
|
26
27
|
import { securityTools } from "../tools/securityTools.js";
|
|
27
28
|
import { platformTools } from "../tools/platformTools.js";
|
|
28
29
|
import { localFileTools } from "../tools/localFileTools.js";
|
|
30
|
+
import { researchWritingTools } from "../tools/researchWritingTools.js";
|
|
31
|
+
import { flickerDetectionTools } from "../tools/flickerDetectionTools.js";
|
|
32
|
+
import { figmaFlowTools } from "../tools/figmaFlowTools.js";
|
|
33
|
+
import { createProgressiveDiscoveryTools } from "../tools/progressiveDiscoveryTools.js";
|
|
34
|
+
import { boilerplateTools } from "../tools/boilerplateTools.js";
|
|
35
|
+
import { cCompilerBenchmarkTools } from "../tools/cCompilerBenchmarkTools.js";
|
|
29
36
|
// Assemble all tools like index.ts does
|
|
30
37
|
const domainTools = [
|
|
31
38
|
...verificationTools,
|
|
@@ -46,15 +53,23 @@ const domainTools = [
|
|
|
46
53
|
...llmTools,
|
|
47
54
|
...securityTools,
|
|
48
55
|
...platformTools,
|
|
56
|
+
...researchWritingTools,
|
|
57
|
+
...flickerDetectionTools,
|
|
58
|
+
...figmaFlowTools,
|
|
59
|
+
...boilerplateTools,
|
|
60
|
+
...cCompilerBenchmarkTools,
|
|
49
61
|
];
|
|
50
|
-
const
|
|
62
|
+
const metaTools = createMetaTools(domainTools);
|
|
63
|
+
const allToolsWithoutDiscovery = [...domainTools, ...metaTools];
|
|
64
|
+
const discoveryTools = createProgressiveDiscoveryTools(allToolsWithoutDiscovery.map((t) => ({ name: t.name, description: t.description })));
|
|
65
|
+
const allTools = [...allToolsWithoutDiscovery, ...discoveryTools];
|
|
51
66
|
// ═══════════════════════════════════════════════════════════════════════════
|
|
52
67
|
// STATIC LAYER — structure validation
|
|
53
68
|
// ═══════════════════════════════════════════════════════════════════════════
|
|
54
69
|
describe("Static: tool structure", () => {
|
|
55
|
-
it("should have
|
|
56
|
-
//
|
|
57
|
-
expect(allTools.length).toBe(
|
|
70
|
+
it("should have 129 tools total", () => {
|
|
71
|
+
// 124 domain tools + 2 meta tools (findTools, getMethodology) + 3 progressive discovery tools
|
|
72
|
+
expect(allTools.length).toBe(129);
|
|
58
73
|
});
|
|
59
74
|
it("every tool has name, description, inputSchema, handler", () => {
|
|
60
75
|
for (const tool of allTools) {
|
|
@@ -266,7 +281,7 @@ describe("Static: new methodology topics", () => {
|
|
|
266
281
|
expect(topics).toContain("agent_bootstrap");
|
|
267
282
|
expect(topics).toContain("autonomous_maintenance");
|
|
268
283
|
expect(topics).toContain("parallel_agent_teams");
|
|
269
|
-
expect(topics.length).toBe(
|
|
284
|
+
expect(topics.length).toBe(19); // All topics listed in overview
|
|
270
285
|
});
|
|
271
286
|
});
|
|
272
287
|
describe("Unit: setup_local_env", () => {
|
|
@@ -456,6 +471,19 @@ describe("Static: self_reinforced_learning methodology", () => {
|
|
|
456
471
|
// ═══════════════════════════════════════════════════════════════════════════
|
|
457
472
|
const findTool = (name) => allTools.find((t) => t.name === name);
|
|
458
473
|
describe("Unit: local file tools", () => {
|
|
474
|
+
const findRepoFile = (relPath) => {
|
|
475
|
+
let dir = process.cwd();
|
|
476
|
+
for (let i = 0; i < 10; i++) {
|
|
477
|
+
const candidate = path.join(dir, relPath);
|
|
478
|
+
if (existsSync(candidate))
|
|
479
|
+
return candidate;
|
|
480
|
+
const parent = path.dirname(dir);
|
|
481
|
+
if (parent === dir)
|
|
482
|
+
break;
|
|
483
|
+
dir = parent;
|
|
484
|
+
}
|
|
485
|
+
throw new Error(`Fixture not found: ${relPath}`);
|
|
486
|
+
};
|
|
459
487
|
it("read_csv_file should parse a bounded table", async () => {
|
|
460
488
|
const tmpDir = await mkdtemp(path.join(os.tmpdir(), "nodebench-mcp-"));
|
|
461
489
|
const csvPath = path.join(tmpDir, "sample.csv");
|
|
@@ -499,6 +527,218 @@ describe("Unit: local file tools", () => {
|
|
|
499
527
|
expect(result.rows[0][0]).toBe("Movie A");
|
|
500
528
|
expect(result.rows[0][1]).toBe(2009);
|
|
501
529
|
});
|
|
530
|
+
it("csv_select_rows should filter rows and select columns", async () => {
|
|
531
|
+
const tmpDir = await mkdtemp(path.join(os.tmpdir(), "nodebench-mcp-"));
|
|
532
|
+
const csvPath = path.join(tmpDir, "sample.csv");
|
|
533
|
+
await writeFile(csvPath, "name,age\nAlice,30\nBob,25\nCara,40\n", "utf8");
|
|
534
|
+
const tool = findTool("csv_select_rows");
|
|
535
|
+
const result = (await tool.handler({
|
|
536
|
+
path: csvPath,
|
|
537
|
+
hasHeader: true,
|
|
538
|
+
where: [{ column: "age", op: "gt", value: 25 }],
|
|
539
|
+
returnColumns: ["name"],
|
|
540
|
+
limit: 10,
|
|
541
|
+
}));
|
|
542
|
+
expect(result.headers).toEqual(["name"]);
|
|
543
|
+
expect(result.rows.length).toBe(2);
|
|
544
|
+
expect(result.rows[0].row[0]).toBe("Alice");
|
|
545
|
+
expect(result.rows[1].row[0]).toBe("Cara");
|
|
546
|
+
});
|
|
547
|
+
it("csv_aggregate should compute min and return bestRow", async () => {
|
|
548
|
+
const tmpDir = await mkdtemp(path.join(os.tmpdir(), "nodebench-mcp-"));
|
|
549
|
+
const csvPath = path.join(tmpDir, "sample.csv");
|
|
550
|
+
await writeFile(csvPath, "name,age\nAlice,30\nBob,25\nCara,40\n", "utf8");
|
|
551
|
+
const tool = findTool("csv_aggregate");
|
|
552
|
+
const result = (await tool.handler({
|
|
553
|
+
path: csvPath,
|
|
554
|
+
hasHeader: true,
|
|
555
|
+
operation: "min",
|
|
556
|
+
value: { type: "column", column: "age" },
|
|
557
|
+
returnColumns: ["name", "age"],
|
|
558
|
+
}));
|
|
559
|
+
expect(result.result).toBe(25);
|
|
560
|
+
expect(result.bestRow.headers).toEqual(["name", "age"]);
|
|
561
|
+
expect(result.bestRow.row[0]).toBe("Bob");
|
|
562
|
+
});
|
|
563
|
+
it("xlsx_select_rows should filter rows and select columns", async () => {
|
|
564
|
+
const tmpDir = await mkdtemp(path.join(os.tmpdir(), "nodebench-mcp-"));
|
|
565
|
+
const xlsxPath = path.join(tmpDir, "sample.xlsx");
|
|
566
|
+
const mod = await import("xlsx");
|
|
567
|
+
const XLSX = mod.default ?? mod;
|
|
568
|
+
const wb = XLSX.utils.book_new();
|
|
569
|
+
const sheet = XLSX.utils.aoa_to_sheet([
|
|
570
|
+
["Title", "Year"],
|
|
571
|
+
["Movie A", 2009],
|
|
572
|
+
["Movie B", 2011],
|
|
573
|
+
]);
|
|
574
|
+
XLSX.utils.book_append_sheet(wb, sheet, "Sheet1");
|
|
575
|
+
XLSX.writeFile(wb, xlsxPath);
|
|
576
|
+
const tool = findTool("xlsx_select_rows");
|
|
577
|
+
const result = (await tool.handler({
|
|
578
|
+
path: xlsxPath,
|
|
579
|
+
sheetName: "Sheet1",
|
|
580
|
+
headerRow: 1,
|
|
581
|
+
where: [{ column: "Year", op: "eq", value: 2009 }],
|
|
582
|
+
returnColumns: ["Title"],
|
|
583
|
+
limit: 10,
|
|
584
|
+
}));
|
|
585
|
+
expect(result.headers).toEqual(["Title"]);
|
|
586
|
+
expect(result.rows.length).toBe(1);
|
|
587
|
+
expect(result.rows[0].row[0]).toBe("Movie A");
|
|
588
|
+
});
|
|
589
|
+
it("xlsx_aggregate should compute min and return bestRow", async () => {
|
|
590
|
+
const tmpDir = await mkdtemp(path.join(os.tmpdir(), "nodebench-mcp-"));
|
|
591
|
+
const xlsxPath = path.join(tmpDir, "sample.xlsx");
|
|
592
|
+
const mod = await import("xlsx");
|
|
593
|
+
const XLSX = mod.default ?? mod;
|
|
594
|
+
const wb = XLSX.utils.book_new();
|
|
595
|
+
const sheet = XLSX.utils.aoa_to_sheet([
|
|
596
|
+
["Title", "Year"],
|
|
597
|
+
["Movie A", 2009],
|
|
598
|
+
["Movie B", 2011],
|
|
599
|
+
]);
|
|
600
|
+
XLSX.utils.book_append_sheet(wb, sheet, "Sheet1");
|
|
601
|
+
XLSX.writeFile(wb, xlsxPath);
|
|
602
|
+
const tool = findTool("xlsx_aggregate");
|
|
603
|
+
const result = (await tool.handler({
|
|
604
|
+
path: xlsxPath,
|
|
605
|
+
sheetName: "Sheet1",
|
|
606
|
+
headerRow: 1,
|
|
607
|
+
operation: "min",
|
|
608
|
+
value: { type: "column", column: "Year" },
|
|
609
|
+
returnColumns: ["Title", "Year"],
|
|
610
|
+
}));
|
|
611
|
+
expect(result.result).toBe(2009);
|
|
612
|
+
expect(result.bestRow.headers).toEqual(["Title", "Year"]);
|
|
613
|
+
expect(result.bestRow.row[0]).toBe("Movie A");
|
|
614
|
+
});
|
|
615
|
+
it("read_pdf_text should extract page text", async () => {
|
|
616
|
+
const pdfPath = findRepoFile(path.join("test_assets", "Report_2025-12-25.pdf"));
|
|
617
|
+
const tool = findTool("read_pdf_text");
|
|
618
|
+
const result = (await tool.handler({
|
|
619
|
+
path: pdfPath,
|
|
620
|
+
pageStart: 1,
|
|
621
|
+
pageEnd: 1,
|
|
622
|
+
maxChars: 2000,
|
|
623
|
+
}));
|
|
624
|
+
expect(result.pagesIncluded).toEqual([1]);
|
|
625
|
+
expect(String(result.text)).toContain("Hello World");
|
|
626
|
+
});
|
|
627
|
+
it("pdf_search_text should find matches with snippets", async () => {
|
|
628
|
+
const pdfPath = findRepoFile(path.join("test_assets", "Report_2025-12-25.pdf"));
|
|
629
|
+
const tool = findTool("pdf_search_text");
|
|
630
|
+
const result = (await tool.handler({
|
|
631
|
+
path: pdfPath,
|
|
632
|
+
query: "Hello",
|
|
633
|
+
maxMatches: 5,
|
|
634
|
+
}));
|
|
635
|
+
expect(result.matchCount).toBeGreaterThan(0);
|
|
636
|
+
expect(result.matches[0].page).toBe(1);
|
|
637
|
+
expect(String(result.matches[0].snippet)).toContain("Hello");
|
|
638
|
+
});
|
|
639
|
+
it("read_text_file should return bounded text slices", async () => {
|
|
640
|
+
const tmpDir = await mkdtemp(path.join(os.tmpdir(), "nodebench-mcp-"));
|
|
641
|
+
const filePath = path.join(tmpDir, "notes.txt");
|
|
642
|
+
await writeFile(filePath, "Line1\nLine2\nLine3\n", "utf8");
|
|
643
|
+
const tool = findTool("read_text_file");
|
|
644
|
+
const result = (await tool.handler({
|
|
645
|
+
path: filePath,
|
|
646
|
+
startChar: 0,
|
|
647
|
+
maxChars: 10,
|
|
648
|
+
}));
|
|
649
|
+
expect(result.truncated).toBe(true);
|
|
650
|
+
expect(String(result.text)).toContain("Line1");
|
|
651
|
+
});
|
|
652
|
+
it("read_json_file and json_select should parse and select values", async () => {
|
|
653
|
+
const tmpDir = await mkdtemp(path.join(os.tmpdir(), "nodebench-mcp-"));
|
|
654
|
+
const filePath = path.join(tmpDir, "data.json");
|
|
655
|
+
await writeFile(filePath, JSON.stringify({ a: { b: [{ name: "alpha" }, { name: "beta" }] } }), "utf8");
|
|
656
|
+
const readTool = findTool("read_json_file");
|
|
657
|
+
const readResult = (await readTool.handler({
|
|
658
|
+
path: filePath,
|
|
659
|
+
maxDepth: 6,
|
|
660
|
+
maxItems: 50,
|
|
661
|
+
maxStringChars: 1000,
|
|
662
|
+
}));
|
|
663
|
+
expect(readResult.rootType).toBe("object");
|
|
664
|
+
expect(readResult.value.a.b.length).toBe(2);
|
|
665
|
+
const selectTool = findTool("json_select");
|
|
666
|
+
const selectResult = (await selectTool.handler({
|
|
667
|
+
path: filePath,
|
|
668
|
+
pointer: "/a/b/1/name",
|
|
669
|
+
maxDepth: 3,
|
|
670
|
+
maxItems: 10,
|
|
671
|
+
maxStringChars: 100,
|
|
672
|
+
}));
|
|
673
|
+
expect(selectResult.found).toBe(true);
|
|
674
|
+
expect(selectResult.value).toBe("beta");
|
|
675
|
+
});
|
|
676
|
+
it("read_jsonl_file should parse lines and report errors", async () => {
|
|
677
|
+
const tmpDir = await mkdtemp(path.join(os.tmpdir(), "nodebench-mcp-"));
|
|
678
|
+
const filePath = path.join(tmpDir, "data.jsonl");
|
|
679
|
+
await writeFile(filePath, '{"ok":1}\nnot-json\n{"ok":2}\n', "utf8");
|
|
680
|
+
const tool = findTool("read_jsonl_file");
|
|
681
|
+
const result = (await tool.handler({
|
|
682
|
+
path: filePath,
|
|
683
|
+
limitLines: 10,
|
|
684
|
+
parseJson: true,
|
|
685
|
+
maxDepth: 4,
|
|
686
|
+
maxItems: 20,
|
|
687
|
+
maxStringChars: 100,
|
|
688
|
+
}));
|
|
689
|
+
expect(result.returnedLines).toBe(2);
|
|
690
|
+
expect(result.errorCount).toBe(1);
|
|
691
|
+
expect(result.lines[0].value.ok).toBe(1);
|
|
692
|
+
expect(result.lines[1].value.ok).toBe(2);
|
|
693
|
+
});
|
|
694
|
+
it("zip_list_files and zip_read_text_file should read entries", async () => {
|
|
695
|
+
const zipPath = findRepoFile(path.join("test_assets", "zip_fixture.zip"));
|
|
696
|
+
const listTool = findTool("zip_list_files");
|
|
697
|
+
const listResult = (await listTool.handler({ path: zipPath, maxEntries: 50 }));
|
|
698
|
+
const names = (listResult.entries ?? []).map((e) => e.fileName);
|
|
699
|
+
expect(names).toContain("hello.txt");
|
|
700
|
+
expect(names).toContain("folder/data.csv");
|
|
701
|
+
const readTool = findTool("zip_read_text_file");
|
|
702
|
+
const readResult = (await readTool.handler({
|
|
703
|
+
path: zipPath,
|
|
704
|
+
innerPath: "hello.txt",
|
|
705
|
+
maxChars: 2000,
|
|
706
|
+
}));
|
|
707
|
+
expect(String(readResult.text)).toContain("Hello from zip fixture");
|
|
708
|
+
});
|
|
709
|
+
it("zip_extract_file should safely extract to outputDir", async () => {
|
|
710
|
+
const zipPath = findRepoFile(path.join("test_assets", "zip_fixture.zip"));
|
|
711
|
+
const tmpDir = await mkdtemp(path.join(os.tmpdir(), "nodebench-mcp-zip-"));
|
|
712
|
+
const extractTool = findTool("zip_extract_file");
|
|
713
|
+
const extracted = (await extractTool.handler({
|
|
714
|
+
path: zipPath,
|
|
715
|
+
innerPath: "folder/data.csv",
|
|
716
|
+
outputDir: tmpDir,
|
|
717
|
+
overwrite: true,
|
|
718
|
+
}));
|
|
719
|
+
expect(typeof extracted.extractedPath).toBe("string");
|
|
720
|
+
expect(existsSync(extracted.extractedPath)).toBe(true);
|
|
721
|
+
const readTool = findTool("read_text_file");
|
|
722
|
+
const text = (await readTool.handler({ path: extracted.extractedPath, maxChars: 2000 }));
|
|
723
|
+
expect(String(text.text)).toContain("alpha,1");
|
|
724
|
+
});
|
|
725
|
+
it("read_docx_text should extract document text", async () => {
|
|
726
|
+
const docxPath = findRepoFile(path.join("test_assets", "docx_fixture.docx"));
|
|
727
|
+
const tool = findTool("read_docx_text");
|
|
728
|
+
const result = (await tool.handler({ path: docxPath, maxChars: 5000 }));
|
|
729
|
+
expect(String(result.text)).toContain("Hello DOCX");
|
|
730
|
+
expect(String(result.text)).toContain("Second paragraph");
|
|
731
|
+
});
|
|
732
|
+
it("read_pptx_text should extract slide text with markers", async () => {
|
|
733
|
+
const pptxPath = findRepoFile(path.join("test_assets", "pptx_fixture.pptx"));
|
|
734
|
+
const tool = findTool("read_pptx_text");
|
|
735
|
+
const result = (await tool.handler({ path: pptxPath, maxChars: 10000 }));
|
|
736
|
+
expect(result.slideCount).toBe(2);
|
|
737
|
+
expect(String(result.text)).toContain("[SLIDE 1]");
|
|
738
|
+
expect(String(result.text)).toContain("Hello PPTX Slide1");
|
|
739
|
+
expect(String(result.text)).toContain("[SLIDE 2]");
|
|
740
|
+
expect(String(result.text)).toContain("Slide2 Text");
|
|
741
|
+
});
|
|
502
742
|
});
|
|
503
743
|
describe("Unit: abandon_cycle", () => {
|
|
504
744
|
it("should abandon an active cycle", async () => {
|
|
@@ -815,7 +1055,7 @@ describe("Unit: run_code_analysis", () => {
|
|
|
815
1055
|
it("should detect hardcoded API key in code", async () => {
|
|
816
1056
|
const tool = findTool("run_code_analysis");
|
|
817
1057
|
const result = (await tool.handler({
|
|
818
|
-
content: 'const api_key = "
|
|
1058
|
+
content: 'const api_key = "FAKE_TEST_KEY_abcdefghijklmnopqrstuvwxyz1234567890";',
|
|
819
1059
|
checks: ["secrets"],
|
|
820
1060
|
}));
|
|
821
1061
|
expect(result.totalFindings).toBeGreaterThanOrEqual(1);
|
|
@@ -1072,4 +1312,422 @@ describe("Integration: search finds logged gaps", () => {
|
|
|
1072
1312
|
});
|
|
1073
1313
|
});
|
|
1074
1314
|
});
|
|
1315
|
+
// ═══════════════════════════════════════════════════════════════════════════
|
|
1316
|
+
// RESEARCH WRITING TOOLS — academic paper polishing
|
|
1317
|
+
// ═══════════════════════════════════════════════════════════════════════════
|
|
1318
|
+
describe("Static: research writing tools", () => {
|
|
1319
|
+
it("should export 8 research writing tools", () => {
|
|
1320
|
+
expect(researchWritingTools.length).toBe(8);
|
|
1321
|
+
});
|
|
1322
|
+
it("should include all 8 research writing tools in allTools", () => {
|
|
1323
|
+
const names = allTools.map((t) => t.name);
|
|
1324
|
+
expect(names).toContain("polish_academic_text");
|
|
1325
|
+
expect(names).toContain("translate_academic");
|
|
1326
|
+
expect(names).toContain("compress_or_expand_text");
|
|
1327
|
+
expect(names).toContain("remove_ai_signatures");
|
|
1328
|
+
expect(names).toContain("check_paper_logic");
|
|
1329
|
+
expect(names).toContain("generate_academic_caption");
|
|
1330
|
+
expect(names).toContain("analyze_experiment_data");
|
|
1331
|
+
expect(names).toContain("review_paper_as_reviewer");
|
|
1332
|
+
});
|
|
1333
|
+
it("polish_academic_text requires text parameter", () => {
|
|
1334
|
+
const tool = findTool("polish_academic_text");
|
|
1335
|
+
expect(tool.inputSchema.required).toContain("text");
|
|
1336
|
+
expect(tool.inputSchema.properties).toHaveProperty("targetVenue");
|
|
1337
|
+
expect(tool.inputSchema.properties).toHaveProperty("language");
|
|
1338
|
+
});
|
|
1339
|
+
it("translate_academic requires text, from, and to parameters", () => {
|
|
1340
|
+
const tool = findTool("translate_academic");
|
|
1341
|
+
expect(tool.inputSchema.required).toContain("text");
|
|
1342
|
+
expect(tool.inputSchema.required).toContain("from");
|
|
1343
|
+
expect(tool.inputSchema.required).toContain("to");
|
|
1344
|
+
});
|
|
1345
|
+
it("compress_or_expand_text requires text and mode parameters", () => {
|
|
1346
|
+
const tool = findTool("compress_or_expand_text");
|
|
1347
|
+
expect(tool.inputSchema.required).toContain("text");
|
|
1348
|
+
expect(tool.inputSchema.required).toContain("mode");
|
|
1349
|
+
const modeProp = tool.inputSchema.properties.mode;
|
|
1350
|
+
expect(modeProp.enum).toContain("compress");
|
|
1351
|
+
expect(modeProp.enum).toContain("expand");
|
|
1352
|
+
});
|
|
1353
|
+
it("remove_ai_signatures requires text parameter", () => {
|
|
1354
|
+
const tool = findTool("remove_ai_signatures");
|
|
1355
|
+
expect(tool.inputSchema.required).toContain("text");
|
|
1356
|
+
});
|
|
1357
|
+
it("check_paper_logic requires text parameter", () => {
|
|
1358
|
+
const tool = findTool("check_paper_logic");
|
|
1359
|
+
expect(tool.inputSchema.required).toContain("text");
|
|
1360
|
+
expect(tool.inputSchema.properties).toHaveProperty("checkType");
|
|
1361
|
+
});
|
|
1362
|
+
it("generate_academic_caption requires description and figureType", () => {
|
|
1363
|
+
const tool = findTool("generate_academic_caption");
|
|
1364
|
+
expect(tool.inputSchema.required).toContain("description");
|
|
1365
|
+
expect(tool.inputSchema.required).toContain("figureType");
|
|
1366
|
+
const ftProp = tool.inputSchema.properties.figureType;
|
|
1367
|
+
expect(ftProp.enum).toContain("figure");
|
|
1368
|
+
expect(ftProp.enum).toContain("table");
|
|
1369
|
+
});
|
|
1370
|
+
it("analyze_experiment_data requires data and goal parameters", () => {
|
|
1371
|
+
const tool = findTool("analyze_experiment_data");
|
|
1372
|
+
expect(tool.inputSchema.required).toContain("data");
|
|
1373
|
+
expect(tool.inputSchema.required).toContain("goal");
|
|
1374
|
+
expect(tool.inputSchema.properties).toHaveProperty("format");
|
|
1375
|
+
});
|
|
1376
|
+
it("review_paper_as_reviewer requires text and venue parameters", () => {
|
|
1377
|
+
const tool = findTool("review_paper_as_reviewer");
|
|
1378
|
+
expect(tool.inputSchema.required).toContain("text");
|
|
1379
|
+
expect(tool.inputSchema.required).toContain("venue");
|
|
1380
|
+
const strictProp = tool.inputSchema.properties.strictness;
|
|
1381
|
+
expect(strictProp.enum).toContain("lenient");
|
|
1382
|
+
expect(strictProp.enum).toContain("moderate");
|
|
1383
|
+
expect(strictProp.enum).toContain("harsh");
|
|
1384
|
+
});
|
|
1385
|
+
});
|
|
1386
|
+
describe("Unit: remove_ai_signatures pattern detection", () => {
|
|
1387
|
+
it("should detect AI patterns in text with known signatures", async () => {
|
|
1388
|
+
const tool = findTool("remove_ai_signatures");
|
|
1389
|
+
const result = (await tool.handler({
|
|
1390
|
+
text: "We leverage advanced techniques to delve into the multifaceted landscape of deep learning. Furthermore, it is worth noting that our comprehensive approach utilizes a robust framework.",
|
|
1391
|
+
}));
|
|
1392
|
+
expect(result.patternsFound).toBeGreaterThan(0);
|
|
1393
|
+
expect(result.detectedPatterns.length).toBeGreaterThan(0);
|
|
1394
|
+
expect(result.detectedPatterns.some((p) => p.label.includes("leverage"))).toBe(true);
|
|
1395
|
+
});
|
|
1396
|
+
it("should return clean verdict for natural text", async () => {
|
|
1397
|
+
const tool = findTool("remove_ai_signatures");
|
|
1398
|
+
const result = (await tool.handler({
|
|
1399
|
+
text: "We train a convolutional network on ImageNet for 90 epochs using SGD with momentum 0.9.",
|
|
1400
|
+
}));
|
|
1401
|
+
expect(result.patternsFound).toBe(0);
|
|
1402
|
+
expect(result.verdict).toContain("No significant AI signatures");
|
|
1403
|
+
});
|
|
1404
|
+
});
|
|
1405
|
+
describe("Static: academic_paper_writing methodology", () => {
|
|
1406
|
+
it("should return academic_paper_writing methodology with 8 steps", async () => {
|
|
1407
|
+
const tool = allTools.find((t) => t.name === "getMethodology");
|
|
1408
|
+
const result = (await tool.handler({ topic: "academic_paper_writing" }));
|
|
1409
|
+
expect(result.title).toContain("Academic Paper Writing");
|
|
1410
|
+
expect(result.steps.length).toBe(8);
|
|
1411
|
+
expect(result.steps[0].name).toBe("Polish Draft");
|
|
1412
|
+
expect(result.steps[6].name).toBe("Simulate Review");
|
|
1413
|
+
});
|
|
1414
|
+
});
|
|
1415
|
+
describe("Static: scan_terminal_security tool", () => {
|
|
1416
|
+
const tool = domainTools.find((t) => t.name === "scan_terminal_security");
|
|
1417
|
+
it("should exist", () => {
|
|
1418
|
+
expect(tool).toBeDefined();
|
|
1419
|
+
});
|
|
1420
|
+
it("should accept projectRoot and checks", () => {
|
|
1421
|
+
const props = tool.inputSchema.properties;
|
|
1422
|
+
expect(props).toHaveProperty("projectRoot");
|
|
1423
|
+
expect(props).toHaveProperty("checks");
|
|
1424
|
+
});
|
|
1425
|
+
it("should accept scanHome and verbose flags", () => {
|
|
1426
|
+
const props = tool.inputSchema.properties;
|
|
1427
|
+
expect(props).toHaveProperty("scanHome");
|
|
1428
|
+
expect(props).toHaveProperty("verbose");
|
|
1429
|
+
});
|
|
1430
|
+
});
|
|
1431
|
+
// ═══════════════════════════════════════════════════════════════════════════
|
|
1432
|
+
// v2.8.0 — Progressive Discovery, Boilerplate, Benchmark tools
|
|
1433
|
+
// ═══════════════════════════════════════════════════════════════════════════
|
|
1434
|
+
describe("Static: progressive discovery tools", () => {
|
|
1435
|
+
it("should include discover_tools, get_tool_quick_ref, get_workflow_chain", () => {
|
|
1436
|
+
const names = allTools.map((t) => t.name);
|
|
1437
|
+
expect(names).toContain("discover_tools");
|
|
1438
|
+
expect(names).toContain("get_tool_quick_ref");
|
|
1439
|
+
expect(names).toContain("get_workflow_chain");
|
|
1440
|
+
});
|
|
1441
|
+
it("discover_tools requires query parameter", () => {
|
|
1442
|
+
const tool = findTool("discover_tools");
|
|
1443
|
+
expect(tool.inputSchema.required).toContain("query");
|
|
1444
|
+
expect(tool.inputSchema.properties).toHaveProperty("category");
|
|
1445
|
+
expect(tool.inputSchema.properties).toHaveProperty("phase");
|
|
1446
|
+
expect(tool.inputSchema.properties).toHaveProperty("limit");
|
|
1447
|
+
});
|
|
1448
|
+
it("get_tool_quick_ref requires toolName parameter", () => {
|
|
1449
|
+
const tool = findTool("get_tool_quick_ref");
|
|
1450
|
+
expect(tool.inputSchema.required).toContain("toolName");
|
|
1451
|
+
});
|
|
1452
|
+
it("get_workflow_chain requires chain parameter", () => {
|
|
1453
|
+
const tool = findTool("get_workflow_chain");
|
|
1454
|
+
expect(tool.inputSchema.required).toContain("chain");
|
|
1455
|
+
});
|
|
1456
|
+
});
|
|
1457
|
+
describe("Unit: discover_tools hybrid search", () => {
|
|
1458
|
+
it("should return ranked results for verification query", async () => {
|
|
1459
|
+
const tool = findTool("discover_tools");
|
|
1460
|
+
const result = (await tool.handler({ query: "verify implementation" }));
|
|
1461
|
+
expect(result.resultCount).toBeGreaterThan(0);
|
|
1462
|
+
expect(result.results[0]).toHaveProperty("relevanceScore");
|
|
1463
|
+
expect(result.results[0]).toHaveProperty("quickRef");
|
|
1464
|
+
expect(result.results[0].relevanceScore).toBeGreaterThan(0);
|
|
1465
|
+
});
|
|
1466
|
+
it("should filter by category", async () => {
|
|
1467
|
+
const tool = findTool("discover_tools");
|
|
1468
|
+
const result = (await tool.handler({ query: "test", category: "eval" }));
|
|
1469
|
+
for (const r of result.results) {
|
|
1470
|
+
expect(r.category).toBe("eval");
|
|
1471
|
+
}
|
|
1472
|
+
});
|
|
1473
|
+
it("should filter by phase", async () => {
|
|
1474
|
+
const tool = findTool("discover_tools");
|
|
1475
|
+
const result = (await tool.handler({ query: "search find", phase: "research" }));
|
|
1476
|
+
for (const r of result.results) {
|
|
1477
|
+
expect(r.phase).toBe("research");
|
|
1478
|
+
}
|
|
1479
|
+
});
|
|
1480
|
+
it("should include matching workflow chains", async () => {
|
|
1481
|
+
const tool = findTool("discover_tools");
|
|
1482
|
+
const result = (await tool.handler({ query: "new feature build" }));
|
|
1483
|
+
expect(result.matchingWorkflows.length).toBeGreaterThan(0);
|
|
1484
|
+
});
|
|
1485
|
+
it("should return progressive hint", async () => {
|
|
1486
|
+
const tool = findTool("discover_tools");
|
|
1487
|
+
const result = (await tool.handler({ query: "verify" }));
|
|
1488
|
+
expect(result._progressiveHint).toBeTruthy();
|
|
1489
|
+
});
|
|
1490
|
+
});
|
|
1491
|
+
describe("Unit: get_tool_quick_ref", () => {
|
|
1492
|
+
it("should return quick ref for known tool", async () => {
|
|
1493
|
+
const tool = findTool("get_tool_quick_ref");
|
|
1494
|
+
const result = (await tool.handler({ toolName: "start_verification_cycle" }));
|
|
1495
|
+
expect(result.tool).toBe("start_verification_cycle");
|
|
1496
|
+
expect(result.category).toBe("verification");
|
|
1497
|
+
expect(result.quickRef).toHaveProperty("nextAction");
|
|
1498
|
+
expect(result.quickRef).toHaveProperty("nextTools");
|
|
1499
|
+
expect(result.quickRef.nextTools.length).toBeGreaterThan(0);
|
|
1500
|
+
});
|
|
1501
|
+
it("should return error for unknown tool with suggestions", async () => {
|
|
1502
|
+
const tool = findTool("get_tool_quick_ref");
|
|
1503
|
+
const result = (await tool.handler({ toolName: "nonexistent_tool_xyz" }));
|
|
1504
|
+
expect(result.error).toBe(true);
|
|
1505
|
+
expect(result).toHaveProperty("didYouMean");
|
|
1506
|
+
});
|
|
1507
|
+
it("should include related tool details when requested", async () => {
|
|
1508
|
+
const tool = findTool("get_tool_quick_ref");
|
|
1509
|
+
const result = (await tool.handler({
|
|
1510
|
+
toolName: "run_mandatory_flywheel",
|
|
1511
|
+
includeRelatedDetails: true,
|
|
1512
|
+
}));
|
|
1513
|
+
expect(result).toHaveProperty("relatedToolDetails");
|
|
1514
|
+
expect(Object.keys(result.relatedToolDetails).length).toBeGreaterThan(0);
|
|
1515
|
+
});
|
|
1516
|
+
});
|
|
1517
|
+
describe("Unit: get_workflow_chain", () => {
|
|
1518
|
+
it("should list all available chains", async () => {
|
|
1519
|
+
const tool = findTool("get_workflow_chain");
|
|
1520
|
+
const result = (await tool.handler({ chain: "list" }));
|
|
1521
|
+
expect(result.availableChains.length).toBeGreaterThan(0);
|
|
1522
|
+
const keys = result.availableChains.map((c) => c.key);
|
|
1523
|
+
expect(keys).toContain("new_feature");
|
|
1524
|
+
expect(keys).toContain("fix_bug");
|
|
1525
|
+
expect(keys).toContain("c_compiler_benchmark");
|
|
1526
|
+
});
|
|
1527
|
+
it("should return enriched chain steps", async () => {
|
|
1528
|
+
const tool = findTool("get_workflow_chain");
|
|
1529
|
+
const result = (await tool.handler({ chain: "new_feature" }));
|
|
1530
|
+
expect(result.name).toBe("Build a New Feature");
|
|
1531
|
+
expect(result.totalSteps).toBeGreaterThan(5);
|
|
1532
|
+
expect(result.steps[0]).toHaveProperty("tool");
|
|
1533
|
+
expect(result.steps[0]).toHaveProperty("action");
|
|
1534
|
+
expect(result.steps[0]).toHaveProperty("quickRef");
|
|
1535
|
+
});
|
|
1536
|
+
it("should return error for unknown chain", async () => {
|
|
1537
|
+
const tool = findTool("get_workflow_chain");
|
|
1538
|
+
const result = (await tool.handler({ chain: "nonexistent_chain" }));
|
|
1539
|
+
expect(result.error).toBe(true);
|
|
1540
|
+
});
|
|
1541
|
+
});
|
|
1542
|
+
// Schema-level checks for the project boilerplate tools: registration
// and required input fields, without invoking any handlers.
describe("Static: boilerplate tools", () => {
    it("should include scaffold_nodebench_project and get_boilerplate_status", () => {
        const toolNames = allTools.map(({ name }) => name);
        for (const expected of ["scaffold_nodebench_project", "get_boilerplate_status"]) {
            expect(toolNames).toContain(expected);
        }
    });
    it("scaffold_nodebench_project requires projectPath, projectName, techStack", () => {
        const { inputSchema } = findTool("scaffold_nodebench_project");
        for (const field of ["projectPath", "projectName", "techStack"]) {
            expect(inputSchema.required).toContain(field);
        }
    });
    it("get_boilerplate_status requires projectPath", () => {
        const { inputSchema } = findTool("get_boilerplate_status");
        expect(inputSchema.required).toContain("projectPath");
    });
});
|
|
1559
|
+
// Dry-run behavior of scaffold_nodebench_project: the handler should
// report what it WOULD create without writing any project files.
describe("Unit: scaffold_nodebench_project dry run", () => {
    it("should preview files without creating them", async () => {
        // Throwaway directory so the dry run cannot touch a real project.
        const tmpDir = await mkdtemp(path.join(os.tmpdir(), "nodebench-scaffold-"));
        try {
            const tool = findTool("scaffold_nodebench_project");
            const result = (await tool.handler({
                projectPath: tmpDir,
                projectName: "test-project",
                techStack: "TypeScript, Node.js",
                dryRun: true,
            }));
            expect(result.dryRun).toBe(true);
            expect(result.summary.totalFiles).toBeGreaterThan(5);
            expect(result.willCreate.length).toBeGreaterThan(0);
            expect(result.willCreate).toContain("AGENTS.md");
            expect(result.willCreate).toContain("package.json");
            expect(result.willCreate).toContain(".mcp.json");
            expect(result._quickRef).toBeDefined();
        }
        finally {
            // Fix: the mkdtemp directory was previously leaked on every run.
            const { rm } = await import("node:fs/promises");
            await rm(tmpDir, { recursive: true, force: true });
        }
    });
});
|
|
1578
|
+
// Real (non-dry-run) scaffolding: the handler must materialize the key
// project files, plus the optional parallel-agents and GitHub Actions dirs.
describe("Unit: scaffold_nodebench_project actual creation", () => {
    it("should create all project files", async () => {
        const tmpDir = await mkdtemp(path.join(os.tmpdir(), "nodebench-scaffold-"));
        try {
            const tool = findTool("scaffold_nodebench_project");
            const result = (await tool.handler({
                projectPath: tmpDir,
                projectName: "real-project",
                techStack: "TypeScript, React",
                dryRun: false,
                includeParallelAgents: true,
                includeGithubActions: true,
            }));
            expect(result.dryRun).toBe(false);
            expect(result.summary.created).toBeGreaterThan(5);
            // Verify key files exist
            const { existsSync } = await import("node:fs");
            expect(existsSync(path.join(tmpDir, "AGENTS.md"))).toBe(true);
            expect(existsSync(path.join(tmpDir, "package.json"))).toBe(true);
            expect(existsSync(path.join(tmpDir, ".mcp.json"))).toBe(true);
            expect(existsSync(path.join(tmpDir, ".parallel-agents"))).toBe(true);
            expect(existsSync(path.join(tmpDir, ".github", "workflows"))).toBe(true);
        }
        finally {
            // Fix: remove the scaffolded tree; the whole project directory
            // was previously leaked into the OS temp dir on every run.
            const { rm } = await import("node:fs/promises");
            await rm(tmpDir, { recursive: true, force: true });
        }
    });
});
|
|
1601
|
+
// get_boilerplate_status: completion scanning for empty and scaffolded
// directories, plus error behavior for a path that does not exist.
describe("Unit: get_boilerplate_status", () => {
    it("should scan an empty directory and find everything missing", async () => {
        const tmpDir = await mkdtemp(path.join(os.tmpdir(), "nodebench-status-"));
        try {
            const tool = findTool("get_boilerplate_status");
            const result = (await tool.handler({ projectPath: tmpDir }));
            expect(result.completionPercentage).toBe(0);
            expect(result.missing).toBeGreaterThan(0);
            expect(result.missingFiles).toContain("AGENTS.md");
            expect(result.recommendations.length).toBeGreaterThan(0);
        }
        finally {
            // Fix: the temp directory was previously leaked on every run.
            const { rm } = await import("node:fs/promises");
            await rm(tmpDir, { recursive: true, force: true });
        }
    });
    it("should detect existing files after scaffolding", async () => {
        const tmpDir = await mkdtemp(path.join(os.tmpdir(), "nodebench-status-"));
        try {
            // Scaffold first
            await findTool("scaffold_nodebench_project").handler({
                projectPath: tmpDir,
                projectName: "status-test",
                techStack: "TypeScript",
                dryRun: false,
            });
            // Then check status
            const tool = findTool("get_boilerplate_status");
            const result = (await tool.handler({ projectPath: tmpDir }));
            expect(result.completionPercentage).toBeGreaterThan(50);
            expect(result.found).toBeGreaterThan(5);
        }
        finally {
            // Fix: remove the scaffolded project tree that was previously leaked.
            const { rm } = await import("node:fs/promises");
            await rm(tmpDir, { recursive: true, force: true });
        }
    });
    it("should throw for nonexistent path", async () => {
        const tool = findTool("get_boilerplate_status");
        await expect(tool.handler({ projectPath: "/nonexistent/path/xyz123" })).rejects.toThrow("does not exist");
    });
});
|
|
1631
|
+
// Schema-level checks for the autonomy-benchmark tool trio: registration,
// required parameters, and the challenge enum values.
describe("Static: C-compiler benchmark tools", () => {
    it("should include all 3 benchmark tools", () => {
        const names = allTools.map((t) => t.name);
        const benchmarkTools = [
            "start_autonomy_benchmark",
            "log_benchmark_milestone",
            "complete_autonomy_benchmark",
        ];
        benchmarkTools.forEach((toolName) => expect(names).toContain(toolName));
    });
    it("start_autonomy_benchmark requires challenge parameter", () => {
        const tool = findTool("start_autonomy_benchmark");
        expect(tool.inputSchema.required).toContain("challenge");
        const { enum: allowedChallenges } = tool.inputSchema.properties.challenge;
        for (const value of ["c_compiler", "rest_api", "fullstack_app", "list"]) {
            expect(allowedChallenges).toContain(value);
        }
    });
    it("log_benchmark_milestone requires benchmarkId, milestoneId, verificationPassed", () => {
        const { inputSchema } = findTool("log_benchmark_milestone");
        for (const field of ["benchmarkId", "milestoneId", "verificationPassed"]) {
            expect(inputSchema.required).toContain(field);
        }
    });
    it("complete_autonomy_benchmark requires benchmarkId and reason", () => {
        const { inputSchema } = findTool("complete_autonomy_benchmark");
        for (const field of ["benchmarkId", "reason"]) {
            expect(inputSchema.required).toContain(field);
        }
    });
});
|
|
1659
|
+
// Handler-level checks for start_autonomy_benchmark: the "list" pseudo
// challenge, a real benchmark start, and the unknown-challenge error path.
describe("Unit: start_autonomy_benchmark", () => {
    it("should list all available challenges", async () => {
        const tool = findTool("start_autonomy_benchmark");
        const listing = await tool.handler({ challenge: "list" });
        expect(listing.availableChallenges.length).toBe(5);
        const challengeKeys = listing.availableChallenges.map((entry) => entry.key);
        for (const key of ["c_compiler", "rest_api", "fullstack_app", "cli_tool", "data_pipeline"]) {
            expect(challengeKeys).toContain(key);
        }
    });
    it("should start a cli_tool benchmark", async () => {
        const tool = findTool("start_autonomy_benchmark");
        const started = await tool.handler({
            challenge: "cli_tool",
            notes: "test benchmark",
        });
        expect(started.benchmarkId).toBeTruthy();
        expect(started.challenge).toBe("cli_tool");
        expect(started.difficulty).toBe("easy");
        expect(started.totalPoints).toBe(100);
        expect(started.milestones.length).toBe(8);
        expect(started._quickRef).toBeDefined();
    });
    it("should throw for unknown challenge", async () => {
        const tool = findTool("start_autonomy_benchmark");
        await expect(tool.handler({ challenge: "nonexistent_challenge" })).rejects.toThrow("Unknown challenge");
    });
});
|
|
1689
|
+
// End-to-end exercise of the benchmark tools against shared in-process
// state: one run is started, one milestone passes (earning 15 points),
// one fails (earning 0), and completion reports the aggregated score.
// The steps are order-dependent: each handler call reads the state
// written by the previous one via benchmarkId.
describe("Integration: full benchmark lifecycle", () => {
    it("start → log milestone → complete", async () => {
        // 1. Start benchmark
        const benchmark = (await findTool("start_autonomy_benchmark").handler({
            challenge: "cli_tool",
            notes: "integration test",
        }));
        expect(benchmark.benchmarkId).toBeTruthy();
        // 2. Log a milestone (passing verification awards its points)
        const milestone = (await findTool("log_benchmark_milestone").handler({
            benchmarkId: benchmark.benchmarkId,
            milestoneId: "project_setup",
            verificationPassed: true,
            toolsUsed: ["run_closed_loop", "bootstrap_project"],
            notes: "Project initialized",
        }));
        expect(milestone.points).toBe(15);
        expect(milestone.progress.earnedPoints).toBe(15);
        expect(milestone.progress.milestonesCompleted).toBe(1);
        // 3. Log another milestone (failed) — awards 0 points and must not
        //    change the running earnedPoints total
        const milestone2 = (await findTool("log_benchmark_milestone").handler({
            benchmarkId: benchmark.benchmarkId,
            milestoneId: "arg_parsing",
            verificationPassed: false,
            notes: "Arg parsing failed tests",
        }));
        expect(milestone2.points).toBe(0);
        expect(milestone2.progress.earnedPoints).toBe(15); // unchanged
        // 4. Complete benchmark — 15 earned of 100 total → 15% → an F grade,
        //    with 1 completed, 1 failed, and 6 never-attempted milestones
        const completed = (await findTool("complete_autonomy_benchmark").handler({
            benchmarkId: benchmark.benchmarkId,
            reason: "stuck",
            notes: "Integration test complete",
        }));
        expect(completed.score.earnedPoints).toBe(15);
        expect(completed.score.percentage).toBe(15);
        expect(completed.score.grade).toContain("F");
        expect(completed.milestones.completed).toBe(1);
        expect(completed.milestones.failed).toBe(1);
        expect(completed.milestones.pending).toBe(6);
        expect(completed.analysis.strengths).toContain("Project Setup");
        expect(completed._quickRef).toBeDefined();
    });
});
|
|
1075
1733
|
//# sourceMappingURL=tools.test.js.map
|