nodebench-mcp 2.25.0 → 2.27.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/NODEBENCH_AGENTS.md +5 -4
- package/README.md +145 -16
- package/dist/__tests__/architectComplex.test.js +3 -5
- package/dist/__tests__/architectComplex.test.js.map +1 -1
- package/dist/__tests__/batchAutopilot.test.d.ts +8 -0
- package/dist/__tests__/batchAutopilot.test.js +218 -0
- package/dist/__tests__/batchAutopilot.test.js.map +1 -0
- package/dist/__tests__/cliSubcommands.test.d.ts +1 -0
- package/dist/__tests__/cliSubcommands.test.js +138 -0
- package/dist/__tests__/cliSubcommands.test.js.map +1 -0
- package/dist/__tests__/evalHarness.test.js +1 -1
- package/dist/__tests__/forecastingDogfood.test.d.ts +9 -0
- package/dist/__tests__/forecastingDogfood.test.js +284 -0
- package/dist/__tests__/forecastingDogfood.test.js.map +1 -0
- package/dist/__tests__/forecastingScoring.test.d.ts +9 -0
- package/dist/__tests__/forecastingScoring.test.js +202 -0
- package/dist/__tests__/forecastingScoring.test.js.map +1 -0
- package/dist/__tests__/localDashboard.test.d.ts +1 -0
- package/dist/__tests__/localDashboard.test.js +226 -0
- package/dist/__tests__/localDashboard.test.js.map +1 -0
- package/dist/__tests__/multiHopDogfood.test.js +11 -11
- package/dist/__tests__/multiHopDogfood.test.js.map +1 -1
- package/dist/__tests__/openclawDogfood.test.d.ts +23 -0
- package/dist/__tests__/openclawDogfood.test.js +535 -0
- package/dist/__tests__/openclawDogfood.test.js.map +1 -0
- package/dist/__tests__/openclawMessaging.test.d.ts +14 -0
- package/dist/__tests__/openclawMessaging.test.js +232 -0
- package/dist/__tests__/openclawMessaging.test.js.map +1 -0
- package/dist/__tests__/presetRealWorldBench.test.js +0 -2
- package/dist/__tests__/presetRealWorldBench.test.js.map +1 -1
- package/dist/__tests__/tools.test.js +9 -157
- package/dist/__tests__/tools.test.js.map +1 -1
- package/dist/__tests__/toolsetGatingEval.test.js +0 -2
- package/dist/__tests__/toolsetGatingEval.test.js.map +1 -1
- package/dist/__tests__/traceabilityDogfood.test.d.ts +12 -0
- package/dist/__tests__/traceabilityDogfood.test.js +241 -0
- package/dist/__tests__/traceabilityDogfood.test.js.map +1 -0
- package/dist/__tests__/webmcpTools.test.d.ts +7 -0
- package/dist/__tests__/webmcpTools.test.js +195 -0
- package/dist/__tests__/webmcpTools.test.js.map +1 -0
- package/dist/dashboard/briefHtml.d.ts +20 -0
- package/dist/dashboard/briefHtml.js +1000 -0
- package/dist/dashboard/briefHtml.js.map +1 -0
- package/dist/dashboard/briefServer.d.ts +18 -0
- package/dist/dashboard/briefServer.js +320 -0
- package/dist/dashboard/briefServer.js.map +1 -0
- package/dist/dashboard/html.js +1470 -1230
- package/dist/dashboard/html.js.map +1 -1
- package/dist/dashboard/server.js +166 -41
- package/dist/dashboard/server.js.map +1 -1
- package/dist/index.js +210 -14
- package/dist/index.js.map +1 -1
- package/dist/tools/critterTools.js +4 -0
- package/dist/tools/critterTools.js.map +1 -1
- package/dist/tools/forecastingTools.d.ts +11 -0
- package/dist/tools/forecastingTools.js +616 -0
- package/dist/tools/forecastingTools.js.map +1 -0
- package/dist/tools/localDashboardTools.d.ts +8 -0
- package/dist/tools/localDashboardTools.js +332 -0
- package/dist/tools/localDashboardTools.js.map +1 -0
- package/dist/tools/metaTools.js +170 -1
- package/dist/tools/metaTools.js.map +1 -1
- package/dist/tools/openclawTools.d.ts +11 -0
- package/dist/tools/openclawTools.js +1017 -0
- package/dist/tools/openclawTools.js.map +1 -0
- package/dist/tools/overstoryTools.d.ts +14 -0
- package/dist/tools/overstoryTools.js +426 -0
- package/dist/tools/overstoryTools.js.map +1 -0
- package/dist/tools/progressiveDiscoveryTools.js +50 -115
- package/dist/tools/progressiveDiscoveryTools.js.map +1 -1
- package/dist/tools/selfEvalTools.js +8 -1
- package/dist/tools/selfEvalTools.js.map +1 -1
- package/dist/tools/sessionMemoryTools.js +14 -2
- package/dist/tools/sessionMemoryTools.js.map +1 -1
- package/dist/tools/toolRegistry.d.ts +1 -15
- package/dist/tools/toolRegistry.js +243 -228
- package/dist/tools/toolRegistry.js.map +1 -1
- package/dist/tools/visualQaTools.d.ts +2 -0
- package/dist/tools/visualQaTools.js +1088 -0
- package/dist/tools/visualQaTools.js.map +1 -0
- package/dist/tools/webmcpTools.d.ts +16 -0
- package/dist/tools/webmcpTools.js +703 -0
- package/dist/tools/webmcpTools.js.map +1 -0
- package/dist/toolsetRegistry.js +6 -2
- package/dist/toolsetRegistry.js.map +1 -1
- package/package.json +2 -2
|
@@ -47,7 +47,7 @@ import { uiUxDiveTools } from "../tools/uiUxDiveTools.js";
|
|
|
47
47
|
import { mcpBridgeTools } from "../tools/mcpBridgeTools.js";
|
|
48
48
|
import { uiUxDiveAdvancedTools } from "../tools/uiUxDiveAdvancedTools.js";
|
|
49
49
|
import { skillUpdateTools } from "../tools/skillUpdateTools.js";
|
|
50
|
-
import {
|
|
50
|
+
import { overstoryTools } from "../tools/overstoryTools.js";
|
|
51
51
|
import { getQuickRef, hybridSearch, TOOL_REGISTRY, SEARCH_MODES, ALL_REGISTRY_ENTRIES, WORKFLOW_CHAINS, tokenize, buildDenseIndex, getToolComplexity } from "../tools/toolRegistry.js";
|
|
52
52
|
// Assemble all tools like index.ts does
|
|
53
53
|
const domainTools = [
|
|
@@ -89,7 +89,7 @@ const domainTools = [
|
|
|
89
89
|
...mcpBridgeTools,
|
|
90
90
|
...uiUxDiveAdvancedTools,
|
|
91
91
|
...skillUpdateTools,
|
|
92
|
-
...
|
|
92
|
+
...overstoryTools,
|
|
93
93
|
];
|
|
94
94
|
const metaTools = createMetaTools(domainTools);
|
|
95
95
|
const allToolsWithoutDiscovery = [...domainTools, ...metaTools];
|
|
@@ -99,9 +99,9 @@ const allTools = [...allToolsWithoutDiscovery, ...discoveryTools];
|
|
|
99
99
|
// STATIC LAYER — structure validation
|
|
100
100
|
// ═══════════════════════════════════════════════════════════════════════════
|
|
101
101
|
describe("Static: tool structure", () => {
|
|
102
|
-
it("should have
|
|
103
|
-
//
|
|
104
|
-
expect(allTools.length).toBe(
|
|
102
|
+
it("should have 213 tools total", () => {
|
|
103
|
+
// domain tools + 3 meta tools (findTools, getMethodology, check_mcp_setup) + 3 progressive discovery tools
|
|
104
|
+
expect(allTools.length).toBe(213);
|
|
105
105
|
});
|
|
106
106
|
it("every tool has name, description, inputSchema, handler", () => {
|
|
107
107
|
for (const tool of allTools) {
|
|
@@ -325,7 +325,7 @@ describe("Static: new methodology topics", () => {
|
|
|
325
325
|
expect(topics).toContain("agent_bootstrap");
|
|
326
326
|
expect(topics).toContain("autonomous_maintenance");
|
|
327
327
|
expect(topics).toContain("parallel_agent_teams");
|
|
328
|
-
expect(topics.length).toBe(
|
|
328
|
+
expect(topics.length).toBe(26); // All topics listed in overview (includes scenario_testing added v2.26.0)
|
|
329
329
|
});
|
|
330
330
|
});
|
|
331
331
|
describe("Unit: setup_local_env", () => {
|
|
@@ -1633,153 +1633,6 @@ describe("Unit: get_workflow_chain", () => {
|
|
|
1633
1633
|
expect(result.error).toBe(true);
|
|
1634
1634
|
});
|
|
1635
1635
|
});
|
|
1636
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
1637
|
-
// MULTI-HOP TRAVERSAL, RELATED TOOLS, PAGINATION & EXPANSION TESTS
|
|
1638
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
1639
|
-
describe("Static: relatedTools auto-derivation", () => {
|
|
1640
|
-
it("every registry entry should have relatedTools populated", () => {
|
|
1641
|
-
for (const entry of ALL_REGISTRY_ENTRIES) {
|
|
1642
|
-
expect(entry.quickRef.relatedTools, `Missing relatedTools for ${entry.name}`).toBeDefined();
|
|
1643
|
-
expect(entry.quickRef.relatedTools.length, `Empty relatedTools for ${entry.name}`).toBeGreaterThan(0);
|
|
1644
|
-
}
|
|
1645
|
-
});
|
|
1646
|
-
it("relatedTools should not overlap with nextTools", () => {
|
|
1647
|
-
for (const entry of ALL_REGISTRY_ENTRIES) {
|
|
1648
|
-
const nextSet = new Set(entry.quickRef.nextTools);
|
|
1649
|
-
for (const related of entry.quickRef.relatedTools ?? []) {
|
|
1650
|
-
expect(nextSet.has(related), `${entry.name}: '${related}' appears in both nextTools and relatedTools`).toBe(false);
|
|
1651
|
-
}
|
|
1652
|
-
}
|
|
1653
|
-
});
|
|
1654
|
-
it("relatedTools should not contain self", () => {
|
|
1655
|
-
for (const entry of ALL_REGISTRY_ENTRIES) {
|
|
1656
|
-
expect(entry.quickRef.relatedTools).not.toContain(entry.name);
|
|
1657
|
-
}
|
|
1658
|
-
});
|
|
1659
|
-
it("relatedTools should cap at 7 entries", () => {
|
|
1660
|
-
for (const entry of ALL_REGISTRY_ENTRIES) {
|
|
1661
|
-
expect(entry.quickRef.relatedTools.length, `${entry.name} has too many relatedTools`).toBeLessThanOrEqual(7);
|
|
1662
|
-
}
|
|
1663
|
-
});
|
|
1664
|
-
it("relatedTools should reference valid registry tools", () => {
|
|
1665
|
-
for (const entry of ALL_REGISTRY_ENTRIES) {
|
|
1666
|
-
for (const related of entry.quickRef.relatedTools ?? []) {
|
|
1667
|
-
expect(TOOL_REGISTRY.has(related), `${entry.name} relatedTools references unknown tool: ${related}`).toBe(true);
|
|
1668
|
-
}
|
|
1669
|
-
}
|
|
1670
|
-
});
|
|
1671
|
-
});
|
|
1672
|
-
describe("Unit: discover_tools pagination", () => {
|
|
1673
|
-
it("should return hasMore when more results available", async () => {
|
|
1674
|
-
const tool = findTool("discover_tools");
|
|
1675
|
-
const result = (await tool.handler({ query: "verify", limit: 3 }));
|
|
1676
|
-
expect(result.hasMore).toBe(true);
|
|
1677
|
-
expect(result.resultCount).toBe(3);
|
|
1678
|
-
expect(result.offset).toBe(0);
|
|
1679
|
-
expect(result.totalMatches).toBeGreaterThan(3);
|
|
1680
|
-
});
|
|
1681
|
-
it("should support offset parameter for paging", async () => {
|
|
1682
|
-
const tool = findTool("discover_tools");
|
|
1683
|
-
const page1 = (await tool.handler({ query: "verify", limit: 3, offset: 0 }));
|
|
1684
|
-
const page2 = (await tool.handler({ query: "verify", limit: 3, offset: 3 }));
|
|
1685
|
-
// Pages should have different results
|
|
1686
|
-
const page1Names = page1.results.map((r) => r.name);
|
|
1687
|
-
const page2Names = page2.results.map((r) => r.name);
|
|
1688
|
-
const overlap = page1Names.filter((n) => page2Names.includes(n));
|
|
1689
|
-
expect(overlap.length).toBe(0);
|
|
1690
|
-
});
|
|
1691
|
-
it("should return hasMore=false when exhausted", async () => {
|
|
1692
|
-
const tool = findTool("discover_tools");
|
|
1693
|
-
const result = (await tool.handler({ query: "very_unique_nonexistent_query_xyz_abc", limit: 10 }));
|
|
1694
|
-
expect(result.hasMore).toBe(false);
|
|
1695
|
-
});
|
|
1696
|
-
});
|
|
1697
|
-
describe("Unit: discover_tools expansion", () => {
|
|
1698
|
-
it("should expand top results via relatedTools", async () => {
|
|
1699
|
-
const tool = findTool("discover_tools");
|
|
1700
|
-
const withExpand = (await tool.handler({ query: "verify", limit: 20, expand: 3 }));
|
|
1701
|
-
const without = (await tool.handler({ query: "verify", limit: 20 }));
|
|
1702
|
-
// Expansion should discover additional or equal tools
|
|
1703
|
-
expect(withExpand.totalMatches).toBeGreaterThanOrEqual(without.totalMatches);
|
|
1704
|
-
});
|
|
1705
|
-
it("expanded results should have depth and expandedFrom", async () => {
|
|
1706
|
-
const tool = findTool("discover_tools");
|
|
1707
|
-
const result = (await tool.handler({ query: "verify", limit: 30, expand: 3, explain: true }));
|
|
1708
|
-
const expanded = result.results.filter((r) => r.depth && r.depth > 0);
|
|
1709
|
-
// Verify structure of any expanded results
|
|
1710
|
-
for (const r of expanded) {
|
|
1711
|
-
expect(r.expandedFrom).toBeDefined();
|
|
1712
|
-
expect(r.expandedFrom.length).toBeGreaterThan(0);
|
|
1713
|
-
}
|
|
1714
|
-
});
|
|
1715
|
-
});
|
|
1716
|
-
describe("Unit: get_tool_quick_ref multi-hop", () => {
|
|
1717
|
-
it("depth=1 should return direct neighbors only (hopDistance=1)", async () => {
|
|
1718
|
-
const tool = findTool("get_tool_quick_ref");
|
|
1719
|
-
const result = (await tool.handler({
|
|
1720
|
-
toolName: "start_verification_cycle",
|
|
1721
|
-
includeRelatedDetails: true,
|
|
1722
|
-
depth: 1,
|
|
1723
|
-
}));
|
|
1724
|
-
expect(result.depth).toBe(1);
|
|
1725
|
-
expect(result.relatedToolDetails).toBeDefined();
|
|
1726
|
-
for (const details of Object.values(result.relatedToolDetails)) {
|
|
1727
|
-
expect(details.hopDistance).toBe(1);
|
|
1728
|
-
}
|
|
1729
|
-
});
|
|
1730
|
-
it("depth=2 should discover tools 2 hops away", async () => {
|
|
1731
|
-
const tool = findTool("get_tool_quick_ref");
|
|
1732
|
-
const depth1Result = (await tool.handler({
|
|
1733
|
-
toolName: "start_verification_cycle",
|
|
1734
|
-
includeRelatedDetails: true,
|
|
1735
|
-
depth: 1,
|
|
1736
|
-
}));
|
|
1737
|
-
const depth2Result = (await tool.handler({
|
|
1738
|
-
toolName: "start_verification_cycle",
|
|
1739
|
-
includeRelatedDetails: true,
|
|
1740
|
-
depth: 2,
|
|
1741
|
-
}));
|
|
1742
|
-
// Depth 2 should discover more tools
|
|
1743
|
-
expect(depth2Result.totalDiscovered).toBeGreaterThan(depth1Result.totalDiscovered);
|
|
1744
|
-
// Should have both hop distances
|
|
1745
|
-
const distances = Object.values(depth2Result.relatedToolDetails).map((d) => d.hopDistance);
|
|
1746
|
-
expect(distances).toContain(1);
|
|
1747
|
-
expect(distances).toContain(2);
|
|
1748
|
-
});
|
|
1749
|
-
it("depth > 3 should be capped at 3", async () => {
|
|
1750
|
-
const tool = findTool("get_tool_quick_ref");
|
|
1751
|
-
const result = (await tool.handler({
|
|
1752
|
-
toolName: "start_verification_cycle",
|
|
1753
|
-
includeRelatedDetails: true,
|
|
1754
|
-
depth: 10,
|
|
1755
|
-
}));
|
|
1756
|
-
expect(result.depth).toBe(3);
|
|
1757
|
-
const maxHop = Math.max(...Object.values(result.relatedToolDetails).map((d) => d.hopDistance));
|
|
1758
|
-
expect(maxHop).toBeLessThanOrEqual(3);
|
|
1759
|
-
});
|
|
1760
|
-
it("default depth should be 1 (backward compatible)", async () => {
|
|
1761
|
-
const tool = findTool("get_tool_quick_ref");
|
|
1762
|
-
const result = (await tool.handler({
|
|
1763
|
-
toolName: "run_mandatory_flywheel",
|
|
1764
|
-
includeRelatedDetails: true,
|
|
1765
|
-
}));
|
|
1766
|
-
expect(result.depth).toBe(1);
|
|
1767
|
-
for (const details of Object.values(result.relatedToolDetails)) {
|
|
1768
|
-
expect(details.hopDistance).toBe(1);
|
|
1769
|
-
}
|
|
1770
|
-
});
|
|
1771
|
-
it("each discovered tool should have reachedVia field", async () => {
|
|
1772
|
-
const tool = findTool("get_tool_quick_ref");
|
|
1773
|
-
const result = (await tool.handler({
|
|
1774
|
-
toolName: "start_verification_cycle",
|
|
1775
|
-
depth: 2,
|
|
1776
|
-
}));
|
|
1777
|
-
for (const [, details] of Object.entries(result.relatedToolDetails)) {
|
|
1778
|
-
expect(details.reachedVia).toBeDefined();
|
|
1779
|
-
expect(typeof details.reachedVia).toBe("string");
|
|
1780
|
-
}
|
|
1781
|
-
});
|
|
1782
|
-
});
|
|
1783
1636
|
describe("Static: boilerplate tools", () => {
|
|
1784
1637
|
it("should include scaffold_nodebench_project and get_boilerplate_status", () => {
|
|
1785
1638
|
const names = allTools.map((t) => t.name);
|
|
@@ -2968,11 +2821,10 @@ describe("Agent-as-a-Graph: execution trace edges", () => {
|
|
|
2968
2821
|
limit: 15,
|
|
2969
2822
|
explain: true,
|
|
2970
2823
|
});
|
|
2971
|
-
expect(baseline.length).toBeGreaterThanOrEqual(
|
|
2824
|
+
expect(baseline.length).toBeGreaterThanOrEqual(6);
|
|
2972
2825
|
const topTool = baseline[0].name;
|
|
2973
|
-
|
|
2974
|
-
const
|
|
2975
|
-
const baselineScore = baseline[7].score;
|
|
2826
|
+
const boostTarget = baseline[5].name; // position 6 — NOT in top 5
|
|
2827
|
+
const baselineScore = baseline[5].score;
|
|
2976
2828
|
// Step 2: Inject trace edge from top tool → boost target
|
|
2977
2829
|
_resetCooccurrenceCache();
|
|
2978
2830
|
const edges = new Map();
|