nodebench-mcp 2.25.0 → 2.26.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. package/NODEBENCH_AGENTS.md +5 -4
  2. package/README.md +145 -16
  3. package/dist/__tests__/architectComplex.test.js +3 -5
  4. package/dist/__tests__/architectComplex.test.js.map +1 -1
  5. package/dist/__tests__/batchAutopilot.test.d.ts +8 -0
  6. package/dist/__tests__/batchAutopilot.test.js +218 -0
  7. package/dist/__tests__/batchAutopilot.test.js.map +1 -0
  8. package/dist/__tests__/cliSubcommands.test.d.ts +1 -0
  9. package/dist/__tests__/cliSubcommands.test.js +138 -0
  10. package/dist/__tests__/cliSubcommands.test.js.map +1 -0
  11. package/dist/__tests__/evalHarness.test.js +1 -1
  12. package/dist/__tests__/forecastingDogfood.test.d.ts +9 -0
  13. package/dist/__tests__/forecastingDogfood.test.js +284 -0
  14. package/dist/__tests__/forecastingDogfood.test.js.map +1 -0
  15. package/dist/__tests__/forecastingScoring.test.d.ts +9 -0
  16. package/dist/__tests__/forecastingScoring.test.js +202 -0
  17. package/dist/__tests__/forecastingScoring.test.js.map +1 -0
  18. package/dist/__tests__/localDashboard.test.d.ts +1 -0
  19. package/dist/__tests__/localDashboard.test.js +226 -0
  20. package/dist/__tests__/localDashboard.test.js.map +1 -0
  21. package/dist/__tests__/multiHopDogfood.test.js +11 -11
  22. package/dist/__tests__/multiHopDogfood.test.js.map +1 -1
  23. package/dist/__tests__/openclawDogfood.test.d.ts +23 -0
  24. package/dist/__tests__/openclawDogfood.test.js +535 -0
  25. package/dist/__tests__/openclawDogfood.test.js.map +1 -0
  26. package/dist/__tests__/openclawMessaging.test.d.ts +14 -0
  27. package/dist/__tests__/openclawMessaging.test.js +232 -0
  28. package/dist/__tests__/openclawMessaging.test.js.map +1 -0
  29. package/dist/__tests__/presetRealWorldBench.test.js +0 -2
  30. package/dist/__tests__/presetRealWorldBench.test.js.map +1 -1
  31. package/dist/__tests__/tools.test.js +9 -157
  32. package/dist/__tests__/tools.test.js.map +1 -1
  33. package/dist/__tests__/toolsetGatingEval.test.js +0 -2
  34. package/dist/__tests__/toolsetGatingEval.test.js.map +1 -1
  35. package/dist/__tests__/traceabilityDogfood.test.d.ts +12 -0
  36. package/dist/__tests__/traceabilityDogfood.test.js +241 -0
  37. package/dist/__tests__/traceabilityDogfood.test.js.map +1 -0
  38. package/dist/__tests__/webmcpTools.test.d.ts +7 -0
  39. package/dist/__tests__/webmcpTools.test.js +195 -0
  40. package/dist/__tests__/webmcpTools.test.js.map +1 -0
  41. package/dist/dashboard/briefHtml.d.ts +20 -0
  42. package/dist/dashboard/briefHtml.js +1000 -0
  43. package/dist/dashboard/briefHtml.js.map +1 -0
  44. package/dist/dashboard/briefServer.d.ts +18 -0
  45. package/dist/dashboard/briefServer.js +320 -0
  46. package/dist/dashboard/briefServer.js.map +1 -0
  47. package/dist/dashboard/html.js +1470 -1230
  48. package/dist/dashboard/html.js.map +1 -1
  49. package/dist/dashboard/server.js +166 -41
  50. package/dist/dashboard/server.js.map +1 -1
  51. package/dist/index.js +208 -12
  52. package/dist/index.js.map +1 -1
  53. package/dist/tools/critterTools.js +4 -0
  54. package/dist/tools/critterTools.js.map +1 -1
  55. package/dist/tools/forecastingTools.d.ts +11 -0
  56. package/dist/tools/forecastingTools.js +616 -0
  57. package/dist/tools/forecastingTools.js.map +1 -0
  58. package/dist/tools/localDashboardTools.d.ts +8 -0
  59. package/dist/tools/localDashboardTools.js +332 -0
  60. package/dist/tools/localDashboardTools.js.map +1 -0
  61. package/dist/tools/metaTools.js +170 -1
  62. package/dist/tools/metaTools.js.map +1 -1
  63. package/dist/tools/openclawTools.d.ts +11 -0
  64. package/dist/tools/openclawTools.js +1017 -0
  65. package/dist/tools/openclawTools.js.map +1 -0
  66. package/dist/tools/overstoryTools.d.ts +14 -0
  67. package/dist/tools/overstoryTools.js +426 -0
  68. package/dist/tools/overstoryTools.js.map +1 -0
  69. package/dist/tools/progressiveDiscoveryTools.js +45 -113
  70. package/dist/tools/progressiveDiscoveryTools.js.map +1 -1
  71. package/dist/tools/selfEvalTools.js +8 -1
  72. package/dist/tools/selfEvalTools.js.map +1 -1
  73. package/dist/tools/sessionMemoryTools.js +14 -2
  74. package/dist/tools/sessionMemoryTools.js.map +1 -1
  75. package/dist/tools/toolRegistry.d.ts +1 -15
  76. package/dist/tools/toolRegistry.js +102 -228
  77. package/dist/tools/toolRegistry.js.map +1 -1
  78. package/dist/tools/visualQaTools.d.ts +2 -0
  79. package/dist/tools/visualQaTools.js +1088 -0
  80. package/dist/tools/visualQaTools.js.map +1 -0
  81. package/dist/tools/webmcpTools.d.ts +16 -0
  82. package/dist/tools/webmcpTools.js +703 -0
  83. package/dist/tools/webmcpTools.js.map +1 -0
  84. package/dist/toolsetRegistry.js +2 -2
  85. package/dist/toolsetRegistry.js.map +1 -1
  86. package/package.json +2 -2
@@ -47,7 +47,7 @@ import { uiUxDiveTools } from "../tools/uiUxDiveTools.js";
47
47
  import { mcpBridgeTools } from "../tools/mcpBridgeTools.js";
48
48
  import { uiUxDiveAdvancedTools } from "../tools/uiUxDiveAdvancedTools.js";
49
49
  import { skillUpdateTools } from "../tools/skillUpdateTools.js";
50
- import { prReportTools } from "../tools/prReportTools.js";
50
+ import { overstoryTools } from "../tools/overstoryTools.js";
51
51
  import { getQuickRef, hybridSearch, TOOL_REGISTRY, SEARCH_MODES, ALL_REGISTRY_ENTRIES, WORKFLOW_CHAINS, tokenize, buildDenseIndex, getToolComplexity } from "../tools/toolRegistry.js";
52
52
  // Assemble all tools like index.ts does
53
53
  const domainTools = [
@@ -89,7 +89,7 @@ const domainTools = [
89
89
  ...mcpBridgeTools,
90
90
  ...uiUxDiveAdvancedTools,
91
91
  ...skillUpdateTools,
92
- ...prReportTools,
92
+ ...overstoryTools,
93
93
  ];
94
94
  const metaTools = createMetaTools(domainTools);
95
95
  const allToolsWithoutDiscovery = [...domainTools, ...metaTools];
@@ -99,9 +99,9 @@ const allTools = [...allToolsWithoutDiscovery, ...discoveryTools];
99
99
  // STATIC LAYER — structure validation
100
100
  // ═══════════════════════════════════════════════════════════════════════════
101
101
  describe("Static: tool structure", () => {
102
- it("should have 206 domain + 6 meta/discovery = 212 tools total", () => {
103
- // 206 domain tools + 3 meta tools (findTools, getMethodology, check_mcp_setup) + 3 progressive discovery tools
104
- expect(allTools.length).toBe(212);
102
+ it("should have 175 tools total", () => {
103
+ // domain tools + 3 meta tools (findTools, getMethodology, check_mcp_setup) + 3 progressive discovery tools
104
+ expect(allTools.length).toBe(213);
105
105
  });
106
106
  it("every tool has name, description, inputSchema, handler", () => {
107
107
  for (const tool of allTools) {
@@ -325,7 +325,7 @@ describe("Static: new methodology topics", () => {
325
325
  expect(topics).toContain("agent_bootstrap");
326
326
  expect(topics).toContain("autonomous_maintenance");
327
327
  expect(topics).toContain("parallel_agent_teams");
328
- expect(topics.length).toBe(24); // All topics listed in overview
328
+ expect(topics.length).toBe(26); // All topics listed in overview (includes scenario_testing added v2.26.0)
329
329
  });
330
330
  });
331
331
  describe("Unit: setup_local_env", () => {
@@ -1633,153 +1633,6 @@ describe("Unit: get_workflow_chain", () => {
1633
1633
  expect(result.error).toBe(true);
1634
1634
  });
1635
1635
  });
1636
- // ═══════════════════════════════════════════════════════════════════════
1637
- // MULTI-HOP TRAVERSAL, RELATED TOOLS, PAGINATION & EXPANSION TESTS
1638
- // ═══════════════════════════════════════════════════════════════════════
1639
- describe("Static: relatedTools auto-derivation", () => {
1640
- it("every registry entry should have relatedTools populated", () => {
1641
- for (const entry of ALL_REGISTRY_ENTRIES) {
1642
- expect(entry.quickRef.relatedTools, `Missing relatedTools for ${entry.name}`).toBeDefined();
1643
- expect(entry.quickRef.relatedTools.length, `Empty relatedTools for ${entry.name}`).toBeGreaterThan(0);
1644
- }
1645
- });
1646
- it("relatedTools should not overlap with nextTools", () => {
1647
- for (const entry of ALL_REGISTRY_ENTRIES) {
1648
- const nextSet = new Set(entry.quickRef.nextTools);
1649
- for (const related of entry.quickRef.relatedTools ?? []) {
1650
- expect(nextSet.has(related), `${entry.name}: '${related}' appears in both nextTools and relatedTools`).toBe(false);
1651
- }
1652
- }
1653
- });
1654
- it("relatedTools should not contain self", () => {
1655
- for (const entry of ALL_REGISTRY_ENTRIES) {
1656
- expect(entry.quickRef.relatedTools).not.toContain(entry.name);
1657
- }
1658
- });
1659
- it("relatedTools should cap at 7 entries", () => {
1660
- for (const entry of ALL_REGISTRY_ENTRIES) {
1661
- expect(entry.quickRef.relatedTools.length, `${entry.name} has too many relatedTools`).toBeLessThanOrEqual(7);
1662
- }
1663
- });
1664
- it("relatedTools should reference valid registry tools", () => {
1665
- for (const entry of ALL_REGISTRY_ENTRIES) {
1666
- for (const related of entry.quickRef.relatedTools ?? []) {
1667
- expect(TOOL_REGISTRY.has(related), `${entry.name} relatedTools references unknown tool: ${related}`).toBe(true);
1668
- }
1669
- }
1670
- });
1671
- });
1672
- describe("Unit: discover_tools pagination", () => {
1673
- it("should return hasMore when more results available", async () => {
1674
- const tool = findTool("discover_tools");
1675
- const result = (await tool.handler({ query: "verify", limit: 3 }));
1676
- expect(result.hasMore).toBe(true);
1677
- expect(result.resultCount).toBe(3);
1678
- expect(result.offset).toBe(0);
1679
- expect(result.totalMatches).toBeGreaterThan(3);
1680
- });
1681
- it("should support offset parameter for paging", async () => {
1682
- const tool = findTool("discover_tools");
1683
- const page1 = (await tool.handler({ query: "verify", limit: 3, offset: 0 }));
1684
- const page2 = (await tool.handler({ query: "verify", limit: 3, offset: 3 }));
1685
- // Pages should have different results
1686
- const page1Names = page1.results.map((r) => r.name);
1687
- const page2Names = page2.results.map((r) => r.name);
1688
- const overlap = page1Names.filter((n) => page2Names.includes(n));
1689
- expect(overlap.length).toBe(0);
1690
- });
1691
- it("should return hasMore=false when exhausted", async () => {
1692
- const tool = findTool("discover_tools");
1693
- const result = (await tool.handler({ query: "very_unique_nonexistent_query_xyz_abc", limit: 10 }));
1694
- expect(result.hasMore).toBe(false);
1695
- });
1696
- });
1697
- describe("Unit: discover_tools expansion", () => {
1698
- it("should expand top results via relatedTools", async () => {
1699
- const tool = findTool("discover_tools");
1700
- const withExpand = (await tool.handler({ query: "verify", limit: 20, expand: 3 }));
1701
- const without = (await tool.handler({ query: "verify", limit: 20 }));
1702
- // Expansion should discover additional or equal tools
1703
- expect(withExpand.totalMatches).toBeGreaterThanOrEqual(without.totalMatches);
1704
- });
1705
- it("expanded results should have depth and expandedFrom", async () => {
1706
- const tool = findTool("discover_tools");
1707
- const result = (await tool.handler({ query: "verify", limit: 30, expand: 3, explain: true }));
1708
- const expanded = result.results.filter((r) => r.depth && r.depth > 0);
1709
- // Verify structure of any expanded results
1710
- for (const r of expanded) {
1711
- expect(r.expandedFrom).toBeDefined();
1712
- expect(r.expandedFrom.length).toBeGreaterThan(0);
1713
- }
1714
- });
1715
- });
1716
- describe("Unit: get_tool_quick_ref multi-hop", () => {
1717
- it("depth=1 should return direct neighbors only (hopDistance=1)", async () => {
1718
- const tool = findTool("get_tool_quick_ref");
1719
- const result = (await tool.handler({
1720
- toolName: "start_verification_cycle",
1721
- includeRelatedDetails: true,
1722
- depth: 1,
1723
- }));
1724
- expect(result.depth).toBe(1);
1725
- expect(result.relatedToolDetails).toBeDefined();
1726
- for (const details of Object.values(result.relatedToolDetails)) {
1727
- expect(details.hopDistance).toBe(1);
1728
- }
1729
- });
1730
- it("depth=2 should discover tools 2 hops away", async () => {
1731
- const tool = findTool("get_tool_quick_ref");
1732
- const depth1Result = (await tool.handler({
1733
- toolName: "start_verification_cycle",
1734
- includeRelatedDetails: true,
1735
- depth: 1,
1736
- }));
1737
- const depth2Result = (await tool.handler({
1738
- toolName: "start_verification_cycle",
1739
- includeRelatedDetails: true,
1740
- depth: 2,
1741
- }));
1742
- // Depth 2 should discover more tools
1743
- expect(depth2Result.totalDiscovered).toBeGreaterThan(depth1Result.totalDiscovered);
1744
- // Should have both hop distances
1745
- const distances = Object.values(depth2Result.relatedToolDetails).map((d) => d.hopDistance);
1746
- expect(distances).toContain(1);
1747
- expect(distances).toContain(2);
1748
- });
1749
- it("depth > 3 should be capped at 3", async () => {
1750
- const tool = findTool("get_tool_quick_ref");
1751
- const result = (await tool.handler({
1752
- toolName: "start_verification_cycle",
1753
- includeRelatedDetails: true,
1754
- depth: 10,
1755
- }));
1756
- expect(result.depth).toBe(3);
1757
- const maxHop = Math.max(...Object.values(result.relatedToolDetails).map((d) => d.hopDistance));
1758
- expect(maxHop).toBeLessThanOrEqual(3);
1759
- });
1760
- it("default depth should be 1 (backward compatible)", async () => {
1761
- const tool = findTool("get_tool_quick_ref");
1762
- const result = (await tool.handler({
1763
- toolName: "run_mandatory_flywheel",
1764
- includeRelatedDetails: true,
1765
- }));
1766
- expect(result.depth).toBe(1);
1767
- for (const details of Object.values(result.relatedToolDetails)) {
1768
- expect(details.hopDistance).toBe(1);
1769
- }
1770
- });
1771
- it("each discovered tool should have reachedVia field", async () => {
1772
- const tool = findTool("get_tool_quick_ref");
1773
- const result = (await tool.handler({
1774
- toolName: "start_verification_cycle",
1775
- depth: 2,
1776
- }));
1777
- for (const [, details] of Object.entries(result.relatedToolDetails)) {
1778
- expect(details.reachedVia).toBeDefined();
1779
- expect(typeof details.reachedVia).toBe("string");
1780
- }
1781
- });
1782
- });
1783
1636
  describe("Static: boilerplate tools", () => {
1784
1637
  it("should include scaffold_nodebench_project and get_boilerplate_status", () => {
1785
1638
  const names = allTools.map((t) => t.name);
@@ -2968,11 +2821,10 @@ describe("Agent-as-a-Graph: execution trace edges", () => {
2968
2821
  limit: 15,
2969
2822
  explain: true,
2970
2823
  });
2971
- expect(baseline.length).toBeGreaterThanOrEqual(10);
2824
+ expect(baseline.length).toBeGreaterThanOrEqual(6);
2972
2825
  const topTool = baseline[0].name;
2973
- // Use position 8 (well outside top 5) to avoid tie-boundary issues when corpus changes
2974
- const boostTarget = baseline[7].name;
2975
- const baselineScore = baseline[7].score;
2826
+ const boostTarget = baseline[5].name; // position 6 NOT in top 5
2827
+ const baselineScore = baseline[5].score;
2976
2828
  // Step 2: Inject trace edge from top tool → boost target
2977
2829
  _resetCooccurrenceCache();
2978
2830
  const edges = new Map();