nodebench-mcp 2.22.0 → 2.26.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. package/NODEBENCH_AGENTS.md +5 -4
  2. package/README.md +495 -280
  3. package/dist/__tests__/architectComplex.test.js +3 -5
  4. package/dist/__tests__/architectComplex.test.js.map +1 -1
  5. package/dist/__tests__/batchAutopilot.test.d.ts +8 -0
  6. package/dist/__tests__/batchAutopilot.test.js +218 -0
  7. package/dist/__tests__/batchAutopilot.test.js.map +1 -0
  8. package/dist/__tests__/cliSubcommands.test.d.ts +1 -0
  9. package/dist/__tests__/cliSubcommands.test.js +138 -0
  10. package/dist/__tests__/cliSubcommands.test.js.map +1 -0
  11. package/dist/__tests__/evalHarness.test.js +1 -1
  12. package/dist/__tests__/forecastingDogfood.test.d.ts +9 -0
  13. package/dist/__tests__/forecastingDogfood.test.js +284 -0
  14. package/dist/__tests__/forecastingDogfood.test.js.map +1 -0
  15. package/dist/__tests__/forecastingScoring.test.d.ts +9 -0
  16. package/dist/__tests__/forecastingScoring.test.js +202 -0
  17. package/dist/__tests__/forecastingScoring.test.js.map +1 -0
  18. package/dist/__tests__/localDashboard.test.d.ts +1 -0
  19. package/dist/__tests__/localDashboard.test.js +226 -0
  20. package/dist/__tests__/localDashboard.test.js.map +1 -0
  21. package/dist/__tests__/multiHopDogfood.test.d.ts +12 -0
  22. package/dist/__tests__/multiHopDogfood.test.js +303 -0
  23. package/dist/__tests__/multiHopDogfood.test.js.map +1 -0
  24. package/dist/__tests__/openclawDogfood.test.d.ts +23 -0
  25. package/dist/__tests__/openclawDogfood.test.js +535 -0
  26. package/dist/__tests__/openclawDogfood.test.js.map +1 -0
  27. package/dist/__tests__/openclawMessaging.test.d.ts +14 -0
  28. package/dist/__tests__/openclawMessaging.test.js +232 -0
  29. package/dist/__tests__/openclawMessaging.test.js.map +1 -0
  30. package/dist/__tests__/tools.test.js +7 -3
  31. package/dist/__tests__/tools.test.js.map +1 -1
  32. package/dist/__tests__/traceabilityDogfood.test.d.ts +12 -0
  33. package/dist/__tests__/traceabilityDogfood.test.js +241 -0
  34. package/dist/__tests__/traceabilityDogfood.test.js.map +1 -0
  35. package/dist/__tests__/webmcpTools.test.d.ts +7 -0
  36. package/dist/__tests__/webmcpTools.test.js +195 -0
  37. package/dist/__tests__/webmcpTools.test.js.map +1 -0
  38. package/dist/dashboard/briefHtml.d.ts +20 -0
  39. package/dist/dashboard/briefHtml.js +1000 -0
  40. package/dist/dashboard/briefHtml.js.map +1 -0
  41. package/dist/dashboard/briefServer.d.ts +18 -0
  42. package/dist/dashboard/briefServer.js +320 -0
  43. package/dist/dashboard/briefServer.js.map +1 -0
  44. package/dist/dashboard/html.d.ts +18 -0
  45. package/dist/dashboard/html.js +1491 -0
  46. package/dist/dashboard/html.js.map +1 -0
  47. package/dist/dashboard/server.d.ts +17 -0
  48. package/dist/dashboard/server.js +403 -0
  49. package/dist/dashboard/server.js.map +1 -0
  50. package/dist/db.js +38 -0
  51. package/dist/db.js.map +1 -1
  52. package/dist/index.js +211 -5
  53. package/dist/index.js.map +1 -1
  54. package/dist/tools/critterTools.js +4 -0
  55. package/dist/tools/critterTools.js.map +1 -1
  56. package/dist/tools/forecastingTools.d.ts +11 -0
  57. package/dist/tools/forecastingTools.js +616 -0
  58. package/dist/tools/forecastingTools.js.map +1 -0
  59. package/dist/tools/localDashboardTools.d.ts +8 -0
  60. package/dist/tools/localDashboardTools.js +332 -0
  61. package/dist/tools/localDashboardTools.js.map +1 -0
  62. package/dist/tools/metaTools.js +170 -1
  63. package/dist/tools/metaTools.js.map +1 -1
  64. package/dist/tools/openclawTools.d.ts +11 -0
  65. package/dist/tools/openclawTools.js +1017 -0
  66. package/dist/tools/openclawTools.js.map +1 -0
  67. package/dist/tools/overstoryTools.d.ts +14 -0
  68. package/dist/tools/overstoryTools.js +426 -0
  69. package/dist/tools/overstoryTools.js.map +1 -0
  70. package/dist/tools/prReportTools.d.ts +11 -0
  71. package/dist/tools/prReportTools.js +911 -0
  72. package/dist/tools/prReportTools.js.map +1 -0
  73. package/dist/tools/progressiveDiscoveryTools.js +28 -9
  74. package/dist/tools/progressiveDiscoveryTools.js.map +1 -1
  75. package/dist/tools/selfEvalTools.js +8 -1
  76. package/dist/tools/selfEvalTools.js.map +1 -1
  77. package/dist/tools/sessionMemoryTools.js +14 -2
  78. package/dist/tools/sessionMemoryTools.js.map +1 -1
  79. package/dist/tools/skillUpdateTools.d.ts +24 -0
  80. package/dist/tools/skillUpdateTools.js +469 -0
  81. package/dist/tools/skillUpdateTools.js.map +1 -0
  82. package/dist/tools/toolRegistry.js +178 -0
  83. package/dist/tools/toolRegistry.js.map +1 -1
  84. package/dist/tools/uiUxDiveAdvancedTools.js +61 -0
  85. package/dist/tools/uiUxDiveAdvancedTools.js.map +1 -1
  86. package/dist/tools/uiUxDiveTools.js +154 -1
  87. package/dist/tools/uiUxDiveTools.js.map +1 -1
  88. package/dist/tools/visualQaTools.d.ts +2 -0
  89. package/dist/tools/visualQaTools.js +1088 -0
  90. package/dist/tools/visualQaTools.js.map +1 -0
  91. package/dist/tools/webmcpTools.d.ts +16 -0
  92. package/dist/tools/webmcpTools.js +703 -0
  93. package/dist/tools/webmcpTools.js.map +1 -0
  94. package/dist/toolsetRegistry.js +4 -0
  95. package/dist/toolsetRegistry.js.map +1 -1
  96. package/package.json +1 -1
@@ -2070,6 +2070,18 @@ const REGISTRY_ENTRIES = [
2070
2070
  },
2071
2071
  phase: "research",
2072
2072
  },
2073
+ {
2074
+ name: "ingest_dive_screenshots",
2075
+ category: "ui_ux_dive",
2076
+ tags: ["ui", "screenshot", "ingest", "import", "bulk", "disk", "gallery", "dive", "png", "jpg"],
2077
+ quickRef: {
2078
+ nextAction: "Screenshots ingested into dive session. View them in the dashboard or use dive_screenshot to capture new ones.",
2079
+ nextTools: ["dive_screenshot", "tag_ui_bug", "end_component_flow", "get_dive_tree"],
2080
+ methodology: "agentic_vision",
2081
+ tip: "Scans a directory for PNG/JPG files and bulk-imports them into a dive session's screenshot gallery. Use after external Playwright captures.",
2082
+ },
2083
+ phase: "test",
2084
+ },
2073
2085
  // ═══════════════════════════════════════════
2074
2086
  // UI/UX DIVE V2 — Deep interaction testing,
2075
2087
  // screenshots, design audit, backend links,
@@ -2235,6 +2247,70 @@ const REGISTRY_ENTRIES = [
2235
2247
  },
2236
2248
  phase: "ship",
2237
2249
  },
2250
+ {
2251
+ name: "open_dive_dashboard",
2252
+ category: "ui_ux_dive_v2",
2253
+ tags: ["ui", "dashboard", "dive", "flywheel", "browser", "local", "report", "overview", "session", "open", "visualization"],
2254
+ quickRef: {
2255
+ nextAction: "Dashboard is open. Continue the dive — the dashboard auto-refreshes every 5s to show live progress.",
2256
+ nextTools: ["start_ui_dive", "dive_auto_discover", "dive_code_locate", "dive_fix_verify"],
2257
+ methodology: "agentic_vision",
2258
+ tip: "Opens a local web dashboard (port 6274) showing the full flywheel cycle: routes, components, bugs, fixes, tests, reviews. Like Serena MCP's local page but for UI dives.",
2259
+ },
2260
+ phase: "utility",
2261
+ },
2262
+ // ═══════════════════════════════════════════
2263
+ // SKILL SELF-UPDATE PROTOCOL — Track rule
2264
+ // file provenance, staleness, and resync
2265
+ // ═══════════════════════════════════════════
2266
+ {
2267
+ name: "register_skill",
2268
+ category: "skill_update",
2269
+ tags: ["skill", "rule", "register", "source", "hash", "frontmatter", "provenance", "memory", "agents-md", "cursor", "windsurf", "update"],
2270
+ quickRef: {
2271
+ nextAction: "Skill registered. Use check_skill_freshness periodically to detect when source files change.",
2272
+ nextTools: ["check_skill_freshness", "list_skills"],
2273
+ methodology: "self_reinforced_learning",
2274
+ tip: "Register every .md rule file (e.g. .windsurf/rules/, AGENTS.md) with its source files, triggers, and update instructions. Enables automatic staleness detection.",
2275
+ },
2276
+ phase: "verify",
2277
+ },
2278
+ {
2279
+ name: "check_skill_freshness",
2280
+ category: "skill_update",
2281
+ tags: ["skill", "freshness", "stale", "hash", "check", "drift", "source", "detect", "sync", "update", "rule"],
2282
+ quickRef: {
2283
+ nextAction: "If stale skills found, follow their update_instructions then call sync_skill to record the resync.",
2284
+ nextTools: ["sync_skill", "list_skills", "register_skill"],
2285
+ methodology: "self_reinforced_learning",
2286
+ tip: "Run at session start or after big code changes. Compares SHA-256 hashes of source files to detect drift. Auto-updates skill status in DB.",
2287
+ },
2288
+ phase: "verify",
2289
+ },
2290
+ {
2291
+ name: "sync_skill",
2292
+ category: "skill_update",
2293
+ tags: ["skill", "sync", "resync", "update", "hash", "refresh", "frontmatter", "rule", "source", "stale"],
2294
+ quickRef: {
2295
+ nextAction: "Skill synced. Verify the updated skill file is correct, then continue with your task.",
2296
+ nextTools: ["check_skill_freshness", "list_skills"],
2297
+ methodology: "self_reinforced_learning",
2298
+ tip: "Call AFTER you have read the changed source files and updated the skill .md content. This tool records the sync and updates the hash.",
2299
+ },
2300
+ phase: "verify",
2301
+ },
2302
+ {
2303
+ name: "list_skills",
2304
+ category: "skill_update",
2305
+ tags: ["skill", "list", "status", "overview", "rule", "memory", "history", "sync", "fresh", "stale"],
2306
+ quickRef: {
2307
+ nextAction: "Review skill statuses. Register any untracked rule files, check freshness for stale ones.",
2308
+ nextTools: ["register_skill", "check_skill_freshness", "sync_skill"],
2309
+ methodology: "self_reinforced_learning",
2310
+ tip: "Use includeHistory:true to see the full sync timeline for each skill. Filter by status:'stale' to focus on what needs updating.",
2311
+ },
2312
+ phase: "utility",
2313
+ },
2238
2314
  // ═══════════════════════════════════════════
2239
2315
  // MCP BRIDGE — Connect external MCP servers
2240
2316
  // ═══════════════════════════════════════════
@@ -2333,6 +2409,57 @@ const REGISTRY_ENTRIES = [
2333
2409
  },
2334
2410
  phase: "implement",
2335
2411
  },
2412
+ // ═══════════════════════════════════════════
2413
+ // QA ORCHESTRATION — Overstory multi-agent QA
2414
+ // ═══════════════════════════════════════════
2415
+ {
2416
+ name: "overstory_fleet_status",
2417
+ category: "qa_orchestration",
2418
+ tags: ["overstory", "agent", "fleet", "status", "health", "multi-agent", "orchestration", "qa", "dogfood", "worktree"],
2419
+ quickRef: {
2420
+ nextAction: "Review agent states. If agents are idle, run dogfood:overstory to start a QA session.",
2421
+ nextTools: ["overstory_qa_summary", "overstory_mail_log", "run_visual_qa_suite"],
2422
+ methodology: "ai_flywheel",
2423
+ tip: "Reads .overstory/agent-manifest.json and overstory.db. Shows configured agents, capabilities, gate policy, and live agent health.",
2424
+ },
2425
+ phase: "utility",
2426
+ },
2427
+ {
2428
+ name: "overstory_qa_summary",
2429
+ category: "qa_orchestration",
2430
+ tags: ["overstory", "qa", "gate", "summary", "stability", "grade", "ssim", "triage", "p0", "p1", "dogfood"],
2431
+ quickRef: {
2432
+ nextAction: "If gate fails, check failing routes and fix p0/p1 issues. If gate passes, proceed to merge.",
2433
+ nextTools: ["overstory_mail_log", "overstory_fleet_status", "run_visual_qa_suite", "burst_capture"],
2434
+ methodology: "ai_flywheel",
2435
+ tip: "Aggregates SSIM stability grades from visual_qa_runs and Gemini QA triage from Overstory mail. Returns gate pass/fail verdict.",
2436
+ },
2437
+ phase: "verify",
2438
+ },
2439
+ {
2440
+ name: "overstory_mail_log",
2441
+ category: "qa_orchestration",
2442
+ tags: ["overstory", "mail", "log", "message", "route", "triage", "dispatch", "agent", "coordination"],
2443
+ quickRef: {
2444
+ nextAction: "Review messages to understand QA session state. Filter by type or agent for focused view.",
2445
+ nextTools: ["overstory_qa_summary", "overstory_fleet_status", "overstory_merge_queue"],
2446
+ methodology: "ai_flywheel",
2447
+ tip: "Supports type_filter (result/dispatch/worker_done/escalation) and agent_filter. Shows structured mail payloads from the QA agent fleet.",
2448
+ },
2449
+ phase: "utility",
2450
+ },
2451
+ {
2452
+ name: "overstory_merge_queue",
2453
+ category: "qa_orchestration",
2454
+ tags: ["overstory", "merge", "queue", "branch", "conflict", "gate", "builder", "qa", "resolution"],
2455
+ quickRef: {
2456
+ nextAction: "If branches are blocked, check QA gate failures. If pending, trigger merge with overstory merge --all.",
2457
+ nextTools: ["overstory_qa_summary", "overstory_mail_log", "overstory_fleet_status"],
2458
+ methodology: "ai_flywheel",
2459
+ tip: "Shows FIFO merge queue with conflict resolution tiers. Use include_completed:true to see merge history.",
2460
+ },
2461
+ phase: "utility",
2462
+ },
2336
2463
  ];
2337
2464
  // ── Exported lookup structures ───────────────────────────────────────────
2338
2465
  /** Map of tool name → registry entry for O(1) lookup */
@@ -2390,6 +2517,7 @@ const CATEGORY_COMPLEXITY = {
2390
2517
  email: "medium",
2391
2518
  rss: "low",
2392
2519
  architect: "low",
2520
+ qa_orchestration: "low",
2393
2521
  };
2394
2522
  /** Per-tool complexity overrides (when category default is wrong) */
2395
2523
  const TOOL_COMPLEXITY_OVERRIDES = {
@@ -3190,6 +3318,7 @@ export const WORKFLOW_CHAINS = {
3190
3318
  { tool: "run_mandatory_flywheel", action: "6-step final verification" },
3191
3319
  { tool: "record_learning", action: "Capture what you learned" },
3192
3320
  { tool: "promote_to_eval", action: "Feed into eval batch" },
3321
+ { tool: "save_session_note", action: "Save traceability note — cite original request, summarize what was delivered" },
3193
3322
  ],
3194
3323
  },
3195
3324
  fix_bug: {
@@ -3202,6 +3331,7 @@ export const WORKFLOW_CHAINS = {
3202
3331
  { tool: "log_test_result", action: "Record regression test" },
3203
3332
  { tool: "run_mandatory_flywheel", action: "6-step verification" },
3204
3333
  { tool: "record_learning", action: "Record the gotcha/pattern" },
3334
+ { tool: "save_session_note", action: "Save traceability note — cite original request, record root cause and fix" },
3205
3335
  ],
3206
3336
  },
3207
3337
  ui_change: {
@@ -3215,6 +3345,7 @@ export const WORKFLOW_CHAINS = {
3215
3345
  { tool: "run_quality_gate", action: "Run ui_ux_qa gate" },
3216
3346
  { tool: "run_mandatory_flywheel", action: "Final verification" },
3217
3347
  { tool: "record_learning", action: "Record UI patterns" },
3348
+ { tool: "save_session_note", action: "Save traceability note — cite original request, record visual evidence path" },
3218
3349
  ],
3219
3350
  },
3220
3351
  parallel_project: {
@@ -3532,5 +3663,52 @@ export const WORKFLOW_CHAINS = {
3532
3663
  { tool: "save_session_note", action: "Log sent emails so you have an audit trail that survives compaction" },
3533
3664
  ],
3534
3665
  },
3666
+ webmcp_discovery: {
3667
+ name: "WebMCP Origin Discovery",
3668
+ description: "Connect to a WebMCP-enabled origin, discover its tools, and invoke them from the agent",
3669
+ steps: [
3670
+ { tool: "connect_webmcp_origin", action: "Connect to the target origin URL and establish a WebMCP session" },
3671
+ { tool: "list_webmcp_tools", action: "List all tools exposed by the origin with schemas and annotations" },
3672
+ { tool: "call_webmcp_tool", action: "Invoke a specific tool on the remote origin with arguments" },
3673
+ { tool: "disconnect_webmcp_origin", action: "Clean up the WebMCP session when done" },
3674
+ ],
3675
+ },
3676
+ batch_autopilot: {
3677
+ name: "Batch Autopilot Run",
3678
+ description: "Set up an operator profile and run a batch autopilot session for autonomous agent tasks",
3679
+ steps: [
3680
+ { tool: "setup_operator_profile", action: "Create or update USER.md and operator profile for autopilot context" },
3681
+ { tool: "get_autopilot_status", action: "Check current autopilot readiness, profile completeness, and last run status" },
3682
+ { tool: "trigger_batch_run", action: "Start a batch autopilot run using the operator profile as context" },
3683
+ { tool: "get_batch_run_history", action: "Review history of past batch runs, outcomes, and timing" },
3684
+ { tool: "sync_operator_profile", action: "Sync operator profile state from disk after manual edits" },
3685
+ ],
3686
+ },
3687
+ daily_review: {
3688
+ name: "Daily Brief Review",
3689
+ description: "Pull the latest daily brief, review narrative threads, check ops dashboard, and sync to local storage",
3690
+ steps: [
3691
+ { tool: "sync_daily_brief", action: "Pull today's brief and narrative from Convex into local SQLite" },
3692
+ { tool: "get_daily_brief_summary", action: "Get the full brief summary with key signals and insights" },
3693
+ { tool: "get_narrative_status", action: "Check narrative thread status — dominant story, under-reported angle, evidence scores" },
3694
+ { tool: "get_ops_dashboard", action: "Review pipeline health: posting status, tool usage, active workflows" },
3695
+ { tool: "open_local_dashboard", action: "Open the local HTML dashboard in the browser for visual review" },
3696
+ ],
3697
+ },
3698
+ gemini_qa: {
3699
+ name: "Gemini Vision QA Loop",
3700
+ description: "Automated UI/UX quality gate — capture screenshots (dark/light × desktop/mobile), send to Gemini Flash for Jony Ive product design review, fix issues, loop until 100/100",
3701
+ steps: [
3702
+ { tool: "check_mcp_setup", action: "Verify Gemini API key (GOOGLE_AI_KEY) and vision domain are ready" },
3703
+ { tool: "start_verification_cycle", action: "Open a verification cycle titled 'Gemini QA Loop' to track progress" },
3704
+ { tool: "save_session_note", action: "Shell: `npx vite build` then `npx playwright test tests/e2e/full-ui-dogfood.spec.ts --project=chromium --workers=1` — capture 4-variant screenshots" },
3705
+ { tool: "save_session_note", action: "Shell: `npm run dogfood:publish` — copy screenshots to public/dogfood/ with variant metadata manifest" },
3706
+ { tool: "save_session_note", action: "Shell: `npx vite build && node scripts/ui/runDogfoodGeminiQa.mjs` — rebuild, launch preview, trigger Gemini QA" },
3707
+ { tool: "log_test_result", action: "Log QA score from public/dogfood/qa-results.json — formula: 100 - P1×6 - P2×2 - P3×1" },
3708
+ { tool: "save_session_note", action: "Fix P1 issues (6pts each) then P2 (2pts) then P3 (1pt) — root-cause each before fixing" },
3709
+ { tool: "get_overstory_qa_gate", action: "Check QA gate for per-route stability grades and issue counts" },
3710
+ { tool: "record_learning", action: "Record QA trajectory and Gemini finding patterns for regression tracking" },
3711
+ ],
3712
+ },
3535
3713
  };
3536
3714
  //# sourceMappingURL=toolRegistry.js.map