npm - nodebench-mcp - Versions diffs - 2.22.0 → 2.26.0 - Mend

nodebench-mcp 2.22.0 → 2.26.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (96)

package/NODEBENCH_AGENTS.md +5 -4
package/README.md +495 -280
package/dist/__tests__/architectComplex.test.js +3 -5
package/dist/__tests__/architectComplex.test.js.map +1 -1
package/dist/__tests__/batchAutopilot.test.d.ts +8 -0
package/dist/__tests__/batchAutopilot.test.js +218 -0
package/dist/__tests__/batchAutopilot.test.js.map +1 -0
package/dist/__tests__/cliSubcommands.test.d.ts +1 -0
package/dist/__tests__/cliSubcommands.test.js +138 -0
package/dist/__tests__/cliSubcommands.test.js.map +1 -0
package/dist/__tests__/evalHarness.test.js +1 -1
package/dist/__tests__/forecastingDogfood.test.d.ts +9 -0
package/dist/__tests__/forecastingDogfood.test.js +284 -0
package/dist/__tests__/forecastingDogfood.test.js.map +1 -0
package/dist/__tests__/forecastingScoring.test.d.ts +9 -0
package/dist/__tests__/forecastingScoring.test.js +202 -0
package/dist/__tests__/forecastingScoring.test.js.map +1 -0
package/dist/__tests__/localDashboard.test.d.ts +1 -0
package/dist/__tests__/localDashboard.test.js +226 -0
package/dist/__tests__/localDashboard.test.js.map +1 -0
package/dist/__tests__/multiHopDogfood.test.d.ts +12 -0
package/dist/__tests__/multiHopDogfood.test.js +303 -0
package/dist/__tests__/multiHopDogfood.test.js.map +1 -0
package/dist/__tests__/openclawDogfood.test.d.ts +23 -0
package/dist/__tests__/openclawDogfood.test.js +535 -0
package/dist/__tests__/openclawDogfood.test.js.map +1 -0
package/dist/__tests__/openclawMessaging.test.d.ts +14 -0
package/dist/__tests__/openclawMessaging.test.js +232 -0
package/dist/__tests__/openclawMessaging.test.js.map +1 -0
package/dist/__tests__/tools.test.js +7 -3
package/dist/__tests__/tools.test.js.map +1 -1
package/dist/__tests__/traceabilityDogfood.test.d.ts +12 -0
package/dist/__tests__/traceabilityDogfood.test.js +241 -0
package/dist/__tests__/traceabilityDogfood.test.js.map +1 -0
package/dist/__tests__/webmcpTools.test.d.ts +7 -0
package/dist/__tests__/webmcpTools.test.js +195 -0
package/dist/__tests__/webmcpTools.test.js.map +1 -0
package/dist/dashboard/briefHtml.d.ts +20 -0
package/dist/dashboard/briefHtml.js +1000 -0
package/dist/dashboard/briefHtml.js.map +1 -0
package/dist/dashboard/briefServer.d.ts +18 -0
package/dist/dashboard/briefServer.js +320 -0
package/dist/dashboard/briefServer.js.map +1 -0
package/dist/dashboard/html.d.ts +18 -0
package/dist/dashboard/html.js +1491 -0
package/dist/dashboard/html.js.map +1 -0
package/dist/dashboard/server.d.ts +17 -0
package/dist/dashboard/server.js +403 -0
package/dist/dashboard/server.js.map +1 -0
package/dist/db.js +38 -0
package/dist/db.js.map +1 -1
package/dist/index.js +211 -5
package/dist/index.js.map +1 -1
package/dist/tools/critterTools.js +4 -0
package/dist/tools/critterTools.js.map +1 -1
package/dist/tools/forecastingTools.d.ts +11 -0
package/dist/tools/forecastingTools.js +616 -0
package/dist/tools/forecastingTools.js.map +1 -0
package/dist/tools/localDashboardTools.d.ts +8 -0
package/dist/tools/localDashboardTools.js +332 -0
package/dist/tools/localDashboardTools.js.map +1 -0
package/dist/tools/metaTools.js +170 -1
package/dist/tools/metaTools.js.map +1 -1
package/dist/tools/openclawTools.d.ts +11 -0
package/dist/tools/openclawTools.js +1017 -0
package/dist/tools/openclawTools.js.map +1 -0
package/dist/tools/overstoryTools.d.ts +14 -0
package/dist/tools/overstoryTools.js +426 -0
package/dist/tools/overstoryTools.js.map +1 -0
package/dist/tools/prReportTools.d.ts +11 -0
package/dist/tools/prReportTools.js +911 -0
package/dist/tools/prReportTools.js.map +1 -0
package/dist/tools/progressiveDiscoveryTools.js +28 -9
package/dist/tools/progressiveDiscoveryTools.js.map +1 -1
package/dist/tools/selfEvalTools.js +8 -1
package/dist/tools/selfEvalTools.js.map +1 -1
package/dist/tools/sessionMemoryTools.js +14 -2
package/dist/tools/sessionMemoryTools.js.map +1 -1
package/dist/tools/skillUpdateTools.d.ts +24 -0
package/dist/tools/skillUpdateTools.js +469 -0
package/dist/tools/skillUpdateTools.js.map +1 -0
package/dist/tools/toolRegistry.js +178 -0
package/dist/tools/toolRegistry.js.map +1 -1
package/dist/tools/uiUxDiveAdvancedTools.js +61 -0
package/dist/tools/uiUxDiveAdvancedTools.js.map +1 -1
package/dist/tools/uiUxDiveTools.js +154 -1
package/dist/tools/uiUxDiveTools.js.map +1 -1
package/dist/tools/visualQaTools.d.ts +2 -0
package/dist/tools/visualQaTools.js +1088 -0
package/dist/tools/visualQaTools.js.map +1 -0
package/dist/tools/webmcpTools.d.ts +16 -0
package/dist/tools/webmcpTools.js +703 -0
package/dist/tools/webmcpTools.js.map +1 -0
package/dist/toolsetRegistry.js +4 -0
package/dist/toolsetRegistry.js.map +1 -1
package/package.json +1 -1

package/dist/tools/toolRegistry.js CHANGED Viewed

@@ -2070,6 +2070,18 @@ const REGISTRY_ENTRIES = [
         },
         phase: "research",
     },
+    {
+        name: "ingest_dive_screenshots",
+        category: "ui_ux_dive",
+        tags: ["ui", "screenshot", "ingest", "import", "bulk", "disk", "gallery", "dive", "png", "jpg"],
+        quickRef: {
+            nextAction: "Screenshots ingested into dive session. View them in the dashboard or use dive_screenshot to capture new ones.",
+            nextTools: ["dive_screenshot", "tag_ui_bug", "end_component_flow", "get_dive_tree"],
+            methodology: "agentic_vision",
+            tip: "Scans a directory for PNG/JPG files and bulk-imports them into a dive session's screenshot gallery. Use after external Playwright captures.",
+        },
+        phase: "test",
+    },
     // ═══════════════════════════════════════════
     // UI/UX DIVE V2 — Deep interaction testing,
     // screenshots, design audit, backend links,
@@ -2235,6 +2247,70 @@ const REGISTRY_ENTRIES = [
         },
         phase: "ship",
     },
+    {
+        name: "open_dive_dashboard",
+        category: "ui_ux_dive_v2",
+        tags: ["ui", "dashboard", "dive", "flywheel", "browser", "local", "report", "overview", "session", "open", "visualization"],
+        quickRef: {
+            nextAction: "Dashboard is open. Continue the dive — the dashboard auto-refreshes every 5s to show live progress.",
+            nextTools: ["start_ui_dive", "dive_auto_discover", "dive_code_locate", "dive_fix_verify"],
+            methodology: "agentic_vision",
+            tip: "Opens a local web dashboard (port 6274) showing the full flywheel cycle: routes, components, bugs, fixes, tests, reviews. Like Serena MCP's local page but for UI dives.",
+        },
+        phase: "utility",
+    },
+    // ═══════════════════════════════════════════
+    // SKILL SELF-UPDATE PROTOCOL — Track rule
+    // file provenance, staleness, and resync
+    // ═══════════════════════════════════════════
+    {
+        name: "register_skill",
+        category: "skill_update",
+        tags: ["skill", "rule", "register", "source", "hash", "frontmatter", "provenance", "memory", "agents-md", "cursor", "windsurf", "update"],
+        quickRef: {
+            nextAction: "Skill registered. Use check_skill_freshness periodically to detect when source files change.",
+            nextTools: ["check_skill_freshness", "list_skills"],
+            methodology: "self_reinforced_learning",
+            tip: "Register every .md rule file (e.g. .windsurf/rules/, AGENTS.md) with its source files, triggers, and update instructions. Enables automatic staleness detection.",
+        },
+        phase: "verify",
+    },
+    {
+        name: "check_skill_freshness",
+        category: "skill_update",
+        tags: ["skill", "freshness", "stale", "hash", "check", "drift", "source", "detect", "sync", "update", "rule"],
+        quickRef: {
+            nextAction: "If stale skills found, follow their update_instructions then call sync_skill to record the resync.",
+            nextTools: ["sync_skill", "list_skills", "register_skill"],
+            methodology: "self_reinforced_learning",
+            tip: "Run at session start or after big code changes. Compares SHA-256 hashes of source files to detect drift. Auto-updates skill status in DB.",
+        },
+        phase: "verify",
+    },
+    {
+        name: "sync_skill",
+        category: "skill_update",
+        tags: ["skill", "sync", "resync", "update", "hash", "refresh", "frontmatter", "rule", "source", "stale"],
+        quickRef: {
+            nextAction: "Skill synced. Verify the updated skill file is correct, then continue with your task.",
+            nextTools: ["check_skill_freshness", "list_skills"],
+            methodology: "self_reinforced_learning",
+            tip: "Call AFTER you have read the changed source files and updated the skill .md content. This tool records the sync and updates the hash.",
+        },
+        phase: "verify",
+    },
+    {
+        name: "list_skills",
+        category: "skill_update",
+        tags: ["skill", "list", "status", "overview", "rule", "memory", "history", "sync", "fresh", "stale"],
+        quickRef: {
+            nextAction: "Review skill statuses. Register any untracked rule files, check freshness for stale ones.",
+            nextTools: ["register_skill", "check_skill_freshness", "sync_skill"],
+            methodology: "self_reinforced_learning",
+            tip: "Use includeHistory:true to see the full sync timeline for each skill. Filter by status:'stale' to focus on what needs updating.",
+        },
+        phase: "utility",
+    },
     // ═══════════════════════════════════════════
     // MCP BRIDGE — Connect external MCP servers
     // ═══════════════════════════════════════════
@@ -2333,6 +2409,57 @@ const REGISTRY_ENTRIES = [
         },
         phase: "implement",
     },
+    // ═══════════════════════════════════════════
+    // QA ORCHESTRATION — Overstory multi-agent QA
+    // ═══════════════════════════════════════════
+    {
+        name: "overstory_fleet_status",
+        category: "qa_orchestration",
+        tags: ["overstory", "agent", "fleet", "status", "health", "multi-agent", "orchestration", "qa", "dogfood", "worktree"],
+        quickRef: {
+            nextAction: "Review agent states. If agents are idle, run dogfood:overstory to start a QA session.",
+            nextTools: ["overstory_qa_summary", "overstory_mail_log", "run_visual_qa_suite"],
+            methodology: "ai_flywheel",
+            tip: "Reads .overstory/agent-manifest.json and overstory.db. Shows configured agents, capabilities, gate policy, and live agent health.",
+        },
+        phase: "utility",
+    },
+    {
+        name: "overstory_qa_summary",
+        category: "qa_orchestration",
+        tags: ["overstory", "qa", "gate", "summary", "stability", "grade", "ssim", "triage", "p0", "p1", "dogfood"],
+        quickRef: {
+            nextAction: "If gate fails, check failing routes and fix p0/p1 issues. If gate passes, proceed to merge.",
+            nextTools: ["overstory_mail_log", "overstory_fleet_status", "run_visual_qa_suite", "burst_capture"],
+            methodology: "ai_flywheel",
+            tip: "Aggregates SSIM stability grades from visual_qa_runs and Gemini QA triage from Overstory mail. Returns gate pass/fail verdict.",
+        },
+        phase: "verify",
+    },
+    {
+        name: "overstory_mail_log",
+        category: "qa_orchestration",
+        tags: ["overstory", "mail", "log", "message", "route", "triage", "dispatch", "agent", "coordination"],
+        quickRef: {
+            nextAction: "Review messages to understand QA session state. Filter by type or agent for focused view.",
+            nextTools: ["overstory_qa_summary", "overstory_fleet_status", "overstory_merge_queue"],
+            methodology: "ai_flywheel",
+            tip: "Supports type_filter (result/dispatch/worker_done/escalation) and agent_filter. Shows structured mail payloads from the QA agent fleet.",
+        },
+        phase: "utility",
+    },
+    {
+        name: "overstory_merge_queue",
+        category: "qa_orchestration",
+        tags: ["overstory", "merge", "queue", "branch", "conflict", "gate", "builder", "qa", "resolution"],
+        quickRef: {
+            nextAction: "If branches are blocked, check QA gate failures. If pending, trigger merge with overstory merge --all.",
+            nextTools: ["overstory_qa_summary", "overstory_mail_log", "overstory_fleet_status"],
+            methodology: "ai_flywheel",
+            tip: "Shows FIFO merge queue with conflict resolution tiers. Use include_completed:true to see merge history.",
+        },
+        phase: "utility",
+    },
 ];
 // ── Exported lookup structures ───────────────────────────────────────────
 /** Map of tool name → registry entry for O(1) lookup */
@@ -2390,6 +2517,7 @@ const CATEGORY_COMPLEXITY = {
     email: "medium",
     rss: "low",
     architect: "low",
+    qa_orchestration: "low",
 };
 /** Per-tool complexity overrides (when category default is wrong) */
 const TOOL_COMPLEXITY_OVERRIDES = {
@@ -3190,6 +3318,7 @@ export const WORKFLOW_CHAINS = {
             { tool: "run_mandatory_flywheel", action: "6-step final verification" },
             { tool: "record_learning", action: "Capture what you learned" },
             { tool: "promote_to_eval", action: "Feed into eval batch" },
+            { tool: "save_session_note", action: "Save traceability note — cite original request, summarize what was delivered" },
         ],
     },
     fix_bug: {
@@ -3202,6 +3331,7 @@ export const WORKFLOW_CHAINS = {
             { tool: "log_test_result", action: "Record regression test" },
             { tool: "run_mandatory_flywheel", action: "6-step verification" },
             { tool: "record_learning", action: "Record the gotcha/pattern" },
+            { tool: "save_session_note", action: "Save traceability note — cite original request, record root cause and fix" },
         ],
     },
     ui_change: {
@@ -3215,6 +3345,7 @@ export const WORKFLOW_CHAINS = {
             { tool: "run_quality_gate", action: "Run ui_ux_qa gate" },
             { tool: "run_mandatory_flywheel", action: "Final verification" },
             { tool: "record_learning", action: "Record UI patterns" },
+            { tool: "save_session_note", action: "Save traceability note — cite original request, record visual evidence path" },
         ],
     },
     parallel_project: {
@@ -3532,5 +3663,52 @@ export const WORKFLOW_CHAINS = {
             { tool: "save_session_note", action: "Log sent emails so you have an audit trail that survives compaction" },
         ],
     },
+    webmcp_discovery: {
+        name: "WebMCP Origin Discovery",
+        description: "Connect to a WebMCP-enabled origin, discover its tools, and invoke them from the agent",
+        steps: [
+            { tool: "connect_webmcp_origin", action: "Connect to the target origin URL and establish a WebMCP session" },
+            { tool: "list_webmcp_tools", action: "List all tools exposed by the origin with schemas and annotations" },
+            { tool: "call_webmcp_tool", action: "Invoke a specific tool on the remote origin with arguments" },
+            { tool: "disconnect_webmcp_origin", action: "Clean up the WebMCP session when done" },
+        ],
+    },
+    batch_autopilot: {
+        name: "Batch Autopilot Run",
+        description: "Set up an operator profile and run a batch autopilot session for autonomous agent tasks",
+        steps: [
+            { tool: "setup_operator_profile", action: "Create or update USER.md and operator profile for autopilot context" },
+            { tool: "get_autopilot_status", action: "Check current autopilot readiness, profile completeness, and last run status" },
+            { tool: "trigger_batch_run", action: "Start a batch autopilot run using the operator profile as context" },
+            { tool: "get_batch_run_history", action: "Review history of past batch runs, outcomes, and timing" },
+            { tool: "sync_operator_profile", action: "Sync operator profile state from disk after manual edits" },
+        ],
+    },
+    daily_review: {
+        name: "Daily Brief Review",
+        description: "Pull the latest daily brief, review narrative threads, check ops dashboard, and sync to local storage",
+        steps: [
+            { tool: "sync_daily_brief", action: "Pull today's brief and narrative from Convex into local SQLite" },
+            { tool: "get_daily_brief_summary", action: "Get the full brief summary with key signals and insights" },
+            { tool: "get_narrative_status", action: "Check narrative thread status — dominant story, under-reported angle, evidence scores" },
+            { tool: "get_ops_dashboard", action: "Review pipeline health: posting status, tool usage, active workflows" },
+            { tool: "open_local_dashboard", action: "Open the local HTML dashboard in the browser for visual review" },
+        ],
+    },
+    gemini_qa: {
+        name: "Gemini Vision QA Loop",
+        description: "Automated UI/UX quality gate — capture screenshots (dark/light × desktop/mobile), send to Gemini Flash for Jony Ive product design review, fix issues, loop until 100/100",
+        steps: [
+            { tool: "check_mcp_setup", action: "Verify Gemini API key (GOOGLE_AI_KEY) and vision domain are ready" },
+            { tool: "start_verification_cycle", action: "Open a verification cycle titled 'Gemini QA Loop' to track progress" },
+            { tool: "save_session_note", action: "Shell: `npx vite build` then `npx playwright test tests/e2e/full-ui-dogfood.spec.ts --project=chromium --workers=1` — capture 4-variant screenshots" },
+            { tool: "save_session_note", action: "Shell: `npm run dogfood:publish` — copy screenshots to public/dogfood/ with variant metadata manifest" },
+            { tool: "save_session_note", action: "Shell: `npx vite build && node scripts/ui/runDogfoodGeminiQa.mjs` — rebuild, launch preview, trigger Gemini QA" },
+            { tool: "log_test_result", action: "Log QA score from public/dogfood/qa-results.json — formula: 100 - P1×6 - P2×2 - P3×1" },
+            { tool: "save_session_note", action: "Fix P1 issues (6pts each) then P2 (2pts) then P3 (1pt) — root-cause each before fixing" },
+            { tool: "get_overstory_qa_gate", action: "Check QA gate for per-route stability grades and issue counts" },
+            { tool: "record_learning", action: "Record QA trajectory and Gemini finding patterns for regression tracking" },
+        ],
+    },
 };
 //# sourceMappingURL=toolRegistry.js.map