nodebench-mcp 2.25.0 → 2.27.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/NODEBENCH_AGENTS.md +5 -4
- package/README.md +145 -16
- package/dist/__tests__/architectComplex.test.js +3 -5
- package/dist/__tests__/architectComplex.test.js.map +1 -1
- package/dist/__tests__/batchAutopilot.test.d.ts +8 -0
- package/dist/__tests__/batchAutopilot.test.js +218 -0
- package/dist/__tests__/batchAutopilot.test.js.map +1 -0
- package/dist/__tests__/cliSubcommands.test.d.ts +1 -0
- package/dist/__tests__/cliSubcommands.test.js +138 -0
- package/dist/__tests__/cliSubcommands.test.js.map +1 -0
- package/dist/__tests__/evalHarness.test.js +1 -1
- package/dist/__tests__/forecastingDogfood.test.d.ts +9 -0
- package/dist/__tests__/forecastingDogfood.test.js +284 -0
- package/dist/__tests__/forecastingDogfood.test.js.map +1 -0
- package/dist/__tests__/forecastingScoring.test.d.ts +9 -0
- package/dist/__tests__/forecastingScoring.test.js +202 -0
- package/dist/__tests__/forecastingScoring.test.js.map +1 -0
- package/dist/__tests__/localDashboard.test.d.ts +1 -0
- package/dist/__tests__/localDashboard.test.js +226 -0
- package/dist/__tests__/localDashboard.test.js.map +1 -0
- package/dist/__tests__/multiHopDogfood.test.js +11 -11
- package/dist/__tests__/multiHopDogfood.test.js.map +1 -1
- package/dist/__tests__/openclawDogfood.test.d.ts +23 -0
- package/dist/__tests__/openclawDogfood.test.js +535 -0
- package/dist/__tests__/openclawDogfood.test.js.map +1 -0
- package/dist/__tests__/openclawMessaging.test.d.ts +14 -0
- package/dist/__tests__/openclawMessaging.test.js +232 -0
- package/dist/__tests__/openclawMessaging.test.js.map +1 -0
- package/dist/__tests__/presetRealWorldBench.test.js +0 -2
- package/dist/__tests__/presetRealWorldBench.test.js.map +1 -1
- package/dist/__tests__/tools.test.js +9 -157
- package/dist/__tests__/tools.test.js.map +1 -1
- package/dist/__tests__/toolsetGatingEval.test.js +0 -2
- package/dist/__tests__/toolsetGatingEval.test.js.map +1 -1
- package/dist/__tests__/traceabilityDogfood.test.d.ts +12 -0
- package/dist/__tests__/traceabilityDogfood.test.js +241 -0
- package/dist/__tests__/traceabilityDogfood.test.js.map +1 -0
- package/dist/__tests__/webmcpTools.test.d.ts +7 -0
- package/dist/__tests__/webmcpTools.test.js +195 -0
- package/dist/__tests__/webmcpTools.test.js.map +1 -0
- package/dist/dashboard/briefHtml.d.ts +20 -0
- package/dist/dashboard/briefHtml.js +1000 -0
- package/dist/dashboard/briefHtml.js.map +1 -0
- package/dist/dashboard/briefServer.d.ts +18 -0
- package/dist/dashboard/briefServer.js +320 -0
- package/dist/dashboard/briefServer.js.map +1 -0
- package/dist/dashboard/html.js +1470 -1230
- package/dist/dashboard/html.js.map +1 -1
- package/dist/dashboard/server.js +166 -41
- package/dist/dashboard/server.js.map +1 -1
- package/dist/index.js +210 -14
- package/dist/index.js.map +1 -1
- package/dist/tools/critterTools.js +4 -0
- package/dist/tools/critterTools.js.map +1 -1
- package/dist/tools/forecastingTools.d.ts +11 -0
- package/dist/tools/forecastingTools.js +616 -0
- package/dist/tools/forecastingTools.js.map +1 -0
- package/dist/tools/localDashboardTools.d.ts +8 -0
- package/dist/tools/localDashboardTools.js +332 -0
- package/dist/tools/localDashboardTools.js.map +1 -0
- package/dist/tools/metaTools.js +170 -1
- package/dist/tools/metaTools.js.map +1 -1
- package/dist/tools/openclawTools.d.ts +11 -0
- package/dist/tools/openclawTools.js +1017 -0
- package/dist/tools/openclawTools.js.map +1 -0
- package/dist/tools/overstoryTools.d.ts +14 -0
- package/dist/tools/overstoryTools.js +426 -0
- package/dist/tools/overstoryTools.js.map +1 -0
- package/dist/tools/progressiveDiscoveryTools.js +50 -115
- package/dist/tools/progressiveDiscoveryTools.js.map +1 -1
- package/dist/tools/selfEvalTools.js +8 -1
- package/dist/tools/selfEvalTools.js.map +1 -1
- package/dist/tools/sessionMemoryTools.js +14 -2
- package/dist/tools/sessionMemoryTools.js.map +1 -1
- package/dist/tools/toolRegistry.d.ts +1 -15
- package/dist/tools/toolRegistry.js +243 -228
- package/dist/tools/toolRegistry.js.map +1 -1
- package/dist/tools/visualQaTools.d.ts +2 -0
- package/dist/tools/visualQaTools.js +1088 -0
- package/dist/tools/visualQaTools.js.map +1 -0
- package/dist/tools/webmcpTools.d.ts +16 -0
- package/dist/tools/webmcpTools.js +703 -0
- package/dist/tools/webmcpTools.js.map +1 -0
- package/dist/toolsetRegistry.js +6 -2
- package/dist/toolsetRegistry.js.map +1 -1
- package/package.json +2 -2
|
@@ -2073,14 +2073,14 @@ const REGISTRY_ENTRIES = [
|
|
|
2073
2073
|
{
|
|
2074
2074
|
name: "ingest_dive_screenshots",
|
|
2075
2075
|
category: "ui_ux_dive",
|
|
2076
|
-
tags: ["ui", "screenshot", "ingest", "import", "bulk", "
|
|
2076
|
+
tags: ["ui", "screenshot", "ingest", "import", "bulk", "disk", "gallery", "dive", "png", "jpg"],
|
|
2077
2077
|
quickRef: {
|
|
2078
|
-
nextAction: "Screenshots ingested into session
|
|
2079
|
-
nextTools: ["
|
|
2078
|
+
nextAction: "Screenshots ingested into dive session. View them in the dashboard or use dive_screenshot to capture new ones.",
|
|
2079
|
+
nextTools: ["dive_screenshot", "tag_ui_bug", "end_component_flow", "get_dive_tree"],
|
|
2080
2080
|
methodology: "agentic_vision",
|
|
2081
|
-
tip: "
|
|
2081
|
+
tip: "Scans a directory for PNG/JPG files and bulk-imports them into a dive session's screenshot gallery. Use after external Playwright captures.",
|
|
2082
2082
|
},
|
|
2083
|
-
phase: "
|
|
2083
|
+
phase: "test",
|
|
2084
2084
|
},
|
|
2085
2085
|
// ═══════════════════════════════════════════
|
|
2086
2086
|
// UI/UX DIVE V2 — Deep interaction testing,
|
|
@@ -2266,7 +2266,7 @@ const REGISTRY_ENTRIES = [
|
|
|
2266
2266
|
{
|
|
2267
2267
|
name: "register_skill",
|
|
2268
2268
|
category: "skill_update",
|
|
2269
|
-
tags: ["skill", "rule", "register", "source", "hash", "frontmatter", "provenance", "memory", "agents-md", "cursor", "windsurf", "update"
|
|
2269
|
+
tags: ["skill", "rule", "register", "source", "hash", "frontmatter", "provenance", "memory", "agents-md", "cursor", "windsurf", "update"],
|
|
2270
2270
|
quickRef: {
|
|
2271
2271
|
nextAction: "Skill registered. Use check_skill_freshness periodically to detect when source files change.",
|
|
2272
2272
|
nextTools: ["check_skill_freshness", "list_skills"],
|
|
@@ -2312,28 +2312,6 @@ const REGISTRY_ENTRIES = [
|
|
|
2312
2312
|
phase: "utility",
|
|
2313
2313
|
},
|
|
2314
2314
|
// ═══════════════════════════════════════════
|
|
2315
|
-
// RE-EXAMINE 11/10 — Fresh-eyes quality pass
|
|
2316
|
-
// Modular rules: reexamine_process → a11y,
|
|
2317
|
-
// resilience, polish, keyboard, performance
|
|
2318
|
-
// Cross-ref via related_ frontmatter hops
|
|
2319
|
-
// ═══════════════════════════════════════════
|
|
2320
|
-
// NOTE: These are not MCP tools — they are rule
|
|
2321
|
-
// files in .cursor/rules/ and .windsurf/rules/.
|
|
2322
|
-
// The skill_update tools above (register_skill,
|
|
2323
|
-
// check_skill_freshness) track their freshness.
|
|
2324
|
-
// The related_ field in each rule's frontmatter
|
|
2325
|
-
// enables one-hop and two-hop cross-referencing:
|
|
2326
|
-
//
|
|
2327
|
-
// reexamine_process
|
|
2328
|
-
// └─ related_: [a11y, resilience, polish, keyboard, performance]
|
|
2329
|
-
// └─ reexamine_a11y.related_: [keyboard, polish, process]
|
|
2330
|
-
// └─ reexamine_resilience.related_: [performance, process, polish]
|
|
2331
|
-
// └─ reexamine_polish.related_: [a11y, performance, process]
|
|
2332
|
-
// └─ reexamine_keyboard.related_: [a11y, process]
|
|
2333
|
-
// └─ reexamine_performance.related_: [resilience, polish, process]
|
|
2334
|
-
//
|
|
2335
|
-
// Two-hop example: process → a11y → keyboard (discovers keyboard via a11y)
|
|
2336
|
-
// ═══════════════════════════════════════════
|
|
2337
2315
|
// MCP BRIDGE — Connect external MCP servers
|
|
2338
2316
|
// ═══════════════════════════════════════════
|
|
2339
2317
|
{
|
|
@@ -2432,43 +2410,178 @@ const REGISTRY_ENTRIES = [
|
|
|
2432
2410
|
phase: "implement",
|
|
2433
2411
|
},
|
|
2434
2412
|
// ═══════════════════════════════════════════
|
|
2435
|
-
//
|
|
2413
|
+
// QA ORCHESTRATION — Overstory multi-agent QA
|
|
2436
2414
|
// ═══════════════════════════════════════════
|
|
2437
2415
|
{
|
|
2438
|
-
name: "
|
|
2439
|
-
category: "
|
|
2440
|
-
tags: ["
|
|
2416
|
+
name: "overstory_fleet_status",
|
|
2417
|
+
category: "qa_orchestration",
|
|
2418
|
+
tags: ["overstory", "agent", "fleet", "status", "health", "multi-agent", "orchestration", "qa", "dogfood", "worktree"],
|
|
2441
2419
|
quickRef: {
|
|
2442
|
-
nextAction: "
|
|
2443
|
-
nextTools: ["
|
|
2444
|
-
methodology: "
|
|
2445
|
-
tip: "
|
|
2420
|
+
nextAction: "Review agent states. If agents are idle, run dogfood:overstory to start a QA session.",
|
|
2421
|
+
nextTools: ["overstory_qa_summary", "overstory_mail_log", "run_visual_qa_suite"],
|
|
2422
|
+
methodology: "ai_flywheel",
|
|
2423
|
+
tip: "Reads .overstory/agent-manifest.json and overstory.db. Shows configured agents, capabilities, gate policy, and live agent health.",
|
|
2446
2424
|
},
|
|
2447
|
-
phase: "
|
|
2425
|
+
phase: "utility",
|
|
2448
2426
|
},
|
|
2449
2427
|
{
|
|
2450
|
-
name: "
|
|
2451
|
-
category: "
|
|
2452
|
-
tags: ["
|
|
2428
|
+
name: "overstory_qa_summary",
|
|
2429
|
+
category: "qa_orchestration",
|
|
2430
|
+
tags: ["overstory", "qa", "gate", "summary", "stability", "grade", "ssim", "triage", "p0", "p1", "dogfood"],
|
|
2453
2431
|
quickRef: {
|
|
2454
|
-
nextAction: "
|
|
2455
|
-
nextTools: ["
|
|
2456
|
-
methodology: "
|
|
2457
|
-
tip: "
|
|
2432
|
+
nextAction: "If gate fails, check failing routes and fix p0/p1 issues. If gate passes, proceed to merge.",
|
|
2433
|
+
nextTools: ["overstory_mail_log", "overstory_fleet_status", "run_visual_qa_suite", "burst_capture"],
|
|
2434
|
+
methodology: "ai_flywheel",
|
|
2435
|
+
tip: "Aggregates SSIM stability grades from visual_qa_runs and Gemini QA triage from Overstory mail. Returns gate pass/fail verdict.",
|
|
2458
2436
|
},
|
|
2459
|
-
phase: "
|
|
2437
|
+
phase: "verify",
|
|
2460
2438
|
},
|
|
2461
2439
|
{
|
|
2462
|
-
name: "
|
|
2463
|
-
category: "
|
|
2464
|
-
tags: ["
|
|
2440
|
+
name: "overstory_mail_log",
|
|
2441
|
+
category: "qa_orchestration",
|
|
2442
|
+
tags: ["overstory", "mail", "log", "message", "route", "triage", "dispatch", "agent", "coordination"],
|
|
2465
2443
|
quickRef: {
|
|
2466
|
-
nextAction: "
|
|
2467
|
-
nextTools: ["
|
|
2468
|
-
methodology: "
|
|
2469
|
-
tip: "
|
|
2444
|
+
nextAction: "Review messages to understand QA session state. Filter by type or agent for focused view.",
|
|
2445
|
+
nextTools: ["overstory_qa_summary", "overstory_fleet_status", "overstory_merge_queue"],
|
|
2446
|
+
methodology: "ai_flywheel",
|
|
2447
|
+
tip: "Supports type_filter (result/dispatch/worker_done/escalation) and agent_filter. Shows structured mail payloads from the QA agent fleet.",
|
|
2470
2448
|
},
|
|
2471
|
-
phase: "
|
|
2449
|
+
phase: "utility",
|
|
2450
|
+
},
|
|
2451
|
+
{
|
|
2452
|
+
name: "overstory_merge_queue",
|
|
2453
|
+
category: "qa_orchestration",
|
|
2454
|
+
tags: ["overstory", "merge", "queue", "branch", "conflict", "gate", "builder", "qa", "resolution"],
|
|
2455
|
+
quickRef: {
|
|
2456
|
+
nextAction: "If branches are blocked, check QA gate failures. If pending, trigger merge with overstory merge --all.",
|
|
2457
|
+
nextTools: ["overstory_qa_summary", "overstory_mail_log", "overstory_fleet_status"],
|
|
2458
|
+
methodology: "ai_flywheel",
|
|
2459
|
+
tip: "Shows FIFO merge queue with conflict resolution tiers. Use include_completed:true to see merge history.",
|
|
2460
|
+
},
|
|
2461
|
+
phase: "utility",
|
|
2462
|
+
},
|
|
2463
|
+
// ═══════════════════════════════════════════
|
|
2464
|
+
// VISUAL QA — Deep interaction captures & stability
|
|
2465
|
+
// ═══════════════════════════════════════════
|
|
2466
|
+
{
|
|
2467
|
+
name: "burst_capture",
|
|
2468
|
+
category: "visual_qa",
|
|
2469
|
+
tags: ["burst", "capture", "screenshot", "rapid", "interaction", "deep", "animation", "transition", "hover", "click", "popup", "drawer", "modal", "streaming", "agent", "component"],
|
|
2470
|
+
quickRef: {
|
|
2471
|
+
nextAction: "Burst captured. Run compute_web_stability to measure SSIM across frames, or generate_grid_collage for visual comparison.",
|
|
2472
|
+
nextTools: ["compute_web_stability", "generate_grid_collage", "run_visual_qa_suite"],
|
|
2473
|
+
methodology: "ai_flywheel",
|
|
2474
|
+
tip: "Use burst capture for deep interaction testing — popups, hover states, streaming responses, drawer opens, thread switches. Captures rapid frame sequences during UI transitions.",
|
|
2475
|
+
},
|
|
2476
|
+
phase: "test",
|
|
2477
|
+
complexity: "medium",
|
|
2478
|
+
},
|
|
2479
|
+
{
|
|
2480
|
+
name: "generate_grid_collage",
|
|
2481
|
+
category: "visual_qa",
|
|
2482
|
+
tags: ["grid", "collage", "visual", "comparison", "before-after", "screenshot", "composite", "overview", "review"],
|
|
2483
|
+
quickRef: {
|
|
2484
|
+
nextAction: "Collage generated. Review visually for inconsistencies. Use run_visual_qa_suite for automated scoring.",
|
|
2485
|
+
nextTools: ["run_visual_qa_suite", "compute_web_stability", "analyze_screenshot"],
|
|
2486
|
+
methodology: "ai_flywheel",
|
|
2487
|
+
tip: "Generates a composite grid image from multiple screenshots — useful for comparing dark/light, desktop/mobile, or before/after states side-by-side.",
|
|
2488
|
+
},
|
|
2489
|
+
phase: "verify",
|
|
2490
|
+
complexity: "low",
|
|
2491
|
+
},
|
|
2492
|
+
{
|
|
2493
|
+
name: "compute_web_stability",
|
|
2494
|
+
category: "visual_qa",
|
|
2495
|
+
tags: ["stability", "ssim", "structural", "similarity", "flicker", "jank", "layout-shift", "regression", "diff", "frame", "comparison"],
|
|
2496
|
+
quickRef: {
|
|
2497
|
+
nextAction: "Stability computed. If SSIM < 0.95, investigate layout shifts or animation jank. Log issues with tag_ui_bug.",
|
|
2498
|
+
nextTools: ["burst_capture", "tag_ui_bug", "log_gap", "run_visual_qa_suite"],
|
|
2499
|
+
methodology: "ai_flywheel",
|
|
2500
|
+
tip: "Computes block-based SSIM between frame pairs to detect visual instability — layout shifts, flicker, and rendering regressions.",
|
|
2501
|
+
},
|
|
2502
|
+
phase: "test",
|
|
2503
|
+
complexity: "medium",
|
|
2504
|
+
},
|
|
2505
|
+
{
|
|
2506
|
+
name: "run_visual_qa_suite",
|
|
2507
|
+
category: "visual_qa",
|
|
2508
|
+
tags: ["visual", "qa", "suite", "end-to-end", "automated", "gemini", "scoring", "jony-ive", "design", "review", "deep-interaction", "scenario", "agent", "streaming", "popup", "drawer"],
|
|
2509
|
+
quickRef: {
|
|
2510
|
+
nextAction: "QA suite complete. Fix P0/P1 issues first (highest score impact), then P2/P3. Re-run to verify improvements.",
|
|
2511
|
+
nextTools: ["burst_capture", "log_gap", "record_learning", "save_session_note"],
|
|
2512
|
+
methodology: "ai_flywheel",
|
|
2513
|
+
tip: "End-to-end visual QA: captures all routes + deep interactions (agent queries, streaming, popups, drawers) → Gemini scores against Jony Ive design principles → auto-triages by P-level. Formula: 100 - P1×6 - P2×2 - P3×1.",
|
|
2514
|
+
},
|
|
2515
|
+
phase: "verify",
|
|
2516
|
+
complexity: "high",
|
|
2517
|
+
},
|
|
2518
|
+
// ═══════════════════════════════════════════
|
|
2519
|
+
// LOCAL DASHBOARD — Daily brief + narrative + ops
|
|
2520
|
+
// ═══════════════════════════════════════════
|
|
2521
|
+
{
|
|
2522
|
+
name: "sync_daily_brief",
|
|
2523
|
+
category: "local_dashboard",
|
|
2524
|
+
tags: ["sync", "daily", "brief", "convex", "sqlite", "pull", "refresh", "narrative", "dashboard", "data"],
|
|
2525
|
+
quickRef: {
|
|
2526
|
+
nextAction: "Data synced. Call get_daily_brief_summary to read the brief, or open_local_dashboard for visual review.",
|
|
2527
|
+
nextTools: ["get_daily_brief_summary", "get_narrative_status", "open_local_dashboard"],
|
|
2528
|
+
methodology: "ai_flywheel",
|
|
2529
|
+
tip: "Pulls latest dashboard snapshot + narrative threads from Convex into local SQLite. Requires CONVEX_SITE_URL and MCP_SECRET env vars.",
|
|
2530
|
+
},
|
|
2531
|
+
phase: "research",
|
|
2532
|
+
complexity: "medium",
|
|
2533
|
+
},
|
|
2534
|
+
{
|
|
2535
|
+
name: "get_daily_brief_summary",
|
|
2536
|
+
category: "local_dashboard",
|
|
2537
|
+
tags: ["daily", "brief", "summary", "metrics", "features", "sources", "dashboard", "offline", "local"],
|
|
2538
|
+
quickRef: {
|
|
2539
|
+
nextAction: "Review the brief. Check key signals and source quality. Use get_narrative_status for thread analysis.",
|
|
2540
|
+
nextTools: ["get_narrative_status", "get_ops_dashboard", "open_local_dashboard"],
|
|
2541
|
+
methodology: "ai_flywheel",
|
|
2542
|
+
tip: "Reads from local SQLite — zero network needed. Returns dashboard metrics, features, and source summary from the last sync.",
|
|
2543
|
+
},
|
|
2544
|
+
phase: "research",
|
|
2545
|
+
complexity: "low",
|
|
2546
|
+
},
|
|
2547
|
+
{
|
|
2548
|
+
name: "get_narrative_status",
|
|
2549
|
+
category: "local_dashboard",
|
|
2550
|
+
tags: ["narrative", "thread", "status", "phase", "emerging", "escalating", "climax", "resolution", "dormant", "story"],
|
|
2551
|
+
quickRef: {
|
|
2552
|
+
nextAction: "Review thread distribution. Focus on escalating/climax threads for timely action. Use get_ops_dashboard for pipeline health.",
|
|
2553
|
+
nextTools: ["get_daily_brief_summary", "get_ops_dashboard", "open_local_dashboard"],
|
|
2554
|
+
methodology: "ai_flywheel",
|
|
2555
|
+
tip: "Returns narrative threads grouped by phase with event counts. Filter by phase to focus on specific lifecycle stages.",
|
|
2556
|
+
},
|
|
2557
|
+
phase: "research",
|
|
2558
|
+
complexity: "low",
|
|
2559
|
+
},
|
|
2560
|
+
{
|
|
2561
|
+
name: "get_ops_dashboard",
|
|
2562
|
+
category: "local_dashboard",
|
|
2563
|
+
tags: ["ops", "operational", "dashboard", "sync", "tool-call", "frequency", "verification", "health", "monitoring"],
|
|
2564
|
+
quickRef: {
|
|
2565
|
+
nextAction: "Review ops health. If tool error rates are high, investigate root causes. If sync is stale, run sync_daily_brief.",
|
|
2566
|
+
nextTools: ["sync_daily_brief", "get_daily_brief_summary", "open_local_dashboard"],
|
|
2567
|
+
methodology: "ai_flywheel",
|
|
2568
|
+
tip: "Returns last sync info, tool call frequency (24h), active verification cycles, data counts, and privacy mode status.",
|
|
2569
|
+
},
|
|
2570
|
+
phase: "utility",
|
|
2571
|
+
complexity: "low",
|
|
2572
|
+
},
|
|
2573
|
+
{
|
|
2574
|
+
name: "open_local_dashboard",
|
|
2575
|
+
category: "local_dashboard",
|
|
2576
|
+
tags: ["open", "dashboard", "browser", "server", "html", "visual", "brief", "narrative", "ops", "local", "ui"],
|
|
2577
|
+
quickRef: {
|
|
2578
|
+
nextAction: "Dashboard is running. Open the URL in a browser to see Brief metrics, Narrative thread lanes, and Ops status.",
|
|
2579
|
+
nextTools: ["sync_daily_brief", "get_daily_brief_summary", "get_narrative_status"],
|
|
2580
|
+
methodology: "ai_flywheel",
|
|
2581
|
+
tip: "Starts the local dashboard server on port 6275 if not already running. Auto-refreshes every 30s from local SQLite.",
|
|
2582
|
+
},
|
|
2583
|
+
phase: "utility",
|
|
2584
|
+
complexity: "low",
|
|
2472
2585
|
},
|
|
2473
2586
|
];
|
|
2474
2587
|
// ── Exported lookup structures ───────────────────────────────────────────
|
|
@@ -2476,110 +2589,6 @@ const REGISTRY_ENTRIES = [
|
|
|
2476
2589
|
export const TOOL_REGISTRY = new Map(REGISTRY_ENTRIES.map((e) => [e.name, e]));
|
|
2477
2590
|
/** All registry entries as array */
|
|
2478
2591
|
export const ALL_REGISTRY_ENTRIES = REGISTRY_ENTRIES;
|
|
2479
|
-
// ── Auto-derive relatedTools for entries that don't have manual overrides ──
|
|
2480
|
-
// Uses 3 signals: same-category siblings, DOMAIN_CLUSTERS neighbors, tag overlap.
|
|
2481
|
-
// Must run after REGISTRY_ENTRIES is fully built. Forward-reference to DOMAIN_CLUSTERS
|
|
2482
|
-
// is fine because this runs at module load time (DOMAIN_CLUSTERS is defined below).
|
|
2483
|
-
/** Late-init: populated by _populateRelatedTools() at bottom of file */
|
|
2484
|
-
let _domainClusters = null;
|
|
2485
|
-
export function _setDomainClustersRef(clusters) {
|
|
2486
|
-
_domainClusters = clusters;
|
|
2487
|
-
}
|
|
2488
|
-
function computeRelatedTools(entry) {
|
|
2489
|
-
// If manually specified, use that
|
|
2490
|
-
if (entry.quickRef.relatedTools && entry.quickRef.relatedTools.length > 0) {
|
|
2491
|
-
return entry.quickRef.relatedTools;
|
|
2492
|
-
}
|
|
2493
|
-
const related = new Set();
|
|
2494
|
-
const nextToolsSet = new Set(entry.quickRef.nextTools);
|
|
2495
|
-
// 1. Same-category siblings (excluding self and nextTools), up to 3
|
|
2496
|
-
let sibCount = 0;
|
|
2497
|
-
for (const e of REGISTRY_ENTRIES) {
|
|
2498
|
-
if (sibCount >= 3)
|
|
2499
|
-
break;
|
|
2500
|
-
if (e.category === entry.category && e.name !== entry.name && !nextToolsSet.has(e.name)) {
|
|
2501
|
-
related.add(e.name);
|
|
2502
|
-
sibCount++;
|
|
2503
|
-
}
|
|
2504
|
-
}
|
|
2505
|
-
// 2. DOMAIN_CLUSTERS neighbors: tools from related categories, up to 2
|
|
2506
|
-
if (_domainClusters) {
|
|
2507
|
-
let clusterCount = 0;
|
|
2508
|
-
for (const cluster of Object.values(_domainClusters)) {
|
|
2509
|
-
if (clusterCount >= 2)
|
|
2510
|
-
break;
|
|
2511
|
-
if (cluster.includes(entry.category)) {
|
|
2512
|
-
for (const neighborCat of cluster) {
|
|
2513
|
-
if (clusterCount >= 2)
|
|
2514
|
-
break;
|
|
2515
|
-
if (neighborCat === entry.category)
|
|
2516
|
-
continue;
|
|
2517
|
-
for (const e of REGISTRY_ENTRIES) {
|
|
2518
|
-
if (e.category === neighborCat && !nextToolsSet.has(e.name) && !related.has(e.name)) {
|
|
2519
|
-
related.add(e.name);
|
|
2520
|
-
clusterCount++;
|
|
2521
|
-
break; // one tool per neighbor category
|
|
2522
|
-
}
|
|
2523
|
-
}
|
|
2524
|
-
}
|
|
2525
|
-
}
|
|
2526
|
-
}
|
|
2527
|
-
}
|
|
2528
|
-
// 3. Tag overlap: tools sharing 2+ tags (not in nextTools or already related), up to 2
|
|
2529
|
-
const myTags = new Set(entry.tags);
|
|
2530
|
-
let tagCount = 0;
|
|
2531
|
-
for (const other of REGISTRY_ENTRIES) {
|
|
2532
|
-
if (tagCount >= 2)
|
|
2533
|
-
break;
|
|
2534
|
-
if (other.name === entry.name || nextToolsSet.has(other.name) || related.has(other.name))
|
|
2535
|
-
continue;
|
|
2536
|
-
let overlap = 0;
|
|
2537
|
-
for (const t of other.tags) {
|
|
2538
|
-
if (myTags.has(t))
|
|
2539
|
-
overlap++;
|
|
2540
|
-
if (overlap >= 2)
|
|
2541
|
-
break;
|
|
2542
|
-
}
|
|
2543
|
-
if (overlap >= 2) {
|
|
2544
|
-
related.add(other.name);
|
|
2545
|
-
tagCount++;
|
|
2546
|
-
}
|
|
2547
|
-
}
|
|
2548
|
-
// 4. Fallback: if still empty (small category, all siblings in nextTools), accept 1-tag overlap
|
|
2549
|
-
if (related.size === 0) {
|
|
2550
|
-
for (const other of REGISTRY_ENTRIES) {
|
|
2551
|
-
if (related.size >= 3)
|
|
2552
|
-
break;
|
|
2553
|
-
if (other.name === entry.name || nextToolsSet.has(other.name))
|
|
2554
|
-
continue;
|
|
2555
|
-
const hasTagOverlap = other.tags.some((t) => myTags.has(t));
|
|
2556
|
-
if (hasTagOverlap) {
|
|
2557
|
-
related.add(other.name);
|
|
2558
|
-
}
|
|
2559
|
-
}
|
|
2560
|
-
}
|
|
2561
|
-
// 5. Last resort: if STILL empty, pick tools from the same phase (workflow adjacency)
|
|
2562
|
-
if (related.size === 0) {
|
|
2563
|
-
for (const other of REGISTRY_ENTRIES) {
|
|
2564
|
-
if (related.size >= 3)
|
|
2565
|
-
break;
|
|
2566
|
-
if (other.name === entry.name || nextToolsSet.has(other.name))
|
|
2567
|
-
continue;
|
|
2568
|
-
if (other.phase === entry.phase) {
|
|
2569
|
-
related.add(other.name);
|
|
2570
|
-
}
|
|
2571
|
-
}
|
|
2572
|
-
}
|
|
2573
|
-
return [...related].slice(0, 7); // hard cap at 7
|
|
2574
|
-
}
|
|
2575
|
-
/** Populate relatedTools for all registry entries. Called once at module load after DOMAIN_CLUSTERS exists. */
|
|
2576
|
-
export function _populateRelatedTools() {
|
|
2577
|
-
for (const entry of REGISTRY_ENTRIES) {
|
|
2578
|
-
if (!entry.quickRef.relatedTools || entry.quickRef.relatedTools.length === 0) {
|
|
2579
|
-
entry.quickRef.relatedTools = computeRelatedTools(entry);
|
|
2580
|
-
}
|
|
2581
|
-
}
|
|
2582
|
-
}
|
|
2583
2592
|
/** Get quick ref for a tool, with fallback for unregistered tools */
|
|
2584
2593
|
export function getQuickRef(toolName) {
|
|
2585
2594
|
return TOOL_REGISTRY.get(toolName)?.quickRef ?? null;
|
|
@@ -2631,7 +2640,9 @@ const CATEGORY_COMPLEXITY = {
|
|
|
2631
2640
|
email: "medium",
|
|
2632
2641
|
rss: "low",
|
|
2633
2642
|
architect: "low",
|
|
2634
|
-
|
|
2643
|
+
qa_orchestration: "low",
|
|
2644
|
+
visual_qa: "medium",
|
|
2645
|
+
local_dashboard: "low",
|
|
2635
2646
|
};
|
|
2636
2647
|
/** Per-tool complexity overrides (when category default is wrong) */
|
|
2637
2648
|
const TOOL_COMPLEXITY_OVERRIDES = {
|
|
@@ -2910,9 +2921,6 @@ const DOMAIN_CLUSTERS = {
|
|
|
2910
2921
|
writing: ["research_writing", "documentation"],
|
|
2911
2922
|
measurement: ["eval", "benchmark", "self_eval"],
|
|
2912
2923
|
};
|
|
2913
|
-
// Wire up domain clusters and auto-populate relatedTools for all registry entries
|
|
2914
|
-
_setDomainClustersRef(DOMAIN_CLUSTERS);
|
|
2915
|
-
_populateRelatedTools();
|
|
2916
2924
|
// ── Execution trace edges — co-occurrence mining from tool_call_log ────────
|
|
2917
2925
|
// Based on Agent-as-a-Graph (arxiv:2511.18194): execution trace edges
|
|
2918
2926
|
// mine sequential co-occurrence patterns to discover implicit tool relationships.
|
|
@@ -2953,36 +2961,17 @@ export function _setDbAccessor(accessor) {
|
|
|
2953
2961
|
*
|
|
2954
2962
|
* Approach: for each session, pull the ordered tool sequence, then count
|
|
2955
2963
|
* pairs within a sliding window of 5 calls. O(n) per session, no self-join.
|
|
2956
|
-
*
|
|
2957
|
-
* When transitive=true, infer A→C via A→B + B→C (two-hop co-occurrence).
|
|
2958
|
-
* Extended cap of 15 edges/tool (vs 10 for direct-only).
|
|
2959
2964
|
*/
|
|
2960
|
-
|
|
2961
|
-
let _transitiveCooccurrenceCacheTime = 0;
|
|
2962
|
-
function getCooccurrenceEdges(options) {
|
|
2963
|
-
const transitive = options?.transitive ?? false;
|
|
2965
|
+
function getCooccurrenceEdges() {
|
|
2964
2966
|
const now = Date.now();
|
|
2965
|
-
|
|
2966
|
-
|
|
2967
|
-
if (_transitiveCooccurrenceCache && now - _transitiveCooccurrenceCacheTime < COOCCURRENCE_TTL_MS) {
|
|
2968
|
-
return _transitiveCooccurrenceCache;
|
|
2969
|
-
}
|
|
2970
|
-
}
|
|
2971
|
-
else {
|
|
2972
|
-
if (_cooccurrenceCache && now - _cooccurrenceCacheTime < COOCCURRENCE_TTL_MS) {
|
|
2973
|
-
return _cooccurrenceCache;
|
|
2974
|
-
}
|
|
2967
|
+
if (_cooccurrenceCache && now - _cooccurrenceCacheTime < COOCCURRENCE_TTL_MS) {
|
|
2968
|
+
return _cooccurrenceCache;
|
|
2975
2969
|
}
|
|
2976
|
-
|
|
2977
|
-
const directEdges = new Map();
|
|
2970
|
+
const edges = new Map();
|
|
2978
2971
|
if (!_dbAccessor) {
|
|
2979
|
-
_cooccurrenceCache =
|
|
2972
|
+
_cooccurrenceCache = edges;
|
|
2980
2973
|
_cooccurrenceCacheTime = now;
|
|
2981
|
-
|
|
2982
|
-
_transitiveCooccurrenceCache = directEdges;
|
|
2983
|
-
_transitiveCooccurrenceCacheTime = now;
|
|
2984
|
-
}
|
|
2985
|
-
return directEdges;
|
|
2974
|
+
return edges;
|
|
2986
2975
|
}
|
|
2987
2976
|
try {
|
|
2988
2977
|
const db = _dbAccessor();
|
|
@@ -3023,51 +3012,24 @@ function getCooccurrenceEdges(options) {
|
|
|
3023
3012
|
.sort((a, b) => b[1] - a[1]);
|
|
3024
3013
|
for (const [key] of sorted) {
|
|
3025
3014
|
const [toolA, toolB] = key.split("\0");
|
|
3026
|
-
const list =
|
|
3015
|
+
const list = edges.get(toolA) ?? [];
|
|
3027
3016
|
if (list.length < 10) {
|
|
3028
3017
|
list.push(toolB);
|
|
3029
|
-
|
|
3018
|
+
edges.set(toolA, list);
|
|
3030
3019
|
}
|
|
3031
3020
|
}
|
|
3032
3021
|
}
|
|
3033
3022
|
catch {
|
|
3034
3023
|
// No DB or table not yet created — return empty (graceful degradation)
|
|
3035
3024
|
}
|
|
3036
|
-
|
|
3037
|
-
_cooccurrenceCache = directEdges;
|
|
3025
|
+
_cooccurrenceCache = edges;
|
|
3038
3026
|
_cooccurrenceCacheTime = now;
|
|
3039
|
-
|
|
3040
|
-
return directEdges;
|
|
3041
|
-
// Transitive inference: A→B and B→C ⟹ A→C (two-hop)
|
|
3042
|
-
const transitiveEdges = new Map([...directEdges.entries()].map(([k, v]) => [k, [...v]]));
|
|
3043
|
-
for (const [toolA, directNeighbors] of directEdges) {
|
|
3044
|
-
const existingSet = new Set(directNeighbors);
|
|
3045
|
-
existingSet.add(toolA); // avoid self-loops
|
|
3046
|
-
for (const toolB of directNeighbors) {
|
|
3047
|
-
const bNeighbors = directEdges.get(toolB);
|
|
3048
|
-
if (!bNeighbors)
|
|
3049
|
-
continue;
|
|
3050
|
-
const list = transitiveEdges.get(toolA);
|
|
3051
|
-
for (const toolC of bNeighbors) {
|
|
3052
|
-
if (existingSet.has(toolC))
|
|
3053
|
-
continue;
|
|
3054
|
-
if (list.length >= 15)
|
|
3055
|
-
break; // extended cap for transitive
|
|
3056
|
-
list.push(toolC);
|
|
3057
|
-
existingSet.add(toolC);
|
|
3058
|
-
}
|
|
3059
|
-
}
|
|
3060
|
-
}
|
|
3061
|
-
_transitiveCooccurrenceCache = transitiveEdges;
|
|
3062
|
-
_transitiveCooccurrenceCacheTime = now;
|
|
3063
|
-
return transitiveEdges;
|
|
3027
|
+
return edges;
|
|
3064
3028
|
}
|
|
3065
3029
|
/** Reset co-occurrence cache — for testing only. */
|
|
3066
3030
|
export function _resetCooccurrenceCache() {
|
|
3067
3031
|
_cooccurrenceCache = null;
|
|
3068
3032
|
_cooccurrenceCacheTime = 0;
|
|
3069
|
-
_transitiveCooccurrenceCache = null;
|
|
3070
|
-
_transitiveCooccurrenceCacheTime = 0;
|
|
3071
3033
|
}
|
|
3072
3034
|
/** Inject co-occurrence edges directly — for testing only. */
|
|
3073
3035
|
export function _setCooccurrenceForTesting(edges) {
|
|
@@ -3459,8 +3421,7 @@ export function hybridSearch(query, tools, options) {
|
|
|
3459
3421
|
});
|
|
3460
3422
|
}
|
|
3461
3423
|
results.sort((a, b) => b.score - a.score);
|
|
3462
|
-
|
|
3463
|
-
return results.slice(offset, offset + limit);
|
|
3424
|
+
return results.slice(0, limit);
|
|
3464
3425
|
}
|
|
3465
3426
|
/** Available search modes for discover_tools */
|
|
3466
3427
|
export const SEARCH_MODES = ["hybrid", "fuzzy", "regex", "prefix", "semantic", "exact", "dense", "embedding"];
|
|
@@ -3482,6 +3443,7 @@ export const WORKFLOW_CHAINS = {
|
|
|
3482
3443
|
{ tool: "run_mandatory_flywheel", action: "6-step final verification" },
|
|
3483
3444
|
{ tool: "record_learning", action: "Capture what you learned" },
|
|
3484
3445
|
{ tool: "promote_to_eval", action: "Feed into eval batch" },
|
|
3446
|
+
{ tool: "save_session_note", action: "Save traceability note — cite original request, summarize what was delivered" },
|
|
3485
3447
|
],
|
|
3486
3448
|
},
|
|
3487
3449
|
fix_bug: {
|
|
@@ -3494,6 +3456,7 @@ export const WORKFLOW_CHAINS = {
|
|
|
3494
3456
|
{ tool: "log_test_result", action: "Record regression test" },
|
|
3495
3457
|
{ tool: "run_mandatory_flywheel", action: "6-step verification" },
|
|
3496
3458
|
{ tool: "record_learning", action: "Record the gotcha/pattern" },
|
|
3459
|
+
{ tool: "save_session_note", action: "Save traceability note — cite original request, record root cause and fix" },
|
|
3497
3460
|
],
|
|
3498
3461
|
},
|
|
3499
3462
|
ui_change: {
|
|
@@ -3507,6 +3470,7 @@ export const WORKFLOW_CHAINS = {
|
|
|
3507
3470
|
{ tool: "run_quality_gate", action: "Run ui_ux_qa gate" },
|
|
3508
3471
|
{ tool: "run_mandatory_flywheel", action: "Final verification" },
|
|
3509
3472
|
{ tool: "record_learning", action: "Record UI patterns" },
|
|
3473
|
+
{ tool: "save_session_note", action: "Save traceability note — cite original request, record visual evidence path" },
|
|
3510
3474
|
],
|
|
3511
3475
|
},
|
|
3512
3476
|
parallel_project: {
|
|
@@ -3824,16 +3788,67 @@ export const WORKFLOW_CHAINS = {
|
|
|
3824
3788
|
{ tool: "save_session_note", action: "Log sent emails so you have an audit trail that survives compaction" },
|
|
3825
3789
|
],
|
|
3826
3790
|
},
|
|
3827
|
-
|
|
3828
|
-
name: "
|
|
3829
|
-
description: "
|
|
3791
|
+
webmcp_discovery: {
|
|
3792
|
+
name: "WebMCP Origin Discovery",
|
|
3793
|
+
description: "Connect to a WebMCP-enabled origin, discover its tools, and invoke them from the agent",
|
|
3794
|
+
steps: [
|
|
3795
|
+
{ tool: "connect_webmcp_origin", action: "Connect to the target origin URL and establish a WebMCP session" },
|
|
3796
|
+
{ tool: "list_webmcp_tools", action: "List all tools exposed by the origin with schemas and annotations" },
|
|
3797
|
+
{ tool: "call_webmcp_tool", action: "Invoke a specific tool on the remote origin with arguments" },
|
|
3798
|
+
{ tool: "disconnect_webmcp_origin", action: "Clean up the WebMCP session when done" },
|
|
3799
|
+
],
|
|
3800
|
+
},
|
|
3801
|
+
batch_autopilot: {
|
|
3802
|
+
name: "Batch Autopilot Run",
|
|
3803
|
+
description: "Set up an operator profile and run a batch autopilot session for autonomous agent tasks",
|
|
3804
|
+
steps: [
|
|
3805
|
+
{ tool: "setup_operator_profile", action: "Create or update USER.md and operator profile for autopilot context" },
|
|
3806
|
+
{ tool: "get_autopilot_status", action: "Check current autopilot readiness, profile completeness, and last run status" },
|
|
3807
|
+
{ tool: "trigger_batch_run", action: "Start a batch autopilot run using the operator profile as context" },
|
|
3808
|
+
{ tool: "get_batch_run_history", action: "Review history of past batch runs, outcomes, and timing" },
|
|
3809
|
+
{ tool: "sync_operator_profile", action: "Sync operator profile state from disk after manual edits" },
|
|
3810
|
+
],
|
|
3811
|
+
},
|
|
3812
|
+
daily_review: {
|
|
3813
|
+
name: "Daily Brief Review",
|
|
3814
|
+
description: "Pull the latest daily brief, review narrative threads, check ops dashboard, and sync to local storage",
|
|
3815
|
+
steps: [
|
|
3816
|
+
{ tool: "sync_daily_brief", action: "Pull today's brief and narrative from Convex into local SQLite" },
|
|
3817
|
+
{ tool: "get_daily_brief_summary", action: "Get the full brief summary with key signals and insights" },
|
|
3818
|
+
{ tool: "get_narrative_status", action: "Check narrative thread status — dominant story, under-reported angle, evidence scores" },
|
|
3819
|
+
{ tool: "get_ops_dashboard", action: "Review pipeline health: posting status, tool usage, active workflows" },
|
|
3820
|
+
{ tool: "open_local_dashboard", action: "Open the local HTML dashboard in the browser for visual review" },
|
|
3821
|
+
],
|
|
3822
|
+
},
|
|
3823
|
+
deep_interaction: {
|
|
3824
|
+
name: "Deep Interaction Discovery & Capture",
|
|
3825
|
+
description: "Systematically discover, capture, and verify interactive UI behaviors — popups, drawers, streaming responses, hover states, agent conversations, thread management, keyboard shortcuts. Goes beyond static screenshot routes to test real user behavior flows.",
|
|
3826
|
+
steps: [
|
|
3827
|
+
{ tool: "dive_auto_discover", action: "Auto-discover interactive components (buttons, drawers, modals, expandable rows) across all routes" },
|
|
3828
|
+
{ tool: "start_ui_dive", action: "Start a structured UI dive session to track interaction coverage" },
|
|
3829
|
+
{ tool: "burst_capture", action: "Rapid-fire capture during interaction transitions (open drawer, hover tooltip, type in agent panel)" },
|
|
3830
|
+
{ tool: "dive_interaction_test", action: "Test specific interaction patterns: click→open→verify, type→submit→stream, hover→preview→dismiss" },
|
|
3831
|
+
{ tool: "compute_web_stability", action: "Measure SSIM stability across interaction frames — detect layout shifts, flicker, animation jank" },
|
|
3832
|
+
{ tool: "dive_record_test_step", action: "Record each interaction test step with expected vs actual behavior" },
|
|
3833
|
+
{ tool: "run_visual_qa_suite", action: "Run full visual QA suite including deep interaction captures" },
|
|
3834
|
+
{ tool: "tag_ui_bug", action: "Tag issues found during interaction testing (broken hover, drawer z-index, missing focus trap)" },
|
|
3835
|
+
{ tool: "get_dive_report", action: "Generate interaction coverage report — which components were tested, which remain" },
|
|
3836
|
+
{ tool: "record_learning", action: "Record interaction patterns, common failure modes, and selector strategies" },
|
|
3837
|
+
],
|
|
3838
|
+
},
|
|
3839
|
+
gemini_qa: {
|
|
3840
|
+
name: "Gemini Vision QA Loop",
|
|
3841
|
+
description: "Automated UI/UX quality gate — capture screenshots (dark/light × desktop/mobile), send to Gemini Flash for Jony Ive product design review, fix issues, loop until 100/100",
|
|
3830
3842
|
steps: [
|
|
3831
|
-
{ tool: "
|
|
3832
|
-
{ tool: "
|
|
3833
|
-
{ tool: "
|
|
3834
|
-
{ tool: "
|
|
3835
|
-
{ tool: "
|
|
3836
|
-
{ tool: "
|
|
3843
|
+
{ tool: "check_mcp_setup", action: "Verify Gemini API key (GOOGLE_AI_KEY) and vision domain are ready" },
|
|
3844
|
+
{ tool: "start_verification_cycle", action: "Open a verification cycle titled 'Gemini QA Loop' to track progress" },
|
|
3845
|
+
{ tool: "save_session_note", action: "Shell: `npx vite build` then `npx playwright test tests/e2e/full-ui-dogfood.spec.ts --project=chromium --workers=1` — capture 4-variant screenshots" },
|
|
3846
|
+
{ tool: "save_session_note", action: "Shell: `npm run dogfood:publish` — copy screenshots to public/dogfood/ with variant metadata manifest" },
|
|
3847
|
+
{ tool: "save_session_note", action: "Shell: `npx vite build && node scripts/ui/runDogfoodGeminiQa.mjs` — rebuild, launch preview, trigger Gemini QA" },
|
|
3848
|
+
{ tool: "log_test_result", action: "Log QA score from public/dogfood/qa-results.json — formula: 100 - P1×6 - P2×2 - P3×1" },
|
|
3849
|
+
{ tool: "save_session_note", action: "Fix P1 issues (6pts each) then P2 (2pts) then P3 (1pt) — root-cause each before fixing" },
|
|
3850
|
+
{ tool: "get_overstory_qa_gate", action: "Check QA gate for per-route stability grades and issue counts" },
|
|
3851
|
+
{ tool: "record_learning", action: "Record QA trajectory and Gemini finding patterns for regression tracking" },
|
|
3837
3852
|
],
|
|
3838
3853
|
},
|
|
3839
3854
|
};
|