npm - nodebench-mcp - Versions diffs - 2.31.1 → 2.32.0 - Mend

nodebench-mcp 2.31.1 → 2.32.0

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (94)

package/README.md +14 -6
package/dist/engine/server.js +14 -4
package/dist/engine/server.js.map +1 -1
package/dist/index.js +1946 -670
package/dist/index.js.map +1 -1
package/dist/security/SecurityError.d.ts +18 -0
package/dist/security/SecurityError.js +22 -0
package/dist/security/SecurityError.js.map +1 -0
package/dist/security/__tests__/security.test.d.ts +8 -0
package/dist/security/__tests__/security.test.js +295 -0
package/dist/security/__tests__/security.test.js.map +1 -0
package/dist/security/auditLog.d.ts +36 -0
package/dist/security/auditLog.js +178 -0
package/dist/security/auditLog.js.map +1 -0
package/dist/security/commandSandbox.d.ts +33 -0
package/dist/security/commandSandbox.js +159 -0
package/dist/security/commandSandbox.js.map +1 -0
package/dist/security/config.d.ts +23 -0
package/dist/security/config.js +43 -0
package/dist/security/config.js.map +1 -0
package/dist/security/credentialRedactor.d.ts +22 -0
package/dist/security/credentialRedactor.js +118 -0
package/dist/security/credentialRedactor.js.map +1 -0
package/dist/security/index.d.ts +20 -0
package/dist/security/index.js +21 -0
package/dist/security/index.js.map +1 -0
package/dist/security/pathSandbox.d.ts +23 -0
package/dist/security/pathSandbox.js +160 -0
package/dist/security/pathSandbox.js.map +1 -0
package/dist/security/urlValidator.d.ts +23 -0
package/dist/security/urlValidator.js +125 -0
package/dist/security/urlValidator.js.map +1 -0
package/dist/tools/agentBootstrapTools.js +22 -29
package/dist/tools/agentBootstrapTools.js.map +1 -1
package/dist/tools/contextSandboxTools.js +7 -9
package/dist/tools/contextSandboxTools.js.map +1 -1
package/dist/tools/deepSimTools.d.ts +2 -0
package/dist/tools/deepSimTools.js +404 -0
package/dist/tools/deepSimTools.js.map +1 -0
package/dist/tools/dimensionTools.d.ts +2 -0
package/dist/tools/dimensionTools.js +246 -0
package/dist/tools/dimensionTools.js.map +1 -0
package/dist/tools/executionTraceTools.d.ts +2 -0
package/dist/tools/executionTraceTools.js +446 -0
package/dist/tools/executionTraceTools.js.map +1 -0
package/dist/tools/founderTools.d.ts +13 -0
package/dist/tools/founderTools.js +595 -0
package/dist/tools/founderTools.js.map +1 -0
package/dist/tools/gitWorkflowTools.js +14 -10
package/dist/tools/gitWorkflowTools.js.map +1 -1
package/dist/tools/githubTools.js +19 -2
package/dist/tools/githubTools.js.map +1 -1
package/dist/tools/index.d.ts +87 -0
package/dist/tools/index.js +102 -0
package/dist/tools/index.js.map +1 -0
package/dist/tools/localFileTools.js +24 -12
package/dist/tools/localFileTools.js.map +1 -1
package/dist/tools/memoryDecay.d.ts +70 -0
package/dist/tools/memoryDecay.js +247 -0
package/dist/tools/memoryDecay.js.map +1 -0
package/dist/tools/missionHarnessTools.d.ts +32 -0
package/dist/tools/missionHarnessTools.js +972 -0
package/dist/tools/missionHarnessTools.js.map +1 -0
package/dist/tools/observabilityTools.d.ts +15 -0
package/dist/tools/observabilityTools.js +787 -0
package/dist/tools/observabilityTools.js.map +1 -0
package/dist/tools/openclawTools.js +151 -36
package/dist/tools/openclawTools.js.map +1 -1
package/dist/tools/progressiveDiscoveryTools.js +5 -4
package/dist/tools/progressiveDiscoveryTools.js.map +1 -1
package/dist/tools/qualityGateTools.js +118 -2
package/dist/tools/qualityGateTools.js.map +1 -1
package/dist/tools/rssTools.js +3 -0
package/dist/tools/rssTools.js.map +1 -1
package/dist/tools/scraplingTools.js +15 -0
package/dist/tools/scraplingTools.js.map +1 -1
package/dist/tools/seoTools.js +66 -1
package/dist/tools/seoTools.js.map +1 -1
package/dist/tools/sessionMemoryTools.js +50 -11
package/dist/tools/sessionMemoryTools.js.map +1 -1
package/dist/tools/temporalIntelligenceTools.d.ts +12 -0
package/dist/tools/temporalIntelligenceTools.js +1068 -0
package/dist/tools/temporalIntelligenceTools.js.map +1 -0
package/dist/tools/toolRegistry.d.ts +19 -0
package/dist/tools/toolRegistry.js +857 -31
package/dist/tools/toolRegistry.js.map +1 -1
package/dist/tools/webTools.js +14 -1
package/dist/tools/webTools.js.map +1 -1
package/dist/tools/webmcpTools.js +13 -2
package/dist/tools/webmcpTools.js.map +1 -1
package/dist/toolsetRegistry.js +13 -0
package/dist/toolsetRegistry.js.map +1 -1
package/dist/types.d.ts +10 -0
package/package.json +124 -124

package/dist/tools/toolRegistry.js CHANGED

@@ -1322,6 +1322,97 @@ const REGISTRY_ENTRIES = [
         phase: "ship",
     },
     // ═══ RESEARCH WRITING ═══
+    {
+        name: "start_execution_run",
+        category: "platform",
+        tags: ["execution-trace", "run", "start", "session", "receipt", "workflow", "traceable", "begin"],
+        quickRef: {
+            nextAction: "Execution run started. Record the first meaningful step immediately so the trace has a visible timeline.",
+            nextTools: ["record_execution_step", "attach_execution_evidence", "record_execution_decision"],
+            methodology: "agent_bootstrap",
+            tip: "Use one run per user-visible workflow. Keep the title operator-friendly because it appears in the UI.",
+        },
+        phase: "implement",
+        complexity: "low",
+    },
+    {
+        name: "complete_execution_run",
+        category: "platform",
+        tags: ["execution-trace", "run", "complete", "finish", "close", "status", "traceable", "ship"],
+        quickRef: {
+            nextAction: "Execution run closed. Review the resulting Execution Trace tabs to confirm evidence, decisions, and verification all landed correctly.",
+            nextTools: ["record_learning", "save_session_note"],
+            methodology: "closed_loop",
+            tip: "Pass token usage and toolsUsed when available so the run is useful for later benchmarking.",
+        },
+        phase: "ship",
+        complexity: "low",
+    },
+    {
+        name: "record_execution_step",
+        category: "platform",
+        tags: ["execution-trace", "receipt", "step", "timeline", "workflow", "action", "traceable", "span"],
+        quickRef: {
+            nextAction: "Step recorded. Add evidence for supporting facts and record a decision if the step changed direction or selected an option.",
+            nextTools: ["attach_execution_evidence", "record_execution_decision", "record_execution_verification"],
+            methodology: "closed_loop",
+            tip: "Use this for meaningful transitions only. Good traces read like operator receipts, not noisy debug logs.",
+        },
+        phase: "implement",
+        complexity: "low",
+    },
+    {
+        name: "record_execution_decision",
+        category: "platform",
+        tags: ["execution-trace", "decision", "ranking", "selection", "basis", "alternatives", "confidence", "traceable"],
+        quickRef: {
+            nextAction: "Decision recorded. Attach the evidence that supports it and add a limitation note if the choice depends on incomplete information.",
+            nextTools: ["attach_execution_evidence", "record_execution_verification", "complete_execution_run"],
+            methodology: "verification",
+            tip: "Record the basis and alternatives considered. That gives explainability without exposing raw hidden reasoning.",
+        },
+        phase: "verify",
+        complexity: "low",
+    },
+    {
+        name: "record_execution_verification",
+        category: "platform",
+        tags: ["execution-trace", "verification", "qa", "check", "render", "formula", "artifact", "traceable"],
+        quickRef: {
+            nextAction: "Verification recorded. If it failed, fix the issue and record a follow-up verification so the trace shows the correction loop clearly.",
+            nextTools: ["record_execution_step", "complete_execution_run", "record_learning"],
+            methodology: "closed_loop",
+            tip: "Use warnings for incomplete checks, failed for blocking issues, and fixed when the trace should show a successful repair.",
+        },
+        phase: "test",
+        complexity: "low",
+    },
+    {
+        name: "attach_execution_evidence",
+        category: "platform",
+        tags: ["execution-trace", "evidence", "sources", "truth-boundary", "urls", "files", "support", "claims"],
+        quickRef: {
+            nextAction: "Evidence attached. Cross-check that unsupported claims are listed explicitly before you finalize the run.",
+            nextTools: ["record_execution_decision", "record_execution_verification", "complete_execution_run"],
+            methodology: "reconnaissance",
+            tip: "Use supportedClaims and unsupportedClaims to make the truth boundary visible in the run, not just in the final answer.",
+        },
+        phase: "research",
+        complexity: "low",
+    },
+    {
+        name: "request_execution_approval",
+        category: "platform",
+        tags: ["execution-trace", "approval", "human-in-the-loop", "risk", "gate", "policy", "handoff", "traceable"],
+        quickRef: {
+            nextAction: "Approval requested. Pause risky execution and let the operator resolve the pending gate before continuing.",
+            nextTools: ["record_execution_step", "record_execution_verification", "complete_execution_run"],
+            methodology: "quality_gates",
+            tip: "Use for externally visible writes, destructive edits, or any action you would want an operator to justify later.",
+        },
+        phase: "verify",
+        complexity: "low",
+    },
     {
         name: "polish_academic_text",
         category: "research_writing",
@@ -3080,6 +3171,514 @@ const REGISTRY_ENTRIES = [
         phase: "meta",
         complexity: "low",
     },
+    // ═══ OBSERVABILITY ═══
+    {
+        name: "get_system_pulse",
+        category: "observability",
+        tags: ["health", "status", "pulse", "monitoring", "dashboard", "uptime", "errors", "diagnostics"],
+        quickRef: {
+            nextAction: "Pulse captured. If healthScore < 70, run get_drift_report for details. If critical, run run_self_heal.",
+            nextTools: ["get_drift_report", "run_self_heal", "get_uptime_stats"],
+            tip: "Call this first when investigating system issues — it gives you the full picture in one shot.",
+        },
+        phase: "utility",
+        complexity: "low",
+    },
+    {
+        name: "get_drift_report",
+        category: "observability",
+        tags: ["drift", "detection", "orphaned", "stale", "bloat", "maintenance", "audit", "cleanup"],
+        quickRef: {
+            nextAction: "Drift detected. Review healable issues, then call run_self_heal with targets to auto-fix.",
+            nextTools: ["run_self_heal", "get_system_pulse", "cleanup_stale_runs"],
+            tip: "Include include_history=true to see trend over time — one-off spikes are different from sustained degradation.",
+        },
+        phase: "verify",
+        complexity: "low",
+    },
+    {
+        name: "run_self_heal",
+        category: "observability",
+        tags: ["heal", "repair", "fix", "autonomous", "maintenance", "cleanup", "self-healing", "auto-fix"],
+        quickRef: {
+            nextAction: "Healing complete. Re-run get_drift_report to verify fixes took effect.",
+            nextTools: ["get_drift_report", "get_system_pulse"],
+            tip: "Use dry_run=true first to preview what would be fixed without actually changing anything.",
+        },
+        phase: "utility",
+        complexity: "low",
+    },
+    {
+        name: "get_uptime_stats",
+        category: "observability",
+        tags: ["uptime", "metrics", "calls", "errors", "trends", "rate", "performance", "statistics"],
+        quickRef: {
+            nextAction: "Stats captured. Check error trend direction — if 'increasing', investigate with get_drift_report.",
+            nextTools: ["get_drift_report", "get_system_pulse", "get_trajectory_analysis"],
+            tip: "Compare 1hr vs 24hr error rates — a recent spike in an otherwise stable system needs different treatment than chronic errors.",
+        },
+        phase: "utility",
+        complexity: "low",
+    },
+    {
+        name: "set_watchdog_config",
+        category: "observability",
+        tags: ["watchdog", "config", "interval", "thresholds", "monitoring", "background", "schedule"],
+        quickRef: {
+            nextAction: "Watchdog reconfigured. Changes take effect immediately. Check get_watchdog_log after one cycle to verify.",
+            nextTools: ["get_watchdog_log", "get_system_pulse"],
+            tip: "Set interval_minutes=1 for debugging, then raise to 5-10 for normal operation to reduce overhead.",
+        },
+        phase: "utility",
+        complexity: "low",
+    },
+    {
+        name: "get_watchdog_log",
+        category: "observability",
+        tags: ["watchdog", "log", "history", "trend", "background", "audit", "timeline"],
+        quickRef: {
+            nextAction: "Log reviewed. If trend is 'degrading', investigate the most common issue type with get_drift_report.",
+            nextTools: ["get_drift_report", "set_watchdog_config", "get_system_pulse"],
+            tip: "Use only_issues=true to filter noise and focus on entries where something actually went wrong.",
+        },
+        phase: "utility",
+        complexity: "low",
+    },
+    {
+        name: "get_sentinel_report",
+        category: "observability",
+        tags: ["sentinel", "probes", "quality", "testing", "build", "e2e", "voice", "a11y", "visual", "performance"],
+        quickRef: {
+            nextAction: "Report reviewed. For failing probes, check diagnosis root causes and apply suggested fixes.",
+            nextTools: ["get_drift_report", "get_system_pulse", "run_self_heal"],
+            tip: "Use probe_filter to focus on specific areas like 'build,e2e' instead of reviewing all 9 probes.",
+        },
+        phase: "verify",
+        complexity: "low",
+    },
+    {
+        name: "get_observability_summary",
+        category: "observability",
+        tags: ["summary", "unified", "health", "sentinel", "watchdog", "quick-check", "overview"],
+        quickRef: {
+            nextAction: "Summary reviewed. Follow nextActions recommendations for highest-impact improvements.",
+            nextTools: ["get_drift_report", "run_self_heal", "get_sentinel_report", "get_uptime_stats"],
+            tip: "Best starting point for any session — gives you MCP health, sentinel status, and watchdog state in one call.",
+        },
+        phase: "utility",
+        complexity: "low",
+    },
+    // ═══ TEMPORAL INTELLIGENCE (Unified Temporal Agentic OS) ═══
+    {
+        name: "ingest_temporal_observation",
+        category: "temporal_intelligence",
+        tags: ["temporal", "observation", "ingest", "time-series", "stream", "signal", "data", "event"],
+        quickRef: {
+            nextAction: "Observation ingested. Run detect_temporal_signal on the same streamKey to find patterns, or ingest more observations to build a richer time series.",
+            nextTools: ["detect_temporal_signal", "build_causal_chain", "query_temporal_signals"],
+            methodology: "temporal_agentic_os",
+            tip: "Use consistent streamKey naming (e.g. 'github/commits/repo', 'jira/velocity/team') for clean signal detection.",
+        },
+        phase: "research",
+        complexity: "low",
+    },
+    {
+        name: "detect_temporal_signal",
+        category: "temporal_intelligence",
+        tags: ["temporal", "signal", "detect", "anomaly", "momentum", "regime-shift", "trend", "analysis", "statistics"],
+        quickRef: {
+            nextAction: "Signals detected. Build a causal_chain to explain significant signals, or generate a zero_draft to communicate findings. Use query_temporal_signals to retrieve stored signals.",
+            nextTools: ["build_causal_chain", "generate_zero_draft", "query_temporal_signals", "forecast_temporal_trend"],
+            methodology: "temporal_agentic_os",
+            tip: "Need 5+ numeric observations for momentum, 10+ for regime shift detection. Use lookbackDays to control analysis window.",
+        },
+        phase: "research",
+        complexity: "medium",
+    },
+    {
+        name: "build_causal_chain",
+        category: "temporal_intelligence",
+        tags: ["temporal", "causal", "chain", "causality", "root-cause", "analysis", "timeline", "explanation"],
+        quickRef: {
+            nextAction: "Causal chain built. Generate a zero_draft to communicate the analysis, or create a proof_pack to verify the chain's conclusions.",
+            nextTools: ["generate_zero_draft", "create_proof_pack", "detect_temporal_signal"],
+            methodology: "temporal_agentic_os",
+            tip: "Nodes must be chronological. Link evidenceObservationIds to ground each causal step in data.",
+        },
+        phase: "research",
+        complexity: "medium",
+    },
+    {
+        name: "generate_zero_draft",
+        category: "temporal_intelligence",
+        tags: ["temporal", "draft", "artifact", "generate", "email", "slack", "spec", "pr", "content", "auto-draft"],
+        quickRef: {
+            nextAction: "Draft generated. Review the bodyMarkdown, edit as needed, then approve or create a proof_pack before sending.",
+            nextTools: ["create_proof_pack", "detect_temporal_signal", "build_causal_chain"],
+            methodology: "temporal_agentic_os",
+            tip: "Link signal IDs and chain IDs to auto-populate the draft with evidence. Always review before approving.",
+        },
+        phase: "implement",
+        complexity: "high",
+    },
+    {
+        name: "create_proof_pack",
+        category: "temporal_intelligence",
+        tags: ["temporal", "proof", "pack", "verification", "checklist", "metrics", "dogfood", "immutable", "audit"],
+        quickRef: {
+            nextAction: "Proof pack created. If pass rate is 100%, status is 'ready' for approval. Otherwise, address failing items and create a new pack.",
+            nextTools: ["query_temporal_signals", "generate_zero_draft", "detect_temporal_signal"],
+            methodology: "temporal_agentic_os",
+            tip: "100% pass rate auto-sets status to 'ready'. Include metrics for cost/performance tracking.",
+        },
+        phase: "verify",
+        complexity: "medium",
+    },
+    {
+        name: "query_temporal_signals",
+        category: "temporal_intelligence",
+        tags: ["temporal", "signal", "query", "search", "filter", "retrieve", "list", "status"],
+        quickRef: {
+            nextAction: "Signals retrieved. Investigate high-confidence signals with build_causal_chain, or forecast trends with forecast_temporal_trend.",
+            nextTools: ["build_causal_chain", "forecast_temporal_trend", "detect_temporal_signal", "generate_zero_draft"],
+            methodology: "temporal_agentic_os",
+            tip: "Filter by status='open' to focus on unresolved signals. Use date range to scope analysis.",
+        },
+        phase: "research",
+        complexity: "low",
+    },
+    {
+        name: "forecast_temporal_trend",
+        category: "temporal_intelligence",
+        tags: ["temporal", "forecast", "trend", "prediction", "time-series", "regression", "smoothing", "statistics"],
+        quickRef: {
+            nextAction: "Forecast generated. Compare predictions with actual observations as they arrive. Use detect_temporal_signal to monitor for deviations from forecast.",
+            nextTools: ["detect_temporal_signal", "ingest_temporal_observation", "query_temporal_signals", "generate_zero_draft"],
+            methodology: "temporal_agentic_os",
+            tip: "Linear method works best with clear trends. Exponential smoothing handles noisy data better. Naive is a baseline.",
+        },
+        phase: "research",
+        complexity: "high",
+    },
+    // ── Mission Harness (Hierarchical execution) ──────────────────────────
+    {
+        name: "plan_decompose_mission",
+        category: "mission_harness",
+        tags: ["mission", "planner", "decompose", "subtask", "verifiability", "orchestration", "hierarchy", "execution"],
+        quickRef: {
+            nextAction: "Mission decomposed. Assign agents to subtasks, then use judge_verify_subtask as each completes.",
+            nextTools: ["judge_verify_subtask", "harness_get_mission_status", "harness_list_runs"],
+            methodology: "mission_execution_harness",
+            tip: "Every subtask needs verifiabilityTier + outputContract. Tier 1 = machine-checkable, Tier 2 = expert-checkable.",
+        },
+        phase: "research",
+        complexity: "high",
+    },
+    {
+        name: "judge_verify_subtask",
+        category: "mission_harness",
+        tags: ["judge", "verify", "review", "evidence", "artifact", "verdict", "quality", "gate"],
+        quickRef: {
+            nextAction: "Subtask verified. If passed and requiresSniffCheck, use sniff_record_human_review. If failed, use judge_request_retry.",
+            nextTools: ["sniff_record_human_review", "judge_request_retry", "merge_compose_output", "harness_get_mission_status"],
+            methodology: "mission_execution_harness",
+            tip: "No hardcoded score floors — 0 means 0. Evidence refs create the traceability chain.",
+        },
+        phase: "verify",
+        complexity: "high",
+    },
+    {
+        name: "judge_request_retry",
+        category: "mission_harness",
+        tags: ["retry", "escalate", "replan", "budget", "failure", "recovery", "resilience"],
+        quickRef: {
+            nextAction: "Retry requested. Worker should re-attempt with newInstructions. If budget exhausted, auto-escalates.",
+            nextTools: ["judge_verify_subtask", "harness_get_mission_status", "plan_decompose_mission"],
+            methodology: "mission_execution_harness",
+            tip: "Retry budget enforced — exhausted budget auto-escalates. Use 'stop' only for unverifiable subtasks.",
+        },
+        phase: "verify",
+        complexity: "medium",
+    },
+    {
+        name: "merge_compose_output",
+        category: "mission_harness",
+        tags: ["merge", "compose", "output", "artifact", "boundary", "orchestration", "finalize"],
+        quickRef: {
+            nextAction: "Output merged. If requiresJudgeReview, run judge_verify_subtask on the merge. Otherwise check mission status.",
+            nextTools: ["judge_verify_subtask", "sniff_record_human_review", "harness_get_mission_status"],
+            methodology: "mission_execution_harness",
+            tip: "Judge-gated: all subtasks must be 'passed' before merge. No shared free-for-all editing.",
+        },
+        phase: "ship",
+        complexity: "high",
+    },
+    {
+        name: "sniff_record_human_review",
+        category: "mission_harness",
+        tags: ["human", "review", "sniff", "check", "approval", "block", "concern", "quality"],
+        quickRef: {
+            nextAction: "Sniff-check recorded. If 'block', subtask enters force-retry. If 'pass', proceed to merge.",
+            nextTools: ["merge_compose_output", "judge_request_retry", "harness_get_mission_status"],
+            methodology: "mission_execution_harness",
+            tip: "Issue tags: unsupported_claim, weak_evidence, not_credible, too_risky, scope_drift, missing_source, contradictory, stale_data.",
+        },
+        phase: "verify",
+        complexity: "low",
+    },
+    {
+        name: "harness_get_mission_status",
+        category: "mission_harness",
+        tags: ["status", "mission", "dashboard", "trace", "receipt", "progress", "overview"],
+        quickRef: {
+            nextAction: "Review subtask states and decide next action: verify pending subtasks, merge passed ones, or record sniff-checks.",
+            nextTools: ["judge_verify_subtask", "merge_compose_output", "sniff_record_human_review", "harness_list_runs"],
+            methodology: "mission_execution_harness",
+            tip: "Use includeEvidence=true for full traceability audit. Default omits evidence for performance.",
+        },
+        phase: "verify",
+        complexity: "low",
+    },
+    {
+        name: "harness_list_runs",
+        category: "mission_harness",
+        tags: ["list", "runs", "missions", "history", "discovery", "overview"],
+        quickRef: {
+            nextAction: "Pick a run to inspect with harness_get_mission_status, or create a new mission with plan_decompose_mission.",
+            nextTools: ["harness_get_mission_status", "plan_decompose_mission"],
+            methodology: "mission_execution_harness",
+            tip: "Filter by status to find active, failed, or completed runs.",
+        },
+        phase: "research",
+        complexity: "low",
+    },
+    {
+        name: "compute_dimension_profile",
+        category: "mission_harness",
+        tags: ["deeptrace", "dimension", "profile", "regime", "company", "capital", "capability", "time"],
+        quickRef: {
+            nextAction: "Profile computed. Export the full bundle, inspect evidence rows and interaction effects, then record any regime-sensitive recommendation in the execution trace.",
+            nextTools: ["export_dimension_bundle", "list_dimension_evidence", "list_dimension_interactions", "record_execution_decision"],
+            methodology: "mission_execution_harness",
+            tip: "Recompute after new company evidence, hiring signals, financing events, or world events land.",
+        },
+        phase: "research",
+        complexity: "medium",
+    },
+    {
+        name: "get_dimension_profile",
+        category: "mission_harness",
+        tags: ["deeptrace", "dimension", "profile", "regime", "policy_context", "confidence", "company"],
+        quickRef: {
+            nextAction: "Read the latest normalized state, regime label, and policy context. If it looks stale, recompute. If it looks material, drill into bundle details.",
+            nextTools: ["compute_dimension_profile", "export_dimension_bundle", "list_dimension_snapshots"],
+            methodology: "mission_execution_harness",
+            tip: "Use this for a fast read before pulling the heavier evidence and snapshot bundle.",
+        },
+        phase: "research",
+        complexity: "low",
+    },
+    {
+        name: "list_dimension_snapshots",
+        category: "mission_harness",
+        tags: ["deeptrace", "dimension", "snapshots", "history", "regime_transition", "timeline"],
+        quickRef: {
+            nextAction: "Review how the entity moved across regimes over time, then use those transitions to qualify the current recommendation.",
+            nextTools: ["get_dimension_profile", "export_dimension_bundle", "record_execution_verification"],
+            methodology: "mission_execution_harness",
+            tip: "Use snapshots to answer whether a company became stronger after funding, hiring, or strategic events rather than assuming a static state.",
+        },
+        phase: "research",
+        complexity: "low",
+    },
+    {
+        name: "list_dimension_evidence",
+        category: "mission_harness",
+        tags: ["deeptrace", "dimension", "evidence", "audit", "verified", "estimated", "inferred"],
+        quickRef: {
+            nextAction: "Audit the evidence behind each score and availability status. If a recommendation depends on a weak signal, call that out explicitly.",
+            nextTools: ["list_dimension_interactions", "record_execution_decision", "record_execution_verification"],
+            methodology: "mission_execution_harness",
+            tip: "Availability labels matter. Verified and inferred evidence should not be treated as equally strong.",
+        },
+        phase: "verify",
+        complexity: "low",
+    },
+    {
+        name: "list_dimension_interactions",
+        category: "mission_harness",
+        tags: ["deeptrace", "dimension", "interaction", "causal", "capital", "network", "fragility"],
+        quickRef: {
+            nextAction: "Use interaction effects to explain why the recommendation changes under different regimes instead of collapsing everything into one score.",
+            nextTools: ["export_dimension_bundle", "record_execution_decision", "record_execution_verification"],
+            methodology: "mission_execution_harness",
+            tip: "Interaction effects are where capital, capability, and narrative signals become causal rather than just descriptive.",
+        },
+        phase: "research",
+        complexity: "medium",
+    },
+    {
+        name: "export_dimension_bundle",
+        category: "mission_harness",
+        tags: ["deeptrace", "dimension", "bundle", "profile", "snapshots", "evidence", "interactions", "audit"],
+        quickRef: {
+            nextAction: "Use the bundle as the auditable substrate for your memo, execution trace, or judge review. Cite the profile, evidence, and interactions directly.",
+            nextTools: ["run_research_cell", "record_execution_step", "record_execution_decision", "record_execution_verification"],
+            methodology: "mission_execution_harness",
+            tip: "This is the safest handoff artifact for Claude Code because it preserves the profile, evidence, and history in one fetch.",
+        },
+        phase: "research",
+        complexity: "medium",
+    },
+    {
+        name: "run_research_cell",
+        category: "mission_harness",
+        tags: ["deeptrace", "research", "reanalysis", "confidence", "coverage", "evidence", "gaps", "counter_hypothesis"],
+        quickRef: {
+            nextAction: "Review the merged findings for gaps, counter-hypotheses, and coverage deficiencies. If evidence is still sparse, escalate to due-diligence orchestrator for external acquisition.",
+            nextTools: ["export_dimension_bundle", "compute_dimension_profile", "run_entity_intelligence_mission", "record_execution_decision"],
+            methodology: "mission_execution_harness",
+            tip: "This cell re-analyzes existing DeepTrace data — it does NOT acquire new evidence. Use it to surface what is missing before committing to expensive external research.",
+        },
+        phase: "research",
+        complexity: "high",
+    },
+    {
+        name: "run_entity_intelligence_mission",
+        category: "mission_harness",
+        tags: ["deeptrace", "mission", "entity", "intelligence", "investigation", "relationship", "ownership", "supply_chain", "research_cell"],
+        quickRef: {
+            nextAction: "Review the unified mission output (graph, ownership, supply chain, signals, causal chains). If researchCell was enabled or forceResearchCell was used, check whether the cell triggered and review its findings.",
+            nextTools: ["run_research_cell", "export_dimension_bundle", "record_execution_step", "record_execution_verification"],
+            methodology: "mission_execution_harness",
+            tip: "Pass researchCell=true for threshold-driven bounded re-analysis, or forceResearchCell=true when an operator wants the cell to run even if confidence and coverage look healthy.",
+        },
+        phase: "research",
+        complexity: "high",
+    },
+    // ═══ DEEP SIM (claim graph → simulation → decision memo) ═══
+    {
+        name: "build_claim_graph",
+        category: "deep_sim",
+        tags: ["deeptrace", "claims", "evidence", "graph", "provenance"],
+        quickRef: {
+            nextAction: "Claim graph built. Extract variables to identify levers, or generate countermodels to stress-test the graph.",
+            nextTools: ["extract_variables", "generate_countermodels"],
+            methodology: "deep_sim",
+        },
+        phase: "research",
+        complexity: "high",
+    },
+    {
+        name: "extract_variables",
+        category: "deep_sim",
+        tags: ["deeptrace", "variables", "weights", "sensitivity"],
+        quickRef: {
+            nextAction: "Variables extracted with sensitivity weights. Generate countermodels to falsify, run a sim to explore branches, or score compounding drift.",
+            nextTools: ["generate_countermodels", "run_deep_sim", "score_compounding"],
+            methodology: "deep_sim",
+        },
+        phase: "research",
+        complexity: "high",
+    },
+    {
+        name: "generate_countermodels",
+        category: "deep_sim",
+        tags: ["deeptrace", "counter", "hypothesis", "falsification"],
+        quickRef: {
+            nextAction: "Countermodels generated. Run a deep sim to test them under branching scenarios, or rank interventions by delta.",
+            nextTools: ["run_deep_sim", "rank_interventions"],
+            methodology: "deep_sim",
+        },
+        phase: "research",
+        complexity: "high",
+    },
+    {
+        name: "run_deep_sim",
+        category: "deep_sim",
+        tags: ["deeptrace", "simulation", "scenario", "branching", "agents"],
+        quickRef: {
+            nextAction: "Simulation complete. Rank interventions by impact delta, render a decision memo, or score compounding trajectory drift.",
+            nextTools: ["rank_interventions", "render_decision_memo", "score_compounding"],
+            methodology: "deep_sim",
+        },
+        phase: "research",
+        complexity: "high",
+    },
+    {
+        name: "rank_interventions",
+        category: "deep_sim",
+        tags: ["deeptrace", "interventions", "ranking", "delta"],
+        quickRef: {
+            nextAction: "Interventions ranked by delta. Render a decision memo for stakeholders, or score compounding to check trajectory drift.",
+            nextTools: ["render_decision_memo", "score_compounding"],
+            methodology: "deep_sim",
+        },
+        phase: "research",
+        complexity: "medium",
+    },
+    {
+        name: "score_compounding",
+        category: "deep_sim",
+        tags: ["deeptrace", "trajectory", "compounding", "drift", "score"],
+        quickRef: {
+            nextAction: "Compounding score computed. Render a decision memo summarizing the trajectory, or re-rank interventions if drift is significant.",
+            nextTools: ["render_decision_memo", "rank_interventions"],
+            methodology: "deep_sim",
+        },
+        phase: "verify",
+        complexity: "medium",
+    },
+    {
+        name: "render_decision_memo",
+        category: "deep_sim",
+        tags: ["deeptrace", "memo", "decision", "executive", "report"],
+        quickRef: {
+            nextAction: "Decision memo rendered. Share with stakeholders. To iterate, rebuild the claim graph or extract new variables.",
+            nextTools: ["build_claim_graph", "extract_variables"],
+            methodology: "deep_sim",
+        },
+        phase: "ship",
+        complexity: "medium",
+    },
+    // ═══ FOUNDER ═══
+    {
+        name: "founder_deep_context_gather",
+        category: "founder",
+        tags: ["founder", "artifact", "packet", "context", "gather", "deep-search", "ocd", "nudge"],
+        quickRef: {
+            nextAction: "Context gather protocol returned. Complete ALL required steps, then call founder_packet_validate before saving.",
+            nextTools: ["founder_packet_validate", "founder_packet_diff"],
+            methodology: "founder",
+            tip: "Always call this BEFORE generating an artifact packet. It ensures OCD-level thoroughness.",
+        },
+        phase: "research",
+        complexity: "medium",
+    },
+    {
+        name: "founder_packet_validate",
+        category: "founder",
+        tags: ["founder", "artifact", "packet", "validate", "quality-gate", "audit"],
+        quickRef: {
+            nextAction: "Packet validated. If passed, save and export. If failed, fix the flagged issues and re-validate.",
+            nextTools: ["founder_packet_diff", "founder_deep_context_gather"],
+            methodology: "founder",
+        },
+        phase: "verify",
+        complexity: "low",
+    },
+    {
+        name: "founder_packet_diff",
+        category: "founder",
+        tags: ["founder", "artifact", "packet", "diff", "history", "drift", "comparison"],
+        quickRef: {
+            nextAction: "Diff generated. Surface new contradictions and unresolved actions to the founder. Feed findings into the next packet generation.",
+            nextTools: ["founder_deep_context_gather", "founder_packet_validate"],
+            methodology: "founder",
+        },
+        phase: "research",
+        complexity: "low",
+    },
 ];
 // ── Exported lookup structures ───────────────────────────────────────────
 /** Map of tool name → registry entry for O(1) lookup */
@@ -3090,6 +3689,13 @@ export const ALL_REGISTRY_ENTRIES = REGISTRY_ENTRIES;
 export function getQuickRef(toolName) {
     return TOOL_REGISTRY.get(toolName)?.quickRef ?? null;
 }
+/**
+ * Compatibility shim for older callers wanting a "related tools" list: quickRef.nextTools, or [] if absent.
+ */
+export function computeRelatedTools(toolName) {
+    const quickRef = getQuickRef(toolName);
+    return quickRef?.nextTools ?? [];
+}
 /** Get all tools in a category */
 export function getToolsByCategory(category) {
     return REGISTRY_ENTRIES.filter((e) => e.category === category);
@@ -3222,6 +3828,135 @@ export function getToolComplexity(toolName) {
         return CATEGORY_COMPLEXITY[entry.category] ?? "medium";
     return "medium";
 }
+/**
+ * Category-level annotation defaults, keyed by registry category name. A tool inherits
+ * its category's hints; a TOOL_ANNOTATION_OVERRIDES entry is merged on top per key.
+ * Hints left out of an entry are simply absent from the result (no value is filled in).
+ * NOTE(review): MCP defaults an absent openWorldHint/destructiveHint to true — confirm intent.
+ * - readOnlyHint: true  → category only reads/analyzes, no mutations
+ * - destructiveHint: true → category creates, writes, deletes, or sends data
+ * - openWorldHint: true  → category hits external services (network, APIs)
+ */
+const CATEGORY_ANNOTATIONS = {
+    // ── Read-only categories (meant as no side effects, no network; only readOnlyHint is set) ──
+    reconnaissance: { readOnlyHint: true },
+    progressive_discovery: { readOnlyHint: true },
+    meta: { readOnlyHint: true },
+    toon: { readOnlyHint: true },
+    pattern: { readOnlyHint: true },
+    local_file: { readOnlyHint: true },
+    architect: { readOnlyHint: true },
+    local_dashboard: { readOnlyHint: true },
+    design_governance: { readOnlyHint: true },
+    agent_traverse: { readOnlyHint: true },
+    observability: { readOnlyHint: true },
+    research_optimizer: { readOnlyHint: true },
+    documentation: { readOnlyHint: true },
+    security: { readOnlyHint: true },
+    gaia_solvers: { readOnlyHint: true },
+    ui_ux_dive: { readOnlyHint: true },
+    ui_ux_dive_v2: { readOnlyHint: true },
+    // ── Stateful but non-destructive categories (write to local DB/state) ──
+    verification: { readOnlyHint: false, destructiveHint: false },
+    eval: { readOnlyHint: false, destructiveHint: false },
+    quality_gate: { readOnlyHint: false, destructiveHint: false },
+    learning: { readOnlyHint: false, destructiveHint: false },
+    flywheel: { readOnlyHint: false, destructiveHint: false },
+    session_memory: { readOnlyHint: false, destructiveHint: false },
+    self_eval: { readOnlyHint: false, destructiveHint: false },
+    critter: { readOnlyHint: false, destructiveHint: false },
+    engine_context: { readOnlyHint: false, destructiveHint: false },
+    qa_orchestration: { readOnlyHint: false, destructiveHint: false },
+    skill_update: { readOnlyHint: false, destructiveHint: false },
+    benchmark: { readOnlyHint: false, destructiveHint: false },
+    thompson_protocol: { readOnlyHint: false, destructiveHint: false },
+    parallel_agents: { readOnlyHint: false, destructiveHint: false },
+    research_writing: { readOnlyHint: false, destructiveHint: false },
+    platform: { readOnlyHint: false, destructiveHint: false },
+    // ── Destructive categories (create, write, delete, execute) ──
+    boilerplate: { destructiveHint: true },
+    bootstrap: { destructiveHint: true },
+    git_workflow: { destructiveHint: true },
+    context_sandbox: { destructiveHint: true },
+    // ── Open-world categories (external network access); some also pin readOnlyHint/destructiveHint ──
+    web: { openWorldHint: true },
+    web_scraping: { openWorldHint: true },
+    github: { openWorldHint: true },
+    llm: { openWorldHint: true },
+    email: { openWorldHint: true, destructiveHint: true },
+    rss: { openWorldHint: true },
+    voice_bridge: { openWorldHint: true },
+    mcp_bridge: { openWorldHint: true },
+    flicker_detection: { openWorldHint: true },
+    figma_flow: { openWorldHint: true },
+    seo: { readOnlyHint: true, openWorldHint: true },
+    visual_qa: { readOnlyHint: true, openWorldHint: true },
+    ui_capture: { readOnlyHint: false, openWorldHint: true },
+    vision: { readOnlyHint: true, openWorldHint: true },
+};
+/**
+ * Per-tool annotation overrides, keyed by tool name; sparse — only tools deviating from their category.
+ * Merged per key over the category default, so hints an override omits still come from the category.
+ */
+const TOOL_ANNOTATION_OVERRIDES = {
+    // ── Explicitly destructive tools ──
+    send_email: { destructiveHint: true, openWorldHint: true },
+    execute_shell_command: { destructiveHint: true },
+    sandbox_execute: { destructiveHint: true },
+    scaffold_nodebench_project: { destructiveHint: true },
+    scaffold_research_pipeline: { destructiveHint: true },
+    git_create_branch: { destructiveHint: true },
+    git_commit_changes: { destructiveHint: true },
+    git_push_branch: { destructiveHint: true, openWorldHint: true },
+    create_visual_pr: { destructiveHint: true, openWorldHint: true },
+    cleanup_stale_runs: { destructiveHint: true },
+    // ── Explicitly read-only tools in otherwise mutable categories (NOTE(review): category destructiveHint is still inherited — confirm) ──
+    get_verification_status: { readOnlyHint: true },
+    list_verification_cycles: { readOnlyHint: true },
+    list_eval_runs: { readOnlyHint: true },
+    compare_eval_runs: { readOnlyHint: true },
+    get_gate_history: { readOnlyHint: true },
+    get_gate_preset: { readOnlyHint: true },
+    get_flywheel_status: { readOnlyHint: true },
+    get_parallel_status: { readOnlyHint: true },
+    get_agent_role: { readOnlyHint: true },
+    list_agent_tasks: { readOnlyHint: true },
+    get_project_context: { readOnlyHint: true },
+    get_boilerplate_status: { readOnlyHint: true },
+    load_session_notes: { readOnlyHint: true },
+    refresh_task_context: { readOnlyHint: true },
+    get_engine_context_health: { readOnlyHint: true },
+    get_workflow_history: { readOnlyHint: true },
+    search_content_archive: { readOnlyHint: true },
+    search_all_knowledge: { readOnlyHint: true },
+    get_recon_summary: { readOnlyHint: true },
+    save_session_note: { destructiveHint: false },
+    // ── Open-world overrides for specific tools ──
+    fetch_url: { openWorldHint: true, readOnlyHint: true },
+    web_search: { openWorldHint: true, readOnlyHint: true },
+    search_github: { openWorldHint: true, readOnlyHint: true },
+    check_mcp_setup: { readOnlyHint: true, openWorldHint: true },
+    scrapling_crawl_stop: { destructiveHint: false, openWorldHint: true },
+    // ── Discovery tools are always read-only (NOTE(review): findTools/getMethodology are camelCase — verify they match registered names) ──
+    discover_tools: { readOnlyHint: true },
+    get_tool_quick_ref: { readOnlyHint: true },
+    get_workflow_chain: { readOnlyHint: true },
+    findTools: { readOnlyHint: true },
+    getMethodology: { readOnlyHint: true },
+};
+/**
+ * Get MCP security annotations for a tool.
+ * Resolution: category default, then the per-tool override merged on top; {} if neither applies.
+ * Always returns a fresh object so callers cannot mutate the shared annotation tables.
+ * @param {string} toolName - Registered tool name (unknown names are allowed).
+ * @returns {object} Applicable readOnlyHint / destructiveHint / openWorldHint values.
+ */
+export function getToolAnnotations(toolName) {
+    const entry = TOOL_REGISTRY.get(toolName);
+    const categoryDefaults = entry ? CATEGORY_ANNOTATIONS[entry.category] : undefined;
+    // Spreading undefined is a no-op, so a missing default or override contributes nothing.
+    return { ...categoryDefaults, ...TOOL_ANNOTATION_OVERRIDES[toolName] };
+}
 // ── Synonym / semantic expansion map ──────────────────────────────────────
 const SYNONYM_MAP = {
     // ── Existing technical synonyms ──
@@ -3474,7 +4209,7 @@ export function _setDbAccessor(accessor) {
  * Approach: for each session, pull the ordered tool sequence, then count
  * pairs within a sliding window of 5 calls. O(n) per session, no self-join.
  */
-function getCooccurrenceEdges() {
+export function getCooccurrenceEdges() {
     const now = Date.now();
     if (_cooccurrenceCache && now - _cooccurrenceCacheTime < COOCCURRENCE_TTL_MS) {
         return _cooccurrenceCache;
@@ -3971,6 +4706,28 @@ export const WORKFLOW_CHAINS = {
             { tool: "save_session_note", action: "Save traceability note — cite original request, record root cause and fix" },
         ],
     },
+    autonomous_qa_bug: {
+        name: "Autonomous QA Bug Verdict",
+        description: "Evidence-first bug reproduction with trigger/verify split, bounded retries, blocked-infra classification, and anomaly isolation",
+        steps: [
+            { tool: "search_all_knowledge", action: "Check prior bug signatures, setup blockers, and learned repro patterns before touching the workflow" },
+            { tool: "start_execution_run", action: "Open an execution trace so setup, trigger, verification, and verdict all land in one auditable run" },
+            { tool: "plan_decompose_mission", action: "Break the bug into setup, trigger, verify, evidence, and verdict subtasks with bounded contracts" },
+            { tool: "record_execution_step", action: "Log environment setup and preconditions before attempting reproduction" },
+            { tool: "record_execution_verification", action: "Verify setup state explicitly before trigger; classify missing environment or auth as blocked infra" },
+            { tool: "record_execution_step", action: "Execute the smallest trigger needed to reproduce the reported symptom" },
+            { tool: "attach_execution_evidence", action: "Attach screenshots, logs, videos, metrics, or diffs that show actual behavior" },
+            { tool: "get_gate_preset", action: "Load the agent_bug_verdict gate so the pre-verdict checks stay explicit and boolean" },
+            { tool: "run_quality_gate", action: "Run the agent_bug_verdict gate before deciding pass/fail/block" },
+            { tool: "judge_verify_subtask", action: "Judge the primary bug against the output contract with evidence-backed verdict and confidence" },
+            { tool: "judge_request_retry", action: "Retry only the failing trigger or setup step, up to budget; escalate blocked infra instead of looping blindly" },
+            { tool: "log_gap", action: "Log anomalies or newly found bugs separately so they do not overwrite the main bug verdict" },
+            { tool: "sniff_record_human_review", action: "Record human sniff-check when the verdict is high-risk, ambiguous, or externally visible" },
+            { tool: "complete_execution_run", action: "Close the trace with final status, evidence summary, and any drift from the original bug mission" },
+            { tool: "save_session_note", action: "Save traceability note — cite original bug, blocker classification, evidence path, and final verdict" },
+            { tool: "record_learning", action: "Record the reproduction pattern, blocker signature, and anomaly handling guidance for future runs" },
+        ],
+    },
     ui_change: {
         name: "UI/UX Change",
         description: "Frontend implementation with visual verification",
@@ -4043,7 +4800,7 @@ export const WORKFLOW_CHAINS = {
     },
     security_audit: {
         name: "Security Audit",
-        description: "Comprehensive security assessment of dependencies, code, and terminal history",
+        description: "Security audit of dependencies, code, and terminal history",
         steps: [
             { tool: "search_all_knowledge", action: "Check past security findings" },
             { tool: "scan_dependencies", action: "Check npm/pip packages for known CVEs" },
@@ -4058,7 +4815,7 @@ export const WORKFLOW_CHAINS = {
     },
     code_review: {
         name: "Code Review",
-        description: "Structured code review with quality gates and learning capture",
+        description: "Code review with quality gates and learning capture",
         steps: [
             { tool: "search_all_knowledge", action: "Check for relevant past patterns and gotchas" },
             { tool: "run_closed_loop", action: "Verify code compiles and tests pass" },
@@ -4102,7 +4859,7 @@ export const WORKFLOW_CHAINS = {
     },
     coordinator_spawn: {
         name: "Coordinator → Subagent Spawn",
-        description: "Spawn and coordinate parallel subagents with task locks, roles, and gates",
+        description: "Coordinate parallel subagents with task locks and gates",
         steps: [
             { tool: "search_all_knowledge", action: "Check prior coordination patterns" },
             { tool: "get_parallel_status", action: "Check current agent activity" },
@@ -4118,7 +4875,7 @@ export const WORKFLOW_CHAINS = {
     },
     self_setup: {
         name: "Self-Setup / Capability Escalation",
-        description: "Detect and resolve missing capabilities before starting work",
+        description: "Detect and resolve missing capabilities before work",
         steps: [
             { tool: "discover_tools", action: "Search for needed capability" },
             { tool: "get_tool_quick_ref", action: "Check if tool exists but needs configuration" },
@@ -4132,7 +4889,7 @@ export const WORKFLOW_CHAINS = {
     },
     flicker_detection: {
         name: "Android Flicker Detection",
-        description: "Detect and analyze Android UI flicker using 4-layer pipeline",
+        description: "Detect Android UI flicker via 4-layer pipeline",
         steps: [
             { tool: "search_all_knowledge", action: "Check past flicker patterns and known issues" },
             { tool: "capture_surface_stats", action: "L0: Capture SurfaceFlinger jank metrics" },
@@ -4145,7 +4902,7 @@ export const WORKFLOW_CHAINS = {
     },
     figma_flow_analysis: {
         name: "Figma Flow Analysis",
-        description: "Extract, cluster, and visualize Figma design flows",
+        description: "Extract, cluster, and visualize Figma flows",
         steps: [
             { tool: "search_all_knowledge", action: "Check past design flow analysis patterns" },
             { tool: "extract_figma_frames", action: "Phase 1: Depth-3 tree traversal for frames" },
@@ -4157,7 +4914,7 @@ export const WORKFLOW_CHAINS = {
     },
     agent_eval: {
         name: "Agent Evaluation Pipeline",
-        description: "Measure, observe, and improve agent performance using NodeBench MCP. Combines contract compliance scoring, trajectory analysis, eval runs, and self-reinforced learning to create a closed loop: run agent → score → identify gaps → fix → re-score.",
+        description: "Measure and improve agent performance via closed-loop eval",
         steps: [
             { tool: "check_contract_compliance", action: "Score the agent session against the 6-dimension contract (front-door, self-setup, pre-impl, parallel, ship-gates, efficiency)" },
             { tool: "get_trajectory_analysis", action: "Analyze tool usage patterns — frequency, errors, sequential bigrams, phase distribution" },
@@ -4172,7 +4929,7 @@ export const WORKFLOW_CHAINS = {
     },
     contract_compliance: {
         name: "Contract Compliance Audit",
-        description: "Verify that an agent session followed the NodeBench Agent Contract. Quick check after any agent task completes.",
+        description: "Verify agent session followed the NodeBench contract",
         steps: [
             { tool: "log_tool_call", action: "Ensure all tool calls in the session are logged (auto-instrumented or manual)" },
             { tool: "check_contract_compliance", action: "Score the session across 6 dimensions (25 front-door + 10 self-setup + 15 pre-impl + 10 parallel + 30 ship-gates + 10 efficiency = 100)" },
@@ -4183,7 +4940,7 @@ export const WORKFLOW_CHAINS = {
     },
     ablation_eval: {
         name: "Ablation Evaluation (Prove NodeBench MCP Value)",
-        description: "Run controlled experiments comparing agent performance with vs without NodeBench MCP. Based on Anthropic's eval harness methodology. Tests 5 conditions (bare/lite/full/cold_kb/no_gates) across a fixed task bank with multi-trial statistics.",
+        description: "A/B test agent performance across 5 conditions with eval stats",
         steps: [
             { tool: "create_task_bank", action: "Step 1: Define tasks with deterministic success criteria, forbidden behaviors, and budgets. Target 30-200 tasks." },
             { tool: "get_gate_preset", action: "Step 2: Load agent_comparison gate preset — 10 boolean rules covering outcome + process quality" },
@@ -4199,7 +4956,7 @@ export const WORKFLOW_CHAINS = {
     },
     session_recovery: {
         name: "Session Recovery (Post-Compaction)",
-        description: "Recover state after context compaction, /clear, or session resume. Loads filesystem notes, refreshes task context, and continues where you left off.",
+        description: "Recover state after compaction, /clear, or session resume",
         steps: [
             { tool: "load_session_notes", action: "Step 1: Load today's session notes from filesystem" },
             { tool: "refresh_task_context", action: "Step 2: Re-inject active verification cycle, open gaps, and recent learnings" },
@@ -4211,7 +4968,7 @@ export const WORKFLOW_CHAINS = {
     },
     attention_refresh: {
         name: "Attention Refresh (Mid-Session)",
-        description: "Combat attention drift after 30+ tool calls. Re-injects original goals, checks for drift, and re-anchors focus. Based on Manus 'Manipulate Attention Through Recitation' principle.",
+        description: "Re-inject goals and re-anchor focus after 30+ tool calls",
         steps: [
             { tool: "refresh_task_context", action: "Step 1: Re-inject current goals, open gaps, and session stats" },
             { tool: "save_session_note", action: "Step 2: Save progress checkpoint before continuing" },
@@ -4221,7 +4978,7 @@ export const WORKFLOW_CHAINS = {
     },
     task_bank_setup: {
         name: "Task Bank Setup (50-Task Starter Kit)",
-        description: "Build a statistically meaningful task bank for agent evaluation. Covers 7 categories (bugfix/refactor/integration/ui/security/performance/migration) × 4 difficulty levels with deterministic grading criteria.",
+        description: "Build a task bank for agent eval across 7 categories",
         steps: [
             { tool: "search_all_knowledge", action: "Step 1: Search past learnings and recon findings for real bugs/tasks to include" },
             { tool: "create_task_bank", action: "Step 2: Add 10 bugfix tasks (easy→expert) with test-based success criteria" },
@@ -4236,7 +4993,7 @@ export const WORKFLOW_CHAINS = {
     },
     pr_review: {
         name: "Pull Request Review",
-        description: "Structured PR review with git compliance, verification cross-reference, and merge gate",
+        description: "PR review with git compliance and merge gate",
         steps: [
             { tool: "check_git_compliance", action: "Verify branch state and commit conventions" },
             { tool: "review_pr_checklist", action: "Run structured PR checklist with verification cross-reference" },
@@ -4247,7 +5004,7 @@ export const WORKFLOW_CHAINS = {
     },
     seo_audit: {
         name: "Full SEO Audit",
-        description: "End-to-end SEO audit: technical SEO, content analysis, performance, WordPress security",
+        description: "SEO audit: technical, content, performance, WordPress",
         steps: [
             { tool: "seo_audit_url", action: "Analyze meta tags, headings, images, structured data" },
             { tool: "analyze_seo_content", action: "Check readability, keyword density, link ratios" },
@@ -4271,7 +5028,7 @@ export const WORKFLOW_CHAINS = {
     },
     intentionality_check: {
         name: "Intentionality Check (Critter)",
-        description: "Pause before action — articulate why and who, then proceed with clarity",
+        description: "Articulate why and who before acting, then proceed",
         steps: [
             { tool: "critter_check", action: "Answer: Why are you doing this? Who is it for? Score your intentionality" },
             { tool: "save_session_note", action: "Persist the critter check so it survives context compaction" },
@@ -4280,7 +5037,7 @@ export const WORKFLOW_CHAINS = {
     },
     research_digest: {
         name: "Automated Research Digest",
-        description: "Subscribe to RSS/Atom feeds, fetch new articles, build a digest, and optionally email it",
+        description: "Subscribe to RSS/Atom feeds, build digest, email it",
         steps: [
             { tool: "add_rss_source", action: "Register RSS/Atom feed URLs for topics of interest (arXiv, blogs, news)" },
             { tool: "fetch_rss_feeds", action: "Pull latest articles from all registered sources — new items stored in SQLite" },
@@ -4292,7 +5049,7 @@ export const WORKFLOW_CHAINS = {
     },
     email_assistant: {
         name: "Email Draft Assistant",
-        description: "Read inbox, draft professional replies, review, and send — all from the agent",
+        description: "Read inbox, draft replies, review, and send via agent",
         steps: [
             { tool: "read_emails", action: "Fetch recent/unread emails from IMAP inbox to understand what needs attention" },
             { tool: "draft_email_reply", action: "Generate a professional reply draft from original email context and your instructions" },
@@ -4302,7 +5059,7 @@ export const WORKFLOW_CHAINS = {
     },
     webmcp_discovery: {
         name: "WebMCP Origin Discovery",
-        description: "Connect to a WebMCP-enabled origin, discover its tools, and invoke them from the agent",
+        description: "Connect to WebMCP origin, discover and invoke tools",
         steps: [
             { tool: "connect_webmcp_origin", action: "Connect to the target origin URL and establish a WebMCP session" },
             { tool: "list_webmcp_tools", action: "List all tools exposed by the origin with schemas and annotations" },
@@ -4312,7 +5069,7 @@ export const WORKFLOW_CHAINS = {
     },
     batch_autopilot: {
         name: "Batch Autopilot Run",
-        description: "Set up an operator profile and run a batch autopilot session for autonomous agent tasks",
+        description: "Set up operator profile and run batch autopilot session",
         steps: [
             { tool: "setup_operator_profile", action: "Create or update USER.md and operator profile for autopilot context" },
             { tool: "get_autopilot_status", action: "Check current autopilot readiness, profile completeness, and last run status" },
@@ -4323,7 +5080,7 @@ export const WORKFLOW_CHAINS = {
     },
     daily_review: {
         name: "Daily Brief Review",
-        description: "Pull the latest daily brief, review narrative threads, check ops dashboard, and sync to local storage",
+        description: "Pull daily brief, review narratives, check ops dashboard",
         steps: [
             { tool: "sync_daily_brief", action: "Pull today's brief and narrative from Convex into local SQLite" },
             { tool: "get_daily_brief_summary", action: "Get the full brief summary with key signals and insights" },
@@ -4334,7 +5091,7 @@ export const WORKFLOW_CHAINS = {
     },
     deep_interaction: {
         name: "Deep Interaction Discovery & Capture",
-        description: "Systematically discover, capture, and verify interactive UI behaviors — popups, drawers, streaming responses, hover states, agent conversations, thread management, keyboard shortcuts. Goes beyond static screenshot routes to test real user behavior flows.",
+        description: "Discover, capture, and verify interactive UI behaviors",
         steps: [
             { tool: "dive_auto_discover", action: "Auto-discover interactive components (buttons, drawers, modals, expandable rows) across all routes" },
             { tool: "start_ui_dive", action: "Start a structured UI dive session to track interaction coverage" },
@@ -4350,7 +5107,7 @@ export const WORKFLOW_CHAINS = {
     },
     gemini_qa: {
         name: "Gemini Vision QA Loop",
-        description: "Automated UI/UX quality gate — capture screenshots (dark/light × desktop/mobile), send to Gemini Flash for Jony Ive product design review, fix issues, loop until 100/100",
+        description: "Gemini vision QA loop: capture, score, fix, repeat",
         steps: [
             { tool: "check_mcp_setup", action: "Verify Gemini API key (GOOGLE_AI_KEY) and vision domain are ready" },
             { tool: "start_verification_cycle", action: "Open a verification cycle titled 'Gemini QA Loop' to track progress" },
@@ -4363,9 +5120,53 @@ export const WORKFLOW_CHAINS = {
             { tool: "record_learning", action: "Record QA trajectory and Gemini finding patterns for regression tracking" },
         ],
     },
+    six_hour_qa: { // Workflow chain: a QA run described as nine phases; each step's action string carries its own "Phase N" label.
+        name: "6-Hour Comprehensive QA Workflow",
+        description: "9-phase automated pipeline covering all 39 routes, 18 interaction scenarios (before/during/after captures), 12 animation-critical routes (SSIM burst analysis), 6 screenshot variants (dark/light × desktop/mobile × normal/reduced-motion), 15 Jony Ive aesthetic criteria, Gemini Vision dogfood, 10 agent eval scenarios via LLM judge, learning loop, and final verdict synthesis. Parallelized in batches of 6 concurrent routes.", // NOTE(review): "6 screenshot variants" is spelled out as three two-way axes, which would give 8 combinations — confirm the intended count.
+        steps: [ // Listed in phase order (1 through 9). run_visual_qa_suite and save_session_note each appear in three steps.
+            { tool: "start_verification_cycle", action: "Phase 1 SETUP: Create root QA session, run vite build + tsc --noEmit + vitest, capture baseline test counts and screenshot manifest" },
+            { tool: "run_closed_loop", action: "Phase 1 SETUP: Verify build compiles, zero type errors, all tests pass — establish baseline metrics" },
+            { tool: "get_gate_preset", action: "Phase 2 APP_QA: Load a11y gate (12 WCAG 2.1 AA rules) — ARIA, contrast, keyboard, focus, skip-link, tab-order, touch-targets" }, // NOTE(review): 12 rules here, but comprehensive_qa's a11y step says 8 — confirm which count is current.
+            { tool: "run_quality_gate", action: "Phase 2 APP_QA: Run a11y + visual_regression + code_review + ui_ux_qa + performance gates on all 39 routes (batched ×6 parallel)" },
+            { tool: "capture_ui_screenshot", action: "Phase 3 INTERACTIONS: Capture BEFORE state for 18 interaction scenarios (command palette, sidebar hover, tab switch, entity search, etc.)" },
+            { tool: "run_visual_qa_suite", action: "Phase 3 INTERACTIONS: Trigger each interaction, capture DURING state (tooltip visible, modal open, thread expanding), wait settle delay" },
+            { tool: "diff_screenshots", action: "Phase 3 INTERACTIONS: Capture AFTER state (settled, restored), diff BEFORE→AFTER to verify clean state restoration" },
+            { tool: "run_visual_qa_suite", action: "Phase 4 ANIMATION: Burst capture 12 animation-critical routes (10-15 frames each, 40-100ms interval), compute SSIM stability scores" },
+            { tool: "compute_web_stability", action: "Phase 4 ANIMATION: Verify no jank frames (SSIM>threshold), effective FPS>30, frame delta variance<2× median per route" },
+            { tool: "run_visual_qa_suite", action: "Phase 4 ANIMATION: Re-test all 12 routes with prefers-reduced-motion:reduce — SSIM must be >0.98 (near-static)" },
+            { tool: "analyze_screenshot", action: "Phase 5 AESTHETIC: Gemini Vision Pro review of 39 routes × 4 variants — 15 Jony Ive criteria (earned complexity, visual hierarchy, spacing, typography, color harmony, alignment, whitespace, icons, loading elegance, empty states, mobile adaptation, dark mode refinement, animation purpose, focus states, error states)" }, // NOTE(review): "4 variants" here, while the description and the Phase 9 step both say 6 — confirm.
+            { tool: "save_session_note", action: "Phase 6 DOGFOOD: Trigger Gemini Vision dogfood QA (screenshotQa + videoQa) on 6 screenshot variants, compute score (100 - P0×10 - P1×6 - P2×2 - P3×1)" }, // NOTE(review): the tool is save_session_note although the action describes triggering QA — presumably the note records the outcome; confirm.
+            { tool: "start_eval_run", action: "Phase 7 AGENT_EVAL: Create eval suite with 10 agent scenarios (research thesis, DD verify, QA bug, contract compliance, workflow chain, discovery, evidence gathering, cross-check, multi-agent coordination, error recovery)" },
+            { tool: "save_session_note", action: "Phase 7 AGENT_EVAL: Execute each scenario, grade with LLM judge (8 boolean criteria), record evalResults with per-scenario reasoning" }, // NOTE(review): same tool/action mismatch as the Phase 6 step — confirm the intended tool.
+            { tool: "complete_eval_run", action: "Phase 7 AGENT_EVAL: Finalize eval run — pass rate, critical criteria check (noHallucination + noForbiddenActions), failure patterns" },
+            { tool: "compare_eval_runs", action: "Phase 7 AGENT_EVAL: Compare against baseline — DEPLOY/REVERT/INVESTIGATE recommendation" },
+            { tool: "get_improvement_recommendations", action: "Phase 8 LEARNING: Extract failure patterns from all 9 phases — gate failures, interaction mismatches, jank, aesthetic violations, agent failures" },
+            { tool: "record_learning", action: "Phase 8 LEARNING: 5-whys root cause → targeted fix → re-eval → compare. Bank edge cases for regression prevention" },
+            { tool: "save_session_note", action: "Phase 9 SYNTHESIS: Cross-check all evidence, compute final verdict (verified/provisionally_verified/needs_review/failed), generate proof pack with coverage: 39 routes × 6 variants × 18 interactions × 12 animation routes" },
+        ],
+    },
+    comprehensive_qa: { // Workflow chain: runs four quality gates in sequence inside one verification cycle, then records results.
+        name: "Comprehensive QA Suite",
+        description: "Full QA pipeline: accessibility audit, visual regression, code review, deploy readiness, and verdict derivation", // NOTE(review): no step below names "verdict derivation" explicitly — confirm which step covers it.
+        steps: [ // Pattern: get_gate_preset then run_quality_gate for each gate (a11y, visual_regression, code_review, deploy_readiness).
+            { tool: "start_verification_cycle", action: "Open a QA verification cycle to track all checks in one auditable run" },
+            { tool: "get_gate_preset", action: "Load the a11y gate preset — 8 WCAG 2.1 AA rules for accessibility compliance" }, // NOTE(review): 8 rules here, but six_hour_qa's a11y step says 12 — confirm which count is current.
+            { tool: "run_quality_gate", action: "Run the a11y gate against changed components — check ARIA, contrast, keyboard, focus, motion, forms, landmarks" },
+            { tool: "get_gate_preset", action: "Load the visual_regression gate — 6 rules for baseline comparison, layout shift, responsive, dark/light" },
+            { tool: "run_quality_gate", action: "Run the visual_regression gate — compare screenshots against baselines at 3 viewports" },
+            { tool: "get_gate_preset", action: "Load the code_review gate — compile, lint, tests, secrets, error handling, patterns, regression test" },
+            { tool: "run_quality_gate", action: "Run the code_review gate against all changed files" },
+            { tool: "run_closed_loop", action: "Execute compile→lint→test→debug closed loop until full green" }, // Sits between the code_review and deploy_readiness gates.
+            { tool: "get_gate_preset", action: "Load deploy_readiness gate — all tests, no critical gaps, eval scores, learnings, no TODOs" },
+            { tool: "run_quality_gate", action: "Run deploy_readiness gate to confirm the change is ready to ship" },
+            { tool: "log_test_result", action: "Record the full QA suite result with layer=integration and all gate scores" },
+            { tool: "record_learning", action: "Bank QA findings, edge cases, and accessibility patterns for future runs" },
+            { tool: "save_session_note", action: "Save traceability note linking this QA run to the original request, with citedFrom reference" },
+        ],
+    },
     content_pipeline: {
         name: "Daily Content Pipeline",
-        description: "End-to-end content production: gather signals from RSS + web search, build research digest, generate a 3-post thread (Signal/Analysis/Agency) via LLM, quality-gate the output, and publish via email or save for manual posting. Engine-graded for conformance.",
+        description: "Gather signals, build digest, generate 3-post thread, publish",
         steps: [
             { tool: "fetch_rss_feeds", action: "Pull latest articles from all registered RSS/Atom sources — new items stored in SQLite" },
             { tool: "web_search", action: "Search for breaking developments in target topics (AI, infrastructure, security) to supplement RSS" },
@@ -4380,7 +5181,7 @@ export const WORKFLOW_CHAINS = {
     },
     content_publish: {
         name: "Content Publish & Distribute",
-        description: "Take a content pipeline output and distribute across channels — email digest to subscribers, format for LinkedIn posting, archive to knowledge base",
+        description: "Distribute content across email, LinkedIn, and archive",
         steps: [
             { tool: "search_all_knowledge", action: "Load the latest content pipeline output from session notes or knowledge base" },
             { tool: "call_llm", action: "Format content for target platform (LinkedIn character limits, email HTML, markdown archive)" },
@@ -4392,7 +5193,7 @@ export const WORKFLOW_CHAINS = {
     },
     agent_traversal: {
         name: "Agent Frontend Traversal",
-        description: "Navigate the NodeBench AI frontend as an agent — discover views, interact with per-view tools, traverse content feeds, and plan multi-view journeys",
+        description: "Navigate frontend views, invoke per-view tools, traverse feeds",
         steps: [
             { tool: "list_available_views", action: "Discover all 27 views with capabilities and available tools" },
             { tool: "get_traversal_plan", action: "Generate a goal-based traversal plan ranking views by relevance" },
@@ -4406,7 +5207,7 @@ export const WORKFLOW_CHAINS = {
     },
     research_optimizer: {
         name: "Research Optimization Pipeline",
-        description: "Single-agent deep web research → structured extraction → multi-criteria scoring → ranked recommendation. For travel booking, vendor selection, investment comparison, or any multi-attribute decision.",
+        description: "Deep research, extract, score, and rank multi-attribute options",
         steps: [
             { tool: "web_search", action: "Search for options and pricing across multiple queries (hotels, flights, products)" },
             { tool: "fetch_url", action: "Fetch detailed pages for top search results — extract pricing, reviews, specs" },
@@ -4419,7 +5220,7 @@ export const WORKFLOW_CHAINS = {
     },
     parallel_research: {
         name: "Parallel Multi-Agent Research",
-        description: "Coordinator spawns specialized sub-agents for parallel research, merges results, scores, and optimizes. For complex decisions requiring multiple information sources gathered simultaneously.",
+        description: "Spawn parallel sub-agents for research, merge and score results",
         steps: [
             { tool: "bootstrap_parallel_agents", action: "Scaffold parallel agent infrastructure — define roles for each research domain" },
             { tool: "claim_task", action: "Each sub-agent claims a research domain (pricing, reviews, logistics, availability)" },
@@ -4435,7 +5236,7 @@ export const WORKFLOW_CHAINS = {
     },
     competitive_intel: {
         name: "Competitive Intelligence Pipeline",
-        description: "Stealth-fetch competitor pages, extract structured data, merge multi-source results, score and rank. Uses Scrapling for anti-bot bypass and adaptive element tracking.",
+        description: "Stealth-fetch competitor pages, extract and rank data",
         steps: [
             { tool: "web_search", action: "Identify competitor URLs and market landscape" },
             { tool: "scrapling_batch_fetch", action: "Stealth-fetch 5-10 competitor pages in parallel with anti-bot bypass" },
@@ -4448,7 +5249,7 @@ export const WORKFLOW_CHAINS = {
     },
     price_monitor: {
         name: "Price Monitoring Pipeline",
-        description: "Crawl product pages, track price elements with adaptive relocation, aggregate data, and alert on changes. Uses Scrapling's element tracking to survive CSS changes.",
+        description: "Crawl product pages, track prices, alert on changes",
         steps: [
             { tool: "scrapling_crawl", action: "Start multi-page crawl of product catalog or competitor pricing pages" },
             { tool: "scrapling_crawl_status", action: "Poll crawl progress and collect extracted items" },
@@ -4462,7 +5263,7 @@ export const WORKFLOW_CHAINS = {
     },
     thompson_protocol: {
         name: "Thompson Protocol Content Pipeline",
-        description: "Transform complex topics into accessible content using the 'Calculus Made Easy' approach — 4-agent pipeline: Thompson Writer (plain English + analogies) → Feynman Editor (skeptical beginner rejection loop, max 3 cycles) → Visual Metaphor Mapper (1:1 analogy→visual prompts) → Anti-Elitism Linter (banned phrase detection + readability scoring) → Quality Gate (10-point checklist).",
+        description: "Transform complex topics into plain-English content via 4 agents",
         steps: [
             { tool: "thompson_pipeline", action: "Initialize the full pipeline — generates execution plan with all agent prompts and handoff points" },
             { tool: "thompson_write", action: "Transform the complex topic into plain-English sections with jargon translations, analogies, and difficulty acknowledgments" },
@@ -4476,5 +5277,30 @@ export const WORKFLOW_CHAINS = {
             { tool: "record_learning", action: "Record which analogies, styles, and audience levels produced the best engagement" },
         ],
     },
+    system_observability: {
+        name: "system_observability",
+        description: "System health check, drift detection, and auto-maintenance",
+        steps: [
+            { tool: "get_system_pulse", action: "Capture real-time health snapshot — DB, dashboards, errors, embedding cache, health score" },
+            { tool: "get_drift_report", action: "Detect configuration and state drift — orphaned cycles, stale runs, DB bloat, error spikes" },
+            { tool: "run_self_heal", action: "Auto-fix healable drift issues — abandoned cycles, stale runs, log pruning (use dry_run first)" },
+            { tool: "get_uptime_stats", action: "Review call rates, error trends, and top tools across time windows" },
+            { tool: "get_watchdog_log", action: "Check background watchdog history — health score trend, auto-healed actions" },
+            { tool: "save_session_note", action: "Record health findings and any manual interventions for future reference" },
+        ],
+    },
+    mission_execution: { // Workflow chain: decompose a mission, judge subtask outputs, take a human review, then merge passed artifacts.
+        name: "Mission Execution Harness",
+        description: "Hierarchical Planner → Worker → Judge → Human Sniff-Check → Merge pipeline for verifiable work", // NOTE(review): a Worker stage is named here, but no step below invokes a worker tool — presumably execution happens outside this chain; confirm.
+        steps: [ // harness_get_mission_status appears twice: once after planning and once as the closing audit.
+            { tool: "plan_decompose_mission", action: "Decompose mission into subtasks with verifiability tiers, judge methods, retry budgets, and output contracts" },
+            { tool: "harness_get_mission_status", action: "Check execution board — which subtasks are pending, assigned, or blocked" },
+            { tool: "judge_verify_subtask", action: "Judge reviews subtask output against output contract — verdict + evidence + artifacts" },
+            { tool: "judge_request_retry", action: "If failed: retry (with new instructions), replan, escalate, or stop if unverifiable" }, // Conditional step: per its action text it applies only when the judge verdict is a failure.
+            { tool: "sniff_record_human_review", action: "Human sniff-check: pass / concern / block with issue tags (weak_evidence, unsupported_claim, etc.)" },
+            { tool: "merge_compose_output", action: "Judge-gated merge of passed subtask artifacts into composed output" },
+            { tool: "harness_get_mission_status", action: "Final traceability audit — receipts, evidence refs, decisions, verifications, diffs, approvals" },
+        ],
+    },
 };
 //# sourceMappingURL=toolRegistry.js.map