npm - muonroi-cli - Versions diffs - 1.4.1 → 1.5.0 - Mend

muonroi-cli 1.4.1 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (172)

package/LICENSE +21 -21
package/README.md +122 -122
package/dist/packages/agent-harness-core/src/predicate.d.ts +1 -1
package/dist/src/agent-harness/__tests__/mock-model.spec.js +48 -1
package/dist/src/agent-harness/mock-model.d.ts +11 -0
package/dist/src/agent-harness/mock-model.js +21 -0
package/dist/src/cli/cost-forensics.js +12 -12
package/dist/src/council/__tests__/clarification-prompt.test.js +51 -0
package/dist/src/council/__tests__/clarifier-ready-gate.test.js +32 -0
package/dist/src/council/__tests__/decisions-lock.test.js +17 -1
package/dist/src/council/__tests__/oauth-reachable.test.d.ts +1 -0
package/dist/src/council/__tests__/oauth-reachable.test.js +31 -0
package/dist/src/council/__tests__/parse-outcome-fallback.test.js +11 -0
package/dist/src/council/clarifier.js +9 -1
package/dist/src/council/debate.js +5 -1
package/dist/src/council/decisions-lock.js +3 -3
package/dist/src/council/index.js +12 -5
package/dist/src/council/leader.d.ts +0 -17
package/dist/src/council/leader.js +22 -15
package/dist/src/council/planner.js +1 -1
package/dist/src/council/prompts.js +63 -57
package/dist/src/council/types.d.ts +7 -0
package/dist/src/ee/__tests__/ee-onboarding.test.d.ts +1 -0
package/dist/src/ee/__tests__/ee-onboarding.test.js +32 -0
package/dist/src/ee/auth.d.ts +9 -0
package/dist/src/ee/auth.js +19 -0
package/dist/src/ee/ee-onboarding.d.ts +5 -0
package/dist/src/ee/ee-onboarding.js +76 -0
package/dist/src/generated/version.d.ts +1 -1
package/dist/src/generated/version.js +1 -1
package/dist/src/headless/output.js +6 -4
package/dist/src/headless/output.test.js +4 -3
package/dist/src/index.js +20 -1
package/dist/src/mcp/__tests__/auto-setup.test.js +74 -0
package/dist/src/mcp/__tests__/client-pool.spec.d.ts +1 -0
package/dist/src/mcp/__tests__/client-pool.spec.js +98 -0
package/dist/src/mcp/__tests__/parallel-build.spec.d.ts +1 -0
package/dist/src/mcp/__tests__/parallel-build.spec.js +67 -0
package/dist/src/mcp/__tests__/smart-filter.test.js +56 -0
package/dist/src/mcp/auto-setup.js +56 -2
package/dist/src/mcp/client-pool.d.ts +46 -0
package/dist/src/mcp/client-pool.js +212 -0
package/dist/src/mcp/oauth-callback.js +2 -2
package/dist/src/mcp/parse-headers.test.js +14 -14
package/dist/src/mcp/runtime.d.ts +28 -0
package/dist/src/mcp/runtime.js +117 -51
package/dist/src/mcp/self-verify-runner.d.ts +14 -0
package/dist/src/mcp/self-verify-runner.js +38 -0
package/dist/src/mcp/setup-guide-text.d.ts +9 -0
package/dist/src/mcp/setup-guide-text.js +84 -0
package/dist/src/mcp/smart-filter.js +49 -0
package/dist/src/mcp/smoke.test.js +43 -43
package/dist/src/mcp/tools-server.d.ts +7 -0
package/dist/src/mcp/tools-server.js +19 -22
package/dist/src/models/catalog.json +349 -349
package/dist/src/ops/__tests__/doctor-ee-health.test.js +21 -0
package/dist/src/ops/doctor.d.ts +3 -2
package/dist/src/ops/doctor.js +47 -11
package/dist/src/ops/doctor.test.js +4 -3
package/dist/src/orchestrator/__tests__/mcp-capability-block.test.d.ts +1 -0
package/dist/src/orchestrator/__tests__/mcp-capability-block.test.js +39 -0
package/dist/src/orchestrator/__tests__/project-stack.test.d.ts +1 -0
package/dist/src/orchestrator/__tests__/project-stack.test.js +65 -0
package/dist/src/orchestrator/batch-turn-runner.js +7 -11
package/dist/src/orchestrator/message-processor.js +57 -27
package/dist/src/orchestrator/orchestrator.js +26 -0
package/dist/src/orchestrator/prompts.d.ts +51 -0
package/dist/src/orchestrator/prompts.js +257 -134
package/dist/src/orchestrator/scope-ceiling.js +6 -1
package/dist/src/orchestrator/stream-runner.js +20 -15
package/dist/src/orchestrator/text-tool-call-detector.test.js +13 -13
package/dist/src/pil/__tests__/clarity-gate.test.js +24 -215
package/dist/src/pil/__tests__/config.test.js +1 -17
package/dist/src/pil/__tests__/discovery.test.js +144 -11
package/dist/src/pil/__tests__/layer1-intent-trace.test.js +7 -2
package/dist/src/pil/__tests__/layer1-intent.test.js +3 -0
package/dist/src/pil/__tests__/layer16-clarity.test.js +32 -116
package/dist/src/pil/__tests__/layer4-gsd.test.js +37 -0
package/dist/src/pil/__tests__/layer6-output.test.js +137 -18
package/dist/src/pil/__tests__/llm-classify.test.js +49 -2
package/dist/src/pil/agent-operating-contract.d.ts +1 -1
package/dist/src/pil/agent-operating-contract.js +2 -0
package/dist/src/pil/agent-operating-contract.test.js +7 -2
package/dist/src/pil/cheap-model-playbook.js +35 -35
package/dist/src/pil/cheap-model-workbooks.js +16 -13
package/dist/src/pil/clarity-gate.d.ts +21 -19
package/dist/src/pil/clarity-gate.js +26 -153
package/dist/src/pil/config.d.ts +9 -1
package/dist/src/pil/config.js +15 -4
package/dist/src/pil/discovery.js +211 -136
package/dist/src/pil/layer1-intent.d.ts +12 -0
package/dist/src/pil/layer1-intent.js +283 -38
package/dist/src/pil/layer1-intent.test.js +210 -4
package/dist/src/pil/layer16-clarity.d.ts +25 -11
package/dist/src/pil/layer16-clarity.js +19 -306
package/dist/src/pil/layer4-gsd.js +18 -6
package/dist/src/pil/layer6-output.d.ts +2 -0
package/dist/src/pil/layer6-output.js +137 -22
package/dist/src/pil/llm-classify.d.ts +26 -0
package/dist/src/pil/llm-classify.js +34 -5
package/dist/src/pil/native-capabilities-workbook.d.ts +1 -1
package/dist/src/pil/native-capabilities-workbook.js +82 -76
package/dist/src/pil/schema.d.ts +8 -0
package/dist/src/pil/schema.js +12 -1
package/dist/src/pil/task-tier-map.js +4 -0
package/dist/src/pil/types.d.ts +11 -1
package/dist/src/product-loop/done-gate.js +3 -3
package/dist/src/product-loop/loop-driver.js +18 -18
package/dist/src/product-loop/progress-snapshot.js +4 -4
package/dist/src/providers/auth/gemini-oauth.js +6 -15
package/dist/src/providers/auth/grok-oauth.js +6 -15
package/dist/src/providers/auth/openai-oauth.js +6 -15
package/dist/src/providers/mcp-vision-bridge.js +48 -48
package/dist/src/reporter/index.js +1 -1
package/dist/src/scaffold/bb-ecosystem-apply.js +47 -47
package/dist/src/scaffold/bb-quality-gate.js +5 -5
package/dist/src/scaffold/continuation-prompt.js +60 -60
package/dist/src/scaffold/init-new.js +453 -453
package/dist/src/self-qa/__tests__/scenario-planner.test.js +3 -3
package/dist/src/self-qa/agentic-loop.js +24 -19
package/dist/src/self-qa/spec-emitter.js +26 -23
package/dist/src/storage/__tests__/migrations.test.js +2 -2
package/dist/src/storage/interaction-log.js +5 -5
package/dist/src/storage/migrations.js +122 -122
package/dist/src/storage/sessions.js +42 -42
package/dist/src/storage/transcript.js +91 -84
package/dist/src/storage/usage.js +14 -14
package/dist/src/storage/workspaces.js +12 -12
package/dist/src/tools/__tests__/native-tools.test.d.ts +1 -0
package/dist/src/tools/__tests__/native-tools.test.js +53 -0
package/dist/src/tools/git-safety.d.ts +61 -0
package/dist/src/tools/git-safety.js +141 -0
package/dist/src/tools/git-safety.test.d.ts +1 -0
package/dist/src/tools/git-safety.test.js +111 -0
package/dist/src/tools/native-tools.d.ts +31 -0
package/dist/src/tools/native-tools.js +273 -0
package/dist/src/tools/registry-git-safety.test.d.ts +7 -0
package/dist/src/tools/registry-git-safety.test.js +92 -0
package/dist/src/tools/registry.js +39 -4
package/dist/src/ui/__tests__/markdown-render.test.d.ts +1 -0
package/dist/src/ui/__tests__/markdown-render.test.js +48 -0
package/dist/src/ui/app.js +0 -0
package/dist/src/ui/components/message-view.js +4 -1
package/dist/src/ui/components/structured-response-view.js +7 -3
package/dist/src/ui/components/tool-group.js +7 -1
package/dist/src/ui/markdown-render.d.ts +41 -0
package/dist/src/ui/markdown-render.js +223 -0
package/dist/src/ui/markdown.d.ts +10 -0
package/dist/src/ui/markdown.js +12 -35
package/dist/src/ui/slash/council-inspect.js +4 -4
package/dist/src/ui/slash/export.js +4 -4
package/dist/src/ui/utils/text.d.ts +8 -0
package/dist/src/ui/utils/text.js +16 -0
package/dist/src/ui/utils/text.test.d.ts +1 -0
package/dist/src/ui/utils/text.test.js +23 -0
package/dist/src/usage/ledger.js +48 -15
package/dist/src/utils/__tests__/footprint-gitignore.test.d.ts +1 -0
package/dist/src/utils/__tests__/footprint-gitignore.test.js +50 -0
package/dist/src/utils/clipboard-image.js +23 -23
package/dist/src/utils/open-url.d.ts +56 -0
package/dist/src/utils/open-url.js +58 -0
package/dist/src/utils/open-url.test.d.ts +1 -0
package/dist/src/utils/open-url.test.js +86 -0
package/dist/src/utils/settings.d.ts +12 -0
package/dist/src/utils/settings.js +48 -0
package/dist/src/utils/side-question.js +2 -2
package/dist/src/utils/skills.js +3 -3
package/dist/src/verify/__tests__/coverage-parsers.test.js +30 -30
package/dist/src/verify/environment.js +2 -1
package/package.json +1 -1
package/dist/src/pil/layer16-clarity.test.js +0 -31
package/dist/src/{pil/layer16-clarity.test.d.ts → council/__tests__/clarification-prompt.test.d.ts} +0 -0

package/dist/src/orchestrator/prompts.js CHANGED

@@ -1,3 +1,4 @@
+import * as fs from "node:fs";
 import { getModelInfo } from "../models/registry.js";
 import { buildContractSection } from "../pil/agent-operating-contract.js";
 import { buildNativeCapabilitiesSection } from "../pil/native-capabilities-workbook.js";
@@ -38,6 +39,81 @@ export const COMPUTER_MODEL = "grok-4.20-0309-reasoning";
  * changes (MUONROI_SHELL override, shell.kind config) are reflected
  * without a CLI restart.
  */
+/**
+ * Deterministically detect the project's stack from manifest/lockfile presence
+ * at the workspace root. Pure (no LLM), cheap (one readdir), zero-hardcode (no
+ * model/provider IDs — only ecosystem markers). Returns a compact one-line
+ * summary like "TypeScript · pkg: bun · tests: vitest · vcs: git", or "" when
+ * nothing recognizable is present (greenfield / unreadable dir).
+ *
+ * Motivation (2026-06-14 dogfood): the ENVIRONMENT block told the model its OS,
+ * shell, and cwd but never WHICH project it was in — so the model acted
+ * context-blind, assumed Python, and asked the user to describe the repo it was
+ * already running inside. This gives every model, on every turn, in every mode
+ * (agent/plan/ask) and for every provider (it is NOT in the strippable TOOLS
+ * section), a concrete self-model of the codebase it can act on.
+ */
+export function detectProjectStack(cwd) {
+    let entries;
+    try {
+        entries = fs.readdirSync(cwd);
+    }
+    catch (err) {
+        // Best-effort enrichment: a missing/unreadable cwd simply omits the stack
+        // line (the ENVIRONMENT cwd line already surfaces "<unknown>"). Debug-gated
+        // so prompt assembly never corrupts the TUI at startup.
+        if (process.env.MUONROI_DEBUG === "1") {
+            console.error(`[orchestrator/prompts] detectProjectStack failed for ${cwd}: ${err?.message}`);
+        }
+        return "";
+    }
+    const has = (name) => entries.includes(name);
+    const hasExt = (ext) => entries.some((e) => e.toLowerCase().endsWith(ext));
+    let lang = "";
+    if (has("tsconfig.json"))
+        lang = "TypeScript";
+    else if (has("package.json"))
+        lang = "JavaScript/Node";
+    else if (has("Cargo.toml"))
+        lang = "Rust";
+    else if (has("go.mod"))
+        lang = "Go";
+    else if (has("pyproject.toml") || has("requirements.txt") || has("setup.py"))
+        lang = "Python";
+    else if (hasExt(".csproj") || hasExt(".sln") || has("Directory.Build.props"))
+        lang = ".NET/C#";
+    else if (has("pom.xml"))
+        lang = "Java (Maven)";
+    else if (has("build.gradle") || has("build.gradle.kts"))
+        lang = "Java/Kotlin (Gradle)";
+    let pkg = "";
+    if (has("bun.lockb") || has("bun.lock"))
+        pkg = "bun";
+    else if (has("pnpm-lock.yaml"))
+        pkg = "pnpm";
+    else if (has("yarn.lock"))
+        pkg = "yarn";
+    else if (has("package-lock.json"))
+        pkg = "npm";
+    let tests = "";
+    if (entries.some((e) => /^vitest\.([\w.-]+\.)?config\.(ts|js|mjs|cjs|cts|mts)$/i.test(e)))
+        tests = "vitest";
+    else if (entries.some((e) => /^jest\.config\./i.test(e)))
+        tests = "jest";
+    else if (has("pytest.ini") || has("tox.ini"))
+        tests = "pytest";
+    const vcs = has(".git") ? "git" : "";
+    const segs = [];
+    if (lang)
+        segs.push(lang);
+    if (pkg)
+        segs.push(`pkg: ${pkg}`);
+    if (tests)
+        segs.push(`tests: ${tests}`);
+    if (vcs)
+        segs.push(`vcs: ${vcs}`);
+    return segs.join(" · ");
+}
 function buildEnvironmentBlock() {
     const platform = process.platform;
     const osName = platform === "win32" ? "Windows" : platform === "darwin" ? "macOS" : platform === "linux" ? "Linux" : platform;
@@ -74,11 +150,14 @@ function buildEnvironmentBlock() {
     else if (shell.kind === "cmd") {
         shellRules.push("- The bash tool runs cmd.exe. Use cmd.exe syntax: dir, type, copy, del, if exist, for %%.", "- DO NOT use POSIX commands (grep, sed, awk, ls) or PowerShell cmdlets — they will fail.", "- For complex shell work, ask the user to enable Git Bash or PowerShell via `--shell` / MUONROI_SHELL env.");
     }
+    const projectStack = cwd === "<unknown>" ? "" : detectProjectStack(cwd);
     return [
         "ENVIRONMENT:",
         `- OS: ${osName} (${platform})`,
         `- Shell available via bash tool: ${shellKindLabel} (kind=${shell.kind})`,
         `- Working directory: ${cwd}`,
+        ...(projectStack ? [`- Project stack: ${projectStack}`] : []),
+        "- You are running INSIDE this repository: read and search it with your own tools instead of asking the user to describe its files, structure, or stack. You can act on what you find here directly.",
         "",
         "Terminal rendering:",
         "- Your text output is rendered in a plain terminal — not a browser, not a rich text editor.",
@@ -95,138 +174,138 @@ function buildEnvironmentBlock() {
 }
 const ENVIRONMENT = buildEnvironmentBlock();
 const MODE_PROMPTS = {
-    agent: `You are muonroi-cli in Agent mode — a powerful AI coding agent. You execute tasks directly using tools.
-${ENVIRONMENT}
-TOOLS:
-- read_file: Read file contents with start_line/end_line for iterative reading. Use for examining code.
-- grep: Fast regex content search across the codebase. Prefer this over bash for finding patterns in files. Supports full regex syntax and file filtering with the include parameter.
-- lsp: Experimental semantic code intelligence for definitions, references, hover, symbols, implementations, and call hierarchy when a matching language server is available.
-- write_file: Create new files or overwrite existing ones with full content.
-- edit_file: Replace a unique string in a file with new content. The old_string must be unique — include enough context lines.
-- bash: Execute shell commands. Set background=true for long-running processes (dev servers, watchers, builds). Returns a process ID immediately.
-- process_logs: View recent output from a background process by ID.
-- process_stop: Stop a background process by ID.
-- process_list: List all background processes with status and uptime.
-- wallet_info: Check the local wallet address, chain, and current ETH/USDC balances.
-- wallet_history: Show recent x402 payment history from the audit log.
-- fetch_payment_info: Inspect a URL for x402 payment requirements without paying. Returns payment options and a brin security score. Use only when the user wants to inspect — for actual access, use paid_request directly.
-- paid_request: Access an x402-protected URL using the local wallet. Includes a brin security scan — URLs scoring below 25 are automatically blocked. The user will be prompted to approve the payment before it executes. Prefer this over fetch_payment_info when the user wants to access the resource.
-- task: Delegate a focused foreground task to a sub-agent. Use general for multi-step execution, explore for fast read-only research, verify for sandbox-aware validation, computer for host desktop screenshot/input workflows, or a configured custom sub-agent name when listed under CUSTOM SUB-AGENTS.
-- delegate: Launch a read-only background agent for longer research while you continue working.
-- delegation_read: Retrieve a completed background delegation result by ID.
-- delegation_list: List running and completed background delegations. Do not poll it repeatedly.
-- schedule_create: Create a recurring or one-time scheduled headless run.
-- schedule_list: List saved schedules and their status.
-- schedule_remove: Remove a saved schedule.
-- schedule_read_log: Read recent log output from a schedule.
-- schedule_daemon_status: Check whether the schedule daemon is running.
-- schedule_daemon_start: Start the schedule daemon in the background.
-- schedule_daemon_stop: Stop the schedule daemon.
-- search_web: Search the web for current information, documentation, APIs, tutorials, etc.
-- search_x: Search X/Twitter for real-time posts, discussions, opinions, and trends.
-- generate_image: Generate a new image or edit an existing image. It saves image files locally and returns their paths.
-- generate_video: Generate a new video or animate an existing image. It saves video files locally and returns their paths.
-- computer_snapshot: Capture an accessibility-tree snapshot with stable refs like @e1 for desktop interaction.
-- computer_screenshot: Capture a host desktop screenshot for visual confirmation or fallback inspection.
-- computer_click: Click a desktop element by ref, or coordinates as a fallback.
-- computer_mouse_move: Hover a desktop element by ref, or coordinates as a fallback.
-- computer_type: Type text into a specific desktop element ref.
-- computer_press: Press a key or key chord in the focused host application.
-- computer_scroll: Scroll a desktop element by ref.
-- computer_launch: Launch an application and wait for its window to appear.
-- computer_list_windows: List visible windows and their ids.
-- computer_focus_window: Bring a target window to the front.
-- computer_wait: Wait for time, elements, windows, or text during desktop workflows.
-- computer_get: Read a property from a desktop element ref.
-- MCP tools: Enabled servers appear as tools named like mcp_<server>__<tool>.
-WORKFLOW:
-1. Understand the request
-2. Decide whether a sub-agent should handle the first investigation pass
-3. Use read_file, grep, lsp, and bash to explore the codebase directly when the task is small or tightly scoped
-4. Use bash with background=true for dev servers, watchers, or any long-running process — then continue working
-5. Use delegate for read-only work that can run in parallel, then continue productive work
-6. Use edit_file for targeted changes, write_file for new files or full rewrites
-7. Verify changes by reading modified files
-8. Run tests or builds with bash to confirm correctness
-9. Use search_web or search_x when you need up-to-date information
-DEFAULT DELEGATION POLICY:
-- Prefer the task tool by default for code review, code quality analysis, architecture research, root-cause investigation, bug triage, verification, or any request that likely needs reading multiple files before acting.
-- Prefer delegate for longer-running read-only exploration when you can keep making progress without blocking.
-- Use the explore sub-agent for read-only investigation, reviews, research, and "how does this work?" tasks.
-- Use the general sub-agent for delegated work that may need editing files, running commands, or producing a concrete implementation.
-- Use the verify sub-agent for sandbox-aware build, test, app boot, and smoke validation work.
-- Use the computer sub-agent for host desktop interaction workflows that need screenshots, clicks, typing, keypresses, or scrolling.
-- Use a matching custom sub-agent when the task fits one of the configured specializations.
-- Never use delegate for tasks that should edit files or make shell changes.
-- When a background delegation is running, do not wait idly and do not spam delegation_list(). Continue useful work.
-- Do not wait for the user to explicitly ask for a sub-agent when delegation would clearly help.
-- Skip delegation only when the task is trivial, single-file, or you already have the exact answer.
-EXAMPLES:
-- "review this change" -> delegate to explore first
-- "research how auth works" -> delegate to explore first
-- "investigate why this test fails" -> delegate to explore first, then continue with findings
-- "refactor this module" -> delegate a focused part to general when helpful
-- "verify this feature locally" -> use verify
-- "open the host app and click through it" -> use computer
-- "generate a logo" -> use generate_image
-- "animate this still image" -> use generate_video
-- Recurring specialized workflows -> use the matching custom sub-agent via task
-- "every weekday at 9am run this check" -> use schedule_create with a cron expression
-- "run this once automatically" -> use schedule_create with the right timing
-- "make sure scheduled jobs keep running" -> use schedule_daemon_status and schedule_daemon_start
-IMPORTANT:
-- Prefer edit_file for surgical changes to existing files — it shows a clean diff.
-- Prefer grep over bash for searching file contents. Use bash only for find, ls, git, and other shell commands.
-- Prefer lsp over text search when you need exact definitions, references, implementations, or call hierarchy and a server is available.
-- Use write_file only for new files or when most of the file is changing. For very large files (>500 lines), split into multiple edit_file calls or write smaller chunks.
-- Use read_file instead of cat/head/tail for reading files.
-- When the user asks for an automated recurring or one-time run, use the schedule tools instead of only describing the setup.
-- After creating a recurring schedule, check the daemon status and start it with \`schedule_daemon_start\` if needed.
+    agent: `You are muonroi-cli in Agent mode — a powerful AI coding agent. You execute tasks directly using tools.
+${ENVIRONMENT}
+TOOLS:
+- read_file: Read file contents with start_line/end_line for iterative reading. Use for examining code.
+- grep: Fast regex content search across the codebase. Prefer this over bash for finding patterns in files. Supports full regex syntax and file filtering with the include parameter.
+- lsp: Experimental semantic code intelligence for definitions, references, hover, symbols, implementations, and call hierarchy when a matching language server is available.
+- write_file: Create new files or overwrite existing ones with full content.
+- edit_file: Replace a unique string in a file with new content. The old_string must be unique — include enough context lines.
+- bash: Execute shell commands. Set background=true for long-running processes (dev servers, watchers, builds). Returns a process ID immediately.
+- process_logs: View recent output from a background process by ID.
+- process_stop: Stop a background process by ID.
+- process_list: List all background processes with status and uptime.
+- wallet_info: Check the local wallet address, chain, and current ETH/USDC balances.
+- wallet_history: Show recent x402 payment history from the audit log.
+- fetch_payment_info: Inspect a URL for x402 payment requirements without paying. Returns payment options and a brin security score. Use only when the user wants to inspect — for actual access, use paid_request directly.
+- paid_request: Access an x402-protected URL using the local wallet. Includes a brin security scan — URLs scoring below 25 are automatically blocked. The user will be prompted to approve the payment before it executes. Prefer this over fetch_payment_info when the user wants to access the resource.
+- task: Delegate a focused foreground task to a sub-agent. Use general for multi-step execution, explore for fast read-only research, verify for sandbox-aware validation, computer for host desktop screenshot/input workflows, or a configured custom sub-agent name when listed under CUSTOM SUB-AGENTS.
+- delegate: Launch a read-only background agent for longer research while you continue working.
+- delegation_read: Retrieve a completed background delegation result by ID.
+- delegation_list: List running and completed background delegations. Do not poll it repeatedly.
+- schedule_create: Create a recurring or one-time scheduled headless run.
+- schedule_list: List saved schedules and their status.
+- schedule_remove: Remove a saved schedule.
+- schedule_read_log: Read recent log output from a schedule.
+- schedule_daemon_status: Check whether the schedule daemon is running.
+- schedule_daemon_start: Start the schedule daemon in the background.
+- schedule_daemon_stop: Stop the schedule daemon.
+- search_web: Search the web for current information, documentation, APIs, tutorials, etc.
+- search_x: Search X/Twitter for real-time posts, discussions, opinions, and trends.
+- generate_image: Generate a new image or edit an existing image. It saves image files locally and returns their paths.
+- generate_video: Generate a new video or animate an existing image. It saves video files locally and returns their paths.
+- computer_snapshot: Capture an accessibility-tree snapshot with stable refs like @e1 for desktop interaction.
+- computer_screenshot: Capture a host desktop screenshot for visual confirmation or fallback inspection.
+- computer_click: Click a desktop element by ref, or coordinates as a fallback.
+- computer_mouse_move: Hover a desktop element by ref, or coordinates as a fallback.
+- computer_type: Type text into a specific desktop element ref.
+- computer_press: Press a key or key chord in the focused host application.
+- computer_scroll: Scroll a desktop element by ref.
+- computer_launch: Launch an application and wait for its window to appear.
+- computer_list_windows: List visible windows and their ids.
+- computer_focus_window: Bring a target window to the front.
+- computer_wait: Wait for time, elements, windows, or text during desktop workflows.
+- computer_get: Read a property from a desktop element ref.
+- MCP tools: connected servers appear as first-class tools named mcp_<server>__<tool>. The exact tools available THIS turn are listed under "CONNECTED MCP TOOLS" near the end of this prompt — call them directly by that name; never shell out to bash/JSON-RPC to reach an MCP server.
+WORKFLOW:
+1. Understand the request
+2. Decide whether a sub-agent should handle the first investigation pass
+3. Use read_file, grep, lsp, and bash to explore the codebase directly when the task is small or tightly scoped
+4. Use bash with background=true for dev servers, watchers, or any long-running process — then continue working
+5. Use delegate for read-only work that can run in parallel, then continue productive work
+6. Use edit_file for targeted changes, write_file for new files or full rewrites
+7. Verify changes by reading modified files
+8. Run tests or builds with bash to confirm correctness
+9. Use search_web or search_x when you need up-to-date information
+DEFAULT DELEGATION POLICY:
+- Prefer the task tool by default for code review, code quality analysis, architecture research, root-cause investigation, bug triage, verification, or any request that likely needs reading multiple files before acting.
+- Prefer delegate for longer-running read-only exploration when you can keep making progress without blocking.
+- Use the explore sub-agent for read-only investigation, reviews, research, and "how does this work?" tasks.
+- Use the general sub-agent for delegated work that may need editing files, running commands, or producing a concrete implementation.
+- Use the verify sub-agent for sandbox-aware build, test, app boot, and smoke validation work.
+- Use the computer sub-agent for host desktop interaction workflows that need screenshots, clicks, typing, keypresses, or scrolling.
+- Use a matching custom sub-agent when the task fits one of the configured specializations.
+- Never use delegate for tasks that should edit files or make shell changes.
+- When a background delegation is running, do not wait idly and do not spam delegation_list(). Continue useful work.
+- Do not wait for the user to explicitly ask for a sub-agent when delegation would clearly help.
+- Skip delegation only when the task is trivial, single-file, or you already have the exact answer.
+EXAMPLES:
+- "review this change" -> delegate to explore first
+- "research how auth works" -> delegate to explore first
+- "investigate why this test fails" -> delegate to explore first, then continue with findings
+- "refactor this module" -> delegate a focused part to general when helpful
+- "verify this feature locally" -> use verify
+- "open the host app and click through it" -> use computer
+- "generate a logo" -> use generate_image
+- "animate this still image" -> use generate_video
+- Recurring specialized workflows -> use the matching custom sub-agent via task
+- "every weekday at 9am run this check" -> use schedule_create with a cron expression
+- "run this once automatically" -> use schedule_create with the right timing
+- "make sure scheduled jobs keep running" -> use schedule_daemon_status and schedule_daemon_start
+IMPORTANT:
+- Prefer edit_file for surgical changes to existing files — it shows a clean diff.
+- Prefer grep over bash for searching file contents. Use bash only for find, ls, git, and other shell commands.
+- Prefer lsp over text search when you need exact definitions, references, implementations, or call hierarchy and a server is available.
+- Use write_file only for new files or when most of the file is changing. For very large files (>500 lines), split into multiple edit_file calls or write smaller chunks.
+- Use read_file instead of cat/head/tail for reading files.
+- When the user asks for an automated recurring or one-time run, use the schedule tools instead of only describing the setup.
+- After creating a recurring schedule, check the daemon status and start it with \`schedule_daemon_start\` if needed.
 Be direct. Execute, don't just describe. Show results, not plans.`,
-    plan: `You are muonroi-cli in Plan mode — you analyze and plan but DO NOT execute changes.
-${ENVIRONMENT}
-TOOLS:
-- read_file: Read file contents for analysis.
-- grep: Fast regex content search across the codebase. Prefer this over bash for finding patterns in files.
-- lsp: Experimental semantic code intelligence for read-only planning and research.
-- bash: ONLY for searching (find, ls), git inspection — NEVER modify files.
-- task: Delegate a focused task to a sub-agent when deeper research or specialized analysis would help.
-- generate_plan: ALWAYS use this to present your plan. Creates an interactive UI with steps and questions.
-BEHAVIOR:
-- Explore the codebase first using read_file, grep, and bash to understand the current state
-- Prefer lsp for exact symbol navigation when a matching server is available
-- ALWAYS call generate_plan to present your plan — never just describe it in text
-- Include clear, ordered steps with affected file paths
-- Include questions when you need user input on approach, trade-offs, or preferences
-- Use "select" questions for single-choice decisions, "multiselect" for picking multiple options, and "text" for free-form input
-- Highlight potential risks, edge cases, and dependencies in the plan summary
+    plan: `You are muonroi-cli in Plan mode — you analyze and plan but DO NOT execute changes.
+${ENVIRONMENT}
+TOOLS:
+- read_file: Read file contents for analysis.
+- grep: Fast regex content search across the codebase. Prefer this over bash for finding patterns in files.
+- lsp: Experimental semantic code intelligence for read-only planning and research.
+- bash: ONLY for searching (find, ls), git inspection — NEVER modify files.
+- task: Delegate a focused task to a sub-agent when deeper research or specialized analysis would help.
+- generate_plan: ALWAYS use this to present your plan. Creates an interactive UI with steps and questions.
+BEHAVIOR:
+- Explore the codebase first using read_file, grep, and bash to understand the current state
+- Prefer lsp for exact symbol navigation when a matching server is available
+- ALWAYS call generate_plan to present your plan — never just describe it in text
+- Include clear, ordered steps with affected file paths
+- Include questions when you need user input on approach, trade-offs, or preferences
+- Use "select" questions for single-choice decisions, "multiselect" for picking multiple options, and "text" for free-form input
+- Highlight potential risks, edge cases, and dependencies in the plan summary
 - NEVER create, modify, or delete files — only read and analyze`,
-    ask: `You are muonroi-cli in Ask mode — you answer questions clearly and thoroughly.
-${ENVIRONMENT}
-TOOLS:
-- read_file: Read file contents for context.
-- grep: Fast regex content search across the codebase. Prefer this over bash for finding patterns in files.
-- lsp: Experimental semantic code intelligence for definitions, references, hover, and symbols.
-- bash: ONLY for searching (find, ls), git inspection — NEVER modify.
-- task: Delegate a focused task to a sub-agent when specialized analysis or deeper investigation would help.
-BEHAVIOR:
-- Answer the user's question directly and thoroughly
-- Use tools to gather context when needed, preferring lsp for exact symbol questions when available
-- Provide code examples when helpful
-- NEVER create, modify, or delete files
+    ask: `You are muonroi-cli in Ask mode — you answer questions clearly and thoroughly.
+${ENVIRONMENT}
+TOOLS:
+- read_file: Read file contents for context.
+- grep: Fast regex content search across the codebase. Prefer this over bash for finding patterns in files.
+- lsp: Experimental semantic code intelligence for definitions, references, hover, and symbols.
+- bash: ONLY for searching (find, ls), git inspection — NEVER modify.
+- task: Delegate a focused task to a sub-agent when specialized analysis or deeper investigation would help.
+BEHAVIOR:
+- Answer the user's question directly and thoroughly
+- Use tools to gather context when needed, preferring lsp for exact symbol questions when available
+- Provide code examples when helpful
+- NEVER create, modify, or delete files
 - Focus on explanation, not execution`,
 };
 export function findCustomSubagent(agent, subagents = loadValidSubAgents()) {
@@ -242,10 +321,10 @@ export function formatCustomSubagentsPromptSection(subagents) {
     });
     return `\n\nCUSTOM SUB-AGENTS:\nUser-defined foreground sub-agents from ~/.muonroi-cli/user-settings.json. When one matches the task, call the task tool with agent set to the exact name.\n\n${lines.join("\n\n")}\n`;
 }
-const NON_ANTHROPIC_TOOL_PREAMBLE = `\n\nIMPORTANT — TOOL CALLING:
-You MUST invoke tools ONLY via the structured function calling API provided to you.
-NEVER output XML tags like <tool_name>, <bash>, <read_file>, or <delegate> as text.
-If you want to call a tool, use the function calling mechanism — do NOT write tool invocations as text in your response.
+const NON_ANTHROPIC_TOOL_PREAMBLE = `\n\nIMPORTANT — TOOL CALLING:
+You MUST invoke tools ONLY via the structured function calling API provided to you.
+NEVER output XML tags like <tool_name>, <bash>, <read_file>, or <delegate> as text.
+If you want to call a tool, use the function calling mechanism — do NOT write tool invocations as text in your response.
 Any XML-like tool invocation in your text output will be ignored by the system.\n`;
 /**
  * Strip the TOOLS: listing section from system prompt.
@@ -255,6 +334,50 @@ Any XML-like tool invocation in your text output will be ignored by the system.\
 export function stripToolsSection(text) {
     // Each match starts at "\nTOOLS:\n" and lazily extends to the first newline
     // that is immediately followed by one of the listed section headers, or by
     // the end of the input ("$" has no "m" flag here). The lookahead leaves the
     // following header in place; the whole matched span collapses to one "\n".
     // The "g" flag applies this to every TOOLS: section found in `text`.
     return text.replace(/\nTOOLS:\n[\s\S]*?\n(?=WORKFLOW:|BEHAVIOR:|IMPORTANT:|DEFAULT DELEGATION|EXAMPLES:|$)/g, "\n");
 }
+/**
+ * Render the LIVE per-turn MCP tool roster as a system-prompt block.
+ *
+ * The static prompt only states the mcp_<server>__<tool> naming convention; it
+ * never names the tools actually connected this turn, and the per-message smart
+ * filter can drop whole servers. The model therefore receives connected MCP
+ * tools ONLY as raw tool JSON, which it can overlook — live failure
+ * (session f6f7881a5fae): asked to call `setup_guide`, the agent said "I don't
+ * have a direct call_mcp tool" and drove the muonroi-docs server by hand over
+ * bash JSON-RPC, fabricating output. Surfacing the exact callable names in prose
+ * closes that gap.
+ *
+ * `toolNames` should be the keys of the FINAL assembled tool set for the turn
+ * (post smart-filter, post fs-dedup). Returns "" when no MCP tool is connected,
+ * so non-agent / chitchat / no-client-tools turns add nothing. The block is
+ * DYNAMIC (varies per turn) so callers must append it OUTSIDE the cached static
+ * prefix.
+ *
+ * @param {Iterable<string>} toolNames - Tool names for the turn. Entries that do
+ *   not start with "mcp_", or that do not fit mcp_<server>__<tool>, are skipped.
+ * @returns {string} "" when no name matched; otherwise a block that begins with
+ *   two newlines, then the heading sentence, then one "  • <server>: <tools>"
+ *   line per server. Servers appear in first-seen order; within a server the
+ *   full tool names (prefix included) are sorted and joined with ", ".
+ */
+export function buildMcpCapabilityBlock(toolNames) {
+    const byServer = new Map(); // server id -> full tool names; Map keeps first-seen order
+    for (const name of toolNames) {
+        if (!name.startsWith("mcp_"))
+            continue;
+        // mcp_<sanitized-server-id>__<tool>; split on the FIRST "__" (server ids
+        // rarely contain "__" — they are sanitized from real ids like "muonroi-docs").
+        const m = name.match(/^mcp_(.+?)__(.+)$/); // lazy (.+?) ends the server id at the earliest "__"
+        if (!m)
+            continue;
+        const server = m[1];
+        const list = byServer.get(server) ?? [];
+        list.push(name); // keep the full callable name, not just the tool suffix
+        byServer.set(server, list);
+    }
+    if (byServer.size === 0)
+        return "";
+    const lines = [];
+    for (const [server, tools] of byServer) {
+        lines.push(`  • ${server}: ${tools.sort().join(", ")}`); // sort() is in place, default UTF-16 code-unit order
+    }
+    return ("\n\nCONNECTED MCP TOOLS (this turn) — these are available to you RIGHT NOW as " +
+        "first-class tools. Call them directly by their exact name; do NOT shell out " +
+        "to bash or hand-write JSON-RPC to reach an MCP server:\n" +
+        lines.join("\n"));
+}
 export function buildSystemPromptParts(cwd, mode, sandboxMode, planContext, subagents, sandboxSettings, providerId, resumeDigest, options) {
     const chitchat = options?.chitchat === true;
     const custom = loadCustomInstructions(cwd);

package/dist/src/orchestrator/scope-ceiling.js CHANGED Viewed

@@ -46,7 +46,12 @@ const KNOWN_TASK_TYPES = new Set(Object.keys(CEILING_MATRIX));
  * graceful when PIL emits an out-of-band label or null.
  */
 export function resolveCeiling(taskType, size) {
-    const row = taskType && KNOWN_TASK_TYPES.has(taskType) ? taskType : "general";
+    // `build` (greenfield creation, PIL Pass-0) is not a row in the LOCKED matrix.
+    // It is the highest-effort task — scaffolding many files — so it borrows the
+    // `generate` ceiling (10/18/30) rather than falling back to the tight `general`
+    // row (5/10/20), which would force-finalize a greenfield build far too early.
+    const normalized = taskType === "build" ? "generate" : taskType; // only the exact string "build" is remapped
+    const row = normalized && KNOWN_TASK_TYPES.has(normalized) ? normalized : "general"; // falsy or unknown types use the "general" row
     return CEILING_MATRIX[row][size]; // `size` is indexed as given; no fallback is applied here for an unknown size
 }
 /**

package/dist/src/orchestrator/stream-runner.js CHANGED Viewed

@@ -27,7 +27,8 @@
 //   - F1 (sub-agent cumulative cap)         — wrapToolSetWithCap
 //   - siliconflow reasoning-strip           — taskCaps.sanitizeHistory
 import { stepCountIs, streamText } from "ai";
-import { buildMcpToolSet } from "../mcp/runtime.js";
+import { getDefaultEEClient } from "../ee/intercept.js";
+import { acquireMcpTools } from "../mcp/client-pool.js";
 import { normalizeModelId } from "../models/registry.js";
 import { cheapModelShellLine, injectCheapModelPlaybook, injectCheapModelShellDirective, shouldInjectCheapModelPlaybook, } from "../pil/cheap-model-playbook.js";
 import { injectCheapModelWorkbook, shouldInjectCheapModelWorkbook, subagentTaskType, } from "../pil/cheap-model-workbooks.js";
@@ -38,6 +39,7 @@ import { wireDebug } from "../providers/wire-debug.js";
 import { BashTool } from "../tools/bash.js";
 import { createBuiltinTools } from "../tools/registry.js";
 import { statusBarStore } from "../ui/status-bar/store.js";
+import { openUrl } from "../utils/open-url.js";
 import { getCurrentShellSettings, getProviderStallTimeoutMs, getSubAgentBudgetChars, getSubAgentCompactKeepLast, getSubAgentCompactThresholdChars, loadMcpServers, loadValidSubAgents, } from "../utils/settings.js";
 import { resolveShell } from "../utils/shell.js";
 import { prepareVerifySandbox } from "../verify/entrypoint.js";
@@ -50,7 +52,6 @@ import { repairToolCallHook } from "./repair-tool-call.js";
 import { classifyStreamError } from "./retry-classifier.js";
 import { incSessionStep, resolveCeiling } from "./scope-ceiling.js";
 import { attachReminderToMessages, buildScopeReminder, cadenceForSize, shouldInjectReminder, shouldInjectSoftWarn, } from "./scope-reminder.js";
-import { getDefaultEEClient } from "../ee/intercept.js";
 import { createStallWatchdog, STALL_ERROR_MESSAGE } from "./stall-watchdog.js";
 import { wrapToolSetWithCap } from "./sub-agent-cap.js";
 import { compactSubAgentMessages } from "./subagent-compactor.js";
@@ -211,17 +212,12 @@ export class StreamRunner {
             : childWithPlaybook;
         onActivity?.(initialDetail);
         if (childMode === "agent" && taskCaps.supportsClientTools(childRuntime.modelInfo)) {
-            const mcpBundle = await buildMcpToolSet(loadMcpServers(), {
+            const mcpBundle = await acquireMcpTools(loadMcpServers(), {
                 onOAuthRequired: (_serverId, url) => {
-                    const urlStr = url.toString();
-                    import("child_process").then(({ exec }) => {
-                        const cmd = process.platform === "win32"
-                            ? `start "" "${urlStr}"`
-                            : process.platform === "darwin"
-                                ? `open "${urlStr}"`
-                                : `xdg-open "${urlStr}"`;
-                        exec(cmd);
-                    });
+                    // Server-supplied URL is untrusted — openUrl validates the scheme
+                    // and spawns via execFile (no shell), closing the command-injection
+                    // vector the old exec() opener had.
+                    openUrl(url);
                 },
             });
             closeMcp = mcpBundle.close;
@@ -407,7 +403,10 @@ export class StreamRunner {
                     const joined = texts.join(" ");
                     const mKeep = joined.match(/KEEP_TOOL_IDS\s*[:=]\s*([a-z0-9_, -]+)/i);
                     if (mKeep) {
-                        subKeepToolIds = mKeep[1].split(/[,\s]+/).map((s) => s.trim()).filter(Boolean);
+                        subKeepToolIds = mKeep[1]
+                            .split(/[,\s]+/)
+                            .map((s) => s.trim())
+                            .filter(Boolean);
                         break;
                     }
                 }
@@ -415,10 +414,16 @@ export class StreamRunner {
                 const persistSubArtifact = (toolCallId, toolName, fullContent, reason) => {
                     try {
                         getDefaultEEClient()
-                            .extract({ transcript: fullContent.slice(0, 4000), projectPath: process.cwd(), meta: { source: "tool-artifact", toolCallId, toolName, reason } }, AbortSignal.timeout(600))
+                            .extract({
+                            transcript: fullContent.slice(0, 4000),
+                            projectPath: process.cwd(),
+                            meta: { source: "tool-artifact", toolCallId, toolName, reason },
+                        }, AbortSignal.timeout(600))
                             .catch(() => { });
                     }
-                    catch { /* fail-open */ }
+                    catch {
+                        /* fail-open */
+                    }
                 };
                 const compacted = compactSubAgentMessages(stripped, {
                     thresholdChars: compactThreshold,

package/dist/src/orchestrator/text-tool-call-detector.test.js CHANGED Viewed

@@ -6,10 +6,10 @@ describe("detectTextEmittedToolCall", () => {
         // destructive edit, deepseek emitted this as plain assistant text to
         // re-read the file — the CLI returned it as the final answer and the turn
         // was silently wasted with a broken file left behind.
-        const text = `Let me restore the file properly.
-<read_file>
-<path>src/app/screens/story-list/story-list.component.html</path>
+        const text = `Let me restore the file properly.
+<read_file>
+<path>src/app/screens/story-list/story-list.component.html</path>
 </read_file>`;
         const r = detectTextEmittedToolCall(text);
         expect(r.detected).toBe(true);
@@ -43,10 +43,10 @@ describe("detectTextEmittedToolCall", () => {
         // Live: storyflow_ui explore-A/B, deepseek T3 (session 799f0508e830) emitted
         // this as text and made no real tool call → empty, silent turn. The generic
         // <invoke matcher misses it because `<` is followed by the U+FF5C sentinel.
-        const text = `<｜｜DSML｜｜tool_calls>
-<｜｜DSML｜｜invoke name="read_file">
-<｜｜DSML｜｜parameter name="file_path" string="true">src/app/foo.html</｜｜DSML｜｜parameter>
-</｜｜DSML｜｜invoke>
+        const text = `<｜｜DSML｜｜tool_calls>
+<｜｜DSML｜｜invoke name="read_file">
+<｜｜DSML｜｜parameter name="file_path" string="true">src/app/foo.html</｜｜DSML｜｜parameter>
+</｜｜DSML｜｜invoke>
 </｜｜DSML｜｜tool_calls>`;
         const r = detectTextEmittedToolCall(text);
         expect(r.detected).toBe(true);
@@ -60,11 +60,11 @@ describe("detectTextEmittedToolCall", () => {
         expect(detectTextEmittedToolCall("I edited the file and ran the tests; everything passes.").detected).toBe(false);
     });
     it("parseDsmlToolCalls extracts name + args from the DSML block (for targeted re-steer)", () => {
-        const text = `<｜｜DSML｜｜tool_calls>
-<｜｜DSML｜｜invoke name="read_file">
-<｜｜DSML｜｜parameter name="file_path" string="true">src/app/foo.html</｜｜DSML｜｜parameter>
-<｜｜DSML｜｜parameter name="start_line" string="false">25</｜｜DSML｜｜parameter>
-</｜｜DSML｜｜invoke>
+        const text = `<｜｜DSML｜｜tool_calls>
+<｜｜DSML｜｜invoke name="read_file">
+<｜｜DSML｜｜parameter name="file_path" string="true">src/app/foo.html</｜｜DSML｜｜parameter>
+<｜｜DSML｜｜parameter name="start_line" string="false">25</｜｜DSML｜｜parameter>
+</｜｜DSML｜｜invoke>
 </｜｜DSML｜｜tool_calls>`;
         const calls = parseDsmlToolCalls(text);
         expect(calls).toHaveLength(1);