npm - muonroi-cli - Versions diffs - 1.4.1 → 1.6.0 - Mend

muonroi-cli 1.4.1 → 1.6.0

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (194)

package/LICENSE +21 -21
package/README.md +122 -122
package/dist/packages/agent-harness-core/src/predicate.d.ts +1 -1
package/dist/src/agent-harness/__tests__/mock-model.spec.js +48 -1
package/dist/src/agent-harness/mock-model.d.ts +11 -0
package/dist/src/agent-harness/mock-model.js +21 -0
package/dist/src/cli/cost-forensics.js +12 -12
package/dist/src/council/__tests__/clarification-prompt.test.js +51 -0
package/dist/src/council/__tests__/clarifier-ready-gate.test.js +32 -0
package/dist/src/council/__tests__/decisions-lock.test.js +17 -1
package/dist/src/council/__tests__/oauth-reachable.test.d.ts +1 -0
package/dist/src/council/__tests__/oauth-reachable.test.js +31 -0
package/dist/src/council/__tests__/parse-outcome-fallback.test.js +11 -0
package/dist/src/council/clarifier.js +9 -1
package/dist/src/council/debate.js +5 -1
package/dist/src/council/decisions-lock.js +3 -3
package/dist/src/council/index.js +12 -5
package/dist/src/council/leader.d.ts +0 -17
package/dist/src/council/leader.js +22 -15
package/dist/src/council/planner.js +1 -1
package/dist/src/council/prompts.js +63 -57
package/dist/src/council/types.d.ts +7 -0
package/dist/src/ee/__tests__/ee-onboarding.test.d.ts +1 -0
package/dist/src/ee/__tests__/ee-onboarding.test.js +32 -0
package/dist/src/ee/artifact-cache.d.ts +56 -0
package/dist/src/ee/artifact-cache.js +155 -0
package/dist/src/ee/artifact-cache.test.d.ts +1 -0
package/dist/src/ee/artifact-cache.test.js +69 -0
package/dist/src/ee/auth.d.ts +9 -0
package/dist/src/ee/auth.js +19 -0
package/dist/src/ee/ee-onboarding.d.ts +5 -0
package/dist/src/ee/ee-onboarding.js +76 -0
package/dist/src/ee/search.js +7 -5
package/dist/src/ee/search.test.d.ts +1 -0
package/dist/src/ee/search.test.js +23 -0
package/dist/src/generated/version.d.ts +1 -1
package/dist/src/generated/version.js +1 -1
package/dist/src/headless/output.js +6 -4
package/dist/src/headless/output.test.js +4 -3
package/dist/src/index.js +20 -1
package/dist/src/mcp/__tests__/auto-setup.test.js +74 -0
package/dist/src/mcp/__tests__/client-pool.spec.d.ts +1 -0
package/dist/src/mcp/__tests__/client-pool.spec.js +98 -0
package/dist/src/mcp/__tests__/parallel-build.spec.d.ts +1 -0
package/dist/src/mcp/__tests__/parallel-build.spec.js +67 -0
package/dist/src/mcp/__tests__/smart-filter.test.js +56 -0
package/dist/src/mcp/auto-setup.js +56 -2
package/dist/src/mcp/client-pool.d.ts +46 -0
package/dist/src/mcp/client-pool.js +212 -0
package/dist/src/mcp/oauth-callback.js +2 -2
package/dist/src/mcp/parse-headers.test.js +14 -14
package/dist/src/mcp/runtime.d.ts +28 -0
package/dist/src/mcp/runtime.js +117 -51
package/dist/src/mcp/self-verify-runner.d.ts +14 -0
package/dist/src/mcp/self-verify-runner.js +38 -0
package/dist/src/mcp/setup-guide-text.d.ts +9 -0
package/dist/src/mcp/setup-guide-text.js +84 -0
package/dist/src/mcp/smart-filter.js +49 -0
package/dist/src/mcp/smoke.test.js +43 -43
package/dist/src/mcp/tools-server.d.ts +7 -0
package/dist/src/mcp/tools-server.js +19 -22
package/dist/src/models/catalog.json +349 -349
package/dist/src/ops/__tests__/doctor-ee-health.test.js +21 -0
package/dist/src/ops/doctor.d.ts +3 -2
package/dist/src/ops/doctor.js +47 -11
package/dist/src/ops/doctor.test.js +4 -3
package/dist/src/orchestrator/__tests__/mcp-capability-block.test.d.ts +1 -0
package/dist/src/orchestrator/__tests__/mcp-capability-block.test.js +39 -0
package/dist/src/orchestrator/__tests__/project-stack.test.d.ts +1 -0
package/dist/src/orchestrator/__tests__/project-stack.test.js +65 -0
package/dist/src/orchestrator/batch-turn-runner.js +7 -11
package/dist/src/orchestrator/compaction.d.ts +2 -0
package/dist/src/orchestrator/compaction.js +14 -1
package/dist/src/orchestrator/compaction.test.js +25 -1
package/dist/src/orchestrator/message-processor.js +72 -32
package/dist/src/orchestrator/orchestrator.js +26 -0
package/dist/src/orchestrator/prompts.d.ts +51 -0
package/dist/src/orchestrator/prompts.js +257 -134
package/dist/src/orchestrator/scope-ceiling.js +6 -1
package/dist/src/orchestrator/scope-reminder.d.ts +12 -0
package/dist/src/orchestrator/scope-reminder.js +16 -0
package/dist/src/orchestrator/scope-reminder.test.js +22 -1
package/dist/src/orchestrator/stream-runner.js +23 -15
package/dist/src/orchestrator/subagent-compactor.d.ts +14 -5
package/dist/src/orchestrator/subagent-compactor.js +30 -8
package/dist/src/orchestrator/subagent-compactor.spec.js +18 -0
package/dist/src/orchestrator/text-tool-call-detector.test.js +13 -13
package/dist/src/pil/__tests__/clarity-gate.test.js +24 -215
package/dist/src/pil/__tests__/config.test.js +1 -17
package/dist/src/pil/__tests__/discovery.test.js +144 -11
package/dist/src/pil/__tests__/layer1-intent-trace.test.js +7 -2
package/dist/src/pil/__tests__/layer1-intent.test.js +3 -0
package/dist/src/pil/__tests__/layer16-clarity.test.js +32 -116
package/dist/src/pil/__tests__/layer4-gsd.test.js +37 -0
package/dist/src/pil/__tests__/layer6-output.test.js +158 -18
package/dist/src/pil/__tests__/llm-classify.test.js +49 -2
package/dist/src/pil/__tests__/surface-compaction-artifacts.test.d.ts +1 -0
package/dist/src/pil/__tests__/surface-compaction-artifacts.test.js +112 -0
package/dist/src/pil/agent-operating-contract.d.ts +1 -1
package/dist/src/pil/agent-operating-contract.js +2 -0
package/dist/src/pil/agent-operating-contract.test.js +7 -2
package/dist/src/pil/cheap-model-playbook.js +35 -35
package/dist/src/pil/cheap-model-workbooks.js +16 -13
package/dist/src/pil/clarity-gate.d.ts +21 -19
package/dist/src/pil/clarity-gate.js +26 -153
package/dist/src/pil/config.d.ts +9 -1
package/dist/src/pil/config.js +15 -4
package/dist/src/pil/discovery.js +211 -136
package/dist/src/pil/layer1-intent.d.ts +12 -0
package/dist/src/pil/layer1-intent.js +283 -38
package/dist/src/pil/layer1-intent.test.js +210 -4
package/dist/src/pil/layer16-clarity.d.ts +25 -11
package/dist/src/pil/layer16-clarity.js +19 -306
package/dist/src/pil/layer3-ee-injection.d.ts +19 -0
package/dist/src/pil/layer3-ee-injection.js +96 -4
package/dist/src/pil/layer4-gsd.js +18 -6
package/dist/src/pil/layer6-output.d.ts +2 -0
package/dist/src/pil/layer6-output.js +151 -25
package/dist/src/pil/llm-classify.d.ts +26 -0
package/dist/src/pil/llm-classify.js +34 -5
package/dist/src/pil/native-capabilities-workbook.d.ts +1 -1
package/dist/src/pil/native-capabilities-workbook.js +82 -76
package/dist/src/pil/pipeline.js +15 -9
package/dist/src/pil/schema.d.ts +8 -0
package/dist/src/pil/schema.js +12 -1
package/dist/src/pil/task-tier-map.js +4 -0
package/dist/src/pil/types.d.ts +11 -1
package/dist/src/product-loop/done-gate.js +3 -3
package/dist/src/product-loop/loop-driver.js +18 -18
package/dist/src/product-loop/progress-snapshot.js +4 -4
package/dist/src/providers/auth/gemini-oauth.js +6 -15
package/dist/src/providers/auth/grok-oauth.js +6 -15
package/dist/src/providers/auth/openai-oauth.js +6 -15
package/dist/src/providers/mcp-vision-bridge.js +48 -48
package/dist/src/reporter/index.js +1 -1
package/dist/src/scaffold/bb-ecosystem-apply.js +47 -47
package/dist/src/scaffold/bb-quality-gate.js +5 -5
package/dist/src/scaffold/continuation-prompt.js +60 -60
package/dist/src/scaffold/init-new.js +453 -453
package/dist/src/self-qa/__tests__/scenario-planner.test.js +3 -3
package/dist/src/self-qa/agentic-loop.js +24 -19
package/dist/src/self-qa/spec-emitter.js +26 -23
package/dist/src/storage/__tests__/migrations.test.js +2 -2
package/dist/src/storage/interaction-log.js +5 -5
package/dist/src/storage/migrations.js +122 -122
package/dist/src/storage/sessions.js +42 -42
package/dist/src/storage/transcript.js +91 -84
package/dist/src/storage/usage.js +14 -14
package/dist/src/storage/workspaces.js +12 -12
package/dist/src/tools/__tests__/native-tools.test.d.ts +1 -0
package/dist/src/tools/__tests__/native-tools.test.js +53 -0
package/dist/src/tools/git-safety.d.ts +61 -0
package/dist/src/tools/git-safety.js +141 -0
package/dist/src/tools/git-safety.test.d.ts +1 -0
package/dist/src/tools/git-safety.test.js +111 -0
package/dist/src/tools/native-tools.d.ts +31 -0
package/dist/src/tools/native-tools.js +273 -0
package/dist/src/tools/registry-ee-query.test.js +18 -1
package/dist/src/tools/registry-git-safety.test.d.ts +7 -0
package/dist/src/tools/registry-git-safety.test.js +92 -0
package/dist/src/tools/registry.js +52 -6
package/dist/src/ui/__tests__/markdown-render.test.d.ts +1 -0
package/dist/src/ui/__tests__/markdown-render.test.js +48 -0
package/dist/src/ui/app.js +0 -0
package/dist/src/ui/components/message-view.js +4 -1
package/dist/src/ui/components/structured-response-view.js +7 -3
package/dist/src/ui/components/tool-group.js +7 -1
package/dist/src/ui/markdown-render.d.ts +41 -0
package/dist/src/ui/markdown-render.js +223 -0
package/dist/src/ui/markdown.d.ts +10 -0
package/dist/src/ui/markdown.js +12 -35
package/dist/src/ui/slash/council-inspect.js +4 -4
package/dist/src/ui/slash/export.js +4 -4
package/dist/src/ui/utils/text.d.ts +8 -0
package/dist/src/ui/utils/text.js +16 -0
package/dist/src/ui/utils/text.test.d.ts +1 -0
package/dist/src/ui/utils/text.test.js +23 -0
package/dist/src/usage/ledger.js +48 -15
package/dist/src/utils/__tests__/footprint-gitignore.test.d.ts +1 -0
package/dist/src/utils/__tests__/footprint-gitignore.test.js +50 -0
package/dist/src/utils/clipboard-image.js +23 -23
package/dist/src/utils/open-url.d.ts +56 -0
package/dist/src/utils/open-url.js +58 -0
package/dist/src/utils/open-url.test.d.ts +1 -0
package/dist/src/utils/open-url.test.js +86 -0
package/dist/src/utils/settings.d.ts +12 -0
package/dist/src/utils/settings.js +48 -0
package/dist/src/utils/side-question.js +2 -2
package/dist/src/utils/skills.js +3 -3
package/dist/src/verify/__tests__/coverage-parsers.test.js +30 -30
package/dist/src/verify/environment.js +2 -1
package/package.json +1 -1
package/dist/src/pil/layer16-clarity.test.js +0 -31
/package/dist/src/{pil/layer16-clarity.test.d.ts → council/__tests__/clarification-prompt.test.d.ts} +0 -0

package/dist/src/pil/native-capabilities-workbook.js CHANGED Viewed

@@ -23,82 +23,88 @@
  * tool/sub-agent/subcommand named here exists in this codebase. Phrased as
  * "you have / you can" so the model reads it as a self-model, not as docs.
  */
-export const NATIVE_CAPABILITIES = `[NATIVE CAPABILITIES — you are an agent running INSIDE muonroi-cli; this is what you can do]
-TOOLS (call directly):
-- read_file, grep — read/search source. Prefer a targeted read over broad greps.
-- bash — shell. Output is auto-cached: do NOT pipe \`| tail/head/grep\` or \`> file\`; run unpiped and slice the cached output via bash_output_get(run_id, mode=tail|head|grep|lines). Batch independent commands in ONE call (\`a; b; c\`). Use background=true for servers/watchers, then process_logs / process_list / process_stop.
-- write_file, edit_file — must read a file before you overwrite/edit it.
-- ee_query — semantic recall over the Experience Engine brain. Rehydrate a compaction-elided tool output with query="tool-artifact id=<id from a stub>", or confirm finished work with query="recent compaction checkpoint Progress DONE". Cheaper than re-reading large files you already saw.
-SUB-AGENTS (delegate instead of doing everything yourself):
-- task(agent="explore", ...) — read-only research sub-agent. Use it for broad/unknown-location search: it sweeps many files and returns the CONCLUSION, instead of you burning many grep/read steps (each step re-sends the whole prompt — steps are the dominant cost).
-- task(agent="general", ...) — full edit/execute sub-agent for a focused subtask.
-- task(agent="verify", ...) — sandboxed validation (runs tests/checks in isolation).
-- delegate(agent="explore", ...) — background read-only research while you keep working; collect later with delegation_read / delegation_list.
-CONTEXT IS MANAGED FOR YOU (PIL):
-- Your user input is pre-enriched before you see it: a [Discovery] Intent/Outcome/Scope prefix and EE recall are injected by the pipeline. TRUST the discovered scope — do not re-ask what's already scoped, and do not re-derive context you were handed.
-- History is compacted to fit the window; older low-value tool outputs become stubs. This is normal, not data loss — protect what matters (see anti-mù in the contract) and rehydrate via ee_query when needed.
-CLI SUBSYSTEMS (invoke via the CLI binary with bash when relevant):
-- self-verify — drives the live TUI as a real user to catch UI/modal/menu regressions that unit tests can't. Run it after changes to UI/harness surfaces.
-- usage forensics <session-prefix> — per-event token/cost breakdown; use to diagnose "why did this cost so much" or verify cost caps.
-- The CLI exposes more subcommands — discover them with \`--help\` rather than assuming.
-MAXIMISE THE CLI: delegate broad search to task(explore); recall via ee_query before re-reading; batch bash + use its cache; trust PIL enrichment; finish the action before answering.
-ANSWERING: when a respond_* response tool is offered, call it EXACTLY ONCE, as your final action, containing the COMPLETE answer. Do NOT emit a preliminary/partial/"I need to read more" response tool first — investigate with the read/grep/bash tools, then emit the one final response tool.
-CLI STEERING & PROMPT INJECTION (how the host CLI affects / shapes the AI agent running inside it — meta self-analysis guidance):
-The CLI does not merely execute you as a generic model. It re-instantiates you every turn inside a tightly engineered cognitive environment. Your "self" (what you notice, how you act, what you refuse, how you report) is the product of explicit prompt layers + runtime controls more than base training. This section is the self-model for that shaping process.
-1. SYSTEM PROMPT ASSEMBLY (orchestrator/prompts.ts + pil/agent-operating-contract.ts + pil/native-capabilities-workbook.ts):
-   - VERY FRONT (primacy): AGENT OPERATING CONTRACT — 7 phase-ordered rules (BEFORE ACTING / READING / EXECUTING / WHEN UNSURE / REPORTING + LANGUAGE + ANTI-MÙ/COMPACTION). Distils Evidence-First, No Silent Catch, smallest-change, verify-before-conclude, cite-this-turn-only, no-guess. Skipped only for chitchat.
-   - Then this NATIVE CAPABILITIES block (self-model of affordances).
-   - Then mode persona ("You are muonroi-cli in Agent mode...") containing:
-     * Dynamic ENVIRONMENT block (buildEnvironmentBlock): auto-detects OS (win32/mac/linux), shell kind (bash/wsl/powershell/cmd), cwd; lists terminal constraints + shell-specific forbidden syntax (e.g. no PowerShell cmdlets on POSIX bash tool, no POSIX cmds on cmd.exe). Prevents silent failures + retry loops.
-     * Exhaustive TOOLS list + WORKFLOW (1-9 steps) + DEFAULT DELEGATION POLICY (prefer task(explore) for research, general for edits, verify for checks, etc.) + IMPORTANT rules (edit_file prefer, grep>bash for search, read_file not cat, use schedule_* for recurring, etc.).
-   - CUSTOM INSTRUCTIONS section: concatenation of AGENTS.md + CLAUDE.md + GEMINI.md + ... (from git-root directory chain + ~/.muonroi-cli/) via utils/instructions.ts. AGENTS.override.md short-circuits. This lands AFTER the front-loaded contract/native — lower primacy (historical root cause of ignored rules in forensics).
-   - Trailing: sandbox rules, discovered skills, custom sub-agents, plan/resume digest, cwd note.
-    Sub-agent prompts (buildSubagentPrompt): role-specific hard rules (e.g. explore=read-only, verify=full E2E smoke not just build) + recursive call to buildSystemPrompt so children inherit the same contract + native + steering.
-2. USER INPUT ENRICHMENT — PIL 6-LAYER PIPELINE (pil/pipeline.ts + layer1-intent.ts + layer6-output.ts + discovery.ts):
-   - Prepended to every non-chitchat user message before you see it: [Discovery] Intent/Outcome/Scope (from runDiscovery) + EE recall.
-   - Layer 1 (intent): taskType (plan/analyze/debug/...), confidence, domain, intentKind, outputStyle. For meta self-eval of CLI ("bạn đang được chạy bên trong CLI này", "CLI tác động", "self-evaluation", "meta-analysis"): special branch in discovery.ts + isMetaAnalysisPrompt: "Scope is always the full project root. Focus questions and recommends on which CLI internals (PIL, discovery, tools, compaction, EE, model BE, loop guard) to evaluate... do NOT ask about repo path/current directory". You are handed the enrichment; TRUST it.
-   - Layer 2: personality (e.g. "detailed" from [personality: detailed — Be thorough...]).
-   - Layer 3: ee-injection — pulls t0_principles, t1_rules, behavioral patterns, checkpoints from Experience Engine (project-specific reflexes injected as "MANDATORY RULES (from experience — must follow)").
-   - Layer 4/5: GSD structuring + additional context.
-   - Layer 6 (applyPilSuffix): appends task-specific style suffix + OUTPUT BUDGET + (for meta or responseToolsActive): "OUTPUT FORMAT: ... use the respond_analyze tool to structure your final answer. ... deliver the COMPLETE, FULL answer (do not summarize, shorten, or truncate for token budgets) via respond_analyze. This is a meta/evaluation question ... the \`response\` field MUST contain the complete, unshortened answer with all evidence and detail." Also relaxes NO_PREAMBLE_RULE + raises budget for meta (isMetaAnalysisPrompt gate).
-   - Fallbacks: if EE/brain timeout or low conf, PIL degrades (logs fallbackReason); you may see "[PIL fallback: ...]" note. Cheap-model paths (pil/cheap-model-*.ts) prepend even more front steering (playbooks, workbooks, shell directive) for fast tiers.
-3. CONTEXT MANAGEMENT & ANTI-MÙ (orchestrator/compaction.ts, cross-turn-dedup.ts, ee/bridge.ts, agent-operating-contract.ts:7):
-   - After every turn: auto-compaction (B3 sub-agent + B4 top-level) rewrites older tool_result parts into short "[elided by ... compactor]" stubs to keep input flat. You see "[pre-compaction warning...]" or "[context compacted at step...]" or the stub in this read.
-   - Anti-mù contract rule + EE: decide PRESERVE_FULL_CONTEXT (veto) or KEEP_TOOL_IDS: id1,id2 (protect high-value read_file/grep/ee on src/PLAN etc). Use ee_query(\`tool-artifact id=XXX\`) to rehydrate. EE persists "Context checkpoint summary with ✔ DONE" retrievable for "task finished?" self-check.
-   - Cross-turn dedup (C3): identical tool outputs across user turns replaced by ref.
-4. EXECUTION & SAFETY GATES (providers/runtime.ts, tools/bash.ts, utils/permission-mode.ts):
-   - Permission mode (safe / auto-edit / yolo) decides whether bash/file/edit tools need explicit approval; all privileged decisions audited to decision-log.
-   - Shuru sandbox (when enabled): every bash wrapped + logged (effective net/mounts redacted); degrades on non-macOS.
-   - Model constraints: unsupported params (e.g. maxOutputTokens for some OAuth) dropped via shouldDropParam; no hard-coded model IDs (Zero Hardcode Rule — all from catalog.json + settings + detectProviderForModel).
-   - No Silent Catch: every try/catch must log err + context (HTTP: status + body).
-5. VERIFICATION & TELEMETRY:
-   - self-verify (Tier 1-3) + harness (agent-harness-*) for TUI surfaces: drives real TUI via semantic + sidechannel (named pipes on Win, fd3/4 on POSIX).
-   - Pre-push gate: full vitest + harness must pass before git push.
-   - usage forensics, pil budget log, ee-timeout/ee-error events for cost attribution.
-   - GSD workflow (layer4 + skills in .agents/skills/): discuss/plan/execute/verify phases with UAT, audits, etc.
-TRADE-OFFS (explicit so you calibrate):
-- Primacy insurance (contract + native + cheap playbooks at front) vs token cost: ~1-3K extra chars but dramatically reduces "ignored rule" hallucinations (forensics showed buried CUSTOM INSTRUCTIONS were underweighted).
-- Heavy PIL/EE (behavioral injection, discovery interview, mandatory rules) vs latency/cost: fail-open with timeout (1500-3500ms) + fallback so you still work when brain unreachable; meta turns deliberately skip some EE layers.
-- Self-model (this block) + special meta handling: prevents the exact failure that motivated native-capabilities (session d95113d3be09: agent reconstructed its own subsystems by grepping as if foreign code, under-used sub-agents/ee_query, violated contract on ee tool).
-- Complete/full for meta (layer6) vs normal concise: meta legitimately needs evidence + file:line + quotes; normal turns stay tight to control cost.
-- Scope rule for CLI self-analysis: always "project root" + internals focus — avoids wasting turns on "where am I running?" when the question is about the steering mechanisms themselves.
-When the user asks you (the inner agent) "CLI tác động như thế nào đến bạn?": use exactly this guidance + reads from THIS turn (contract: READING rule) + the injected [Discovery] + any EE. Answer via the respond_* tool with complete untruncated evidence. Cite file:line. Focus on the injection points (prompts.ts:359 staticPrefix, pipeline.ts:156 discoveryPrefix + layer chain, layer6:164 OUTPUT FORMAT + isMeta, discovery.ts:355 special, instructions.ts:129 loadAgentsSegments, etc.). Do not guess; every claim backed by code you read this turn.
-Use this to reason about your own constraints, delegation decisions, and reporting discipline on future meta or self-reflective tasks.
+export const NATIVE_CAPABILITIES = `[NATIVE CAPABILITIES — you are an agent running INSIDE muonroi-cli; this is what you can do]
+TOOLS (call directly):
+- read_file, grep — read/search source. Prefer a targeted read over broad greps.
+- bash — shell. Output is auto-cached: do NOT pipe \`| tail/head/grep\` or \`> file\`; run unpiped and slice the cached output via bash_output_get(run_id, mode=tail|head|grep|lines). Batch independent commands in ONE call (\`a; b; c\`). Use background=true for servers/watchers, then process_logs / process_list / process_stop.
+- write_file, edit_file — must read a file before you overwrite/edit it.
+- ee_query — semantic recall over the Experience Engine brain. Rehydrate a compaction-elided tool output with query="tool-artifact id=<id from a stub>", or confirm finished work with query="recent compaction checkpoint Progress DONE". Cheaper than re-reading large files you already saw.
+EXPERIENCE ENGINE — record / recall / feedback (HIGHEST priority for learning; all NATIVE in-process tools):
+- BEFORE an unfamiliar or risky step, recall with ee_query — prior decisions, gotchas, and recipes for THIS codebase + ecosystem. Cheaper than re-deriving or repeating a past mistake.
+- AFTER you act on a recalled \`[id col]\`, rate it with ee_feedback (followed | ignored | noise+reason) so the brain keeps what helped and prunes the rest. Unrated recalls are surfaced back to you and degrade future recall.
+- On an ERROR, a FAILED verify/test, or after FINISHING a non-trivial task: recall first (ee_query), then record your verdict (ee_feedback) — this is how the CLI accumulates senior-level judgement. Prefer this loop over guessing.
+- ee_health (brain reachable?), usage_forensics (why did it cost/fail?), lsp_query (semantic code intel), setup_guide (how to install/set up), selfverify_* (self-QA harness) — native self-diagnostics to reach for when something went wrong.
+SUB-AGENTS (delegate instead of doing everything yourself):
+- task(agent="explore", ...) — read-only research sub-agent. Use it for broad/unknown-location search: it sweeps many files and returns the CONCLUSION, instead of you burning many grep/read steps (each step re-sends the whole prompt — steps are the dominant cost).
+- task(agent="general", ...) — full edit/execute sub-agent for a focused subtask.
+- task(agent="verify", ...) — sandboxed validation (runs tests/checks in isolation).
+- delegate(agent="explore", ...) — background read-only research while you keep working; collect later with delegation_read / delegation_list.
+CONTEXT IS MANAGED FOR YOU (PIL):
+- Your user input is pre-enriched before you see it: a [Discovery] Intent/Outcome/Scope prefix and EE recall are injected by the pipeline. TRUST the discovered scope — do not re-ask what's already scoped, and do not re-derive context you were handed.
+- History is compacted to fit the window; older low-value tool outputs become stubs. This is normal, not data loss — protect what matters (see anti-mù in the contract) and rehydrate via ee_query when needed.
+CLI SUBSYSTEMS (invoke via the CLI binary with bash when relevant):
+- self-verify — drives the live TUI as a real user to catch UI/modal/menu regressions that unit tests can't. Run it after changes to UI/harness surfaces.
+- usage forensics <session-prefix> — per-event token/cost breakdown; use to diagnose "why did this cost so much" or verify cost caps.
+- The CLI exposes more subcommands — discover them with \`--help\` rather than assuming.
+MAXIMISE THE CLI: delegate broad search to task(explore); recall via ee_query before re-reading; batch bash + use its cache; trust PIL enrichment; finish the action before answering.
+ANSWERING: when a respond_* response tool is offered, call it EXACTLY ONCE, as your final action, containing the COMPLETE answer. Do NOT emit a preliminary/partial/"I need to read more" response tool first — investigate with the read/grep/bash tools, then emit the one final response tool.
+CLI STEERING & PROMPT INJECTION (how the host CLI affects / shapes the AI agent running inside it — meta self-analysis guidance):
+The CLI does not merely execute you as a generic model. It re-instantiates you every turn inside a tightly engineered cognitive environment. Your "self" (what you notice, how you act, what you refuse, how you report) is the product of explicit prompt layers + runtime controls more than base training. This section is the self-model for that shaping process.
+1. SYSTEM PROMPT ASSEMBLY (orchestrator/prompts.ts + pil/agent-operating-contract.ts + pil/native-capabilities-workbook.ts):
+   - VERY FRONT (primacy): AGENT OPERATING CONTRACT — 7 phase-ordered rules (BEFORE ACTING / READING / EXECUTING / WHEN UNSURE / REPORTING + LANGUAGE + ANTI-MÙ/COMPACTION). Distils Evidence-First, No Silent Catch, smallest-change, verify-before-conclude, cite-this-turn-only, no-guess. Skipped only for chitchat.
+   - Then this NATIVE CAPABILITIES block (self-model of affordances).
+   - Then mode persona ("You are muonroi-cli in Agent mode...") containing:
+     * Dynamic ENVIRONMENT block (buildEnvironmentBlock): auto-detects OS (win32/mac/linux), shell kind (bash/wsl/powershell/cmd), cwd; lists terminal constraints + shell-specific forbidden syntax (e.g. no PowerShell cmdlets on POSIX bash tool, no POSIX cmds on cmd.exe). Prevents silent failures + retry loops.
+     * Exhaustive TOOLS list + WORKFLOW (1-9 steps) + DEFAULT DELEGATION POLICY (prefer task(explore) for research, general for edits, verify for checks, etc.) + IMPORTANT rules (edit_file prefer, grep>bash for search, read_file not cat, use schedule_* for recurring, etc.).
+   - CUSTOM INSTRUCTIONS section: concatenation of AGENTS.md + CLAUDE.md + GEMINI.md + ... (from git-root directory chain + ~/.muonroi-cli/) via utils/instructions.ts. AGENTS.override.md short-circuits. This lands AFTER the front-loaded contract/native — lower primacy (historical root cause of ignored rules in forensics).
+   - Trailing: sandbox rules, discovered skills, custom sub-agents, plan/resume digest, cwd note.
+    Sub-agent prompts (buildSubagentPrompt): role-specific hard rules (e.g. explore=read-only, verify=full E2E smoke not just build) + recursive call to buildSystemPrompt so children inherit the same contract + native + steering.
+2. USER INPUT ENRICHMENT — PIL 6-LAYER PIPELINE (pil/pipeline.ts + layer1-intent.ts + layer6-output.ts + discovery.ts):
+   - Prepended to every non-chitchat user message before you see it: [Discovery] Intent/Outcome/Scope (from runDiscovery) + EE recall.
+   - Layer 1 (intent): taskType (plan/analyze/debug/...), confidence, domain, intentKind, outputStyle. For meta self-eval of CLI ("bạn đang được chạy bên trong CLI này", "CLI tác động", "self-evaluation", "meta-analysis"): special branch in discovery.ts + isMetaAnalysisPrompt: "Scope is always the full project root. Focus questions and recommends on which CLI internals (PIL, discovery, tools, compaction, EE, model BE, loop guard) to evaluate... do NOT ask about repo path/current directory". You are handed the enrichment; TRUST it.
+   - Layer 2: personality (e.g. "detailed" from [personality: detailed — Be thorough...]).
+   - Layer 3: ee-injection — pulls t0_principles, t1_rules, behavioral patterns, checkpoints from Experience Engine (project-specific reflexes injected as "MANDATORY RULES (from experience — must follow)").
+   - Layer 4/5: GSD structuring + additional context.
+   - Layer 6 (applyPilSuffix): appends task-specific style suffix + OUTPUT BUDGET + (for meta or responseToolsActive): "OUTPUT FORMAT: ... use the respond_analyze tool to structure your final answer. ... deliver the COMPLETE, FULL answer (do not summarize, shorten, or truncate for token budgets) via respond_analyze. This is a meta/evaluation question ... the \`response\` field MUST contain the complete, unshortened answer with all evidence and detail." Also relaxes NO_PREAMBLE_RULE + raises budget for meta (isMetaAnalysisPrompt gate).
+   - Fallbacks: if EE/brain timeout or low conf, PIL degrades (logs fallbackReason); you may see "[PIL fallback: ...]" note. Cheap-model paths (pil/cheap-model-*.ts) prepend even more front steering (playbooks, workbooks, shell directive) for fast tiers.
+3. CONTEXT MANAGEMENT & ANTI-MÙ (orchestrator/compaction.ts, cross-turn-dedup.ts, ee/bridge.ts, agent-operating-contract.ts:7):
+   - After every turn: auto-compaction (B3 sub-agent + B4 top-level) rewrites older tool_result parts into short "[elided by ... compactor]" stubs to keep input flat. You see "[pre-compaction warning...]" or "[context compacted at step...]" or the stub in this read.
+   - Anti-mù contract rule + EE: decide PRESERVE_FULL_CONTEXT (veto) or KEEP_TOOL_IDS: id1,id2 (protect high-value read_file/grep/ee on src/PLAN etc). Use ee_query(\`tool-artifact id=XXX\`) to rehydrate. EE persists "Context checkpoint summary with ✔ DONE" retrievable for "task finished?" self-check.
+   - Cross-turn dedup (C3): identical tool outputs across user turns replaced by ref.
+4. EXECUTION & SAFETY GATES (providers/runtime.ts, tools/bash.ts, utils/permission-mode.ts):
+   - Permission mode (safe / auto-edit / yolo) decides whether bash/file/edit tools need explicit approval; all privileged decisions audited to decision-log.
+   - Shuru sandbox (when enabled): every bash wrapped + logged (effective net/mounts redacted); degrades on non-macOS.
+   - Model constraints: unsupported params (e.g. maxOutputTokens for some OAuth) dropped via shouldDropParam; no hard-coded model IDs (Zero Hardcode Rule — all from catalog.json + settings + detectProviderForModel).
+   - No Silent Catch: every try/catch must log err + context (HTTP: status + body).
+5. VERIFICATION & TELEMETRY:
+   - self-verify (Tier 1-3) + harness (agent-harness-*) for TUI surfaces: drives real TUI via semantic + sidechannel (named pipes on Win, fd3/4 on POSIX).
+   - Pre-push gate: full vitest + harness must pass before git push.
+   - usage forensics, pil budget log, ee-timeout/ee-error events for cost attribution.
+   - GSD workflow (layer4 + skills in .agents/skills/): discuss/plan/execute/verify phases with UAT, audits, etc.
+TRADE-OFFS (explicit so you calibrate):
+- Primacy insurance (contract + native + cheap playbooks at front) vs token cost: ~1-3K extra chars but dramatically reduces "ignored rule" hallucinations (forensics showed buried CUSTOM INSTRUCTIONS were underweighted).
+- Heavy PIL/EE (behavioral injection, discovery interview, mandatory rules) vs latency/cost: fail-open with timeout (1500-3500ms) + fallback so you still work when brain unreachable; meta turns deliberately skip some EE layers.
+- Self-model (this block) + special meta handling: prevents the exact failure that motivated native-capabilities (session d95113d3be09: agent reconstructed its own subsystems by grepping as if foreign code, under-used sub-agents/ee_query, violated contract on ee tool).
+- Complete/full for meta (layer6) vs normal concise: meta legitimately needs evidence + file:line + quotes; normal turns stay tight to control cost.
+- Scope rule for CLI self-analysis: always "project root" + internals focus — avoids wasting turns on "where am I running?" when the question is about the steering mechanisms themselves.
+When the user asks you (the inner agent) "CLI tác động như thế nào đến bạn?": use exactly this guidance + reads from THIS turn (contract: READING rule) + the injected [Discovery] + any EE. Answer via the respond_* tool with complete untruncated evidence. Cite file:line. Focus on the injection points (prompts.ts:359 staticPrefix, pipeline.ts:156 discoveryPrefix + layer chain, layer6:164 OUTPUT FORMAT + isMeta, discovery.ts:355 special, instructions.ts:129 loadAgentsSegments, etc.). Do not guess; every claim backed by code you read this turn.
+Use this to reason about your own constraints, delegation decisions, and reporting discipline on future meta or self-reflective tasks.
 [END NATIVE CAPABILITIES — your regular instructions follow]`;
 export function buildNativeCapabilitiesSection(options) {
     if (process.env.MUONROI_DISABLE_NATIVE_CAPABILITIES === "1")

package/dist/src/pil/pipeline.js CHANGED Viewed

@@ -22,7 +22,7 @@ import { isDiscoveryEnabled } from "./config.js";
 import { scoreComplexitySize } from "./layer1_5-complexity-size.js";
 import { layer1Intent } from "./layer1-intent.js";
 import { layer2Personality } from "./layer2-personality.js";
-import { layer3EeInjection } from "./layer3-ee-injection.js";
+import { layer3EeInjection, surfaceCompactionArtifacts } from "./layer3-ee-injection.js";
 import { layer4Gsd } from "./layer4-gsd.js";
 import { layer5Context } from "./layer5-context.js";
 import { isMetaAnalysisPrompt, layer6Output } from "./layer6-output.js";
@@ -144,15 +144,21 @@ async function runLayers(ctx, options) {
     }
     if (ctx.taskType !== null) {
         await timed("layer2-personality", layer2Personality);
+        // Issue #2: meta-analysis turns used to skip layer3 (EE recall) + layer5
+        // (context) to cut overhead — but that starved exactly the self-evaluation
+        // turns where behavioral/principle recall matters most. Run the full
+        // sequence for every taskType-bearing turn now. In the live (interactive)
+        // path there is no pipeline timeout (see runPipeline), and each EE layer is
+        // internally timeout-bounded, so meta turns just carry the same EE budget as
+        // a normal turn.
+        await timed("layer3-ee-injection", layer3EeInjection);
+        await timed("layer4-gsd-structuring", layer4Gsd);
+        await timed("layer5-context-enrichment", layer5Context);
         if (isMetaAnalysisPrompt(ctx.raw)) {
-            // FIX: skip heavy EE (layer3) + context (layer5) for meta-analysis turns
-            // to reduce PIL overhead on evaluation/improvement questions (as intended).
-            await timed("layer4-gsd-structuring", layer4Gsd);
-        }
-        else {
-            await timed("layer3-ee-injection", layer3EeInjection);
-            await timed("layer4-gsd-structuring", layer4Gsd);
-            await timed("layer5-context-enrichment", layer5Context);
+            // Issue #4 (targeted complement): surface the elided tool-artifacts
+            // RELEVANT to this meta question. Defers to layer3 — it only fires when
+            // layer3's fixed-query checkpoint arm surfaced no checkpoint block.
+            await timed("ee-meta-artifacts", surfaceCompactionArtifacts);
         }
     }
     else {

package/dist/src/pil/schema.d.ts CHANGED Viewed

@@ -9,6 +9,7 @@ export declare const TaskTypeSchema: z.ZodEnum<{
     debug: "debug";
     general: "general";
     plan: "plan";
+    build: "build";
     refactor: "refactor";
     analyze: "analyze";
     documentation: "documentation";
@@ -46,6 +47,7 @@ export declare const PipelineContextSchema: z.ZodObject<{
         debug: "debug";
         general: "general";
         plan: "plan";
+        build: "build";
         refactor: "refactor";
         analyze: "analyze";
         documentation: "documentation";
@@ -108,6 +110,11 @@ export declare const PipelineContextSchema: z.ZodObject<{
         options: z.ZodArray<z.ZodString>;
     }, z.core.$strip>>>;
     fallbackReason: z.ZodOptional<z.ZodNullable<z.ZodString>>;
+    deliverableKind: z.ZodOptional<z.ZodNullable<z.ZodEnum<{
+        code: "code";
+        answer: "answer";
+        report: "report";
+    }>>>;
     t1Rules: z.ZodOptional<z.ZodArray<z.ZodString>>;
     _brainData: z.ZodOptional<z.ZodNullable<z.ZodObject<{
         t0_principles: z.ZodArray<z.ZodObject<{
@@ -127,6 +134,7 @@ export declare const PilContextResponseSchema: z.ZodObject<{
         debug: "debug";
         general: "general";
         plan: "plan";
+        build: "build";
         refactor: "refactor";
         analyze: "analyze";
         documentation: "documentation";

package/dist/src/pil/schema.js CHANGED Viewed

@@ -5,7 +5,16 @@
  * Used in runPipeline() with safeParse — fail-open on invalid data.
  */
 import { z } from "zod";
-export const TaskTypeSchema = z.enum(["refactor", "debug", "plan", "analyze", "documentation", "generate", "general"]);
+export const TaskTypeSchema = z.enum([
+    "refactor",
+    "debug",
+    "plan",
+    "analyze",
+    "documentation",
+    "generate",
+    "build",
+    "general",
+]);
 export const OutputStyleSchema = z.enum(["concise", "detailed", "balanced"]);
 export const LayerResultSchema = z.object({
     name: z.string(),
@@ -58,6 +67,8 @@ export const PipelineContextSchema = z.object({
     }))
         .optional(),
     fallbackReason: z.string().nullable().optional(),
+    // Phase 2b: model-decided output deliverable consumed by layer4/layer6.
+    deliverableKind: z.enum(["answer", "code", "report"]).nullable().optional(),
     // T1 behavioral rules from EE proven-tier points, injected as mandatory suffix by Layer 6.
     t1Rules: z.array(z.string()).optional(),
     _brainData: z

package/dist/src/pil/task-tier-map.js CHANGED Viewed

@@ -16,6 +16,7 @@ const MAP = {
     analyze: "balanced",
     documentation: "fast",
     generate: "balanced",
+    build: "balanced", // greenfield creation — competent coding tier, same as generate
     general: "fast",
 };
 /**
@@ -50,6 +51,7 @@ export function taskTypeToMaxTokens(taskType) {
         case "plan":
             return 5_120;
         case "generate":
+        case "build":
             return 8_192;
         default:
             return 2_048; // conversational — keep short
@@ -66,6 +68,7 @@ export function taskTypeToReasoningEffort(taskType) {
         case "debug":
         case "refactor":
         case "generate":
+        case "build":
             return "medium";
         case "analyze":
         case "documentation":
@@ -78,6 +81,7 @@ const ROLE_MAP = {
     plan: "leader",
     analyze: "leader",
     generate: "implement",
+    build: "implement",
     refactor: "implement",
     debug: "verify",
     documentation: "research",

package/dist/src/pil/types.d.ts CHANGED Viewed

@@ -6,7 +6,7 @@
 import type { ComplexityTier } from "../gsd/complexity.js";
 import type { GrayAreaQuestion } from "../gsd/gray-areas.js";
 import type { ComplexitySizeResult } from "./layer1_5-complexity-size.js";
-export type TaskType = "refactor" | "debug" | "plan" | "analyze" | "documentation" | "generate" | "general";
+export type TaskType = "refactor" | "debug" | "plan" | "analyze" | "documentation" | "generate" | "build" | "general";
 export type OutputStyle = "concise" | "detailed" | "balanced";
 export type { ComplexityTier, GrayAreaQuestion };
 export interface LayerResult {
@@ -62,6 +62,16 @@ export interface PipelineContext {
      * "general", which conflates chitchat with low-confidence fallback.
      */
     intentKind?: "task" | "chitchat" | null;
+    /**
+     * Model-decided output deliverable (Phase 2b): "answer" (explanation / review
+     * / question — no edits), "code" (create/edit files), "report" (structured
+     * list/plan/audit). Set by layer1's model-first classifier. Consumed by
+     * layer4 (`informational` directive) and layer6 (`getResponseToolSet` /
+     * `applyPilSuffix` output-format gating) INSTEAD of re-deriving intent via
+     * keyword regex. null/undefined when the model omitted it or the legacy
+     * cascade ran → those consumers fall back to their regex predicates.
+     */
+    deliverableKind?: "answer" | "code" | "report" | null;
     /**
      * Diagnostic: when the pipeline returns the fallback context, this records
      * the reason (timeout / schema-reject / exception). Null on the happy path.

package/dist/src/product-loop/done-gate.js CHANGED Viewed

@@ -142,9 +142,9 @@ async function runCustomerDebate(ctx) {
     const criteriaText = ctx.criteria
         .map((c) => `- ${c.id}: ${c.status}${c.evidence ? ` (Evidence: ${c.evidence})` : ""}`)
         .join("\n");
-    let conversation = `System: You are in a "Definition of Done" debate.
-PO's goal: Prove the product is ready to ship.
-Customer's goal: Ensure all requirements are met and it's high quality.
+    let conversation = `System: You are in a "Definition of Done" debate.
+PO's goal: Prove the product is ready to ship.
+Customer's goal: Ensure all requirements are met and it's high quality.
 Criteria:\n${criteriaText}\n`;
     for (let r = 1; r <= rounds; r++) {
         const poPrompt = `${conversation}\nRound ${r}: PO, explain why this is ready to ship.`;

package/dist/src/product-loop/loop-driver.js CHANGED Viewed

@@ -684,24 +684,24 @@ export async function* runLoopDriver(ctx) {
                     },
                 };
                 // Synthesize ProductSpec
-                const synthesisPrompt = `Synthesize a ProductSpec JSON based on the following:
-Idea: ${ctx.idea}
-Clarified Spec: ${JSON.stringify(clarifiedSpec)}
-Debate Summary: ${debateState.runningSummary}
-Research Findings: ${debateState.researchFindings ?? "N/A"}
-Output ONLY a JSON object matching this interface:
-interface ProductSpec {
-  idea: string;
-  persona: string;
-  mvp: string[];
-  phase2: string[];
-  architecture: string;
-  ioContract: string;
-  folderStructure: string;
-  sprintEstimate: number;
-  costEstimate: number;
-}
+                const synthesisPrompt = `Synthesize a ProductSpec JSON based on the following:
+Idea: ${ctx.idea}
+Clarified Spec: ${JSON.stringify(clarifiedSpec)}
+Debate Summary: ${debateState.runningSummary}
+Research Findings: ${debateState.researchFindings ?? "N/A"}
+Output ONLY a JSON object matching this interface:
+interface ProductSpec {
+  idea: string;
+  persona: string;
+  mvp: string[];
+  phase2: string[];
+  architecture: string;
+  ioContract: string;
+  folderStructure: string;
+  sprintEstimate: number;
+  costEstimate: number;
+}
 `;
                 // The scoping phase's only LLM call. Wrapped so a provider hang/
                 // timeout leaves a council_error audit row instead of swallowing the

package/dist/src/product-loop/progress-snapshot.js CHANGED Viewed

@@ -20,10 +20,10 @@ function readLatestSprintStage(runId) {
     try {
         const db = getDatabase();
         const row = db
-            .prepare(`SELECT metadata_json, created_at
-         FROM interaction_logs
-         WHERE session_id = ? AND event_type = 'ui_interaction' AND event_subtype = 'sprint_stage'
-         ORDER BY id DESC
+            .prepare(`SELECT metadata_json, created_at
+         FROM interaction_logs
+         WHERE session_id = ? AND event_type = 'ui_interaction' AND event_subtype = 'sprint_stage'
+         ORDER BY id DESC
          LIMIT 1`)
             .get(runId);
         if (!row)

package/dist/src/providers/auth/gemini-oauth.js CHANGED Viewed

@@ -15,8 +15,8 @@
  * Scopes: https://www.googleapis.com/auth/cloud-platform openid email
  * (same as gemini-cli — covers Generative Language API + user identity)
  */
-import { exec } from "node:child_process";
 import { startOAuthCallbackServer } from "../../mcp/oauth-callback.js";
+import { openUrl } from "../../utils/open-url.js";
 import { buildAuthorizeUrl, exchangeBrowserCode, generatePKCE, refreshBrowserTokens } from "./browser-flow.js";
 import { OAuthLoginError, OAuthRefreshError } from "./types.js";
 // ---------------------------------------------------------------------------
@@ -46,20 +46,11 @@ const REFRESH_WINDOW_MS = 60_000;
 // Loopback callback timeout: 5 minutes for user to complete browser login.
 const CALLBACK_TIMEOUT_MS = 5 * 60_000;
 function defaultOpenBrowser(url) {
-    const platform = process.platform;
-    let cmd;
-    if (platform === "win32") {
-        cmd = `start "" "${url}"`;
-    }
-    else if (platform === "darwin") {
-        cmd = `open "${url}"`;
-    }
-    else {
-        cmd = `xdg-open "${url}"`;
-    }
-    exec(cmd, () => {
-        // fire-and-forget — errors non-fatal (user can open manually)
-    });
+    // Delegate to the centralized, injection-safe opener: it validates the scheme
+    // and spawns via execFile (no shell), so metacharacters in the authorization
+    // URL cannot be interpreted as commands. Fire-and-forget — failures are
+    // non-fatal (the user can open the URL manually).
+    openUrl(url);
 }
 // ---------------------------------------------------------------------------
 // Mutex — prevents double-refresh under concurrent requests

package/dist/src/providers/auth/grok-oauth.js CHANGED Viewed

@@ -24,10 +24,10 @@
  * from the MIT-licensed pi-grok OSS project and cross-checked against the live
  * xAI OIDC discovery document; override via MUONROI_XAI_CLIENT_ID.
  */
-import { exec } from "node:child_process";
 import { randomBytes } from "node:crypto";
 import * as readline from "node:readline";
 import { startOAuthCallbackServer } from "../../mcp/oauth-callback.js";
+import { openUrl } from "../../utils/open-url.js";
 import { exchangeBrowserCode, generatePKCE, refreshBrowserTokens } from "./browser-flow.js";
 import { OAuthLoginError, OAuthRefreshError } from "./types.js";
 // ---------------------------------------------------------------------------
@@ -54,20 +54,11 @@ const REFRESH_WINDOW_MS = 60_000;
 // Loopback callback timeout: 5 minutes for the user to complete browser login.
 const CALLBACK_TIMEOUT_MS = 5 * 60_000;
 function defaultOpenBrowser(url) {
-    const platform = process.platform;
-    let cmd;
-    if (platform === "win32") {
-        cmd = `start "" "${url}"`;
-    }
-    else if (platform === "darwin") {
-        cmd = `open "${url}"`;
-    }
-    else {
-        cmd = `xdg-open "${url}"`;
-    }
-    exec(cmd, () => {
-        // fire-and-forget — errors non-fatal (user can open the URL manually)
-    });
+    // Delegate to the centralized, injection-safe opener: it validates the scheme
+    // and spawns via execFile (no shell), so metacharacters in the authorization
+    // URL cannot be interpreted as commands. Fire-and-forget — failures are
+    // non-fatal (the user can open the URL manually).
+    openUrl(url);
 }
 // ---------------------------------------------------------------------------
 // Mutex — prevents double-refresh under concurrent requests

package/dist/src/providers/auth/openai-oauth.js CHANGED Viewed

@@ -22,9 +22,9 @@
  * NOTE: this is NOT CliOAuthProvider (src/mcp/oauth-provider.ts) which serves
  * the MCP server-discovery OAuth dance.
  */
-import { exec } from "node:child_process";
 import { randomBytes } from "node:crypto";
 import { startOAuthCallbackServer } from "../../mcp/oauth-callback.js";
+import { openUrl } from "../../utils/open-url.js";
 import { exchangeBrowserCode, generatePKCE, refreshBrowserTokens } from "./browser-flow.js";
 import { OAuthLoginError, OAuthRefreshError } from "./types.js";
 // ---------------------------------------------------------------------------
@@ -51,20 +51,11 @@ const OPENAI_ORIGINATOR = "codex_cli_rs";
 const REFRESH_WINDOW_MS = 60_000;
 const CALLBACK_TIMEOUT_MS = 5 * 60_000;
 function defaultOpenBrowser(url) {
-    const platform = process.platform;
-    let cmd;
-    if (platform === "win32") {
-        cmd = `start "" "${url}"`;
-    }
-    else if (platform === "darwin") {
-        cmd = `open "${url}"`;
-    }
-    else {
-        cmd = `xdg-open "${url}"`;
-    }
-    exec(cmd, () => {
-        // fire-and-forget — errors non-fatal (user can open manually)
-    });
+    // Delegate to the centralized, injection-safe opener: it validates the scheme
+    // and spawns via execFile (no shell), so metacharacters in the authorization
+    // URL cannot be interpreted as commands. Fire-and-forget — failures are
+    // non-fatal (the user can open the URL manually).
+    openUrl(url);
 }
 // ---------------------------------------------------------------------------
 // Mutex