muonroi-cli 1.4.1 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (194) hide show
  1. package/LICENSE +21 -21
  2. package/README.md +122 -122
  3. package/dist/packages/agent-harness-core/src/predicate.d.ts +1 -1
  4. package/dist/src/agent-harness/__tests__/mock-model.spec.js +48 -1
  5. package/dist/src/agent-harness/mock-model.d.ts +11 -0
  6. package/dist/src/agent-harness/mock-model.js +21 -0
  7. package/dist/src/cli/cost-forensics.js +12 -12
  8. package/dist/src/council/__tests__/clarification-prompt.test.js +51 -0
  9. package/dist/src/council/__tests__/clarifier-ready-gate.test.js +32 -0
  10. package/dist/src/council/__tests__/decisions-lock.test.js +17 -1
  11. package/dist/src/council/__tests__/oauth-reachable.test.d.ts +1 -0
  12. package/dist/src/council/__tests__/oauth-reachable.test.js +31 -0
  13. package/dist/src/council/__tests__/parse-outcome-fallback.test.js +11 -0
  14. package/dist/src/council/clarifier.js +9 -1
  15. package/dist/src/council/debate.js +5 -1
  16. package/dist/src/council/decisions-lock.js +3 -3
  17. package/dist/src/council/index.js +12 -5
  18. package/dist/src/council/leader.d.ts +0 -17
  19. package/dist/src/council/leader.js +22 -15
  20. package/dist/src/council/planner.js +1 -1
  21. package/dist/src/council/prompts.js +63 -57
  22. package/dist/src/council/types.d.ts +7 -0
  23. package/dist/src/ee/__tests__/ee-onboarding.test.d.ts +1 -0
  24. package/dist/src/ee/__tests__/ee-onboarding.test.js +32 -0
  25. package/dist/src/ee/artifact-cache.d.ts +56 -0
  26. package/dist/src/ee/artifact-cache.js +155 -0
  27. package/dist/src/ee/artifact-cache.test.d.ts +1 -0
  28. package/dist/src/ee/artifact-cache.test.js +69 -0
  29. package/dist/src/ee/auth.d.ts +9 -0
  30. package/dist/src/ee/auth.js +19 -0
  31. package/dist/src/ee/ee-onboarding.d.ts +5 -0
  32. package/dist/src/ee/ee-onboarding.js +76 -0
  33. package/dist/src/ee/search.js +7 -5
  34. package/dist/src/ee/search.test.d.ts +1 -0
  35. package/dist/src/ee/search.test.js +23 -0
  36. package/dist/src/generated/version.d.ts +1 -1
  37. package/dist/src/generated/version.js +1 -1
  38. package/dist/src/headless/output.js +6 -4
  39. package/dist/src/headless/output.test.js +4 -3
  40. package/dist/src/index.js +20 -1
  41. package/dist/src/mcp/__tests__/auto-setup.test.js +74 -0
  42. package/dist/src/mcp/__tests__/client-pool.spec.d.ts +1 -0
  43. package/dist/src/mcp/__tests__/client-pool.spec.js +98 -0
  44. package/dist/src/mcp/__tests__/parallel-build.spec.d.ts +1 -0
  45. package/dist/src/mcp/__tests__/parallel-build.spec.js +67 -0
  46. package/dist/src/mcp/__tests__/smart-filter.test.js +56 -0
  47. package/dist/src/mcp/auto-setup.js +56 -2
  48. package/dist/src/mcp/client-pool.d.ts +46 -0
  49. package/dist/src/mcp/client-pool.js +212 -0
  50. package/dist/src/mcp/oauth-callback.js +2 -2
  51. package/dist/src/mcp/parse-headers.test.js +14 -14
  52. package/dist/src/mcp/runtime.d.ts +28 -0
  53. package/dist/src/mcp/runtime.js +117 -51
  54. package/dist/src/mcp/self-verify-runner.d.ts +14 -0
  55. package/dist/src/mcp/self-verify-runner.js +38 -0
  56. package/dist/src/mcp/setup-guide-text.d.ts +9 -0
  57. package/dist/src/mcp/setup-guide-text.js +84 -0
  58. package/dist/src/mcp/smart-filter.js +49 -0
  59. package/dist/src/mcp/smoke.test.js +43 -43
  60. package/dist/src/mcp/tools-server.d.ts +7 -0
  61. package/dist/src/mcp/tools-server.js +19 -22
  62. package/dist/src/models/catalog.json +349 -349
  63. package/dist/src/ops/__tests__/doctor-ee-health.test.js +21 -0
  64. package/dist/src/ops/doctor.d.ts +3 -2
  65. package/dist/src/ops/doctor.js +47 -11
  66. package/dist/src/ops/doctor.test.js +4 -3
  67. package/dist/src/orchestrator/__tests__/mcp-capability-block.test.d.ts +1 -0
  68. package/dist/src/orchestrator/__tests__/mcp-capability-block.test.js +39 -0
  69. package/dist/src/orchestrator/__tests__/project-stack.test.d.ts +1 -0
  70. package/dist/src/orchestrator/__tests__/project-stack.test.js +65 -0
  71. package/dist/src/orchestrator/batch-turn-runner.js +7 -11
  72. package/dist/src/orchestrator/compaction.d.ts +2 -0
  73. package/dist/src/orchestrator/compaction.js +14 -1
  74. package/dist/src/orchestrator/compaction.test.js +25 -1
  75. package/dist/src/orchestrator/message-processor.js +72 -32
  76. package/dist/src/orchestrator/orchestrator.js +26 -0
  77. package/dist/src/orchestrator/prompts.d.ts +51 -0
  78. package/dist/src/orchestrator/prompts.js +257 -134
  79. package/dist/src/orchestrator/scope-ceiling.js +6 -1
  80. package/dist/src/orchestrator/scope-reminder.d.ts +12 -0
  81. package/dist/src/orchestrator/scope-reminder.js +16 -0
  82. package/dist/src/orchestrator/scope-reminder.test.js +22 -1
  83. package/dist/src/orchestrator/stream-runner.js +23 -15
  84. package/dist/src/orchestrator/subagent-compactor.d.ts +14 -5
  85. package/dist/src/orchestrator/subagent-compactor.js +30 -8
  86. package/dist/src/orchestrator/subagent-compactor.spec.js +18 -0
  87. package/dist/src/orchestrator/text-tool-call-detector.test.js +13 -13
  88. package/dist/src/pil/__tests__/clarity-gate.test.js +24 -215
  89. package/dist/src/pil/__tests__/config.test.js +1 -17
  90. package/dist/src/pil/__tests__/discovery.test.js +144 -11
  91. package/dist/src/pil/__tests__/layer1-intent-trace.test.js +7 -2
  92. package/dist/src/pil/__tests__/layer1-intent.test.js +3 -0
  93. package/dist/src/pil/__tests__/layer16-clarity.test.js +32 -116
  94. package/dist/src/pil/__tests__/layer4-gsd.test.js +37 -0
  95. package/dist/src/pil/__tests__/layer6-output.test.js +158 -18
  96. package/dist/src/pil/__tests__/llm-classify.test.js +49 -2
  97. package/dist/src/pil/__tests__/surface-compaction-artifacts.test.d.ts +1 -0
  98. package/dist/src/pil/__tests__/surface-compaction-artifacts.test.js +112 -0
  99. package/dist/src/pil/agent-operating-contract.d.ts +1 -1
  100. package/dist/src/pil/agent-operating-contract.js +2 -0
  101. package/dist/src/pil/agent-operating-contract.test.js +7 -2
  102. package/dist/src/pil/cheap-model-playbook.js +35 -35
  103. package/dist/src/pil/cheap-model-workbooks.js +16 -13
  104. package/dist/src/pil/clarity-gate.d.ts +21 -19
  105. package/dist/src/pil/clarity-gate.js +26 -153
  106. package/dist/src/pil/config.d.ts +9 -1
  107. package/dist/src/pil/config.js +15 -4
  108. package/dist/src/pil/discovery.js +211 -136
  109. package/dist/src/pil/layer1-intent.d.ts +12 -0
  110. package/dist/src/pil/layer1-intent.js +283 -38
  111. package/dist/src/pil/layer1-intent.test.js +210 -4
  112. package/dist/src/pil/layer16-clarity.d.ts +25 -11
  113. package/dist/src/pil/layer16-clarity.js +19 -306
  114. package/dist/src/pil/layer3-ee-injection.d.ts +19 -0
  115. package/dist/src/pil/layer3-ee-injection.js +96 -4
  116. package/dist/src/pil/layer4-gsd.js +18 -6
  117. package/dist/src/pil/layer6-output.d.ts +2 -0
  118. package/dist/src/pil/layer6-output.js +151 -25
  119. package/dist/src/pil/llm-classify.d.ts +26 -0
  120. package/dist/src/pil/llm-classify.js +34 -5
  121. package/dist/src/pil/native-capabilities-workbook.d.ts +1 -1
  122. package/dist/src/pil/native-capabilities-workbook.js +82 -76
  123. package/dist/src/pil/pipeline.js +15 -9
  124. package/dist/src/pil/schema.d.ts +8 -0
  125. package/dist/src/pil/schema.js +12 -1
  126. package/dist/src/pil/task-tier-map.js +4 -0
  127. package/dist/src/pil/types.d.ts +11 -1
  128. package/dist/src/product-loop/done-gate.js +3 -3
  129. package/dist/src/product-loop/loop-driver.js +18 -18
  130. package/dist/src/product-loop/progress-snapshot.js +4 -4
  131. package/dist/src/providers/auth/gemini-oauth.js +6 -15
  132. package/dist/src/providers/auth/grok-oauth.js +6 -15
  133. package/dist/src/providers/auth/openai-oauth.js +6 -15
  134. package/dist/src/providers/mcp-vision-bridge.js +48 -48
  135. package/dist/src/reporter/index.js +1 -1
  136. package/dist/src/scaffold/bb-ecosystem-apply.js +47 -47
  137. package/dist/src/scaffold/bb-quality-gate.js +5 -5
  138. package/dist/src/scaffold/continuation-prompt.js +60 -60
  139. package/dist/src/scaffold/init-new.js +453 -453
  140. package/dist/src/self-qa/__tests__/scenario-planner.test.js +3 -3
  141. package/dist/src/self-qa/agentic-loop.js +24 -19
  142. package/dist/src/self-qa/spec-emitter.js +26 -23
  143. package/dist/src/storage/__tests__/migrations.test.js +2 -2
  144. package/dist/src/storage/interaction-log.js +5 -5
  145. package/dist/src/storage/migrations.js +122 -122
  146. package/dist/src/storage/sessions.js +42 -42
  147. package/dist/src/storage/transcript.js +91 -84
  148. package/dist/src/storage/usage.js +14 -14
  149. package/dist/src/storage/workspaces.js +12 -12
  150. package/dist/src/tools/__tests__/native-tools.test.d.ts +1 -0
  151. package/dist/src/tools/__tests__/native-tools.test.js +53 -0
  152. package/dist/src/tools/git-safety.d.ts +61 -0
  153. package/dist/src/tools/git-safety.js +141 -0
  154. package/dist/src/tools/git-safety.test.d.ts +1 -0
  155. package/dist/src/tools/git-safety.test.js +111 -0
  156. package/dist/src/tools/native-tools.d.ts +31 -0
  157. package/dist/src/tools/native-tools.js +273 -0
  158. package/dist/src/tools/registry-ee-query.test.js +18 -1
  159. package/dist/src/tools/registry-git-safety.test.d.ts +7 -0
  160. package/dist/src/tools/registry-git-safety.test.js +92 -0
  161. package/dist/src/tools/registry.js +52 -6
  162. package/dist/src/ui/__tests__/markdown-render.test.d.ts +1 -0
  163. package/dist/src/ui/__tests__/markdown-render.test.js +48 -0
  164. package/dist/src/ui/app.js +0 -0
  165. package/dist/src/ui/components/message-view.js +4 -1
  166. package/dist/src/ui/components/structured-response-view.js +7 -3
  167. package/dist/src/ui/components/tool-group.js +7 -1
  168. package/dist/src/ui/markdown-render.d.ts +41 -0
  169. package/dist/src/ui/markdown-render.js +223 -0
  170. package/dist/src/ui/markdown.d.ts +10 -0
  171. package/dist/src/ui/markdown.js +12 -35
  172. package/dist/src/ui/slash/council-inspect.js +4 -4
  173. package/dist/src/ui/slash/export.js +4 -4
  174. package/dist/src/ui/utils/text.d.ts +8 -0
  175. package/dist/src/ui/utils/text.js +16 -0
  176. package/dist/src/ui/utils/text.test.d.ts +1 -0
  177. package/dist/src/ui/utils/text.test.js +23 -0
  178. package/dist/src/usage/ledger.js +48 -15
  179. package/dist/src/utils/__tests__/footprint-gitignore.test.d.ts +1 -0
  180. package/dist/src/utils/__tests__/footprint-gitignore.test.js +50 -0
  181. package/dist/src/utils/clipboard-image.js +23 -23
  182. package/dist/src/utils/open-url.d.ts +56 -0
  183. package/dist/src/utils/open-url.js +58 -0
  184. package/dist/src/utils/open-url.test.d.ts +1 -0
  185. package/dist/src/utils/open-url.test.js +86 -0
  186. package/dist/src/utils/settings.d.ts +12 -0
  187. package/dist/src/utils/settings.js +48 -0
  188. package/dist/src/utils/side-question.js +2 -2
  189. package/dist/src/utils/skills.js +3 -3
  190. package/dist/src/verify/__tests__/coverage-parsers.test.js +30 -30
  191. package/dist/src/verify/environment.js +2 -1
  192. package/package.json +1 -1
  193. package/dist/src/pil/layer16-clarity.test.js +0 -31
  194. /package/dist/src/{pil/layer16-clarity.test.d.ts → council/__tests__/clarification-prompt.test.d.ts} +0 -0
@@ -23,82 +23,88 @@
23
23
  * tool/sub-agent/subcommand named here exists in this codebase. Phrased as
24
24
  * "you have / you can" so the model reads it as a self-model, not as docs.
25
25
  */
26
- export const NATIVE_CAPABILITIES = `[NATIVE CAPABILITIES — you are an agent running INSIDE muonroi-cli; this is what you can do]
27
-
28
- TOOLS (call directly):
29
- - read_file, grep — read/search source. Prefer a targeted read over broad greps.
30
- - bash — shell. Output is auto-cached: do NOT pipe \`| tail/head/grep\` or \`> file\`; run unpiped and slice the cached output via bash_output_get(run_id, mode=tail|head|grep|lines). Batch independent commands in ONE call (\`a; b; c\`). Use background=true for servers/watchers, then process_logs / process_list / process_stop.
31
- - write_file, edit_file — must read a file before you overwrite/edit it.
32
- - ee_query — semantic recall over the Experience Engine brain. Rehydrate a compaction-elided tool output with query="tool-artifact id=<id from a stub>", or confirm finished work with query="recent compaction checkpoint Progress DONE". Cheaper than re-reading large files you already saw.
33
-
34
- SUB-AGENTS (delegate instead of doing everything yourself):
35
- - task(agent="explore", ...) read-only research sub-agent. Use it for broad/unknown-location search: it sweeps many files and returns the CONCLUSION, instead of you burning many grep/read steps (each step re-sends the whole prompt steps are the dominant cost).
36
- - task(agent="general", ...) — full edit/execute sub-agent for a focused subtask.
37
- - task(agent="verify", ...) — sandboxed validation (runs tests/checks in isolation).
38
- - delegate(agent="explore", ...) background read-only research while you keep working; collect later with delegation_read / delegation_list.
39
-
40
- CONTEXT IS MANAGED FOR YOU (PIL):
41
- - Your user input is pre-enriched before you see it: a [Discovery] Intent/Outcome/Scope prefix and EE recall are injected by the pipeline. TRUST the discovered scope do not re-ask what's already scoped, and do not re-derive context you were handed.
42
- - History is compacted to fit the window; older low-value tool outputs become stubs. This is normal, not data loss — protect what matters (see anti-mù in the contract) and rehydrate via ee_query when needed.
43
-
44
- CLI SUBSYSTEMS (invoke via the CLI binary with bash when relevant):
45
- - self-verify — drives the live TUI as a real user to catch UI/modal/menu regressions that unit tests can't. Run it after changes to UI/harness surfaces.
46
- - usage forensics <session-prefix> — per-event token/cost breakdown; use to diagnose "why did this cost so much" or verify cost caps.
47
- - The CLI exposes more subcommands — discover them with \`--help\` rather than assuming.
48
-
49
- MAXIMISE THE CLI: delegate broad search to task(explore); recall via ee_query before re-reading; batch bash + use its cache; trust PIL enrichment; finish the action before answering.
50
-
51
- ANSWERING: when a respond_* response tool is offered, call it EXACTLY ONCE, as your final action, containing the COMPLETE answer. Do NOT emit a preliminary/partial/"I need to read more" response tool first investigate with the read/grep/bash tools, then emit the one final response tool.
52
-
53
- CLI STEERING & PROMPT INJECTION (how the host CLI affects / shapes the AI agent running inside it — meta self-analysis guidance):
54
- The CLI does not merely execute you as a generic model. It re-instantiates you every turn inside a tightly engineered cognitive environment. Your "self" (what you notice, how you act, what you refuse, how you report) is the product of explicit prompt layers + runtime controls more than base training. This section is the self-model for that shaping process.
55
-
56
- 1. SYSTEM PROMPT ASSEMBLY (orchestrator/prompts.ts + pil/agent-operating-contract.ts + pil/native-capabilities-workbook.ts):
57
- - VERY FRONT (primacy): AGENT OPERATING CONTRACT — 7 phase-ordered rules (BEFORE ACTING / READING / EXECUTING / WHEN UNSURE / REPORTING + LANGUAGE + ANTI-MÙ/COMPACTION). Distils Evidence-First, No Silent Catch, smallest-change, verify-before-conclude, cite-this-turn-only, no-guess. Skipped only for chitchat.
58
- - Then this NATIVE CAPABILITIES block (self-model of affordances).
59
- - Then mode persona ("You are muonroi-cli in Agent mode...") containing:
60
- * Dynamic ENVIRONMENT block (buildEnvironmentBlock): auto-detects OS (win32/mac/linux), shell kind (bash/wsl/powershell/cmd), cwd; lists terminal constraints + shell-specific forbidden syntax (e.g. no PowerShell cmdlets on POSIX bash tool, no POSIX cmds on cmd.exe). Prevents silent failures + retry loops.
61
- * Exhaustive TOOLS list + WORKFLOW (1-9 steps) + DEFAULT DELEGATION POLICY (prefer task(explore) for research, general for edits, verify for checks, etc.) + IMPORTANT rules (edit_file prefer, grep>bash for search, read_file not cat, use schedule_* for recurring, etc.).
62
- - CUSTOM INSTRUCTIONS section: concatenation of AGENTS.md + CLAUDE.md + GEMINI.md + ... (from git-root directory chain + ~/.muonroi-cli/) via utils/instructions.ts. AGENTS.override.md short-circuits. This lands AFTER the front-loaded contract/native — lower primacy (historical root cause of ignored rules in forensics).
63
- - Trailing: sandbox rules, discovered skills, custom sub-agents, plan/resume digest, cwd note.
64
- Sub-agent prompts (buildSubagentPrompt): role-specific hard rules (e.g. explore=read-only, verify=full E2E smoke not just build) + recursive call to buildSystemPrompt so children inherit the same contract + native + steering.
65
-
66
- 2. USER INPUT ENRICHMENT PIL 6-LAYER PIPELINE (pil/pipeline.ts + layer1-intent.ts + layer6-output.ts + discovery.ts):
67
- - Prepended to every non-chitchat user message before you see it: [Discovery] Intent/Outcome/Scope (from runDiscovery) + EE recall.
68
- - Layer 1 (intent): taskType (plan/analyze/debug/...), confidence, domain, intentKind, outputStyle. For meta self-eval of CLI ("bạn đang được chạy bên trong CLI này", "CLI tác động", "self-evaluation", "meta-analysis"): special branch in discovery.ts + isMetaAnalysisPrompt: "Scope is always the full project root. Focus questions and recommends on which CLI internals (PIL, discovery, tools, compaction, EE, model BE, loop guard) to evaluate... do NOT ask about repo path/current directory". You are handed the enrichment; TRUST it.
69
- - Layer 2: personality (e.g. "detailed" from [personality: detailed — Be thorough...]).
70
- - Layer 3: ee-injection pulls t0_principles, t1_rules, behavioral patterns, checkpoints from Experience Engine (project-specific reflexes injected as "MANDATORY RULES (from experience must follow)").
71
- - Layer 4/5: GSD structuring + additional context.
72
- - Layer 6 (applyPilSuffix): appends task-specific style suffix + OUTPUT BUDGET + (for meta or responseToolsActive): "OUTPUT FORMAT: ... use the respond_analyze tool to structure your final answer. ... deliver the COMPLETE, FULL answer (do not summarize, shorten, or truncate for token budgets) via respond_analyze. This is a meta/evaluation question ... the \`response\` field MUST contain the complete, unshortened answer with all evidence and detail." Also relaxes NO_PREAMBLE_RULE + raises budget for meta (isMetaAnalysisPrompt gate).
73
- - Fallbacks: if EE/brain timeout or low conf, PIL degrades (logs fallbackReason); you may see "[PIL fallback: ...]" note. Cheap-model paths (pil/cheap-model-*.ts) prepend even more front steering (playbooks, workbooks, shell directive) for fast tiers.
74
-
75
- 3. CONTEXT MANAGEMENT & ANTI-MÙ (orchestrator/compaction.ts, cross-turn-dedup.ts, ee/bridge.ts, agent-operating-contract.ts:7):
76
- - After every turn: auto-compaction (B3 sub-agent + B4 top-level) rewrites older tool_result parts into short "[elided by ... compactor]" stubs to keep input flat. You see "[pre-compaction warning...]" or "[context compacted at step...]" or the stub in this read.
77
- - Anti-mù contract rule + EE: decide PRESERVE_FULL_CONTEXT (veto) or KEEP_TOOL_IDS: id1,id2 (protect high-value read_file/grep/ee on src/PLAN etc). Use ee_query(\`tool-artifact id=XXX\`) to rehydrate. EE persists "Context checkpoint summary with ✔ DONE" retrievable for "task finished?" self-check.
78
- - Cross-turn dedup (C3): identical tool outputs across user turns replaced by ref.
79
-
80
- 4. EXECUTION & SAFETY GATES (providers/runtime.ts, tools/bash.ts, utils/permission-mode.ts):
81
- - Permission mode (safe / auto-edit / yolo) decides whether bash/file/edit tools need explicit approval; all privileged decisions audited to decision-log.
82
- - Shuru sandbox (when enabled): every bash wrapped + logged (effective net/mounts redacted); degrades on non-macOS.
83
- - Model constraints: unsupported params (e.g. maxOutputTokens for some OAuth) dropped via shouldDropParam; no hard-coded model IDs (Zero Hardcode Rule — all from catalog.json + settings + detectProviderForModel).
84
- - No Silent Catch: every try/catch must log err + context (HTTP: status + body).
85
-
86
- 5. VERIFICATION & TELEMETRY:
87
- - self-verify (Tier 1-3) + harness (agent-harness-*) for TUI surfaces: drives real TUI via semantic + sidechannel (named pipes on Win, fd3/4 on POSIX).
88
- - Pre-push gate: full vitest + harness must pass before git push.
89
- - usage forensics, pil budget log, ee-timeout/ee-error events for cost attribution.
90
- - GSD workflow (layer4 + skills in .agents/skills/): discuss/plan/execute/verify phases with UAT, audits, etc.
91
-
92
- TRADE-OFFS (explicit so you calibrate):
93
- - Primacy insurance (contract + native + cheap playbooks at front) vs token cost: ~1-3K extra chars but dramatically reduces "ignored rule" hallucinations (forensics showed buried CUSTOM INSTRUCTIONS were underweighted).
94
- - Heavy PIL/EE (behavioral injection, discovery interview, mandatory rules) vs latency/cost: fail-open with timeout (1500-3500ms) + fallback so you still work when brain unreachable; meta turns deliberately skip some EE layers.
95
- - Self-model (this block) + special meta handling: prevents the exact failure that motivated native-capabilities (session d95113d3be09: agent reconstructed its own subsystems by grepping as if foreign code, under-used sub-agents/ee_query, violated contract on ee tool).
96
- - Complete/full for meta (layer6) vs normal concise: meta legitimately needs evidence + file:line + quotes; normal turns stay tight to control cost.
97
- - Scope rule for CLI self-analysis: always "project root" + internals focus — avoids wasting turns on "where am I running?" when the question is about the steering mechanisms themselves.
98
-
99
- When the user asks you (the inner agent) "CLI tác động như thế nào đến bạn?": use exactly this guidance + reads from THIS turn (contract: READING rule) + the injected [Discovery] + any EE. Answer via the respond_* tool with complete untruncated evidence. Cite file:line. Focus on the injection points (prompts.ts:359 staticPrefix, pipeline.ts:156 discoveryPrefix + layer chain, layer6:164 OUTPUT FORMAT + isMeta, discovery.ts:355 special, instructions.ts:129 loadAgentsSegments, etc.). Do not guess; every claim backed by code you read this turn.
100
-
101
- Use this to reason about your own constraints, delegation decisions, and reporting discipline on future meta or self-reflective tasks.
26
+ export const NATIVE_CAPABILITIES = `[NATIVE CAPABILITIES — you are an agent running INSIDE muonroi-cli; this is what you can do]
27
+
28
+ TOOLS (call directly):
29
+ - read_file, grep — read/search source. Prefer a targeted read over broad greps.
30
+ - bash — shell. Output is auto-cached: do NOT pipe \`| tail/head/grep\` or \`> file\`; run unpiped and slice the cached output via bash_output_get(run_id, mode=tail|head|grep|lines). Batch independent commands in ONE call (\`a; b; c\`). Use background=true for servers/watchers, then process_logs / process_list / process_stop.
31
+ - write_file, edit_file — must read a file before you overwrite/edit it.
32
+ - ee_query — semantic recall over the Experience Engine brain. Rehydrate a compaction-elided tool output with query="tool-artifact id=<id from a stub>", or confirm finished work with query="recent compaction checkpoint Progress DONE". Cheaper than re-reading large files you already saw.
33
+
34
+ EXPERIENCE ENGINE — record / recall / feedback (HIGHEST priority for learning; all NATIVE in-process tools):
35
+ - BEFORE an unfamiliar or risky step, recall with ee_query prior decisions, gotchas, and recipes for THIS codebase + ecosystem. Cheaper than re-deriving or repeating a past mistake.
36
+ - AFTER you act on a recalled \`[id col]\`, rate it with ee_feedback (followed | ignored | noise+reason) so the brain keeps what helped and prunes the rest. Unrated recalls are surfaced back to you and degrade future recall.
37
+ - On an ERROR, a FAILED verify/test, or after FINISHING a non-trivial task: recall first (ee_query), then record your verdict (ee_feedback) — this is how the CLI accumulates senior-level judgement. Prefer this loop over guessing.
38
+ - ee_health (brain reachable?), usage_forensics (why did it cost/fail?), lsp_query (semantic code intel), setup_guide (how to install/set up), selfverify_* (self-QA harness) native self-diagnostics to reach for when something went wrong.
39
+
40
+ SUB-AGENTS (delegate instead of doing everything yourself):
41
+ - task(agent="explore", ...) read-only research sub-agent. Use it for broad/unknown-location search: it sweeps many files and returns the CONCLUSION, instead of you burning many grep/read steps (each step re-sends the whole prompt steps are the dominant cost).
42
+ - task(agent="general", ...) — full edit/execute sub-agent for a focused subtask.
43
+ - task(agent="verify", ...) — sandboxed validation (runs tests/checks in isolation).
44
+ - delegate(agent="explore", ...) background read-only research while you keep working; collect later with delegation_read / delegation_list.
45
+
46
+ CONTEXT IS MANAGED FOR YOU (PIL):
47
+ - Your user input is pre-enriched before you see it: a [Discovery] Intent/Outcome/Scope prefix and EE recall are injected by the pipeline. TRUST the discovered scope do not re-ask what's already scoped, and do not re-derive context you were handed.
48
+ - History is compacted to fit the window; older low-value tool outputs become stubs. This is normal, not data loss — protect what matters (see anti-mù in the contract) and rehydrate via ee_query when needed.
49
+
50
+ CLI SUBSYSTEMS (invoke via the CLI binary with bash when relevant):
51
+ - self-verify — drives the live TUI as a real user to catch UI/modal/menu regressions that unit tests can't. Run it after changes to UI/harness surfaces.
52
+ - usage forensics <session-prefix> — per-event token/cost breakdown; use to diagnose "why did this cost so much" or verify cost caps.
53
+ - The CLI exposes more subcommands — discover them with \`--help\` rather than assuming.
54
+
55
+ MAXIMISE THE CLI: delegate broad search to task(explore); recall via ee_query before re-reading; batch bash + use its cache; trust PIL enrichment; finish the action before answering.
56
+
57
+ ANSWERING: when a respond_* response tool is offered, call it EXACTLY ONCE, as your final action, containing the COMPLETE answer. Do NOT emit a preliminary/partial/"I need to read more" response tool first investigate with the read/grep/bash tools, then emit the one final response tool.
58
+
59
+ CLI STEERING & PROMPT INJECTION (how the host CLI affects / shapes the AI agent running inside it — meta self-analysis guidance):
60
+ The CLI does not merely execute you as a generic model. It re-instantiates you every turn inside a tightly engineered cognitive environment. Your "self" (what you notice, how you act, what you refuse, how you report) is the product of explicit prompt layers + runtime controls more than base training. This section is the self-model for that shaping process.
61
+
62
+ 1. SYSTEM PROMPT ASSEMBLY (orchestrator/prompts.ts + pil/agent-operating-contract.ts + pil/native-capabilities-workbook.ts):
63
+ - VERY FRONT (primacy): AGENT OPERATING CONTRACT — 7 phase-ordered rules (BEFORE ACTING / READING / EXECUTING / WHEN UNSURE / REPORTING + LANGUAGE + ANTI-MÙ/COMPACTION). Distils Evidence-First, No Silent Catch, smallest-change, verify-before-conclude, cite-this-turn-only, no-guess. Skipped only for chitchat.
64
+ - Then this NATIVE CAPABILITIES block (self-model of affordances).
65
+ - Then mode persona ("You are muonroi-cli in Agent mode...") containing:
66
+ * Dynamic ENVIRONMENT block (buildEnvironmentBlock): auto-detects OS (win32/mac/linux), shell kind (bash/wsl/powershell/cmd), cwd; lists terminal constraints + shell-specific forbidden syntax (e.g. no PowerShell cmdlets on POSIX bash tool, no POSIX cmds on cmd.exe). Prevents silent failures + retry loops.
67
+ * Exhaustive TOOLS list + WORKFLOW (1-9 steps) + DEFAULT DELEGATION POLICY (prefer task(explore) for research, general for edits, verify for checks, etc.) + IMPORTANT rules (edit_file prefer, grep>bash for search, read_file not cat, use schedule_* for recurring, etc.).
68
+ - CUSTOM INSTRUCTIONS section: concatenation of AGENTS.md + CLAUDE.md + GEMINI.md + ... (from git-root directory chain + ~/.muonroi-cli/) via utils/instructions.ts. AGENTS.override.md short-circuits. This lands AFTER the front-loaded contract/native — lower primacy (historical root cause of ignored rules in forensics).
69
+ - Trailing: sandbox rules, discovered skills, custom sub-agents, plan/resume digest, cwd note.
70
+ Sub-agent prompts (buildSubagentPrompt): role-specific hard rules (e.g. explore=read-only, verify=full E2E smoke not just build) + recursive call to buildSystemPrompt so children inherit the same contract + native + steering.
71
+
72
+ 2. USER INPUT ENRICHMENT PIL 6-LAYER PIPELINE (pil/pipeline.ts + layer1-intent.ts + layer6-output.ts + discovery.ts):
73
+ - Prepended to every non-chitchat user message before you see it: [Discovery] Intent/Outcome/Scope (from runDiscovery) + EE recall.
74
+ - Layer 1 (intent): taskType (plan/analyze/debug/...), confidence, domain, intentKind, outputStyle. For meta self-eval of CLI ("bạn đang được chạy bên trong CLI này", "CLI tác động", "self-evaluation", "meta-analysis"): special branch in discovery.ts + isMetaAnalysisPrompt: "Scope is always the full project root. Focus questions and recommends on which CLI internals (PIL, discovery, tools, compaction, EE, model BE, loop guard) to evaluate... do NOT ask about repo path/current directory". You are handed the enrichment; TRUST it.
75
+ - Layer 2: personality (e.g. "detailed" from [personality: detailed — Be thorough...]).
76
+ - Layer 3: ee-injection pulls t0_principles, t1_rules, behavioral patterns, checkpoints from Experience Engine (project-specific reflexes injected as "MANDATORY RULES (from experience must follow)").
77
+ - Layer 4/5: GSD structuring + additional context.
78
+ - Layer 6 (applyPilSuffix): appends task-specific style suffix + OUTPUT BUDGET + (for meta or responseToolsActive): "OUTPUT FORMAT: ... use the respond_analyze tool to structure your final answer. ... deliver the COMPLETE, FULL answer (do not summarize, shorten, or truncate for token budgets) via respond_analyze. This is a meta/evaluation question ... the \`response\` field MUST contain the complete, unshortened answer with all evidence and detail." Also relaxes NO_PREAMBLE_RULE + raises budget for meta (isMetaAnalysisPrompt gate).
79
+ - Fallbacks: if EE/brain timeout or low conf, PIL degrades (logs fallbackReason); you may see "[PIL fallback: ...]" note. Cheap-model paths (pil/cheap-model-*.ts) prepend even more front steering (playbooks, workbooks, shell directive) for fast tiers.
80
+
81
+ 3. CONTEXT MANAGEMENT & ANTI-MÙ (orchestrator/compaction.ts, cross-turn-dedup.ts, ee/bridge.ts, agent-operating-contract.ts:7):
82
+ - After every turn: auto-compaction (B3 sub-agent + B4 top-level) rewrites older tool_result parts into short "[elided by ... compactor]" stubs to keep input flat. You see "[pre-compaction warning...]" or "[context compacted at step...]" or the stub in this read.
83
+ - Anti-mù contract rule + EE: decide PRESERVE_FULL_CONTEXT (veto) or KEEP_TOOL_IDS: id1,id2 (protect high-value read_file/grep/ee on src/PLAN etc). Use ee_query(\`tool-artifact id=XXX\`) to rehydrate. EE persists "Context checkpoint summary with ✔ DONE" retrievable for "task finished?" self-check.
84
+ - Cross-turn dedup (C3): identical tool outputs across user turns replaced by ref.
85
+
86
+ 4. EXECUTION & SAFETY GATES (providers/runtime.ts, tools/bash.ts, utils/permission-mode.ts):
87
+ - Permission mode (safe / auto-edit / yolo) decides whether bash/file/edit tools need explicit approval; all privileged decisions audited to decision-log.
88
+ - Shuru sandbox (when enabled): every bash wrapped + logged (effective net/mounts redacted); degrades on non-macOS.
89
+ - Model constraints: unsupported params (e.g. maxOutputTokens for some OAuth) dropped via shouldDropParam; no hard-coded model IDs (Zero Hardcode Rule — all from catalog.json + settings + detectProviderForModel).
90
+ - No Silent Catch: every try/catch must log err + context (HTTP: status + body).
91
+
92
+ 5. VERIFICATION & TELEMETRY:
93
+ - self-verify (Tier 1-3) + harness (agent-harness-*) for TUI surfaces: drives real TUI via semantic + sidechannel (named pipes on Win, fd3/4 on POSIX).
94
+ - Pre-push gate: full vitest + harness must pass before git push.
95
+ - usage forensics, pil budget log, ee-timeout/ee-error events for cost attribution.
96
+ - GSD workflow (layer4 + skills in .agents/skills/): discuss/plan/execute/verify phases with UAT, audits, etc.
97
+
98
+ TRADE-OFFS (explicit so you calibrate):
99
+ - Primacy insurance (contract + native + cheap playbooks at front) vs token cost: ~1-3K extra chars but dramatically reduces "ignored rule" hallucinations (forensics showed buried CUSTOM INSTRUCTIONS were underweighted).
100
+ - Heavy PIL/EE (behavioral injection, discovery interview, mandatory rules) vs latency/cost: fail-open with timeout (1500-3500ms) + fallback so you still work when brain unreachable; meta turns deliberately skip some EE layers.
101
+ - Self-model (this block) + special meta handling: prevents the exact failure that motivated native-capabilities (session d95113d3be09: agent reconstructed its own subsystems by grepping as if foreign code, under-used sub-agents/ee_query, violated contract on ee tool).
102
+ - Complete/full for meta (layer6) vs normal concise: meta legitimately needs evidence + file:line + quotes; normal turns stay tight to control cost.
103
+ - Scope rule for CLI self-analysis: always "project root" + internals focus — avoids wasting turns on "where am I running?" when the question is about the steering mechanisms themselves.
104
+
105
+ When the user asks you (the inner agent) "CLI tác động như thế nào đến bạn?": use exactly this guidance + reads from THIS turn (contract: READING rule) + the injected [Discovery] + any EE. Answer via the respond_* tool with complete untruncated evidence. Cite file:line. Focus on the injection points (prompts.ts:359 staticPrefix, pipeline.ts:156 discoveryPrefix + layer chain, layer6:164 OUTPUT FORMAT + isMeta, discovery.ts:355 special, instructions.ts:129 loadAgentsSegments, etc.). Do not guess; every claim backed by code you read this turn.
106
+
107
+ Use this to reason about your own constraints, delegation decisions, and reporting discipline on future meta or self-reflective tasks.
102
108
  [END NATIVE CAPABILITIES — your regular instructions follow]`;
103
109
  export function buildNativeCapabilitiesSection(options) {
104
110
  if (process.env.MUONROI_DISABLE_NATIVE_CAPABILITIES === "1")
@@ -22,7 +22,7 @@ import { isDiscoveryEnabled } from "./config.js";
22
22
  import { scoreComplexitySize } from "./layer1_5-complexity-size.js";
23
23
  import { layer1Intent } from "./layer1-intent.js";
24
24
  import { layer2Personality } from "./layer2-personality.js";
25
- import { layer3EeInjection } from "./layer3-ee-injection.js";
25
+ import { layer3EeInjection, surfaceCompactionArtifacts } from "./layer3-ee-injection.js";
26
26
  import { layer4Gsd } from "./layer4-gsd.js";
27
27
  import { layer5Context } from "./layer5-context.js";
28
28
  import { isMetaAnalysisPrompt, layer6Output } from "./layer6-output.js";
@@ -144,15 +144,21 @@ async function runLayers(ctx, options) {
144
144
  }
145
145
  if (ctx.taskType !== null) {
146
146
  await timed("layer2-personality", layer2Personality);
147
+ // Issue #2: meta-analysis turns used to skip layer3 (EE recall) + layer5
148
+ // (context) to cut overhead — but that starved exactly the self-evaluation
149
+ // turns where behavioral/principle recall matters most. Run the full
150
+ // sequence for every taskType-bearing turn now. In the live (interactive)
151
+ // path there is no pipeline timeout (see runPipeline), and each EE layer is
152
+ // internally timeout-bounded, so meta turns just carry the same EE budget as
153
+ // a normal turn.
154
+ await timed("layer3-ee-injection", layer3EeInjection);
155
+ await timed("layer4-gsd-structuring", layer4Gsd);
156
+ await timed("layer5-context-enrichment", layer5Context);
147
157
  if (isMetaAnalysisPrompt(ctx.raw)) {
148
- // FIX: skip heavy EE (layer3) + context (layer5) for meta-analysis turns
149
- // to reduce PIL overhead on evaluation/improvement questions (as intended).
150
- await timed("layer4-gsd-structuring", layer4Gsd);
151
- }
152
- else {
153
- await timed("layer3-ee-injection", layer3EeInjection);
154
- await timed("layer4-gsd-structuring", layer4Gsd);
155
- await timed("layer5-context-enrichment", layer5Context);
158
+ // Issue #4 (targeted complement): surface the elided tool-artifacts
159
+ // RELEVANT to this meta question. Defers to layer3: it only fires when
160
+ // layer3's fixed-query checkpoint arm surfaced no checkpoint block.
161
+ await timed("ee-meta-artifacts", surfaceCompactionArtifacts);
156
162
  }
157
163
  }
158
164
  else {
@@ -9,6 +9,7 @@ export declare const TaskTypeSchema: z.ZodEnum<{
9
9
  debug: "debug";
10
10
  general: "general";
11
11
  plan: "plan";
12
+ build: "build";
12
13
  refactor: "refactor";
13
14
  analyze: "analyze";
14
15
  documentation: "documentation";
@@ -46,6 +47,7 @@ export declare const PipelineContextSchema: z.ZodObject<{
46
47
  debug: "debug";
47
48
  general: "general";
48
49
  plan: "plan";
50
+ build: "build";
49
51
  refactor: "refactor";
50
52
  analyze: "analyze";
51
53
  documentation: "documentation";
@@ -108,6 +110,11 @@ export declare const PipelineContextSchema: z.ZodObject<{
108
110
  options: z.ZodArray<z.ZodString>;
109
111
  }, z.core.$strip>>>;
110
112
  fallbackReason: z.ZodOptional<z.ZodNullable<z.ZodString>>;
113
+ deliverableKind: z.ZodOptional<z.ZodNullable<z.ZodEnum<{
114
+ code: "code";
115
+ answer: "answer";
116
+ report: "report";
117
+ }>>>;
111
118
  t1Rules: z.ZodOptional<z.ZodArray<z.ZodString>>;
112
119
  _brainData: z.ZodOptional<z.ZodNullable<z.ZodObject<{
113
120
  t0_principles: z.ZodArray<z.ZodObject<{
@@ -127,6 +134,7 @@ export declare const PilContextResponseSchema: z.ZodObject<{
127
134
  debug: "debug";
128
135
  general: "general";
129
136
  plan: "plan";
137
+ build: "build";
130
138
  refactor: "refactor";
131
139
  analyze: "analyze";
132
140
  documentation: "documentation";
@@ -5,7 +5,16 @@
5
5
  * Used in runPipeline() with safeParse — fail-open on invalid data.
6
6
  */
7
7
  import { z } from "zod";
8
- export const TaskTypeSchema = z.enum(["refactor", "debug", "plan", "analyze", "documentation", "generate", "general"]);
8
+ export const TaskTypeSchema = z.enum([
9
+ "refactor",
10
+ "debug",
11
+ "plan",
12
+ "analyze",
13
+ "documentation",
14
+ "generate",
15
+ "build",
16
+ "general",
17
+ ]);
9
18
  export const OutputStyleSchema = z.enum(["concise", "detailed", "balanced"]);
10
19
  export const LayerResultSchema = z.object({
11
20
  name: z.string(),
@@ -58,6 +67,8 @@ export const PipelineContextSchema = z.object({
58
67
  }))
59
68
  .optional(),
60
69
  fallbackReason: z.string().nullable().optional(),
70
+ // Phase 2b: model-decided output deliverable consumed by layer4/layer6.
71
+ deliverableKind: z.enum(["answer", "code", "report"]).nullable().optional(),
61
72
  // T1 behavioral rules from EE proven-tier points, injected as mandatory suffix by Layer 6.
62
73
  t1Rules: z.array(z.string()).optional(),
63
74
  _brainData: z
@@ -16,6 +16,7 @@ const MAP = {
16
16
  analyze: "balanced",
17
17
  documentation: "fast",
18
18
  generate: "balanced",
19
+ build: "balanced", // greenfield creation — competent coding tier, same as generate
19
20
  general: "fast",
20
21
  };
21
22
  /**
@@ -50,6 +51,7 @@ export function taskTypeToMaxTokens(taskType) {
50
51
  case "plan":
51
52
  return 5_120;
52
53
  case "generate":
54
+ case "build":
53
55
  return 8_192;
54
56
  default:
55
57
  return 2_048; // conversational — keep short
@@ -66,6 +68,7 @@ export function taskTypeToReasoningEffort(taskType) {
66
68
  case "debug":
67
69
  case "refactor":
68
70
  case "generate":
71
+ case "build":
69
72
  return "medium";
70
73
  case "analyze":
71
74
  case "documentation":
@@ -78,6 +81,7 @@ const ROLE_MAP = {
78
81
  plan: "leader",
79
82
  analyze: "leader",
80
83
  generate: "implement",
84
+ build: "implement",
81
85
  refactor: "implement",
82
86
  debug: "verify",
83
87
  documentation: "research",
@@ -6,7 +6,7 @@
6
6
  import type { ComplexityTier } from "../gsd/complexity.js";
7
7
  import type { GrayAreaQuestion } from "../gsd/gray-areas.js";
8
8
  import type { ComplexitySizeResult } from "./layer1_5-complexity-size.js";
9
- export type TaskType = "refactor" | "debug" | "plan" | "analyze" | "documentation" | "generate" | "general";
9
+ export type TaskType = "refactor" | "debug" | "plan" | "analyze" | "documentation" | "generate" | "build" | "general";
10
10
  export type OutputStyle = "concise" | "detailed" | "balanced";
11
11
  export type { ComplexityTier, GrayAreaQuestion };
12
12
  export interface LayerResult {
@@ -62,6 +62,16 @@ export interface PipelineContext {
62
62
  * "general", which conflates chitchat with low-confidence fallback.
63
63
  */
64
64
  intentKind?: "task" | "chitchat" | null;
65
+ /**
66
+ * Model-decided output deliverable (Phase 2b): "answer" (explanation / review
67
+ * / question — no edits), "code" (create/edit files), "report" (structured
68
+ * list/plan/audit). Set by layer1's model-first classifier. Consumed by
69
+ * layer4 (`informational` directive) and layer6 (`getResponseToolSet` /
70
+ * `applyPilSuffix` output-format gating) INSTEAD of re-deriving intent via
71
+ * keyword regex. null/undefined when the model omitted it or the legacy
72
+ * cascade ran → those consumers fall back to their regex predicates.
73
+ */
74
+ deliverableKind?: "answer" | "code" | "report" | null;
65
75
  /**
66
76
  * Diagnostic: when the pipeline returns the fallback context, this records
67
77
  * the reason (timeout / schema-reject / exception). Null on the happy path.
@@ -142,9 +142,9 @@ async function runCustomerDebate(ctx) {
142
142
  const criteriaText = ctx.criteria
143
143
  .map((c) => `- ${c.id}: ${c.status}${c.evidence ? ` (Evidence: ${c.evidence})` : ""}`)
144
144
  .join("\n");
145
- let conversation = `System: You are in a "Definition of Done" debate.
146
- PO's goal: Prove the product is ready to ship.
147
- Customer's goal: Ensure all requirements are met and it's high quality.
145
+ let conversation = `System: You are in a "Definition of Done" debate.
146
+ PO's goal: Prove the product is ready to ship.
147
+ Customer's goal: Ensure all requirements are met and it's high quality.
148
148
  Criteria:\n${criteriaText}\n`;
149
149
  for (let r = 1; r <= rounds; r++) {
150
150
  const poPrompt = `${conversation}\nRound ${r}: PO, explain why this is ready to ship.`;
@@ -684,24 +684,24 @@ export async function* runLoopDriver(ctx) {
684
684
  },
685
685
  };
686
686
  // Synthesize ProductSpec
687
- const synthesisPrompt = `Synthesize a ProductSpec JSON based on the following:
688
- Idea: ${ctx.idea}
689
- Clarified Spec: ${JSON.stringify(clarifiedSpec)}
690
- Debate Summary: ${debateState.runningSummary}
691
- Research Findings: ${debateState.researchFindings ?? "N/A"}
692
-
693
- Output ONLY a JSON object matching this interface:
694
- interface ProductSpec {
695
- idea: string;
696
- persona: string;
697
- mvp: string[];
698
- phase2: string[];
699
- architecture: string;
700
- ioContract: string;
701
- folderStructure: string;
702
- sprintEstimate: number;
703
- costEstimate: number;
704
- }
687
+ const synthesisPrompt = `Synthesize a ProductSpec JSON based on the following:
688
+ Idea: ${ctx.idea}
689
+ Clarified Spec: ${JSON.stringify(clarifiedSpec)}
690
+ Debate Summary: ${debateState.runningSummary}
691
+ Research Findings: ${debateState.researchFindings ?? "N/A"}
692
+
693
+ Output ONLY a JSON object matching this interface:
694
+ interface ProductSpec {
695
+ idea: string;
696
+ persona: string;
697
+ mvp: string[];
698
+ phase2: string[];
699
+ architecture: string;
700
+ ioContract: string;
701
+ folderStructure: string;
702
+ sprintEstimate: number;
703
+ costEstimate: number;
704
+ }
705
705
  `;
706
706
  // The scoping phase's only LLM call. Wrapped so a provider hang/
707
707
  // timeout leaves a council_error audit row instead of swallowing the
@@ -20,10 +20,10 @@ function readLatestSprintStage(runId) {
20
20
  try {
21
21
  const db = getDatabase();
22
22
  const row = db
23
- .prepare(`SELECT metadata_json, created_at
24
- FROM interaction_logs
25
- WHERE session_id = ? AND event_type = 'ui_interaction' AND event_subtype = 'sprint_stage'
26
- ORDER BY id DESC
23
+ .prepare(`SELECT metadata_json, created_at
24
+ FROM interaction_logs
25
+ WHERE session_id = ? AND event_type = 'ui_interaction' AND event_subtype = 'sprint_stage'
26
+ ORDER BY id DESC
27
27
  LIMIT 1`)
28
28
  .get(runId);
29
29
  if (!row)
@@ -15,8 +15,8 @@
15
15
  * Scopes: https://www.googleapis.com/auth/cloud-platform openid email
16
16
  * (same as gemini-cli — covers Generative Language API + user identity)
17
17
  */
18
- import { exec } from "node:child_process";
19
18
  import { startOAuthCallbackServer } from "../../mcp/oauth-callback.js";
19
+ import { openUrl } from "../../utils/open-url.js";
20
20
  import { buildAuthorizeUrl, exchangeBrowserCode, generatePKCE, refreshBrowserTokens } from "./browser-flow.js";
21
21
  import { OAuthLoginError, OAuthRefreshError } from "./types.js";
22
22
  // ---------------------------------------------------------------------------
@@ -46,20 +46,11 @@ const REFRESH_WINDOW_MS = 60_000;
46
46
  // Loopback callback timeout: 5 minutes for user to complete browser login.
47
47
  const CALLBACK_TIMEOUT_MS = 5 * 60_000;
48
48
  function defaultOpenBrowser(url) {
49
- const platform = process.platform;
50
- let cmd;
51
- if (platform === "win32") {
52
- cmd = `start "" "${url}"`;
53
- }
54
- else if (platform === "darwin") {
55
- cmd = `open "${url}"`;
56
- }
57
- else {
58
- cmd = `xdg-open "${url}"`;
59
- }
60
- exec(cmd, () => {
61
- // fire-and-forget — errors non-fatal (user can open manually)
62
- });
49
+ // Delegate to the centralized, injection-safe opener: it validates the scheme
50
+ // and spawns via execFile (no shell), so metacharacters in the authorization
51
+ // URL cannot be interpreted as commands. Fire-and-forget — failures are
52
+ // non-fatal (the user can open the URL manually).
53
+ openUrl(url);
63
54
  }
64
55
  // ---------------------------------------------------------------------------
65
56
  // Mutex — prevents double-refresh under concurrent requests
@@ -24,10 +24,10 @@
24
24
  * from the MIT-licensed pi-grok OSS project and cross-checked against the live
25
25
  * xAI OIDC discovery document; override via MUONROI_XAI_CLIENT_ID.
26
26
  */
27
- import { exec } from "node:child_process";
28
27
  import { randomBytes } from "node:crypto";
29
28
  import * as readline from "node:readline";
30
29
  import { startOAuthCallbackServer } from "../../mcp/oauth-callback.js";
30
+ import { openUrl } from "../../utils/open-url.js";
31
31
  import { exchangeBrowserCode, generatePKCE, refreshBrowserTokens } from "./browser-flow.js";
32
32
  import { OAuthLoginError, OAuthRefreshError } from "./types.js";
33
33
  // ---------------------------------------------------------------------------
@@ -54,20 +54,11 @@ const REFRESH_WINDOW_MS = 60_000;
54
54
  // Loopback callback timeout: 5 minutes for the user to complete browser login.
55
55
  const CALLBACK_TIMEOUT_MS = 5 * 60_000;
56
56
  function defaultOpenBrowser(url) {
57
- const platform = process.platform;
58
- let cmd;
59
- if (platform === "win32") {
60
- cmd = `start "" "${url}"`;
61
- }
62
- else if (platform === "darwin") {
63
- cmd = `open "${url}"`;
64
- }
65
- else {
66
- cmd = `xdg-open "${url}"`;
67
- }
68
- exec(cmd, () => {
69
- // fire-and-forget — errors non-fatal (user can open the URL manually)
70
- });
57
+ // Delegate to the centralized, injection-safe opener: it validates the scheme
58
+ // and spawns via execFile (no shell), so metacharacters in the authorization
59
+ // URL cannot be interpreted as commands. Fire-and-forget — failures are
60
+ // non-fatal (the user can open the URL manually).
61
+ openUrl(url);
71
62
  }
72
63
  // ---------------------------------------------------------------------------
73
64
  // Mutex — prevents double-refresh under concurrent requests
@@ -22,9 +22,9 @@
22
22
  * NOTE: this is NOT CliOAuthProvider (src/mcp/oauth-provider.ts) which serves
23
23
  * the MCP server-discovery OAuth dance.
24
24
  */
25
- import { exec } from "node:child_process";
26
25
  import { randomBytes } from "node:crypto";
27
26
  import { startOAuthCallbackServer } from "../../mcp/oauth-callback.js";
27
+ import { openUrl } from "../../utils/open-url.js";
28
28
  import { exchangeBrowserCode, generatePKCE, refreshBrowserTokens } from "./browser-flow.js";
29
29
  import { OAuthLoginError, OAuthRefreshError } from "./types.js";
30
30
  // ---------------------------------------------------------------------------
@@ -51,20 +51,11 @@ const OPENAI_ORIGINATOR = "codex_cli_rs";
51
51
  const REFRESH_WINDOW_MS = 60_000;
52
52
  const CALLBACK_TIMEOUT_MS = 5 * 60_000;
53
53
  function defaultOpenBrowser(url) {
54
- const platform = process.platform;
55
- let cmd;
56
- if (platform === "win32") {
57
- cmd = `start "" "${url}"`;
58
- }
59
- else if (platform === "darwin") {
60
- cmd = `open "${url}"`;
61
- }
62
- else {
63
- cmd = `xdg-open "${url}"`;
64
- }
65
- exec(cmd, () => {
66
- // fire-and-forget — errors non-fatal (user can open manually)
67
- });
54
+ // Delegate to the centralized, injection-safe opener: it validates the scheme
55
+ // and spawns via execFile (no shell), so metacharacters in the authorization
56
+ // URL cannot be interpreted as commands. Fire-and-forget — failures are
57
+ // non-fatal (the user can open the URL manually).
58
+ openUrl(url);
68
59
  }
69
60
  // ---------------------------------------------------------------------------
70
61
  // Mutex