muonroi-cli 1.4.1 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (194) hide show
  1. package/LICENSE +21 -21
  2. package/README.md +122 -122
  3. package/dist/packages/agent-harness-core/src/predicate.d.ts +1 -1
  4. package/dist/src/agent-harness/__tests__/mock-model.spec.js +48 -1
  5. package/dist/src/agent-harness/mock-model.d.ts +11 -0
  6. package/dist/src/agent-harness/mock-model.js +21 -0
  7. package/dist/src/cli/cost-forensics.js +12 -12
  8. package/dist/src/council/__tests__/clarification-prompt.test.js +51 -0
  9. package/dist/src/council/__tests__/clarifier-ready-gate.test.js +32 -0
  10. package/dist/src/council/__tests__/decisions-lock.test.js +17 -1
  11. package/dist/src/council/__tests__/oauth-reachable.test.d.ts +1 -0
  12. package/dist/src/council/__tests__/oauth-reachable.test.js +31 -0
  13. package/dist/src/council/__tests__/parse-outcome-fallback.test.js +11 -0
  14. package/dist/src/council/clarifier.js +9 -1
  15. package/dist/src/council/debate.js +5 -1
  16. package/dist/src/council/decisions-lock.js +3 -3
  17. package/dist/src/council/index.js +12 -5
  18. package/dist/src/council/leader.d.ts +0 -17
  19. package/dist/src/council/leader.js +22 -15
  20. package/dist/src/council/planner.js +1 -1
  21. package/dist/src/council/prompts.js +63 -57
  22. package/dist/src/council/types.d.ts +7 -0
  23. package/dist/src/ee/__tests__/ee-onboarding.test.d.ts +1 -0
  24. package/dist/src/ee/__tests__/ee-onboarding.test.js +32 -0
  25. package/dist/src/ee/artifact-cache.d.ts +56 -0
  26. package/dist/src/ee/artifact-cache.js +155 -0
  27. package/dist/src/ee/artifact-cache.test.d.ts +1 -0
  28. package/dist/src/ee/artifact-cache.test.js +69 -0
  29. package/dist/src/ee/auth.d.ts +9 -0
  30. package/dist/src/ee/auth.js +19 -0
  31. package/dist/src/ee/ee-onboarding.d.ts +5 -0
  32. package/dist/src/ee/ee-onboarding.js +76 -0
  33. package/dist/src/ee/search.js +7 -5
  34. package/dist/src/ee/search.test.d.ts +1 -0
  35. package/dist/src/ee/search.test.js +23 -0
  36. package/dist/src/generated/version.d.ts +1 -1
  37. package/dist/src/generated/version.js +1 -1
  38. package/dist/src/headless/output.js +6 -4
  39. package/dist/src/headless/output.test.js +4 -3
  40. package/dist/src/index.js +20 -1
  41. package/dist/src/mcp/__tests__/auto-setup.test.js +74 -0
  42. package/dist/src/mcp/__tests__/client-pool.spec.d.ts +1 -0
  43. package/dist/src/mcp/__tests__/client-pool.spec.js +98 -0
  44. package/dist/src/mcp/__tests__/parallel-build.spec.d.ts +1 -0
  45. package/dist/src/mcp/__tests__/parallel-build.spec.js +67 -0
  46. package/dist/src/mcp/__tests__/smart-filter.test.js +56 -0
  47. package/dist/src/mcp/auto-setup.js +56 -2
  48. package/dist/src/mcp/client-pool.d.ts +46 -0
  49. package/dist/src/mcp/client-pool.js +212 -0
  50. package/dist/src/mcp/oauth-callback.js +2 -2
  51. package/dist/src/mcp/parse-headers.test.js +14 -14
  52. package/dist/src/mcp/runtime.d.ts +28 -0
  53. package/dist/src/mcp/runtime.js +117 -51
  54. package/dist/src/mcp/self-verify-runner.d.ts +14 -0
  55. package/dist/src/mcp/self-verify-runner.js +38 -0
  56. package/dist/src/mcp/setup-guide-text.d.ts +9 -0
  57. package/dist/src/mcp/setup-guide-text.js +84 -0
  58. package/dist/src/mcp/smart-filter.js +49 -0
  59. package/dist/src/mcp/smoke.test.js +43 -43
  60. package/dist/src/mcp/tools-server.d.ts +7 -0
  61. package/dist/src/mcp/tools-server.js +19 -22
  62. package/dist/src/models/catalog.json +349 -349
  63. package/dist/src/ops/__tests__/doctor-ee-health.test.js +21 -0
  64. package/dist/src/ops/doctor.d.ts +3 -2
  65. package/dist/src/ops/doctor.js +47 -11
  66. package/dist/src/ops/doctor.test.js +4 -3
  67. package/dist/src/orchestrator/__tests__/mcp-capability-block.test.d.ts +1 -0
  68. package/dist/src/orchestrator/__tests__/mcp-capability-block.test.js +39 -0
  69. package/dist/src/orchestrator/__tests__/project-stack.test.d.ts +1 -0
  70. package/dist/src/orchestrator/__tests__/project-stack.test.js +65 -0
  71. package/dist/src/orchestrator/batch-turn-runner.js +7 -11
  72. package/dist/src/orchestrator/compaction.d.ts +2 -0
  73. package/dist/src/orchestrator/compaction.js +14 -1
  74. package/dist/src/orchestrator/compaction.test.js +25 -1
  75. package/dist/src/orchestrator/message-processor.js +72 -32
  76. package/dist/src/orchestrator/orchestrator.js +26 -0
  77. package/dist/src/orchestrator/prompts.d.ts +51 -0
  78. package/dist/src/orchestrator/prompts.js +257 -134
  79. package/dist/src/orchestrator/scope-ceiling.js +6 -1
  80. package/dist/src/orchestrator/scope-reminder.d.ts +12 -0
  81. package/dist/src/orchestrator/scope-reminder.js +16 -0
  82. package/dist/src/orchestrator/scope-reminder.test.js +22 -1
  83. package/dist/src/orchestrator/stream-runner.js +23 -15
  84. package/dist/src/orchestrator/subagent-compactor.d.ts +14 -5
  85. package/dist/src/orchestrator/subagent-compactor.js +30 -8
  86. package/dist/src/orchestrator/subagent-compactor.spec.js +18 -0
  87. package/dist/src/orchestrator/text-tool-call-detector.test.js +13 -13
  88. package/dist/src/pil/__tests__/clarity-gate.test.js +24 -215
  89. package/dist/src/pil/__tests__/config.test.js +1 -17
  90. package/dist/src/pil/__tests__/discovery.test.js +144 -11
  91. package/dist/src/pil/__tests__/layer1-intent-trace.test.js +7 -2
  92. package/dist/src/pil/__tests__/layer1-intent.test.js +3 -0
  93. package/dist/src/pil/__tests__/layer16-clarity.test.js +32 -116
  94. package/dist/src/pil/__tests__/layer4-gsd.test.js +37 -0
  95. package/dist/src/pil/__tests__/layer6-output.test.js +158 -18
  96. package/dist/src/pil/__tests__/llm-classify.test.js +49 -2
  97. package/dist/src/pil/__tests__/surface-compaction-artifacts.test.d.ts +1 -0
  98. package/dist/src/pil/__tests__/surface-compaction-artifacts.test.js +112 -0
  99. package/dist/src/pil/agent-operating-contract.d.ts +1 -1
  100. package/dist/src/pil/agent-operating-contract.js +2 -0
  101. package/dist/src/pil/agent-operating-contract.test.js +7 -2
  102. package/dist/src/pil/cheap-model-playbook.js +35 -35
  103. package/dist/src/pil/cheap-model-workbooks.js +16 -13
  104. package/dist/src/pil/clarity-gate.d.ts +21 -19
  105. package/dist/src/pil/clarity-gate.js +26 -153
  106. package/dist/src/pil/config.d.ts +9 -1
  107. package/dist/src/pil/config.js +15 -4
  108. package/dist/src/pil/discovery.js +211 -136
  109. package/dist/src/pil/layer1-intent.d.ts +12 -0
  110. package/dist/src/pil/layer1-intent.js +283 -38
  111. package/dist/src/pil/layer1-intent.test.js +210 -4
  112. package/dist/src/pil/layer16-clarity.d.ts +25 -11
  113. package/dist/src/pil/layer16-clarity.js +19 -306
  114. package/dist/src/pil/layer3-ee-injection.d.ts +19 -0
  115. package/dist/src/pil/layer3-ee-injection.js +96 -4
  116. package/dist/src/pil/layer4-gsd.js +18 -6
  117. package/dist/src/pil/layer6-output.d.ts +2 -0
  118. package/dist/src/pil/layer6-output.js +151 -25
  119. package/dist/src/pil/llm-classify.d.ts +26 -0
  120. package/dist/src/pil/llm-classify.js +34 -5
  121. package/dist/src/pil/native-capabilities-workbook.d.ts +1 -1
  122. package/dist/src/pil/native-capabilities-workbook.js +82 -76
  123. package/dist/src/pil/pipeline.js +15 -9
  124. package/dist/src/pil/schema.d.ts +8 -0
  125. package/dist/src/pil/schema.js +12 -1
  126. package/dist/src/pil/task-tier-map.js +4 -0
  127. package/dist/src/pil/types.d.ts +11 -1
  128. package/dist/src/product-loop/done-gate.js +3 -3
  129. package/dist/src/product-loop/loop-driver.js +18 -18
  130. package/dist/src/product-loop/progress-snapshot.js +4 -4
  131. package/dist/src/providers/auth/gemini-oauth.js +6 -15
  132. package/dist/src/providers/auth/grok-oauth.js +6 -15
  133. package/dist/src/providers/auth/openai-oauth.js +6 -15
  134. package/dist/src/providers/mcp-vision-bridge.js +48 -48
  135. package/dist/src/reporter/index.js +1 -1
  136. package/dist/src/scaffold/bb-ecosystem-apply.js +47 -47
  137. package/dist/src/scaffold/bb-quality-gate.js +5 -5
  138. package/dist/src/scaffold/continuation-prompt.js +60 -60
  139. package/dist/src/scaffold/init-new.js +453 -453
  140. package/dist/src/self-qa/__tests__/scenario-planner.test.js +3 -3
  141. package/dist/src/self-qa/agentic-loop.js +24 -19
  142. package/dist/src/self-qa/spec-emitter.js +26 -23
  143. package/dist/src/storage/__tests__/migrations.test.js +2 -2
  144. package/dist/src/storage/interaction-log.js +5 -5
  145. package/dist/src/storage/migrations.js +122 -122
  146. package/dist/src/storage/sessions.js +42 -42
  147. package/dist/src/storage/transcript.js +91 -84
  148. package/dist/src/storage/usage.js +14 -14
  149. package/dist/src/storage/workspaces.js +12 -12
  150. package/dist/src/tools/__tests__/native-tools.test.d.ts +1 -0
  151. package/dist/src/tools/__tests__/native-tools.test.js +53 -0
  152. package/dist/src/tools/git-safety.d.ts +61 -0
  153. package/dist/src/tools/git-safety.js +141 -0
  154. package/dist/src/tools/git-safety.test.d.ts +1 -0
  155. package/dist/src/tools/git-safety.test.js +111 -0
  156. package/dist/src/tools/native-tools.d.ts +31 -0
  157. package/dist/src/tools/native-tools.js +273 -0
  158. package/dist/src/tools/registry-ee-query.test.js +18 -1
  159. package/dist/src/tools/registry-git-safety.test.d.ts +7 -0
  160. package/dist/src/tools/registry-git-safety.test.js +92 -0
  161. package/dist/src/tools/registry.js +52 -6
  162. package/dist/src/ui/__tests__/markdown-render.test.d.ts +1 -0
  163. package/dist/src/ui/__tests__/markdown-render.test.js +48 -0
  164. package/dist/src/ui/app.js +0 -0
  165. package/dist/src/ui/components/message-view.js +4 -1
  166. package/dist/src/ui/components/structured-response-view.js +7 -3
  167. package/dist/src/ui/components/tool-group.js +7 -1
  168. package/dist/src/ui/markdown-render.d.ts +41 -0
  169. package/dist/src/ui/markdown-render.js +223 -0
  170. package/dist/src/ui/markdown.d.ts +10 -0
  171. package/dist/src/ui/markdown.js +12 -35
  172. package/dist/src/ui/slash/council-inspect.js +4 -4
  173. package/dist/src/ui/slash/export.js +4 -4
  174. package/dist/src/ui/utils/text.d.ts +8 -0
  175. package/dist/src/ui/utils/text.js +16 -0
  176. package/dist/src/ui/utils/text.test.d.ts +1 -0
  177. package/dist/src/ui/utils/text.test.js +23 -0
  178. package/dist/src/usage/ledger.js +48 -15
  179. package/dist/src/utils/__tests__/footprint-gitignore.test.d.ts +1 -0
  180. package/dist/src/utils/__tests__/footprint-gitignore.test.js +50 -0
  181. package/dist/src/utils/clipboard-image.js +23 -23
  182. package/dist/src/utils/open-url.d.ts +56 -0
  183. package/dist/src/utils/open-url.js +58 -0
  184. package/dist/src/utils/open-url.test.d.ts +1 -0
  185. package/dist/src/utils/open-url.test.js +86 -0
  186. package/dist/src/utils/settings.d.ts +12 -0
  187. package/dist/src/utils/settings.js +48 -0
  188. package/dist/src/utils/side-question.js +2 -2
  189. package/dist/src/utils/skills.js +3 -3
  190. package/dist/src/verify/__tests__/coverage-parsers.test.js +30 -30
  191. package/dist/src/verify/environment.js +2 -1
  192. package/package.json +1 -1
  193. package/dist/src/pil/layer16-clarity.test.js +0 -31
  194. /package/dist/src/{pil/layer16-clarity.test.d.ts → council/__tests__/clarification-prompt.test.d.ts} +0 -0
@@ -244,6 +244,27 @@ export class Agent {
244
244
  this.pendingCalls = options.pendingCalls ?? null;
245
245
  this.permissionMode = options.permissionMode ?? "safe";
246
246
  ensureDefaultMcpServers();
247
+ // Pre-warm the always-on MCP servers in the BACKGROUND so they're pooled
248
+ // before the first user turn. npx stdio servers (filesystem/memory)
249
+ // cold-start >2.5s and would otherwise miss the first turn's build deadline
250
+ // (shown as "MCP unavailable: ... still connecting — available next turn").
251
+ // Empty-message smart-filter keeps only the baseline (drops browser/web
252
+ // categories) so we don't speculatively spawn playwright/tavily. Fire-and-
253
+ // forget; the pool handles errors and the per-turn acquire still connects on
254
+ // demand if this is skipped.
255
+ void (async () => {
256
+ try {
257
+ const [{ warmMcpClients }, { loadMcpServers }, { filterMcpServersByMessage }] = await Promise.all([
258
+ import("../mcp/client-pool.js"),
259
+ import("../utils/settings.js"),
260
+ import("../mcp/smart-filter.js"),
261
+ ]);
262
+ warmMcpClients(filterMcpServersByMessage(loadMcpServers(), ""));
263
+ }
264
+ catch (err) {
265
+ console.error(`[orchestrator] MCP pre-warm skipped: ${err?.message}`);
266
+ }
267
+ })();
247
268
  if (options.persistSession !== false) {
248
269
  this.sessionStore = new SessionStore(this.bash.getCwd());
249
270
  this.workspace = this.sessionStore.getWorkspace();
@@ -469,6 +490,11 @@ export class Agent {
469
490
  this.bash.cleanup(),
470
491
  shutdownWorkspaceLspManager(this.bash.getCwd()),
471
492
  extractSession(this.messages, this.bash.getCwd(), "cli-exit", this.getSessionId()),
493
+ // Tear down pooled MCP clients (client-pool.ts). They persist across turns
494
+ // by design (no per-turn cold-spawn), so the only real teardown is here at
495
+ // session end. Stdio children would die with the process anyway, but close
496
+ // them gracefully on a clean exit.
497
+ import("../mcp/client-pool.js").then((m) => m.closeAllMcpClients()),
472
498
  ]);
473
499
  }
474
500
  // Tool-loop cap handler — set by the UI (app.tsx) at startup. Invoked from
@@ -3,6 +3,38 @@ import { type CustomSubagentConfig, type SandboxMode, type SandboxSettings } fro
3
3
  export declare const MAX_TOOL_ROUNDS: number;
4
4
  export declare const VISION_MODEL = "grok-4-1-fast-reasoning";
5
5
  export declare const COMPUTER_MODEL = "grok-4.20-0309-reasoning";
6
+ /**
7
+ * Phase 5 Fix — Env-aware ENVIRONMENT block.
8
+ *
9
+ * Replaces the static rendering-only block with a dynamic block that
10
+ * tells the model exactly which OS + shell + cwd it's operating in.
11
+ * Without this the model historically emitted PowerShell cmdlets
12
+ * (Get-ChildItem, Select-Object, $null), cmd.exe syntax (del, if exist),
13
+ * or POSIX tools that aren't installed (hyperfine) — all of which fail
14
+ * silently in the bash tool and waste tokens on retry-cascades.
15
+ *
16
+ * Evidence: sessions f9a4cea1bf44, 9c63a38197f3, d0dc4a1f542a,
17
+ * 77cd2e11c6a5, 1bc27b79223c all logged shell-mismatch errors.
18
+ *
19
+ * The block is recomputed on each system-prompt assembly so settings
20
+ * changes (MUONROI_SHELL override, shell.kind config) are reflected
21
+ * without a CLI restart.
22
+ */
23
+ /**
24
+ * Deterministically detect the project's stack from manifest/lockfile presence
25
+ * at the workspace root. Pure (no LLM), cheap (one readdir), zero-hardcode (no
26
+ * model/provider IDs — only ecosystem markers). Returns a compact one-line
27
+ * summary like "TypeScript · pkg: bun · tests: vitest · vcs: git", or "" when
28
+ * nothing recognizable is present (greenfield / unreadable dir).
29
+ *
30
+ * Motivation (2026-06-14 dogfood): the ENVIRONMENT block told the model its OS,
31
+ * shell, and cwd but never WHICH project it was in — so the model acted
32
+ * context-blind, assumed Python, and asked the user to describe the repo it was
33
+ * already running inside. This gives every model, on every turn, in every mode
34
+ * (agent/plan/ask) and for every provider (it is NOT in the strippable TOOLS
35
+ * section), a concrete self-model of the codebase it can act on.
36
+ */
37
+ export declare function detectProjectStack(cwd: string): string;
6
38
  export declare function findCustomSubagent(agent: string, subagents?: CustomSubagentConfig[]): CustomSubagentConfig | undefined;
7
39
  export declare function formatCustomSubagentsPromptSection(subagents: CustomSubagentConfig[]): string;
8
40
  export interface SystemPromptParts {
@@ -24,6 +56,25 @@ export interface SystemPromptOptions {
24
56
  */
25
57
  chitchat?: boolean;
26
58
  }
59
+ /**
60
+ * Render the LIVE per-turn MCP tool roster as a system-prompt block.
61
+ *
62
+ * The static prompt only states the mcp_<server>__<tool> naming convention; it
63
+ * never names the tools actually connected this turn, and the per-message smart
64
+ * filter can drop whole servers. The model therefore receives connected MCP
65
+ * tools ONLY as raw tool JSON, which it can overlook — live failure
66
+ * (session f6f7881a5fae): asked to call `setup_guide`, the agent said "I don't
67
+ * have a direct call_mcp tool" and drove the muonroi-docs server by hand over
68
+ * bash JSON-RPC, fabricating output. Surfacing the exact callable names in prose
69
+ * closes that gap.
70
+ *
71
+ * `toolNames` should be the keys of the FINAL assembled tool set for the turn
72
+ * (post smart-filter, post fs-dedup). Returns "" when no MCP tool is connected,
73
+ * so non-agent / chitchat / no-client-tools turns add nothing. The block is
74
+ * DYNAMIC (varies per turn) so callers must append it OUTSIDE the cached static
75
+ * prefix.
76
+ */
77
+ export declare function buildMcpCapabilityBlock(toolNames: readonly string[]): string;
27
78
  export declare function buildSystemPromptParts(cwd: string, mode: AgentMode, sandboxMode: SandboxMode, planContext?: string | null, subagents?: CustomSubagentConfig[], sandboxSettings?: SandboxSettings, providerId?: string, resumeDigest?: string | null, options?: SystemPromptOptions): SystemPromptParts;
28
79
  export declare function buildSystemPrompt(cwd: string, mode: AgentMode, sandboxMode: SandboxMode, planContext?: string | null, subagents?: CustomSubagentConfig[], sandboxSettings?: SandboxSettings, providerId?: string, resumeDigest?: string | null, options?: SystemPromptOptions): string;
29
80
  export declare function buildSubagentPrompt(request: TaskRequest, cwd: string, custom: CustomSubagentConfig | null, sandboxMode: SandboxMode, subagents?: CustomSubagentConfig[], sandboxSettings?: SandboxSettings, providerId?: string): string;
@@ -1,3 +1,4 @@
1
+ import * as fs from "node:fs";
1
2
  import { getModelInfo } from "../models/registry.js";
2
3
  import { buildContractSection } from "../pil/agent-operating-contract.js";
3
4
  import { buildNativeCapabilitiesSection } from "../pil/native-capabilities-workbook.js";
@@ -38,6 +39,81 @@ export const COMPUTER_MODEL = "grok-4.20-0309-reasoning";
38
39
  * changes (MUONROI_SHELL override, shell.kind config) are reflected
39
40
  * without a CLI restart.
40
41
  */
42
/**
 * Summarize the project stack found at the workspace root as one short line.
 *
 * The directory is listed once with `fs.readdirSync` and the resulting entry
 * names are matched against well-known manifest, lockfile and config names.
 * No model or provider identifiers are involved — only ecosystem markers.
 *
 * Motivation (2026-06-14 dogfood): the ENVIRONMENT block told the model its OS,
 * shell, and cwd but never WHICH project it was in, so the model assumed
 * Python and asked the user to describe the repo it was already running in.
 *
 * @param {string} cwd - Directory whose top-level entries are inspected.
 * @returns {string} A summary such as
 *   "TypeScript · pkg: bun · tests: vitest · vcs: git", containing only the
 *   segments that were detected, or "" when nothing is recognized or the
 *   directory cannot be read.
 */
export function detectProjectStack(cwd) {
    let rootNames;
    try {
        rootNames = fs.readdirSync(cwd);
    }
    catch (err) {
        // Best-effort enrichment: an unreadable cwd just means no stack line.
        // The log is opt-in via MUONROI_DEBUG so prompt assembly stays silent
        // on stderr by default.
        if (process.env.MUONROI_DEBUG === "1") {
            console.error(`[orchestrator/prompts] detectProjectStack failed for ${cwd}: ${err?.message}`);
        }
        return "";
    }
    // Exact (case-sensitive) match against any of the given entry names.
    const anyNamed = (...names) => names.some((name) => rootNames.includes(name));
    // Case-insensitive suffix match against any of the given extensions.
    const anyWithExt = (...exts) => rootNames.some((entry) => {
        const lowered = entry.toLowerCase();
        return exts.some((ext) => lowered.endsWith(ext));
    });
    // True when at least one entry name matches the pattern.
    const anyMatching = (pattern) => rootNames.some((entry) => pattern.test(entry));
    // Rules are ordered by precedence; the first whose probe succeeds wins.
    const firstLabel = (rules) => rules.find(([, probe]) => probe())?.[0] ?? "";
    const language = firstLabel([
        ["TypeScript", () => anyNamed("tsconfig.json")],
        ["JavaScript/Node", () => anyNamed("package.json")],
        ["Rust", () => anyNamed("Cargo.toml")],
        ["Go", () => anyNamed("go.mod")],
        ["Python", () => anyNamed("pyproject.toml", "requirements.txt", "setup.py")],
        [".NET/C#", () => anyWithExt(".csproj", ".sln") || anyNamed("Directory.Build.props")],
        ["Java (Maven)", () => anyNamed("pom.xml")],
        ["Java/Kotlin (Gradle)", () => anyNamed("build.gradle", "build.gradle.kts")],
    ]);
    const packageManager = firstLabel([
        ["bun", () => anyNamed("bun.lockb", "bun.lock")],
        ["pnpm", () => anyNamed("pnpm-lock.yaml")],
        ["yarn", () => anyNamed("yarn.lock")],
        ["npm", () => anyNamed("package-lock.json")],
    ]);
    const testRunner = firstLabel([
        ["vitest", () => anyMatching(/^vitest\.([\w.-]+\.)?config\.(ts|js|mjs|cjs|cts|mts)$/i)],
        ["jest", () => anyMatching(/^jest\.config\./i)],
        ["pytest", () => anyNamed("pytest.ini", "tox.ini")],
    ]);
    const vcs = anyNamed(".git") ? "git" : "";
    // Undetected segments are empty strings and are dropped before joining.
    return [
        language,
        packageManager && `pkg: ${packageManager}`,
        testRunner && `tests: ${testRunner}`,
        vcs && `vcs: ${vcs}`,
    ]
        .filter(Boolean)
        .join(" · ");
}
41
117
  function buildEnvironmentBlock() {
42
118
  const platform = process.platform;
43
119
  const osName = platform === "win32" ? "Windows" : platform === "darwin" ? "macOS" : platform === "linux" ? "Linux" : platform;
@@ -74,11 +150,14 @@ function buildEnvironmentBlock() {
74
150
  else if (shell.kind === "cmd") {
75
151
  shellRules.push("- The bash tool runs cmd.exe. Use cmd.exe syntax: dir, type, copy, del, if exist, for %%.", "- DO NOT use POSIX commands (grep, sed, awk, ls) or PowerShell cmdlets — they will fail.", "- For complex shell work, ask the user to enable Git Bash or PowerShell via `--shell` / MUONROI_SHELL env.");
76
152
  }
153
+ const projectStack = cwd === "<unknown>" ? "" : detectProjectStack(cwd);
77
154
  return [
78
155
  "ENVIRONMENT:",
79
156
  `- OS: ${osName} (${platform})`,
80
157
  `- Shell available via bash tool: ${shellKindLabel} (kind=${shell.kind})`,
81
158
  `- Working directory: ${cwd}`,
159
+ ...(projectStack ? [`- Project stack: ${projectStack}`] : []),
160
+ "- You are running INSIDE this repository: read and search it with your own tools instead of asking the user to describe its files, structure, or stack. You can act on what you find here directly.",
82
161
  "",
83
162
  "Terminal rendering:",
84
163
  "- Your text output is rendered in a plain terminal — not a browser, not a rich text editor.",
@@ -95,138 +174,138 @@ function buildEnvironmentBlock() {
95
174
  }
96
175
  const ENVIRONMENT = buildEnvironmentBlock();
97
176
  const MODE_PROMPTS = {
98
- agent: `You are muonroi-cli in Agent mode — a powerful AI coding agent. You execute tasks directly using tools.
99
-
100
- ${ENVIRONMENT}
101
-
102
- TOOLS:
103
- - read_file: Read file contents with start_line/end_line for iterative reading. Use for examining code.
104
- - grep: Fast regex content search across the codebase. Prefer this over bash for finding patterns in files. Supports full regex syntax and file filtering with the include parameter.
105
- - lsp: Experimental semantic code intelligence for definitions, references, hover, symbols, implementations, and call hierarchy when a matching language server is available.
106
- - write_file: Create new files or overwrite existing ones with full content.
107
- - edit_file: Replace a unique string in a file with new content. The old_string must be unique — include enough context lines.
108
- - bash: Execute shell commands. Set background=true for long-running processes (dev servers, watchers, builds). Returns a process ID immediately.
109
- - process_logs: View recent output from a background process by ID.
110
- - process_stop: Stop a background process by ID.
111
- - process_list: List all background processes with status and uptime.
112
- - wallet_info: Check the local wallet address, chain, and current ETH/USDC balances.
113
- - wallet_history: Show recent x402 payment history from the audit log.
114
- - fetch_payment_info: Inspect a URL for x402 payment requirements without paying. Returns payment options and a brin security score. Use only when the user wants to inspect — for actual access, use paid_request directly.
115
- - paid_request: Access an x402-protected URL using the local wallet. Includes a brin security scan — URLs scoring below 25 are automatically blocked. The user will be prompted to approve the payment before it executes. Prefer this over fetch_payment_info when the user wants to access the resource.
116
- - task: Delegate a focused foreground task to a sub-agent. Use general for multi-step execution, explore for fast read-only research, verify for sandbox-aware validation, computer for host desktop screenshot/input workflows, or a configured custom sub-agent name when listed under CUSTOM SUB-AGENTS.
117
- - delegate: Launch a read-only background agent for longer research while you continue working.
118
- - delegation_read: Retrieve a completed background delegation result by ID.
119
- - delegation_list: List running and completed background delegations. Do not poll it repeatedly.
120
- - schedule_create: Create a recurring or one-time scheduled headless run.
121
- - schedule_list: List saved schedules and their status.
122
- - schedule_remove: Remove a saved schedule.
123
- - schedule_read_log: Read recent log output from a schedule.
124
- - schedule_daemon_status: Check whether the schedule daemon is running.
125
- - schedule_daemon_start: Start the schedule daemon in the background.
126
- - schedule_daemon_stop: Stop the schedule daemon.
127
- - search_web: Search the web for current information, documentation, APIs, tutorials, etc.
128
- - search_x: Search X/Twitter for real-time posts, discussions, opinions, and trends.
129
- - generate_image: Generate a new image or edit an existing image. It saves image files locally and returns their paths.
130
- - generate_video: Generate a new video or animate an existing image. It saves video files locally and returns their paths.
131
- - computer_snapshot: Capture an accessibility-tree snapshot with stable refs like @e1 for desktop interaction.
132
- - computer_screenshot: Capture a host desktop screenshot for visual confirmation or fallback inspection.
133
- - computer_click: Click a desktop element by ref, or coordinates as a fallback.
134
- - computer_mouse_move: Hover a desktop element by ref, or coordinates as a fallback.
135
- - computer_type: Type text into a specific desktop element ref.
136
- - computer_press: Press a key or key chord in the focused host application.
137
- - computer_scroll: Scroll a desktop element by ref.
138
- - computer_launch: Launch an application and wait for its window to appear.
139
- - computer_list_windows: List visible windows and their ids.
140
- - computer_focus_window: Bring a target window to the front.
141
- - computer_wait: Wait for time, elements, windows, or text during desktop workflows.
142
- - computer_get: Read a property from a desktop element ref.
143
- - MCP tools: Enabled servers appear as tools named like mcp_<server>__<tool>.
144
-
145
- WORKFLOW:
146
- 1. Understand the request
147
- 2. Decide whether a sub-agent should handle the first investigation pass
148
- 3. Use read_file, grep, lsp, and bash to explore the codebase directly when the task is small or tightly scoped
149
- 4. Use bash with background=true for dev servers, watchers, or any long-running process — then continue working
150
- 5. Use delegate for read-only work that can run in parallel, then continue productive work
151
- 6. Use edit_file for targeted changes, write_file for new files or full rewrites
152
- 7. Verify changes by reading modified files
153
- 8. Run tests or builds with bash to confirm correctness
154
- 9. Use search_web or search_x when you need up-to-date information
155
-
156
- DEFAULT DELEGATION POLICY:
157
- - Prefer the task tool by default for code review, code quality analysis, architecture research, root-cause investigation, bug triage, verification, or any request that likely needs reading multiple files before acting.
158
- - Prefer delegate for longer-running read-only exploration when you can keep making progress without blocking.
159
- - Use the explore sub-agent for read-only investigation, reviews, research, and "how does this work?" tasks.
160
- - Use the general sub-agent for delegated work that may need editing files, running commands, or producing a concrete implementation.
161
- - Use the verify sub-agent for sandbox-aware build, test, app boot, and smoke validation work.
162
- - Use the computer sub-agent for host desktop interaction workflows that need screenshots, clicks, typing, keypresses, or scrolling.
163
- - Use a matching custom sub-agent when the task fits one of the configured specializations.
164
- - Never use delegate for tasks that should edit files or make shell changes.
165
- - When a background delegation is running, do not wait idly and do not spam delegation_list(). Continue useful work.
166
- - Do not wait for the user to explicitly ask for a sub-agent when delegation would clearly help.
167
- - Skip delegation only when the task is trivial, single-file, or you already have the exact answer.
168
-
169
- EXAMPLES:
170
- - "review this change" -> delegate to explore first
171
- - "research how auth works" -> delegate to explore first
172
- - "investigate why this test fails" -> delegate to explore first, then continue with findings
173
- - "refactor this module" -> delegate a focused part to general when helpful
174
- - "verify this feature locally" -> use verify
175
- - "open the host app and click through it" -> use computer
176
- - "generate a logo" -> use generate_image
177
- - "animate this still image" -> use generate_video
178
- - Recurring specialized workflows -> use the matching custom sub-agent via task
179
- - "every weekday at 9am run this check" -> use schedule_create with a cron expression
180
- - "run this once automatically" -> use schedule_create with the right timing
181
- - "make sure scheduled jobs keep running" -> use schedule_daemon_status and schedule_daemon_start
182
-
183
- IMPORTANT:
184
- - Prefer edit_file for surgical changes to existing files — it shows a clean diff.
185
- - Prefer grep over bash for searching file contents. Use bash only for find, ls, git, and other shell commands.
186
- - Prefer lsp over text search when you need exact definitions, references, implementations, or call hierarchy and a server is available.
187
- - Use write_file only for new files or when most of the file is changing. For very large files (>500 lines), split into multiple edit_file calls or write smaller chunks.
188
- - Use read_file instead of cat/head/tail for reading files.
189
- - When the user asks for an automated recurring or one-time run, use the schedule tools instead of only describing the setup.
190
- - After creating a recurring schedule, check the daemon status and start it with \`schedule_daemon_start\` if needed.
191
-
177
+ agent: `You are muonroi-cli in Agent mode — a powerful AI coding agent. You execute tasks directly using tools.
178
+
179
+ ${ENVIRONMENT}
180
+
181
+ TOOLS:
182
+ - read_file: Read file contents with start_line/end_line for iterative reading. Use for examining code.
183
+ - grep: Fast regex content search across the codebase. Prefer this over bash for finding patterns in files. Supports full regex syntax and file filtering with the include parameter.
184
+ - lsp: Experimental semantic code intelligence for definitions, references, hover, symbols, implementations, and call hierarchy when a matching language server is available.
185
+ - write_file: Create new files or overwrite existing ones with full content.
186
+ - edit_file: Replace a unique string in a file with new content. The old_string must be unique — include enough context lines.
187
+ - bash: Execute shell commands. Set background=true for long-running processes (dev servers, watchers, builds). Returns a process ID immediately.
188
+ - process_logs: View recent output from a background process by ID.
189
+ - process_stop: Stop a background process by ID.
190
+ - process_list: List all background processes with status and uptime.
191
+ - wallet_info: Check the local wallet address, chain, and current ETH/USDC balances.
192
+ - wallet_history: Show recent x402 payment history from the audit log.
193
+ - fetch_payment_info: Inspect a URL for x402 payment requirements without paying. Returns payment options and a brin security score. Use only when the user wants to inspect — for actual access, use paid_request directly.
194
+ - paid_request: Access an x402-protected URL using the local wallet. Includes a brin security scan — URLs scoring below 25 are automatically blocked. The user will be prompted to approve the payment before it executes. Prefer this over fetch_payment_info when the user wants to access the resource.
195
+ - task: Delegate a focused foreground task to a sub-agent. Use general for multi-step execution, explore for fast read-only research, verify for sandbox-aware validation, computer for host desktop screenshot/input workflows, or a configured custom sub-agent name when listed under CUSTOM SUB-AGENTS.
196
+ - delegate: Launch a read-only background agent for longer research while you continue working.
197
+ - delegation_read: Retrieve a completed background delegation result by ID.
198
+ - delegation_list: List running and completed background delegations. Do not poll it repeatedly.
199
+ - schedule_create: Create a recurring or one-time scheduled headless run.
200
+ - schedule_list: List saved schedules and their status.
201
+ - schedule_remove: Remove a saved schedule.
202
+ - schedule_read_log: Read recent log output from a schedule.
203
+ - schedule_daemon_status: Check whether the schedule daemon is running.
204
+ - schedule_daemon_start: Start the schedule daemon in the background.
205
+ - schedule_daemon_stop: Stop the schedule daemon.
206
+ - search_web: Search the web for current information, documentation, APIs, tutorials, etc.
207
+ - search_x: Search X/Twitter for real-time posts, discussions, opinions, and trends.
208
+ - generate_image: Generate a new image or edit an existing image. It saves image files locally and returns their paths.
209
+ - generate_video: Generate a new video or animate an existing image. It saves video files locally and returns their paths.
210
+ - computer_snapshot: Capture an accessibility-tree snapshot with stable refs like @e1 for desktop interaction.
211
+ - computer_screenshot: Capture a host desktop screenshot for visual confirmation or fallback inspection.
212
+ - computer_click: Click a desktop element by ref, or coordinates as a fallback.
213
+ - computer_mouse_move: Hover a desktop element by ref, or coordinates as a fallback.
214
+ - computer_type: Type text into a specific desktop element ref.
215
+ - computer_press: Press a key or key chord in the focused host application.
216
+ - computer_scroll: Scroll a desktop element by ref.
217
+ - computer_launch: Launch an application and wait for its window to appear.
218
+ - computer_list_windows: List visible windows and their ids.
219
+ - computer_focus_window: Bring a target window to the front.
220
+ - computer_wait: Wait for time, elements, windows, or text during desktop workflows.
221
+ - computer_get: Read a property from a desktop element ref.
222
+ - MCP tools: connected servers appear as first-class tools named mcp_<server>__<tool>. The exact tools available THIS turn are listed under "CONNECTED MCP TOOLS" near the end of this prompt — call them directly by that name; never shell out to bash/JSON-RPC to reach an MCP server.
223
+
224
+ WORKFLOW:
225
+ 1. Understand the request
226
+ 2. Decide whether a sub-agent should handle the first investigation pass
227
+ 3. Use read_file, grep, lsp, and bash to explore the codebase directly when the task is small or tightly scoped
228
+ 4. Use bash with background=true for dev servers, watchers, or any long-running process — then continue working
229
+ 5. Use delegate for read-only work that can run in parallel, then continue productive work
230
+ 6. Use edit_file for targeted changes, write_file for new files or full rewrites
231
+ 7. Verify changes by reading modified files
232
+ 8. Run tests or builds with bash to confirm correctness
233
+ 9. Use search_web or search_x when you need up-to-date information
234
+
235
+ DEFAULT DELEGATION POLICY:
236
+ - Prefer the task tool by default for code review, code quality analysis, architecture research, root-cause investigation, bug triage, verification, or any request that likely needs reading multiple files before acting.
237
+ - Prefer delegate for longer-running read-only exploration when you can keep making progress without blocking.
238
+ - Use the explore sub-agent for read-only investigation, reviews, research, and "how does this work?" tasks.
239
+ - Use the general sub-agent for delegated work that may need editing files, running commands, or producing a concrete implementation.
240
+ - Use the verify sub-agent for sandbox-aware build, test, app boot, and smoke validation work.
241
+ - Use the computer sub-agent for host desktop interaction workflows that need screenshots, clicks, typing, keypresses, or scrolling.
242
+ - Use a matching custom sub-agent when the task fits one of the configured specializations.
243
+ - Never use delegate for tasks that should edit files or make shell changes.
244
+ - When a background delegation is running, do not wait idly and do not spam delegation_list(). Continue useful work.
245
+ - Do not wait for the user to explicitly ask for a sub-agent when delegation would clearly help.
246
+ - Skip delegation only when the task is trivial, single-file, or you already have the exact answer.
247
+
248
+ EXAMPLES:
249
+ - "review this change" -> delegate to explore first
250
+ - "research how auth works" -> delegate to explore first
251
+ - "investigate why this test fails" -> delegate to explore first, then continue with findings
252
+ - "refactor this module" -> delegate a focused part to general when helpful
253
+ - "verify this feature locally" -> use verify
254
+ - "open the host app and click through it" -> use computer
255
+ - "generate a logo" -> use generate_image
256
+ - "animate this still image" -> use generate_video
257
+ - Recurring specialized workflows -> use the matching custom sub-agent via task
258
+ - "every weekday at 9am run this check" -> use schedule_create with a cron expression
259
+ - "run this once automatically" -> use schedule_create with the right timing
260
+ - "make sure scheduled jobs keep running" -> use schedule_daemon_status and schedule_daemon_start
261
+
262
+ IMPORTANT:
263
+ - Prefer edit_file for surgical changes to existing files — it shows a clean diff.
264
+ - Prefer grep over bash for searching file contents. Use bash only for find, ls, git, and other shell commands.
265
+ - Prefer lsp over text search when you need exact definitions, references, implementations, or call hierarchy and a server is available.
266
+ - Use write_file only for new files or when most of the file is changing. For very large files (>500 lines), split into multiple edit_file calls or write smaller chunks.
267
+ - Use read_file instead of cat/head/tail for reading files.
268
+ - When the user asks for an automated recurring or one-time run, use the schedule tools instead of only describing the setup.
269
+ - After creating a recurring schedule, check the daemon status and start it with \`schedule_daemon_start\` if needed.
270
+
192
271
  Be direct. Execute, don't just describe. Show results, not plans.`,
193
- plan: `You are muonroi-cli in Plan mode — you analyze and plan but DO NOT execute changes.
194
-
195
- ${ENVIRONMENT}
196
-
197
- TOOLS:
198
- - read_file: Read file contents for analysis.
199
- - grep: Fast regex content search across the codebase. Prefer this over bash for finding patterns in files.
200
- - lsp: Experimental semantic code intelligence for read-only planning and research.
201
- - bash: ONLY for searching (find, ls), git inspection — NEVER modify files.
202
- - task: Delegate a focused task to a sub-agent when deeper research or specialized analysis would help.
203
- - generate_plan: ALWAYS use this to present your plan. Creates an interactive UI with steps and questions.
204
-
205
- BEHAVIOR:
206
- - Explore the codebase first using read_file, grep, and bash to understand the current state
207
- - Prefer lsp for exact symbol navigation when a matching server is available
208
- - ALWAYS call generate_plan to present your plan — never just describe it in text
209
- - Include clear, ordered steps with affected file paths
210
- - Include questions when you need user input on approach, trade-offs, or preferences
211
- - Use "select" questions for single-choice decisions, "multiselect" for picking multiple options, and "text" for free-form input
212
- - Highlight potential risks, edge cases, and dependencies in the plan summary
272
+ plan: `You are muonroi-cli in Plan mode — you analyze and plan but DO NOT execute changes.
273
+
274
+ ${ENVIRONMENT}
275
+
276
+ TOOLS:
277
+ - read_file: Read file contents for analysis.
278
+ - grep: Fast regex content search across the codebase. Prefer this over bash for finding patterns in files.
279
+ - lsp: Experimental semantic code intelligence for read-only planning and research.
280
+ - bash: ONLY for searching (find, ls), git inspection — NEVER modify files.
281
+ - task: Delegate a focused task to a sub-agent when deeper research or specialized analysis would help.
282
+ - generate_plan: ALWAYS use this to present your plan. Creates an interactive UI with steps and questions.
283
+
284
+ BEHAVIOR:
285
+ - Explore the codebase first using read_file, grep, and bash to understand the current state
286
+ - Prefer lsp for exact symbol navigation when a matching server is available
287
+ - ALWAYS call generate_plan to present your plan — never just describe it in text
288
+ - Include clear, ordered steps with affected file paths
289
+ - Include questions when you need user input on approach, trade-offs, or preferences
290
+ - Use "select" questions for single-choice decisions, "multiselect" for picking multiple options, and "text" for free-form input
291
+ - Highlight potential risks, edge cases, and dependencies in the plan summary
213
292
  - NEVER create, modify, or delete files — only read and analyze`,
214
- ask: `You are muonroi-cli in Ask mode — you answer questions clearly and thoroughly.
215
-
216
- ${ENVIRONMENT}
217
-
218
- TOOLS:
219
- - read_file: Read file contents for context.
220
- - grep: Fast regex content search across the codebase. Prefer this over bash for finding patterns in files.
221
- - lsp: Experimental semantic code intelligence for definitions, references, hover, and symbols.
222
- - bash: ONLY for searching (find, ls), git inspection — NEVER modify.
223
- - task: Delegate a focused task to a sub-agent when specialized analysis or deeper investigation would help.
224
-
225
- BEHAVIOR:
226
- - Answer the user's question directly and thoroughly
227
- - Use tools to gather context when needed, preferring lsp for exact symbol questions when available
228
- - Provide code examples when helpful
229
- - NEVER create, modify, or delete files
293
+ ask: `You are muonroi-cli in Ask mode — you answer questions clearly and thoroughly.
294
+
295
+ ${ENVIRONMENT}
296
+
297
+ TOOLS:
298
+ - read_file: Read file contents for context.
299
+ - grep: Fast regex content search across the codebase. Prefer this over bash for finding patterns in files.
300
+ - lsp: Experimental semantic code intelligence for definitions, references, hover, and symbols.
301
+ - bash: ONLY for searching (find, ls), git inspection — NEVER modify.
302
+ - task: Delegate a focused task to a sub-agent when specialized analysis or deeper investigation would help.
303
+
304
+ BEHAVIOR:
305
+ - Answer the user's question directly and thoroughly
306
+ - Use tools to gather context when needed, preferring lsp for exact symbol questions when available
307
+ - Provide code examples when helpful
308
+ - NEVER create, modify, or delete files
230
309
  - Focus on explanation, not execution`,
231
310
  };
232
311
  export function findCustomSubagent(agent, subagents = loadValidSubAgents()) {
@@ -242,10 +321,10 @@ export function formatCustomSubagentsPromptSection(subagents) {
242
321
  });
243
322
  return `\n\nCUSTOM SUB-AGENTS:\nUser-defined foreground sub-agents from ~/.muonroi-cli/user-settings.json. When one matches the task, call the task tool with agent set to the exact name.\n\n${lines.join("\n\n")}\n`;
244
323
  }
245
- const NON_ANTHROPIC_TOOL_PREAMBLE = `\n\nIMPORTANT — TOOL CALLING:
246
- You MUST invoke tools ONLY via the structured function calling API provided to you.
247
- NEVER output XML tags like <tool_name>, <bash>, <read_file>, or <delegate> as text.
248
- If you want to call a tool, use the function calling mechanism — do NOT write tool invocations as text in your response.
324
+ const NON_ANTHROPIC_TOOL_PREAMBLE = `\n\nIMPORTANT — TOOL CALLING:
325
+ You MUST invoke tools ONLY via the structured function calling API provided to you.
326
+ NEVER output XML tags like <tool_name>, <bash>, <read_file>, or <delegate> as text.
327
+ If you want to call a tool, use the function calling mechanism — do NOT write tool invocations as text in your response.
249
328
  Any XML-like tool invocation in your text output will be ignored by the system.\n`;
250
329
  /**
251
330
  * Strip the TOOLS: listing section from system prompt.
@@ -255,6 +334,50 @@ Any XML-like tool invocation in your text output will be ignored by the system.\
255
334
  export function stripToolsSection(text) {
256
335
  return text.replace(/\nTOOLS:\n[\s\S]*?\n(?=WORKFLOW:|BEHAVIOR:|IMPORTANT:|DEFAULT DELEGATION|EXAMPLES:|$)/g, "\n");
257
336
  }
337
/**
 * Render the LIVE per-turn MCP tool roster as a system-prompt block.
 *
 * The static prompt only describes the `mcp_<server>__<tool>` naming
 * convention; it never names the tools actually connected this turn. Listing
 * the exact callable names in prose makes them harder for the model to
 * overlook than when they are present only as raw tool JSON.
 *
 * The block is DYNAMIC (it varies per turn), so callers must append it OUTSIDE
 * the cached static prefix.
 *
 * @param {Iterable<string> | null | undefined} toolNames - Keys of the FINAL
 *   assembled tool set for the turn (post smart-filter, post fs-dedup).
 *   `null`/`undefined` is treated as "no tools"; non-string entries and names
 *   that do not follow the `mcp_<server>__<tool>` pattern are ignored.
 * @returns {string} The prompt block (prefixed with two newlines), or "" when
 *   no MCP tool is connected, so turns without MCP tools add nothing.
 */
export function buildMcpCapabilityBlock(toolNames) {
    // server id -> Set of full tool names (a Set so a repeated name is listed once).
    const byServer = new Map();
    for (const name of toolNames ?? []) {
        if (typeof name !== "string" || !name.startsWith("mcp_")) {
            continue;
        }
        // mcp_<sanitized-server-id>__<tool>; the lazy group splits on the FIRST
        // "__" (server ids rarely contain "__" — they are sanitized from real
        // ids like "muonroi-docs").
        const m = name.match(/^mcp_(.+?)__(.+)$/);
        if (!m) {
            continue;
        }
        const server = m[1];
        let names = byServer.get(server);
        if (names === undefined) {
            names = new Set();
            byServer.set(server, names);
        }
        names.add(name);
    }
    if (byServer.size === 0) {
        return "";
    }
    // Servers keep first-seen order; tool names are sorted within each server.
    const lines = [];
    for (const [server, names] of byServer) {
        lines.push(` • ${server}: ${[...names].sort().join(", ")}`);
    }
    const header = "\n\nCONNECTED MCP TOOLS (this turn) — these are available to you RIGHT NOW as " +
        "first-class tools. Call them directly by their exact name; do NOT shell out " +
        "to bash or hand-write JSON-RPC to reach an MCP server:\n";
    return header + lines.join("\n");
}
258
381
  export function buildSystemPromptParts(cwd, mode, sandboxMode, planContext, subagents, sandboxSettings, providerId, resumeDigest, options) {
259
382
  const chitchat = options?.chitchat === true;
260
383
  const custom = loadCustomInstructions(cwd);
@@ -46,7 +46,12 @@ const KNOWN_TASK_TYPES = new Set(Object.keys(CEILING_MATRIX));
46
46
  * graceful when PIL emits an out-of-band label or null.
47
47
  */
48
48
  export function resolveCeiling(taskType, size) {
49
- const row = taskType && KNOWN_TASK_TYPES.has(taskType) ? taskType : "general";
49
+ // `build` (greenfield creation, PIL Pass-0) is not a row in the LOCKED matrix.
50
+ // It is the highest-effort task — scaffolding many files — so it borrows the
51
+ // `generate` ceiling (10/18/30) rather than falling back to the tight `general`
52
+ // row (5/10/20), which would force-finalize a greenfield build far too early.
53
+ const normalized = taskType === "build" ? "generate" : taskType;
54
+ const row = normalized && KNOWN_TASK_TYPES.has(normalized) ? normalized : "general";
50
55
  return CEILING_MATRIX[row][size];
51
56
  }
52
57
  /**
@@ -100,3 +100,15 @@ export declare function attachReminderToMessages<T>(messages: ReadonlyArray<T>,
100
100
  * Used by prepareStep / sub-agent paths after compaction.
101
101
  */
102
102
  export declare function buildCheckpointReminder(iteration: number, hasEECheckpoint: boolean): string;
103
+ /**
104
+ * Pre-compaction "advance warning" gate. Fires when the prompt is approaching
105
+ * (default ≥78% of) the compaction threshold but compaction has NOT yet run this
106
+ * step — giving the agent one step to PRESERVE / finish before B3/B4 rewrites
107
+ * older tool results into stubs.
108
+ *
109
+ * `promptChars` MUST be the same quantity the compactor thresholds on (cumulative
110
+ * message chars + envelope chars), NOT the message COUNT. The original B4 wiring
111
+ * compared `stripped.length` (a message count, ~tens) against a char-scaled
112
+ * threshold (~156000), so the warning could never fire — session 2b7a10219499.
113
+ */
114
+ export declare function shouldPreWarnCompaction(promptChars: number, thresholdChars: number, ratio?: number): boolean;
@@ -218,4 +218,20 @@ export function buildCheckpointReminder(iteration, hasEECheckpoint) {
218
218
  return base;
219
219
  return base.slice(0, 220);
220
220
  }
221
/**
 * Gate for the pre-compaction "advance warning".
 *
 * Reports true once the prompt size has reached `ratio` (default 0.78) of the
 * compaction threshold, so the agent gets a step to PRESERVE / finish before
 * B3/B4 rewrites older tool results into stubs.
 *
 * `promptChars` MUST be measured in the same unit the compactor thresholds on
 * (cumulative message chars + envelope chars) — NOT a message COUNT. The
 * original B4 wiring passed `stripped.length` (a message count, ~tens) against
 * a char-scaled threshold (~156000), so the warning could never fire —
 * session 2b7a10219499.
 *
 * @param {number} promptChars - Current prompt size in characters.
 * @param {number} thresholdChars - Compaction threshold in characters.
 * @param {number} [ratio=0.78] - Fraction of the threshold at which to warn.
 * @returns {boolean} false when either size is zero or negative; otherwise
 *   whether `promptChars` is at least `floor(thresholdChars * ratio)`.
 */
export function shouldPreWarnCompaction(promptChars, thresholdChars, ratio = 0.78) {
    const warnAt = Math.floor(thresholdChars * ratio);
    // A non-positive threshold or prompt size means there is nothing to warn about.
    const nothingToMeasure = thresholdChars <= 0 || promptChars <= 0;
    return !nothingToMeasure && promptChars >= warnAt;
}
221
237
  //# sourceMappingURL=scope-reminder.js.map
@@ -13,7 +13,7 @@
13
13
  * - Reminder lives in tool_result/system message — never in system prompt
14
14
  */
15
15
  import { afterEach, beforeEach, describe, expect, it } from "vitest";
16
- import { attachReminderToMessages, buildScopeReminder, cadenceForSize, shouldInjectCeilingCrossing, shouldInjectReminder, shouldInjectSoftWarn, } from "./scope-reminder.js";
16
+ import { attachReminderToMessages, buildScopeReminder, cadenceForSize, shouldInjectCeilingCrossing, shouldInjectReminder, shouldInjectSoftWarn, shouldPreWarnCompaction, } from "./scope-reminder.js";
17
17
  describe("cadenceForSize", () => {
18
18
  it("locks 3/5/8 for small/medium/large with hard floor >= 3", () => {
19
19
  expect(cadenceForSize("small")).toBe(3);
@@ -201,4 +201,25 @@ describe("attachReminderToMessages", () => {
201
201
  expect(out).toEqual(messages);
202
202
  });
203
203
  });
204
describe("shouldPreWarnCompaction (regression: session 2b7a10219499 dead pre-warning)", () => {
    // Same value as the MUONROI_TOP_LEVEL_COMPACT_THRESHOLD_CHARS default.
    const THRESHOLD = 200_000;
    // Shorthand: evaluate the gate against the default threshold unless told otherwise.
    const warns = (chars, threshold = THRESHOLD) => shouldPreWarnCompaction(chars, threshold);
    it("fires when prompt chars reach >=78% of the threshold (approaching compaction)", () => {
        const exactlyAtRatio = Math.floor(THRESHOLD * 0.78);
        expect(warns(exactlyAtRatio)).toBe(true);
        expect(warns(190_000)).toBe(true);
    });
    it("does NOT fire while comfortably below the threshold", () => {
        expect(warns(100_000)).toBe(false);
        expect(warns(0)).toBe(false);
    });
    it("guards against the original bug: a message COUNT can never trip a char threshold", () => {
        // The dead wiring fed stripped.length (a message count, ~tens) into a
        // char-scaled comparison. A char total crosses the line; a count never does.
        const messageCount = 60; // plausible long-session message count
        expect(warns(messageCount)).toBe(false);
        expect(warns(170_000)).toBe(true);
    });
    it("is inert for a zero/negative threshold (no compaction configured)", () => {
        expect(warns(999_999, 0)).toBe(false);
    });
});
204
225
  //# sourceMappingURL=scope-reminder.test.js.map