muonroi-cli 1.4.1 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (172)
  1. package/LICENSE +21 -21
  2. package/README.md +122 -122
  3. package/dist/packages/agent-harness-core/src/predicate.d.ts +1 -1
  4. package/dist/src/agent-harness/__tests__/mock-model.spec.js +48 -1
  5. package/dist/src/agent-harness/mock-model.d.ts +11 -0
  6. package/dist/src/agent-harness/mock-model.js +21 -0
  7. package/dist/src/cli/cost-forensics.js +12 -12
  8. package/dist/src/council/__tests__/clarification-prompt.test.js +51 -0
  9. package/dist/src/council/__tests__/clarifier-ready-gate.test.js +32 -0
  10. package/dist/src/council/__tests__/decisions-lock.test.js +17 -1
  11. package/dist/src/council/__tests__/oauth-reachable.test.d.ts +1 -0
  12. package/dist/src/council/__tests__/oauth-reachable.test.js +31 -0
  13. package/dist/src/council/__tests__/parse-outcome-fallback.test.js +11 -0
  14. package/dist/src/council/clarifier.js +9 -1
  15. package/dist/src/council/debate.js +5 -1
  16. package/dist/src/council/decisions-lock.js +3 -3
  17. package/dist/src/council/index.js +12 -5
  18. package/dist/src/council/leader.d.ts +0 -17
  19. package/dist/src/council/leader.js +22 -15
  20. package/dist/src/council/planner.js +1 -1
  21. package/dist/src/council/prompts.js +63 -57
  22. package/dist/src/council/types.d.ts +7 -0
  23. package/dist/src/ee/__tests__/ee-onboarding.test.d.ts +1 -0
  24. package/dist/src/ee/__tests__/ee-onboarding.test.js +32 -0
  25. package/dist/src/ee/auth.d.ts +9 -0
  26. package/dist/src/ee/auth.js +19 -0
  27. package/dist/src/ee/ee-onboarding.d.ts +5 -0
  28. package/dist/src/ee/ee-onboarding.js +76 -0
  29. package/dist/src/generated/version.d.ts +1 -1
  30. package/dist/src/generated/version.js +1 -1
  31. package/dist/src/headless/output.js +6 -4
  32. package/dist/src/headless/output.test.js +4 -3
  33. package/dist/src/index.js +20 -1
  34. package/dist/src/mcp/__tests__/auto-setup.test.js +74 -0
  35. package/dist/src/mcp/__tests__/client-pool.spec.d.ts +1 -0
  36. package/dist/src/mcp/__tests__/client-pool.spec.js +98 -0
  37. package/dist/src/mcp/__tests__/parallel-build.spec.d.ts +1 -0
  38. package/dist/src/mcp/__tests__/parallel-build.spec.js +67 -0
  39. package/dist/src/mcp/__tests__/smart-filter.test.js +56 -0
  40. package/dist/src/mcp/auto-setup.js +56 -2
  41. package/dist/src/mcp/client-pool.d.ts +46 -0
  42. package/dist/src/mcp/client-pool.js +212 -0
  43. package/dist/src/mcp/oauth-callback.js +2 -2
  44. package/dist/src/mcp/parse-headers.test.js +14 -14
  45. package/dist/src/mcp/runtime.d.ts +28 -0
  46. package/dist/src/mcp/runtime.js +117 -51
  47. package/dist/src/mcp/self-verify-runner.d.ts +14 -0
  48. package/dist/src/mcp/self-verify-runner.js +38 -0
  49. package/dist/src/mcp/setup-guide-text.d.ts +9 -0
  50. package/dist/src/mcp/setup-guide-text.js +84 -0
  51. package/dist/src/mcp/smart-filter.js +49 -0
  52. package/dist/src/mcp/smoke.test.js +43 -43
  53. package/dist/src/mcp/tools-server.d.ts +7 -0
  54. package/dist/src/mcp/tools-server.js +19 -22
  55. package/dist/src/models/catalog.json +349 -349
  56. package/dist/src/ops/__tests__/doctor-ee-health.test.js +21 -0
  57. package/dist/src/ops/doctor.d.ts +3 -2
  58. package/dist/src/ops/doctor.js +47 -11
  59. package/dist/src/ops/doctor.test.js +4 -3
  60. package/dist/src/orchestrator/__tests__/mcp-capability-block.test.d.ts +1 -0
  61. package/dist/src/orchestrator/__tests__/mcp-capability-block.test.js +39 -0
  62. package/dist/src/orchestrator/__tests__/project-stack.test.d.ts +1 -0
  63. package/dist/src/orchestrator/__tests__/project-stack.test.js +65 -0
  64. package/dist/src/orchestrator/batch-turn-runner.js +7 -11
  65. package/dist/src/orchestrator/message-processor.js +57 -27
  66. package/dist/src/orchestrator/orchestrator.js +26 -0
  67. package/dist/src/orchestrator/prompts.d.ts +51 -0
  68. package/dist/src/orchestrator/prompts.js +257 -134
  69. package/dist/src/orchestrator/scope-ceiling.js +6 -1
  70. package/dist/src/orchestrator/stream-runner.js +20 -15
  71. package/dist/src/orchestrator/text-tool-call-detector.test.js +13 -13
  72. package/dist/src/pil/__tests__/clarity-gate.test.js +24 -215
  73. package/dist/src/pil/__tests__/config.test.js +1 -17
  74. package/dist/src/pil/__tests__/discovery.test.js +144 -11
  75. package/dist/src/pil/__tests__/layer1-intent-trace.test.js +7 -2
  76. package/dist/src/pil/__tests__/layer1-intent.test.js +3 -0
  77. package/dist/src/pil/__tests__/layer16-clarity.test.js +32 -116
  78. package/dist/src/pil/__tests__/layer4-gsd.test.js +37 -0
  79. package/dist/src/pil/__tests__/layer6-output.test.js +137 -18
  80. package/dist/src/pil/__tests__/llm-classify.test.js +49 -2
  81. package/dist/src/pil/agent-operating-contract.d.ts +1 -1
  82. package/dist/src/pil/agent-operating-contract.js +2 -0
  83. package/dist/src/pil/agent-operating-contract.test.js +7 -2
  84. package/dist/src/pil/cheap-model-playbook.js +35 -35
  85. package/dist/src/pil/cheap-model-workbooks.js +16 -13
  86. package/dist/src/pil/clarity-gate.d.ts +21 -19
  87. package/dist/src/pil/clarity-gate.js +26 -153
  88. package/dist/src/pil/config.d.ts +9 -1
  89. package/dist/src/pil/config.js +15 -4
  90. package/dist/src/pil/discovery.js +211 -136
  91. package/dist/src/pil/layer1-intent.d.ts +12 -0
  92. package/dist/src/pil/layer1-intent.js +283 -38
  93. package/dist/src/pil/layer1-intent.test.js +210 -4
  94. package/dist/src/pil/layer16-clarity.d.ts +25 -11
  95. package/dist/src/pil/layer16-clarity.js +19 -306
  96. package/dist/src/pil/layer4-gsd.js +18 -6
  97. package/dist/src/pil/layer6-output.d.ts +2 -0
  98. package/dist/src/pil/layer6-output.js +137 -22
  99. package/dist/src/pil/llm-classify.d.ts +26 -0
  100. package/dist/src/pil/llm-classify.js +34 -5
  101. package/dist/src/pil/native-capabilities-workbook.d.ts +1 -1
  102. package/dist/src/pil/native-capabilities-workbook.js +82 -76
  103. package/dist/src/pil/schema.d.ts +8 -0
  104. package/dist/src/pil/schema.js +12 -1
  105. package/dist/src/pil/task-tier-map.js +4 -0
  106. package/dist/src/pil/types.d.ts +11 -1
  107. package/dist/src/product-loop/done-gate.js +3 -3
  108. package/dist/src/product-loop/loop-driver.js +18 -18
  109. package/dist/src/product-loop/progress-snapshot.js +4 -4
  110. package/dist/src/providers/auth/gemini-oauth.js +6 -15
  111. package/dist/src/providers/auth/grok-oauth.js +6 -15
  112. package/dist/src/providers/auth/openai-oauth.js +6 -15
  113. package/dist/src/providers/mcp-vision-bridge.js +48 -48
  114. package/dist/src/reporter/index.js +1 -1
  115. package/dist/src/scaffold/bb-ecosystem-apply.js +47 -47
  116. package/dist/src/scaffold/bb-quality-gate.js +5 -5
  117. package/dist/src/scaffold/continuation-prompt.js +60 -60
  118. package/dist/src/scaffold/init-new.js +453 -453
  119. package/dist/src/self-qa/__tests__/scenario-planner.test.js +3 -3
  120. package/dist/src/self-qa/agentic-loop.js +24 -19
  121. package/dist/src/self-qa/spec-emitter.js +26 -23
  122. package/dist/src/storage/__tests__/migrations.test.js +2 -2
  123. package/dist/src/storage/interaction-log.js +5 -5
  124. package/dist/src/storage/migrations.js +122 -122
  125. package/dist/src/storage/sessions.js +42 -42
  126. package/dist/src/storage/transcript.js +91 -84
  127. package/dist/src/storage/usage.js +14 -14
  128. package/dist/src/storage/workspaces.js +12 -12
  129. package/dist/src/tools/__tests__/native-tools.test.d.ts +1 -0
  130. package/dist/src/tools/__tests__/native-tools.test.js +53 -0
  131. package/dist/src/tools/git-safety.d.ts +61 -0
  132. package/dist/src/tools/git-safety.js +141 -0
  133. package/dist/src/tools/git-safety.test.d.ts +1 -0
  134. package/dist/src/tools/git-safety.test.js +111 -0
  135. package/dist/src/tools/native-tools.d.ts +31 -0
  136. package/dist/src/tools/native-tools.js +273 -0
  137. package/dist/src/tools/registry-git-safety.test.d.ts +7 -0
  138. package/dist/src/tools/registry-git-safety.test.js +92 -0
  139. package/dist/src/tools/registry.js +39 -4
  140. package/dist/src/ui/__tests__/markdown-render.test.d.ts +1 -0
  141. package/dist/src/ui/__tests__/markdown-render.test.js +48 -0
  142. package/dist/src/ui/app.js +0 -0
  143. package/dist/src/ui/components/message-view.js +4 -1
  144. package/dist/src/ui/components/structured-response-view.js +7 -3
  145. package/dist/src/ui/components/tool-group.js +7 -1
  146. package/dist/src/ui/markdown-render.d.ts +41 -0
  147. package/dist/src/ui/markdown-render.js +223 -0
  148. package/dist/src/ui/markdown.d.ts +10 -0
  149. package/dist/src/ui/markdown.js +12 -35
  150. package/dist/src/ui/slash/council-inspect.js +4 -4
  151. package/dist/src/ui/slash/export.js +4 -4
  152. package/dist/src/ui/utils/text.d.ts +8 -0
  153. package/dist/src/ui/utils/text.js +16 -0
  154. package/dist/src/ui/utils/text.test.d.ts +1 -0
  155. package/dist/src/ui/utils/text.test.js +23 -0
  156. package/dist/src/usage/ledger.js +48 -15
  157. package/dist/src/utils/__tests__/footprint-gitignore.test.d.ts +1 -0
  158. package/dist/src/utils/__tests__/footprint-gitignore.test.js +50 -0
  159. package/dist/src/utils/clipboard-image.js +23 -23
  160. package/dist/src/utils/open-url.d.ts +56 -0
  161. package/dist/src/utils/open-url.js +58 -0
  162. package/dist/src/utils/open-url.test.d.ts +1 -0
  163. package/dist/src/utils/open-url.test.js +86 -0
  164. package/dist/src/utils/settings.d.ts +12 -0
  165. package/dist/src/utils/settings.js +48 -0
  166. package/dist/src/utils/side-question.js +2 -2
  167. package/dist/src/utils/skills.js +3 -3
  168. package/dist/src/verify/__tests__/coverage-parsers.test.js +30 -30
  169. package/dist/src/verify/environment.js +2 -1
  170. package/package.json +1 -1
  171. package/dist/src/pil/layer16-clarity.test.js +0 -31
  172. package/dist/src/{pil/layer16-clarity.test.d.ts → council/__tests__/clarification-prompt.test.d.ts} +0 -0
@@ -84,6 +84,27 @@ describe("doctor EE health checks (CQ-16c/16d)", () => {
84
84
  expect(eeHealth?.status).toBe("warn");
85
85
  expect(eeHealth?.detail).toContain("72.61.127.154");
86
86
  });
87
+ it("ee.health does NOT report unreachable when server is up but gates degraded (VERIFY F9)", async () => {
88
+ // Live ee_query works (server reachable) yet the gates sub-check fails —
89
+ // doctor must not call this "unreachable" (false negative). server.ok is
90
+ // the reachability signal, not result.ok.
91
+ healthDetailedMock.mockResolvedValue({
92
+ ok: false,
93
+ status: 200,
94
+ mode: "thin-client",
95
+ circuit: "closed",
96
+ components: {
97
+ server: { ok: true, status: 200 },
98
+ gates: { ok: false, status: 0 },
99
+ },
100
+ });
101
+ const results = await runDoctor();
102
+ const eeHealth = results.find((r) => r.name === "ee.health");
103
+ expect(eeHealth?.status).toBe("warn");
104
+ expect(eeHealth?.detail).not.toContain("unreachable");
105
+ expect(eeHealth?.detail).toContain("server=ok");
106
+ expect(eeHealth?.detail.toLowerCase()).toContain("gates");
107
+ });
87
108
  it("ee.health warns gracefully when healthDetailed throws", async () => {
88
109
  healthDetailedMock.mockRejectedValue(new Error("network timeout"));
89
110
  const results = await runDoctor();
@@ -2,9 +2,10 @@
2
2
  * src/ops/doctor.ts
3
3
  *
4
4
  * Health check runner for muonroi-cli doctor command.
5
- * Runs 7 named checks and returns pass/warn/fail results.
5
+ * Runs 10 named checks and returns pass/warn/fail results.
6
6
  *
7
- * Checks: bun_version, os, key_presence, ollama, ee, qdrant, error_rate
7
+ * Checks: bun_version, os, key_presence, ollama, dotnet, ee.health, ee.brain,
8
+ * qdrant, error_rate, council.mcp
8
9
  * Never throws — all checks handle errors gracefully (warn, not crash).
9
10
  */
10
11
  export interface CheckResult {
@@ -2,11 +2,13 @@
2
2
  * src/ops/doctor.ts
3
3
  *
4
4
  * Health check runner for muonroi-cli doctor command.
5
- * Runs 7 named checks and returns pass/warn/fail results.
5
+ * Runs 10 named checks and returns pass/warn/fail results.
6
6
  *
7
- * Checks: bun_version, os, key_presence, ollama, ee, qdrant, error_rate
7
+ * Checks: bun_version, os, key_presence, ollama, dotnet, ee.health, ee.brain,
8
+ * qdrant, error_rate, council.mcp
8
9
  * Never throws — all checks handle errors gracefully (warn, not crash).
9
10
  */
11
+ import { spawnSync } from "node:child_process";
10
12
  import { readFile } from "fs/promises";
11
13
  import os from "os";
12
14
  import path from "path";
@@ -116,7 +118,6 @@ async function checkEEDetailed() {
116
118
  const result = await healthDetailed();
117
119
  const serverOk = result.components.server.ok;
118
120
  const gatesOk = result.components.gates?.ok ?? true; // null if local mode
119
- const isHealthy = result.ok;
120
121
  const parts = [
121
122
  `mode=${result.mode}`,
122
123
  `circuit=${result.circuit}`,
@@ -125,7 +126,11 @@ async function checkEEDetailed() {
125
126
  if (result.components.gates !== null) {
126
127
  parts.push(`gates=${gatesOk ? "ok" : `fail(${result.components.gates.status})`}`);
127
128
  }
128
- if (!isHealthy) {
129
+ // Reachability is the SERVER component, not result.ok. A failing gates
130
+ // sub-check (e.g. read-token scope in thin-client mode) does NOT mean the
131
+ // EE server is unreachable — labelling it "unreachable" is a false negative
132
+ // that contradicts a live ee_query working. See VERIFY F9.
133
+ if (!serverOk) {
129
134
  const hint = result.mode === "thin-client"
130
135
  ? "Hint: check VPS 72.61.127.154:8082 is reachable; verify ~/.experience/config.json serverBaseUrl + serverReadAuthToken"
131
136
  : "Hint: start EE locally or configure thin-client in ~/.experience/config.json";
@@ -135,6 +140,13 @@ async function checkEEDetailed() {
135
140
  detail: `EE unreachable — ${parts.join(", ")}. ${hint}`,
136
141
  };
137
142
  }
143
+ if (!gatesOk) {
144
+ return {
145
+ name: "ee.health",
146
+ status: "warn",
147
+ detail: `EE reachable; gates check degraded — ${parts.join(", ")}. Hint: gates needs serverReadAuthToken scope in ~/.experience/config.json`,
148
+ };
149
+ }
138
150
  return {
139
151
  name: "ee.health",
140
152
  status: "pass",
@@ -157,9 +169,9 @@ async function checkBrainEmptiness() {
157
169
  // Count ee_injection events with event_subtype='no_match' in last 30 days
158
170
  const cutoff = new Date(Date.now() - 30 * 86_400_000).toISOString();
159
171
  const row = db
160
- .prepare(`SELECT COUNT(*) as cnt FROM interaction_logs
161
- WHERE event_type = 'ee_injection'
162
- AND event_subtype = 'no_match'
172
+ .prepare(`SELECT COUNT(*) as cnt FROM interaction_logs
173
+ WHERE event_type = 'ee_injection'
174
+ AND event_subtype = 'no_match'
163
175
  AND created_at >= ?`)
164
176
  .get(cutoff);
165
177
  const noMatchCount = row?.cnt ?? 0;
@@ -193,6 +205,29 @@ async function checkBrainEmptiness() {
193
205
  return { name: "ee.brain", status: "pass", detail: "brain check skipped (DB unavailable)" };
194
206
  }
195
207
  }
208
+ async function checkDotnet() {
209
+ // BB-aware scaffolding (muonroi-building-block) needs the .NET SDK for its
210
+ // restore/build/modular-boundaries quality gate. Doctor previously had no
211
+ // dotnet probe, so BB tasks had no preflight. See VERIFY F1.
212
+ try {
213
+ const res = spawnSync("dotnet", ["--version"], { encoding: "utf8", timeout: 5000 });
214
+ if (res.status === 0 && typeof res.stdout === "string" && res.stdout.trim().length > 0) {
215
+ return { name: "dotnet", status: "pass", detail: `dotnet ${res.stdout.trim()} — BB/.NET scaffold ready` };
216
+ }
217
+ return {
218
+ name: "dotnet",
219
+ status: "warn",
220
+ detail: "dotnet not found (optional — needed for muonroi-building-block scaffolding + quality gate)",
221
+ };
222
+ }
223
+ catch (err) {
224
+ return {
225
+ name: "dotnet",
226
+ status: "warn",
227
+ detail: `dotnet probe failed: ${err.message} (optional — needed for BB scaffolding)`,
228
+ };
229
+ }
230
+ }
196
231
  async function checkQdrant() {
197
232
  try {
198
233
  const qdrantUrl = process.env.QDRANT_URL ?? "http://localhost:6333";
@@ -258,10 +293,10 @@ async function checkCouncilMcpNudge() {
258
293
  // 2. Query DB for [Council Memory] records with URL or research topics
259
294
  const db = getDatabase();
260
295
  const rows = db
261
- .prepare(`SELECT message_json FROM messages
262
- WHERE role = 'system'
263
- AND message_json LIKE '%[Council Memory]%'
264
- ORDER BY created_at DESC
296
+ .prepare(`SELECT message_json FROM messages
297
+ WHERE role = 'system'
298
+ AND message_json LIKE '%[Council Memory]%'
299
+ ORDER BY created_at DESC
265
300
  LIMIT 50`)
266
301
  .all();
267
302
  let qualifyingCount = 0;
@@ -323,6 +358,7 @@ export async function runDoctor() {
323
358
  checkOS(),
324
359
  checkKeyPresence(),
325
360
  checkOllamaHealth(),
361
+ checkDotnet(), // NEW — VERIFY F1: BB/.NET scaffold preflight
326
362
  checkEEDetailed(), // replaces checkEE() — CQ-16c
327
363
  checkBrainEmptiness(), // NEW — CQ-16d
328
364
  checkQdrant(),
@@ -1,7 +1,7 @@
1
1
  import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
2
2
  // RED phase: import module under test (will fail until doctor.ts is created)
3
3
  import { formatDoctorReport, runDoctor } from "./doctor.js";
4
- describe("doctor — runDoctor returns 9 checks", () => {
4
+ describe("doctor — runDoctor returns 10 checks", () => {
5
5
  beforeEach(() => {
6
6
  // Mock fetch to avoid real network calls in tests
7
7
  vi.stubGlobal("fetch", vi.fn().mockResolvedValue({ ok: false, status: 503 }));
@@ -10,9 +10,9 @@ describe("doctor — runDoctor returns 9 checks", () => {
10
10
  vi.unstubAllGlobals();
11
11
  vi.restoreAllMocks();
12
12
  });
13
- it("returns exactly 9 CheckResult entries (council_mcp_nudge added in CQ-23)", async () => {
13
+ it("returns exactly 10 CheckResult entries (dotnet added in VERIFY F1)", async () => {
14
14
  const results = await runDoctor();
15
- expect(results).toHaveLength(9);
15
+ expect(results).toHaveLength(10);
16
16
  });
17
17
  it("each CheckResult has valid name, status, and detail fields", async () => {
18
18
  const results = await runDoctor();
@@ -32,6 +32,7 @@ describe("doctor — runDoctor returns 9 checks", () => {
32
32
  expect(names).toContain("os");
33
33
  expect(names).toContain("key_presence");
34
34
  expect(names).toContain("ollama");
35
+ expect(names).toContain("dotnet");
35
36
  expect(names).toContain("ee.health");
36
37
  expect(names).toContain("ee.brain");
37
38
  expect(names).toContain("qdrant");
@@ -0,0 +1,39 @@
1
+ import { describe, expect, it } from "vitest";
2
+ import { buildMcpCapabilityBlock } from "../prompts.js";
3
+ describe("buildMcpCapabilityBlock", () => {
4
+ it("returns '' when no MCP tools are connected (non-agent / chitchat / no-client-tools turns add nothing)", () => {
5
+ expect(buildMcpCapabilityBlock([])).toBe("");
6
+ expect(buildMcpCapabilityBlock(["read_file", "grep", "bash", "edit_file"])).toBe("");
7
+ });
8
+ it("names the exact callable mcp_<server>__<tool> tools connected this turn (regression: session f6f7881a5fae)", () => {
9
+ const block = buildMcpCapabilityBlock([
10
+ "read_file",
11
+ "bash",
12
+ "mcp_muonroi-docs__setup_guide",
13
+ "mcp_muonroi-docs__docs_search",
14
+ ]);
15
+ // The failure was the agent not knowing it could call setup_guide directly.
16
+ expect(block).toContain("mcp_muonroi-docs__setup_guide");
17
+ expect(block).toContain("mcp_muonroi-docs__docs_search");
18
+ expect(block).toMatch(/CONNECTED MCP TOOLS/);
19
+ // Steers away from the bash-JSON-RPC fallback the agent actually did.
20
+ expect(block).toMatch(/do NOT shell out to bash/i);
21
+ });
22
+ it("groups tools by server (id with a hyphen split on the first '__')", () => {
23
+ const block = buildMcpCapabilityBlock([
24
+ "mcp_muonroi-docs__setup_guide",
25
+ "mcp_context7__query_docs",
26
+ "mcp_muonroi-docs__docs_search",
27
+ ]);
28
+ // muonroi-docs appears once as a group header with both its tools.
29
+ expect(block.match(/muonroi-docs:/g)?.length).toBe(1);
30
+ expect(block).toMatch(/context7:/);
31
+ });
32
+ it("ignores non-mcp tool names and is deterministic (tools sorted within a server)", () => {
33
+ const block = buildMcpCapabilityBlock(["mcp_srv__b_tool", "write_file", "mcp_srv__a_tool"]);
34
+ expect(block).not.toContain("write_file");
35
+ // a_tool sorts before b_tool → stable output regardless of input order.
36
+ expect(block.indexOf("mcp_srv__a_tool")).toBeLessThan(block.indexOf("mcp_srv__b_tool"));
37
+ });
38
+ });
39
+ //# sourceMappingURL=mcp-capability-block.test.js.map
@@ -0,0 +1,65 @@
1
+ import { mkdtempSync, rmSync, writeFileSync } from "node:fs";
2
+ import { tmpdir } from "node:os";
3
+ import { join } from "node:path";
4
+ import { describe, expect, it } from "vitest";
5
+ import { detectProjectStack } from "../prompts.js";
6
+ // detectProjectStack feeds the ENVIRONMENT block so every model — in any mode,
7
+ // on any provider — knows the concrete stack of the repo it is running inside,
8
+ // instead of assuming Python / asking the user to describe the project
9
+ // (2026-06-14 dogfood: "model native doesn't know what it can do in the CLI").
10
+ describe("detectProjectStack", () => {
11
+ const mkTemp = (slug) => mkdtempSync(join(tmpdir(), `mr-stack-${slug}-`));
12
+ it("detects the current repo as a JS/TS project under git", () => {
13
+ const out = detectProjectStack(process.cwd());
14
+ expect(out).toMatch(/TypeScript|JavaScript/);
15
+ expect(out).toMatch(/vcs: git/);
16
+ });
17
+ it("returns empty string for a bare directory (greenfield)", () => {
18
+ const dir = mkTemp("empty");
19
+ try {
20
+ expect(detectProjectStack(dir)).toBe("");
21
+ }
22
+ finally {
23
+ rmSync(dir, { recursive: true, force: true });
24
+ }
25
+ });
26
+ it("detects a Rust project from Cargo.toml", () => {
27
+ const dir = mkTemp("rust");
28
+ try {
29
+ writeFileSync(join(dir, "Cargo.toml"), "[package]\nname = 'x'\n");
30
+ expect(detectProjectStack(dir)).toMatch(/^Rust/);
31
+ }
32
+ finally {
33
+ rmSync(dir, { recursive: true, force: true });
34
+ }
35
+ });
36
+ it("detects a .NET project from a .csproj file", () => {
37
+ const dir = mkTemp("net");
38
+ try {
39
+ writeFileSync(join(dir, "App.csproj"), "<Project/>");
40
+ expect(detectProjectStack(dir)).toMatch(/\.NET\/C#/);
41
+ }
42
+ finally {
43
+ rmSync(dir, { recursive: true, force: true });
44
+ }
45
+ });
46
+ it("reports package manager + test runner for a bun/vitest TS project", () => {
47
+ const dir = mkTemp("ts");
48
+ try {
49
+ writeFileSync(join(dir, "tsconfig.json"), "{}");
50
+ writeFileSync(join(dir, "bun.lock"), "");
51
+ writeFileSync(join(dir, "vitest.config.ts"), "export default {}");
52
+ const out = detectProjectStack(dir);
53
+ expect(out).toMatch(/TypeScript/);
54
+ expect(out).toMatch(/pkg: bun/);
55
+ expect(out).toMatch(/tests: vitest/);
56
+ }
57
+ finally {
58
+ rmSync(dir, { recursive: true, force: true });
59
+ }
60
+ });
61
+ it("returns empty (no throw) for a missing directory", () => {
62
+ expect(detectProjectStack(join(tmpdir(), "definitely-missing-dir-9f8a7b6c"))).toBe("");
63
+ });
64
+ });
65
+ //# sourceMappingURL=project-stack.test.js.map
@@ -25,9 +25,10 @@
25
25
  // `recordUsage`, `appendCompletedTurn`, `discardAbortedTurn`,
26
26
  // `getCompactedThisTurn` / `setCompactedThisTurn`, etc.) so a future
27
27
  // `TurnRunnerDepsBase` hoist is mechanical.
28
- import { buildMcpToolSet } from "../mcp/runtime.js";
28
+ import { acquireMcpTools } from "../mcp/client-pool.js";
29
29
  import { getProviderCapabilities } from "../providers/capabilities.js";
30
30
  import { requireRuntimeProvider } from "../providers/runtime.js";
31
+ import { openUrl } from "../utils/open-url.js";
31
32
  import { loadMcpServers } from "../utils/settings.js";
32
33
  import { accumulateUsage, buildAssistantBatchMessage, buildBatchChatCompletionRequest, buildBatchName, buildToolBatchMessage, getBatchFinishReason, getBatchUsage, hasUsage, toLocalToolCall, } from "./batch-utils.js";
33
34
  import { relaxCompactionSettings } from "./compaction.js";
@@ -104,17 +105,12 @@ export class BatchTurnRunner {
104
105
  });
105
106
  let tools = !batchCaps.supportsClientTools(runtime.modelInfo) ? {} : baseTools;
106
107
  if (deps.mode === "agent" && batchCaps.supportsClientTools(runtime.modelInfo)) {
107
- const mcpBundle = await buildMcpToolSet(loadMcpServers(), {
108
+ const mcpBundle = await acquireMcpTools(loadMcpServers(), {
108
109
  onOAuthRequired: (_serverId, url) => {
109
- const urlStr = url.toString();
110
- import("child_process").then(({ exec }) => {
111
- const cmd = process.platform === "win32"
112
- ? `start "" "${urlStr}"`
113
- : process.platform === "darwin"
114
- ? `open "${urlStr}"`
115
- : `xdg-open "${urlStr}"`;
116
- exec(cmd);
117
- });
110
+ // Server-supplied URL is untrusted — openUrl validates the scheme
111
+ // and spawns via execFile (no shell), closing the command-injection
112
+ // vector the old exec() opener had.
113
+ openUrl(url);
118
114
  },
119
115
  });
120
116
  closeMcp = mcpBundle.close;
@@ -59,7 +59,7 @@ import * as phaseTracker from "../ee/phase-tracker.js";
59
59
  import { buildScope as buildScopeForVeto } from "../ee/scope.js";
60
60
  import { fireTrajectoryEvent } from "../ee/session-trajectory.js";
61
61
  import { getTenantId as getTenantIdForVeto } from "../ee/tenant.js";
62
- import { buildMcpToolSet } from "../mcp/runtime.js";
62
+ import { acquireMcpTools } from "../mcp/client-pool.js";
63
63
  import { dropRedundantFsMcpTools, filterMcpServersByMessage } from "../mcp/smart-filter.js";
64
64
  import { getModelInfo } from "../models/registry.js";
65
65
  import { cheapModelShellLine, injectCheapModelPlaybook, injectCheapModelShellDirective, shouldInjectCheapModelPlaybook, } from "../pil/cheap-model-playbook.js";
@@ -83,6 +83,7 @@ import { visionToolsNeeded } from "../tools/vision-gate.js";
83
83
  import { isDebugEnabled, recordTurnTrace } from "../ui/slash/debug.js";
84
84
  import { statusBarStore } from "../ui/status-bar/store.js";
85
85
  import { appendDecisionLog } from "../usage/decision-log.js";
86
+ import { openUrl } from "../utils/open-url.js";
86
87
  import { appendAudit, toolNeedsApproval } from "../utils/permission-mode.js";
87
88
  import { getAutoCouncilConfidence, getAutoCouncilMinRoles, getProviderStallTimeoutMs, getRoleModels, getTopLevelCompactKeepLast, getTopLevelCompactThresholdChars, getTopLevelToolBudgetChars, isAutoCouncilEnabled, isProviderDisabled, loadMcpServers, loadValidSubAgents, } from "../utils/settings.js";
88
89
  import { resolveShell } from "../utils/shell.js";
@@ -92,7 +93,7 @@ import { humanizeApiError, isAuthenticationError, isContextLimitError, summarize
92
93
  import { buildGroundingFootnote, findUnverifiedClaims } from "./grounding-check.js";
93
94
  import { buildInterruptedTurnNote } from "./interrupted-turn.js";
94
95
  import { stableCallId } from "./pending-calls.js";
95
- import { applyModelConstraints, buildSystemPromptParts } from "./prompts.js";
96
+ import { applyModelConstraints, buildMcpCapabilityBlock, buildSystemPromptParts } from "./prompts.js";
96
97
  import { extractProviderOptionsShape } from "./provider-options-shape.js";
97
98
  import { wrapToolSetWithReadBudget } from "./read-path-budget.js";
98
99
  import { containsEncryptedReasoning, sanitizeModelMessages } from "./reasoning.js";
@@ -1017,32 +1018,26 @@ export class MessageProcessor {
1017
1018
  const filteredServers = filterMcpServersByMessage(loadMcpServers(), userMessage, {
1018
1019
  disabled: process.env.MUONROI_DISABLE_SMART_MCP === "1",
1019
1020
  });
1020
- // MCP non-blocking: race the build against a 2500ms cap so a slow
1021
- // stdio MCP server spawn (or many optional servers) does not block
1022
- // the main turn's first token / streamText indefinitely. On timeout
1023
- // or error we fall back to builtins only (domain servers like fs/tools
1024
- // are still valuable but the optional ones can be skipped for this turn).
1021
+ // MCP non-blocking: acquireMcpTools is self-bounding — it connects servers
1022
+ // in parallel and returns PARTIAL results at its internal deadline
1023
+ // (fast/cached servers included; slow first-connects reported in
1024
+ // .errors and available next turn). Clients are POOLED across turns
1025
+ // (client-pool.ts), so a server cold-spawns at most once per session
1026
+ // instead of every turn. No outer race: the old race discarded the
1027
+ // WHOLE bundle on timeout (Phase 1c — session f6f7881a5fae).
1025
1028
  let mcpBundle = null;
1026
1029
  try {
1027
- mcpBundle = await Promise.race([
1028
- buildMcpToolSet(filteredServers, {
1029
- onOAuthRequired: (_serverId, url) => {
1030
- const urlStr = url.toString();
1031
- import("child_process").then(({ exec }) => {
1032
- const cmd = process.platform === "win32"
1033
- ? `start "" "${urlStr}"`
1034
- : process.platform === "darwin"
1035
- ? `open "${urlStr}"`
1036
- : `xdg-open "${urlStr}"`;
1037
- exec(cmd);
1038
- });
1039
- },
1040
- }),
1041
- new Promise((_, reject) => setTimeout(() => reject(new Error("MCP build timeout (2500ms)")), 2500)),
1042
- ]);
1030
+ mcpBundle = await acquireMcpTools(filteredServers, {
1031
+ onOAuthRequired: (_serverId, url) => {
1032
+ // Server-supplied URL is untrusted — openUrl validates the
1033
+ // scheme and spawns via execFile (no shell), closing the
1034
+ // command-injection vector the old exec() opener had.
1035
+ openUrl(url);
1036
+ },
1037
+ });
1043
1038
  }
1044
1039
  catch (err) {
1045
- console.error("[MCP] buildMcpToolSet timed out or failed, proceeding with builtins only", err);
1040
+ console.error("[MCP] buildMcpToolSet failed, proceeding with builtins only", err);
1046
1041
  }
1047
1042
  if (mcpBundle) {
1048
1043
  closeMcp = mcpBundle.close;
@@ -1056,6 +1051,19 @@ export class MessageProcessor {
1056
1051
  const _builtinToolNames = new Set(Object.keys(rawToolSet));
1057
1052
  const { tools: _dedupedMcpTools, dropped: _droppedFsMcp } = dropRedundantFsMcpTools(mcpBundle.tools, _builtinToolNames);
1058
1053
  rawToolSet = { ...rawToolSet, ..._dedupedMcpTools };
1054
+ // muonroi-tools is THIS CLI: every tool it exposes (ee_query,
1055
+ // ee_feedback, ee_health, usage_forensics, lsp_query, setup_guide,
1056
+ // selfverify_*) is now a NATIVE in-process builtin (src/tools/
1057
+ // native-tools.ts) — strictly better (no subprocess, no cold-start).
1058
+ // If an external/legacy config still self-spawns muonroi-tools, drop
1059
+ // any MCP twin whose native equivalent is present so the model never
1060
+ // sees two interchangeable copies. (The CLI no longer self-spawns it
1061
+ // by default — see auto-setup.ts.)
1062
+ for (const key of Object.keys(rawToolSet)) {
1063
+ const twin = key.match(/^mcp_muonroi-tools__(.+)$/);
1064
+ if (twin && rawToolSet[twin[1]])
1065
+ delete rawToolSet[key];
1066
+ }
1059
1067
  if (_droppedFsMcp.length > 0 && deps.session) {
1060
1068
  try {
1061
1069
  logInteraction(deps.session.id, "routing", {
@@ -1068,7 +1076,20 @@ export class MessageProcessor {
1068
1076
  }
1069
1077
  }
1070
1078
  if (mcpBundle.errors.length > 0) {
1071
- yield { type: "content", content: `MCP unavailable: ${mcpBundle.errors.join(" | ")}\n\n` };
1079
+ // A pooled server that is still cold-starting is NOT "unavailable"
1080
+ // — it's warming up and will be ready next turn. Only surface
1081
+ // GENUINE failures as "unavailable"; show warming servers as a
1082
+ // soft, non-alarming note (and only the first time, since the
1083
+ // pool connects them in the background).
1084
+ const warming = mcpBundle.errors.filter((e) => /still connecting/.test(e));
1085
+ const failed = mcpBundle.errors.filter((e) => !/still connecting/.test(e));
1086
+ if (failed.length > 0) {
1087
+ yield { type: "content", content: `MCP unavailable: ${failed.join(" | ")}\n\n` };
1088
+ }
1089
+ if (warming.length > 0) {
1090
+ const names = warming.map((e) => e.split(":")[0]).join(", ");
1091
+ yield { type: "content", content: `MCP warming up (${names}) — ready from the next turn.\n\n` };
1092
+ }
1072
1093
  }
1073
1094
  }
1074
1095
  }
@@ -1169,6 +1190,15 @@ export class MessageProcessor {
1169
1190
  const systemWithShell = shouldInjectCheapModelPlaybook(runtime.modelInfo)
1170
1191
  ? injectCheapModelShellDirective(systemWithPlaybook, cheapModelShellLine(resolveShell({}).kind, process.platform))
1171
1192
  : systemWithPlaybook;
1193
+ // Append the LIVE MCP tool roster so the agent calls connected MCP
1194
+ // tools by their exact mcp_<server>__<tool> name instead of shelling
1195
+ // out (session f6f7881a5fae). Built from the FINAL toolset for this
1196
+ // iteration (post smart-filter + fs-dedup), so it never names a tool
1197
+ // the model can't actually call. Dynamic per turn → must live OUTSIDE
1198
+ // the cached staticPrefix; for claude it lands in the second
1199
+ // (non-cached) system message via the slice below.
1200
+ const mcpCapabilityBlock = buildMcpCapabilityBlock(Object.keys(tools));
1201
+ const systemWithCaps = mcpCapabilityBlock ? `${systemWithShell}${mcpCapabilityBlock}` : systemWithShell;
1172
1202
  const systemForModel = runtime.modelId.startsWith("claude")
1173
1203
  ? [
1174
1204
  {
@@ -1178,10 +1208,10 @@ export class MessageProcessor {
1178
1208
  },
1179
1209
  {
1180
1210
  role: "system",
1181
- content: systemWithShell.slice(systemParts.staticPrefix.length),
1211
+ content: systemWithCaps.slice(systemParts.staticPrefix.length),
1182
1212
  },
1183
1213
  ]
1184
- : systemWithShell;
1214
+ : systemWithCaps;
1185
1215
  // Capture prompt-size breakdown so recordUsage can attach it to the
1186
1216
  // cost-log entry. Without this, "system prompt is huge" is unfalsifiable.
1187
1217
  // chars/4 ≈ tokens for English; reported as chars to keep math obvious.
@@ -244,6 +244,27 @@ export class Agent {
244
244
  this.pendingCalls = options.pendingCalls ?? null;
245
245
  this.permissionMode = options.permissionMode ?? "safe";
246
246
  ensureDefaultMcpServers();
247
+ // Pre-warm the always-on MCP servers in the BACKGROUND so they're pooled
248
+ // before the first user turn. npx stdio servers (filesystem/memory)
249
+ // cold-start >2.5s and would otherwise miss the first turn's build deadline
250
+ // (shown as "MCP unavailable: ... still connecting — available next turn").
251
+ // Empty-message smart-filter keeps only the baseline (drops browser/web
252
+ // categories) so we don't speculatively spawn playwright/tavily. Fire-and-
253
+ // forget; the pool handles errors and the per-turn acquire still connects on
254
+ // demand if this is skipped.
255
+ void (async () => {
256
+ try {
257
+ const [{ warmMcpClients }, { loadMcpServers }, { filterMcpServersByMessage }] = await Promise.all([
258
+ import("../mcp/client-pool.js"),
259
+ import("../utils/settings.js"),
260
+ import("../mcp/smart-filter.js"),
261
+ ]);
262
+ warmMcpClients(filterMcpServersByMessage(loadMcpServers(), ""));
263
+ }
264
+ catch (err) {
265
+ console.error(`[orchestrator] MCP pre-warm skipped: ${err?.message}`);
266
+ }
267
+ })();
247
268
  if (options.persistSession !== false) {
248
269
  this.sessionStore = new SessionStore(this.bash.getCwd());
249
270
  this.workspace = this.sessionStore.getWorkspace();
@@ -469,6 +490,11 @@ export class Agent {
469
490
  this.bash.cleanup(),
470
491
  shutdownWorkspaceLspManager(this.bash.getCwd()),
471
492
  extractSession(this.messages, this.bash.getCwd(), "cli-exit", this.getSessionId()),
493
+ // Tear down pooled MCP clients (client-pool.ts). They persist across turns
494
+ // by design (no per-turn cold-spawn), so the only real teardown is here at
495
+ // session end. Stdio children would die with the process anyway, but close
496
+ // them gracefully on a clean exit.
497
+ import("../mcp/client-pool.js").then((m) => m.closeAllMcpClients()),
472
498
  ]);
473
499
  }
474
500
  // Tool-loop cap handler — set by the UI (app.tsx) at startup. Invoked from
@@ -3,6 +3,38 @@ import { type CustomSubagentConfig, type SandboxMode, type SandboxSettings } fro
3
3
  export declare const MAX_TOOL_ROUNDS: number;
4
4
  export declare const VISION_MODEL = "grok-4-1-fast-reasoning";
5
5
  export declare const COMPUTER_MODEL = "grok-4.20-0309-reasoning";
6
+ /**
7
+ * Phase 5 Fix — Env-aware ENVIRONMENT block.
8
+ *
9
+ * Replaces the static rendering-only block with a dynamic block that
10
+ * tells the model exactly which OS + shell + cwd it's operating in.
11
+ * Without this the model historically emitted PowerShell cmdlets
12
+ * (Get-ChildItem, Select-Object, $null), cmd.exe syntax (del, if exist),
13
+ * or POSIX tools that aren't installed (hyperfine) — all of which fail
14
+ * silently in the bash tool and waste tokens on retry-cascades.
15
+ *
16
+ * Evidence: sessions f9a4cea1bf44, 9c63a38197f3, d0dc4a1f542a,
17
+ * 77cd2e11c6a5, 1bc27b79223c all logged shell-mismatch errors.
18
+ *
19
+ * The block is recomputed on each system-prompt assembly so settings
20
+ * changes (MUONROI_SHELL override, shell.kind config) are reflected
21
+ * without a CLI restart.
22
+ */
23
+ /**
24
+ * Deterministically detect the project's stack from manifest/lockfile presence
25
+ * at the workspace root. Pure (no LLM), cheap (one readdir), zero-hardcode (no
26
+ * model/provider IDs — only ecosystem markers). Returns a compact one-line
27
+ * summary like "TypeScript · pkg: bun · tests: vitest · vcs: git", or "" when
28
+ * nothing recognizable is present (greenfield / unreadable dir).
29
+ *
30
+ * Motivation (2026-06-14 dogfood): the ENVIRONMENT block told the model its OS,
31
+ * shell, and cwd but never WHICH project it was in — so the model acted
32
+ * context-blind, assumed Python, and asked the user to describe the repo it was
33
+ * already running inside. This gives every model, on every turn, in every mode
34
+ * (agent/plan/ask) and for every provider (it is NOT in the strippable TOOLS
35
+ * section), a concrete self-model of the codebase it can act on.
36
+ */
37
+ export declare function detectProjectStack(cwd: string): string;
6
38
  export declare function findCustomSubagent(agent: string, subagents?: CustomSubagentConfig[]): CustomSubagentConfig | undefined;
7
39
  export declare function formatCustomSubagentsPromptSection(subagents: CustomSubagentConfig[]): string;
8
40
  export interface SystemPromptParts {
@@ -24,6 +56,25 @@ export interface SystemPromptOptions {
24
56
  */
25
57
  chitchat?: boolean;
26
58
  }
59
+ /**
60
+ * Render the LIVE per-turn MCP tool roster as a system-prompt block.
61
+ *
62
+ * The static prompt only states the mcp_<server>__<tool> naming convention; it
63
+ * never names the tools actually connected this turn, and the per-message smart
64
+ * filter can drop whole servers. The model therefore receives connected MCP
65
+ * tools ONLY as raw tool JSON, which it can overlook — live failure
66
+ * (session f6f7881a5fae): asked to call `setup_guide`, the agent said "I don't
67
+ * have a direct call_mcp tool" and drove the muonroi-docs server by hand over
68
+ * bash JSON-RPC, fabricating output. Surfacing the exact callable names in prose
69
+ * closes that gap.
70
+ *
71
+ * `toolNames` should be the keys of the FINAL assembled tool set for the turn
72
+ * (post smart-filter, post fs-dedup). Returns "" when no MCP tool is connected,
73
+ * so non-agent / chitchat / no-client-tools turns add nothing. The block is
74
+ * DYNAMIC (varies per turn) so callers must append it OUTSIDE the cached static
75
+ * prefix.
76
+ */
77
+ export declare function buildMcpCapabilityBlock(toolNames: readonly string[]): string;
27
78
  export declare function buildSystemPromptParts(cwd: string, mode: AgentMode, sandboxMode: SandboxMode, planContext?: string | null, subagents?: CustomSubagentConfig[], sandboxSettings?: SandboxSettings, providerId?: string, resumeDigest?: string | null, options?: SystemPromptOptions): SystemPromptParts;
28
79
  export declare function buildSystemPrompt(cwd: string, mode: AgentMode, sandboxMode: SandboxMode, planContext?: string | null, subagents?: CustomSubagentConfig[], sandboxSettings?: SandboxSettings, providerId?: string, resumeDigest?: string | null, options?: SystemPromptOptions): string;
29
80
  export declare function buildSubagentPrompt(request: TaskRequest, cwd: string, custom: CustomSubagentConfig | null, sandboxMode: SandboxMode, subagents?: CustomSubagentConfig[], sandboxSettings?: SandboxSettings, providerId?: string): string;