muonroi-cli 1.4.1 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (194)
  1. package/LICENSE +21 -21
  2. package/README.md +122 -122
  3. package/dist/packages/agent-harness-core/src/predicate.d.ts +1 -1
  4. package/dist/src/agent-harness/__tests__/mock-model.spec.js +48 -1
  5. package/dist/src/agent-harness/mock-model.d.ts +11 -0
  6. package/dist/src/agent-harness/mock-model.js +21 -0
  7. package/dist/src/cli/cost-forensics.js +12 -12
  8. package/dist/src/council/__tests__/clarification-prompt.test.js +51 -0
  9. package/dist/src/council/__tests__/clarifier-ready-gate.test.js +32 -0
  10. package/dist/src/council/__tests__/decisions-lock.test.js +17 -1
  11. package/dist/src/council/__tests__/oauth-reachable.test.d.ts +1 -0
  12. package/dist/src/council/__tests__/oauth-reachable.test.js +31 -0
  13. package/dist/src/council/__tests__/parse-outcome-fallback.test.js +11 -0
  14. package/dist/src/council/clarifier.js +9 -1
  15. package/dist/src/council/debate.js +5 -1
  16. package/dist/src/council/decisions-lock.js +3 -3
  17. package/dist/src/council/index.js +12 -5
  18. package/dist/src/council/leader.d.ts +0 -17
  19. package/dist/src/council/leader.js +22 -15
  20. package/dist/src/council/planner.js +1 -1
  21. package/dist/src/council/prompts.js +63 -57
  22. package/dist/src/council/types.d.ts +7 -0
  23. package/dist/src/ee/__tests__/ee-onboarding.test.d.ts +1 -0
  24. package/dist/src/ee/__tests__/ee-onboarding.test.js +32 -0
  25. package/dist/src/ee/artifact-cache.d.ts +56 -0
  26. package/dist/src/ee/artifact-cache.js +155 -0
  27. package/dist/src/ee/artifact-cache.test.d.ts +1 -0
  28. package/dist/src/ee/artifact-cache.test.js +69 -0
  29. package/dist/src/ee/auth.d.ts +9 -0
  30. package/dist/src/ee/auth.js +19 -0
  31. package/dist/src/ee/ee-onboarding.d.ts +5 -0
  32. package/dist/src/ee/ee-onboarding.js +76 -0
  33. package/dist/src/ee/search.js +7 -5
  34. package/dist/src/ee/search.test.d.ts +1 -0
  35. package/dist/src/ee/search.test.js +23 -0
  36. package/dist/src/generated/version.d.ts +1 -1
  37. package/dist/src/generated/version.js +1 -1
  38. package/dist/src/headless/output.js +6 -4
  39. package/dist/src/headless/output.test.js +4 -3
  40. package/dist/src/index.js +20 -1
  41. package/dist/src/mcp/__tests__/auto-setup.test.js +74 -0
  42. package/dist/src/mcp/__tests__/client-pool.spec.d.ts +1 -0
  43. package/dist/src/mcp/__tests__/client-pool.spec.js +98 -0
  44. package/dist/src/mcp/__tests__/parallel-build.spec.d.ts +1 -0
  45. package/dist/src/mcp/__tests__/parallel-build.spec.js +67 -0
  46. package/dist/src/mcp/__tests__/smart-filter.test.js +56 -0
  47. package/dist/src/mcp/auto-setup.js +56 -2
  48. package/dist/src/mcp/client-pool.d.ts +46 -0
  49. package/dist/src/mcp/client-pool.js +212 -0
  50. package/dist/src/mcp/oauth-callback.js +2 -2
  51. package/dist/src/mcp/parse-headers.test.js +14 -14
  52. package/dist/src/mcp/runtime.d.ts +28 -0
  53. package/dist/src/mcp/runtime.js +117 -51
  54. package/dist/src/mcp/self-verify-runner.d.ts +14 -0
  55. package/dist/src/mcp/self-verify-runner.js +38 -0
  56. package/dist/src/mcp/setup-guide-text.d.ts +9 -0
  57. package/dist/src/mcp/setup-guide-text.js +84 -0
  58. package/dist/src/mcp/smart-filter.js +49 -0
  59. package/dist/src/mcp/smoke.test.js +43 -43
  60. package/dist/src/mcp/tools-server.d.ts +7 -0
  61. package/dist/src/mcp/tools-server.js +19 -22
  62. package/dist/src/models/catalog.json +349 -349
  63. package/dist/src/ops/__tests__/doctor-ee-health.test.js +21 -0
  64. package/dist/src/ops/doctor.d.ts +3 -2
  65. package/dist/src/ops/doctor.js +47 -11
  66. package/dist/src/ops/doctor.test.js +4 -3
  67. package/dist/src/orchestrator/__tests__/mcp-capability-block.test.d.ts +1 -0
  68. package/dist/src/orchestrator/__tests__/mcp-capability-block.test.js +39 -0
  69. package/dist/src/orchestrator/__tests__/project-stack.test.d.ts +1 -0
  70. package/dist/src/orchestrator/__tests__/project-stack.test.js +65 -0
  71. package/dist/src/orchestrator/batch-turn-runner.js +7 -11
  72. package/dist/src/orchestrator/compaction.d.ts +2 -0
  73. package/dist/src/orchestrator/compaction.js +14 -1
  74. package/dist/src/orchestrator/compaction.test.js +25 -1
  75. package/dist/src/orchestrator/message-processor.js +72 -32
  76. package/dist/src/orchestrator/orchestrator.js +26 -0
  77. package/dist/src/orchestrator/prompts.d.ts +51 -0
  78. package/dist/src/orchestrator/prompts.js +257 -134
  79. package/dist/src/orchestrator/scope-ceiling.js +6 -1
  80. package/dist/src/orchestrator/scope-reminder.d.ts +12 -0
  81. package/dist/src/orchestrator/scope-reminder.js +16 -0
  82. package/dist/src/orchestrator/scope-reminder.test.js +22 -1
  83. package/dist/src/orchestrator/stream-runner.js +23 -15
  84. package/dist/src/orchestrator/subagent-compactor.d.ts +14 -5
  85. package/dist/src/orchestrator/subagent-compactor.js +30 -8
  86. package/dist/src/orchestrator/subagent-compactor.spec.js +18 -0
  87. package/dist/src/orchestrator/text-tool-call-detector.test.js +13 -13
  88. package/dist/src/pil/__tests__/clarity-gate.test.js +24 -215
  89. package/dist/src/pil/__tests__/config.test.js +1 -17
  90. package/dist/src/pil/__tests__/discovery.test.js +144 -11
  91. package/dist/src/pil/__tests__/layer1-intent-trace.test.js +7 -2
  92. package/dist/src/pil/__tests__/layer1-intent.test.js +3 -0
  93. package/dist/src/pil/__tests__/layer16-clarity.test.js +32 -116
  94. package/dist/src/pil/__tests__/layer4-gsd.test.js +37 -0
  95. package/dist/src/pil/__tests__/layer6-output.test.js +158 -18
  96. package/dist/src/pil/__tests__/llm-classify.test.js +49 -2
  97. package/dist/src/pil/__tests__/surface-compaction-artifacts.test.d.ts +1 -0
  98. package/dist/src/pil/__tests__/surface-compaction-artifacts.test.js +112 -0
  99. package/dist/src/pil/agent-operating-contract.d.ts +1 -1
  100. package/dist/src/pil/agent-operating-contract.js +2 -0
  101. package/dist/src/pil/agent-operating-contract.test.js +7 -2
  102. package/dist/src/pil/cheap-model-playbook.js +35 -35
  103. package/dist/src/pil/cheap-model-workbooks.js +16 -13
  104. package/dist/src/pil/clarity-gate.d.ts +21 -19
  105. package/dist/src/pil/clarity-gate.js +26 -153
  106. package/dist/src/pil/config.d.ts +9 -1
  107. package/dist/src/pil/config.js +15 -4
  108. package/dist/src/pil/discovery.js +211 -136
  109. package/dist/src/pil/layer1-intent.d.ts +12 -0
  110. package/dist/src/pil/layer1-intent.js +283 -38
  111. package/dist/src/pil/layer1-intent.test.js +210 -4
  112. package/dist/src/pil/layer16-clarity.d.ts +25 -11
  113. package/dist/src/pil/layer16-clarity.js +19 -306
  114. package/dist/src/pil/layer3-ee-injection.d.ts +19 -0
  115. package/dist/src/pil/layer3-ee-injection.js +96 -4
  116. package/dist/src/pil/layer4-gsd.js +18 -6
  117. package/dist/src/pil/layer6-output.d.ts +2 -0
  118. package/dist/src/pil/layer6-output.js +151 -25
  119. package/dist/src/pil/llm-classify.d.ts +26 -0
  120. package/dist/src/pil/llm-classify.js +34 -5
  121. package/dist/src/pil/native-capabilities-workbook.d.ts +1 -1
  122. package/dist/src/pil/native-capabilities-workbook.js +82 -76
  123. package/dist/src/pil/pipeline.js +15 -9
  124. package/dist/src/pil/schema.d.ts +8 -0
  125. package/dist/src/pil/schema.js +12 -1
  126. package/dist/src/pil/task-tier-map.js +4 -0
  127. package/dist/src/pil/types.d.ts +11 -1
  128. package/dist/src/product-loop/done-gate.js +3 -3
  129. package/dist/src/product-loop/loop-driver.js +18 -18
  130. package/dist/src/product-loop/progress-snapshot.js +4 -4
  131. package/dist/src/providers/auth/gemini-oauth.js +6 -15
  132. package/dist/src/providers/auth/grok-oauth.js +6 -15
  133. package/dist/src/providers/auth/openai-oauth.js +6 -15
  134. package/dist/src/providers/mcp-vision-bridge.js +48 -48
  135. package/dist/src/reporter/index.js +1 -1
  136. package/dist/src/scaffold/bb-ecosystem-apply.js +47 -47
  137. package/dist/src/scaffold/bb-quality-gate.js +5 -5
  138. package/dist/src/scaffold/continuation-prompt.js +60 -60
  139. package/dist/src/scaffold/init-new.js +453 -453
  140. package/dist/src/self-qa/__tests__/scenario-planner.test.js +3 -3
  141. package/dist/src/self-qa/agentic-loop.js +24 -19
  142. package/dist/src/self-qa/spec-emitter.js +26 -23
  143. package/dist/src/storage/__tests__/migrations.test.js +2 -2
  144. package/dist/src/storage/interaction-log.js +5 -5
  145. package/dist/src/storage/migrations.js +122 -122
  146. package/dist/src/storage/sessions.js +42 -42
  147. package/dist/src/storage/transcript.js +91 -84
  148. package/dist/src/storage/usage.js +14 -14
  149. package/dist/src/storage/workspaces.js +12 -12
  150. package/dist/src/tools/__tests__/native-tools.test.d.ts +1 -0
  151. package/dist/src/tools/__tests__/native-tools.test.js +53 -0
  152. package/dist/src/tools/git-safety.d.ts +61 -0
  153. package/dist/src/tools/git-safety.js +141 -0
  154. package/dist/src/tools/git-safety.test.d.ts +1 -0
  155. package/dist/src/tools/git-safety.test.js +111 -0
  156. package/dist/src/tools/native-tools.d.ts +31 -0
  157. package/dist/src/tools/native-tools.js +273 -0
  158. package/dist/src/tools/registry-ee-query.test.js +18 -1
  159. package/dist/src/tools/registry-git-safety.test.d.ts +7 -0
  160. package/dist/src/tools/registry-git-safety.test.js +92 -0
  161. package/dist/src/tools/registry.js +52 -6
  162. package/dist/src/ui/__tests__/markdown-render.test.d.ts +1 -0
  163. package/dist/src/ui/__tests__/markdown-render.test.js +48 -0
  164. package/dist/src/ui/app.js +0 -0
  165. package/dist/src/ui/components/message-view.js +4 -1
  166. package/dist/src/ui/components/structured-response-view.js +7 -3
  167. package/dist/src/ui/components/tool-group.js +7 -1
  168. package/dist/src/ui/markdown-render.d.ts +41 -0
  169. package/dist/src/ui/markdown-render.js +223 -0
  170. package/dist/src/ui/markdown.d.ts +10 -0
  171. package/dist/src/ui/markdown.js +12 -35
  172. package/dist/src/ui/slash/council-inspect.js +4 -4
  173. package/dist/src/ui/slash/export.js +4 -4
  174. package/dist/src/ui/utils/text.d.ts +8 -0
  175. package/dist/src/ui/utils/text.js +16 -0
  176. package/dist/src/ui/utils/text.test.d.ts +1 -0
  177. package/dist/src/ui/utils/text.test.js +23 -0
  178. package/dist/src/usage/ledger.js +48 -15
  179. package/dist/src/utils/__tests__/footprint-gitignore.test.d.ts +1 -0
  180. package/dist/src/utils/__tests__/footprint-gitignore.test.js +50 -0
  181. package/dist/src/utils/clipboard-image.js +23 -23
  182. package/dist/src/utils/open-url.d.ts +56 -0
  183. package/dist/src/utils/open-url.js +58 -0
  184. package/dist/src/utils/open-url.test.d.ts +1 -0
  185. package/dist/src/utils/open-url.test.js +86 -0
  186. package/dist/src/utils/settings.d.ts +12 -0
  187. package/dist/src/utils/settings.js +48 -0
  188. package/dist/src/utils/side-question.js +2 -2
  189. package/dist/src/utils/skills.js +3 -3
  190. package/dist/src/verify/__tests__/coverage-parsers.test.js +30 -30
  191. package/dist/src/verify/environment.js +2 -1
  192. package/package.json +1 -1
  193. package/dist/src/pil/layer16-clarity.test.js +0 -31
  194. /package/dist/src/{pil/layer16-clarity.test.d.ts → council/__tests__/clarification-prompt.test.d.ts} +0 -0
@@ -27,7 +27,9 @@
27
27
  // - F1 (sub-agent cumulative cap) — wrapToolSetWithCap
28
28
  // - siliconflow reasoning-strip — taskCaps.sanitizeHistory
29
29
  import { stepCountIs, streamText } from "ai";
30
- import { buildMcpToolSet } from "../mcp/runtime.js";
30
+ import { recordArtifact } from "../ee/artifact-cache.js";
31
+ import { getDefaultEEClient } from "../ee/intercept.js";
32
+ import { acquireMcpTools } from "../mcp/client-pool.js";
31
33
  import { normalizeModelId } from "../models/registry.js";
32
34
  import { cheapModelShellLine, injectCheapModelPlaybook, injectCheapModelShellDirective, shouldInjectCheapModelPlaybook, } from "../pil/cheap-model-playbook.js";
33
35
  import { injectCheapModelWorkbook, shouldInjectCheapModelWorkbook, subagentTaskType, } from "../pil/cheap-model-workbooks.js";
@@ -38,6 +40,7 @@ import { wireDebug } from "../providers/wire-debug.js";
38
40
  import { BashTool } from "../tools/bash.js";
39
41
  import { createBuiltinTools } from "../tools/registry.js";
40
42
  import { statusBarStore } from "../ui/status-bar/store.js";
43
+ import { openUrl } from "../utils/open-url.js";
41
44
  import { getCurrentShellSettings, getProviderStallTimeoutMs, getSubAgentBudgetChars, getSubAgentCompactKeepLast, getSubAgentCompactThresholdChars, loadMcpServers, loadValidSubAgents, } from "../utils/settings.js";
42
45
  import { resolveShell } from "../utils/shell.js";
43
46
  import { prepareVerifySandbox } from "../verify/entrypoint.js";
@@ -50,7 +53,6 @@ import { repairToolCallHook } from "./repair-tool-call.js";
50
53
  import { classifyStreamError } from "./retry-classifier.js";
51
54
  import { incSessionStep, resolveCeiling } from "./scope-ceiling.js";
52
55
  import { attachReminderToMessages, buildScopeReminder, cadenceForSize, shouldInjectReminder, shouldInjectSoftWarn, } from "./scope-reminder.js";
53
- import { getDefaultEEClient } from "../ee/intercept.js";
54
56
  import { createStallWatchdog, STALL_ERROR_MESSAGE } from "./stall-watchdog.js";
55
57
  import { wrapToolSetWithCap } from "./sub-agent-cap.js";
56
58
  import { compactSubAgentMessages } from "./subagent-compactor.js";
@@ -211,17 +213,12 @@ export class StreamRunner {
211
213
  : childWithPlaybook;
212
214
  onActivity?.(initialDetail);
213
215
  if (childMode === "agent" && taskCaps.supportsClientTools(childRuntime.modelInfo)) {
214
- const mcpBundle = await buildMcpToolSet(loadMcpServers(), {
216
+ const mcpBundle = await acquireMcpTools(loadMcpServers(), {
215
217
  onOAuthRequired: (_serverId, url) => {
216
- const urlStr = url.toString();
217
- import("child_process").then(({ exec }) => {
218
- const cmd = process.platform === "win32"
219
- ? `start "" "${urlStr}"`
220
- : process.platform === "darwin"
221
- ? `open "${urlStr}"`
222
- : `xdg-open "${urlStr}"`;
223
- exec(cmd);
224
- });
218
+ // Server-supplied URL is untrusted — openUrl validates the scheme
219
+ // and spawns via execFile (no shell), closing the command-injection
220
+ // vector the old exec() opener had.
221
+ openUrl(url);
225
222
  },
226
223
  });
227
224
  closeMcp = mcpBundle.close;
@@ -407,18 +404,29 @@ export class StreamRunner {
407
404
  const joined = texts.join(" ");
408
405
  const mKeep = joined.match(/KEEP_TOOL_IDS\s*[:=]\s*([a-z0-9_, -]+)/i);
409
406
  if (mKeep) {
410
- subKeepToolIds = mKeep[1].split(/[,\s]+/).map((s) => s.trim()).filter(Boolean);
407
+ subKeepToolIds = mKeep[1]
408
+ .split(/[,\s]+/)
409
+ .map((s) => s.trim())
410
+ .filter(Boolean);
411
411
  break;
412
412
  }
413
413
  }
414
414
  // Idea 4 persist for sub-agent elisions (best-effort; may lack full session but EE can still index the artifact content).
415
415
  const persistSubArtifact = (toolCallId, toolName, fullContent, reason) => {
416
+ // Local-first durable cache so ee_query rehydrates even when EE is down.
417
+ recordArtifact(toolCallId, toolName, fullContent);
416
418
  try {
417
419
  getDefaultEEClient()
418
- .extract({ transcript: fullContent.slice(0, 4000), projectPath: process.cwd(), meta: { source: "tool-artifact", toolCallId, toolName, reason } }, AbortSignal.timeout(600))
420
+ .extract({
421
+ transcript: fullContent.slice(0, 4000),
422
+ projectPath: process.cwd(),
423
+ meta: { source: "tool-artifact", toolCallId, toolName, reason },
424
+ }, AbortSignal.timeout(600))
419
425
  .catch(() => { });
420
426
  }
421
- catch { /* fail-open */ }
427
+ catch {
428
+ /* fail-open */
429
+ }
422
430
  };
423
431
  const compacted = compactSubAgentMessages(stripped, {
424
432
  thresholdChars: compactThreshold,
@@ -106,8 +106,11 @@ export interface SubAgentCompactorOptions {
106
106
  export declare const CHARS_PER_TOKEN = 4;
107
107
  export declare const SUBAGENT_COMPACT_DEFAULT_THRESHOLD = 80000;
108
108
  export declare const SUBAGENT_COMPACT_DEFAULT_KEEP_LAST = 3;
109
- /** Tools whose full outputs are high-value for anti-mù (idea 1). Keep verbatim even if older than keepLast. */
110
- export declare const IMPORTANT_TOOL_NAMES: readonly ["read_file", "grep", "lsp", "bash"];
109
+ /** Tools whose full outputs are high-value for anti-mù (idea 1). Keep verbatim even if older than keepLast.
110
+ * Extended for meta self-eval: ee_query / usage_forensics / selfverify_* are the exact artifacts
111
+ * the native contract + native-capabilities tell the agent to rely on for "task finished?" and
112
+ * rehydrate during long meta conversations about CLI/PIL/compaction/EE. */
113
+ export declare const IMPORTANT_TOOL_NAMES: readonly ["read_file", "grep", "lsp", "bash", "ee_query", "usage_forensics", "selfverify_start", "selfverify_result", "selfverify_status"];
111
114
  /**
112
115
  * Heuristic: keep full (no stub) for high-signal tool results.
113
116
  * Signals: allowlist tool + (error/todo/plan/keyfile/large output or explicit keep list).
@@ -116,8 +119,14 @@ export declare const IMPORTANT_TOOL_NAMES: readonly ["read_file", "grep", "lsp",
116
119
  export declare function isHighValueToolResult(toolName: string, preview: string, explicitKeepIds?: Set<string>, toolCallId?: string): boolean;
117
120
  export declare function cumulativeMessageChars(messages: ReadonlyArray<ModelMessage>): number;
118
121
  /**
119
- * Compact a sub-agent message array in place-like fashion. Returns a NEW
120
- * array; the input is not mutated. Below the threshold the original array
121
- * reference is returned for cheap identity comparison in tests.
122
+ * Compact a sub-agent message array in place-like fashion. The input is never
123
+ * mutated. When compaction actually elides something a NEW array is returned.
124
+ * On a no-op (below threshold, or too few tool turns to skip) the ORIGINAL input
125
+ * array is returned BY REFERENCE so callers can detect "did not compact this
126
+ * step" via identity (`compacted === input`). The B4 wiring in
127
+ * message-processor.ts (pre-compaction warning + compaction note gating) and the
128
+ * sub-agent wiring in stream-runner.ts both rely on this contract — returning a
129
+ * fresh slice on a no-op silently made the warning dead and the note fire every
130
+ * step.
122
131
  */
123
132
  export declare function compactSubAgentMessages(messages: ReadonlyArray<ModelMessage>, opts?: SubAgentCompactorOptions): ModelMessage[];
@@ -58,8 +58,21 @@ export const SUBAGENT_COMPACT_DEFAULT_THRESHOLD = 80_000;
58
58
  export const SUBAGENT_COMPACT_DEFAULT_KEEP_LAST = 3;
59
59
  const DEFAULT_OUTPUT_PREVIEW_CHARS = 200;
60
60
  const DEFAULT_LABEL = "sub-agent";
61
- /** Tools whose full outputs are high-value for anti-mù (idea 1). Keep verbatim even if older than keepLast. */
62
- export const IMPORTANT_TOOL_NAMES = ["read_file", "grep", "lsp", "bash"];
61
+ /** Tools whose full outputs are high-value for anti-mù (idea 1). Keep verbatim even if older than keepLast.
62
+ * Extended for meta self-eval: ee_query / usage_forensics / selfverify_* are the exact artifacts
63
+ * the native contract + native-capabilities tell the agent to rely on for "task finished?" and
64
+ * rehydrate during long meta conversations about CLI/PIL/compaction/EE. */
65
+ export const IMPORTANT_TOOL_NAMES = [
66
+ "read_file",
67
+ "grep",
68
+ "lsp",
69
+ "bash",
70
+ "ee_query",
71
+ "usage_forensics",
72
+ "selfverify_start",
73
+ "selfverify_result",
74
+ "selfverify_status",
75
+ ];
63
76
  /**
64
77
  * Heuristic: keep full (no stub) for high-signal tool results.
65
78
  * Signals: allowlist tool + (error/todo/plan/keyfile/large output or explicit keep list).
@@ -268,7 +281,9 @@ function rewriteOlderToolMessage(msg, previewChars, label, keepToolIds, persistA
268
281
  try {
269
282
  persistArtifact(toolCallId, tr.toolName, rawPreview, "elided-by-compactor");
270
283
  }
271
- catch { /* fail-open */ }
284
+ catch {
285
+ /* fail-open */
286
+ }
272
287
  }
273
288
  return {
274
289
  type: "tool-result",
@@ -282,9 +297,15 @@ function rewriteOlderToolMessage(msg, previewChars, label, keepToolIds, persistA
282
297
  return { ...msg, content: rewritten };
283
298
  }
284
299
  /**
285
- * Compact a sub-agent message array in place-like fashion. Returns a NEW
286
- * array; the input is not mutated. Below the threshold the original array
287
- * reference is returned for cheap identity comparison in tests.
300
+ * Compact a sub-agent message array in place-like fashion. The input is never
301
+ * mutated. When compaction actually elides something a NEW array is returned.
302
+ * On a no-op (below threshold, or too few tool turns to skip) the ORIGINAL input
303
+ * array is returned BY REFERENCE so callers can detect "did not compact this
304
+ * step" via identity (`compacted === input`). The B4 wiring in
305
+ * message-processor.ts (pre-compaction warning + compaction note gating) and the
306
+ * sub-agent wiring in stream-runner.ts both rely on this contract — returning a
307
+ * fresh slice on a no-op silently made the warning dead and the note fire every
308
+ * step.
288
309
  */
289
310
  export function compactSubAgentMessages(messages, opts = {}) {
290
311
  const resolved = resolveOpts(opts);
@@ -299,11 +320,12 @@ export function compactSubAgentMessages(messages, opts = {}) {
299
320
  // window utilization. Falls back to static char threshold + keepLast
300
321
  // when no contextWindowTokens supplied (preserves old behaviour).
301
322
  const { effectiveThresholdChars, effectiveKeepLastTurns } = computeDynamicParams(total, resolved);
323
+ // No-op: return the input BY REFERENCE (contract above) so `compacted === input`.
302
324
  if (total < effectiveThresholdChars)
303
- return messages.slice();
325
+ return messages;
304
326
  const keepFrom = findKeepFromIndex(messages, effectiveKeepLastTurns);
305
327
  if (keepFrom <= 0)
306
- return messages.slice();
328
+ return messages;
307
329
  // Walk older messages; rewrite fresh tool results into stubs, super-shrink
308
330
  // already-stubbed results (F1), and strip args off older assistant
309
331
  // tool-call shells (F1). The 1:1 assistant↔tool pairing required by the AI
@@ -64,6 +64,24 @@ describe("subagent-compactor: compactSubAgentMessages", () => {
64
64
  // No tool-result rewrite happened — output object identity per part preserved.
65
65
  expect(out[3]).toBe(msgs[3]);
66
66
  });
67
+ it("returns the SAME array reference on a no-op below threshold (compacted===input contract)", () => {
68
+ // Callers (message-processor B4 prepareStep:1840/1908/1914) detect "did NOT
69
+ // compact this step" via `compacted === stripped`. The docstring promises the
70
+ // original ref on a no-op; returning a fresh slice silently broke that —
71
+ // making the pre-compaction warning dead and the compaction note fire every
72
+ // step. Lock the identity contract.
73
+ const msgs = buildHistory(2, 5); // below threshold
74
+ expect(compactSubAgentMessages(msgs)).toBe(msgs);
75
+ });
76
+ it("returns a NEW array when compaction actually elides (compacted!==input)", () => {
77
+ const msgs = buildHistory(10, 10); // ~100kb > threshold
78
+ for (const m of msgs) {
79
+ if (m.role === "tool" && Array.isArray(m.content)) {
80
+ m.content[0].toolName = "other_tool"; // force low-value so it elides
81
+ }
82
+ }
83
+ expect(compactSubAgentMessages(msgs)).not.toBe(msgs);
84
+ });
67
85
  it("compacts when cumulative chars exceed threshold", () => {
68
86
  const msgs = buildHistory(10, 10); // ~100kb of tool output
69
87
  // Neutralize to test pure size-based elision (high-value keep would reduce savings).
@@ -6,10 +6,10 @@ describe("detectTextEmittedToolCall", () => {
6
6
  // destructive edit, deepseek emitted this as plain assistant text to
7
7
  // re-read the file — the CLI returned it as the final answer and the turn
8
8
  // was silently wasted with a broken file left behind.
9
- const text = `Let me restore the file properly.
10
-
11
- <read_file>
12
- <path>src/app/screens/story-list/story-list.component.html</path>
9
+ const text = `Let me restore the file properly.
10
+
11
+ <read_file>
12
+ <path>src/app/screens/story-list/story-list.component.html</path>
13
13
  </read_file>`;
14
14
  const r = detectTextEmittedToolCall(text);
15
15
  expect(r.detected).toBe(true);
@@ -43,10 +43,10 @@ describe("detectTextEmittedToolCall", () => {
43
43
  // Live: storyflow_ui explore-A/B, deepseek T3 (session 799f0508e830) emitted
44
44
  // this as text and made no real tool call → empty, silent turn. The generic
45
45
  // <invoke matcher misses it because `<` is followed by the U+FF5C sentinel.
46
- const text = `<||DSML||tool_calls>
47
- <||DSML||invoke name="read_file">
48
- <||DSML||parameter name="file_path" string="true">src/app/foo.html</||DSML||parameter>
49
- </||DSML||invoke>
46
+ const text = `<||DSML||tool_calls>
47
+ <||DSML||invoke name="read_file">
48
+ <||DSML||parameter name="file_path" string="true">src/app/foo.html</||DSML||parameter>
49
+ </||DSML||invoke>
50
50
  </||DSML||tool_calls>`;
51
51
  const r = detectTextEmittedToolCall(text);
52
52
  expect(r.detected).toBe(true);
@@ -60,11 +60,11 @@ describe("detectTextEmittedToolCall", () => {
60
60
  expect(detectTextEmittedToolCall("I edited the file and ran the tests; everything passes.").detected).toBe(false);
61
61
  });
62
62
  it("parseDsmlToolCalls extracts name + args from the DSML block (for targeted re-steer)", () => {
63
- const text = `<||DSML||tool_calls>
64
- <||DSML||invoke name="read_file">
65
- <||DSML||parameter name="file_path" string="true">src/app/foo.html</||DSML||parameter>
66
- <||DSML||parameter name="start_line" string="false">25</||DSML||parameter>
67
- </||DSML||invoke>
63
+ const text = `<||DSML||tool_calls>
64
+ <||DSML||invoke name="read_file">
65
+ <||DSML||parameter name="file_path" string="true">src/app/foo.html</||DSML||parameter>
66
+ <||DSML||parameter name="start_line" string="false">25</||DSML||parameter>
67
+ </||DSML||invoke>
68
68
  </||DSML||tool_calls>`;
69
69
  const calls = parseDsmlToolCalls(text);
70
70
  expect(calls).toHaveLength(1);
@@ -1,213 +1,28 @@
1
1
  import { describe, expect, it } from "vitest";
2
- import { canInferOutcome, countFileReferences, hasExplicitScope, hasExternalInfoScope, hasImageScope, hasOperationalScope, hasSelfContainedComputationScope, hasWholeRepoScope, shouldAutoPass, } from "../clarity-gate.js";
3
- describe("hasWholeRepoScope()", () => {
4
- it("detects whole-repo / whole-project intent (EN + VI)", () => {
5
- // The repo-eval prompt that fired a nonsensical "which part?" askcard.
6
- expect(hasWholeRepoScope("đánh giá repo muonroi-cli này: điểm mạnh, điểm yếu")).toBe(true);
7
- expect(hasWholeRepoScope("evaluate the repo: strengths and weaknesses")).toBe(true);
8
- expect(hasWholeRepoScope("review the whole codebase")).toBe(true);
9
- expect(hasWholeRepoScope("audit the entire project")).toBe(true);
10
- expect(hasWholeRepoScope("phân tích toàn bộ dự án")).toBe(true);
11
- expect(hasWholeRepoScope("give me an overview of the repository")).toBe(true);
12
- // summarize/overview verbs (gap found in the deepseek session probe: "tóm tắt
13
- // repo này" still fired the scope askcard because the verb list lacked it).
14
- expect(hasWholeRepoScope("tóm tắt nhanh repo này")).toBe(true);
15
- expect(hasWholeRepoScope("summarize the repository")).toBe(true);
16
- expect(hasWholeRepoScope("give me a summary of the project")).toBe(true);
17
- });
18
- it("does NOT fire on summarize/review of a narrow target", () => {
19
- expect(hasWholeRepoScope("summarize the login function")).toBe(false);
20
- expect(hasWholeRepoScope("tóm tắt hàm xử auth")).toBe(false);
21
- });
22
- it("does NOT fire on narrow tasks that merely mention a repo/project", () => {
23
- // "this repo" without a wholeness/eval signal must still be scoped.
24
- expect(hasWholeRepoScope("add a logout button to this repo")).toBe(false);
25
- expect(hasWholeRepoScope("fix the login bug in the project")).toBe(false);
26
- expect(hasWholeRepoScope("implement the search feature")).toBe(false);
27
- expect(hasWholeRepoScope("refactor the auth module")).toBe(false);
28
- });
29
- it("whole-repo scope no longer blocks auto-pass (was: scope-gap → false)", () => {
30
- // With an inferable outcome (explicit goal), the ONLY remaining blocker for a
31
- // repo-wide prompt was the scope gap. hasWholeRepoScope clears it.
32
- const prompt = "review the entire codebase — goal: a report of strengths and weaknesses";
33
- expect(shouldAutoPass({ confidence: 0.9, taskType: "analyze", complexity: "low" }, prompt)).toBe(true);
34
- // Control: same shape but NOT repo-wide still fails on the scope gap.
35
- const narrow = "review the system — goal: a report of strengths and weaknesses";
36
- expect(shouldAutoPass({ confidence: 0.9, taskType: "analyze", complexity: "low" }, narrow)).toBe(false);
37
- });
38
- });
39
- describe("hasSelfContainedComputationScope()", () => {
40
- it("detects an inline-data computation prompt (the operand is in the prompt, not the codebase)", () => {
41
- // Live drive (deepseek-vs-grok A/B, session probe 2026-06-05): "Compute
42
- // f([3,1,2]) where f sorts the list ascending then returns the sum of the
43
- // first two elements." classified taskType=analyze (regex:read matched the
44
- // bare word "list") fired the codebase-scope askcard "Which part of the
45
- // codebase should this target?" — nonsensical for a self-contained math
46
- // problem whose input data is supplied inline. Symmetric to image/web/
47
- // operational scope guards.
48
- expect(hasSelfContainedComputationScope("Compute f([3,1,2]) where f sorts the list ascending then returns the sum of the first two elements.")).toBe(true);
49
- expect(hasSelfContainedComputationScope("Given the array [5, 2, 8, 1, 9], what is the second largest element?")).toBe(true);
50
- expect(hasSelfContainedComputationScope("What is the median of [10, 4, 7]?")).toBe(true);
51
- expect(hasSelfContainedComputationScope('Reverse the list ["a", "b", "c"] and return it.')).toBe(true);
52
- });
53
- it("does NOT fire without an inline data literal", () => {
54
- // The framing verb alone is not enough — a codebase task can say "compute"
55
- // ("compute the hash in the auth module"). Only an inline operand qualifies.
56
- expect(hasSelfContainedComputationScope("compute the cache key in the auth module")).toBe(false);
57
- expect(hasSelfContainedComputationScope("sort the users table by created_at")).toBe(false);
58
- expect(hasSelfContainedComputationScope("what is the second largest element of the array")).toBe(false);
59
- });
60
- it("does NOT fire on a real codebase task that merely contains an array literal (no compute framing)", () => {
61
- // Narrowness guard: the literal alone is not enough. A feature/debug task
62
- // that embeds a literal but is scoped to the codebase must KEEP its scope
63
- // askcard. Requires BOTH an inline literal AND computation framing.
64
- expect(hasSelfContainedComputationScope("add the items [1, 2, 3] to the cart in the checkout flow")).toBe(false);
65
- expect(hasSelfContainedComputationScope("fix the bug where parseRange([1, 5]) returns the wrong values")).toBe(false);
66
- expect(hasSelfContainedComputationScope("set the default retry delays to [100, 200, 400] in the config")).toBe(false);
67
- });
68
- it("does NOT fire on bracketed file-name lists (those are codebase-scoped)", () => {
69
- // [a.ts, b.ts] is a list of files, not data — must stay codebase-scoped.
70
- expect(hasSelfContainedComputationScope("compare the exports of [auth.ts, session.ts]")).toBe(false);
71
- });
72
- it("self-contained computation no longer blocks auto-pass (was: scope-gap → false)", () => {
73
- // With an inferable outcome ("return the result"), the ONLY remaining blocker
74
- // for an inline-data computation prompt was the scope gap.
75
- // hasSelfContainedComputationScope clears it.
76
- const prompt = "Compute the sum of the first two sorted elements of [3, 1, 2] and return the result.";
77
- expect(shouldAutoPass({ confidence: 0.9, taskType: "analyze", complexity: "low" }, prompt)).toBe(true);
78
- // Control: same outcome-inferable shape but NO inline literal still fails on
79
- // the scope gap (a real codebase computation must still be scoped).
80
- const codeTask = "Compute the largest element of the users array and return it.";
81
- expect(shouldAutoPass({ confidence: 0.9, taskType: "analyze", complexity: "low" }, codeTask)).toBe(false);
82
- });
83
- });
84
- describe("canInferOutcome()", () => {
85
- it("returns false for null taskType", () => {
86
- expect(canInferOutcome(null, "do something")).toBe(false);
87
- });
88
- it("returns false for general taskType", () => {
89
- expect(canInferOutcome("general", "fix stuff")).toBe(false);
90
- });
91
- it("returns true for a general taskType that is a direct imperative command", () => {
92
- // A direct command has a self-evident outcome (it runs / it shows), so it
93
- // should auto-pass instead of triggering an outcome-clarification askcard.
94
- expect(canInferOutcome("general", "run the test suite")).toBe(true);
95
- expect(canInferOutcome("general", "echo harness-ok")).toBe(true);
96
- expect(canInferOutcome("general", "show the package.json scripts")).toBe(true);
97
- expect(canInferOutcome("general", "list the open ports")).toBe(true);
98
- });
99
- it("returns false for a general imperative verb with no object", () => {
100
- expect(canInferOutcome("general", "run")).toBe(false);
101
- expect(canInferOutcome("general", "execute ")).toBe(false);
102
- });
103
- it("returns false for a general non-imperative prompt", () => {
104
- expect(canInferOutcome("general", "the build is slow")).toBe(false);
105
- });
106
- it("returns true when prompt has error reference", () => {
107
- expect(canInferOutcome("debug", "fix the TypeError in login")).toBe(true);
108
- });
109
- it("returns true when prompt has file:line reference", () => {
110
- expect(canInferOutcome("debug", "fix auth.ts:42")).toBe(true);
111
- });
112
- it("returns true when prompt has target state verb", () => {
113
- expect(canInferOutcome("refactor", "should return a Promise")).toBe(true);
114
- });
115
- it("returns true when prompt has add pattern", () => {
116
- expect(canInferOutcome("generate", "add validation to login form")).toBe(true);
117
- });
118
- it("returns false for vague prompt with valid taskType", () => {
119
- expect(canInferOutcome("debug", "fix auth")).toBe(false);
120
- });
121
- });
122
- describe("countFileReferences()", () => {
123
- it("counts .ts and .tsx files", () => {
124
- expect(countFileReferences("fix login.ts and dashboard.tsx")).toBe(2);
125
- });
126
- it("returns 0 for no file refs", () => {
127
- expect(countFileReferences("fix the auth module")).toBe(0);
128
- });
129
- it("ignores non-code extensions", () => {
130
- expect(countFileReferences("see report.pdf")).toBe(0);
131
- });
132
- });
133
- describe("hasExplicitScope()", () => {
134
- it("detects src/ paths", () => {
135
- expect(hasExplicitScope("refactor src/auth/jwt.ts")).toBe(true);
136
- });
137
- it("detects lib/ paths", () => {
138
- expect(hasExplicitScope("update lib/utils")).toBe(true);
139
- });
140
- it("returns false for no path", () => {
141
- expect(hasExplicitScope("refactor the code")).toBe(false);
142
- });
143
- });
144
- describe("shouldAutoPass()", () => {
145
- it("auto-passes high-confidence + specific file + inferrable outcome", () => {
146
- expect(shouldAutoPass({ confidence: 0.9, taskType: "debug", complexity: "low" }, "fix TypeError in src/auth/login.ts:42")).toBe(true);
147
- });
148
- it("rejects low confidence", () => {
149
- expect(shouldAutoPass({ confidence: 0.6, taskType: "debug", complexity: "low" }, "fix TypeError in login.ts:42")).toBe(false);
150
- });
151
- it("rejects vague prompt despite high confidence", () => {
152
- expect(shouldAutoPass({ confidence: 0.9, taskType: "debug", complexity: "low" }, "fix auth")).toBe(false);
153
- });
154
- it("rejects high complexity", () => {
155
- expect(shouldAutoPass({ confidence: 0.9, taskType: "refactor", complexity: "high" }, "refactor src/auth/login.ts should return Promise")).toBe(false);
156
- });
157
- it("auto-passes with explicit scope path even without file extension", () => {
158
- expect(shouldAutoPass({ confidence: 0.9, taskType: "refactor", complexity: "medium" }, "refactor src/auth/ module to return Promises")).toBe(true);
159
- });
160
- // PIL-L6 fix
161
- it("auto-passes CI/build debug task even without file path (operational scope)", () => {
162
- expect(shouldAutoPass({ confidence: 0.9, taskType: "debug", complexity: "low" }, "fix the ci fail — goal: green pipeline")).toBe(true);
163
- });
164
- // Image-scope fix — an image-analysis task is scoped to the image, not a file
165
- // path, so it should auto-pass when its outcome is inferrable.
166
- it("auto-passes an image-analysis task even without file path (image scope)", () => {
167
- expect(shouldAutoPass({ confidence: 0.9, taskType: "analyze", complexity: "low" }, "analyze screenshot.png — goal: describe the layout")).toBe(true);
168
- });
169
- // External-info fix — a web-search task is scoped to the web, not a file path.
170
- it("auto-passes a web-search task even without file path (external-info scope)", () => {
171
- expect(shouldAutoPass({ confidence: 0.9, taskType: "analyze", complexity: "low" }, "search the web for the vitest release date — goal: find the version")).toBe(true);
172
- });
173
- });
174
- describe("hasExternalInfoScope()", () => {
175
- it("detects web-search / external-info intent", () => {
176
- expect(hasExternalInfoScope("search the web for the latest vitest release notes")).toBe(true);
177
- expect(hasExternalInfoScope("google the error message")).toBe(true);
178
- expect(hasExternalInfoScope("what's the latest news on the framework")).toBe(true);
179
- expect(hasExternalInfoScope("summarize https://example.com/post")).toBe(true);
180
- });
181
- it("returns false for codebase tasks, including in-repo 'search'", () => {
182
- // Narrow: must NOT swallow a real code task. "search the codebase" and
183
- // "search feature" are codebase work and still deserve a scope askcard.
184
- expect(hasExternalInfoScope("search the codebase for usages of foo")).toBe(false);
185
- expect(hasExternalInfoScope("implement the search feature")).toBe(false);
186
- expect(hasExternalInfoScope("add the zod library to the auth module")).toBe(false);
187
- expect(hasExternalInfoScope("refactor the login flow")).toBe(false);
188
- });
189
- });
190
- describe("hasImageScope()", () => {
191
- it("detects an image file extension", () => {
192
- expect(hasImageScope("analyze diagram.png")).toBe(true);
193
- expect(hasImageScope("describe the layout of mock.jpg")).toBe(true);
194
- expect(hasImageScope("read chart.svg")).toBe(true);
195
- });
196
- it("detects a data:image URI and screenshot/photo nouns", () => {
197
- expect(hasImageScope("here is data:image/png;base64,AAAA")).toBe(true);
198
- expect(hasImageScope("take a screenshot and analyze it")).toBe(true);
199
- expect(hasImageScope("look at the photo")).toBe(true);
200
- });
201
- it("returns false for codebase tasks and ambiguous/overloaded words", () => {
202
- // Narrow on purpose: a false positive SUPPRESSES a legitimate scope
203
- // question, so overloaded words must NOT match.
204
- expect(hasImageScope("refactor the login flow")).toBe(false);
205
- expect(hasImageScope("add a logo to the header")).toBe(false); // "logo" excluded
206
- expect(hasImageScope("rebuild the docker image")).toBe(false); // bare "image" excluded
207
- expect(hasImageScope("look at the bigger picture")).toBe(false); // "picture" excluded
208
- });
209
- });
210
- describe("hasOperationalScope() — PIL-L6", () => {
2
+ import { detectNoClarifySignal, hasOperationalScope } from "../clarity-gate.js";
3
+ // Phase 2 (2026-06-16): the regex ASK gate (shouldAutoPass + canInferOutcome +
4
+ // the per-modality scope detectors) was removed the model now decides every
5
+ // clarification. Only two non-gating helpers survive: detectNoClarifySignal
6
+ // (explicit user consent) and hasOperationalScope (outcome-label polish).
7
+ describe("detectNoClarifySignal()", () => {
8
+ it("detects explicit no-clarify directives (EN)", () => {
9
+ expect(detectNoClarifySignal("just answer, don't ask me anything")).toBe(true);
10
+ expect(detectNoClarifySignal("answer directly without asking")).toBe(true);
11
+ expect(detectNoClarifySignal("no questions please, just do it")).toBe(true);
12
+ expect(detectNoClarifySignal("stop asking and give me the result")).toBe(true);
13
+ });
14
+ it("detects explicit no-clarify directives (VI + transliteration)", () => {
15
+ expect(detectNoClarifySignal("Đừng hỏi lại. Trả lời thẳng 3 câu hỏi.")).toBe(true);
16
+ expect(detectNoClarifySignal("không cần hỏi, trả lời luôn")).toBe(true);
17
+ expect(detectNoClarifySignal("tra loi thang dung hoi")).toBe(true);
18
+ });
19
+ it("does NOT match the explanation idiom 'don't ask me why'", () => {
20
+ expect(detectNoClarifySignal("it just works, don't ask me why")).toBe(false);
21
+ expect(detectNoClarifySignal("explain the auth flow")).toBe(false);
22
+ expect(detectNoClarifySignal("which part of the code should I read?")).toBe(false);
23
+ });
24
+ });
25
+ describe("hasOperationalScope()", () => {
211
26
  it("detects ci/build/test/action keywords", () => {
212
27
  expect(hasOperationalScope("fix ci fail")).toBe(true);
213
28
  expect(hasOperationalScope("the build is broken")).toBe(true);
@@ -219,10 +34,4 @@ describe("hasOperationalScope() — PIL-L6", () => {
219
34
  expect(hasOperationalScope("explain hooks")).toBe(false);
220
35
  });
221
36
  });
222
- describe("canInferOutcome() — explicit goal (PIL-L6)", () => {
223
- it("returns true when prompt names an explicit goal", () => {
224
- expect(canInferOutcome("debug", "goal: pipeline green")).toBe(true);
225
- expect(canInferOutcome("debug", "mong muốn: tests passing")).toBe(true);
226
- });
227
- });
228
37
  //# sourceMappingURL=clarity-gate.test.js.map
@@ -1,5 +1,5 @@
1
1
  import { afterEach, beforeEach, describe, expect, it } from "vitest";
2
- import { getAutoPassThreshold, getMaxInterviewQuestions, isDiscoveryEnabled, isUnifiedPilEnabled } from "../config.js";
2
+ import { getMaxInterviewQuestions, isDiscoveryEnabled, isUnifiedPilEnabled } from "../config.js";
3
3
  describe("isUnifiedPilEnabled", () => {
4
4
  const orig = process.env.MUONROI_PIL_UNIFIED;
5
5
  beforeEach(() => {
@@ -43,22 +43,6 @@ describe("isDiscoveryEnabled()", () => {
43
43
  delete process.env.MUONROI_PIL_DISCOVERY;
44
44
  });
45
45
  });
46
- describe("getAutoPassThreshold()", () => {
47
- it("returns 0.85 by default", () => {
48
- delete process.env.MUONROI_PIL_AUTOPASS_THRESHOLD;
49
- expect(getAutoPassThreshold()).toBe(0.85);
50
- });
51
- it("respects env override in range", () => {
52
- process.env.MUONROI_PIL_AUTOPASS_THRESHOLD = "0.7";
53
- expect(getAutoPassThreshold()).toBe(0.7);
54
- delete process.env.MUONROI_PIL_AUTOPASS_THRESHOLD;
55
- });
56
- it("clamps out-of-range to default", () => {
57
- process.env.MUONROI_PIL_AUTOPASS_THRESHOLD = "1.5";
58
- expect(getAutoPassThreshold()).toBe(0.85);
59
- delete process.env.MUONROI_PIL_AUTOPASS_THRESHOLD;
60
- });
61
- });
62
46
  describe("getMaxInterviewQuestions()", () => {
63
47
  it("returns 3 by default", () => {
64
48
  delete process.env.MUONROI_PIL_MAX_QUESTIONS;