npm - muonroi-cli - Versions diffs - 1.4.1 → 1.6.0 - Mend

muonroi-cli 1.4.1 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (194)

package/LICENSE +21 -21
package/README.md +122 -122
package/dist/packages/agent-harness-core/src/predicate.d.ts +1 -1
package/dist/src/agent-harness/__tests__/mock-model.spec.js +48 -1
package/dist/src/agent-harness/mock-model.d.ts +11 -0
package/dist/src/agent-harness/mock-model.js +21 -0
package/dist/src/cli/cost-forensics.js +12 -12
package/dist/src/council/__tests__/clarification-prompt.test.js +51 -0
package/dist/src/council/__tests__/clarifier-ready-gate.test.js +32 -0
package/dist/src/council/__tests__/decisions-lock.test.js +17 -1
package/dist/src/council/__tests__/oauth-reachable.test.d.ts +1 -0
package/dist/src/council/__tests__/oauth-reachable.test.js +31 -0
package/dist/src/council/__tests__/parse-outcome-fallback.test.js +11 -0
package/dist/src/council/clarifier.js +9 -1
package/dist/src/council/debate.js +5 -1
package/dist/src/council/decisions-lock.js +3 -3
package/dist/src/council/index.js +12 -5
package/dist/src/council/leader.d.ts +0 -17
package/dist/src/council/leader.js +22 -15
package/dist/src/council/planner.js +1 -1
package/dist/src/council/prompts.js +63 -57
package/dist/src/council/types.d.ts +7 -0
package/dist/src/ee/__tests__/ee-onboarding.test.d.ts +1 -0
package/dist/src/ee/__tests__/ee-onboarding.test.js +32 -0
package/dist/src/ee/artifact-cache.d.ts +56 -0
package/dist/src/ee/artifact-cache.js +155 -0
package/dist/src/ee/artifact-cache.test.d.ts +1 -0
package/dist/src/ee/artifact-cache.test.js +69 -0
package/dist/src/ee/auth.d.ts +9 -0
package/dist/src/ee/auth.js +19 -0
package/dist/src/ee/ee-onboarding.d.ts +5 -0
package/dist/src/ee/ee-onboarding.js +76 -0
package/dist/src/ee/search.js +7 -5
package/dist/src/ee/search.test.d.ts +1 -0
package/dist/src/ee/search.test.js +23 -0
package/dist/src/generated/version.d.ts +1 -1
package/dist/src/generated/version.js +1 -1
package/dist/src/headless/output.js +6 -4
package/dist/src/headless/output.test.js +4 -3
package/dist/src/index.js +20 -1
package/dist/src/mcp/__tests__/auto-setup.test.js +74 -0
package/dist/src/mcp/__tests__/client-pool.spec.d.ts +1 -0
package/dist/src/mcp/__tests__/client-pool.spec.js +98 -0
package/dist/src/mcp/__tests__/parallel-build.spec.d.ts +1 -0
package/dist/src/mcp/__tests__/parallel-build.spec.js +67 -0
package/dist/src/mcp/__tests__/smart-filter.test.js +56 -0
package/dist/src/mcp/auto-setup.js +56 -2
package/dist/src/mcp/client-pool.d.ts +46 -0
package/dist/src/mcp/client-pool.js +212 -0
package/dist/src/mcp/oauth-callback.js +2 -2
package/dist/src/mcp/parse-headers.test.js +14 -14
package/dist/src/mcp/runtime.d.ts +28 -0
package/dist/src/mcp/runtime.js +117 -51
package/dist/src/mcp/self-verify-runner.d.ts +14 -0
package/dist/src/mcp/self-verify-runner.js +38 -0
package/dist/src/mcp/setup-guide-text.d.ts +9 -0
package/dist/src/mcp/setup-guide-text.js +84 -0
package/dist/src/mcp/smart-filter.js +49 -0
package/dist/src/mcp/smoke.test.js +43 -43
package/dist/src/mcp/tools-server.d.ts +7 -0
package/dist/src/mcp/tools-server.js +19 -22
package/dist/src/models/catalog.json +349 -349
package/dist/src/ops/__tests__/doctor-ee-health.test.js +21 -0
package/dist/src/ops/doctor.d.ts +3 -2
package/dist/src/ops/doctor.js +47 -11
package/dist/src/ops/doctor.test.js +4 -3
package/dist/src/orchestrator/__tests__/mcp-capability-block.test.d.ts +1 -0
package/dist/src/orchestrator/__tests__/mcp-capability-block.test.js +39 -0
package/dist/src/orchestrator/__tests__/project-stack.test.d.ts +1 -0
package/dist/src/orchestrator/__tests__/project-stack.test.js +65 -0
package/dist/src/orchestrator/batch-turn-runner.js +7 -11
package/dist/src/orchestrator/compaction.d.ts +2 -0
package/dist/src/orchestrator/compaction.js +14 -1
package/dist/src/orchestrator/compaction.test.js +25 -1
package/dist/src/orchestrator/message-processor.js +72 -32
package/dist/src/orchestrator/orchestrator.js +26 -0
package/dist/src/orchestrator/prompts.d.ts +51 -0
package/dist/src/orchestrator/prompts.js +257 -134
package/dist/src/orchestrator/scope-ceiling.js +6 -1
package/dist/src/orchestrator/scope-reminder.d.ts +12 -0
package/dist/src/orchestrator/scope-reminder.js +16 -0
package/dist/src/orchestrator/scope-reminder.test.js +22 -1
package/dist/src/orchestrator/stream-runner.js +23 -15
package/dist/src/orchestrator/subagent-compactor.d.ts +14 -5
package/dist/src/orchestrator/subagent-compactor.js +30 -8
package/dist/src/orchestrator/subagent-compactor.spec.js +18 -0
package/dist/src/orchestrator/text-tool-call-detector.test.js +13 -13
package/dist/src/pil/__tests__/clarity-gate.test.js +24 -215
package/dist/src/pil/__tests__/config.test.js +1 -17
package/dist/src/pil/__tests__/discovery.test.js +144 -11
package/dist/src/pil/__tests__/layer1-intent-trace.test.js +7 -2
package/dist/src/pil/__tests__/layer1-intent.test.js +3 -0
package/dist/src/pil/__tests__/layer16-clarity.test.js +32 -116
package/dist/src/pil/__tests__/layer4-gsd.test.js +37 -0
package/dist/src/pil/__tests__/layer6-output.test.js +158 -18
package/dist/src/pil/__tests__/llm-classify.test.js +49 -2
package/dist/src/pil/__tests__/surface-compaction-artifacts.test.d.ts +1 -0
package/dist/src/pil/__tests__/surface-compaction-artifacts.test.js +112 -0
package/dist/src/pil/agent-operating-contract.d.ts +1 -1
package/dist/src/pil/agent-operating-contract.js +2 -0
package/dist/src/pil/agent-operating-contract.test.js +7 -2
package/dist/src/pil/cheap-model-playbook.js +35 -35
package/dist/src/pil/cheap-model-workbooks.js +16 -13
package/dist/src/pil/clarity-gate.d.ts +21 -19
package/dist/src/pil/clarity-gate.js +26 -153
package/dist/src/pil/config.d.ts +9 -1
package/dist/src/pil/config.js +15 -4
package/dist/src/pil/discovery.js +211 -136
package/dist/src/pil/layer1-intent.d.ts +12 -0
package/dist/src/pil/layer1-intent.js +283 -38
package/dist/src/pil/layer1-intent.test.js +210 -4
package/dist/src/pil/layer16-clarity.d.ts +25 -11
package/dist/src/pil/layer16-clarity.js +19 -306
package/dist/src/pil/layer3-ee-injection.d.ts +19 -0
package/dist/src/pil/layer3-ee-injection.js +96 -4
package/dist/src/pil/layer4-gsd.js +18 -6
package/dist/src/pil/layer6-output.d.ts +2 -0
package/dist/src/pil/layer6-output.js +151 -25
package/dist/src/pil/llm-classify.d.ts +26 -0
package/dist/src/pil/llm-classify.js +34 -5
package/dist/src/pil/native-capabilities-workbook.d.ts +1 -1
package/dist/src/pil/native-capabilities-workbook.js +82 -76
package/dist/src/pil/pipeline.js +15 -9
package/dist/src/pil/schema.d.ts +8 -0
package/dist/src/pil/schema.js +12 -1
package/dist/src/pil/task-tier-map.js +4 -0
package/dist/src/pil/types.d.ts +11 -1
package/dist/src/product-loop/done-gate.js +3 -3
package/dist/src/product-loop/loop-driver.js +18 -18
package/dist/src/product-loop/progress-snapshot.js +4 -4
package/dist/src/providers/auth/gemini-oauth.js +6 -15
package/dist/src/providers/auth/grok-oauth.js +6 -15
package/dist/src/providers/auth/openai-oauth.js +6 -15
package/dist/src/providers/mcp-vision-bridge.js +48 -48
package/dist/src/reporter/index.js +1 -1
package/dist/src/scaffold/bb-ecosystem-apply.js +47 -47
package/dist/src/scaffold/bb-quality-gate.js +5 -5
package/dist/src/scaffold/continuation-prompt.js +60 -60
package/dist/src/scaffold/init-new.js +453 -453
package/dist/src/self-qa/__tests__/scenario-planner.test.js +3 -3
package/dist/src/self-qa/agentic-loop.js +24 -19
package/dist/src/self-qa/spec-emitter.js +26 -23
package/dist/src/storage/__tests__/migrations.test.js +2 -2
package/dist/src/storage/interaction-log.js +5 -5
package/dist/src/storage/migrations.js +122 -122
package/dist/src/storage/sessions.js +42 -42
package/dist/src/storage/transcript.js +91 -84
package/dist/src/storage/usage.js +14 -14
package/dist/src/storage/workspaces.js +12 -12
package/dist/src/tools/__tests__/native-tools.test.d.ts +1 -0
package/dist/src/tools/__tests__/native-tools.test.js +53 -0
package/dist/src/tools/git-safety.d.ts +61 -0
package/dist/src/tools/git-safety.js +141 -0
package/dist/src/tools/git-safety.test.d.ts +1 -0
package/dist/src/tools/git-safety.test.js +111 -0
package/dist/src/tools/native-tools.d.ts +31 -0
package/dist/src/tools/native-tools.js +273 -0
package/dist/src/tools/registry-ee-query.test.js +18 -1
package/dist/src/tools/registry-git-safety.test.d.ts +7 -0
package/dist/src/tools/registry-git-safety.test.js +92 -0
package/dist/src/tools/registry.js +52 -6
package/dist/src/ui/__tests__/markdown-render.test.d.ts +1 -0
package/dist/src/ui/__tests__/markdown-render.test.js +48 -0
package/dist/src/ui/app.js +0 -0
package/dist/src/ui/components/message-view.js +4 -1
package/dist/src/ui/components/structured-response-view.js +7 -3
package/dist/src/ui/components/tool-group.js +7 -1
package/dist/src/ui/markdown-render.d.ts +41 -0
package/dist/src/ui/markdown-render.js +223 -0
package/dist/src/ui/markdown.d.ts +10 -0
package/dist/src/ui/markdown.js +12 -35
package/dist/src/ui/slash/council-inspect.js +4 -4
package/dist/src/ui/slash/export.js +4 -4
package/dist/src/ui/utils/text.d.ts +8 -0
package/dist/src/ui/utils/text.js +16 -0
package/dist/src/ui/utils/text.test.d.ts +1 -0
package/dist/src/ui/utils/text.test.js +23 -0
package/dist/src/usage/ledger.js +48 -15
package/dist/src/utils/__tests__/footprint-gitignore.test.d.ts +1 -0
package/dist/src/utils/__tests__/footprint-gitignore.test.js +50 -0
package/dist/src/utils/clipboard-image.js +23 -23
package/dist/src/utils/open-url.d.ts +56 -0
package/dist/src/utils/open-url.js +58 -0
package/dist/src/utils/open-url.test.d.ts +1 -0
package/dist/src/utils/open-url.test.js +86 -0
package/dist/src/utils/settings.d.ts +12 -0
package/dist/src/utils/settings.js +48 -0
package/dist/src/utils/side-question.js +2 -2
package/dist/src/utils/skills.js +3 -3
package/dist/src/verify/__tests__/coverage-parsers.test.js +30 -30
package/dist/src/verify/environment.js +2 -1
package/package.json +1 -1
package/dist/src/pil/layer16-clarity.test.js +0 -31
package/dist/src/{pil/layer16-clarity.test.d.ts → council/__tests__/clarification-prompt.test.d.ts} +0 -0

package/dist/src/orchestrator/stream-runner.js CHANGED Viewed

@@ -27,7 +27,9 @@
 //   - F1 (sub-agent cumulative cap)         — wrapToolSetWithCap
 //   - siliconflow reasoning-strip           — taskCaps.sanitizeHistory
 import { stepCountIs, streamText } from "ai";
-import { buildMcpToolSet } from "../mcp/runtime.js";
+import { recordArtifact } from "../ee/artifact-cache.js";
+import { getDefaultEEClient } from "../ee/intercept.js";
+import { acquireMcpTools } from "../mcp/client-pool.js";
 import { normalizeModelId } from "../models/registry.js";
 import { cheapModelShellLine, injectCheapModelPlaybook, injectCheapModelShellDirective, shouldInjectCheapModelPlaybook, } from "../pil/cheap-model-playbook.js";
 import { injectCheapModelWorkbook, shouldInjectCheapModelWorkbook, subagentTaskType, } from "../pil/cheap-model-workbooks.js";
@@ -38,6 +40,7 @@ import { wireDebug } from "../providers/wire-debug.js";
 import { BashTool } from "../tools/bash.js";
 import { createBuiltinTools } from "../tools/registry.js";
 import { statusBarStore } from "../ui/status-bar/store.js";
+import { openUrl } from "../utils/open-url.js";
 import { getCurrentShellSettings, getProviderStallTimeoutMs, getSubAgentBudgetChars, getSubAgentCompactKeepLast, getSubAgentCompactThresholdChars, loadMcpServers, loadValidSubAgents, } from "../utils/settings.js";
 import { resolveShell } from "../utils/shell.js";
 import { prepareVerifySandbox } from "../verify/entrypoint.js";
@@ -50,7 +53,6 @@ import { repairToolCallHook } from "./repair-tool-call.js";
 import { classifyStreamError } from "./retry-classifier.js";
 import { incSessionStep, resolveCeiling } from "./scope-ceiling.js";
 import { attachReminderToMessages, buildScopeReminder, cadenceForSize, shouldInjectReminder, shouldInjectSoftWarn, } from "./scope-reminder.js";
-import { getDefaultEEClient } from "../ee/intercept.js";
 import { createStallWatchdog, STALL_ERROR_MESSAGE } from "./stall-watchdog.js";
 import { wrapToolSetWithCap } from "./sub-agent-cap.js";
 import { compactSubAgentMessages } from "./subagent-compactor.js";
@@ -211,17 +213,12 @@ export class StreamRunner {
             : childWithPlaybook;
         onActivity?.(initialDetail);
         if (childMode === "agent" && taskCaps.supportsClientTools(childRuntime.modelInfo)) {
-            const mcpBundle = await buildMcpToolSet(loadMcpServers(), {
+            const mcpBundle = await acquireMcpTools(loadMcpServers(), {
                 onOAuthRequired: (_serverId, url) => {
-                    const urlStr = url.toString();
-                    import("child_process").then(({ exec }) => {
-                        const cmd = process.platform === "win32"
-                            ? `start "" "${urlStr}"`
-                            : process.platform === "darwin"
-                                ? `open "${urlStr}"`
-                                : `xdg-open "${urlStr}"`;
-                        exec(cmd);
-                    });
+                    // Server-supplied URL is untrusted — openUrl validates the scheme
+                    // and spawns via execFile (no shell), closing the command-injection
+                    // vector the old exec() opener had.
+                    openUrl(url);
                 },
             });
             closeMcp = mcpBundle.close;
@@ -407,18 +404,29 @@ export class StreamRunner {
                     const joined = texts.join(" ");
                     const mKeep = joined.match(/KEEP_TOOL_IDS\s*[:=]\s*([a-z0-9_, -]+)/i);
                     if (mKeep) {
-                        subKeepToolIds = mKeep[1].split(/[,\s]+/).map((s) => s.trim()).filter(Boolean);
+                        subKeepToolIds = mKeep[1]
+                            .split(/[,\s]+/)
+                            .map((s) => s.trim())
+                            .filter(Boolean);
                         break;
                     }
                 }
                 // Idea 4 persist for sub-agent elisions (best-effort; may lack full session but EE can still index the artifact content).
                 const persistSubArtifact = (toolCallId, toolName, fullContent, reason) => {
+                    // Local-first durable cache so ee_query rehydrates even when EE is down.
+                    recordArtifact(toolCallId, toolName, fullContent);
                     try {
                         getDefaultEEClient()
-                            .extract({ transcript: fullContent.slice(0, 4000), projectPath: process.cwd(), meta: { source: "tool-artifact", toolCallId, toolName, reason } }, AbortSignal.timeout(600))
+                            .extract({
+                            transcript: fullContent.slice(0, 4000),
+                            projectPath: process.cwd(),
+                            meta: { source: "tool-artifact", toolCallId, toolName, reason },
+                        }, AbortSignal.timeout(600))
                             .catch(() => { });
                     }
-                    catch { /* fail-open */ }
+                    catch {
+                        /* fail-open */
+                    }
                 };
                 const compacted = compactSubAgentMessages(stripped, {
                     thresholdChars: compactThreshold,

package/dist/src/orchestrator/subagent-compactor.d.ts CHANGED Viewed

@@ -106,8 +106,11 @@ export interface SubAgentCompactorOptions {
 export declare const CHARS_PER_TOKEN = 4;
 export declare const SUBAGENT_COMPACT_DEFAULT_THRESHOLD = 80000;
 export declare const SUBAGENT_COMPACT_DEFAULT_KEEP_LAST = 3;
-/** Tools whose full outputs are high-value for anti-mù (idea 1). Keep verbatim even if older than keepLast. */
-export declare const IMPORTANT_TOOL_NAMES: readonly ["read_file", "grep", "lsp", "bash"];
+/** Tools whose full outputs are high-value for anti-mù (idea 1). Keep verbatim even if older than keepLast.
+ * Extended for meta self-eval: ee_query / usage_forensics / selfverify_* are the exact artifacts
+ * the native contract + native-capabilities tell the agent to rely on for "task finished?" and
+ * rehydrate during long meta conversations about CLI/PIL/compaction/EE. */
+export declare const IMPORTANT_TOOL_NAMES: readonly ["read_file", "grep", "lsp", "bash", "ee_query", "usage_forensics", "selfverify_start", "selfverify_result", "selfverify_status"];
 /**
  * Heuristic: keep full (no stub) for high-signal tool results.
  * Signals: allowlist tool + (error/todo/plan/keyfile/large output or explicit keep list).
@@ -116,8 +119,14 @@ export declare const IMPORTANT_TOOL_NAMES: readonly ["read_file", "grep", "lsp",
 export declare function isHighValueToolResult(toolName: string, preview: string, explicitKeepIds?: Set<string>, toolCallId?: string): boolean;
 export declare function cumulativeMessageChars(messages: ReadonlyArray<ModelMessage>): number;
 /**
- * Compact a sub-agent message array in place-like fashion. Returns a NEW
- * array; the input is not mutated. Below the threshold the original array
- * reference is returned for cheap identity comparison in tests.
+ * Compact a sub-agent message array in place-like fashion. The input is never
+ * mutated. When compaction actually elides something a NEW array is returned.
+ * On a no-op (below threshold, or too few tool turns to skip) the ORIGINAL input
+ * array is returned BY REFERENCE so callers can detect "did not compact this
+ * step" via identity (`compacted === input`). The B4 wiring in
+ * message-processor.ts (pre-compaction warning + compaction note gating) and the
+ * sub-agent wiring in stream-runner.ts both rely on this contract — returning a
+ * fresh slice on a no-op silently made the warning dead and the note fire every
+ * step.
  */
 export declare function compactSubAgentMessages(messages: ReadonlyArray<ModelMessage>, opts?: SubAgentCompactorOptions): ModelMessage[];

package/dist/src/orchestrator/subagent-compactor.js CHANGED Viewed

@@ -58,8 +58,21 @@ export const SUBAGENT_COMPACT_DEFAULT_THRESHOLD = 80_000;
 export const SUBAGENT_COMPACT_DEFAULT_KEEP_LAST = 3;
 const DEFAULT_OUTPUT_PREVIEW_CHARS = 200;
 const DEFAULT_LABEL = "sub-agent";
-/** Tools whose full outputs are high-value for anti-mù (idea 1). Keep verbatim even if older than keepLast. */
-export const IMPORTANT_TOOL_NAMES = ["read_file", "grep", "lsp", "bash"];
+/** Tools whose full outputs are high-value for anti-mù (idea 1). Keep verbatim even if older than keepLast.
+ * Extended for meta self-eval: ee_query / usage_forensics / selfverify_* are the exact artifacts
+ * the native contract + native-capabilities tell the agent to rely on for "task finished?" and
+ * rehydrate during long meta conversations about CLI/PIL/compaction/EE. */
+export const IMPORTANT_TOOL_NAMES = [
+    "read_file",
+    "grep",
+    "lsp",
+    "bash",
+    "ee_query",
+    "usage_forensics",
+    "selfverify_start",
+    "selfverify_result",
+    "selfverify_status",
+];
 /**
  * Heuristic: keep full (no stub) for high-signal tool results.
  * Signals: allowlist tool + (error/todo/plan/keyfile/large output or explicit keep list).
@@ -268,7 +281,9 @@ function rewriteOlderToolMessage(msg, previewChars, label, keepToolIds, persistA
             try {
                 persistArtifact(toolCallId, tr.toolName, rawPreview, "elided-by-compactor");
             }
-            catch { /* fail-open */ }
+            catch {
+                /* fail-open */
+            }
         }
         return {
             type: "tool-result",
@@ -282,9 +297,15 @@ function rewriteOlderToolMessage(msg, previewChars, label, keepToolIds, persistA
     return { ...msg, content: rewritten };
 }
 /**
- * Compact a sub-agent message array in place-like fashion. Returns a NEW
- * array; the input is not mutated. Below the threshold the original array
- * reference is returned for cheap identity comparison in tests.
+ * Compact a sub-agent message array in place-like fashion. The input is never
+ * mutated. When compaction actually elides something a NEW array is returned.
+ * On a no-op (below threshold, or too few tool turns to skip) the ORIGINAL input
+ * array is returned BY REFERENCE so callers can detect "did not compact this
+ * step" via identity (`compacted === input`). The B4 wiring in
+ * message-processor.ts (pre-compaction warning + compaction note gating) and the
+ * sub-agent wiring in stream-runner.ts both rely on this contract — returning a
+ * fresh slice on a no-op silently made the warning dead and the note fire every
+ * step.
  */
 export function compactSubAgentMessages(messages, opts = {}) {
     const resolved = resolveOpts(opts);
@@ -299,11 +320,12 @@ export function compactSubAgentMessages(messages, opts = {}) {
     // window utilization. Falls back to static char threshold + keepLast
     // when no contextWindowTokens supplied (preserves old behaviour).
     const { effectiveThresholdChars, effectiveKeepLastTurns } = computeDynamicParams(total, resolved);
+    // No-op: return the input BY REFERENCE (contract above) so `compacted === input`.
     if (total < effectiveThresholdChars)
-        return messages.slice();
+        return messages;
     const keepFrom = findKeepFromIndex(messages, effectiveKeepLastTurns);
     if (keepFrom <= 0)
-        return messages.slice();
+        return messages;
     // Walk older messages; rewrite fresh tool results into stubs, super-shrink
     // already-stubbed results (F1), and strip args off older assistant
     // tool-call shells (F1). The 1:1 assistant↔tool pairing required by the AI

package/dist/src/orchestrator/subagent-compactor.spec.js CHANGED Viewed

@@ -64,6 +64,24 @@ describe("subagent-compactor: compactSubAgentMessages", () => {
         // No tool-result rewrite happened — output object identity per part preserved.
         expect(out[3]).toBe(msgs[3]);
     });
+    it("returns the SAME array reference on a no-op below threshold (compacted===input contract)", () => {
+        // Callers (message-processor B4 prepareStep:1840/1908/1914) detect "did NOT
+        // compact this step" via `compacted === stripped`. The docstring promises the
+        // original ref on a no-op; returning a fresh slice silently broke that —
+        // making the pre-compaction warning dead and the compaction note fire every
+        // step. Lock the identity contract.
+        const msgs = buildHistory(2, 5); // below threshold
+        expect(compactSubAgentMessages(msgs)).toBe(msgs);
+    });
+    it("returns a NEW array when compaction actually elides (compacted!==input)", () => {
+        const msgs = buildHistory(10, 10); // ~100kb > threshold
+        for (const m of msgs) {
+            if (m.role === "tool" && Array.isArray(m.content)) {
+                m.content[0].toolName = "other_tool"; // force low-value so it elides
+            }
+        }
+        expect(compactSubAgentMessages(msgs)).not.toBe(msgs);
+    });
     it("compacts when cumulative chars exceed threshold", () => {
         const msgs = buildHistory(10, 10); // ~100kb of tool output
         // Neutralize to test pure size-based elision (high-value keep would reduce savings).

package/dist/src/orchestrator/text-tool-call-detector.test.js CHANGED Viewed

@@ -6,10 +6,10 @@ describe("detectTextEmittedToolCall", () => {
         // destructive edit, deepseek emitted this as plain assistant text to
         // re-read the file — the CLI returned it as the final answer and the turn
         // was silently wasted with a broken file left behind.
-        const text = `Let me restore the file properly.
-<read_file>
-<path>src/app/screens/story-list/story-list.component.html</path>
+        const text = `Let me restore the file properly.
+<read_file>
+<path>src/app/screens/story-list/story-list.component.html</path>
 </read_file>`;
         const r = detectTextEmittedToolCall(text);
         expect(r.detected).toBe(true);
@@ -43,10 +43,10 @@ describe("detectTextEmittedToolCall", () => {
         // Live: storyflow_ui explore-A/B, deepseek T3 (session 799f0508e830) emitted
         // this as text and made no real tool call → empty, silent turn. The generic
         // <invoke matcher misses it because `<` is followed by the U+FF5C sentinel.
-        const text = `<｜｜DSML｜｜tool_calls>
-<｜｜DSML｜｜invoke name="read_file">
-<｜｜DSML｜｜parameter name="file_path" string="true">src/app/foo.html</｜｜DSML｜｜parameter>
-</｜｜DSML｜｜invoke>
+        const text = `<｜｜DSML｜｜tool_calls>
+<｜｜DSML｜｜invoke name="read_file">
+<｜｜DSML｜｜parameter name="file_path" string="true">src/app/foo.html</｜｜DSML｜｜parameter>
+</｜｜DSML｜｜invoke>
 </｜｜DSML｜｜tool_calls>`;
         const r = detectTextEmittedToolCall(text);
         expect(r.detected).toBe(true);
@@ -60,11 +60,11 @@ describe("detectTextEmittedToolCall", () => {
         expect(detectTextEmittedToolCall("I edited the file and ran the tests; everything passes.").detected).toBe(false);
     });
     it("parseDsmlToolCalls extracts name + args from the DSML block (for targeted re-steer)", () => {
-        const text = `<｜｜DSML｜｜tool_calls>
-<｜｜DSML｜｜invoke name="read_file">
-<｜｜DSML｜｜parameter name="file_path" string="true">src/app/foo.html</｜｜DSML｜｜parameter>
-<｜｜DSML｜｜parameter name="start_line" string="false">25</｜｜DSML｜｜parameter>
-</｜｜DSML｜｜invoke>
+        const text = `<｜｜DSML｜｜tool_calls>
+<｜｜DSML｜｜invoke name="read_file">
+<｜｜DSML｜｜parameter name="file_path" string="true">src/app/foo.html</｜｜DSML｜｜parameter>
+<｜｜DSML｜｜parameter name="start_line" string="false">25</｜｜DSML｜｜parameter>
+</｜｜DSML｜｜invoke>
 </｜｜DSML｜｜tool_calls>`;
         const calls = parseDsmlToolCalls(text);
         expect(calls).toHaveLength(1);

package/dist/src/pil/__tests__/clarity-gate.test.js CHANGED Viewed

@@ -1,213 +1,28 @@
 import { describe, expect, it } from "vitest";
-import { canInferOutcome, countFileReferences, hasExplicitScope, hasExternalInfoScope, hasImageScope, hasOperationalScope, hasSelfContainedComputationScope, hasWholeRepoScope, shouldAutoPass, } from "../clarity-gate.js";
-describe("hasWholeRepoScope()", () => {
-    it("detects whole-repo / whole-project intent (EN + VI)", () => {
-        // The repo-eval prompt that fired a nonsensical "which part?" askcard.
-        expect(hasWholeRepoScope("đánh giá repo muonroi-cli này: điểm mạnh, điểm yếu")).toBe(true);
-        expect(hasWholeRepoScope("evaluate the repo: strengths and weaknesses")).toBe(true);
-        expect(hasWholeRepoScope("review the whole codebase")).toBe(true);
-        expect(hasWholeRepoScope("audit the entire project")).toBe(true);
-        expect(hasWholeRepoScope("phân tích toàn bộ dự án")).toBe(true);
-        expect(hasWholeRepoScope("give me an overview of the repository")).toBe(true);
-        // summarize/overview verbs (gap found in the deepseek session probe: "tóm tắt
-        // repo này" still fired the scope askcard because the verb list lacked it).
-        expect(hasWholeRepoScope("tóm tắt nhanh repo này")).toBe(true);
-        expect(hasWholeRepoScope("summarize the repository")).toBe(true);
-        expect(hasWholeRepoScope("give me a summary of the project")).toBe(true);
-    });
-    it("does NOT fire on summarize/review of a narrow target", () => {
-        expect(hasWholeRepoScope("summarize the login function")).toBe(false);
-        expect(hasWholeRepoScope("tóm tắt hàm xử lý auth")).toBe(false);
-    });
-    it("does NOT fire on narrow tasks that merely mention a repo/project", () => {
-        // "this repo" without a wholeness/eval signal must still be scoped.
-        expect(hasWholeRepoScope("add a logout button to this repo")).toBe(false);
-        expect(hasWholeRepoScope("fix the login bug in the project")).toBe(false);
-        expect(hasWholeRepoScope("implement the search feature")).toBe(false);
-        expect(hasWholeRepoScope("refactor the auth module")).toBe(false);
-    });
-    it("whole-repo scope no longer blocks auto-pass (was: scope-gap → false)", () => {
-        // With an inferable outcome (explicit goal), the ONLY remaining blocker for a
-        // repo-wide prompt was the scope gap. hasWholeRepoScope clears it.
-        const prompt = "review the entire codebase — goal: a report of strengths and weaknesses";
-        expect(shouldAutoPass({ confidence: 0.9, taskType: "analyze", complexity: "low" }, prompt)).toBe(true);
-        // Control: same shape but NOT repo-wide still fails on the scope gap.
-        const narrow = "review the system — goal: a report of strengths and weaknesses";
-        expect(shouldAutoPass({ confidence: 0.9, taskType: "analyze", complexity: "low" }, narrow)).toBe(false);
-    });
-});
-describe("hasSelfContainedComputationScope()", () => {
-    it("detects an inline-data computation prompt (the operand is in the prompt, not the codebase)", () => {
-        // Live drive (deepseek-vs-grok A/B, session probe 2026-06-05): "Compute
-        // f([3,1,2]) where f sorts the list ascending then returns the sum of the
-        // first two elements." classified taskType=analyze (regex:read matched the
-        // bare word "list") fired the codebase-scope askcard "Which part of the
-        // codebase should this target?" — nonsensical for a self-contained math
-        // problem whose input data is supplied inline. Symmetric to image/web/
-        // operational scope guards.
-        expect(hasSelfContainedComputationScope("Compute f([3,1,2]) where f sorts the list ascending then returns the sum of the first two elements.")).toBe(true);
-        expect(hasSelfContainedComputationScope("Given the array [5, 2, 8, 1, 9], what is the second largest element?")).toBe(true);
-        expect(hasSelfContainedComputationScope("What is the median of [10, 4, 7]?")).toBe(true);
-        expect(hasSelfContainedComputationScope('Reverse the list ["a", "b", "c"] and return it.')).toBe(true);
-    });
-    it("does NOT fire without an inline data literal", () => {
-        // The framing verb alone is not enough — a codebase task can say "compute"
-        // ("compute the hash in the auth module"). Only an inline operand qualifies.
-        expect(hasSelfContainedComputationScope("compute the cache key in the auth module")).toBe(false);
-        expect(hasSelfContainedComputationScope("sort the users table by created_at")).toBe(false);
-        expect(hasSelfContainedComputationScope("what is the second largest element of the array")).toBe(false);
-    });
-    it("does NOT fire on a real codebase task that merely contains an array literal (no compute framing)", () => {
-        // Narrowness guard: the literal alone is not enough. A feature/debug task
-        // that embeds a literal but is scoped to the codebase must KEEP its scope
-        // askcard. Requires BOTH an inline literal AND computation framing.
-        expect(hasSelfContainedComputationScope("add the items [1, 2, 3] to the cart in the checkout flow")).toBe(false);
-        expect(hasSelfContainedComputationScope("fix the bug where parseRange([1, 5]) returns the wrong values")).toBe(false);
-        expect(hasSelfContainedComputationScope("set the default retry delays to [100, 200, 400] in the config")).toBe(false);
-    });
-    it("does NOT fire on bracketed file-name lists (those are codebase-scoped)", () => {
-        // [a.ts, b.ts] is a list of files, not data — must stay codebase-scoped.
-        expect(hasSelfContainedComputationScope("compare the exports of [auth.ts, session.ts]")).toBe(false);
-    });
-    it("self-contained computation no longer blocks auto-pass (was: scope-gap → false)", () => {
-        // With an inferable outcome ("return the result"), the ONLY remaining blocker
-        // for an inline-data computation prompt was the scope gap.
-        // hasSelfContainedComputationScope clears it.
-        const prompt = "Compute the sum of the first two sorted elements of [3, 1, 2] and return the result.";
-        expect(shouldAutoPass({ confidence: 0.9, taskType: "analyze", complexity: "low" }, prompt)).toBe(true);
-        // Control: same outcome-inferable shape but NO inline literal still fails on
-        // the scope gap (a real codebase computation must still be scoped).
-        const codeTask = "Compute the largest element of the users array and return it.";
-        expect(shouldAutoPass({ confidence: 0.9, taskType: "analyze", complexity: "low" }, codeTask)).toBe(false);
-    });
-});
-describe("canInferOutcome()", () => {
-    it("returns false for null taskType", () => {
-        expect(canInferOutcome(null, "do something")).toBe(false);
-    });
-    it("returns false for general taskType", () => {
-        expect(canInferOutcome("general", "fix stuff")).toBe(false);
-    });
-    it("returns true for a general taskType that is a direct imperative command", () => {
-        // A direct command has a self-evident outcome (it runs / it shows), so it
-        // should auto-pass instead of triggering an outcome-clarification askcard.
-        expect(canInferOutcome("general", "run the test suite")).toBe(true);
-        expect(canInferOutcome("general", "echo harness-ok")).toBe(true);
-        expect(canInferOutcome("general", "show the package.json scripts")).toBe(true);
-        expect(canInferOutcome("general", "list the open ports")).toBe(true);
-    });
-    it("returns false for a general imperative verb with no object", () => {
-        expect(canInferOutcome("general", "run")).toBe(false);
-        expect(canInferOutcome("general", "execute   ")).toBe(false);
-    });
-    it("returns false for a general non-imperative prompt", () => {
-        expect(canInferOutcome("general", "the build is slow")).toBe(false);
-    });
-    it("returns true when prompt has error reference", () => {
-        expect(canInferOutcome("debug", "fix the TypeError in login")).toBe(true);
-    });
-    it("returns true when prompt has file:line reference", () => {
-        expect(canInferOutcome("debug", "fix auth.ts:42")).toBe(true);
-    });
-    it("returns true when prompt has target state verb", () => {
-        expect(canInferOutcome("refactor", "should return a Promise")).toBe(true);
-    });
-    it("returns true when prompt has add pattern", () => {
-        expect(canInferOutcome("generate", "add validation to login form")).toBe(true);
-    });
-    it("returns false for vague prompt with valid taskType", () => {
-        expect(canInferOutcome("debug", "fix auth")).toBe(false);
-    });
-});
-describe("countFileReferences()", () => {
-    it("counts .ts and .tsx files", () => {
-        expect(countFileReferences("fix login.ts and dashboard.tsx")).toBe(2);
-    });
-    it("returns 0 for no file refs", () => {
-        expect(countFileReferences("fix the auth module")).toBe(0);
-    });
-    it("ignores non-code extensions", () => {
-        expect(countFileReferences("see report.pdf")).toBe(0);
-    });
-});
-describe("hasExplicitScope()", () => {
-    it("detects src/ paths", () => {
-        expect(hasExplicitScope("refactor src/auth/jwt.ts")).toBe(true);
-    });
-    it("detects lib/ paths", () => {
-        expect(hasExplicitScope("update lib/utils")).toBe(true);
-    });
-    it("returns false for no path", () => {
-        expect(hasExplicitScope("refactor the code")).toBe(false);
-    });
-});
-describe("shouldAutoPass()", () => {
-    it("auto-passes high-confidence + specific file + inferrable outcome", () => {
-        expect(shouldAutoPass({ confidence: 0.9, taskType: "debug", complexity: "low" }, "fix TypeError in src/auth/login.ts:42")).toBe(true);
-    });
-    it("rejects low confidence", () => {
-        expect(shouldAutoPass({ confidence: 0.6, taskType: "debug", complexity: "low" }, "fix TypeError in login.ts:42")).toBe(false);
-    });
-    it("rejects vague prompt despite high confidence", () => {
-        expect(shouldAutoPass({ confidence: 0.9, taskType: "debug", complexity: "low" }, "fix auth")).toBe(false);
-    });
-    it("rejects high complexity", () => {
-        expect(shouldAutoPass({ confidence: 0.9, taskType: "refactor", complexity: "high" }, "refactor src/auth/login.ts should return Promise")).toBe(false);
-    });
-    it("auto-passes with explicit scope path even without file extension", () => {
-        expect(shouldAutoPass({ confidence: 0.9, taskType: "refactor", complexity: "medium" }, "refactor src/auth/ module to return Promises")).toBe(true);
-    });
-    // PIL-L6 fix
-    it("auto-passes CI/build debug task even without file path (operational scope)", () => {
-        expect(shouldAutoPass({ confidence: 0.9, taskType: "debug", complexity: "low" }, "fix the ci fail — goal: green pipeline")).toBe(true);
-    });
-    // Image-scope fix — an image-analysis task is scoped to the image, not a file
-    // path, so it should auto-pass when its outcome is inferrable.
-    it("auto-passes an image-analysis task even without file path (image scope)", () => {
-        expect(shouldAutoPass({ confidence: 0.9, taskType: "analyze", complexity: "low" }, "analyze screenshot.png — goal: describe the layout")).toBe(true);
-    });
-    // External-info fix — a web-search task is scoped to the web, not a file path.
-    it("auto-passes a web-search task even without file path (external-info scope)", () => {
-        expect(shouldAutoPass({ confidence: 0.9, taskType: "analyze", complexity: "low" }, "search the web for the vitest release date — goal: find the version")).toBe(true);
-    });
-});
-describe("hasExternalInfoScope()", () => {
-    it("detects web-search / external-info intent", () => {
-        expect(hasExternalInfoScope("search the web for the latest vitest release notes")).toBe(true);
-        expect(hasExternalInfoScope("google the error message")).toBe(true);
-        expect(hasExternalInfoScope("what's the latest news on the framework")).toBe(true);
-        expect(hasExternalInfoScope("summarize https://example.com/post")).toBe(true);
-    });
-    it("returns false for codebase tasks, including in-repo 'search'", () => {
-        // Narrow: must NOT swallow a real code task. "search the codebase" and
-        // "search feature" are codebase work and still deserve a scope askcard.
-        expect(hasExternalInfoScope("search the codebase for usages of foo")).toBe(false);
-        expect(hasExternalInfoScope("implement the search feature")).toBe(false);
-        expect(hasExternalInfoScope("add the zod library to the auth module")).toBe(false);
-        expect(hasExternalInfoScope("refactor the login flow")).toBe(false);
-    });
-});
-describe("hasImageScope()", () => {
-    it("detects an image file extension", () => {
-        expect(hasImageScope("analyze diagram.png")).toBe(true);
-        expect(hasImageScope("describe the layout of mock.jpg")).toBe(true);
-        expect(hasImageScope("read chart.svg")).toBe(true);
-    });
-    it("detects a data:image URI and screenshot/photo nouns", () => {
-        expect(hasImageScope("here is data:image/png;base64,AAAA")).toBe(true);
-        expect(hasImageScope("take a screenshot and analyze it")).toBe(true);
-        expect(hasImageScope("look at the photo")).toBe(true);
-    });
-    it("returns false for codebase tasks and ambiguous/overloaded words", () => {
-        // Narrow on purpose: a false positive SUPPRESSES a legitimate scope
-        // question, so overloaded words must NOT match.
-        expect(hasImageScope("refactor the login flow")).toBe(false);
-        expect(hasImageScope("add a logo to the header")).toBe(false); // "logo" excluded
-        expect(hasImageScope("rebuild the docker image")).toBe(false); // bare "image" excluded
-        expect(hasImageScope("look at the bigger picture")).toBe(false); // "picture" excluded
-    });
-});
-describe("hasOperationalScope() — PIL-L6", () => {
+import { detectNoClarifySignal, hasOperationalScope } from "../clarity-gate.js";
+// Phase 2 (2026-06-16): the regex ASK gate (shouldAutoPass + canInferOutcome +
+// the per-modality scope detectors) was removed — the model now decides every
+// clarification. Only two non-gating helpers survive: detectNoClarifySignal
+// (explicit user consent) and hasOperationalScope (outcome-label polish).
+describe("detectNoClarifySignal()", () => {
+    it("detects explicit no-clarify directives (EN)", () => {
+        expect(detectNoClarifySignal("just answer, don't ask me anything")).toBe(true);
+        expect(detectNoClarifySignal("answer directly without asking")).toBe(true);
+        expect(detectNoClarifySignal("no questions please, just do it")).toBe(true);
+        expect(detectNoClarifySignal("stop asking and give me the result")).toBe(true);
+    });
+    it("detects explicit no-clarify directives (VI + transliteration)", () => {
+        expect(detectNoClarifySignal("Đừng hỏi lại. Trả lời thẳng 3 câu hỏi.")).toBe(true);
+        expect(detectNoClarifySignal("không cần hỏi, trả lời luôn")).toBe(true);
+        expect(detectNoClarifySignal("tra loi thang dung hoi")).toBe(true);
+    });
+    it("does NOT match the explanation idiom 'don't ask me why'", () => {
+        expect(detectNoClarifySignal("it just works, don't ask me why")).toBe(false);
+        expect(detectNoClarifySignal("explain the auth flow")).toBe(false);
+        expect(detectNoClarifySignal("which part of the code should I read?")).toBe(false);
+    });
+});
+describe("hasOperationalScope()", () => {
     it("detects ci/build/test/action keywords", () => {
         expect(hasOperationalScope("fix ci fail")).toBe(true);
         expect(hasOperationalScope("the build is broken")).toBe(true);
@@ -219,10 +34,4 @@ describe("hasOperationalScope() — PIL-L6", () => {
         expect(hasOperationalScope("explain hooks")).toBe(false);
     });
 });
-describe("canInferOutcome() — explicit goal (PIL-L6)", () => {
-    it("returns true when prompt names an explicit goal", () => {
-        expect(canInferOutcome("debug", "goal: pipeline green")).toBe(true);
-        expect(canInferOutcome("debug", "mong muốn: tests passing")).toBe(true);
-    });
-});
 //# sourceMappingURL=clarity-gate.test.js.map

package/dist/src/pil/__tests__/config.test.js CHANGED Viewed

@@ -1,5 +1,5 @@
 import { afterEach, beforeEach, describe, expect, it } from "vitest";
-import { getAutoPassThreshold, getMaxInterviewQuestions, isDiscoveryEnabled, isUnifiedPilEnabled } from "../config.js";
+import { getMaxInterviewQuestions, isDiscoveryEnabled, isUnifiedPilEnabled } from "../config.js";
 describe("isUnifiedPilEnabled", () => {
     const orig = process.env.MUONROI_PIL_UNIFIED;
     beforeEach(() => {
@@ -43,22 +43,6 @@ describe("isDiscoveryEnabled()", () => {
         delete process.env.MUONROI_PIL_DISCOVERY;
     });
 });
-describe("getAutoPassThreshold()", () => {
-    it("returns 0.85 by default", () => {
-        delete process.env.MUONROI_PIL_AUTOPASS_THRESHOLD;
-        expect(getAutoPassThreshold()).toBe(0.85);
-    });
-    it("respects env override in range", () => {
-        process.env.MUONROI_PIL_AUTOPASS_THRESHOLD = "0.7";
-        expect(getAutoPassThreshold()).toBe(0.7);
-        delete process.env.MUONROI_PIL_AUTOPASS_THRESHOLD;
-    });
-    it("clamps out-of-range to default", () => {
-        process.env.MUONROI_PIL_AUTOPASS_THRESHOLD = "1.5";
-        expect(getAutoPassThreshold()).toBe(0.85);
-        delete process.env.MUONROI_PIL_AUTOPASS_THRESHOLD;
-    });
-});
 describe("getMaxInterviewQuestions()", () => {
     it("returns 3 by default", () => {
         delete process.env.MUONROI_PIL_MAX_QUESTIONS;