npm - muonroi-cli - Versions diffs - 1.5.0 → 1.6.0 - Mend

muonroi-cli 1.5.0 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30)

package/dist/src/ee/artifact-cache.d.ts +56 -0
package/dist/src/ee/artifact-cache.js +155 -0
package/dist/src/ee/artifact-cache.test.d.ts +1 -0
package/dist/src/ee/artifact-cache.test.js +69 -0
package/dist/src/ee/search.js +7 -5
package/dist/src/ee/search.test.d.ts +1 -0
package/dist/src/ee/search.test.js +23 -0
package/dist/src/generated/version.d.ts +1 -1
package/dist/src/generated/version.js +1 -1
package/dist/src/orchestrator/compaction.d.ts +2 -0
package/dist/src/orchestrator/compaction.js +14 -1
package/dist/src/orchestrator/compaction.test.js +25 -1
package/dist/src/orchestrator/message-processor.js +15 -5
package/dist/src/orchestrator/scope-reminder.d.ts +12 -0
package/dist/src/orchestrator/scope-reminder.js +16 -0
package/dist/src/orchestrator/scope-reminder.test.js +22 -1
package/dist/src/orchestrator/stream-runner.js +3 -0
package/dist/src/orchestrator/subagent-compactor.d.ts +14 -5
package/dist/src/orchestrator/subagent-compactor.js +30 -8
package/dist/src/orchestrator/subagent-compactor.spec.js +18 -0
package/dist/src/pil/__tests__/layer6-output.test.js +21 -0
package/dist/src/pil/__tests__/surface-compaction-artifacts.test.d.ts +1 -0
package/dist/src/pil/__tests__/surface-compaction-artifacts.test.js +112 -0
package/dist/src/pil/layer3-ee-injection.d.ts +19 -0
package/dist/src/pil/layer3-ee-injection.js +96 -4
package/dist/src/pil/layer6-output.js +18 -7
package/dist/src/pil/pipeline.js +15 -9
package/dist/src/tools/registry-ee-query.test.js +18 -1
package/dist/src/tools/registry.js +13 -2
package/package.json +1 -1

package/dist/src/ee/artifact-cache.d.ts ADDED Viewed

@@ -0,0 +1,56 @@
+/**
+ * src/ee/artifact-cache.ts
+ *
+ * Durable fallback for compaction-elided tool outputs (issue #3 increment 2 /
+ * anti-mù durability).
+ *
+ * When B3/B4 compaction rewrites a low-value tool result into a ~200-char stub,
+ * the full content is shipped to the Experience Engine (source="tool-artifact")
+ * so a later `ee_query("tool-artifact id=X")` can rehydrate it. But that recovery
+ * depends on EE (Qdrant/HTTP) being reachable. This module is the EE-independent
+ * recovery path, in two tiers:
+ *   - in-process LRU (keyed by toolCallId): authoritative full content for THIS
+ *     session, instant, survives an EE outage mid-session;
+ *   - append-only disk spill (~/.muonroi-cli/artifact-cache.jsonl): survives a
+ *     PROCESS RESTART too, so a restart + EE-down double-failure can still
+ *     rehydrate. Disable with MUONROI_ARTIFACT_CACHE_DISK=0.
+ *
+ * ee_query reads in-memory first, then disk, then falls back to EE /api/search
+ * (the cross-session source). Both tiers are bounded; both are best-effort and
+ * fail-open (a disk error never breaks recall).
+ */
+export interface ArtifactEntry {
+    toolName: string;
+    content: string;
+}
+/**
+ * Record an elided tool output by toolCallId. In-memory set is synchronous;
+ * the disk append is fire-and-forget (tracked so tests can flush it). No-ops on
+ * empty id/content.
+ */
+export declare function recordArtifact(toolCallId: string, toolName: string, content: string): void;
+/** The actual disk append (awaitable). Resets the file when it exceeds the size cap. */
+export declare function appendArtifactToDisk(toolCallId: string, toolName: string, content: string): Promise<void>;
+/** Exact in-memory lookup by toolCallId. */
+export declare function getArtifact(toolCallId: string): ArtifactEntry | null;
+/**
+ * Synchronous in-memory lookup from a contract query string. Returns null when
+ * the query has no id= or the id is not in the in-process LRU.
+ */
+export declare function findArtifactByQuery(query: string): (ArtifactEntry & {
+    toolCallId: string;
+}) | null;
+/**
+ * Disk-tier lookup (survives restart). Scans the spill file newest-first so the
+ * most recent record for an id wins. Fail-open: a missing/corrupt file yields
+ * null, never throws.
+ */
+export declare function findArtifactOnDisk(query: string): Promise<(ArtifactEntry & {
+    toolCallId: string;
+}) | null>;
+export declare function __resetArtifactCacheForTests(): void;
+export declare function __setArtifactCacheMaxForTests(n: number): void;
+export declare function __setArtifactCacheDiskPathForTests(p: string | null): void;
+export declare function __artifactCacheSize(): number;
+/** Await all in-flight fire-and-forget disk writes (deterministic tests). */
+export declare function flushArtifactDiskWrites(): Promise<void>;

package/dist/src/ee/artifact-cache.js ADDED Viewed

@@ -0,0 +1,155 @@
+/**
+ * src/ee/artifact-cache.ts
+ *
+ * Durable fallback for compaction-elided tool outputs (issue #3 increment 2 /
+ * anti-mù durability).
+ *
+ * When B3/B4 compaction rewrites a low-value tool result into a ~200-char stub,
+ * the full content is shipped to the Experience Engine (source="tool-artifact")
+ * so a later `ee_query("tool-artifact id=X")` can rehydrate it. But that recovery
+ * depends on EE (Qdrant/HTTP) being reachable. This module is the EE-independent
+ * recovery path, in two tiers:
+ *   - in-process LRU (keyed by toolCallId): authoritative full content for THIS
+ *     session, instant, survives an EE outage mid-session;
+ *   - append-only disk spill (~/.muonroi-cli/artifact-cache.jsonl): survives a
+ *     PROCESS RESTART too, so a restart + EE-down double-failure can still
+ *     rehydrate. Disable with MUONROI_ARTIFACT_CACHE_DISK=0.
+ *
+ * ee_query reads in-memory first, then disk, then falls back to EE /api/search
+ * (the cross-session source). Both tiers are bounded; both are best-effort and
+ * fail-open (a disk error never breaks recall).
+ */
+import { appendFile, mkdir, readFile, stat, writeFile } from "node:fs/promises";
+import os from "node:os";
+import path from "node:path";
+const DEFAULT_MAX_ENTRIES = 100;
+/** Per-entry cap so one giant output can't dominate the footprint. */
+const MAX_CONTENT_CHARS = 200_000;
+/** Disk-file size cap; on overflow the file is reset (EE retains older artifacts). */
+const DISK_MAX_BYTES = 8 * 1024 * 1024;
+const store = new Map();
+let maxEntries = DEFAULT_MAX_ENTRIES;
+let diskPathOverride = null;
+const pendingWrites = new Set();
+function diskEnabled() {
+    return process.env.MUONROI_ARTIFACT_CACHE_DISK !== "0";
+}
+function diskPath() {
+    return diskPathOverride ?? path.join(os.homedir(), ".muonroi-cli", "artifact-cache.jsonl");
+}
+/** Extract the id from a "tool-artifact id=<id>" / "full tool result id=<id>" query. */
+function extractArtifactId(query) {
+    const m = /\bid\s*=\s*["']?([A-Za-z0-9_\-:.]+)/i.exec(query || "");
+    return m ? m[1] : null;
+}
+/**
+ * Record an elided tool output by toolCallId. In-memory set is synchronous;
+ * the disk append is fire-and-forget (tracked so tests can flush it). No-ops on
+ * empty id/content.
+ */
+export function recordArtifact(toolCallId, toolName, content) {
+    if (!toolCallId || typeof content !== "string" || content.length === 0)
+        return;
+    const capped = content.slice(0, MAX_CONTENT_CHARS);
+    if (store.has(toolCallId))
+        store.delete(toolCallId); // refresh recency
+    store.set(toolCallId, { toolName: toolName || "", content: capped });
+    while (store.size > maxEntries) {
+        const oldest = store.keys().next().value;
+        if (oldest === undefined)
+            break;
+        store.delete(oldest);
+    }
+    if (diskEnabled()) {
+        const w = appendArtifactToDisk(toolCallId, toolName || "", capped).catch((err) => {
+            console.error(`[artifact-cache] disk append failed: ${err?.message}`);
+        });
+        pendingWrites.add(w);
+        void w.finally(() => pendingWrites.delete(w));
+    }
+}
+/** The actual disk append (awaitable). Resets the file when it exceeds the size cap. */
+export async function appendArtifactToDisk(toolCallId, toolName, content) {
+    const p = diskPath();
+    await mkdir(path.dirname(p), { recursive: true });
+    try {
+        const s = await stat(p);
+        if (s.size > DISK_MAX_BYTES)
+            await writeFile(p, "");
+    }
+    catch {
+        /* file does not exist yet — nothing to cap */
+    }
+    await appendFile(p, `${JSON.stringify({ id: toolCallId, toolName, content })}\n`);
+}
+/** Exact in-memory lookup by toolCallId. */
+export function getArtifact(toolCallId) {
+    if (!toolCallId)
+        return null;
+    return store.get(toolCallId) ?? null;
+}
+/**
+ * Synchronous in-memory lookup from a contract query string. Returns null when
+ * the query has no id= or the id is not in the in-process LRU.
+ */
+export function findArtifactByQuery(query) {
+    const id = extractArtifactId(query);
+    if (!id)
+        return null;
+    const hit = store.get(id);
+    return hit ? { toolCallId: id, toolName: hit.toolName, content: hit.content } : null;
+}
+/**
+ * Disk-tier lookup (survives restart). Scans the spill file newest-first so the
+ * most recent record for an id wins. Fail-open: a missing/corrupt file yields
+ * null, never throws.
+ */
+export async function findArtifactOnDisk(query) {
+    if (!diskEnabled())
+        return null;
+    const id = extractArtifactId(query);
+    if (!id)
+        return null;
+    let text;
+    try {
+        text = await readFile(diskPath(), "utf8");
+    }
+    catch {
+        return null; // no spill file yet
+    }
+    const lines = text.split("\n");
+    for (let i = lines.length - 1; i >= 0; i--) {
+        const line = lines[i];
+        if (!line)
+            continue;
+        try {
+            const row = JSON.parse(line);
+            if (row.id === id)
+                return { toolCallId: id, toolName: row.toolName ?? "", content: row.content ?? "" };
+        }
+        catch {
+            /* skip a torn/partial append line */
+        }
+    }
+    return null;
+}
+// ─── Test hooks ──────────────────────────────────────────────────────────────
+export function __resetArtifactCacheForTests() {
+    store.clear();
+    maxEntries = DEFAULT_MAX_ENTRIES;
+    diskPathOverride = null;
+}
+export function __setArtifactCacheMaxForTests(n) {
+    maxEntries = Math.max(1, n);
+}
+export function __setArtifactCacheDiskPathForTests(p) {
+    diskPathOverride = p;
+}
+export function __artifactCacheSize() {
+    return store.size;
+}
+/** Await all in-flight fire-and-forget disk writes (deterministic tests). */
+export async function flushArtifactDiskWrites() {
+    await Promise.allSettled([...pendingWrites]);
+}
+//# sourceMappingURL=artifact-cache.js.map

package/dist/src/ee/artifact-cache.test.d.ts ADDED Viewed

@@ -0,0 +1 @@
+export {};

package/dist/src/ee/artifact-cache.test.js ADDED Viewed

@@ -0,0 +1,69 @@
+import { rm } from "node:fs/promises";
+import os from "node:os";
+import path from "node:path";
+import { afterEach, beforeEach, describe, expect, it } from "vitest";
+import { __artifactCacheSize, __resetArtifactCacheForTests, __setArtifactCacheDiskPathForTests, __setArtifactCacheMaxForTests, appendArtifactToDisk, findArtifactByQuery, findArtifactOnDisk, flushArtifactDiskWrites, getArtifact, recordArtifact, } from "./artifact-cache.js";
+// Redirect the disk spill to a temp file for EVERY test so recordArtifact never
+// writes the real ~/.muonroi-cli/artifact-cache.jsonl.
+const diskFile = path.join(os.tmpdir(), `muonroi-artifact-cache-test-${process.pid}.jsonl`);
+beforeEach(() => __setArtifactCacheDiskPathForTests(diskFile));
+afterEach(async () => {
+    __resetArtifactCacheForTests();
+    delete process.env.MUONROI_ARTIFACT_CACHE_DISK;
+    await rm(diskFile, { force: true });
+});
+describe("artifact-cache (in-memory tier — durable rehydrate when EE is down)", () => {
+    it("records and retrieves an elided output by toolCallId", () => {
+        recordArtifact("call_7", "read_file", "FULL CONTENT of src/auth.ts");
+        expect(getArtifact("call_7")).toEqual({ toolName: "read_file", content: "FULL CONTENT of src/auth.ts" });
+        expect(getArtifact("missing")).toBeNull();
+    });
+    it("no-ops on empty id or empty content", () => {
+        recordArtifact("", "read_file", "x");
+        recordArtifact("call_x", "read_file", "");
+        expect(__artifactCacheSize()).toBe(0);
+    });
+    it("findArtifactByQuery extracts the id from the contract query strings", () => {
+        recordArtifact("abc123", "grep", "GREP HITS");
+        expect(findArtifactByQuery("tool-artifact id=abc123")?.content).toBe("GREP HITS");
+        expect(findArtifactByQuery("full tool result id=abc123")?.toolCallId).toBe("abc123");
+        expect(findArtifactByQuery("tool-artifact  ID = abc123")?.content).toBe("GREP HITS"); // spacing/case
+        expect(findArtifactByQuery("tool-artifact id=nope")).toBeNull(); // not cached
+        expect(findArtifactByQuery("no id here")).toBeNull(); // no id=
+    });
+    it("evicts the oldest entries past the LRU cap; re-recording refreshes recency", () => {
+        __setArtifactCacheMaxForTests(2);
+        recordArtifact("a", "t", "A");
+        recordArtifact("b", "t", "B");
+        recordArtifact("a", "t", "A2"); // touch 'a' → now 'b' is oldest
+        recordArtifact("c", "t", "C"); // evicts 'b'
+        expect(getArtifact("a")?.content).toBe("A2");
+        expect(getArtifact("c")?.content).toBe("C");
+        expect(getArtifact("b")).toBeNull();
+        expect(__artifactCacheSize()).toBe(2);
+    });
+});
+describe("artifact-cache (disk spill — survives a process restart)", () => {
+    it("rehydrates from disk after the in-memory tier is gone (simulated restart)", async () => {
+        recordArtifact("call_disk", "read_file", "PERSISTED CONTENT");
+        await flushArtifactDiskWrites();
+        // Simulate a restart: in-memory tier cleared, but the disk file persists.
+        __resetArtifactCacheForTests();
+        __setArtifactCacheDiskPathForTests(diskFile);
+        expect(findArtifactByQuery("tool-artifact id=call_disk")).toBeNull(); // memory gone
+        const onDisk = await findArtifactOnDisk("tool-artifact id=call_disk");
+        expect(onDisk?.content).toBe("PERSISTED CONTENT");
+        expect(onDisk?.toolName).toBe("read_file");
+    });
+    it("newest record for an id wins on disk", async () => {
+        await appendArtifactToDisk("dup", "t", "OLD");
+        await appendArtifactToDisk("dup", "t", "NEW");
+        expect((await findArtifactOnDisk("tool-artifact id=dup"))?.content).toBe("NEW");
+    });
+    it("respects MUONROI_ARTIFACT_CACHE_DISK=0 (no disk read)", async () => {
+        await appendArtifactToDisk("x", "t", "C");
+        process.env.MUONROI_ARTIFACT_CACHE_DISK = "0";
+        expect(await findArtifactOnDisk("tool-artifact id=x")).toBeNull();
+    });
+});
+//# sourceMappingURL=artifact-cache.test.js.map

package/dist/src/ee/search.js CHANGED Viewed

@@ -97,11 +97,13 @@ export async function mirrorRecallLocally(query, meta, logPath) {
  * unavailability/timeout — never throws for transport errors.
  */
 export async function searchEE(query, opts = {}) {
-    const { createEEClient } = await import("./client.js");
-    const { loadEEAuthToken, getCachedServerBaseUrl } = await import("./auth.js");
-    const authToken = (await loadEEAuthToken()) ?? undefined;
-    const baseUrl = getCachedServerBaseUrl() ?? undefined;
-    return createEEClient({ baseUrl, authToken }).search(query, opts);
+    // Route through the shared injectable default client (same one the WRITE leg
+    // persistArtifact → getDefaultEEClient().extract uses), NOT a fresh per-call
+    // client. This unifies the anti-mù seam: setDefaultEEClient now intercepts BOTH
+    // the artifact write and the artifact READ leg, and the default client carries
+    // the boot-loaded token + 401 refresh maintained by intercept.ts.
+    const { getDefaultEEClient } = await import("./intercept.js");
+    return getDefaultEEClient().search(query, opts);
 }
 /**
  * Active recall over the EE brain via /api/recall (recallMode) — the fixed

package/dist/src/ee/search.test.d.ts ADDED Viewed

@@ -0,0 +1 @@
+export {};

package/dist/src/ee/search.test.js ADDED Viewed

@@ -0,0 +1,23 @@
+import { afterEach, describe, expect, it, vi } from "vitest";
+import { setDefaultEEClient } from "./intercept.js";
+import { searchEE } from "./search.js";
+// Issue #3 seam: searchEE used to build a FRESH createEEClient, so the artifact
+// READ leg (ee_query "tool-artifact id=X") could not be intercepted by
+// setDefaultEEClient — while the WRITE leg (persistArtifact → getDefaultEEClient
+// .extract) could. Routing searchEE through getDefaultEEClient unifies the seam:
+// one injected client now intercepts both legs (testable end-to-end + the spot a
+// durability fallback can hook).
+describe("searchEE — routes through the injectable default EE client", () => {
+    afterEach(() => {
+        setDefaultEEClient(null); // teardown → next getDefaultEEClient lazy-inits a real one
+    });
+    it("uses getDefaultEEClient().search so the artifact READ leg is interceptable", async () => {
+        const fakeResp = { results: [{ id: "x", text: "REHYDRATED" }] };
+        const search = vi.fn().mockResolvedValue(fakeResp);
+        setDefaultEEClient({ search });
+        const out = await searchEE("tool-artifact id=x", { collections: ["experience-behavioral"], limit: 1 });
+        expect(search).toHaveBeenCalledWith("tool-artifact id=x", { collections: ["experience-behavioral"], limit: 1 });
+        expect(out).toBe(fakeResp);
+    });
+});
+//# sourceMappingURL=search.test.js.map

package/dist/src/generated/version.d.ts CHANGED Viewed

@@ -1,2 +1,2 @@
-export declare const PACKAGE_VERSION = "1.5.0";
+export declare const PACKAGE_VERSION = "1.6.0";
 export declare const PACKAGE_DESCRIPTION = "BYOK AI coding agent with multi-model council debate, role-based routing, and auto-compact.";

package/dist/src/generated/version.js CHANGED Viewed

@@ -1,5 +1,5 @@
 // AUTO-GENERATED by scripts/sync-version.cjs. DO NOT EDIT BY HAND.
 // Sourced from package.json at build time so it survives bun --compile bundling.
-export const PACKAGE_VERSION = "1.5.0";
+export const PACKAGE_VERSION = "1.6.0";
 export const PACKAGE_DESCRIPTION = "BYOK AI coding agent with multi-model council debate, role-based routing, and auto-compact.";
 //# sourceMappingURL=version.js.map

package/dist/src/orchestrator/compaction.d.ts CHANGED Viewed

@@ -23,6 +23,8 @@ export declare const DEFAULT_RESERVE_TOKENS = 16384;
 export declare const DEFAULT_KEEP_RECENT_TOKENS = 20000;
 export declare const POST_TURN_MIN_TOKENS = 2000;
 export declare const COMPACTION_MAX_OUTPUT_TOKENS = 4096;
+export declare const COMPACTION_META_MAX_OUTPUT_TOKENS = 1536;
+export declare function metaCompactionMaxTokens(): number;
 export declare const TOOL_RESULT_MAX_CHARS_CONFIGURABLE = 8000;
 export declare const COMPACTION_SUMMARY_HEADER = "[Context checkpoint summary]";
 export declare function extractUserContent(content: unknown): string;

package/dist/src/orchestrator/compaction.js CHANGED Viewed

@@ -10,6 +10,19 @@ export const DEFAULT_RESERVE_TOKENS = 16_384;
 export const DEFAULT_KEEP_RECENT_TOKENS = 20_000;
 export const POST_TURN_MIN_TOKENS = 2_000;
 export const COMPACTION_MAX_OUTPUT_TOKENS = 4_096;
+// Meta-analysis (agent/PIL self-eval) summaries are capped tighter than normal
+// to prevent runaway summaries (session df2dbb878984: 73k input → 14k-char
+// summary). Default 1536 (was a hard 1024) — modestly more fidelity now that
+// anti-mù recovery (layer3 surfacing + the in-process/disk artifact cache)
+// backstops detail loss, still ~2.3x below the 14k-char problem. Tune per machine
+// with MUONROI_META_COMPACT_MAX_TOKENS (accepted range 512..COMPACTION_MAX_OUTPUT_TOKENS; out-of-range or non-numeric values are ignored and the default is used, not clamped).
+export const COMPACTION_META_MAX_OUTPUT_TOKENS = 1_536;
+export function metaCompactionMaxTokens() {
+    const raw = Number(process.env.MUONROI_META_COMPACT_MAX_TOKENS);
+    if (Number.isFinite(raw) && raw >= 512 && raw <= COMPACTION_MAX_OUTPUT_TOKENS)
+        return Math.floor(raw);
+    return COMPACTION_META_MAX_OUTPUT_TOKENS;
+}
 export const TOOL_RESULT_MAX_CHARS_CONFIGURABLE = 8000;
 export const COMPACTION_SUMMARY_HEADER = "[Context checkpoint summary]";
 const SUMMARIZATION_SYSTEM_PROMPT = `You are a context summarization assistant.
@@ -450,7 +463,7 @@ async function summarizeConversation(provider, modelId, messages, reserveTokens,
     const userText = messages.map((m) => extractUserContent(m.content)).join("\n");
     const isMeta = isMetaAnalysisPrompt(userText);
     const effectiveMax = isMeta
-        ? Math.min(1024, Math.max(512, Math.floor(reserveTokens * 0.5)))
+        ? Math.min(metaCompactionMaxTokens(), Math.max(512, Math.floor(reserveTokens * 0.5)))
         : Math.min(COMPACTION_MAX_OUTPUT_TOKENS, Math.max(512, Math.floor(reserveTokens * 0.8)));
     if (previousSummary) {
         promptParts.push(`Existing summary:\n${previousSummary}`);

package/dist/src/orchestrator/compaction.test.js CHANGED Viewed

@@ -1,6 +1,6 @@
 import { afterAll, beforeAll, describe, expect, it } from "vitest";
 import { buildEffectiveTranscript } from "../storage/transcript-view.js";
-import { COMPACTION_SUMMARY_HEADER, createCompactionSummaryMessage, findCutPoint, prepareCompaction, serializeConversation, shouldCompactContext, } from "./compaction.js";
+import { COMPACTION_META_MAX_OUTPUT_TOKENS, COMPACTION_SUMMARY_HEADER, createCompactionSummaryMessage, findCutPoint, metaCompactionMaxTokens, prepareCompaction, serializeConversation, shouldCompactContext, } from "./compaction.js";
 import { buildCheckpointReminder } from "./scope-reminder.js";
 import { __forceFallbackForTests } from "./token-counter.js";
 // Pin token counts to the chars/4 fallback so cut-point assertions remain stable.
@@ -160,4 +160,28 @@ describe("compaction helpers", () => {
         expect(r).toContain("tool-artifact");
     });
 });
+describe("metaCompactionMaxTokens — meta summary cap (tunable, session 2b7a10219499)", () => {
+    it("defaults to 1536 — looser than the old hard 1024, still well below the 14k-char problem", () => {
+        delete process.env.MUONROI_META_COMPACT_MAX_TOKENS;
+        expect(metaCompactionMaxTokens()).toBe(COMPACTION_META_MAX_OUTPUT_TOKENS);
+        expect(COMPACTION_META_MAX_OUTPUT_TOKENS).toBe(1536);
+        expect(COMPACTION_META_MAX_OUTPUT_TOKENS).toBeGreaterThan(1024);
+    });
+    it("honors a valid MUONROI_META_COMPACT_MAX_TOKENS override", () => {
+        process.env.MUONROI_META_COMPACT_MAX_TOKENS = "2048";
+        try {
+            expect(metaCompactionMaxTokens()).toBe(2048);
+        }
+        finally {
+            delete process.env.MUONROI_META_COMPACT_MAX_TOKENS;
+        }
+    });
+    it("clamps out-of-range / garbage overrides to the default", () => {
+        for (const bad of ["999999", "100", "-5", "abc", ""]) {
+            process.env.MUONROI_META_COMPACT_MAX_TOKENS = bad;
+            expect(metaCompactionMaxTokens(), bad).toBe(COMPACTION_META_MAX_OUTPUT_TOKENS);
+        }
+        delete process.env.MUONROI_META_COMPACT_MAX_TOKENS;
+    });
+});
 //# sourceMappingURL=compaction.test.js.map

package/dist/src/orchestrator/message-processor.js CHANGED Viewed

@@ -50,6 +50,7 @@
 //   - O1 (providerOptions shape forensics)  — extractProviderOptionsShape
 //   - siliconflow reasoning-strip           — turnCaps.sanitizeHistory
 import { stepCountIs, streamText } from "ai";
+import { recordArtifact } from "../ee/artifact-cache.js";
 import { getCachedAuthToken, getCachedServerBaseUrl } from "../ee/auth.js";
 import { routeFeedback, routeModel } from "../ee/bridge.js";
 import { getDefaultEEClient } from "../ee/intercept.js";
@@ -101,11 +102,11 @@ import { repairToolCallHook } from "./repair-tool-call.js";
 import { buildRepetitionReminder, recordAssistantBurst, shouldInjectRepetitionReminder, } from "./repetition-detector.js";
 import { classifyStreamError } from "./retry-classifier.js";
 import { forcedFinalize, getSessionLastTask, incSessionStep, parseBudgetOverride, recordSessionLastTask, resetSessionStep, resolveCeiling, } from "./scope-ceiling.js";
-import { attachReminderToMessages, buildCheckpointReminder, buildScopeReminder, cadenceForSize, shouldInjectCeilingCrossing, shouldInjectReminder, shouldInjectSoftWarn, } from "./scope-reminder.js";
+import { attachReminderToMessages, buildCheckpointReminder, buildScopeReminder, cadenceForSize, shouldInjectCeilingCrossing, shouldInjectReminder, shouldInjectSoftWarn, shouldPreWarnCompaction, } from "./scope-reminder.js";
 import { attemptStallRescue, pushStallToolResult } from "./stall-rescue.js";
 import { createStallWatchdog, STALL_ERROR_MESSAGE } from "./stall-watchdog.js";
 import { wrapToolSetWithCap } from "./sub-agent-cap.js";
-import { compactSubAgentMessages } from "./subagent-compactor.js";
+import { compactSubAgentMessages, cumulativeMessageChars } from "./subagent-compactor.js";
 import { detectTextEmittedToolCall, parseDsmlToolCalls } from "./text-tool-call-detector.js";
 import { createToolLoopCapPredicate } from "./tool-loop-cap.js";
 import { buildToolRepetitionAbortMessage, recordToolError as recordToolRepetitionError, recordToolSuccess as recordToolRepetitionSuccess, } from "./tool-repetition-detector.js";
@@ -1500,6 +1501,10 @@ export class MessageProcessor {
                             const _cwd = process.cwd();
                             const _sess = undefined; // best-effort; EE artifact still indexable by content + meta.toolCallId
                             const persistArtifact = (toolCallId, toolName, fullContent, reason) => {
+                                // Local-first: record the FULL output in-process so ee_query can
+                                // rehydrate it even if EE is down (the EE extract below caps at 8k
+                                // and needs the network; the cache keeps up to 200k, no network).
+                                recordArtifact(toolCallId, toolName, fullContent);
                                 try {
                                     getDefaultEEClient()
                                         .extract({
@@ -1532,9 +1537,14 @@ export class MessageProcessor {
                             // Pre-compaction visibility: give the agent one step of notice
                             // before B4 actually rewrites history into stubs. This is the
                             // advance warning that was missing — agent can now decide to
-                            // summarize, finish, or request preservation.
-                            const _preCompactWarnAt = Math.floor(topLevelCompactThreshold * 0.78);
-                            if (stripped.length > _preCompactWarnAt && compacted === stripped) {
+                            // summarize, finish, or request preservation. Fires when we did
+                            // NOT compact this step (compacted === stripped, restored by the
+                            // compactSubAgentMessages no-op ref contract) AND the prompt is
+                            // approaching the threshold. Must compare CHARS (messages +
+                            // envelope), not stripped.length (a message count that never
+                            // exceeds a char-scaled threshold) — session 2b7a10219499.
+                            const _preWarnChars = cumulativeMessageChars(stripped) + envelopeChars;
+                            if (compacted === stripped && shouldPreWarnCompaction(_preWarnChars, topLevelCompactThreshold)) {
                                 const _cp = buildCheckpointReminder(sn, true);
                                 const _pre = `[pre-compaction warning at step ${sn} — next step(s) will likely rewrite older tool results to stubs (threshold ${topLevelCompactThreshold}, keepLast=${topLevelCompactKeepLast}). ${_cp} Summarize or finish if possible.]`;
                                 return { messages: attachReminderToMessages(stripped, _pre) };

package/dist/src/orchestrator/scope-reminder.d.ts CHANGED Viewed

@@ -100,3 +100,15 @@ export declare function attachReminderToMessages<T>(messages: ReadonlyArray<T>,
  * Used by prepareStep / sub-agent paths after compaction.
  */
 export declare function buildCheckpointReminder(iteration: number, hasEECheckpoint: boolean): string;
+/**
+ * Pre-compaction "advance warning" gate. Fires when the prompt is approaching
+ * (default ≥78% of) the compaction threshold but compaction has NOT yet run this
+ * step — giving the agent one step to PRESERVE / finish before B3/B4 rewrites
+ * older tool results into stubs.
+ *
+ * `promptChars` MUST be the same quantity the compactor thresholds on (cumulative
+ * message chars + envelope chars), NOT the message COUNT. The original B4 wiring
+ * compared `stripped.length` (a message count, ~tens) against a char-scaled
+ * threshold (~156000), so the warning could never fire — session 2b7a10219499.
+ */
+export declare function shouldPreWarnCompaction(promptChars: number, thresholdChars: number, ratio?: number): boolean;

package/dist/src/orchestrator/scope-reminder.js CHANGED Viewed

@@ -218,4 +218,20 @@ export function buildCheckpointReminder(iteration, hasEECheckpoint) {
         return base;
     return base.slice(0, 220);
 }
+/**
+ * Pre-compaction "advance warning" gate. Fires when the prompt is approaching
+ * (default ≥78% of) the compaction threshold but compaction has NOT yet run this
+ * step — giving the agent one step to PRESERVE / finish before B3/B4 rewrites
+ * older tool results into stubs.
+ *
+ * `promptChars` MUST be the same quantity the compactor thresholds on (cumulative
+ * message chars + envelope chars), NOT the message COUNT. The original B4 wiring
+ * compared `stripped.length` (a message count, ~tens) against a char-scaled
+ * threshold (~156000), so the warning could never fire — session 2b7a10219499.
+ */
+export function shouldPreWarnCompaction(promptChars, thresholdChars, ratio = 0.78) {
+    if (thresholdChars <= 0 || promptChars <= 0)
+        return false;
+    return promptChars >= Math.floor(thresholdChars * ratio);
+}
 //# sourceMappingURL=scope-reminder.js.map

package/dist/src/orchestrator/scope-reminder.test.js CHANGED Viewed

@@ -13,7 +13,7 @@
  *   - Reminder lives in tool_result/system message — never in system prompt
  */
 import { afterEach, beforeEach, describe, expect, it } from "vitest";
-import { attachReminderToMessages, buildScopeReminder, cadenceForSize, shouldInjectCeilingCrossing, shouldInjectReminder, shouldInjectSoftWarn, } from "./scope-reminder.js";
+import { attachReminderToMessages, buildScopeReminder, cadenceForSize, shouldInjectCeilingCrossing, shouldInjectReminder, shouldInjectSoftWarn, shouldPreWarnCompaction, } from "./scope-reminder.js";
 describe("cadenceForSize", () => {
     it("locks 3/5/8 for small/medium/large with hard floor >= 3", () => {
         expect(cadenceForSize("small")).toBe(3);
@@ -201,4 +201,25 @@ describe("attachReminderToMessages", () => {
         expect(out).toEqual(messages);
     });
 });
+describe("shouldPreWarnCompaction (regression: session 2b7a10219499 dead pre-warning)", () => {
+    const THRESHOLD = 200_000; // MUONROI_TOP_LEVEL_COMPACT_THRESHOLD_CHARS default
+    it("fires when prompt chars reach >=78% of the threshold (approaching compaction)", () => {
+        expect(shouldPreWarnCompaction(Math.floor(THRESHOLD * 0.78), THRESHOLD)).toBe(true);
+        expect(shouldPreWarnCompaction(190_000, THRESHOLD)).toBe(true);
+    });
+    it("does NOT fire while comfortably below the threshold", () => {
+        expect(shouldPreWarnCompaction(100_000, THRESHOLD)).toBe(false);
+        expect(shouldPreWarnCompaction(0, THRESHOLD)).toBe(false);
+    });
+    it("guards against the original bug: a message COUNT can never trip a char threshold", () => {
+        // The dead wiring compared stripped.length (a message count, ~tens) to the
+        // char-scaled threshold. With chars it crosses; with a count it never does.
+        const messageCount = 60; // plausible long-session message count
+        expect(shouldPreWarnCompaction(messageCount, THRESHOLD)).toBe(false);
+        expect(shouldPreWarnCompaction(170_000, THRESHOLD)).toBe(true);
+    });
+    it("is inert for a zero/negative threshold (no compaction configured)", () => {
+        expect(shouldPreWarnCompaction(999_999, 0)).toBe(false);
+    });
+});
 //# sourceMappingURL=scope-reminder.test.js.map

package/dist/src/orchestrator/stream-runner.js CHANGED Viewed

@@ -27,6 +27,7 @@
 //   - F1 (sub-agent cumulative cap)         — wrapToolSetWithCap
 //   - siliconflow reasoning-strip           — taskCaps.sanitizeHistory
 import { stepCountIs, streamText } from "ai";
+import { recordArtifact } from "../ee/artifact-cache.js";
 import { getDefaultEEClient } from "../ee/intercept.js";
 import { acquireMcpTools } from "../mcp/client-pool.js";
 import { normalizeModelId } from "../models/registry.js";
@@ -412,6 +413,8 @@ export class StreamRunner {
                 }
                 // Idea 4 persist for sub-agent elisions (best-effort; may lack full session but EE can still index the artifact content).
                 const persistSubArtifact = (toolCallId, toolName, fullContent, reason) => {
+                    // Local-first durable cache so ee_query rehydrates even when EE is down.
+                    recordArtifact(toolCallId, toolName, fullContent);
                     try {
                         getDefaultEEClient()
                             .extract({

package/dist/src/orchestrator/subagent-compactor.d.ts CHANGED Viewed

@@ -106,8 +106,11 @@ export interface SubAgentCompactorOptions {
 export declare const CHARS_PER_TOKEN = 4;
 export declare const SUBAGENT_COMPACT_DEFAULT_THRESHOLD = 80000;
 export declare const SUBAGENT_COMPACT_DEFAULT_KEEP_LAST = 3;
-/** Tools whose full outputs are high-value for anti-mù (idea 1). Keep verbatim even if older than keepLast. */
-export declare const IMPORTANT_TOOL_NAMES: readonly ["read_file", "grep", "lsp", "bash"];
+/** Tools whose full outputs are high-value for anti-mù (idea 1). Keep verbatim even if older than keepLast.
+ * Extended for meta self-eval: ee_query / usage_forensics / selfverify_* are the exact artifacts
+ * the native contract + native-capabilities tell the agent to rely on for "task finished?" and
+ * rehydrate during long meta conversations about CLI/PIL/compaction/EE. */
+export declare const IMPORTANT_TOOL_NAMES: readonly ["read_file", "grep", "lsp", "bash", "ee_query", "usage_forensics", "selfverify_start", "selfverify_result", "selfverify_status"];
 /**
  * Heuristic: keep full (no stub) for high-signal tool results.
  * Signals: allowlist tool + (error/todo/plan/keyfile/large output or explicit keep list).
@@ -116,8 +119,14 @@ export declare const IMPORTANT_TOOL_NAMES: readonly ["read_file", "grep", "lsp",
 export declare function isHighValueToolResult(toolName: string, preview: string, explicitKeepIds?: Set<string>, toolCallId?: string): boolean;
 export declare function cumulativeMessageChars(messages: ReadonlyArray<ModelMessage>): number;
 /**
- * Compact a sub-agent message array in place-like fashion. Returns a NEW
- * array; the input is not mutated. Below the threshold the original array
- * reference is returned for cheap identity comparison in tests.
+ * Compact a sub-agent message array in place-like fashion. The input is never
+ * mutated. When compaction actually elides something a NEW array is returned.
+ * On a no-op (below threshold, or too few tool turns to skip) the ORIGINAL input
+ * array is returned BY REFERENCE so callers can detect "did not compact this
+ * step" via identity (`compacted === input`). The B4 wiring in
+ * message-processor.ts (pre-compaction warning + compaction note gating) and the
+ * sub-agent wiring in stream-runner.ts both rely on this contract — returning a
+ * fresh slice on a no-op silently made the warning dead and the note fire every
+ * step.
  */
 export declare function compactSubAgentMessages(messages: ReadonlyArray<ModelMessage>, opts?: SubAgentCompactorOptions): ModelMessage[];

package/dist/src/orchestrator/subagent-compactor.js CHANGED Viewed

@@ -58,8 +58,21 @@ export const SUBAGENT_COMPACT_DEFAULT_THRESHOLD = 80_000;
 export const SUBAGENT_COMPACT_DEFAULT_KEEP_LAST = 3;
 const DEFAULT_OUTPUT_PREVIEW_CHARS = 200;
 const DEFAULT_LABEL = "sub-agent";
-/** Tools whose full outputs are high-value for anti-mù (idea 1). Keep verbatim even if older than keepLast. */
-export const IMPORTANT_TOOL_NAMES = ["read_file", "grep", "lsp", "bash"];
+/** Tools whose full outputs are high-value for anti-mù (idea 1). Keep verbatim even if older than keepLast.
+ * Extended for meta self-eval: ee_query / usage_forensics / selfverify_* are the exact artifacts
+ * the native contract + native-capabilities tell the agent to rely on for "task finished?" and
+ * rehydrate during long meta conversations about CLI/PIL/compaction/EE. */
+export const IMPORTANT_TOOL_NAMES = [
+    "read_file",
+    "grep",
+    "lsp",
+    "bash",
+    "ee_query",
+    "usage_forensics",
+    "selfverify_start",
+    "selfverify_result",
+    "selfverify_status",
+];
 /**
  * Heuristic: keep full (no stub) for high-signal tool results.
  * Signals: allowlist tool + (error/todo/plan/keyfile/large output or explicit keep list).
@@ -268,7 +281,9 @@ function rewriteOlderToolMessage(msg, previewChars, label, keepToolIds, persistA
             try {
                 persistArtifact(toolCallId, tr.toolName, rawPreview, "elided-by-compactor");
             }
-            catch { /* fail-open */ }
+            catch {
+                /* fail-open */
+            }
         }
         return {
             type: "tool-result",
@@ -282,9 +297,15 @@ function rewriteOlderToolMessage(msg, previewChars, label, keepToolIds, persistA
     return { ...msg, content: rewritten };
 }
 /**
- * Compact a sub-agent message array in place-like fashion. Returns a NEW
- * array; the input is not mutated. Below the threshold the original array
- * reference is returned for cheap identity comparison in tests.
+ * Compact a sub-agent message array in place-like fashion. The input is never
+ * mutated. When compaction actually elides something a NEW array is returned.
+ * On a no-op (below threshold, or too few tool turns to skip) the ORIGINAL input
+ * array is returned BY REFERENCE so callers can detect "did not compact this
+ * step" via identity (`compacted === input`). The B4 wiring in
+ * message-processor.ts (pre-compaction warning + compaction note gating) and the
+ * sub-agent wiring in stream-runner.ts both rely on this contract — returning a
+ * fresh slice on a no-op silently made the warning dead and the note fire every
+ * step.
  */
 export function compactSubAgentMessages(messages, opts = {}) {
     const resolved = resolveOpts(opts);
@@ -299,11 +320,12 @@ export function compactSubAgentMessages(messages, opts = {}) {
     // window utilization. Falls back to static char threshold + keepLast
     // when no contextWindowTokens supplied (preserves old behaviour).
     const { effectiveThresholdChars, effectiveKeepLastTurns } = computeDynamicParams(total, resolved);
+    // No-op: return the input BY REFERENCE (contract above) so `compacted === input`.
     if (total < effectiveThresholdChars)
-        return messages.slice();
+        return messages;
     const keepFrom = findKeepFromIndex(messages, effectiveKeepLastTurns);
     if (keepFrom <= 0)
-        return messages.slice();
+        return messages;
     // Walk older messages; rewrite fresh tool results into stubs, super-shrink
     // already-stubbed results (F1), and strip args off older assistant
     // tool-call shells (F1). The 1:1 assistant↔tool pairing required by the AI

package/dist/src/orchestrator/subagent-compactor.spec.js CHANGED Viewed

@@ -64,6 +64,24 @@ describe("subagent-compactor: compactSubAgentMessages", () => {
         // No tool-result rewrite happened — output object identity per part preserved.
         expect(out[3]).toBe(msgs[3]);
     });
+    it("returns the SAME array reference on a no-op below threshold (compacted===input contract)", () => {
+        // Callers (message-processor B4 prepareStep:1840/1908/1914) detect "did NOT
+        // compact this step" via `compacted === stripped`. The docstring promises the
+        // original ref on a no-op; returning a fresh slice silently broke that —
+        // making the pre-compaction warning dead and the compaction note fire every
+        // step. Lock the identity contract.
+        const msgs = buildHistory(2, 5); // below threshold
+        expect(compactSubAgentMessages(msgs)).toBe(msgs);
+    });
+    it("returns a NEW array when compaction actually elides (compacted!==input)", () => {
+        const msgs = buildHistory(10, 10); // ~100kb > threshold
+        for (const m of msgs) {
+            if (m.role === "tool" && Array.isArray(m.content)) {
+                m.content[0].toolName = "other_tool"; // force low-value so it elides
+            }
+        }
+        expect(compactSubAgentMessages(msgs)).not.toBe(msgs);
+    });
     it("compacts when cumulative chars exceed threshold", () => {
         const msgs = buildHistory(10, 10); // ~100kb of tool output
         // Neutralize to test pure size-based elision (high-value keep would reduce savings).

package/dist/src/pil/__tests__/layer6-output.test.js CHANGED Viewed

@@ -222,6 +222,15 @@ describe("getResponseToolSet — Phase 2b deliverableKind consume (model overrid
         // …and an explicit report request keeps it.
         expect(Object.keys(getResponseToolSet({ ...makeCtx("analyze", null), raw: "list all cost leaks" }))).toContain("respond_analyze");
     });
+    it("DROPS respond_* on an implement turn even when mis-classified as report (session 2b7a10219499)", () => {
+        // "lên plan rồi improvement … cải thiện X" is an implement turn the model
+        // tagged deliverable=report; the report-exception used to KEEP respond_plan,
+        // so the model stated a plan and ended the turn with edits done but
+        // uncommitted/unreported. Implementation intent must suppress the terminal
+        // tool BEFORE the deliverable branch is consulted.
+        expect(getResponseToolSet(ctxD("lên plan rồi improvement nhé, focus cải thiện Compaction", "plan", "report"))).toEqual({});
+        expect(getResponseToolSet(ctxD("improve the compactor and implement the fix", "plan", "report"))).toEqual({});
+    });
 });
 describe("applyPilSuffix — outputStyle variants", () => {
     const styles = ["concise", "detailed", "balanced"];
@@ -413,4 +422,16 @@ describe("isQuestionLike — Vietnamese yes/no question frames (regression: sess
         expect(isQuestionLike("explain the pipeline")).toBe(true);
     });
 });
+describe("isImplementationIntent — improve / cải thiện (regression: session 2b7a10219499)", () => {
+    it("recognises improve/improvement + VI cải thiện as implement turns", () => {
+        expect(isImplementationIntent("improve the compactor")).toBe(true);
+        expect(isImplementationIntent("lên plan rồi improvement nhé")).toBe(true);
+        expect(isImplementationIntent("focus cải thiện Compaction")).toBe(true);
+        expect(isImplementationIntent("cai thien phan compaction")).toBe(true);
+    });
+    it("does not over-match analysis questions that merely describe behaviour", () => {
+        expect(isImplementationIntent("what does the enrichment layer do?")).toBe(false);
+        expect(isImplementationIntent("why does the suite fail — break it down")).toBe(false);
+    });
+});
 //# sourceMappingURL=layer6-output.test.js.map

package/dist/src/pil/__tests__/surface-compaction-artifacts.test.d.ts ADDED Viewed

@@ -0,0 +1 @@
+export {};

package/dist/src/pil/__tests__/surface-compaction-artifacts.test.js ADDED Viewed

@@ -0,0 +1,112 @@
+import { beforeEach, describe, expect, test, vi } from "vitest";
+import { surfaceCompactionArtifacts } from "../layer3-ee-injection.js";
+// Issue #4 — targeted complement to layer3's checkpoint arm on meta turns.
+// layer3 (now run on meta after issue #2) surfaces checkpoints via a FIXED
+// recency query; this arm searches by the meta question (ctx.raw) to surface the
+// elided tool-artifacts relevant to it, and DEFERS when layer3 already injected a
+// checkpoint block. Mock the EE search + the audit log so the test stays offline.
+vi.mock("../../ee/bridge.js", () => ({
+    searchByText: vi.fn().mockResolvedValue([]),
+}));
+vi.mock("../../storage/interaction-log.js", () => ({
+    logInteraction: vi.fn(),
+}));
+import { searchByText } from "../../ee/bridge.js";
+function makeCtx(overrides = {}) {
+    return {
+        raw: "compaction cần cải thiện gì trong CLI",
+        enriched: "compaction cần cải thiện gì trong CLI",
+        taskType: "general",
+        domain: null,
+        confidence: 0.85,
+        outputStyle: "balanced",
+        tokenBudget: 2000,
+        metrics: null,
+        layers: [],
+        sessionId: "sess-meta-1",
+        ...overrides,
+    };
+}
+const artifactPoint = {
+    id: "art1",
+    score: 0.9,
+    payload: {
+        text: "tool-artifact id=call_7 toolName=read_file elided 4200 chars: src/orchestrator/compaction.ts createCompactionSummaryMessage ...",
+    },
+    collection: "experience-behavioral",
+};
+const checkpointPoint = {
+    id: "cp1",
+    score: 0.8,
+    payload: { text: "Context checkpoint summary ✔ DONE: extended IMPORTANT_TOOL_NAMES; tests 16/16" },
+    collection: "experience-behavioral",
+};
+const genericPoint = {
+    id: "gen1",
+    score: 0.97,
+    payload: { text: "Always run the full test suite before pushing" },
+    collection: "experience-behavioral",
+};
+describe("surfaceCompactionArtifacts (issue #4 — meta-turn auto-surface)", () => {
+    beforeEach(() => {
+        vi.mocked(searchByText).mockReset();
+        vi.mocked(searchByText).mockResolvedValue([]);
+    });
+    test("auto-surfaces [artifact] + checkpoint refs (and the rehydrate instruction) into enriched", async () => {
+        // biome-ignore lint/suspicious/noExplicitAny: test fixture shape mirrors EEPoint
+        vi.mocked(searchByText).mockResolvedValue([artifactPoint, checkpointPoint]);
+        const ctx = makeCtx();
+        const out = await surfaceCompactionArtifacts(ctx);
+        expect(out.enriched).toContain("[artifact]"); // artifact-typed line
+        expect(out.enriched).toContain("ee.query tool"); // how to rehydrate the full output
+        expect(out.enriched).toContain("call_7"); // the concrete tool-artifact id the agent can fetch
+        const layer = out.layers.find((l) => l.name === "ee-meta-artifacts");
+        expect(layer?.applied).toBe(true);
+        expect(layer?.delta).toContain("artifacts=2");
+        // Searches only the behavioral collection (where tool-artifacts are persisted).
+        expect(vi.mocked(searchByText)).toHaveBeenCalledWith(expect.stringContaining("tool-artifact"), ["experience-behavioral"], expect.any(Number), expect.any(Object));
+    });
+    test("no sessionId → unchanged, no EE call (no prior compaction to rehydrate)", async () => {
+        const ctx = makeCtx({ sessionId: undefined });
+        const out = await surfaceCompactionArtifacts(ctx);
+        expect(out.enriched).toBe(ctx.enriched);
+        expect(out.layers.find((l) => l.name === "ee-meta-artifacts")?.delta).toBe("no-session");
+        expect(vi.mocked(searchByText)).not.toHaveBeenCalled();
+    });
+    test("search failure is fail-open + recorded (delta=error=…, enriched unchanged)", async () => {
+        vi.mocked(searchByText).mockRejectedValue(new Error("EE down"));
+        const ctx = makeCtx();
+        const out = await surfaceCompactionArtifacts(ctx);
+        expect(out.enriched).toBe(ctx.enriched);
+        expect(out.layers.find((l) => l.name === "ee-meta-artifacts")?.delta).toMatch(/^error=/);
+    });
+    test("generic behavioral hits are filtered out (not mislabelled as artifacts)", async () => {
+        // biome-ignore lint/suspicious/noExplicitAny: test fixture shape mirrors EEPoint
+        vi.mocked(searchByText).mockResolvedValue([genericPoint]);
+        const ctx = makeCtx();
+        const out = await surfaceCompactionArtifacts(ctx);
+        expect(out.enriched).toBe(ctx.enriched);
+        expect(out.layers.find((l) => l.name === "ee-meta-artifacts")?.delta).toBe("no-artifacts");
+    });
+    test("defers to layer3 — skips with NO EE call when a checkpoint block is already present", async () => {
+        // layer3 ran first this turn and injected a checkpoint block (its marker is
+        // in enriched). The complement must not duplicate it or pay a 2nd round-trip.
+        const enriched = `${makeCtx().raw}\n[task checkpoints …]\n<!-- ee-checkpoint-injected:0123456789abcdef -->`;
+        const out = await surfaceCompactionArtifacts(makeCtx({ enriched }));
+        expect(out.layers.find((l) => l.name === "ee-meta-artifacts")?.delta).toBe("already-surfaced");
+        expect(out.enriched).toBe(enriched); // unchanged
+        expect(vi.mocked(searchByText)).not.toHaveBeenCalled();
+    });
+    test("idempotent — a second pass on its own output defers (marker it wrote is seen)", async () => {
+        // biome-ignore lint/suspicious/noExplicitAny: test fixture shape mirrors EEPoint
+        vi.mocked(searchByText).mockResolvedValue([artifactPoint]);
+        const first = await surfaceCompactionArtifacts(makeCtx());
+        expect(first.enriched).toContain("[artifact]");
+        expect(vi.mocked(searchByText)).toHaveBeenCalledTimes(1);
+        const second = await surfaceCompactionArtifacts(makeCtx({ enriched: first.enriched }));
+        expect(second.layers.find((l) => l.name === "ee-meta-artifacts")?.delta).toBe("already-surfaced");
+        expect(second.enriched).toBe(first.enriched); // not grown a second time
+        expect(vi.mocked(searchByText)).toHaveBeenCalledTimes(1); // no second round-trip
+    });
+});
+//# sourceMappingURL=surface-compaction-artifacts.test.js.map

package/dist/src/pil/layer3-ee-injection.d.ts CHANGED Viewed

@@ -16,3 +16,22 @@
  */
 import type { PipelineContext } from "./types.js";
 export declare function layer3EeInjection(ctx: PipelineContext): Promise<PipelineContext>;
+/**
+ * Issue #4 — meta-turn TARGETED complement to Layer 3's checkpoint arm.
+ *
+ * Since issue #2, Layer 3 now runs on the meta-analysis path too, so its
+ * checkpoint arm already surfaces recent checkpoints/artifacts for the agent.
+ * That arm uses a FIXED recency query, though — it isn't biased toward the
+ * current meta question. This arm fills that gap: it searches by `ctx.raw` so a
+ * self-evaluating agent sees the elided tool-artifacts RELEVANT to what it's
+ * analyzing, rendered via the same `formatTaskCheckpoints` so the `[artifact]
+ * … id=X` refs appear automatically instead of waiting on a manual `ee_query`.
+ *
+ * Defers to Layer 3: if a checkpoint block was already injected this turn (any
+ * `ee-checkpoint-injected` marker present) it skips entirely — no duplicate
+ * block and no second EE round-trip. Gated on `sessionId` (no session ⇒ no prior
+ * compaction to rehydrate). Strictly additive and fail-open: any error /
+ * no-session / no-match / already-surfaced returns ctx with the original
+ * `enriched` plus an `ee-meta-artifacts` layer marker for forensics.
+ */
+export declare function surfaceCompactionArtifacts(ctx: PipelineContext): Promise<PipelineContext>;

package/dist/src/pil/layer3-ee-injection.js CHANGED Viewed

@@ -119,7 +119,7 @@ async function queryEeBridge(raw) {
         const [principleRaw, behavioralRaw, checkpointRaw] = await Promise.all([
             searchByText(raw, ["experience-principles"], 3, signal),
             searchByText(raw, ["experience-behavioral"], 4, signal),
-            searchByText("Context checkpoint summary OR \"compaction checkpoint\" recent Progress DONE elided OR tool-artifact OR \"tool result id=\"", ["experience-behavioral"], 3, signal).catch(() => []),
+            searchByText('Context checkpoint summary OR "compaction checkpoint" recent Progress DONE elided OR tool-artifact OR "tool result id="', ["experience-behavioral"], 3, signal).catch(() => []),
         ]);
         const principlePoints = principleRaw.filter((p) => (p.score ?? 0) >= PIL_PRINCIPLES_FLOOR);
         const behavioralPoints = behavioralRaw.filter((p) => (p.score ?? 0) >= PIL_SCORE_FLOOR);
@@ -161,14 +161,16 @@ function formatExperienceHints(points) {
 function formatTaskCheckpoints(points) {
     if (points.length === 0)
         return "";
-    const lines = points.map((p) => {
+    const lines = points
+        .map((p) => {
         const t = extractPointText(p);
         // Idea 4: surface tool-artifact refs so agent sees "elided high-value, query for full"
         if (/tool-artifact|tool result id=|elided.*id=/.test(t.toLowerCase())) {
             return `- [artifact] ${t.slice(0, 160)} [id:${p.id}]`;
         }
         return `- ${t.slice(0, 180)} [id:${p.id}]`;
-    }).filter((l) => l !== "- ");
+    })
+        .filter((l) => l !== "- ");
     if (lines.length === 0)
         return "";
     return `[task checkpoints — prior compactions: use to answer "task finished?", "compacted yet?". Artifacts: use ee.query tool with "tool-artifact id=XXX" for full elided tool output.] \n${lines.join("\n")}`;
@@ -282,7 +284,7 @@ export async function layer3EeInjection(ctx) {
             const text = extractPointText(p);
             return text.length === 0 || !checkpointMarkerShas.has(payloadSha16(text));
         })
-        : (result.checkpointPoints || []);
+        : result.checkpointPoints || [];
     const allPoints = [...deduplicatedPrinciples, ...deduplicatedBehavioral, ...deduplicatedCheckpoints];
     // STALE-01: Register injected point IDs for prompt-stale reconciliation.
     updateLastSurfacedState(allPoints.map((p) => String(p.id)));
@@ -359,4 +361,94 @@ export async function layer3EeInjection(ctx) {
         ],
     };
 }
+/**
+ * Records whose text actually reads like a compaction checkpoint or an elided
+ * tool-artifact. Used to keep generic behavioral hits from being mislabelled as
+ * `[artifact]`/checkpoint lines when we search by the meta question (ctx.raw)
+ * rather than the fixed checkpoint-arm query.
+ */
+const CHECKPOINT_LIKE_RE = /context checkpoint summary|compaction checkpoint|tool-artifact|tool result id=|elided|progress[^a-z]*done|✔/i;
+/**
+ * Issue #4 — meta-turn TARGETED complement to Layer 3's checkpoint arm.
+ *
+ * Since issue #2, Layer 3 now runs on the meta-analysis path too, so its
+ * checkpoint arm already surfaces recent checkpoints/artifacts for the agent.
+ * That arm uses a FIXED recency query, though — it isn't biased toward the
+ * current meta question. This arm fills that gap: it searches by `ctx.raw` so a
+ * self-evaluating agent sees the elided tool-artifacts RELEVANT to what it's
+ * analyzing, rendered via the same `formatTaskCheckpoints` so the `[artifact]
+ * … id=X` refs appear automatically instead of waiting on a manual `ee_query`.
+ *
+ * Defers to Layer 3: if a checkpoint block was already injected this turn (any
+ * `ee-checkpoint-injected` marker present) it skips entirely — no duplicate
+ * block and no second EE round-trip. Gated on `sessionId` (no session ⇒ no prior
+ * compaction to rehydrate). Strictly additive and fail-open: any error /
+ * no-session / no-match / already-surfaced returns ctx with the original
+ * `enriched` plus an `ee-meta-artifacts` layer marker for forensics.
+ */
+export async function surfaceCompactionArtifacts(ctx) {
+    const markLayer = (applied, delta) => ({ // copy of ctx with one extra layer record; `enriched` untouched
+        ...ctx,
+        layers: [...ctx.layers, { name: "ee-meta-artifacts", applied, delta }],
+    });
+    if (!ctx.sessionId) // no session: bail out before any EE call
+        return markLayer(false, "no-session");
+    // Defer to Layer 3: a checkpoint/artifact block is already present this turn,
+    // so don't duplicate it or pay a second EE round-trip. This arm only fills the
+    // gap when Layer 3's fixed-query checkpoint arm surfaced nothing.
+    if (extractCheckpointMarkerShas(ctx.enriched).size > 0)
+        return markLayer(false, "already-surfaced");
+    let points = [];
+    try {
+        const signal = AbortSignal.timeout(PIL_SEARCH_TIMEOUT_MS); // aborts the search after the PIL search budget
+        // Bias toward records relevant to THIS meta question (ctx.raw) while pulling
+        // in checkpoint/artifact vocabulary so the single cheap arm lands on the
+        // compaction records rather than generic behavioral patterns.
+        const query = `${ctx.raw}\nContext checkpoint summary tool-artifact "tool result id=" elided Progress DONE`;
+        const raw = await searchByText(query, ["experience-behavioral"], 5, signal);
+        points = raw
+            .filter((p) => (p.score ?? 0) >= PIL_SCORE_FLOOR * 0.7) // missing score counts as 0; floor relaxed to 70% of PIL_SCORE_FLOOR
+            .filter((p) => CHECKPOINT_LIKE_RE.test(extractPointText(p))); // keep only checkpoint/artifact-looking text
+    }
+    catch (err) {
+        logEeFailure("pil.meta.surfaceCompactionArtifacts", classifyEeError(err), err, { budgetMs: PIL_SEARCH_TIMEOUT_MS });
+        return markLayer(false, `error=${String(err)}`); // fail-open: error is recorded in the layer delta only
+    }
+    if (points.length === 0)
+        return markLayer(false, "no-artifacts");
+    const cpText = formatTaskCheckpoints(points);
+    if (!cpText) // formatter returns "" when no line survives its own filtering
+        return markLayer(false, "no-artifacts");
+    // Append the marker AFTER truncation so it always survives into `enriched`
+    // — that marker is what makes the defer-check above fire on any later pass.
+    const blockSha = payloadSha16(cpText); // hashed from the UNtruncated text
+    const body = truncateToBudget(cpText, Math.floor(ctx.tokenBudget * 0.12)); // budget argument is 12% of ctx.tokenBudget
+    const block = `${body}\n<!-- ee-checkpoint-injected:${blockSha} -->`;
+    try {
+        if (ctx.sessionId) { // always truthy here (guarded at the top of the function)
+            logInteraction(ctx.sessionId, "ee_injection", {
+                eventSubtype: "injected",
+                data: {
+                    phase: "pil_meta_artifacts",
+                    role: "knowledge_retriever",
+                    checkpointCount: points.length,
+                    pointIds: points.map((p) => String(p.id)),
+                    injectedChars: block.length,
+                },
+            });
+        }
+    }
+    catch (err) {
+        // No silent catch: surfacing succeeded; only the audit write failed.
+        console.error(`[pil.meta.surfaceCompactionArtifacts] interaction log failed: ${err?.message}`);
+    }
+    return {
+        ...ctx,
+        enriched: `${ctx.enriched}\n${block}`, // appended after the existing enriched text, never replacing it
+        layers: [
+            ...ctx.layers,
+            { name: "ee-meta-artifacts", applied: true, delta: `artifacts=${points.length} chars=${block.length}` },
+        ],
+    };
+}
 //# sourceMappingURL=layer3-ee-injection.js.map

package/dist/src/pil/layer6-output.js CHANGED Viewed

@@ -258,12 +258,16 @@ export function applyPilSuffix(systemPrompt, ctx, responseToolsActive = false) {
  * OUTPUT RULES (graceful — exactly what code-heavy tasks already do), so a false
  * positive on an analysis turn only forgoes structured JSON, never breaks output.
  *
- * High-signal verbs only (implement/edit/wire/rewrite/rename/scaffold/refactor,
- * "make the change", "apply the fix/patch", VI equivalents). Bare "fix"/"replace"
- * are excluded — too common in analysis ("explain the fix") — so pure
- * analyze/plan/debug-investigation turns keep their structured output.
+ * High-signal verbs only (implement/edit/wire/rewrite/rename/scaffold/refactor/
+ * improve, "make the change", "apply the fix/patch", VI equivalents incl. "cải
+ * thiện"). Bare "fix"/"replace" are excluded — too common in analysis ("explain
+ * the fix") — so pure analyze/plan/debug-investigation turns keep their
+ * structured output. "improve(ment)" / "cải thiện" added after session
+ * 2b7a10219499: "lên plan rồi improvement … cải thiện Compaction" was an
+ * implement turn the model mis-classified as a `report`, so a terminal
+ * respond_plan ended it on a plan (edits done but uncommitted/unreported).
  */
-const IMPLEMENTATION_INTENT_RE = /\b(implement|edit|wire(?:\s+up)?|rewrite|rename|scaffold|refactor)\b|\bmake\s+(the\s+)?(change|edit|modification)s?\b|\bapply\s+(the\s+)?(fix|change|patch|edit|diff)\b|(?:^|\s)(triển\s*khai|trien\s*khai|chỉnh\s*sửa|chinh\s*sua|viết\s*lại|viet\s*lai|đổi\s*tên|doi\s*ten)\b/i;
+const IMPLEMENTATION_INTENT_RE = /\b(implement|edit|wire(?:\s+up)?|rewrite|rename|scaffold|refactor)\b|\bimprove(?:ment)?\b|\bmake\s+(the\s+)?(change|edit|modification)s?\b|\bapply\s+(the\s+)?(fix|change|patch|edit|diff)\b|(?:^|\s)(triển\s*khai|trien\s*khai|chỉnh\s*sửa|chinh\s*sua|viết\s*lại|viet\s*lai|đổi\s*tên|doi\s*ten|cải\s*thiện|cai\s*thien)\b/i;
 export function isImplementationIntent(raw) {
     return !!raw && IMPLEMENTATION_INTENT_RE.test(raw);
 }
@@ -340,6 +344,15 @@ export function getResponseToolSet(ctx, providerId) {
     //   - report → keep the structured tool (its value IS the structure).
     // Only when the model didn't emit a deliverable (null → legacy cascade / model
     // omitted the word) do we fall back to the legacy regex predicates.
+    // Implementation intent ALWAYS suppresses a terminal respond_* — checked
+    // BEFORE the deliverable branch so a mis-classified `report` can't bypass it
+    // (session 2b7a10219499: a "plan rồi improvement" implement turn got
+    // deliverable=report → the report-exception below kept respond_plan → the
+    // model stated a plan and ended the turn with edits done but uncommitted).
+    // A respond_* tool lets the model "answer" and stop before edits land, so any
+    // implement turn must fall through to the markdown OUTPUT RULES instead.
+    if (isImplementationIntent(ctx.raw))
+        return {};
     if (ctx.deliverableKind) {
         if (ctx.deliverableKind === "code")
             return {};
@@ -347,8 +360,6 @@ export function getResponseToolSet(ctx, providerId) {
             return {};
     }
     else {
-        if (isImplementationIntent(ctx.raw))
-            return {};
         if (ctx.taskType !== "general" && !prefersStructuredReport(ctx.raw))
             return {};
     }

package/dist/src/pil/pipeline.js CHANGED Viewed

@@ -22,7 +22,7 @@ import { isDiscoveryEnabled } from "./config.js";
 import { scoreComplexitySize } from "./layer1_5-complexity-size.js";
 import { layer1Intent } from "./layer1-intent.js";
 import { layer2Personality } from "./layer2-personality.js";
-import { layer3EeInjection } from "./layer3-ee-injection.js";
+import { layer3EeInjection, surfaceCompactionArtifacts } from "./layer3-ee-injection.js";
 import { layer4Gsd } from "./layer4-gsd.js";
 import { layer5Context } from "./layer5-context.js";
 import { isMetaAnalysisPrompt, layer6Output } from "./layer6-output.js";
@@ -144,15 +144,21 @@ async function runLayers(ctx, options) {
     }
     if (ctx.taskType !== null) {
         await timed("layer2-personality", layer2Personality);
+        // Issue #2: meta-analysis turns used to skip layer3 (EE recall) + layer5
+        // (context) to cut overhead — but that starved exactly the self-evaluation
+        // turns where behavioral/principle recall matters most. Run the full
+        // sequence for every taskType-bearing turn now. In the live (interactive)
+        // path there is no pipeline timeout (see runPipeline), and each EE layer is
+        // internally timeout-bounded, so meta turns just carry the same EE budget as
+        // a normal turn.
+        await timed("layer3-ee-injection", layer3EeInjection);
+        await timed("layer4-gsd-structuring", layer4Gsd);
+        await timed("layer5-context-enrichment", layer5Context);
         if (isMetaAnalysisPrompt(ctx.raw)) {
-            // FIX: skip heavy EE (layer3) + context (layer5) for meta-analysis turns
-            // to reduce PIL overhead on evaluation/improvement questions (as intended).
-            await timed("layer4-gsd-structuring", layer4Gsd);
-        }
-        else {
-            await timed("layer3-ee-injection", layer3EeInjection);
-            await timed("layer4-gsd-structuring", layer4Gsd);
-            await timed("layer5-context-enrichment", layer5Context);
+            // Issue #4 (targeted complement): surface the elided tool-artifacts
+            // RELEVANT to this meta question. Defers to layer3 — it only fires when
+            // layer3's fixed-query checkpoint arm surfaced no checkpoint block.
+            await timed("ee-meta-artifacts", surfaceCompactionArtifacts);
         }
     }
     else {

package/dist/src/tools/registry-ee-query.test.js CHANGED Viewed

@@ -9,7 +9,8 @@
  * (no network).
  */
 import os from "node:os";
-import { describe, expect, it } from "vitest";
+import { afterEach, describe, expect, it } from "vitest";
+import { __resetArtifactCacheForTests, recordArtifact } from "../ee/artifact-cache.js";
 import { BashTool } from "./bash.js";
 import { createBuiltinTools, isToolArtifactQuery } from "./registry.js";
 describe("ee_query builtin tool", () => {
@@ -45,4 +46,20 @@ describe("isToolArtifactQuery — ee_query intent routing", () => {
         expect(isToolArtifactQuery("tool-artifact storage design")).toBe(false);
     });
 });
+describe("ee_query — anti-mù rehydrate (local-first, durable when EE is down)", () => {
+    afterEach(() => __resetArtifactCacheForTests());
+    it("rehydrates a tool-artifact from the in-session cache with NO EE/network call", async () => {
+        // Simulates: the compactor elided this output earlier (recordArtifact), EE is
+        // now down. The agent's ee_query("tool-artifact id=X") must still return the
+        // full content from the local cache rather than an [ee_unavailable] note.
+        recordArtifact("call_42", "read_file", "FULL ELIDED CONTENT — line A\nline B\nline C");
+        const tools = createBuiltinTools(new BashTool(os.tmpdir()), "agent");
+        const t = tools.ee_query;
+        const out = String(await t.execute?.({ query: "tool-artifact id=call_42" }));
+        expect(out).toContain("rehydrated from in-session cache");
+        expect(out).toContain("tool=read_file");
+        expect(out).toContain("FULL ELIDED CONTENT");
+        expect(out).not.toMatch(/ee_unavailable/);
+    });
+});
 //# sourceMappingURL=registry-ee-query.test.js.map

package/dist/src/tools/registry.js CHANGED Viewed

@@ -466,14 +466,25 @@ export function createBuiltinTools(bash, mode, opts) {
                 }
                 try {
                     if (isToolArtifactQuery(query)) {
-                        // Artifact rehydration → raw /api/search (exact-collection lookup).
+                        // Local-first ("anti-mù", i.e. anti-blindness, durability): the compactor records each elided
+                        // output in-process by toolCallId. For an exact "tool-artifact id=X"
+                        // lookup this is the authoritative full content for THIS session and
+                        // works even when EE is down — the failure window long sessions hit.
+                        const { findArtifactByQuery, findArtifactOnDisk } = await import("../ee/artifact-cache.js");
+                        const mem = findArtifactByQuery(query);
+                        const local = mem ?? (await findArtifactOnDisk(query));
+                        if (local) {
+                            const src = mem ? "in-session cache" : "local disk cache";
+                            return truncateOutput(`[tool-artifact id=${local.toolCallId} tool=${local.toolName} — rehydrated from ${src}]\n${local.content}`);
+                        }
+                        // EE fallback (cross-session / post-restart) → raw /api/search exact lookup.
                         const { searchEE } = await import("../ee/search.js");
                         const resp = await searchEE(query, {
                             ...(Array.isArray(input?.collections) ? { collections: input.collections } : {}),
                             ...(typeof input?.limit === "number" ? { limit: input.limit } : {}),
                         });
                         if (resp === null) {
-                            return "[ee_unavailable] Experience Engine returned no response (server down, timeout, circuit open, or unconfigured). Proceed without EE recall — re-read the source directly if you need the elided content.";
+                            return "[ee_unavailable] Experience Engine returned no response (server down, timeout, circuit open, or unconfigured) and the artifact is not in this session's local cache. Proceed without EE recall — re-read the source directly if you need the elided content.";
                         }
                         return truncateOutput(JSON.stringify(resp));
                     }

package/package.json CHANGED Viewed

@@ -3,7 +3,7 @@
   "workspaces": [
     "packages/*"
   ],
-  "version": "1.5.0",
+  "version": "1.6.0",
   "description": "BYOK AI coding agent with multi-model council debate, role-based routing, and auto-compact.",
   "repository": {
     "type": "git",