muonroi-cli 1.5.0 → 1.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. package/dist/src/cli/cost-forensics.d.ts +3 -0
  2. package/dist/src/cli/cost-forensics.js +11 -0
  3. package/dist/src/cli/cost-forensics.test.js +1 -0
  4. package/dist/src/cli/experience-report.d.ts +20 -0
  5. package/dist/src/cli/experience-report.js +76 -0
  6. package/dist/src/cli/experience-report.test.d.ts +5 -0
  7. package/dist/src/cli/experience-report.test.js +63 -0
  8. package/dist/src/ee/artifact-cache.d.ts +56 -0
  9. package/dist/src/ee/artifact-cache.js +155 -0
  10. package/dist/src/ee/artifact-cache.test.d.ts +1 -0
  11. package/dist/src/ee/artifact-cache.test.js +69 -0
  12. package/dist/src/ee/search.js +7 -5
  13. package/dist/src/ee/search.test.d.ts +1 -0
  14. package/dist/src/ee/search.test.js +23 -0
  15. package/dist/src/generated/version.d.ts +1 -1
  16. package/dist/src/generated/version.js +1 -1
  17. package/dist/src/gsd/__tests__/directives.test.js +24 -1
  18. package/dist/src/gsd/directives.d.ts +22 -0
  19. package/dist/src/gsd/directives.js +34 -10
  20. package/dist/src/index.js +9 -0
  21. package/dist/src/mcp/__tests__/client-pool.spec.js +54 -4
  22. package/dist/src/mcp/__tests__/forensics-tools.test.js +1 -0
  23. package/dist/src/mcp/client-pool.d.ts +9 -2
  24. package/dist/src/mcp/client-pool.js +60 -21
  25. package/dist/src/orchestrator/compaction.d.ts +2 -0
  26. package/dist/src/orchestrator/compaction.js +14 -1
  27. package/dist/src/orchestrator/compaction.test.js +25 -1
  28. package/dist/src/orchestrator/message-processor.js +49 -7
  29. package/dist/src/orchestrator/scope-reminder.d.ts +12 -0
  30. package/dist/src/orchestrator/scope-reminder.js +16 -0
  31. package/dist/src/orchestrator/scope-reminder.test.js +22 -1
  32. package/dist/src/orchestrator/session-experience.d.ts +89 -0
  33. package/dist/src/orchestrator/session-experience.js +169 -0
  34. package/dist/src/orchestrator/session-experience.test.d.ts +6 -0
  35. package/dist/src/orchestrator/session-experience.test.js +72 -0
  36. package/dist/src/orchestrator/stream-runner.js +7 -0
  37. package/dist/src/orchestrator/subagent-compactor.d.ts +14 -5
  38. package/dist/src/orchestrator/subagent-compactor.js +30 -8
  39. package/dist/src/orchestrator/subagent-compactor.spec.js +18 -0
  40. package/dist/src/pil/__tests__/layer3-ee-injection.test.js +5 -3
  41. package/dist/src/pil/__tests__/layer3-injected-chunk.test.js +31 -0
  42. package/dist/src/pil/__tests__/layer6-output.test.js +21 -0
  43. package/dist/src/pil/__tests__/pipeline.test.js +17 -0
  44. package/dist/src/pil/__tests__/surface-compaction-artifacts.test.d.ts +1 -0
  45. package/dist/src/pil/__tests__/surface-compaction-artifacts.test.js +112 -0
  46. package/dist/src/pil/layer3-ee-injection.d.ts +28 -0
  47. package/dist/src/pil/layer3-ee-injection.js +125 -4
  48. package/dist/src/pil/layer4-gsd.js +3 -2
  49. package/dist/src/pil/layer6-output.js +18 -7
  50. package/dist/src/pil/pipeline.js +26 -9
  51. package/dist/src/pil/session-experience-injection.d.ts +34 -0
  52. package/dist/src/pil/session-experience-injection.js +54 -0
  53. package/dist/src/pil/session-experience-injection.test.d.ts +6 -0
  54. package/dist/src/pil/session-experience-injection.test.js +79 -0
  55. package/dist/src/storage/interaction-log.d.ts +1 -1
  56. package/dist/src/storage/interaction-log.js +17 -4
  57. package/dist/src/storage/session-experience-store.d.ts +63 -0
  58. package/dist/src/storage/session-experience-store.js +164 -0
  59. package/dist/src/storage/session-experience-store.test.d.ts +5 -0
  60. package/dist/src/storage/session-experience-store.test.js +86 -0
  61. package/dist/src/storage/ui-interaction-log.js +4 -2
  62. package/dist/src/tools/registry-ee-query.test.js +24 -1
  63. package/dist/src/tools/registry.js +20 -2
  64. package/dist/src/types/index.d.ts +6 -0
  65. package/dist/src/ui/app.js +0 -0
  66. package/package.json +1 -1
@@ -8,6 +8,7 @@
8
8
  * confirm that sub-agent context no longer balloons past 80k input
9
9
  * after the cumulative cap kicks in.
10
10
  */
11
+ import type { SessionExperienceCounts } from "../orchestrator/session-experience.js";
11
12
  export interface CostForensicsRow {
12
13
  id: number;
13
14
  source: string;
@@ -35,6 +36,8 @@ export interface CostForensicsSummary {
35
36
  cacheHitRatio: number;
36
37
  peakSingleCallInput: number;
37
38
  events: CostForensicsRow[];
39
+ /** Anti-mù counters for this session (null when none recorded). */
40
+ experience: SessionExperienceCounts | null;
38
41
  }
39
42
  /**
40
43
  * Return ALL session ids matching a prefix (newest first, capped at 5).
@@ -11,6 +11,7 @@
11
11
  import { getProviderCapabilities } from "../providers/capabilities.js";
12
12
  import { detectProviderForModel } from "../providers/runtime.js";
13
13
  import { getDatabase } from "../storage/db.js";
14
+ import { selectSessionExperience } from "../storage/session-experience-store.js";
14
15
  function resolveSessionId(prefix) {
15
16
  const rows = getDatabase()
16
17
  .prepare(`SELECT id FROM sessions WHERE id LIKE ? ORDER BY created_at DESC LIMIT 5`)
@@ -103,6 +104,7 @@ export function collectCostForensics(sessionId) {
103
104
  cacheHitRatio,
104
105
  peakSingleCallInput,
105
106
  events,
107
+ experience: selectSessionExperience(sessionId),
106
108
  };
107
109
  }
108
110
  export function computeCacheCadence(events) {
@@ -164,6 +166,15 @@ export function printCostForensics(summary, opts = {}) {
164
166
  `(~${formatNum(cadence.estReBilledTokens)} tok re-billed). ` +
165
167
  `Likely fast tool-loop latency — fewer/batched tool rounds recover this.`);
166
168
  }
169
+ // Anti-mù counters for this session (rec #1 persisted forensics).
170
+ if (summary.experience) {
171
+ const x = summary.experience;
172
+ const rehydrated = x.rehydratedCache + x.rehydratedDisk + x.rehydratedEe;
173
+ w(`Anti-mù: ${x.compactions} compaction(s), ${x.elided} tool output(s) elided` +
174
+ `${x.elided > 0 ? ` (${formatNum(x.totalElidedChars)} chars)` : ""}, ` +
175
+ `${rehydrated} rehydrated (cache=${x.rehydratedCache} disk=${x.rehydratedDisk} ee=${x.rehydratedEe}), ` +
176
+ `${x.unavailable} needed-but-unavailable.`);
177
+ }
167
178
  w(``);
168
179
  w(`Per-event breakdown:`);
169
180
  w(`${"seq".padEnd(5)}${"src".padEnd(10)}${"input".padStart(9)}${"out".padStart(7)}${"cacheR".padStart(9)}${"cacheC".padStart(8)} ts`);
@@ -62,6 +62,7 @@ function summary(events) {
62
62
  cacheHitRatio: totalInput > 0 ? totalCacheRead / totalInput : 0,
63
63
  peakSingleCallInput: Math.max(0, ...events.map((e) => e.inputTokens)),
64
64
  events,
65
+ experience: null,
65
66
  };
66
67
  }
67
68
  function captureStdout(fn) {
@@ -0,0 +1,20 @@
1
+ /**
2
+ * src/cli/experience-report.ts
3
+ *
4
+ * `muonroi-cli usage experience` — cross-session anti-mù telemetry. Aggregates
5
+ * the per-session session_experience snapshots to answer the measure-before-
6
+ * re-architecting question: how often does compaction actually elide a tool
7
+ * output, and when the agent goes back for one, can it recover it?
8
+ *
9
+ * This is the data gate for the deferred anti-mù re-architecture (auto-protect /
10
+ * auto-rehydrate). Low elision rate or high recovery rate ⇒ the friction is rare
11
+ * or cognitive, not data-loss ⇒ defer. High unavailable / low recovery ⇒ real
12
+ * loss ⇒ justified.
13
+ */
14
+ import { type ExperienceAggregate } from "../storage/session-experience-store.js";
15
+ /** Pure renderer — returns the report lines so it is unit-testable without a DB. */
16
+ export declare function renderExperienceAggregate(agg: ExperienceAggregate, limit: number): string[];
17
+ export declare function runExperienceReport(opts?: {
18
+ limit?: number;
19
+ json?: boolean;
20
+ }): Promise<void>;
@@ -0,0 +1,76 @@
1
+ /**
2
+ * src/cli/experience-report.ts
3
+ *
4
+ * `muonroi-cli usage experience` — cross-session anti-mù telemetry. Aggregates
5
+ * the per-session session_experience snapshots to answer the measure-before-
6
+ * re-architecting question: how often does compaction actually elide a tool
7
+ * output, and when the agent goes back for one, can it recover it?
8
+ *
9
+ * This is the data gate for the deferred anti-mù re-architecture (auto-protect /
10
+ * auto-rehydrate). Low elision rate or high recovery rate ⇒ the friction is rare
11
+ * or cognitive, not data-loss ⇒ defer. High unavailable / low recovery ⇒ real
12
+ * loss ⇒ justified.
13
+ */
14
+ import { aggregateSessionExperience } from "../storage/session-experience-store.js";
15
/**
 * Format `n / d` as a whole-number percentage string.
 * Returns an em dash when the denominator is not a positive number, so a
 * report line never shows a division-by-zero artifact.
 */
function pct(n, d) {
    if (!(d > 0)) {
        return "—";
    }
    const percentage = (n / d) * 100;
    return `${percentage.toFixed(0)}%`;
}
18
/** Render a number with en-US digit grouping (e.g. 1234567 -> "1,234,567"). */
function num(n) {
    const grouped = n.toLocaleString("en-US");
    return grouped;
}
21
/**
 * Pure renderer for the cross-session experience aggregate. It performs no I/O
 * and returns the report as an array of lines, so it can be unit-tested
 * without a database.
 *
 * @param agg   Aggregate with `sessionCount`, `sessionsWithElision`,
 *              `sessionsWithUnavailable`, `totals` and `rehydrateRecoveryRate`.
 * @param limit Session cap that was requested; only echoed in the header.
 * @returns The report lines, in display order.
 */
export function renderExperienceAggregate(agg, limit) {
    const { totals, sessionCount, sessionsWithElision, sessionsWithUnavailable } = agg;
    const rehydratedTotal = totals.rehydratedCache + totals.rehydratedDisk + totals.rehydratedEe;
    const lines = [
        "",
        `Session-experience aggregate — latest ${sessionCount} session(s) with a snapshot (cap ${limit})`,
        "─".repeat(72),
    ];
    // No snapshots at all: explain why and stop before any ratio is computed.
    if (sessionCount === 0) {
        lines.push(
            "No session_experience snapshots recorded yet.",
            "Run some real (non-meta) sessions, then re-check — compaction only",
            "persists a snapshot once it actually elides / rehydrates something.",
        );
        return lines;
    }
    const elisionShare = pct(sessionsWithElision, sessionCount);
    const unavailableShare = pct(sessionsWithUnavailable, sessionCount);
    lines.push(
        `Sessions with compaction elision: ${sessionsWithElision} (${elisionShare})`,
        `Sessions hitting needed-but-unavail: ${sessionsWithUnavailable} (${unavailableShare})`,
        "",
        "Totals across those sessions:",
        ` Compactions fired: ${num(totals.compactions)}`,
        ` Tool outputs elided: ${num(totals.elided)} (${num(totals.totalElidedChars)} chars)`,
        ` Rehydrated via ee_query: ${num(rehydratedTotal)} (cache=${totals.rehydratedCache} disk=${totals.rehydratedDisk} ee=${totals.rehydratedEe})`,
        ` Needed-but-unavailable: ${num(totals.unavailable)}`,
        ` EE timeouts / errors: ${num(totals.eeTimeouts)} / ${num(totals.eeErrors)}`,
        "",
        `Rehydrate recovery rate: ${(agg.rehydrateRecoveryRate * 100).toFixed(0)}% (rehydrated / (rehydrated + unavailable))`,
        "",
        // Decision signal for the deferred re-architecture.
        "Re-architecture decision signal:",
    );
    // Nothing was ever elided: a single DEFER bullet and no further signals.
    if (totals.elided === 0) {
        lines.push(" • Compaction has not elided anything — friction is not occurring. DEFER.");
        return lines;
    }
    // The "rare" bullet is additive; the recovery bullet below is always emitted too.
    if (sessionsWithElision / sessionCount < 0.2) {
        lines.push(` • Elision bites in only ${elisionShare} of sessions — rare. Likely DEFER.`);
    }
    const recoveryIsHealthy = agg.rehydrateRecoveryRate >= 0.9 || totals.unavailable === 0;
    if (recoveryIsHealthy) {
        lines.push(" • Recovery rate high / no unavailable — manual rehydrate works; friction is cognitive, not data-loss. Manifest+keepLast likely enough.");
    }
    else {
        lines.push(` • Recovery rate ${(agg.rehydrateRecoveryRate * 100).toFixed(0)}% with ${num(totals.unavailable)} unrecoverable — real data loss. Auto-protect/auto-rehydrate JUSTIFIED.`);
    }
    return lines;
}
66
/**
 * CLI entry point: aggregate the stored session-experience snapshots and write
 * the result to stdout.
 *
 * @param opts.limit Session cap; any falsy or non-positive value falls back to 100.
 * @param opts.json  When truthy, emit the raw aggregate as pretty-printed JSON
 *                   instead of the human-readable report.
 */
export async function runExperienceReport(opts = {}) {
    let limit = 100;
    if (opts.limit && opts.limit > 0) {
        limit = opts.limit;
    }
    const aggregate = aggregateSessionExperience(limit);
    if (opts.json) {
        process.stdout.write(`${JSON.stringify(aggregate, null, 2)}\n`);
        return;
    }
    renderExperienceAggregate(aggregate, limit).forEach((line) => {
        process.stdout.write(`${line}\n`);
    });
}
76
+ //# sourceMappingURL=experience-report.js.map
@@ -0,0 +1,5 @@
1
+ /**
2
+ * experience-report renderer — the cross-session decision signal that gates the
3
+ * deferred anti-mù auto-protect/auto-rehydrate re-architecture.
4
+ */
5
+ export {};
@@ -0,0 +1,63 @@
1
+ /**
2
+ * experience-report renderer — the cross-session decision signal that gates the
3
+ * deferred anti-mù auto-protect/auto-rehydrate re-architecture.
4
+ */
5
+ import { describe, expect, it } from "vitest";
6
+ import { renderExperienceAggregate } from "./experience-report.js";
7
/** Test fixture: an all-zero counter record with the given fields overridden. */
function counts(p = {}) {
    const zeroed = {
        compactions: 0,
        elided: 0,
        totalElidedChars: 0,
        rehydratedCache: 0,
        rehydratedDisk: 0,
        rehydratedEe: 0,
        unavailable: 0,
        eeTimeouts: 0,
        eeErrors: 0,
    };
    return Object.assign(zeroed, p);
}
21
/**
 * Test fixture: an aggregate with benign defaults (one session, no elision,
 * full recovery, no per-session rows). `totals` is always run through
 * `counts`, so partial totals are filled with zeros.
 */
function agg(p = {}) {
    const sessionCount = p.sessionCount ?? 1;
    const sessionsWithElision = p.sessionsWithElision ?? 0;
    const sessionsWithUnavailable = p.sessionsWithUnavailable ?? 0;
    const totals = counts(p.totals);
    const rehydrateRecoveryRate = p.rehydrateRecoveryRate ?? 1;
    const perSession = p.perSession ?? [];
    return {
        sessionCount,
        sessionsWithElision,
        sessionsWithUnavailable,
        totals,
        rehydrateRecoveryRate,
        perSession,
    };
}
31
describe("renderExperienceAggregate", () => {
    // Every case renders with the same cap and inspects the joined text.
    const renderText = (aggregate) => renderExperienceAggregate(aggregate, 100).join("\n");
    it("reports the no-data case clearly", () => {
        const noSnapshots = agg({ sessionCount: 0 });
        expect(renderText(noSnapshots)).toContain("No session_experience snapshots recorded yet");
    });
    it("signals DEFER when nothing was ever elided", () => {
        const neverElided = agg({ sessionCount: 5, totals: { compactions: 3 } });
        const text = renderText(neverElided);
        expect(text).toContain("has not elided anything");
        expect(text).toContain("DEFER");
    });
    it("signals cognitive-not-data-loss when recovery is high / no unavailable", () => {
        const fullyRecovered = agg({
            sessionCount: 4,
            sessionsWithElision: 3,
            rehydrateRecoveryRate: 1,
            totals: { compactions: 5, elided: 20, rehydratedCache: 8 },
        });
        const text = renderText(fullyRecovered);
        expect(text).toMatch(/cognitive, not data-loss/);
        expect(text).not.toMatch(/JUSTIFIED/);
    });
    it("signals re-architecture JUSTIFIED when recovery is low with unrecoverable artifacts", () => {
        const lossy = agg({
            sessionCount: 6,
            sessionsWithElision: 5,
            sessionsWithUnavailable: 4,
            rehydrateRecoveryRate: 0.3,
            totals: { compactions: 10, elided: 40, rehydratedEe: 3, unavailable: 7 },
        });
        const text = renderText(lossy);
        expect(text).toContain("real data loss");
        expect(text).toContain("JUSTIFIED");
    });
});
63
+ //# sourceMappingURL=experience-report.test.js.map
@@ -0,0 +1,56 @@
1
+ /**
2
+ * src/ee/artifact-cache.ts
3
+ *
4
+ * Durable fallback for compaction-elided tool outputs (issue #3 increment 2 /
5
+ * anti-mù durability).
6
+ *
7
+ * When B3/B4 compaction rewrites a low-value tool result into a ~200-char stub,
8
+ * the full content is shipped to the Experience Engine (source="tool-artifact")
9
+ * so a later `ee_query("tool-artifact id=X")` can rehydrate it. But that recovery
10
+ * depends on EE (Qdrant/HTTP) being reachable. This module is the EE-independent
11
+ * recovery path, in two tiers:
12
+ * - in-process LRU (keyed by toolCallId): authoritative full content for THIS
13
+ * session, instant, survives an EE outage mid-session;
14
+ * - append-only disk spill (~/.muonroi-cli/artifact-cache.jsonl): survives a
15
+ * PROCESS RESTART too, so a restart + EE-down double-failure can still
16
+ * rehydrate. Disable with MUONROI_ARTIFACT_CACHE_DISK=0.
17
+ *
18
+ * ee_query reads in-memory first, then disk, then falls back to EE /api/search
19
+ * (the cross-session source). Both tiers are bounded; both are best-effort and
20
+ * fail-open (a disk error never breaks recall).
21
+ */
22
+ export interface ArtifactEntry {
23
+ toolName: string;
24
+ content: string;
25
+ }
26
+ /**
27
+ * Record an elided tool output by toolCallId. In-memory set is synchronous;
28
+ * the disk append is fire-and-forget (tracked so tests can flush it). No-ops on
29
+ * empty id/content.
30
+ */
31
+ export declare function recordArtifact(toolCallId: string, toolName: string, content: string): void;
32
+ /** The actual disk append (awaitable). Resets the file when it exceeds the size cap. */
33
+ export declare function appendArtifactToDisk(toolCallId: string, toolName: string, content: string): Promise<void>;
34
+ /** Exact in-memory lookup by toolCallId. */
35
+ export declare function getArtifact(toolCallId: string): ArtifactEntry | null;
36
+ /**
37
+ * Synchronous in-memory lookup from a contract query string. Returns null when
38
+ * the query has no id= or the id is not in the in-process LRU.
39
+ */
40
+ export declare function findArtifactByQuery(query: string): (ArtifactEntry & {
41
+ toolCallId: string;
42
+ }) | null;
43
+ /**
44
+ * Disk-tier lookup (survives restart). Scans the spill file newest-first so the
45
+ * most recent record for an id wins. Fail-open: a missing/corrupt file yields
46
+ * null, never throws.
47
+ */
48
+ export declare function findArtifactOnDisk(query: string): Promise<(ArtifactEntry & {
49
+ toolCallId: string;
50
+ }) | null>;
51
+ export declare function __resetArtifactCacheForTests(): void;
52
+ export declare function __setArtifactCacheMaxForTests(n: number): void;
53
+ export declare function __setArtifactCacheDiskPathForTests(p: string | null): void;
54
+ export declare function __artifactCacheSize(): number;
55
+ /** Await all in-flight fire-and-forget disk writes (deterministic tests). */
56
+ export declare function flushArtifactDiskWrites(): Promise<void>;
@@ -0,0 +1,155 @@
1
+ /**
2
+ * src/ee/artifact-cache.ts
3
+ *
4
+ * Durable fallback for compaction-elided tool outputs (issue #3 increment 2 /
5
+ * anti-mù durability).
6
+ *
7
+ * When B3/B4 compaction rewrites a low-value tool result into a ~200-char stub,
8
+ * the full content is shipped to the Experience Engine (source="tool-artifact")
9
+ * so a later `ee_query("tool-artifact id=X")` can rehydrate it. But that recovery
10
+ * depends on EE (Qdrant/HTTP) being reachable. This module is the EE-independent
11
+ * recovery path, in two tiers:
12
+ * - in-process LRU (keyed by toolCallId): authoritative full content for THIS
13
+ * session, instant, survives an EE outage mid-session;
14
+ * - append-only disk spill (~/.muonroi-cli/artifact-cache.jsonl): survives a
15
+ * PROCESS RESTART too, so a restart + EE-down double-failure can still
16
+ * rehydrate. Disable with MUONROI_ARTIFACT_CACHE_DISK=0.
17
+ *
18
+ * ee_query reads in-memory first, then disk, then falls back to EE /api/search
19
+ * (the cross-session source). Both tiers are bounded; both are best-effort and
20
+ * fail-open (a disk error never breaks recall).
21
+ */
22
+ import { appendFile, mkdir, readFile, stat, writeFile } from "node:fs/promises";
23
+ import os from "node:os";
24
+ import path from "node:path";
25
+ const DEFAULT_MAX_ENTRIES = 100;
26
+ /** Per-entry cap so one giant output can't dominate the footprint. */
27
+ const MAX_CONTENT_CHARS = 200_000;
28
+ /** Disk-file size cap; on overflow the file is reset (EE retains older artifacts). */
29
+ const DISK_MAX_BYTES = 8 * 1024 * 1024;
30
+ const store = new Map();
31
+ let maxEntries = DEFAULT_MAX_ENTRIES;
32
+ let diskPathOverride = null;
33
+ const pendingWrites = new Set();
34
/** The disk tier is on unless MUONROI_ARTIFACT_CACHE_DISK is exactly "0". */
function diskEnabled() {
    const flag = process.env.MUONROI_ARTIFACT_CACHE_DISK;
    return flag !== "0";
}
37
/**
 * Location of the spill file: the override installed by the test hook when one
 * is set, otherwise ~/.muonroi-cli/artifact-cache.jsonl.
 */
function diskPath() {
    if (diskPathOverride !== null && diskPathOverride !== undefined) {
        return diskPathOverride;
    }
    return path.join(os.homedir(), ".muonroi-cli", "artifact-cache.jsonl");
}
40
/**
 * Pull the artifact id out of a contract query such as
 * "tool-artifact id=<id>" or "full tool result id=<id>".
 * Matching is case-insensitive, tolerates whitespace around "=" and one
 * optional opening quote. Returns null when the query carries no id.
 */
function extractArtifactId(query) {
    const idPattern = /\bid\s*=\s*["']?([A-Za-z0-9_\-:.]+)/i;
    const found = idPattern.exec(query || "");
    if (found === null) {
        return null;
    }
    return found[1];
}
45
/**
 * Remember an elided tool output under its toolCallId.
 *
 * The in-memory write happens synchronously. When the disk tier is enabled,
 * the disk append is started but not awaited; its promise is tracked in
 * `pendingWrites` so `flushArtifactDiskWrites` can wait for it. Calls with an
 * empty id, non-string content, or empty content are ignored.
 */
export function recordArtifact(toolCallId, toolName, content) {
    const hasContent = typeof content === "string" && content.length !== 0;
    if (!toolCallId || !hasContent) {
        return;
    }
    const name = toolName || "";
    const body = content.slice(0, MAX_CONTENT_CHARS);
    // A Map iterates in insertion order, so delete + set moves the key to the
    // newest position (deleting an absent key is a no-op).
    store.delete(toolCallId);
    store.set(toolCallId, { toolName: name, content: body });
    // Drop entries from the oldest end until the cache is back under its cap.
    for (const oldestKey of store.keys()) {
        if (!(store.size > maxEntries)) {
            break;
        }
        store.delete(oldestKey);
    }
    if (!diskEnabled()) {
        return;
    }
    // Fire-and-forget: a failed spill is logged and never propagates to the caller.
    const pending = appendArtifactToDisk(toolCallId, name, body).catch((err) => {
        console.error(`[artifact-cache] disk append failed: ${err?.message}`);
    });
    pendingWrites.add(pending);
    void pending.finally(() => pendingWrites.delete(pending));
}
71
/**
 * Append one artifact record to the spill file as a single JSON line
 * (awaitable). Creates the parent directory if needed. The size check runs
 * before the append: a file already larger than the cap is truncated to empty
 * first.
 */
export async function appendArtifactToDisk(toolCallId, toolName, content) {
    const target = diskPath();
    await mkdir(path.dirname(target), { recursive: true });
    try {
        const { size } = await stat(target);
        if (size > DISK_MAX_BYTES) {
            await writeFile(target, "");
        }
    }
    catch {
        // Typically the file does not exist yet, so there is nothing to cap.
        // Any other stat/truncate failure is ignored here as well; the append
        // below still runs and reports its own error to the caller.
    }
    const record = JSON.stringify({ id: toolCallId, toolName, content });
    await appendFile(target, `${record}\n`);
}
85
/**
 * Exact in-memory lookup by toolCallId.
 * Returns null for an empty id or an id that is not cached.
 */
export function getArtifact(toolCallId) {
    const entry = toolCallId ? store.get(toolCallId) : undefined;
    return entry ?? null;
}
91
/**
 * Synchronous in-memory lookup driven by a contract query string.
 * Yields null when the query contains no `id=` or the id is not present in the
 * in-process cache; otherwise returns the entry together with its toolCallId.
 */
export function findArtifactByQuery(query) {
    const toolCallId = extractArtifactId(query);
    const cached = toolCallId ? store.get(toolCallId) : undefined;
    if (!cached) {
        return null;
    }
    const { toolName, content } = cached;
    return { toolCallId, toolName, content };
}
102
/**
 * Disk-tier lookup. Reads the whole spill file and walks its lines from last
 * to first, so the most recently appended record for an id wins.
 *
 * Fail-open: a disabled disk tier, a query without an id, an unreadable file,
 * or lines that are not usable JSON all lead to null (or a skipped line)
 * rather than an exception.
 */
export async function findArtifactOnDisk(query) {
    const id = diskEnabled() ? extractArtifactId(query) : null;
    if (!id) {
        return null;
    }
    let spill;
    try {
        spill = await readFile(diskPath(), "utf8");
    }
    catch {
        return null; // no spill file yet
    }
    const newestFirst = spill.split("\n").filter(Boolean).reverse();
    for (const raw of newestFirst) {
        try {
            const row = JSON.parse(raw);
            if (row.id !== id) {
                continue;
            }
            return { toolCallId: id, toolName: row.toolName ?? "", content: row.content ?? "" };
        }
        catch {
            // Skip a torn/partial append line (or any line that is not a JSON object).
        }
    }
    return null;
}
136
+ // ─── Test hooks ──────────────────────────────────────────────────────────────
137
/**
 * Test hook: return the module to its initial state — empty in-memory cache,
 * default entry cap, and no disk-path override. Tracked pending disk writes
 * are not touched.
 */
export function __resetArtifactCacheForTests() {
    diskPathOverride = null;
    maxEntries = DEFAULT_MAX_ENTRIES;
    store.clear();
}
142
/**
 * Test hook: override the in-memory entry cap (minimum 1).
 *
 * A value that does not convert to a number (NaN, undefined) falls back to 1.
 * Storing NaN would make the `store.size > maxEntries` eviction check in
 * recordArtifact permanently false and silently turn the cache unbounded.
 */
export function __setArtifactCacheMaxForTests(n) {
    const requested = Math.max(1, n);
    maxEntries = Number.isNaN(requested) ? 1 : requested;
}
145
/**
 * Test hook: point the disk tier at `p` instead of the default file under the
 * home directory. Passing null restores the default location.
 */
export function __setArtifactCacheDiskPathForTests(p) {
    const override = p;
    diskPathOverride = override;
}
148
/** Test hook: number of entries currently held by the in-memory tier. */
export function __artifactCacheSize() {
    const { size } = store;
    return size;
}
151
/**
 * Wait until every disk write that is in flight at call time has settled.
 * Never rejects: the outcomes are collected with allSettled and discarded.
 * Intended for deterministic tests.
 */
export async function flushArtifactDiskWrites() {
    const inFlight = Array.from(pendingWrites);
    await Promise.allSettled(inFlight);
}
155
+ //# sourceMappingURL=artifact-cache.js.map
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,69 @@
1
+ import { rm } from "node:fs/promises";
2
+ import os from "node:os";
3
+ import path from "node:path";
4
+ import { afterEach, beforeEach, describe, expect, it } from "vitest";
5
+ import { __artifactCacheSize, __resetArtifactCacheForTests, __setArtifactCacheDiskPathForTests, __setArtifactCacheMaxForTests, appendArtifactToDisk, findArtifactByQuery, findArtifactOnDisk, flushArtifactDiskWrites, getArtifact, recordArtifact, } from "./artifact-cache.js";
6
// Redirect the disk spill to a temp file for EVERY test so recordArtifact never
// writes the real ~/.muonroi-cli/artifact-cache.jsonl.
const diskFile = path.join(os.tmpdir(), `muonroi-artifact-cache-test-${process.pid}.jsonl`);
beforeEach(() => __setArtifactCacheDiskPathForTests(diskFile));
afterEach(async () => {
    // recordArtifact starts its disk append without awaiting it. Drain those
    // writes first; otherwise a late append can recreate the temp file after
    // the rm below and leave it behind (or leak records into the next test).
    await flushArtifactDiskWrites();
    __resetArtifactCacheForTests();
    delete process.env.MUONROI_ARTIFACT_CACHE_DISK;
    await rm(diskFile, { force: true });
});
15
describe("artifact-cache (in-memory tier — durable rehydrate when EE is down)", () => {
    it("records and retrieves an elided output by toolCallId", () => {
        const fullOutput = "FULL CONTENT of src/auth.ts";
        recordArtifact("call_7", "read_file", fullOutput);
        expect(getArtifact("call_7")).toEqual({ toolName: "read_file", content: fullOutput });
        expect(getArtifact("missing")).toBeNull();
    });
    it("no-ops on empty id or empty content", () => {
        recordArtifact("", "read_file", "x"); // empty id
        recordArtifact("call_x", "read_file", ""); // empty content
        expect(__artifactCacheSize()).toBe(0);
    });
    it("findArtifactByQuery extracts the id from the contract query strings", () => {
        recordArtifact("abc123", "grep", "GREP HITS");
        const viaArtifactForm = findArtifactByQuery("tool-artifact id=abc123");
        expect(viaArtifactForm?.content).toBe("GREP HITS");
        const viaFullResultForm = findArtifactByQuery("full tool result id=abc123");
        expect(viaFullResultForm?.toolCallId).toBe("abc123");
        // Spacing and case around "id =" are tolerated.
        const viaLooseForm = findArtifactByQuery("tool-artifact ID = abc123");
        expect(viaLooseForm?.content).toBe("GREP HITS");
        // Misses: an id that is not cached, then a query with no id= at all.
        for (const missQuery of ["tool-artifact id=nope", "no id here"]) {
            expect(findArtifactByQuery(missQuery)).toBeNull();
        }
    });
    it("evicts the oldest entries past the LRU cap; re-recording refreshes recency", () => {
        __setArtifactCacheMaxForTests(2);
        const writes = [
            ["a", "A"],
            ["b", "B"],
            ["a", "A2"], // touch 'a' → now 'b' is oldest
            ["c", "C"], // evicts 'b'
        ];
        for (const [id, content] of writes) {
            recordArtifact(id, "t", content);
        }
        expect(getArtifact("a")?.content).toBe("A2");
        expect(getArtifact("c")?.content).toBe("C");
        expect(getArtifact("b")).toBeNull();
        expect(__artifactCacheSize()).toBe(2);
    });
});
46
describe("artifact-cache (disk spill — survives a process restart)", () => {
    it("rehydrates from disk after the in-memory tier is gone (simulated restart)", async () => {
        const query = "tool-artifact id=call_disk";
        recordArtifact("call_disk", "read_file", "PERSISTED CONTENT");
        await flushArtifactDiskWrites();
        // Simulated restart: wipe the in-memory tier, then re-point the disk
        // tier at the same file (the reset also clears the path override).
        __resetArtifactCacheForTests();
        __setArtifactCacheDiskPathForTests(diskFile);
        expect(findArtifactByQuery(query)).toBeNull(); // memory gone
        const fromDisk = await findArtifactOnDisk(query);
        expect(fromDisk?.content).toBe("PERSISTED CONTENT");
        expect(fromDisk?.toolName).toBe("read_file");
    });
    it("newest record for an id wins on disk", async () => {
        for (const version of ["OLD", "NEW"]) {
            await appendArtifactToDisk("dup", "t", version);
        }
        const winner = await findArtifactOnDisk("tool-artifact id=dup");
        expect(winner?.content).toBe("NEW");
    });
    it("respects MUONROI_ARTIFACT_CACHE_DISK=0 (no disk read)", async () => {
        await appendArtifactToDisk("x", "t", "C");
        process.env.MUONROI_ARTIFACT_CACHE_DISK = "0";
        const lookup = await findArtifactOnDisk("tool-artifact id=x");
        expect(lookup).toBeNull();
    });
});
69
+ //# sourceMappingURL=artifact-cache.test.js.map
@@ -97,11 +97,13 @@ export async function mirrorRecallLocally(query, meta, logPath) {
97
97
  * unavailability/timeout — never throws for transport errors.
98
98
  */
99
99
  export async function searchEE(query, opts = {}) {
100
- const { createEEClient } = await import("./client.js");
101
- const { loadEEAuthToken, getCachedServerBaseUrl } = await import("./auth.js");
102
- const authToken = (await loadEEAuthToken()) ?? undefined;
103
- const baseUrl = getCachedServerBaseUrl() ?? undefined;
104
- return createEEClient({ baseUrl, authToken }).search(query, opts);
100
+ // Route through the shared injectable default client (same one the WRITE leg
101
+ // persistArtifact → getDefaultEEClient().extract uses), NOT a fresh per-call
102
+ // client. This unifies the anti-mù seam: setDefaultEEClient now intercepts BOTH
103
+ // the artifact write and the artifact READ leg, and the default client carries
104
+ // the boot-loaded token + 401 refresh maintained by intercept.ts.
105
+ const { getDefaultEEClient } = await import("./intercept.js");
106
+ return getDefaultEEClient().search(query, opts);
105
107
  }
106
108
  /**
107
109
  * Active recall over the EE brain via /api/recall (recallMode) — the fixed
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,23 @@
1
+ import { afterEach, describe, expect, it, vi } from "vitest";
2
+ import { setDefaultEEClient } from "./intercept.js";
3
+ import { searchEE } from "./search.js";
4
// Issue #3 seam: searchEE previously constructed a fresh client on every call,
// so the artifact READ leg (ee_query "tool-artifact id=X") was invisible to
// setDefaultEEClient, while the WRITE leg (persistArtifact →
// getDefaultEEClient().extract) was not. With searchEE routed through
// getDefaultEEClient, a single injected client intercepts both legs — which
// makes the path testable end-to-end and gives a durability fallback one place
// to hook.
describe("searchEE — routes through the injectable default EE client", () => {
    afterEach(() => {
        // Teardown: clear the injected client so the next getDefaultEEClient
        // call lazily builds a real one.
        setDefaultEEClient(null);
    });
    it("uses getDefaultEEClient().search so the artifact READ leg is interceptable", async () => {
        const query = "tool-artifact id=x";
        const cannedResponse = { results: [{ id: "x", text: "REHYDRATED" }] };
        const searchSpy = vi.fn().mockResolvedValue(cannedResponse);
        setDefaultEEClient({ search: searchSpy });
        const response = await searchEE(query, { collections: ["experience-behavioral"], limit: 1 });
        expect(searchSpy).toHaveBeenCalledWith(query, { collections: ["experience-behavioral"], limit: 1 });
        expect(response).toBe(cannedResponse);
    });
});
23
+ //# sourceMappingURL=search.test.js.map
@@ -1,2 +1,2 @@
1
- export declare const PACKAGE_VERSION = "1.5.0";
1
+ export declare const PACKAGE_VERSION = "1.6.1";
2
2
  export declare const PACKAGE_DESCRIPTION = "BYOK AI coding agent with multi-model council debate, role-based routing, and auto-compact.";
@@ -1,5 +1,5 @@
1
1
  // AUTO-GENERATED by scripts/sync-version.cjs. DO NOT EDIT BY HAND.
2
2
  // Sourced from package.json at build time so it survives bun --compile bundling.
3
- export const PACKAGE_VERSION = "1.5.0";
3
+ export const PACKAGE_VERSION = "1.6.1";
4
4
  export const PACKAGE_DESCRIPTION = "BYOK AI coding agent with multi-model council debate, role-based routing, and auto-compact.";
5
5
  //# sourceMappingURL=version.js.map
@@ -1,6 +1,6 @@
1
1
  import { describe, expect, it } from "vitest";
2
2
  import { scoreComplexity } from "../complexity.js";
3
- import { buildDirective } from "../directives.js";
3
+ import { buildDirective, mentionsEcosystemScope } from "../directives.js";
4
4
  import { detectGrayAreas } from "../gray-areas.js";
5
5
  describe("buildDirective", () => {
6
6
  it("emits a blocking heavy directive with mandatory steps", () => {
@@ -66,6 +66,29 @@ describe("buildDirective", () => {
66
66
  expect(out.blocking).toBe(false);
67
67
  expect(out.text.length).toBeLessThan(300);
68
68
  });
69
+ it("appends the muonroi-docs nudge for an ecosystem question (session 41ccfeb2ceee turn 1)", () => {
70
+ const complexity = scoreComplexity("bạn hiểu thế nào về ecosystem muonroi nói chung");
71
+ const out = buildDirective({ complexity, phase: null, grayAreas: [], informational: true, ecosystem: true });
72
+ expect(out.text).toMatch(/QUESTION \/ explanatory/); // still the human-facing question directive
73
+ expect(out.text).toMatch(/ECOSYSTEM SCOPE/);
74
+ expect(out.text).toMatch(/muonroi-docs MCP is the AUTHORITATIVE source|AUTHORITATIVE source/);
75
+ expect(out.text).toMatch(/call it FIRST/i);
76
+ });
77
+ it("does NOT append the ecosystem nudge for a plain question", () => {
78
+ const complexity = scoreComplexity("how does this CLI affect you?");
79
+ const out = buildDirective({ complexity, phase: null, grayAreas: [], informational: true });
80
+ expect(out.text).not.toMatch(/ECOSYSTEM SCOPE/);
81
+ });
82
+ it("mentionsEcosystemScope is tight: ecosystem/BB wording yes, bare CLI-internals no", () => {
83
+ // Fires on genuine ecosystem scope (the case muonroi-docs exists to serve)…
84
+ expect(mentionsEcosystemScope("ecosystem muonroi nói chung và muonroi-cli nói riêng")).toBe(true);
85
+ expect(mentionsEcosystemScope("hệ sinh thái muonroi gồm những gì")).toBe(true);
86
+ expect(mentionsEcosystemScope("how does the building-block rule engine work")).toBe(true);
87
+ // …but NOT on a muonroi-cli internals question that merely names the product,
88
+ // which would wrongly steer toward .NET package docs.
89
+ expect(mentionsEcosystemScope("how does muonroi-cli compaction work")).toBe(false);
90
+ expect(mentionsEcosystemScope("fix the off-by-one in the router")).toBe(false);
91
+ });
69
92
  it("renders the recommended option first in gray-area block", () => {
70
93
  const prompt = "redo everything from scratch";
71
94
  const complexity = scoreComplexity(prompt);
@@ -31,6 +31,21 @@ export interface DirectiveInput {
31
31
  * buildDirective emits a human-facing question directive instead.
32
32
  */
33
33
  informational?: boolean;
34
+ /**
35
+ * True when the turn is about the Muonroi ECOSYSTEM (the whole platform, BB/
36
+ * .NET packages, building-block, open-core boundary, setup/install) rather than
37
+ * muonroi-cli's own TS internals. When set, buildDirective appends a nudge to
38
+ * consult the authoritative muonroi-docs MCP first. Computed by the caller via
39
+ * mentionsEcosystemScope so a CLI-internals question (which merely contains the
40
+ * word "muonroi") does NOT misfire toward .NET docs.
41
+ *
42
+ * Live miss (session 41ccfeb2ceee turn 1): "bạn hiểu thế nào về ecosystem
43
+ * muonroi…" — muonroi-docs WAS in the toolset (smart-filter kept it) but the
44
+ * question directive steered the agent to read/grep local files, so it answered
45
+ * "no comprehensive ecosystem description in the files read" instead of querying
46
+ * the shipped authoritative source.
47
+ */
48
+ ecosystem?: boolean;
34
49
  }
35
50
  export interface DirectiveOutput {
36
51
  text: string;
@@ -38,4 +53,11 @@ export interface DirectiveOutput {
38
53
  /** True when the directive forbids the agent from acting before clarifying. */
39
54
  blocking: boolean;
40
55
  }
56
+ export declare function mentionsEcosystemScope(message: string): boolean;
57
+ /**
58
+ * Appended to any directive when the turn is ecosystem-scoped. Phrased
59
+ * conditionally ("if … available") so it is harmless when muonroi-docs is not
60
+ * configured — the model simply finds no such tool and falls back to local files.
61
+ */
62
+ export declare const ECOSYSTEM_DOCS_NUDGE: string;
41
63
  export declare function buildDirective(input: DirectiveInput): DirectiveOutput;