muonroi-cli 1.5.0 → 1.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/cli/cost-forensics.d.ts +3 -0
- package/dist/src/cli/cost-forensics.js +11 -0
- package/dist/src/cli/cost-forensics.test.js +1 -0
- package/dist/src/cli/experience-report.d.ts +20 -0
- package/dist/src/cli/experience-report.js +76 -0
- package/dist/src/cli/experience-report.test.d.ts +5 -0
- package/dist/src/cli/experience-report.test.js +63 -0
- package/dist/src/ee/artifact-cache.d.ts +56 -0
- package/dist/src/ee/artifact-cache.js +155 -0
- package/dist/src/ee/artifact-cache.test.d.ts +1 -0
- package/dist/src/ee/artifact-cache.test.js +69 -0
- package/dist/src/ee/search.js +7 -5
- package/dist/src/ee/search.test.d.ts +1 -0
- package/dist/src/ee/search.test.js +23 -0
- package/dist/src/generated/version.d.ts +1 -1
- package/dist/src/generated/version.js +1 -1
- package/dist/src/gsd/__tests__/directives.test.js +24 -1
- package/dist/src/gsd/directives.d.ts +22 -0
- package/dist/src/gsd/directives.js +34 -10
- package/dist/src/index.js +9 -0
- package/dist/src/mcp/__tests__/client-pool.spec.js +54 -4
- package/dist/src/mcp/__tests__/forensics-tools.test.js +1 -0
- package/dist/src/mcp/client-pool.d.ts +9 -2
- package/dist/src/mcp/client-pool.js +60 -21
- package/dist/src/orchestrator/compaction.d.ts +2 -0
- package/dist/src/orchestrator/compaction.js +14 -1
- package/dist/src/orchestrator/compaction.test.js +25 -1
- package/dist/src/orchestrator/message-processor.js +49 -7
- package/dist/src/orchestrator/scope-reminder.d.ts +12 -0
- package/dist/src/orchestrator/scope-reminder.js +16 -0
- package/dist/src/orchestrator/scope-reminder.test.js +22 -1
- package/dist/src/orchestrator/session-experience.d.ts +89 -0
- package/dist/src/orchestrator/session-experience.js +169 -0
- package/dist/src/orchestrator/session-experience.test.d.ts +6 -0
- package/dist/src/orchestrator/session-experience.test.js +72 -0
- package/dist/src/orchestrator/stream-runner.js +7 -0
- package/dist/src/orchestrator/subagent-compactor.d.ts +14 -5
- package/dist/src/orchestrator/subagent-compactor.js +30 -8
- package/dist/src/orchestrator/subagent-compactor.spec.js +18 -0
- package/dist/src/pil/__tests__/layer3-ee-injection.test.js +5 -3
- package/dist/src/pil/__tests__/layer3-injected-chunk.test.js +31 -0
- package/dist/src/pil/__tests__/layer6-output.test.js +21 -0
- package/dist/src/pil/__tests__/pipeline.test.js +17 -0
- package/dist/src/pil/__tests__/surface-compaction-artifacts.test.d.ts +1 -0
- package/dist/src/pil/__tests__/surface-compaction-artifacts.test.js +112 -0
- package/dist/src/pil/layer3-ee-injection.d.ts +28 -0
- package/dist/src/pil/layer3-ee-injection.js +125 -4
- package/dist/src/pil/layer4-gsd.js +3 -2
- package/dist/src/pil/layer6-output.js +18 -7
- package/dist/src/pil/pipeline.js +26 -9
- package/dist/src/pil/session-experience-injection.d.ts +34 -0
- package/dist/src/pil/session-experience-injection.js +54 -0
- package/dist/src/pil/session-experience-injection.test.d.ts +6 -0
- package/dist/src/pil/session-experience-injection.test.js +79 -0
- package/dist/src/storage/interaction-log.d.ts +1 -1
- package/dist/src/storage/interaction-log.js +17 -4
- package/dist/src/storage/session-experience-store.d.ts +63 -0
- package/dist/src/storage/session-experience-store.js +164 -0
- package/dist/src/storage/session-experience-store.test.d.ts +5 -0
- package/dist/src/storage/session-experience-store.test.js +86 -0
- package/dist/src/storage/ui-interaction-log.js +4 -2
- package/dist/src/tools/registry-ee-query.test.js +24 -1
- package/dist/src/tools/registry.js +20 -2
- package/dist/src/types/index.d.ts +6 -0
- package/dist/src/ui/app.js +0 -0
- package/package.json +1 -1
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
* confirm that sub-agent context no longer balloons past 80k input
|
|
9
9
|
* after the cumulative cap kicks in.
|
|
10
10
|
*/
|
|
11
|
+
import type { SessionExperienceCounts } from "../orchestrator/session-experience.js";
|
|
11
12
|
export interface CostForensicsRow {
|
|
12
13
|
id: number;
|
|
13
14
|
source: string;
|
|
@@ -35,6 +36,8 @@ export interface CostForensicsSummary {
|
|
|
35
36
|
cacheHitRatio: number;
|
|
36
37
|
peakSingleCallInput: number;
|
|
37
38
|
events: CostForensicsRow[];
|
|
39
|
+
/** Anti-mù counters for this session (null when none recorded). */
|
|
40
|
+
experience: SessionExperienceCounts | null;
|
|
38
41
|
}
|
|
39
42
|
/**
|
|
40
43
|
* Return ALL session ids matching a prefix (newest first, capped at 5).
|
|
@@ -11,6 +11,7 @@
|
|
|
11
11
|
import { getProviderCapabilities } from "../providers/capabilities.js";
|
|
12
12
|
import { detectProviderForModel } from "../providers/runtime.js";
|
|
13
13
|
import { getDatabase } from "../storage/db.js";
|
|
14
|
+
import { selectSessionExperience } from "../storage/session-experience-store.js";
|
|
14
15
|
function resolveSessionId(prefix) {
|
|
15
16
|
const rows = getDatabase()
|
|
16
17
|
.prepare(`SELECT id FROM sessions WHERE id LIKE ? ORDER BY created_at DESC LIMIT 5`)
|
|
@@ -103,6 +104,7 @@ export function collectCostForensics(sessionId) {
|
|
|
103
104
|
cacheHitRatio,
|
|
104
105
|
peakSingleCallInput,
|
|
105
106
|
events,
|
|
107
|
+
experience: selectSessionExperience(sessionId),
|
|
106
108
|
};
|
|
107
109
|
}
|
|
108
110
|
export function computeCacheCadence(events) {
|
|
@@ -164,6 +166,15 @@ export function printCostForensics(summary, opts = {}) {
|
|
|
164
166
|
`(~${formatNum(cadence.estReBilledTokens)} tok re-billed). ` +
|
|
165
167
|
`Likely fast tool-loop latency — fewer/batched tool rounds recover this.`);
|
|
166
168
|
}
|
|
169
|
+
// Anti-mù counters for this session (rec #1 persisted forensics).
|
|
170
|
+
if (summary.experience) {
|
|
171
|
+
const x = summary.experience;
|
|
172
|
+
const rehydrated = x.rehydratedCache + x.rehydratedDisk + x.rehydratedEe;
|
|
173
|
+
w(`Anti-mù: ${x.compactions} compaction(s), ${x.elided} tool output(s) elided` +
|
|
174
|
+
`${x.elided > 0 ? ` (${formatNum(x.totalElidedChars)} chars)` : ""}, ` +
|
|
175
|
+
`${rehydrated} rehydrated (cache=${x.rehydratedCache} disk=${x.rehydratedDisk} ee=${x.rehydratedEe}), ` +
|
|
176
|
+
`${x.unavailable} needed-but-unavailable.`);
|
|
177
|
+
}
|
|
167
178
|
w(``);
|
|
168
179
|
w(`Per-event breakdown:`);
|
|
169
180
|
w(`${"seq".padEnd(5)}${"src".padEnd(10)}${"input".padStart(9)}${"out".padStart(7)}${"cacheR".padStart(9)}${"cacheC".padStart(8)} ts`);
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* src/cli/experience-report.ts
|
|
3
|
+
*
|
|
4
|
+
* `muonroi-cli usage experience` — cross-session anti-mù telemetry. Aggregates
|
|
5
|
+
* the per-session session_experience snapshots to answer the measure-before-
|
|
6
|
+
* re-architecting question: how often does compaction actually elide a tool
|
|
7
|
+
* output, and when the agent goes back for one, can it recover it?
|
|
8
|
+
*
|
|
9
|
+
* This is the data gate for the deferred anti-mù re-architecture (auto-protect /
|
|
10
|
+
* auto-rehydrate). Low elision rate or high recovery rate ⇒ the friction is rare
|
|
11
|
+
* or cognitive, not data-loss ⇒ defer. High unavailable / low recovery ⇒ real
|
|
12
|
+
* loss ⇒ justified.
|
|
13
|
+
*/
|
|
14
|
+
import { type ExperienceAggregate } from "../storage/session-experience-store.js";
|
|
15
|
+
/** Pure renderer — returns the report lines so it is unit-testable without a DB. */
|
|
16
|
+
export declare function renderExperienceAggregate(agg: ExperienceAggregate, limit: number): string[];
|
|
17
|
+
export declare function runExperienceReport(opts?: {
|
|
18
|
+
limit?: number;
|
|
19
|
+
json?: boolean;
|
|
20
|
+
}): Promise<void>;
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* src/cli/experience-report.ts
|
|
3
|
+
*
|
|
4
|
+
* `muonroi-cli usage experience` — cross-session anti-mù telemetry. Aggregates
|
|
5
|
+
* the per-session session_experience snapshots to answer the measure-before-
|
|
6
|
+
* re-architecting question: how often does compaction actually elide a tool
|
|
7
|
+
* output, and when the agent goes back for one, can it recover it?
|
|
8
|
+
*
|
|
9
|
+
* This is the data gate for the deferred anti-mù re-architecture (auto-protect /
|
|
10
|
+
* auto-rehydrate). Low elision rate or high recovery rate ⇒ the friction is rare
|
|
11
|
+
* or cognitive, not data-loss ⇒ defer. High unavailable / low recovery ⇒ real
|
|
12
|
+
* loss ⇒ justified.
|
|
13
|
+
*/
|
|
14
|
+
import { aggregateSessionExperience } from "../storage/session-experience-store.js";
|
|
15
|
+
/**
 * Format the ratio n/d as a whole-number percentage string.
 * Returns an em dash when the denominator is not a positive number.
 */
function pct(n, d) {
    if (!(d > 0)) {
        return "—";
    }
    const percentage = (n / d) * 100;
    return `${percentage.toFixed(0)}%`;
}
|
|
18
|
+
/** Render a value with en-US locale formatting (digit grouping for numbers). */
function num(n) {
    const grouped = n.toLocaleString("en-US");
    return grouped;
}
|
|
21
|
+
/**
 * Pure renderer for the cross-session report: converts an aggregate into the
 * list of report lines. It performs no DB access and no output itself, which
 * keeps it unit-testable.
 *
 * @param agg   Aggregate to render. Reads `sessionCount`, `sessionsWithElision`,
 *              `sessionsWithUnavailable`, `rehydrateRecoveryRate` and `totals`.
 * @param limit Session cap; only echoed in the header line.
 * @returns The report, one array element per output line.
 */
export function renderExperienceAggregate(agg, limit) {
    const totals = agg.totals;
    const rehydratedTotal = totals.rehydratedCache + totals.rehydratedDisk + totals.rehydratedEe;
    const lines = [
        "",
        `Session-experience aggregate — latest ${agg.sessionCount} session(s) with a snapshot (cap ${limit})`,
        "─".repeat(72),
    ];
    // No snapshots at all: explain why and stop before any ratio is computed.
    if (agg.sessionCount === 0) {
        lines.push(
            "No session_experience snapshots recorded yet.",
            "Run some real (non-meta) sessions, then re-check — compaction only",
            "persists a snapshot once it actually elides / rehydrates something.",
        );
        return lines;
    }
    lines.push(
        `Sessions with compaction elision: ${agg.sessionsWithElision} (${pct(agg.sessionsWithElision, agg.sessionCount)})`,
        `Sessions hitting needed-but-unavail: ${agg.sessionsWithUnavailable} (${pct(agg.sessionsWithUnavailable, agg.sessionCount)})`,
        "",
        "Totals across those sessions:",
        ` Compactions fired: ${num(totals.compactions)}`,
        ` Tool outputs elided: ${num(totals.elided)} (${num(totals.totalElidedChars)} chars)`,
        ` Rehydrated via ee_query: ${num(rehydratedTotal)} (cache=${totals.rehydratedCache} disk=${totals.rehydratedDisk} ee=${totals.rehydratedEe})`,
        ` Needed-but-unavailable: ${num(totals.unavailable)}`,
        ` EE timeouts / errors: ${num(totals.eeTimeouts)} / ${num(totals.eeErrors)}`,
        "",
        `Rehydrate recovery rate: ${(agg.rehydrateRecoveryRate * 100).toFixed(0)}% (rehydrated / (rehydrated + unavailable))`,
        "",
        // Decision signal for the deferred re-architecture.
        "Re-architecture decision signal:",
    );
    if (totals.elided === 0) {
        lines.push(" • Compaction has not elided anything — friction is not occurring. DEFER.");
        return lines;
    }
    // sessionCount is non-zero here (guarded above), so the division is safe.
    const elisionRate = agg.sessionsWithElision / agg.sessionCount;
    if (elisionRate < 0.2) {
        lines.push(` • Elision bites in only ${pct(agg.sessionsWithElision, agg.sessionCount)} of sessions — rare. Likely DEFER.`);
    }
    const recoveryIsFine = agg.rehydrateRecoveryRate >= 0.9 || totals.unavailable === 0;
    if (recoveryIsFine) {
        lines.push(" • Recovery rate high / no unavailable — manual rehydrate works; friction is cognitive, not data-loss. Manifest+keepLast likely enough.");
    }
    else {
        lines.push(` • Recovery rate ${(agg.rehydrateRecoveryRate * 100).toFixed(0)}% with ${num(totals.unavailable)} unrecoverable — real data loss. Auto-protect/auto-rehydrate JUSTIFIED.`);
    }
    return lines;
}
|
|
66
|
+
/**
 * CLI entry point for the experience report: aggregates session snapshots via
 * `aggregateSessionExperience` and writes the result to stdout.
 *
 * @param opts.limit Session cap passed to the aggregator; any value that is
 *                   missing, falsy or not greater than zero falls back to 100.
 * @param opts.json  When truthy, prints the raw aggregate as pretty-printed JSON
 *                   instead of the rendered text report.
 */
export async function runExperienceReport(opts = {}) {
    let limit = 100;
    if (opts.limit && opts.limit > 0) {
        limit = opts.limit;
    }
    const agg = aggregateSessionExperience(limit);
    if (opts.json) {
        process.stdout.write(`${JSON.stringify(agg, null, 2)}\n`);
        return;
    }
    // One write per rendered line, each terminated by a newline.
    renderExperienceAggregate(agg, limit).forEach((line) => {
        process.stdout.write(`${line}\n`);
    });
}
|
|
76
|
+
//# sourceMappingURL=experience-report.js.map
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* experience-report renderer — the cross-session decision signal that gates the
|
|
3
|
+
* deferred anti-mù auto-protect/auto-rehydrate re-architecture.
|
|
4
|
+
*/
|
|
5
|
+
import { describe, expect, it } from "vitest";
|
|
6
|
+
import { renderExperienceAggregate } from "./experience-report.js";
|
|
7
|
+
function counts(p = {}) {
|
|
8
|
+
return {
|
|
9
|
+
compactions: 0,
|
|
10
|
+
elided: 0,
|
|
11
|
+
totalElidedChars: 0,
|
|
12
|
+
rehydratedCache: 0,
|
|
13
|
+
rehydratedDisk: 0,
|
|
14
|
+
rehydratedEe: 0,
|
|
15
|
+
unavailable: 0,
|
|
16
|
+
eeTimeouts: 0,
|
|
17
|
+
eeErrors: 0,
|
|
18
|
+
...p,
|
|
19
|
+
};
|
|
20
|
+
}
|
|
21
|
+
/**
 * Build an aggregate fixture for the renderer. Any field that is null or
 * undefined in `p` takes its default; `totals` is always expanded through
 * `counts` so missing counters are zero.
 */
function agg(p = {}) {
    // Same semantics as `??`: only null/undefined trigger the fallback.
    const pick = (key, fallback) => (p[key] == null ? fallback : p[key]);
    return {
        sessionCount: pick("sessionCount", 1),
        sessionsWithElision: pick("sessionsWithElision", 0),
        sessionsWithUnavailable: pick("sessionsWithUnavailable", 0),
        totals: counts(p.totals),
        rehydrateRecoveryRate: pick("rehydrateRecoveryRate", 1),
        perSession: pick("perSession", []),
    };
}
|
|
31
|
+
describe("renderExperienceAggregate", () => {
    // Render a fixture aggregate (cap 100) into a single newline-joined string.
    const render = (overrides) => renderExperienceAggregate(agg(overrides), 100).join("\n");
    it("reports the no-data case clearly", () => {
        const text = render({ sessionCount: 0 });
        expect(text).toContain("No session_experience snapshots recorded yet");
    });
    it("signals DEFER when nothing was ever elided", () => {
        const text = render({ sessionCount: 5, totals: { compactions: 3 } });
        expect(text).toContain("has not elided anything");
        expect(text).toContain("DEFER");
    });
    it("signals cognitive-not-data-loss when recovery is high / no unavailable", () => {
        const fixture = {
            sessionCount: 4,
            sessionsWithElision: 3,
            rehydrateRecoveryRate: 1,
            totals: { compactions: 5, elided: 20, rehydratedCache: 8 },
        };
        const text = render(fixture);
        expect(text).toMatch(/cognitive, not data-loss/);
        expect(text).not.toMatch(/JUSTIFIED/);
    });
    it("signals re-architecture JUSTIFIED when recovery is low with unrecoverable artifacts", () => {
        const fixture = {
            sessionCount: 6,
            sessionsWithElision: 5,
            sessionsWithUnavailable: 4,
            rehydrateRecoveryRate: 0.3,
            totals: { compactions: 10, elided: 40, rehydratedEe: 3, unavailable: 7 },
        };
        const text = render(fixture);
        expect(text).toContain("real data loss");
        expect(text).toContain("JUSTIFIED");
    });
});
|
|
63
|
+
//# sourceMappingURL=experience-report.test.js.map
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* src/ee/artifact-cache.ts
|
|
3
|
+
*
|
|
4
|
+
* Durable fallback for compaction-elided tool outputs (issue #3 increment 2 /
|
|
5
|
+
* anti-mù durability).
|
|
6
|
+
*
|
|
7
|
+
* When B3/B4 compaction rewrites a low-value tool result into a ~200-char stub,
|
|
8
|
+
* the full content is shipped to the Experience Engine (source="tool-artifact")
|
|
9
|
+
* so a later `ee_query("tool-artifact id=X")` can rehydrate it. But that recovery
|
|
10
|
+
* depends on EE (Qdrant/HTTP) being reachable. This module is the EE-independent
|
|
11
|
+
* recovery path, in two tiers:
|
|
12
|
+
* - in-process LRU (keyed by toolCallId): authoritative full content for THIS
|
|
13
|
+
* session, instant, survives an EE outage mid-session;
|
|
14
|
+
* - append-only disk spill (~/.muonroi-cli/artifact-cache.jsonl): survives a
|
|
15
|
+
* PROCESS RESTART too, so a restart + EE-down double-failure can still
|
|
16
|
+
* rehydrate. Disable with MUONROI_ARTIFACT_CACHE_DISK=0.
|
|
17
|
+
*
|
|
18
|
+
* ee_query reads in-memory first, then disk, then falls back to EE /api/search
|
|
19
|
+
* (the cross-session source). Both tiers are bounded; both are best-effort and
|
|
20
|
+
* fail-open (a disk error never breaks recall).
|
|
21
|
+
*/
|
|
22
|
+
export interface ArtifactEntry {
|
|
23
|
+
toolName: string;
|
|
24
|
+
content: string;
|
|
25
|
+
}
|
|
26
|
+
/**
|
|
27
|
+
* Record an elided tool output by toolCallId. In-memory set is synchronous;
|
|
28
|
+
* the disk append is fire-and-forget (tracked so tests can flush it). No-ops on
|
|
29
|
+
* empty id/content.
|
|
30
|
+
*/
|
|
31
|
+
export declare function recordArtifact(toolCallId: string, toolName: string, content: string): void;
|
|
32
|
+
/** The actual disk append (awaitable). Resets the file when it exceeds the size cap. */
|
|
33
|
+
export declare function appendArtifactToDisk(toolCallId: string, toolName: string, content: string): Promise<void>;
|
|
34
|
+
/** Exact in-memory lookup by toolCallId. */
|
|
35
|
+
export declare function getArtifact(toolCallId: string): ArtifactEntry | null;
|
|
36
|
+
/**
|
|
37
|
+
* Synchronous in-memory lookup from a contract query string. Returns null when
|
|
38
|
+
* the query has no id= or the id is not in the in-process LRU.
|
|
39
|
+
*/
|
|
40
|
+
export declare function findArtifactByQuery(query: string): (ArtifactEntry & {
|
|
41
|
+
toolCallId: string;
|
|
42
|
+
}) | null;
|
|
43
|
+
/**
|
|
44
|
+
* Disk-tier lookup (survives restart). Scans the spill file newest-first so the
|
|
45
|
+
* most recent record for an id wins. Fail-open: a missing/corrupt file yields
|
|
46
|
+
* null, never throws.
|
|
47
|
+
*/
|
|
48
|
+
export declare function findArtifactOnDisk(query: string): Promise<(ArtifactEntry & {
|
|
49
|
+
toolCallId: string;
|
|
50
|
+
}) | null>;
|
|
51
|
+
export declare function __resetArtifactCacheForTests(): void;
|
|
52
|
+
export declare function __setArtifactCacheMaxForTests(n: number): void;
|
|
53
|
+
export declare function __setArtifactCacheDiskPathForTests(p: string | null): void;
|
|
54
|
+
export declare function __artifactCacheSize(): number;
|
|
55
|
+
/** Await all in-flight fire-and-forget disk writes (deterministic tests). */
|
|
56
|
+
export declare function flushArtifactDiskWrites(): Promise<void>;
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* src/ee/artifact-cache.ts
|
|
3
|
+
*
|
|
4
|
+
* Durable fallback for compaction-elided tool outputs (issue #3 increment 2 /
|
|
5
|
+
* anti-mù durability).
|
|
6
|
+
*
|
|
7
|
+
* When B3/B4 compaction rewrites a low-value tool result into a ~200-char stub,
|
|
8
|
+
* the full content is shipped to the Experience Engine (source="tool-artifact")
|
|
9
|
+
* so a later `ee_query("tool-artifact id=X")` can rehydrate it. But that recovery
|
|
10
|
+
* depends on EE (Qdrant/HTTP) being reachable. This module is the EE-independent
|
|
11
|
+
* recovery path, in two tiers:
|
|
12
|
+
* - in-process LRU (keyed by toolCallId): authoritative full content for THIS
|
|
13
|
+
* session, instant, survives an EE outage mid-session;
|
|
14
|
+
* - append-only disk spill (~/.muonroi-cli/artifact-cache.jsonl): survives a
|
|
15
|
+
* PROCESS RESTART too, so a restart + EE-down double-failure can still
|
|
16
|
+
* rehydrate. Disable with MUONROI_ARTIFACT_CACHE_DISK=0.
|
|
17
|
+
*
|
|
18
|
+
* ee_query reads in-memory first, then disk, then falls back to EE /api/search
|
|
19
|
+
* (the cross-session source). Both tiers are bounded; both are best-effort and
|
|
20
|
+
* fail-open (a disk error never breaks recall).
|
|
21
|
+
*/
|
|
22
|
+
import { appendFile, mkdir, readFile, stat, writeFile } from "node:fs/promises";
|
|
23
|
+
import os from "node:os";
|
|
24
|
+
import path from "node:path";
|
|
25
|
+
/** Default number of entries the in-memory tier keeps before evicting. */
const DEFAULT_MAX_ENTRIES = 100;
/** Per-entry character cap so a single giant output cannot dominate the footprint. */
const MAX_CONTENT_CHARS = 200_000;
/** Spill-file size cap in bytes (8 MiB); the file is reset once it grows past this. */
const DISK_MAX_BYTES = 8 * 1024 ** 2;
/** In-memory tier: toolCallId → { toolName, content }. */
const store = new Map();
/** Current in-memory entry cap (adjustable through the test hooks). */
let maxEntries = DEFAULT_MAX_ENTRIES;
/** Spill-file path override; null means the default location is used. */
let diskPathOverride = null;
/** Fire-and-forget disk appends that have not settled yet. */
const pendingWrites = new Set();
|
|
34
|
+
/** The disk tier is on unless MUONROI_ARTIFACT_CACHE_DISK is exactly "0". */
function diskEnabled() {
    const flag = process.env.MUONROI_ARTIFACT_CACHE_DISK;
    return flag !== "0";
}
|
|
37
|
+
/**
 * Resolve the spill-file location: the override when one is set, otherwise
 * `.muonroi-cli/artifact-cache.jsonl` under the user's home directory.
 */
function diskPath() {
    if (diskPathOverride !== null && diskPathOverride !== undefined) {
        return diskPathOverride;
    }
    return path.join(os.homedir(), ".muonroi-cli", "artifact-cache.jsonl");
}
|
|
40
|
+
/**
 * Pull the id out of a "tool-artifact id=<id>" / "full tool result id=<id>" query.
 * Matching is case-insensitive, tolerates whitespace around "=", and skips one
 * optional leading quote. Returns null when no id= token is present.
 */
function extractArtifactId(query) {
    const idPattern = /\bid\s*=\s*["']?([A-Za-z0-9_\-:.]+)/i;
    const match = idPattern.exec(query || "");
    if (match === null) {
        return null;
    }
    return match[1];
}
|
|
45
|
+
/**
 * Record an elided tool output under its toolCallId.
 *
 * The in-memory write is synchronous. When the disk tier is enabled, the disk
 * append is started but not awaited; its promise is kept in `pendingWrites`
 * until it settles so it can be flushed. A failed append is only logged.
 *
 * Does nothing when `toolCallId` is falsy or `content` is not a non-empty string.
 * Content longer than MAX_CONTENT_CHARS is truncated before being stored.
 */
export function recordArtifact(toolCallId, toolName, content) {
    const hasContent = typeof content === "string" && content.length !== 0;
    if (!toolCallId || !hasContent) {
        return;
    }
    const name = toolName || "";
    const capped = content.slice(0, MAX_CONTENT_CHARS);
    // Delete-then-set moves an existing key to the newest position in the Map.
    store.delete(toolCallId);
    store.set(toolCallId, { toolName: name, content: capped });
    // Evict from the oldest end until the cache is back within its cap.
    for (const oldest of store.keys()) {
        if (store.size <= maxEntries || oldest === undefined) {
            break;
        }
        store.delete(oldest);
    }
    if (!diskEnabled()) {
        return;
    }
    const pending = appendArtifactToDisk(toolCallId, name, capped).catch((err) => {
        console.error(`[artifact-cache] disk append failed: ${err?.message}`);
    });
    pendingWrites.add(pending);
    void pending.finally(() => pendingWrites.delete(pending));
}
|
|
71
|
+
/**
 * Append one artifact record to the spill file as a single JSON line (awaitable).
 *
 * Creates the parent directory if needed. If the file is already larger than
 * DISK_MAX_BYTES it is truncated to empty before the new record is appended.
 * Errors from the size check/truncate step are ignored; errors from mkdir or
 * the append itself reject the returned promise.
 */
export async function appendArtifactToDisk(toolCallId, toolName, content) {
    const target = diskPath();
    await mkdir(path.dirname(target), { recursive: true });
    try {
        const { size } = await stat(target);
        if (size > DISK_MAX_BYTES) {
            await writeFile(target, "");
        }
    }
    catch {
        /* file does not exist yet — nothing to cap */
    }
    const record = JSON.stringify({ id: toolCallId, toolName, content });
    await appendFile(target, `${record}\n`);
}
|
|
85
|
+
/**
 * Exact in-memory lookup by toolCallId.
 * Returns the stored `{ toolName, content }` entry, or null when the id is
 * falsy or not cached.
 */
export function getArtifact(toolCallId) {
    const entry = toolCallId ? store.get(toolCallId) : undefined;
    if (entry === undefined || entry === null) {
        return null;
    }
    return entry;
}
|
|
91
|
+
/**
 * Synchronous in-memory lookup driven by a contract query string.
 * Returns `{ toolCallId, toolName, content }` on a hit, or null when the query
 * carries no id= token or that id is not in the in-process cache.
 */
export function findArtifactByQuery(query) {
    const toolCallId = extractArtifactId(query);
    const entry = toolCallId ? store.get(toolCallId) : undefined;
    if (!entry) {
        return null;
    }
    const { toolName, content } = entry;
    return { toolCallId, toolName, content };
}
|
|
102
|
+
/**
 * Disk-tier lookup driven by a contract query string.
 *
 * Reads the whole spill file and walks its lines from last to first, so the
 * most recently appended record for an id wins. Returns null when the disk tier
 * is disabled, the query has no id=, the file cannot be read, or no line
 * matches. Lines that fail to parse are skipped; read/parse failures never throw.
 */
export async function findArtifactOnDisk(query) {
    if (!diskEnabled()) {
        return null;
    }
    const wantedId = extractArtifactId(query);
    if (!wantedId) {
        return null;
    }
    let raw;
    try {
        raw = await readFile(diskPath(), "utf8");
    }
    catch {
        return null; // no spill file yet
    }
    // Newest-first: reverse the freshly split (unshared) array of lines.
    const newestFirst = raw.split("\n").reverse();
    for (const record of newestFirst) {
        if (!record) {
            continue;
        }
        try {
            const row = JSON.parse(record);
            if (row.id === wantedId) {
                return { toolCallId: wantedId, toolName: row.toolName ?? "", content: row.content ?? "" };
            }
        }
        catch {
            /* skip a torn/partial append line */
        }
    }
    return null;
}
|
|
136
|
+
// ─── Test hooks ──────────────────────────────────────────────────────────────
|
|
137
|
+
/**
 * Test hook: restore the default entry cap, drop any spill-path override and
 * empty the in-memory tier. Disk contents and in-flight writes are untouched.
 */
export function __resetArtifactCacheForTests() {
    diskPathOverride = null;
    maxEntries = DEFAULT_MAX_ENTRIES;
    store.clear();
}
|
|
142
|
+
/** Test hook: set the in-memory entry cap to `n`, raised to 1 if `n` is smaller. */
export function __setArtifactCacheMaxForTests(n) {
    const minimumCap = 1;
    maxEntries = Math.max(minimumCap, n);
}
|
|
145
|
+
/** Test hook: point the disk spill at `p`; pass null to use the default path again. */
export function __setArtifactCacheDiskPathForTests(p) {
    const override = p;
    diskPathOverride = override;
}
|
|
148
|
+
/** Test hook: number of entries currently held in the in-memory tier. */
export function __artifactCacheSize() {
    const { size } = store;
    return size;
}
|
|
151
|
+
/**
 * Wait until every disk write that is in flight at call time has settled
 * (fulfilled or rejected). Intended for deterministic tests.
 */
export async function flushArtifactDiskWrites() {
    const inFlight = Array.from(pendingWrites);
    await Promise.allSettled(inFlight);
}
|
|
155
|
+
//# sourceMappingURL=artifact-cache.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
import { rm } from "node:fs/promises";
|
|
2
|
+
import os from "node:os";
|
|
3
|
+
import path from "node:path";
|
|
4
|
+
import { afterEach, beforeEach, describe, expect, it } from "vitest";
|
|
5
|
+
import { __artifactCacheSize, __resetArtifactCacheForTests, __setArtifactCacheDiskPathForTests, __setArtifactCacheMaxForTests, appendArtifactToDisk, findArtifactByQuery, findArtifactOnDisk, flushArtifactDiskWrites, getArtifact, recordArtifact, } from "./artifact-cache.js";
|
|
6
|
+
// Redirect the disk spill to a per-process temp file for EVERY test so
// recordArtifact never writes the real ~/.muonroi-cli/artifact-cache.jsonl.
const diskFile = path.join(os.tmpdir(), `muonroi-artifact-cache-test-${process.pid}.jsonl`);
beforeEach(() => __setArtifactCacheDiskPathForTests(diskFile));
afterEach(async () => {
    // recordArtifact starts its disk append without awaiting it. Drain those
    // writes first; otherwise a late append can recreate the spill file after
    // the rm below and bleed into the next test (or leak the temp file).
    await flushArtifactDiskWrites();
    __resetArtifactCacheForTests();
    delete process.env.MUONROI_ARTIFACT_CACHE_DISK;
    await rm(diskFile, { force: true });
});
|
|
15
|
+
describe("artifact-cache (in-memory tier — durable rehydrate when EE is down)", () => {
    it("records and retrieves an elided output by toolCallId", () => {
        const fullContent = "FULL CONTENT of src/auth.ts";
        recordArtifact("call_7", "read_file", fullContent);
        const cached = getArtifact("call_7");
        expect(cached).toEqual({ toolName: "read_file", content: fullContent });
        expect(getArtifact("missing")).toBeNull();
    });
    it("no-ops on empty id or empty content", () => {
        recordArtifact("", "read_file", "x");
        recordArtifact("call_x", "read_file", "");
        expect(__artifactCacheSize()).toBe(0);
    });
    it("findArtifactByQuery extracts the id from the contract query strings", () => {
        recordArtifact("abc123", "grep", "GREP HITS");
        // Both contract phrasings resolve to the cached entry.
        expect(findArtifactByQuery("tool-artifact id=abc123")?.content).toBe("GREP HITS");
        expect(findArtifactByQuery("full tool result id=abc123")?.toolCallId).toBe("abc123");
        // Spacing and case around "id=" are tolerated.
        expect(findArtifactByQuery("tool-artifact ID = abc123")?.content).toBe("GREP HITS");
        // Well-formed query, but the id is not cached.
        expect(findArtifactByQuery("tool-artifact id=nope")).toBeNull();
        // No id= token at all.
        expect(findArtifactByQuery("no id here")).toBeNull();
    });
    it("evicts the oldest entries past the LRU cap; re-recording refreshes recency", () => {
        __setArtifactCacheMaxForTests(2);
        // a, b, then touch a (b becomes oldest), then c evicts b.
        const writes = [
            ["a", "A"],
            ["b", "B"],
            ["a", "A2"],
            ["c", "C"],
        ];
        for (const [id, content] of writes) {
            recordArtifact(id, "t", content);
        }
        expect(getArtifact("a")?.content).toBe("A2");
        expect(getArtifact("c")?.content).toBe("C");
        expect(getArtifact("b")).toBeNull();
        expect(__artifactCacheSize()).toBe(2);
    });
});
|
|
46
|
+
describe("artifact-cache (disk spill — survives a process restart)", () => {
    it("rehydrates from disk after the in-memory tier is gone (simulated restart)", async () => {
        const query = "tool-artifact id=call_disk";
        recordArtifact("call_disk", "read_file", "PERSISTED CONTENT");
        await flushArtifactDiskWrites();
        // Simulate a restart: the in-memory tier is cleared while the disk file
        // persists. The reset also drops the path override, so re-apply it.
        __resetArtifactCacheForTests();
        __setArtifactCacheDiskPathForTests(diskFile);
        expect(findArtifactByQuery(query)).toBeNull(); // memory gone
        const onDisk = await findArtifactOnDisk(query);
        expect(onDisk?.content).toBe("PERSISTED CONTENT");
        expect(onDisk?.toolName).toBe("read_file");
    });
    it("newest record for an id wins on disk", async () => {
        await appendArtifactToDisk("dup", "t", "OLD");
        await appendArtifactToDisk("dup", "t", "NEW");
        const found = await findArtifactOnDisk("tool-artifact id=dup");
        expect(found?.content).toBe("NEW");
    });
    it("respects MUONROI_ARTIFACT_CACHE_DISK=0 (no disk read)", async () => {
        await appendArtifactToDisk("x", "t", "C");
        process.env.MUONROI_ARTIFACT_CACHE_DISK = "0";
        const found = await findArtifactOnDisk("tool-artifact id=x");
        expect(found).toBeNull();
    });
});
|
|
69
|
+
//# sourceMappingURL=artifact-cache.test.js.map
|
package/dist/src/ee/search.js
CHANGED
|
@@ -97,11 +97,13 @@ export async function mirrorRecallLocally(query, meta, logPath) {
|
|
|
97
97
|
* unavailability/timeout — never throws for transport errors.
|
|
98
98
|
*/
|
|
99
99
|
export async function searchEE(query, opts = {}) {
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
100
|
+
// Route through the shared injectable default client (same one the WRITE leg
|
|
101
|
+
// persistArtifact → getDefaultEEClient().extract uses), NOT a fresh per-call
|
|
102
|
+
// client. This unifies the anti-mù seam: setDefaultEEClient now intercepts BOTH
|
|
103
|
+
// the artifact write and the artifact READ leg, and the default client carries
|
|
104
|
+
// the boot-loaded token + 401 refresh maintained by intercept.ts.
|
|
105
|
+
const { getDefaultEEClient } = await import("./intercept.js");
|
|
106
|
+
return getDefaultEEClient().search(query, opts);
|
|
105
107
|
}
|
|
106
108
|
/**
|
|
107
109
|
* Active recall over the EE brain via /api/recall (recallMode) — the fixed
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import { afterEach, describe, expect, it, vi } from "vitest";
|
|
2
|
+
import { setDefaultEEClient } from "./intercept.js";
|
|
3
|
+
import { searchEE } from "./search.js";
|
|
4
|
+
// Issue #3 seam. searchEE used to build a FRESH createEEClient, so the artifact
// READ leg (ee_query "tool-artifact id=X") could not be intercepted by
// setDefaultEEClient, while the WRITE leg (persistArtifact → getDefaultEEClient
// .extract) could. Routing searchEE through getDefaultEEClient unifies the seam:
// one injected client now intercepts both legs, which makes the path testable
// end-to-end and gives a durability fallback a place to hook.
describe("searchEE — routes through the injectable default EE client", () => {
    afterEach(() => {
        // Teardown → next getDefaultEEClient lazy-inits a real one.
        setDefaultEEClient(null);
    });
    it("uses getDefaultEEClient().search so the artifact READ leg is interceptable", async () => {
        const query = "tool-artifact id=x";
        const fakeResp = { results: [{ id: "x", text: "REHYDRATED" }] };
        const search = vi.fn().mockResolvedValue(fakeResp);
        setDefaultEEClient({ search });
        const out = await searchEE(query, { collections: ["experience-behavioral"], limit: 1 });
        // The injected client receives the query and options unchanged…
        expect(search).toHaveBeenCalledWith(query, { collections: ["experience-behavioral"], limit: 1 });
        // …and its response is returned as the very same object.
        expect(out).toBe(fakeResp);
    });
});
|
|
23
|
+
//# sourceMappingURL=search.test.js.map
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export declare const PACKAGE_VERSION = "1.5.0";
|
|
1
|
+
export declare const PACKAGE_VERSION = "1.6.1";
|
|
2
2
|
export declare const PACKAGE_DESCRIPTION = "BYOK AI coding agent with multi-model council debate, role-based routing, and auto-compact.";
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
// AUTO-GENERATED by scripts/sync-version.cjs. DO NOT EDIT BY HAND.
|
|
2
2
|
// Sourced from package.json at build time so it survives bun --compile bundling.
|
|
3
|
-
export const PACKAGE_VERSION = "1.5.0";
|
|
3
|
+
export const PACKAGE_VERSION = "1.6.1";
|
|
4
4
|
export const PACKAGE_DESCRIPTION = "BYOK AI coding agent with multi-model council debate, role-based routing, and auto-compact.";
|
|
5
5
|
//# sourceMappingURL=version.js.map
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { describe, expect, it } from "vitest";
|
|
2
2
|
import { scoreComplexity } from "../complexity.js";
|
|
3
|
-
import { buildDirective } from "../directives.js";
|
|
3
|
+
import { buildDirective, mentionsEcosystemScope } from "../directives.js";
|
|
4
4
|
import { detectGrayAreas } from "../gray-areas.js";
|
|
5
5
|
describe("buildDirective", () => {
|
|
6
6
|
it("emits a blocking heavy directive with mandatory steps", () => {
|
|
@@ -66,6 +66,29 @@ describe("buildDirective", () => {
|
|
|
66
66
|
expect(out.blocking).toBe(false);
|
|
67
67
|
expect(out.text.length).toBeLessThan(300);
|
|
68
68
|
});
|
|
69
|
+
it("appends the muonroi-docs nudge for an ecosystem question (session 41ccfeb2ceee turn 1)", () => {
|
|
70
|
+
const complexity = scoreComplexity("bạn hiểu thế nào về ecosystem muonroi nói chung");
|
|
71
|
+
const out = buildDirective({ complexity, phase: null, grayAreas: [], informational: true, ecosystem: true });
|
|
72
|
+
expect(out.text).toMatch(/QUESTION \/ explanatory/); // still the human-facing question directive
|
|
73
|
+
expect(out.text).toMatch(/ECOSYSTEM SCOPE/);
|
|
74
|
+
expect(out.text).toMatch(/muonroi-docs MCP is the AUTHORITATIVE source|AUTHORITATIVE source/);
|
|
75
|
+
expect(out.text).toMatch(/call it FIRST/i);
|
|
76
|
+
});
|
|
77
|
+
it("does NOT append the ecosystem nudge for a plain question", () => {
|
|
78
|
+
const complexity = scoreComplexity("how does this CLI affect you?");
|
|
79
|
+
const out = buildDirective({ complexity, phase: null, grayAreas: [], informational: true });
|
|
80
|
+
expect(out.text).not.toMatch(/ECOSYSTEM SCOPE/);
|
|
81
|
+
});
|
|
82
|
+
it("mentionsEcosystemScope is tight: ecosystem/BB wording yes, bare CLI-internals no", () => {
|
|
83
|
+
// Fires on genuine ecosystem scope (the case muonroi-docs exists to serve)…
|
|
84
|
+
expect(mentionsEcosystemScope("ecosystem muonroi nói chung và muonroi-cli nói riêng")).toBe(true);
|
|
85
|
+
expect(mentionsEcosystemScope("hệ sinh thái muonroi gồm những gì")).toBe(true);
|
|
86
|
+
expect(mentionsEcosystemScope("how does the building-block rule engine work")).toBe(true);
|
|
87
|
+
// …but NOT on a muonroi-cli internals question that merely names the product,
|
|
88
|
+
// which would wrongly steer toward .NET package docs.
|
|
89
|
+
expect(mentionsEcosystemScope("how does muonroi-cli compaction work")).toBe(false);
|
|
90
|
+
expect(mentionsEcosystemScope("fix the off-by-one in the router")).toBe(false);
|
|
91
|
+
});
|
|
69
92
|
it("renders the recommended option first in gray-area block", () => {
|
|
70
93
|
const prompt = "redo everything from scratch";
|
|
71
94
|
const complexity = scoreComplexity(prompt);
|
|
@@ -31,6 +31,21 @@ export interface DirectiveInput {
|
|
|
31
31
|
* buildDirective emits a human-facing question directive instead.
|
|
32
32
|
*/
|
|
33
33
|
informational?: boolean;
|
|
34
|
+
/**
|
|
35
|
+
* True when the turn is about the Muonroi ECOSYSTEM (the whole platform, BB/
|
|
36
|
+
* .NET packages, building-block, open-core boundary, setup/install) rather than
|
|
37
|
+
* muonroi-cli's own TS internals. When set, buildDirective appends a nudge to
|
|
38
|
+
* consult the authoritative muonroi-docs MCP first. Computed by the caller via
|
|
39
|
+
* mentionsEcosystemScope so a CLI-internals question (which merely contains the
|
|
40
|
+
* word "muonroi") does NOT misfire toward .NET docs.
|
|
41
|
+
*
|
|
42
|
+
* Live miss (session 41ccfeb2ceee turn 1): "bạn hiểu thế nào về ecosystem
|
|
43
|
+
* muonroi…" — muonroi-docs WAS in the toolset (smart-filter kept it) but the
|
|
44
|
+
* question directive steered the agent to read/grep local files, so it answered
|
|
45
|
+
* "no comprehensive ecosystem description in the files read" instead of querying
|
|
46
|
+
* the shipped authoritative source.
|
|
47
|
+
*/
|
|
48
|
+
ecosystem?: boolean;
|
|
34
49
|
}
|
|
35
50
|
export interface DirectiveOutput {
|
|
36
51
|
text: string;
|
|
@@ -38,4 +53,11 @@ export interface DirectiveOutput {
|
|
|
38
53
|
/** True when the directive forbids the agent from acting before clarifying. */
|
|
39
54
|
blocking: boolean;
|
|
40
55
|
}
|
|
56
|
+
export declare function mentionsEcosystemScope(message: string): boolean;
|
|
57
|
+
/**
|
|
58
|
+
* Appended to any directive when the turn is ecosystem-scoped. Phrased
|
|
59
|
+
* conditionally ("if … available") so it is harmless when muonroi-docs is not
|
|
60
|
+
* configured — the model simply finds no such tool and falls back to local files.
|
|
61
|
+
*/
|
|
62
|
+
export declare const ECOSYSTEM_DOCS_NUDGE: string;
|
|
41
63
|
export declare function buildDirective(input: DirectiveInput): DirectiveOutput;
|