muonroi-cli 1.5.0 → 1.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/cli/cost-forensics.d.ts +3 -0
- package/dist/src/cli/cost-forensics.js +11 -0
- package/dist/src/cli/cost-forensics.test.js +1 -0
- package/dist/src/cli/experience-report.d.ts +20 -0
- package/dist/src/cli/experience-report.js +76 -0
- package/dist/src/cli/experience-report.test.d.ts +5 -0
- package/dist/src/cli/experience-report.test.js +63 -0
- package/dist/src/ee/artifact-cache.d.ts +56 -0
- package/dist/src/ee/artifact-cache.js +155 -0
- package/dist/src/ee/artifact-cache.test.d.ts +1 -0
- package/dist/src/ee/artifact-cache.test.js +69 -0
- package/dist/src/ee/search.js +7 -5
- package/dist/src/ee/search.test.d.ts +1 -0
- package/dist/src/ee/search.test.js +23 -0
- package/dist/src/generated/version.d.ts +1 -1
- package/dist/src/generated/version.js +1 -1
- package/dist/src/gsd/__tests__/directives.test.js +24 -1
- package/dist/src/gsd/directives.d.ts +22 -0
- package/dist/src/gsd/directives.js +34 -10
- package/dist/src/index.js +9 -0
- package/dist/src/mcp/__tests__/client-pool.spec.js +54 -4
- package/dist/src/mcp/__tests__/forensics-tools.test.js +1 -0
- package/dist/src/mcp/client-pool.d.ts +9 -2
- package/dist/src/mcp/client-pool.js +60 -21
- package/dist/src/orchestrator/compaction.d.ts +2 -0
- package/dist/src/orchestrator/compaction.js +14 -1
- package/dist/src/orchestrator/compaction.test.js +25 -1
- package/dist/src/orchestrator/message-processor.js +49 -7
- package/dist/src/orchestrator/scope-reminder.d.ts +12 -0
- package/dist/src/orchestrator/scope-reminder.js +16 -0
- package/dist/src/orchestrator/scope-reminder.test.js +22 -1
- package/dist/src/orchestrator/session-experience.d.ts +89 -0
- package/dist/src/orchestrator/session-experience.js +169 -0
- package/dist/src/orchestrator/session-experience.test.d.ts +6 -0
- package/dist/src/orchestrator/session-experience.test.js +72 -0
- package/dist/src/orchestrator/stream-runner.js +7 -0
- package/dist/src/orchestrator/subagent-compactor.d.ts +14 -5
- package/dist/src/orchestrator/subagent-compactor.js +30 -8
- package/dist/src/orchestrator/subagent-compactor.spec.js +18 -0
- package/dist/src/pil/__tests__/layer3-ee-injection.test.js +5 -3
- package/dist/src/pil/__tests__/layer3-injected-chunk.test.js +31 -0
- package/dist/src/pil/__tests__/layer6-output.test.js +21 -0
- package/dist/src/pil/__tests__/pipeline.test.js +17 -0
- package/dist/src/pil/__tests__/surface-compaction-artifacts.test.d.ts +1 -0
- package/dist/src/pil/__tests__/surface-compaction-artifacts.test.js +112 -0
- package/dist/src/pil/layer3-ee-injection.d.ts +28 -0
- package/dist/src/pil/layer3-ee-injection.js +125 -4
- package/dist/src/pil/layer4-gsd.js +3 -2
- package/dist/src/pil/layer6-output.js +18 -7
- package/dist/src/pil/pipeline.js +26 -9
- package/dist/src/pil/session-experience-injection.d.ts +34 -0
- package/dist/src/pil/session-experience-injection.js +54 -0
- package/dist/src/pil/session-experience-injection.test.d.ts +6 -0
- package/dist/src/pil/session-experience-injection.test.js +79 -0
- package/dist/src/storage/interaction-log.d.ts +1 -1
- package/dist/src/storage/interaction-log.js +17 -4
- package/dist/src/storage/session-experience-store.d.ts +63 -0
- package/dist/src/storage/session-experience-store.js +164 -0
- package/dist/src/storage/session-experience-store.test.d.ts +5 -0
- package/dist/src/storage/session-experience-store.test.js +86 -0
- package/dist/src/storage/ui-interaction-log.js +4 -2
- package/dist/src/tools/registry-ee-query.test.js +24 -1
- package/dist/src/tools/registry.js +20 -2
- package/dist/src/types/index.d.ts +6 -0
- package/dist/src/ui/app.js +0 -0
- package/package.json +1 -1
|
@@ -16,6 +16,17 @@ const RETENTION_DAYS = (() => {
|
|
|
16
16
|
// enough to keep the table bounded, cheap enough not to hurt hot path.
|
|
17
17
|
const PRUNE_PROBABILITY = 1 / 200;
|
|
18
18
|
let _pruneInflight = false;
|
|
19
|
+
// These writes are fail-open (logging must never break a turn), but a swallowed
// error still has to be diagnosable — a broken DB was previously invisible here.
// Log the FIRST failure with context, then stay silent so a persistently-broken
// DB can't spam the hot path (logInteraction fires ~3-5x/turn).
let _dbFailureLogged = false;
/**
 * Report an interaction-log database failure on stderr, at most once per process.
 *
 * @param {string} op - Short label of the operation that failed (callers pass "insert" / "prune").
 * @param {unknown} err - The thrown value; it is not required to be an Error instance.
 * @returns {void}
 */
function logInteractionDbFailureOnce(op, err) {
    if (_dbFailureLogged)
        return;
    // Latch before logging so the message is emitted exactly once.
    _dbFailureLogged = true;
    // A thrown value may be a plain string or an object without `message`;
    // fall back to its string form so the single diagnostic never reads "undefined"
    // when a real reason is available.
    const detail = err?.message ?? String(err);
    console.error(`[interaction-log] ${op} failed — interaction logging degraded (further errors suppressed this process): ${detail}`);
}
|
|
19
30
|
function maybePruneOld() {
|
|
20
31
|
if (_pruneInflight)
|
|
21
32
|
return;
|
|
@@ -26,8 +37,9 @@ function maybePruneOld() {
|
|
|
26
37
|
const cutoff = new Date(Date.now() - RETENTION_DAYS * 86_400_000).toISOString();
|
|
27
38
|
getDatabase().prepare(`DELETE FROM interaction_logs WHERE created_at < ?`).run(cutoff);
|
|
28
39
|
}
|
|
29
|
-
catch {
|
|
30
|
-
// Fail-open
|
|
40
|
+
catch (err) {
|
|
41
|
+
// Fail-open: a prune failure must not break the write that triggered it.
|
|
42
|
+
logInteractionDbFailureOnce("prune", err);
|
|
31
43
|
}
|
|
32
44
|
finally {
|
|
33
45
|
_pruneInflight = false;
|
|
@@ -66,8 +78,9 @@ export function logInteraction(sessionId, eventType, metadata) {
|
|
|
66
78
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`).run(sessionId, eventType, metadata?.eventSubtype ?? null, metadata?.model ?? null, metadata?.durationMs ?? null, metadata?.inputTokens ?? null, metadata?.outputTokens ?? null, metadataJson, new Date().toISOString());
|
|
67
79
|
maybePruneOld();
|
|
68
80
|
}
|
|
69
|
-
catch {
|
|
70
|
-
// Fail-open: logging must never break the main flow
|
|
81
|
+
catch (err) {
|
|
82
|
+
// Fail-open: logging must never break the main flow.
|
|
83
|
+
logInteractionDbFailureOnce("insert", err);
|
|
71
84
|
}
|
|
72
85
|
}
|
|
73
86
|
//# sourceMappingURL=interaction-log.js.map
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* src/storage/session-experience-store.ts
|
|
3
|
+
*
|
|
4
|
+
* Persistence + cross-session aggregation for the session-experience counters
|
|
5
|
+
* (compactions / elisions / ee_query rehydrations / needed-but-unavailable).
|
|
6
|
+
*
|
|
7
|
+
* The in-process tracker (`src/orchestrator/session-experience.ts`) answers the
|
|
8
|
+
* LIVE "how does it feel in the CLI" question. This module persists a per-session snapshot
|
|
9
|
+
* to `interaction_logs` (event_type='session_experience') at turn end so that
|
|
10
|
+
* `usage experience` can aggregate across many real sessions and answer the
|
|
11
|
+
* measure-before-re-architecting question: how often does compaction actually
|
|
12
|
+
* elide a tool output the agent then needs, and how often can it NOT recover it.
|
|
13
|
+
*
|
|
14
|
+
* One row per turn carrying the session's CUMULATIVE counts; readers take the
|
|
15
|
+
* latest row per session (counts are monotonic, so latest == session total).
|
|
16
|
+
* Fully fail-open: a DB error never breaks the turn. Counts are passed in by the
|
|
17
|
+
* caller (no orchestrator import here — storage stays a leaf).
|
|
18
|
+
*/
|
|
19
|
+
import type { SessionExperienceCounts } from "../orchestrator/session-experience.js";
|
|
20
|
+
/**
|
|
21
|
+
* Persist the session's cumulative experience counts. No-ops on a missing
|
|
22
|
+
* sessionId or an all-zero snapshot (nothing happened → no signal to store).
|
|
23
|
+
*/
|
|
24
|
+
export declare function persistSessionExperience(sessionId: string | undefined | null, counts: SessionExperienceCounts): void;
|
|
25
|
+
/** Latest persisted counts for one session (or null if none). Fail-open. */
|
|
26
|
+
export declare function selectSessionExperience(sessionId: string): SessionExperienceCounts | null;
|
|
27
|
+
export interface ExperiencePerSession {
|
|
28
|
+
sessionId: string;
|
|
29
|
+
createdAt: string;
|
|
30
|
+
counts: SessionExperienceCounts;
|
|
31
|
+
}
|
|
32
|
+
export interface ExperienceAggregate {
|
|
33
|
+
/** Sessions that recorded any experience signal (had ≥1 non-zero snapshot). */
|
|
34
|
+
sessionCount: number;
|
|
35
|
+
/** Of those, how many actually had compaction elide a tool output. */
|
|
36
|
+
sessionsWithElision: number;
|
|
37
|
+
/** Of those, how many hit a needed-but-unavailable rehydrate (the painful case). */
|
|
38
|
+
sessionsWithUnavailable: number;
|
|
39
|
+
totals: SessionExperienceCounts;
|
|
40
|
+
/**
|
|
41
|
+
* rehydrated / (rehydrated + unavailable) — how often, when the agent went
|
|
42
|
+
* back for an elided artifact, it actually recovered it. 1 = never lost; the
|
|
43
|
+
* lower this is, the more the manual-rehydrate friction actually bites.
|
|
44
|
+
*/
|
|
45
|
+
rehydrateRecoveryRate: number;
|
|
46
|
+
perSession: ExperiencePerSession[];
|
|
47
|
+
}
|
|
48
|
+
export interface ExperienceRow {
|
|
49
|
+
session_id: string;
|
|
50
|
+
metadata_json: string | null;
|
|
51
|
+
created_at: string;
|
|
52
|
+
}
|
|
53
|
+
/**
|
|
54
|
+
* Pure aggregation: dedup to the latest row per session (rows MUST be ordered
|
|
55
|
+
* newest-first), parse, cap at `limit` sessions, sum, derive the recovery rate.
|
|
56
|
+
* Separated from the DB query so the logic is unit-testable without SQL.
|
|
57
|
+
*/
|
|
58
|
+
export declare function computeExperienceAggregate(rows: ExperienceRow[], limit?: number): ExperienceAggregate;
|
|
59
|
+
/**
|
|
60
|
+
* Aggregate the latest snapshot per session across the most-recent `limit`
|
|
61
|
+
* sessions that recorded one. Fail-open: returns an empty aggregate on DB error.
|
|
62
|
+
*/
|
|
63
|
+
export declare function aggregateSessionExperience(limit?: number): ExperienceAggregate;
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* src/storage/session-experience-store.ts
|
|
3
|
+
*
|
|
4
|
+
* Persistence + cross-session aggregation for the session-experience counters
|
|
5
|
+
* (compactions / elisions / ee_query rehydrations / needed-but-unavailable).
|
|
6
|
+
*
|
|
7
|
+
* The in-process tracker (`src/orchestrator/session-experience.ts`) answers the
|
|
8
|
+
* LIVE "how does it feel in the CLI" question. This module persists a per-session snapshot
|
|
9
|
+
* to `interaction_logs` (event_type='session_experience') at turn end so that
|
|
10
|
+
* `usage experience` can aggregate across many real sessions and answer the
|
|
11
|
+
* measure-before-re-architecting question: how often does compaction actually
|
|
12
|
+
* elide a tool output the agent then needs, and how often can it NOT recover it.
|
|
13
|
+
*
|
|
14
|
+
* One row per turn carrying the session's CUMULATIVE counts; readers take the
|
|
15
|
+
* latest row per session (counts are monotonic, so latest == session total).
|
|
16
|
+
* Fully fail-open: a DB error never breaks the turn. Counts are passed in by the
|
|
17
|
+
* caller (no orchestrator import here — storage stays a leaf).
|
|
18
|
+
*/
|
|
19
|
+
import { getDatabase } from "./db.js";
|
|
20
|
+
import { logInteraction } from "./interaction-log.js";
|
|
21
|
+
const EVENT_TYPE = "session_experience";
|
|
22
|
+
/**
 * Add up the event counters of a snapshot so callers can tell whether anything
 * happened at all. `totalElidedChars` is deliberately not one of the summed fields.
 *
 * @param {object} c - Counts snapshot.
 * @returns {number} Sum of the eight event counters.
 */
function countsTotal(c) {
    const summedFields = [
        "compactions",
        "elided",
        "rehydratedCache",
        "rehydratedDisk",
        "rehydratedEe",
        "unavailable",
        "eeTimeouts",
        "eeErrors",
    ];
    // No seed value: the fold starts from the first field, left to right.
    return summedFields.map((field) => c[field]).reduce((sum, value) => sum + value);
}
|
|
32
|
+
/**
 * Write the session's cumulative experience counts as one `session_experience`
 * snapshot row. Nothing is written when the session id is missing/empty or when
 * the counters sum to zero.
 *
 * @param {string | undefined | null} sessionId - Session the snapshot belongs to.
 * @param {object} counts - Cumulative counts for that session.
 * @returns {void}
 */
export function persistSessionExperience(sessionId, counts) {
    // The id is checked first so the counters are never read for a missing session.
    if (sessionId && countsTotal(counts) !== 0) {
        // logInteraction is itself fail-open.
        const snapshot = { eventSubtype: "snapshot", data: counts };
        logInteraction(sessionId, EVENT_TYPE, snapshot);
    }
}
|
|
47
|
+
/**
 * Decode a persisted `metadata_json` payload into a complete counts object.
 *
 * Missing fields default to 0. Fields that are present but not finite numbers
 * are also coerced to 0, so that summing snapshots can never string-concatenate
 * or produce NaN. A payload that is valid JSON but not a plain object (a number,
 * string, array, or `null`) is not a snapshot and yields `null`.
 *
 * @param {string | null | undefined} json - Raw JSON text from the row.
 * @returns {object | null} Normalized counts, or `null` when there is no usable snapshot.
 */
function parseCounts(json) {
    if (!json)
        return null;
    try {
        const o = JSON.parse(json);
        // Property reads on a primitive or array would silently yield an all-zero
        // "snapshot"; treat those payloads as absent instead.
        if (o === null || typeof o !== "object" || Array.isArray(o))
            return null;
        const asCount = (value) => (typeof value === "number" && Number.isFinite(value) ? value : 0);
        return {
            compactions: asCount(o.compactions),
            elided: asCount(o.elided),
            totalElidedChars: asCount(o.totalElidedChars),
            rehydratedCache: asCount(o.rehydratedCache),
            rehydratedDisk: asCount(o.rehydratedDisk),
            rehydratedEe: asCount(o.rehydratedEe),
            unavailable: asCount(o.unavailable),
            eeTimeouts: asCount(o.eeTimeouts),
            eeErrors: asCount(o.eeErrors),
        };
    }
    catch (err) {
        // Malformed JSON: report it and let the caller skip the row.
        console.error(`[session-experience-store] parse failed: ${err?.message}`);
        return null;
    }
}
|
|
69
|
+
/**
 * Look up the most recent persisted counts snapshot for a single session.
 * Any error thrown during the lookup is logged and reported as "no snapshot".
 *
 * @param {string} sessionId - Session to look up.
 * @returns {object | null} Parsed counts, or `null` when none exist or the lookup failed.
 */
export function selectSessionExperience(sessionId) {
    try {
        const latestSnapshot = getDatabase().prepare(`SELECT metadata_json FROM interaction_logs
WHERE session_id = ? AND event_type = ?
ORDER BY created_at DESC, id DESC LIMIT 1`);
        const hit = latestSnapshot.get(sessionId, EVENT_TYPE);
        // A session with no rows yields no hit; hand the parser an explicit null.
        const payload = hit?.metadata_json ?? null;
        return parseCounts(payload);
    }
    catch (err) {
        console.error(`[session-experience-store] select failed for ${sessionId}: ${err?.message}`);
        return null;
    }
}
|
|
84
|
+
/**
 * Build a fresh counts object with every counter at zero. A new object is
 * returned on each call, so callers may mutate the result freely.
 *
 * @returns {object} Zeroed counts.
 */
function emptyCounts() {
    const fields = [
        "compactions",
        "elided",
        "totalElidedChars",
        "rehydratedCache",
        "rehydratedDisk",
        "rehydratedEe",
        "unavailable",
        "eeTimeouts",
        "eeErrors",
    ];
    return Object.fromEntries(fields.map((field) => [field, 0]));
}
|
|
97
|
+
/**
 * Pure aggregation: dedup to the latest row per session (rows MUST be ordered
 * newest-first), parse, cap at `limit` sessions, sum, derive the recovery rate.
 * Separated from the DB query so the logic is unit-testable without SQL.
 *
 * Rows whose payload cannot be parsed are skipped; an older parseable row of the
 * same session may then stand in for it.
 *
 * @param {Array<{session_id: string, metadata_json: string | null, created_at: string}>} rows
 *   Snapshot rows, newest first.
 * @param {number} [limit=100] - Maximum number of sessions to include; a value
 *   of 0 or less yields an empty aggregate.
 * @returns {object} Aggregate with `sessionCount`, `sessionsWithElision`,
 *   `sessionsWithUnavailable`, `totals`, `rehydrateRecoveryRate`, `perSession`.
 */
export function computeExperienceAggregate(rows, limit = 100) {
    const seen = new Set();
    const perSession = [];
    for (const r of rows) {
        // Enforce the cap BEFORE admitting another session. Checking only after a
        // push would let limit <= 0 still admit the first session.
        if (perSession.length >= limit)
            break;
        if (seen.has(r.session_id))
            continue; // first row per session == latest
        const counts = parseCounts(r.metadata_json);
        if (!counts)
            continue;
        seen.add(r.session_id);
        perSession.push({ sessionId: r.session_id, createdAt: r.created_at, counts });
    }
    const totals = emptyCounts();
    let sessionsWithElision = 0;
    let sessionsWithUnavailable = 0;
    for (const { counts } of perSession) {
        totals.compactions += counts.compactions;
        totals.elided += counts.elided;
        totals.totalElidedChars += counts.totalElidedChars;
        totals.rehydratedCache += counts.rehydratedCache;
        totals.rehydratedDisk += counts.rehydratedDisk;
        totals.rehydratedEe += counts.rehydratedEe;
        totals.unavailable += counts.unavailable;
        totals.eeTimeouts += counts.eeTimeouts;
        totals.eeErrors += counts.eeErrors;
        if (counts.elided > 0)
            sessionsWithElision += 1;
        if (counts.unavailable > 0)
            sessionsWithUnavailable += 1;
    }
    // Recovery rate = successful rehydrations / all rehydrate attempts.
    // With no attempts at all nothing was ever lost, so the rate is 1.
    const rehydrated = totals.rehydratedCache + totals.rehydratedDisk + totals.rehydratedEe;
    const attempts = rehydrated + totals.unavailable;
    const rehydrateRecoveryRate = attempts > 0 ? rehydrated / attempts : 1;
    return {
        sessionCount: perSession.length,
        sessionsWithElision,
        sessionsWithUnavailable,
        totals,
        rehydrateRecoveryRate,
        perSession,
    };
}
|
|
146
|
+
/**
 * Read every `session_experience` snapshot row, newest first, and reduce them to
 * a cross-session aggregate over at most `limit` sessions. Any error thrown
 * while querying or aggregating is logged and answered with an empty aggregate.
 *
 * @param {number} [limit=100] - Maximum number of sessions to aggregate.
 * @returns {object} The aggregate produced by `computeExperienceAggregate`.
 */
export function aggregateSessionExperience(limit = 100) {
    try {
        const allSnapshots = getDatabase().prepare(`SELECT session_id, metadata_json, created_at FROM interaction_logs
WHERE event_type = ?
ORDER BY created_at DESC, id DESC`);
        const newestFirst = allSnapshots.all(EVENT_TYPE);
        return computeExperienceAggregate(newestFirst, limit);
    }
    catch (err) {
        console.error(`[session-experience-store] aggregate failed: ${err?.message}`);
        // Fail-open: hand back the aggregate of "no rows" rather than throwing.
        return computeExperienceAggregate([], limit);
    }
}
|
|
164
|
+
//# sourceMappingURL=session-experience-store.js.map
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* session-experience-store — persist + cross-session aggregate of the anti-blindness ("anti-mù")
|
|
3
|
+
* counters that decide whether compaction friction is real at a painful rate.
|
|
4
|
+
*/
|
|
5
|
+
import { afterEach, describe, expect, it, vi } from "vitest";
|
|
6
|
+
vi.mock("./db.js", () => ({ getDatabase: vi.fn(() => ({ prepare: () => ({ all: () => [] }) })) }));
|
|
7
|
+
const logInteraction = vi.fn();
|
|
8
|
+
vi.mock("./interaction-log.js", () => ({ logInteraction: (...a) => logInteraction(...a) }));
|
|
9
|
+
import { computeExperienceAggregate, persistSessionExperience, } from "./session-experience-store.js";
|
|
10
|
+
/**
 * Test fixture: a counts snapshot with every counter at zero, overridden by
 * whatever fields the caller supplies.
 *
 * @param {object} [p={}] - Fields to override.
 * @returns {object} A complete counts snapshot.
 */
function counts(p = {}) {
    const zeroed = {
        compactions: 0,
        elided: 0,
        totalElidedChars: 0,
        rehydratedCache: 0,
        rehydratedDisk: 0,
        rehydratedEe: 0,
        unavailable: 0,
        eeTimeouts: 0,
        eeErrors: 0,
    };
    // Overrides are applied last so they win over the zero defaults.
    return Object.assign(zeroed, p);
}
|
|
24
|
+
/**
 * Test fixture: build one snapshot row in the shape the aggregation expects,
 * with the counts serialized into `metadata_json`.
 *
 * @param {string} sessionId - Value for `session_id`.
 * @param {string} createdAt - Value for `created_at`.
 * @param {object} [c] - Count overrides passed through `counts`.
 * @returns {{session_id: string, created_at: string, metadata_json: string}}
 */
function row(sessionId, createdAt, c) {
    const snapshot = counts(c);
    const metadata_json = JSON.stringify(snapshot);
    return { session_id: sessionId, created_at: createdAt, metadata_json };
}
|
|
27
|
+
// Write-side contract: a snapshot is logged only for a real session id AND a
// non-zero set of counters.
describe("persistSessionExperience", () => {
    // Every case inspects the shared logInteraction mock, so clear its call history between cases.
    afterEach(() => logInteraction.mockClear());
    it("no-ops on a missing sessionId", () => {
        for (const missingId of [undefined, ""]) {
            persistSessionExperience(missingId, counts({ elided: 3 }));
        }
        expect(logInteraction).not.toHaveBeenCalled();
    });
    it("no-ops on an all-zero snapshot (no signal to store)", () => {
        const nothingHappened = counts();
        persistSessionExperience("sess-1", nothingHappened);
        expect(logInteraction).not.toHaveBeenCalled();
    });
    it("writes a session_experience snapshot when something happened", () => {
        const snapshot = counts({ compactions: 2, elided: 5, rehydratedCache: 1 });
        persistSessionExperience("sess-1", snapshot);
        expect(logInteraction).toHaveBeenCalledTimes(1);
        // Unpack the single recorded call: (sessionId, eventType, metadata).
        const [firstCall] = logInteraction.mock.calls;
        const [sid, type, meta] = firstCall;
        expect(sid).toBe("sess-1");
        expect(type).toBe("session_experience");
        expect(meta.data.elided).toBe(5);
    });
});
|
|
47
|
+
// Read-side contract: latest-row-per-session dedup, session cap, tolerance of
// unparseable rows, and the recovery-rate derivation.
describe("computeExperienceAggregate", () => {
    it("dedups to the latest row per session (rows newest-first) and sums totals", () => {
        const rows = [
            // sess-a newest first (cumulative) then an older row that must be ignored
            row("sess-a", "2026-06-17T10:00:00Z", { compactions: 3, elided: 6, rehydratedCache: 4, unavailable: 1 }),
            row("sess-a", "2026-06-17T09:00:00Z", { compactions: 1, elided: 2 }),
            row("sess-b", "2026-06-17T08:00:00Z", { compactions: 1, elided: 2, rehydratedEe: 1, unavailable: 1 }),
        ];
        const agg = computeExperienceAggregate(rows);
        expect(agg.sessionCount).toBe(2);
        expect(agg.totals.elided).toBe(8); // 6 (latest a) + 2 (b), NOT the stale 2
        expect(agg.totals.compactions).toBe(4); // 3 + 1
        expect(agg.sessionsWithElision).toBe(2);
        expect(agg.sessionsWithUnavailable).toBe(2);
        // recovery = rehydrated(4+0+1) / (rehydrated 5 + unavailable 2) = 5/7
        expect(agg.rehydrateRecoveryRate).toBeCloseTo(5 / 7, 5);
    });
    it("recovery rate is 1 when no rehydrate was ever attempted", () => {
        const agg = computeExperienceAggregate([row("s", "2026-06-17T10:00:00Z", { compactions: 1, elided: 2 })]);
        expect(agg.rehydrateRecoveryRate).toBe(1);
        expect(agg.sessionsWithUnavailable).toBe(0);
    });
    it("caps at `limit` sessions and skips unparseable rows", () => {
        const rows = [
            row("s1", "2026-06-17T10:00:03Z", { elided: 1 }),
            { session_id: "s2", created_at: "2026-06-17T10:00:02Z", metadata_json: "{bad json" },
            row("s3", "2026-06-17T10:00:01Z", { elided: 1 }),
        ];
        const agg = computeExperienceAggregate(rows, 1);
        expect(agg.sessionCount).toBe(1);
        expect(agg.perSession[0].sessionId).toBe("s1");
        // With a cap of 1 the loop stops before it ever reaches the bad row, so the
        // skip path needs a wider cap: s2 must be dropped and s3 take the second slot.
        const wider = computeExperienceAggregate(rows, 2);
        expect(wider.sessionCount).toBe(2);
        expect(wider.perSession.map((s) => s.sessionId)).toEqual(["s1", "s3"]);
    });
    it("empty input yields an empty aggregate with recovery rate 1", () => {
        const agg = computeExperienceAggregate([]);
        expect(agg.sessionCount).toBe(0);
        expect(agg.totals.elided).toBe(0);
        expect(agg.rehydrateRecoveryRate).toBe(1);
    });
});
|
|
86
|
+
//# sourceMappingURL=session-experience-store.test.js.map
|
|
@@ -27,8 +27,10 @@ export function logUIInteraction(sessionId, payload) {
|
|
|
27
27
|
data: payload.data,
|
|
28
28
|
});
|
|
29
29
|
}
|
|
30
|
-
catch {
|
|
31
|
-
// Fail-open
|
|
30
|
+
catch (err) {
|
|
31
|
+
// Fail-open (logInteraction is itself guarded; this is defensive). Surface
|
|
32
|
+
// the subtype so a serialization fault here is at least diagnosable.
|
|
33
|
+
console.error(`[ui-interaction-log] persist failed for subtype=${payload.subtype}: ${err?.message}`);
|
|
32
34
|
}
|
|
33
35
|
}
|
|
34
36
|
//# sourceMappingURL=ui-interaction-log.js.map
|
|
@@ -9,7 +9,9 @@
|
|
|
9
9
|
* (no network).
|
|
10
10
|
*/
|
|
11
11
|
import os from "node:os";
|
|
12
|
-
import { describe, expect, it } from "vitest";
|
|
12
|
+
import { afterEach, describe, expect, it } from "vitest";
|
|
13
|
+
import { __resetArtifactCacheForTests, recordArtifact } from "../ee/artifact-cache.js";
|
|
14
|
+
import { __resetSessionExperienceForTests, getSessionExperience } from "../orchestrator/session-experience.js";
|
|
13
15
|
import { BashTool } from "./bash.js";
|
|
14
16
|
import { createBuiltinTools, isToolArtifactQuery } from "./registry.js";
|
|
15
17
|
describe("ee_query builtin tool", () => {
|
|
@@ -45,4 +47,25 @@ describe("isToolArtifactQuery — ee_query intent routing", () => {
|
|
|
45
47
|
expect(isToolArtifactQuery("tool-artifact storage design")).toBe(false);
|
|
46
48
|
});
|
|
47
49
|
});
|
|
50
|
+
// Local-first rehydrate: an artifact recorded earlier in the session must be
// served by ee_query from the in-process cache.
describe("ee_query — anti-mù rehydrate (local-first, durable when EE is down)", () => {
    // Reset the artifact cache and the experience tracker after each case so nothing leaks between cases.
    afterEach(() => {
        __resetArtifactCacheForTests();
        __resetSessionExperienceForTests();
    });
    it("rehydrates a tool-artifact from the in-session cache with NO EE/network call", async () => {
        // Scenario: the compactor has already elided this output (recordArtifact)
        // and EE is unreachable. Asking ee_query for "tool-artifact id=X" must
        // still yield the full content from the local cache, not an
        // [ee_unavailable] note.
        recordArtifact("call_42", "read_file", "FULL ELIDED CONTENT — line A\nline B\nline C");
        const { ee_query: eeQuery } = createBuiltinTools(new BashTool(os.tmpdir()), "agent");
        const rendered = String(await eeQuery.execute?.({ query: "tool-artifact id=call_42" }));
        const expectedFragments = [
            "rehydrated from in-session cache",
            "tool=read_file",
            "FULL ELIDED CONTENT",
        ];
        for (const fragment of expectedFragments) {
            expect(rendered).toContain(fragment);
        }
        expect(rendered).not.toMatch(/ee_unavailable/);
        // The rehydrate must also be counted as cache-sourced by the session tracker.
        expect(getSessionExperience().rehydrations.cache).toBe(1);
    });
});
|
|
48
71
|
//# sourceMappingURL=registry-ee-query.test.js.map
|
|
@@ -466,15 +466,33 @@ export function createBuiltinTools(bash, mode, opts) {
|
|
|
466
466
|
}
|
|
467
467
|
try {
|
|
468
468
|
if (isToolArtifactQuery(query)) {
|
|
469
|
-
//
|
|
469
|
+
// Local-first (anti-mù durability): the compactor records each elided
|
|
470
|
+
// output in-process by toolCallId. For an exact "tool-artifact id=X"
|
|
471
|
+
// lookup this is the authoritative full content for THIS session and
|
|
472
|
+
// works even when EE is down — the failure window long sessions hit.
|
|
473
|
+
const { findArtifactByQuery, findArtifactOnDisk } = await import("../ee/artifact-cache.js");
|
|
474
|
+
// Lived-experience telemetry: record where the rehydrate came from so
|
|
475
|
+
// a "cảm nhận trong CLI" question (and the measure-first instrumentation)
|
|
476
|
+
// sees cache vs disk vs ee vs needed-but-unavailable.
|
|
477
|
+
const { recordRehydration } = await import("../orchestrator/session-experience.js");
|
|
478
|
+
const mem = findArtifactByQuery(query);
|
|
479
|
+
const local = mem ?? (await findArtifactOnDisk(query));
|
|
480
|
+
if (local) {
|
|
481
|
+
const src = mem ? "in-session cache" : "local disk cache";
|
|
482
|
+
recordRehydration(mem ? "cache" : "disk");
|
|
483
|
+
return truncateOutput(`[tool-artifact id=${local.toolCallId} tool=${local.toolName} — rehydrated from ${src}]\n${local.content}`);
|
|
484
|
+
}
|
|
485
|
+
// EE fallback (cross-session / post-restart) → raw /api/search exact lookup.
|
|
470
486
|
const { searchEE } = await import("../ee/search.js");
|
|
471
487
|
const resp = await searchEE(query, {
|
|
472
488
|
...(Array.isArray(input?.collections) ? { collections: input.collections } : {}),
|
|
473
489
|
...(typeof input?.limit === "number" ? { limit: input.limit } : {}),
|
|
474
490
|
});
|
|
475
491
|
if (resp === null) {
|
|
476
|
-
|
|
492
|
+
recordRehydration("unavailable");
|
|
493
|
+
return "[ee_unavailable] Experience Engine returned no response (server down, timeout, circuit open, or unconfigured) and the artifact is not in this session's local cache. Proceed without EE recall — re-read the source directly if you need the elided content.";
|
|
477
494
|
}
|
|
495
|
+
recordRehydration("ee");
|
|
478
496
|
return truncateOutput(JSON.stringify(resp));
|
|
479
497
|
}
|
|
480
498
|
// General recall → /api/recall (recallMode, [id col] index + surface).
|
|
@@ -308,6 +308,12 @@ export interface ExperienceWarningData {
|
|
|
308
308
|
export interface ExperienceInjectedData {
|
|
309
309
|
pointCount: number;
|
|
310
310
|
pointIds: string[];
|
|
311
|
+
/** Per-point detail so the TUI can show WHAT was injected, not just the count. */
|
|
312
|
+
points?: Array<{
|
|
313
|
+
id: string;
|
|
314
|
+
title: string;
|
|
315
|
+
tier: "principle" | "behavioral" | "checkpoint";
|
|
316
|
+
}>;
|
|
311
317
|
scoreFloor: number;
|
|
312
318
|
taskType?: string;
|
|
313
319
|
domain?: string;
|
package/dist/src/ui/app.js
CHANGED
|
Binary file
|