muonroi-cli 1.5.0 → 1.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. package/dist/src/cli/cost-forensics.d.ts +3 -0
  2. package/dist/src/cli/cost-forensics.js +11 -0
  3. package/dist/src/cli/cost-forensics.test.js +1 -0
  4. package/dist/src/cli/experience-report.d.ts +20 -0
  5. package/dist/src/cli/experience-report.js +76 -0
  6. package/dist/src/cli/experience-report.test.d.ts +5 -0
  7. package/dist/src/cli/experience-report.test.js +63 -0
  8. package/dist/src/ee/artifact-cache.d.ts +56 -0
  9. package/dist/src/ee/artifact-cache.js +155 -0
  10. package/dist/src/ee/artifact-cache.test.d.ts +1 -0
  11. package/dist/src/ee/artifact-cache.test.js +69 -0
  12. package/dist/src/ee/search.js +7 -5
  13. package/dist/src/ee/search.test.d.ts +1 -0
  14. package/dist/src/ee/search.test.js +23 -0
  15. package/dist/src/generated/version.d.ts +1 -1
  16. package/dist/src/generated/version.js +1 -1
  17. package/dist/src/gsd/__tests__/directives.test.js +24 -1
  18. package/dist/src/gsd/directives.d.ts +22 -0
  19. package/dist/src/gsd/directives.js +34 -10
  20. package/dist/src/index.js +9 -0
  21. package/dist/src/mcp/__tests__/client-pool.spec.js +54 -4
  22. package/dist/src/mcp/__tests__/forensics-tools.test.js +1 -0
  23. package/dist/src/mcp/client-pool.d.ts +9 -2
  24. package/dist/src/mcp/client-pool.js +60 -21
  25. package/dist/src/orchestrator/compaction.d.ts +2 -0
  26. package/dist/src/orchestrator/compaction.js +14 -1
  27. package/dist/src/orchestrator/compaction.test.js +25 -1
  28. package/dist/src/orchestrator/message-processor.js +49 -7
  29. package/dist/src/orchestrator/scope-reminder.d.ts +12 -0
  30. package/dist/src/orchestrator/scope-reminder.js +16 -0
  31. package/dist/src/orchestrator/scope-reminder.test.js +22 -1
  32. package/dist/src/orchestrator/session-experience.d.ts +89 -0
  33. package/dist/src/orchestrator/session-experience.js +169 -0
  34. package/dist/src/orchestrator/session-experience.test.d.ts +6 -0
  35. package/dist/src/orchestrator/session-experience.test.js +72 -0
  36. package/dist/src/orchestrator/stream-runner.js +7 -0
  37. package/dist/src/orchestrator/subagent-compactor.d.ts +14 -5
  38. package/dist/src/orchestrator/subagent-compactor.js +30 -8
  39. package/dist/src/orchestrator/subagent-compactor.spec.js +18 -0
  40. package/dist/src/pil/__tests__/layer3-ee-injection.test.js +5 -3
  41. package/dist/src/pil/__tests__/layer3-injected-chunk.test.js +31 -0
  42. package/dist/src/pil/__tests__/layer6-output.test.js +21 -0
  43. package/dist/src/pil/__tests__/pipeline.test.js +17 -0
  44. package/dist/src/pil/__tests__/surface-compaction-artifacts.test.d.ts +1 -0
  45. package/dist/src/pil/__tests__/surface-compaction-artifacts.test.js +112 -0
  46. package/dist/src/pil/layer3-ee-injection.d.ts +28 -0
  47. package/dist/src/pil/layer3-ee-injection.js +125 -4
  48. package/dist/src/pil/layer4-gsd.js +3 -2
  49. package/dist/src/pil/layer6-output.js +18 -7
  50. package/dist/src/pil/pipeline.js +26 -9
  51. package/dist/src/pil/session-experience-injection.d.ts +34 -0
  52. package/dist/src/pil/session-experience-injection.js +54 -0
  53. package/dist/src/pil/session-experience-injection.test.d.ts +6 -0
  54. package/dist/src/pil/session-experience-injection.test.js +79 -0
  55. package/dist/src/storage/interaction-log.d.ts +1 -1
  56. package/dist/src/storage/interaction-log.js +17 -4
  57. package/dist/src/storage/session-experience-store.d.ts +63 -0
  58. package/dist/src/storage/session-experience-store.js +164 -0
  59. package/dist/src/storage/session-experience-store.test.d.ts +5 -0
  60. package/dist/src/storage/session-experience-store.test.js +86 -0
  61. package/dist/src/storage/ui-interaction-log.js +4 -2
  62. package/dist/src/tools/registry-ee-query.test.js +24 -1
  63. package/dist/src/tools/registry.js +20 -2
  64. package/dist/src/types/index.d.ts +6 -0
  65. package/dist/src/ui/app.js +0 -0
  66. package/package.json +1 -1
@@ -0,0 +1,89 @@
1
+ /**
2
+ * src/orchestrator/session-experience.ts
3
+ *
4
+ * In-process record of what actually happened to the agent in THIS CLI session:
5
+ * how often compaction fired, which tool outputs were elided, how many of those
6
+ * the agent rehydrated via ee_query (and from where), and whether the Experience
7
+ * Engine misbehaved. It is the single source of truth for the agent's *lived*
8
+ * session experience.
9
+ *
10
+ * Why this exists: when a user asks "cảm nhận trong CLI" / "do you feel blind in
11
+ * this session", the agent used to answer by READING the anti-mù source code and
12
+ * theorizing about mechanisms (session ce816796a57d) — friction it never actually
13
+ * observed. That is backwards. With this tracker the agent answers from data —
14
+ * "compaction fired 3x, I rehydrated 2 artifacts, never lost context" — instead of
15
+ * inferring from code. The same counters double as the "measure before you
16
+ * re-architect" instrumentation: how often a real session actually elides a stub
17
+ * the agent then needs.
18
+ *
19
+ * Process-scoped singleton == session-scoped: one CLI invocation is one session.
20
+ * Pure module, no I/O, fully unit-testable; reset hook for tests.
21
+ */
22
+ export type RehydrateSource = "cache" | "disk" | "ee" | "unavailable";
23
+ export interface ElisionRecord {
24
+ toolCallId: string;
25
+ toolName: string;
26
+ /** Full length of the elided output, in chars. */
27
+ chars: number;
28
+ /** prepareStep step number at which it was elided. */
29
+ step: number;
30
+ }
31
+ export interface SessionExperience {
32
+ compactions: number;
33
+ lastCompactionStep: number | null;
34
+ elisions: ReadonlyArray<ElisionRecord>;
35
+ totalElidedChars: number;
36
+ rehydrations: Readonly<Record<RehydrateSource, number>>;
37
+ eeTimeouts: number;
38
+ eeErrors: number;
39
+ }
40
+ /** Record that B3/B4 compaction actually elided something at `step`. */
41
+ export declare function recordCompaction(step: number): void;
42
+ /** Record a single tool output the compactor rewrote into a stub. */
43
+ export declare function recordElision(toolCallId: string, toolName: string, chars: number, step: number): void;
44
+ /**
45
+ * Record an ee_query rehydrate of an elided artifact, tagged by where it came
46
+ * from. `unavailable` means the agent asked for an artifact that was neither in
47
+ * the local cache nor recoverable from EE — the "needed-but-couldn't-get" signal.
48
+ */
49
+ export declare function recordRehydration(source: RehydrateSource): void;
50
+ /** Record an Experience Engine timeout or non-timeout error felt this session. */
51
+ export declare function recordEeEvent(kind: "timeout" | "error"): void;
52
+ /**
53
+ * Flat scalar counts — the shape persisted per session and aggregated
54
+ * cross-session by `usage experience` to decide whether compaction friction is
55
+ * real at a painful rate (no nested arrays, JSON-stable).
56
+ */
57
+ export interface SessionExperienceCounts {
58
+ compactions: number;
59
+ elided: number;
60
+ totalElidedChars: number;
61
+ rehydratedCache: number;
62
+ rehydratedDisk: number;
63
+ rehydratedEe: number;
64
+ unavailable: number;
65
+ eeTimeouts: number;
66
+ eeErrors: number;
67
+ }
68
+ /** Scalar counts for persistence/aggregation (drops the per-elision array). */
69
+ export declare function getSessionExperienceCounts(): SessionExperienceCounts;
70
+ /** Immutable snapshot of the session so far. */
71
+ export declare function getSessionExperience(): SessionExperience;
72
+ /** Most-recent elisions, newest first — feeds the checkpoint manifest. */
73
+ export declare function recentElisions(n?: number): ElisionRecord[];
74
+ /** True when literally nothing notable has happened yet (context intact). */
75
+ export declare function isSessionExperienceEmpty(): boolean;
76
+ /**
77
+ * A compact manifest of the most-recently elided tool outputs, for the
78
+ * post-compaction checkpoint note: turns the generic "high-value elided? use
79
+ * ee_query" prose into a concrete, actionable list so the agent's rehydrate
80
+ * round-trip is informed rather than blind.
81
+ */
82
+ export declare function formatElisionManifest(n?: number): string;
83
+ /**
84
+ * The agent-facing felt summary. Injected when the user asks how the agent is
85
+ * doing IN this session, so the answer is grounded in what actually happened —
86
+ * not in a fresh reading of the compaction/PIL source.
87
+ */
88
+ export declare function formatSessionExperience(): string;
89
+ export declare function __resetSessionExperienceForTests(): void;
@@ -0,0 +1,169 @@
1
+ /**
2
+ * src/orchestrator/session-experience.ts
3
+ *
4
+ * In-process record of what actually happened to the agent in THIS CLI session:
5
+ * how often compaction fired, which tool outputs were elided, how many of those
6
+ * the agent rehydrated via ee_query (and from where), and whether the Experience
7
+ * Engine misbehaved. It is the single source of truth for the agent's *lived*
8
+ * session experience.
9
+ *
10
+ * Why this exists: when a user asks "cảm nhận trong CLI" / "do you feel blind in
11
+ * this session", the agent used to answer by READING the anti-mù source code and
12
+ * theorizing about mechanisms (session ce816796a57d) — friction it never actually
13
+ * observed. That is backwards. With this tracker the agent answers from data —
14
+ * "compaction fired 3x, I rehydrated 2 artifacts, never lost context" — instead of
15
+ * inferring from code. The same counters double as the "measure before you
16
+ * re-architect" instrumentation: how often a real session actually elides a stub
17
+ * the agent then needs.
18
+ *
19
+ * Process-scoped singleton == session-scoped: one CLI invocation is one session.
20
+ * Pure module, no I/O, fully unit-testable; reset hook for tests.
21
+ */
22
/** Bound the elision log so a pathological session can't grow it unbounded. */
const MAX_ELISIONS = 200;

/**
 * Build a zeroed tracker state: no compactions, an empty elision log, all
 * rehydration counters at 0 and no Experience Engine failures.
 */
function freshState() {
    return {
        compactions: 0,
        lastCompactionStep: null,
        elisions: [],
        rehydrations: { cache: 0, disk: 0, ee: 0, unavailable: 0 },
        eeTimeouts: 0,
        eeErrors: 0,
    };
}

/** Module-level tracker; replaced wholesale by the test reset hook. */
let state = freshState();

/**
 * Record that B3/B4 compaction actually elided something at `step`.
 *
 * The compaction counter always advances; `lastCompactionStep` is only
 * overwritten when `step` is a finite number, otherwise the previous value
 * is kept.
 */
export function recordCompaction(step) {
    state.compactions += 1;
    state.lastCompactionStep = Number.isFinite(step) ? step : state.lastCompactionStep;
}

/**
 * Record a single tool output the compactor rewrote into a stub.
 *
 * Calls with an empty `toolCallId` are ignored. `chars` is floored and clamped
 * to 0 when it is not a positive finite number; a non-finite `step` is stored
 * as 0. Only the newest MAX_ELISIONS records are retained.
 */
export function recordElision(toolCallId, toolName, chars, step) {
    if (!toolCallId)
        return;
    state.elisions.push({
        toolCallId,
        toolName: toolName || "",
        chars: Number.isFinite(chars) && chars > 0 ? Math.floor(chars) : 0,
        step: Number.isFinite(step) ? step : 0,
    });
    // FIFO trim — keep the most recent MAX_ELISIONS.
    if (state.elisions.length > MAX_ELISIONS) {
        state.elisions.splice(0, state.elisions.length - MAX_ELISIONS);
    }
}

/**
 * Record an ee_query rehydrate of an elided artifact, tagged by where it came
 * from. `unavailable` means the agent asked for an artifact that was neither in
 * the local cache nor recoverable from EE — the "needed-but-couldn't-get" signal.
 *
 * Unknown sources are ignored.
 */
export function recordRehydration(source) {
    // Own-property check on purpose: the `in` operator also matches inherited
    // keys (e.g. "toString"), which would let `+= 1` write a junk own property
    // into the counters object.
    if (Object.prototype.hasOwnProperty.call(state.rehydrations, source))
        state.rehydrations[source] += 1;
}

/**
 * Record an Experience Engine timeout or non-timeout error felt this session.
 * Any `kind` other than "timeout" is counted as an error.
 */
export function recordEeEvent(kind) {
    if (kind === "timeout")
        state.eeTimeouts += 1;
    else
        state.eeErrors += 1;
}

/**
 * Scalar counts for persistence/aggregation (drops the per-elision array).
 *
 * Note: `elided` and `totalElidedChars` are derived from the retained elision
 * log, which is capped at MAX_ELISIONS records, so both reflect at most the
 * newest MAX_ELISIONS elisions.
 */
export function getSessionExperienceCounts() {
    const s = getSessionExperience();
    return {
        compactions: s.compactions,
        elided: s.elisions.length,
        totalElidedChars: s.totalElidedChars,
        rehydratedCache: s.rehydrations.cache,
        rehydratedDisk: s.rehydrations.disk,
        rehydratedEe: s.rehydrations.ee,
        unavailable: s.rehydrations.unavailable,
        eeTimeouts: s.eeTimeouts,
        eeErrors: s.eeErrors,
    };
}

/**
 * Immutable snapshot of the session so far.
 *
 * The elision records and the rehydration counters are copied, so mutating
 * the returned object never changes the tracker's own state.
 */
export function getSessionExperience() {
    return {
        compactions: state.compactions,
        lastCompactionStep: state.lastCompactionStep,
        // Copy each record, not just the array, so callers cannot reach the live entries.
        elisions: state.elisions.map((e) => ({ ...e })),
        totalElidedChars: state.elisions.reduce((sum, e) => sum + e.chars, 0),
        rehydrations: { ...state.rehydrations },
        eeTimeouts: state.eeTimeouts,
        eeErrors: state.eeErrors,
    };
}

/**
 * Most-recent elisions, newest first — feeds the checkpoint manifest.
 *
 * @param n Maximum number of records to return (default 5). Fractions are
 *   floored; zero, negative, or NaN yields an empty array.
 * @returns Copies of up to `n` records, newest first.
 */
export function recentElisions(n = 5) {
    const take = Math.max(0, Math.floor(n));
    // Guard the zero/NaN case explicitly: slice(-0) is slice(0), which would
    // return the WHOLE log instead of nothing.
    if (!(take > 0))
        return [];
    return state.elisions
        .slice(-take)
        .reverse()
        .map((e) => ({ ...e }));
}

/** True when literally nothing notable has happened yet (context intact). */
export function isSessionExperienceEmpty() {
    return (state.compactions === 0 &&
        state.elisions.length === 0 &&
        state.eeTimeouts === 0 &&
        state.eeErrors === 0 &&
        state.rehydrations.cache === 0 &&
        state.rehydrations.disk === 0 &&
        state.rehydrations.ee === 0 &&
        state.rehydrations.unavailable === 0);
}

/** Truncate a tool-call id to its first 12 characters for compact display. */
function shortId(id) {
    return id.length > 12 ? id.slice(0, 12) : id;
}

/**
 * A compact manifest of the most-recently elided tool outputs, for the
 * post-compaction checkpoint note: turns the generic "high-value elided? use
 * ee_query" prose into a concrete, actionable list so the agent's rehydrate
 * round-trip is informed rather than blind.
 *
 * @param n Maximum number of elisions to list (default 5).
 * @returns The one-line manifest, or "" when there is nothing to list.
 */
export function formatElisionManifest(n = 5) {
    const recent = recentElisions(n);
    if (recent.length === 0)
        return "";
    const items = recent.map((e) => `id=${shortId(e.toolCallId)} ${e.toolName || "tool"} (${e.chars}c)`).join(" · ");
    return `Elided this turn: ${items}. ee_query "tool-artifact id=XXX" to rehydrate the one you need.`;
}

/**
 * The agent-facing felt summary. Injected when the user asks how the agent is
 * doing IN this session, so the answer is grounded in what actually happened —
 * not in a fresh reading of the compaction/PIL source.
 *
 * @returns A newline-joined block: a header line, either a single
 *   "nothing notable" line or one line per counter group, and a closing
 *   steering instruction.
 */
export function formatSessionExperience() {
    const s = getSessionExperience();
    const lines = [];
    lines.push("[session experience — what ACTUALLY happened to you in THIS CLI session so far]");
    if (isSessionExperienceEmpty()) {
        lines.push("- Nothing notable: no compaction, no elision, no EE failures. Your context is intact this session.");
    }
    else {
        lines.push(s.compactions === 0
            ? "- Compaction: not fired yet — full context retained."
            : `- Compaction: fired ${s.compactions}x${s.lastCompactionStep !== null ? ` (last at step ${s.lastCompactionStep})` : ""}.`);
        if (s.elisions.length === 0) {
            lines.push("- Tool outputs elided: none — nothing was rewritten to a stub.");
        }
        else {
            // De-duplicate tool names while keeping first-seen order.
            const tools = [...new Set(s.elisions.map((e) => e.toolName || "tool"))].join(", ");
            lines.push(`- Tool outputs elided: ${s.elisions.length} (${s.totalElidedChars} chars; via ${tools}).`);
        }
        const r = s.rehydrations;
        const rehydratedTotal = r.cache + r.disk + r.ee;
        lines.push(rehydratedTotal === 0 && r.unavailable === 0
            ? "- Rehydrated via ee_query: none requested."
            : `- Rehydrated via ee_query: cache=${r.cache} disk=${r.disk} ee=${r.ee}; needed-but-unavailable=${r.unavailable}.`);
        // The EE line is only emitted when something actually went wrong.
        if (s.eeTimeouts > 0 || s.eeErrors > 0) {
            lines.push(`- Experience Engine: timeouts=${s.eeTimeouts} errors=${s.eeErrors}.`);
        }
    }
    lines.push("Answer the user's how-does-it-feel / are-you-blind question FROM THIS lived data — not by reading the CLI source. If everything is zero, say so plainly: nothing degraded your context this session.");
    return lines.join("\n");
}

// ─── Test hook ─────────────────────────────────────────────────────────────
/** Discard all recorded data and start again from a zeroed state. */
export function __resetSessionExperienceForTests() {
    state = freshState();
}
169
+ //# sourceMappingURL=session-experience.js.map
@@ -0,0 +1,6 @@
1
+ /**
2
+ * session-experience — in-process record of the agent's lived session, so a
3
+ * "cảm nhận trong CLI" / "are you blind?" question is answered from data, not by
4
+ * re-reading source. Also the "measure before re-architecting" instrumentation.
5
+ */
6
+ export {};
@@ -0,0 +1,72 @@
1
+ /**
2
+ * session-experience — in-process record of the agent's lived session, so a
3
+ * "cảm nhận trong CLI" / "are you blind?" question is answered from data, not by
4
+ * re-reading source. Also the "measure before re-architecting" instrumentation.
5
+ */
6
+ import { afterEach, describe, expect, it } from "vitest";
7
+ import { __resetSessionExperienceForTests, formatElisionManifest, formatSessionExperience, getSessionExperience, isSessionExperienceEmpty, recentElisions, recordCompaction, recordEeEvent, recordElision, recordRehydration, } from "./session-experience.js";
8
+ describe("session-experience tracker", () => {
9
+ afterEach(() => __resetSessionExperienceForTests());
10
+ it("starts empty and reports an intact-context felt summary", () => {
11
+ expect(isSessionExperienceEmpty()).toBe(true);
12
+ const text = formatSessionExperience();
13
+ expect(text).toContain("Nothing notable");
14
+ expect(text).toContain("context is intact this session");
15
+ // Steering line must always tell the agent to use lived data, not source.
16
+ expect(text).toMatch(/not by reading the CLI source/i);
17
+ });
18
+ it("accumulates compaction, elision, rehydration and EE counters", () => {
19
+ recordCompaction(4);
20
+ recordCompaction(9);
21
+ recordElision("call_a", "read_file", 4100, 4);
22
+ recordElision("call_b", "grep", 2300, 9);
23
+ recordRehydration("cache");
24
+ recordRehydration("unavailable");
25
+ recordEeEvent("timeout");
26
+ const s = getSessionExperience();
27
+ expect(s.compactions).toBe(2);
28
+ expect(s.lastCompactionStep).toBe(9);
29
+ expect(s.elisions).toHaveLength(2);
30
+ expect(s.totalElidedChars).toBe(6400);
31
+ expect(s.rehydrations.cache).toBe(1);
32
+ expect(s.rehydrations.unavailable).toBe(1);
33
+ expect(s.eeTimeouts).toBe(1);
34
+ expect(isSessionExperienceEmpty()).toBe(false);
35
+ });
36
+ it("felt summary reflects real counters when non-empty", () => {
37
+ recordCompaction(3);
38
+ recordElision("call_x", "read_file", 5000, 3);
39
+ recordRehydration("ee");
40
+ const text = formatSessionExperience();
41
+ expect(text).toContain("fired 1x");
42
+ expect(text).toContain("last at step 3");
43
+ expect(text).toContain("Tool outputs elided: 1");
44
+ expect(text).toContain("ee=1");
45
+ expect(text).not.toContain("Nothing notable");
46
+ });
47
+ it("recentElisions returns newest first and respects the cap arg", () => {
48
+ for (let i = 0; i < 6; i++)
49
+ recordElision(`call_${i}`, "read_file", 1000 + i, i);
50
+ const recent = recentElisions(3);
51
+ expect(recent.map((e) => e.toolCallId)).toEqual(["call_5", "call_4", "call_3"]);
52
+ });
53
+ it("formatElisionManifest is empty with no elisions, actionable otherwise", () => {
54
+ expect(formatElisionManifest()).toBe("");
55
+ recordElision("0123456789abcdefXYZ", "read_file", 4096, 7);
56
+ const m = formatElisionManifest();
57
+ // id is shortened, tool + char count present, and points at ee_query.
58
+ expect(m).toContain("id=0123456789ab");
59
+ expect(m).toContain("read_file (4096c)");
60
+ expect(m).toMatch(/ee_query "tool-artifact id=XXX"/);
61
+ });
62
+ it("caps the elision log at 200 (FIFO) without unbounded growth", () => {
63
+ for (let i = 0; i < 250; i++)
64
+ recordElision(`c_${i}`, "bash", 500, i);
65
+ const s = getSessionExperience();
66
+ expect(s.elisions).toHaveLength(200);
67
+ // Oldest 50 dropped; newest retained.
68
+ expect(s.elisions[0].toolCallId).toBe("c_50");
69
+ expect(s.elisions.at(-1).toolCallId).toBe("c_249");
70
+ });
71
+ });
72
+ //# sourceMappingURL=session-experience.test.js.map
@@ -27,6 +27,7 @@
27
27
  // - F1 (sub-agent cumulative cap) — wrapToolSetWithCap
28
28
  // - siliconflow reasoning-strip — taskCaps.sanitizeHistory
29
29
  import { stepCountIs, streamText } from "ai";
30
+ import { recordArtifact } from "../ee/artifact-cache.js";
30
31
  import { getDefaultEEClient } from "../ee/intercept.js";
31
32
  import { acquireMcpTools } from "../mcp/client-pool.js";
32
33
  import { normalizeModelId } from "../models/registry.js";
@@ -52,6 +53,7 @@ import { repairToolCallHook } from "./repair-tool-call.js";
52
53
  import { classifyStreamError } from "./retry-classifier.js";
53
54
  import { incSessionStep, resolveCeiling } from "./scope-ceiling.js";
54
55
  import { attachReminderToMessages, buildScopeReminder, cadenceForSize, shouldInjectReminder, shouldInjectSoftWarn, } from "./scope-reminder.js";
56
+ import { recordCompaction, recordElision } from "./session-experience.js";
55
57
  import { createStallWatchdog, STALL_ERROR_MESSAGE } from "./stall-watchdog.js";
56
58
  import { wrapToolSetWithCap } from "./sub-agent-cap.js";
57
59
  import { compactSubAgentMessages } from "./subagent-compactor.js";
@@ -412,6 +414,9 @@ export class StreamRunner {
412
414
  }
413
415
  // Idea 4 persist for sub-agent elisions (best-effort; may lack full session but EE can still index the artifact content).
414
416
  const persistSubArtifact = (toolCallId, toolName, fullContent, reason) => {
417
+ // Local-first durable cache so ee_query rehydrates even when EE is down.
418
+ recordArtifact(toolCallId, toolName, fullContent);
419
+ recordElision(toolCallId, toolName, fullContent.length, stepNumber);
415
420
  try {
416
421
  getDefaultEEClient()
417
422
  .extract({
@@ -432,6 +437,8 @@ export class StreamRunner {
432
437
  keepToolIds: subKeepToolIds.length ? subKeepToolIds : undefined,
433
438
  persistArtifact: persistSubArtifact,
434
439
  });
440
+ if (compacted !== stripped)
441
+ recordCompaction(stepNumber);
435
442
  // Phase 4A — scope reminder injection for the sub-agent loop.
436
443
  // Mirror of the top-level wiring in message-processor.ts:
437
444
  // K = cadenceForSize(size) where size defaults to "medium" because
@@ -106,8 +106,11 @@ export interface SubAgentCompactorOptions {
106
106
  export declare const CHARS_PER_TOKEN = 4;
107
107
  export declare const SUBAGENT_COMPACT_DEFAULT_THRESHOLD = 80000;
108
108
  export declare const SUBAGENT_COMPACT_DEFAULT_KEEP_LAST = 3;
109
- /** Tools whose full outputs are high-value for anti-mù (idea 1). Keep verbatim even if older than keepLast. */
110
- export declare const IMPORTANT_TOOL_NAMES: readonly ["read_file", "grep", "lsp", "bash"];
109
+ /** Tools whose full outputs are high-value for anti-mù (idea 1). Keep verbatim even if older than keepLast.
110
+ * Extended for meta self-eval: ee_query / usage_forensics / selfverify_* are the exact artifacts
111
+ * the native contract + native-capabilities tell the agent to rely on for "task finished?" and
112
+ * rehydrate during long meta conversations about CLI/PIL/compaction/EE. */
113
+ export declare const IMPORTANT_TOOL_NAMES: readonly ["read_file", "grep", "lsp", "bash", "ee_query", "usage_forensics", "selfverify_start", "selfverify_result", "selfverify_status"];
111
114
  /**
112
115
  * Heuristic: keep full (no stub) for high-signal tool results.
113
116
  * Signals: allowlist tool + (error/todo/plan/keyfile/large output or explicit keep list).
@@ -116,8 +119,14 @@ export declare const IMPORTANT_TOOL_NAMES: readonly ["read_file", "grep", "lsp",
116
119
  export declare function isHighValueToolResult(toolName: string, preview: string, explicitKeepIds?: Set<string>, toolCallId?: string): boolean;
117
120
  export declare function cumulativeMessageChars(messages: ReadonlyArray<ModelMessage>): number;
118
121
  /**
119
- * Compact a sub-agent message array in place-like fashion. Returns a NEW
120
- * array; the input is not mutated. Below the threshold the original array
121
- * reference is returned for cheap identity comparison in tests.
122
+ * Compact a sub-agent message array in place-like fashion. The input is never
123
+ * mutated. When compaction actually elides something a NEW array is returned.
124
+ * On a no-op (below threshold, or too few tool turns to skip) the ORIGINAL input
125
+ * array is returned BY REFERENCE so callers can detect "did not compact this
126
+ * step" via identity (`compacted === input`). The B4 wiring in
127
+ * message-processor.ts (pre-compaction warning + compaction note gating) and the
128
+ * sub-agent wiring in stream-runner.ts both rely on this contract — returning a
129
+ * fresh slice on a no-op silently made the warning dead and the note fire every
130
+ * step.
122
131
  */
123
132
  export declare function compactSubAgentMessages(messages: ReadonlyArray<ModelMessage>, opts?: SubAgentCompactorOptions): ModelMessage[];
@@ -58,8 +58,21 @@ export const SUBAGENT_COMPACT_DEFAULT_THRESHOLD = 80_000;
58
58
  export const SUBAGENT_COMPACT_DEFAULT_KEEP_LAST = 3;
59
59
  const DEFAULT_OUTPUT_PREVIEW_CHARS = 200;
60
60
  const DEFAULT_LABEL = "sub-agent";
61
- /** Tools whose full outputs are high-value for anti-mù (idea 1). Keep verbatim even if older than keepLast. */
62
- export const IMPORTANT_TOOL_NAMES = ["read_file", "grep", "lsp", "bash"];
61
+ /** Tools whose full outputs are high-value for anti-mù (idea 1). Keep verbatim even if older than keepLast.
62
+ * Extended for meta self-eval: ee_query / usage_forensics / selfverify_* are the exact artifacts
63
+ * the native contract + native-capabilities tell the agent to rely on for "task finished?" and
64
+ * rehydrate during long meta conversations about CLI/PIL/compaction/EE. */
65
+ export const IMPORTANT_TOOL_NAMES = [
66
+ "read_file",
67
+ "grep",
68
+ "lsp",
69
+ "bash",
70
+ "ee_query",
71
+ "usage_forensics",
72
+ "selfverify_start",
73
+ "selfverify_result",
74
+ "selfverify_status",
75
+ ];
63
76
  /**
64
77
  * Heuristic: keep full (no stub) for high-signal tool results.
65
78
  * Signals: allowlist tool + (error/todo/plan/keyfile/large output or explicit keep list).
@@ -268,7 +281,9 @@ function rewriteOlderToolMessage(msg, previewChars, label, keepToolIds, persistA
268
281
  try {
269
282
  persistArtifact(toolCallId, tr.toolName, rawPreview, "elided-by-compactor");
270
283
  }
271
- catch { /* fail-open */ }
284
+ catch {
285
+ /* fail-open */
286
+ }
272
287
  }
273
288
  return {
274
289
  type: "tool-result",
@@ -282,9 +297,15 @@ function rewriteOlderToolMessage(msg, previewChars, label, keepToolIds, persistA
282
297
  return { ...msg, content: rewritten };
283
298
  }
284
299
  /**
285
- * Compact a sub-agent message array in place-like fashion. Returns a NEW
286
- * array; the input is not mutated. Below the threshold the original array
287
- * reference is returned for cheap identity comparison in tests.
300
+ * Compact a sub-agent message array in place-like fashion. The input is never
301
+ * mutated. When compaction actually elides something a NEW array is returned.
302
+ * On a no-op (below threshold, or too few tool turns to skip) the ORIGINAL input
303
+ * array is returned BY REFERENCE so callers can detect "did not compact this
304
+ * step" via identity (`compacted === input`). The B4 wiring in
305
+ * message-processor.ts (pre-compaction warning + compaction note gating) and the
306
+ * sub-agent wiring in stream-runner.ts both rely on this contract — returning a
307
+ * fresh slice on a no-op silently made the warning dead and the note fire every
308
+ * step.
288
309
  */
289
310
  export function compactSubAgentMessages(messages, opts = {}) {
290
311
  const resolved = resolveOpts(opts);
@@ -299,11 +320,12 @@ export function compactSubAgentMessages(messages, opts = {}) {
299
320
  // window utilization. Falls back to static char threshold + keepLast
300
321
  // when no contextWindowTokens supplied (preserves old behaviour).
301
322
  const { effectiveThresholdChars, effectiveKeepLastTurns } = computeDynamicParams(total, resolved);
323
+ // No-op: return the input BY REFERENCE (contract above) so `compacted === input`.
302
324
  if (total < effectiveThresholdChars)
303
- return messages.slice();
325
+ return messages;
304
326
  const keepFrom = findKeepFromIndex(messages, effectiveKeepLastTurns);
305
327
  if (keepFrom <= 0)
306
- return messages.slice();
328
+ return messages;
307
329
  // Walk older messages; rewrite fresh tool results into stubs, super-shrink
308
330
  // already-stubbed results (F1), and strip args off older assistant
309
331
  // tool-call shells (F1). The 1:1 assistant↔tool pairing required by the AI
@@ -64,6 +64,24 @@ describe("subagent-compactor: compactSubAgentMessages", () => {
64
64
  // No tool-result rewrite happened — output object identity per part preserved.
65
65
  expect(out[3]).toBe(msgs[3]);
66
66
  });
67
+ it("returns the SAME array reference on a no-op below threshold (compacted===input contract)", () => {
68
+ // Callers (message-processor B4 prepareStep:1840/1908/1914) detect "did NOT
69
+ // compact this step" via `compacted === stripped`. The docstring promises the
70
+ // original ref on a no-op; returning a fresh slice silently broke that —
71
+ // making the pre-compaction warning dead and the compaction note fire every
72
+ // step. Lock the identity contract.
73
+ const msgs = buildHistory(2, 5); // below threshold
74
+ expect(compactSubAgentMessages(msgs)).toBe(msgs);
75
+ });
76
+ it("returns a NEW array when compaction actually elides (compacted!==input)", () => {
77
+ const msgs = buildHistory(10, 10); // ~100kb > threshold
78
+ for (const m of msgs) {
79
+ if (m.role === "tool" && Array.isArray(m.content)) {
80
+ m.content[0].toolName = "other_tool"; // force low-value so it elides
81
+ }
82
+ }
83
+ expect(compactSubAgentMessages(msgs)).not.toBe(msgs);
84
+ });
67
85
  it("compacts when cumulative chars exceed threshold", () => {
68
86
  const msgs = buildHistory(10, 10); // ~100kb of tool output
69
87
  // Neutralize to test pure size-based elision (high-value keep would reduce savings).
@@ -1,5 +1,5 @@
1
1
  import { beforeEach, describe, expect, test, vi } from "vitest";
2
- import { layer3EeInjection } from "../layer3-ee-injection.js";
2
+ import { layer3EeInjection, RECALL_FEEDBACK_NUDGE } from "../layer3-ee-injection.js";
3
3
  vi.mock("../../ee/bridge.js", () => ({
4
4
  searchByText: vi.fn().mockResolvedValue([]),
5
5
  }));
@@ -93,8 +93,10 @@ describe("layer3EeInjection (bridge-based)", () => {
93
93
  const chars = parseInt(charsMatch[1], 10);
94
94
  // Two parallel collections, each at 15% of budget: 15% of 100 tokens * 4 chars/token
95
95
  // = 60 chars per block + 3 for "..." suffix, joined with newline. Allow generous
96
- // ceiling for header text + 2 blocks.
97
- expect(chars).toBeLessThanOrEqual(260);
96
+ // ceiling for header text + 2 blocks, PLUS the fixed ee_feedback nudge appended
97
+ // when rateable experience is present. The 2000-char input means a truncation
98
+ // regression would blow well past this bound regardless.
99
+ expect(chars).toBeLessThanOrEqual(260 + RECALL_FEEDBACK_NUDGE.length + 1);
98
100
  }
99
101
  }
100
102
  });
@@ -75,6 +75,37 @@ describe("layer3 experience_injected chunk emission (CQ-16b)", () => {
75
75
  expect(chunk?.experienceInjected?.scoreFloor).toBeDefined();
76
76
  expect(typeof chunk?.experienceInjected?.scoreFloor).toBe("number");
77
77
  });
78
+ it("experience_injected chunk carries per-point {id, title, tier} so the TUI can show WHAT was injected", async () => {
79
+ mockSearchByText.mockResolvedValue([
80
+ {
81
+ id: "point-1",
82
+ score: 0.9,
83
+ payload: { text: "Use dependency injection for testability" },
84
+ collection: "experience-behavioral",
85
+ },
86
+ ]);
87
+ await layer3EeInjection(BASE_CTX);
88
+ const chunk = capturedSinkCalls.find((c) => typeof c !== "string" && c.type === "experience_injected");
89
+ const points = chunk?.experienceInjected?.points;
90
+ expect(Array.isArray(points)).toBe(true);
91
+ expect(points.length).toBeGreaterThan(0);
92
+ const p = points[0];
93
+ expect(p.id).toBe("point-1");
94
+ expect(p.title).toContain("dependency injection");
95
+ expect(["principle", "behavioral", "checkpoint"]).toContain(p.tier);
96
+ });
97
+ it("appends an ee_feedback nudge to the injected text when rateable experience is present", async () => {
98
+ mockSearchByText.mockResolvedValue([
99
+ {
100
+ id: "p1",
101
+ score: 0.9,
102
+ payload: { text: "Prefer composition over inheritance" },
103
+ collection: "experience-behavioral",
104
+ },
105
+ ]);
106
+ const result = await layer3EeInjection(BASE_CTX);
107
+ expect(result.enriched).toContain("ee_feedback(id, followed|ignored|noise)");
108
+ });
78
109
  it("does NOT emit experience_injected when searchByText returns empty array", async () => {
79
110
  mockSearchByText.mockResolvedValue([]);
80
111
  await layer3EeInjection(BASE_CTX);
@@ -222,6 +222,15 @@ describe("getResponseToolSet — Phase 2b deliverableKind consume (model overrid
222
222
  // …and an explicit report request keeps it.
223
223
  expect(Object.keys(getResponseToolSet({ ...makeCtx("analyze", null), raw: "list all cost leaks" }))).toContain("respond_analyze");
224
224
  });
225
+ it("DROPS respond_* on an implement turn even when mis-classified as report (session 2b7a10219499)", () => {
226
+ // "lên plan rồi improvement … cải thiện X" is an implement turn the model
227
+ // tagged deliverable=report; the report-exception used to KEEP respond_plan,
228
+ // so the model stated a plan and ended the turn with edits done but
229
+ // uncommitted/unreported. Implementation intent must suppress the terminal
230
+ // tool BEFORE the deliverable branch is consulted.
231
+ expect(getResponseToolSet(ctxD("lên plan rồi improvement nhé, focus cải thiện Compaction", "plan", "report"))).toEqual({});
232
+ expect(getResponseToolSet(ctxD("improve the compactor and implement the fix", "plan", "report"))).toEqual({});
233
+ });
225
234
  });
226
235
  describe("applyPilSuffix — outputStyle variants", () => {
227
236
  const styles = ["concise", "detailed", "balanced"];
@@ -413,4 +422,16 @@ describe("isQuestionLike — Vietnamese yes/no question frames (regression: sess
413
422
  expect(isQuestionLike("explain the pipeline")).toBe(true);
414
423
  });
415
424
  });
425
+ describe("isImplementationIntent — improve / cải thiện (regression: session 2b7a10219499)", () => {
426
+ it("recognises improve/improvement + VI cải thiện as implement turns", () => {
427
+ expect(isImplementationIntent("improve the compactor")).toBe(true);
428
+ expect(isImplementationIntent("lên plan rồi improvement nhé")).toBe(true);
429
+ expect(isImplementationIntent("focus cải thiện Compaction")).toBe(true);
430
+ expect(isImplementationIntent("cai thien phan compaction")).toBe(true);
431
+ });
432
+ it("does not over-match analysis questions that merely describe behaviour", () => {
433
+ expect(isImplementationIntent("what does the enrichment layer do?")).toBe(false);
434
+ expect(isImplementationIntent("why does the suite fail — break it down")).toBe(false);
435
+ });
436
+ });
416
437
  //# sourceMappingURL=layer6-output.test.js.map
@@ -65,6 +65,23 @@ describe("runPipeline()", () => {
65
65
  expect(ctx.layers[i].delta).not.toBe("skipped:null-taskType");
66
66
  }
67
67
  });
68
+ it("felt-experience prompt injects the session snapshot even when taskType is null", async () => {
69
+ // Regression: the felt-experience injection was first placed INSIDE the
70
+ // `taskType !== null` branch, so a "cảm nhận trong CLI" question that
71
+ // classifies to null (not a coding task) silently skipped it. It must run
72
+ // regardless of taskType.
73
+ mockClassify.mockReturnValue({ tier: "abstain", confidence: 0.2, reason: "low-confidence" });
74
+ const ctx = await runPipeline("bạn có bị mù context không trong session này, cảm nhận thế nào");
75
+ expect(ctx.taskType).toBeNull();
76
+ expect(ctx.layers.find((l) => l.name === "session-experience")?.applied).toBe(true);
77
+ expect(ctx.enriched).toContain("[session experience —");
78
+ expect(ctx.enriched).toMatch(/not by reading the CLI source/i);
79
+ });
80
+ it("plain evaluate-the-CLI prompt does NOT inject the session snapshot", async () => {
81
+ const ctx = await runPipeline("đánh giá agent bên trong cli và đề xuất cải thiện");
82
+ expect(ctx.layers.find((l) => l.name === "session-experience")).toBeUndefined();
83
+ expect(ctx.enriched).not.toContain("[session experience —");
84
+ });
68
85
  it("metrics.totalMs is a non-negative number", async () => {
69
86
  const ctx = await runPipeline("refactor this");
70
87
  expect(ctx.metrics).not.toBeNull();