muonroi-cli 1.5.0 → 1.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. package/dist/src/cli/cost-forensics.d.ts +3 -0
  2. package/dist/src/cli/cost-forensics.js +11 -0
  3. package/dist/src/cli/cost-forensics.test.js +1 -0
  4. package/dist/src/cli/experience-report.d.ts +20 -0
  5. package/dist/src/cli/experience-report.js +76 -0
  6. package/dist/src/cli/experience-report.test.d.ts +5 -0
  7. package/dist/src/cli/experience-report.test.js +63 -0
  8. package/dist/src/ee/artifact-cache.d.ts +56 -0
  9. package/dist/src/ee/artifact-cache.js +155 -0
  10. package/dist/src/ee/artifact-cache.test.d.ts +1 -0
  11. package/dist/src/ee/artifact-cache.test.js +69 -0
  12. package/dist/src/ee/search.js +7 -5
  13. package/dist/src/ee/search.test.d.ts +1 -0
  14. package/dist/src/ee/search.test.js +23 -0
  15. package/dist/src/generated/version.d.ts +1 -1
  16. package/dist/src/generated/version.js +1 -1
  17. package/dist/src/gsd/__tests__/directives.test.js +24 -1
  18. package/dist/src/gsd/directives.d.ts +22 -0
  19. package/dist/src/gsd/directives.js +34 -10
  20. package/dist/src/index.js +9 -0
  21. package/dist/src/mcp/__tests__/client-pool.spec.js +54 -4
  22. package/dist/src/mcp/__tests__/forensics-tools.test.js +1 -0
  23. package/dist/src/mcp/client-pool.d.ts +9 -2
  24. package/dist/src/mcp/client-pool.js +60 -21
  25. package/dist/src/orchestrator/compaction.d.ts +2 -0
  26. package/dist/src/orchestrator/compaction.js +14 -1
  27. package/dist/src/orchestrator/compaction.test.js +25 -1
  28. package/dist/src/orchestrator/message-processor.js +49 -7
  29. package/dist/src/orchestrator/scope-reminder.d.ts +12 -0
  30. package/dist/src/orchestrator/scope-reminder.js +16 -0
  31. package/dist/src/orchestrator/scope-reminder.test.js +22 -1
  32. package/dist/src/orchestrator/session-experience.d.ts +89 -0
  33. package/dist/src/orchestrator/session-experience.js +169 -0
  34. package/dist/src/orchestrator/session-experience.test.d.ts +6 -0
  35. package/dist/src/orchestrator/session-experience.test.js +72 -0
  36. package/dist/src/orchestrator/stream-runner.js +7 -0
  37. package/dist/src/orchestrator/subagent-compactor.d.ts +14 -5
  38. package/dist/src/orchestrator/subagent-compactor.js +30 -8
  39. package/dist/src/orchestrator/subagent-compactor.spec.js +18 -0
  40. package/dist/src/pil/__tests__/layer3-ee-injection.test.js +5 -3
  41. package/dist/src/pil/__tests__/layer3-injected-chunk.test.js +31 -0
  42. package/dist/src/pil/__tests__/layer6-output.test.js +21 -0
  43. package/dist/src/pil/__tests__/pipeline.test.js +17 -0
  44. package/dist/src/pil/__tests__/surface-compaction-artifacts.test.d.ts +1 -0
  45. package/dist/src/pil/__tests__/surface-compaction-artifacts.test.js +112 -0
  46. package/dist/src/pil/layer3-ee-injection.d.ts +28 -0
  47. package/dist/src/pil/layer3-ee-injection.js +125 -4
  48. package/dist/src/pil/layer4-gsd.js +3 -2
  49. package/dist/src/pil/layer6-output.js +18 -7
  50. package/dist/src/pil/pipeline.js +26 -9
  51. package/dist/src/pil/session-experience-injection.d.ts +34 -0
  52. package/dist/src/pil/session-experience-injection.js +54 -0
  53. package/dist/src/pil/session-experience-injection.test.d.ts +6 -0
  54. package/dist/src/pil/session-experience-injection.test.js +79 -0
  55. package/dist/src/storage/interaction-log.d.ts +1 -1
  56. package/dist/src/storage/interaction-log.js +17 -4
  57. package/dist/src/storage/session-experience-store.d.ts +63 -0
  58. package/dist/src/storage/session-experience-store.js +164 -0
  59. package/dist/src/storage/session-experience-store.test.d.ts +5 -0
  60. package/dist/src/storage/session-experience-store.test.js +86 -0
  61. package/dist/src/storage/ui-interaction-log.js +4 -2
  62. package/dist/src/tools/registry-ee-query.test.js +24 -1
  63. package/dist/src/tools/registry.js +20 -2
  64. package/dist/src/types/index.d.ts +6 -0
  65. package/dist/src/ui/app.js +0 -0
  66. package/package.json +1 -1
package/dist/src/gsd/directives.js CHANGED
@@ -15,6 +15,27 @@
15
15
  * user-facing prompts into the user's language at render time.
16
16
  */
17
17
  const HEADER = "[gsd-native]";
18
+ /**
19
+ * High-precision predicate: is this turn about the Muonroi ECOSYSTEM (where the
20
+ * muonroi-docs MCP is the right source), as opposed to muonroi-cli internals?
21
+ * Deliberately TIGHTER than smart-filter's hasEcosystemSignal — that one keeps
22
+ * the server (over-keeping costs only tokens), but a behavioural "call docs
23
+ * FIRST" nudge must not fire on every "muonroi" mention or it misdirects
24
+ * CLI-internals questions toward .NET package docs. EN + VI.
25
+ */
26
+ const ECOSYSTEM_SCOPE_RE = /\becosystem\b|hệ\s*sinh\s*thái|he\s*sinh\s*thai|building[-\s]?block|open[-\s]?core|rule\s*engine|decision\s*table|\bnuget\b/i;
27
+ export function mentionsEcosystemScope(message) {
28
+ return ECOSYSTEM_SCOPE_RE.test(message);
29
+ }
30
+ /**
31
+ * Appended to any directive when the turn is ecosystem-scoped. Phrased
32
+ * conditionally ("if … available") so it is harmless when muonroi-docs is not
33
+ * configured — the model simply finds no such tool and falls back to local files.
34
+ */
35
+ export const ECOSYSTEM_DOCS_NUDGE = [
36
+ `${HEADER} ECOSYSTEM SCOPE — this turn concerns the Muonroi ecosystem (platform overview, BB/.NET packages, building-block, open-core boundary, setup).`,
37
+ "If the muonroi-docs MCP is available, it is the AUTHORITATIVE source — call it FIRST (docs_search / setup_guide / bb_recipe_list / bb_package_describe), THEN ground with local files. Do NOT characterize the ecosystem from local repo files alone.",
38
+ ].join("\n");
18
39
  function renderGrayAreas(qs) {
19
40
  if (qs.length === 0)
20
41
  return " (no gray areas detected — confirm the request is fully specified before proceeding)";
@@ -94,16 +115,19 @@ function buildQuick(input) {
94
115
  export function buildDirective(input) {
95
116
  // Informational/meta prompts answer a human — never apply the
96
117
  // implement/verify scaffold (it agent-ifies the reply), regardless of tier.
97
- if (input.informational) {
98
- return { text: buildQuestion(), tier: input.complexity.tier, blocking: false };
99
- }
100
- switch (input.complexity.tier) {
101
- case "heavy":
102
- return { text: buildHeavy(input), tier: "heavy", blocking: true };
103
- case "standard":
104
- return { text: buildStandard(input), tier: "standard", blocking: false };
105
- default:
106
- return { text: buildQuick(input), tier: "quick", blocking: false };
118
+ const base = input.informational
119
+ ? { text: buildQuestion(), tier: input.complexity.tier, blocking: false }
120
+ : input.complexity.tier === "heavy"
121
+ ? { text: buildHeavy(input), tier: "heavy", blocking: true }
122
+ : input.complexity.tier === "standard"
123
+ ? { text: buildStandard(input), tier: "standard", blocking: false }
124
+ : { text: buildQuick(input), tier: "quick", blocking: false };
125
+ // Ecosystem-scoped turns get a docs-first nudge regardless of tier (question
126
+ // OR task): muonroi-docs is the authoritative source and must not be skipped
127
+ // in favour of guessing from local files (session 41ccfeb2ceee turn 1).
128
+ if (input.ecosystem) {
129
+ return { ...base, text: `${base.text}\n${ECOSYSTEM_DOCS_NUDGE}` };
107
130
  }
131
+ return base;
108
132
  }
109
133
  //# sourceMappingURL=directives.js.map
package/dist/src/index.js CHANGED
@@ -1319,6 +1319,15 @@ usage
1319
1319
  const { runCostForensics } = await import("./cli/cost-forensics.js");
1320
1320
  await runCostForensics({ prefix: sessionPrefix, json: opts.json });
1321
1321
  });
1322
+ usage
1323
+ .command("experience")
1324
+ .description("Cross-session anti-mù telemetry: how often compaction elides tool outputs and whether the agent recovers them (gates the deferred auto-protect re-architecture).")
1325
+ .option("--limit <n>", "Number of most-recent sessions to aggregate", "100")
1326
+ .option("--json", "Emit aggregate as JSON")
1327
+ .action(async (opts) => {
1328
+ const { runExperienceReport } = await import("./cli/experience-report.js");
1329
+ await runExperienceReport({ limit: parseInt(opts.limit, 10) || 100, json: opts.json });
1330
+ });
1322
1331
  usage
1323
1332
  .command("security-audit")
1324
1333
  .description("Security posture: yolo/permission overrides, high-risk cmds, shuru audits + cost (from decision-log events)")
package/dist/src/mcp/__tests__/client-pool.spec.js CHANGED
@@ -42,7 +42,7 @@ describe("acquireMcpTools — cross-turn client pool", () => {
42
42
  expect(Object.keys(b2.tools)).toContain("mcp_fs__ping");
43
43
  expect(connectOneServer).toHaveBeenCalledTimes(2); // retried after eviction
44
44
  });
45
- it("self-heals: a tool hitting a connection error evicts the client so the next turn reconnects", async () => {
45
+ it("self-heals: a connection error reconnects ONCE in-turn; a permanently-dead server surfaces the error (no loop)", async () => {
46
46
  connectOneServer.mockImplementation(async (s) => ({
47
47
  tools: {
48
48
  [`mcp_${s.id}__boom`]: {
@@ -55,9 +55,59 @@ describe("acquireMcpTools — cross-turn client pool", () => {
55
55
  }));
56
56
  const b1 = await acquireMcpTools([srv("fs")]);
57
57
  await expect(b1.tools["mcp_fs__boom"].execute({}, {})).rejects.toThrow(/transport closed/);
58
- const b2 = await acquireMcpTools([srv("fs")]);
59
- expect(b2).toBeDefined();
60
- expect(connectOneServer).toHaveBeenCalledTimes(2); // reconnected after the connection error
58
+ // Initial connect + exactly ONE in-turn reconnect — the retry is not looped.
59
+ expect(connectOneServer).toHaveBeenCalledTimes(2);
60
+ });
61
+ it("in-turn reconnect: a mid-turn transport drop is reconnected and the call retried once — succeeds", async () => {
62
+ let gen = 0;
63
+ connectOneServer.mockImplementation(async (s) => {
64
+ gen += 1;
65
+ const dead = gen === 1; // first connect drops mid-call; the reconnect is healthy
66
+ return {
67
+ tools: {
68
+ [`mcp_${s.id}__ping`]: {
69
+ execute: async () => {
70
+ if (dead)
71
+ throw new Error("Attempted to send a request from a closed client");
72
+ return "pong";
73
+ },
74
+ },
75
+ },
76
+ client: { close: async () => { } },
77
+ };
78
+ });
79
+ const b = await acquireMcpTools([srv("docs")]);
80
+ const result = await b.tools["mcp_docs__ping"].execute({}, {});
81
+ expect(result).toBe("pong"); // recovered within the SAME turn
82
+ expect(connectOneServer).toHaveBeenCalledTimes(2); // drop + one reconnect
83
+ });
84
+ it("a parallel burst on a dropped client shares ONE reconnect; every call retries and succeeds", async () => {
85
+ // Repro of session 41ccfeb2ceee: a 14-call burst at muonroi-docs dropped the
86
+ // HTTP socket after the first calls; previously the rest all threw
87
+ // "Attempted to send a request from a closed client". They must now share a
88
+ // single reconnect and all recover.
89
+ let gen = 0;
90
+ connectOneServer.mockImplementation(async (s) => {
91
+ gen += 1;
92
+ const dead = gen === 1;
93
+ return {
94
+ tools: {
95
+ [`mcp_${s.id}__ping`]: {
96
+ execute: async () => {
97
+ if (dead)
98
+ throw new Error("The socket connection was closed unexpectedly");
99
+ return "pong";
100
+ },
101
+ },
102
+ },
103
+ client: { close: async () => { } },
104
+ };
105
+ });
106
+ const b = await acquireMcpTools([srv("docs")]);
107
+ const tool = b.tools["mcp_docs__ping"];
108
+ const results = await Promise.all(Array.from({ length: 14 }, () => tool.execute({}, {})));
109
+ expect(results.every((r) => r === "pong")).toBe(true);
110
+ expect(connectOneServer).toHaveBeenCalledTimes(2); // 14 failures → exactly ONE shared reconnect
61
111
  });
62
112
  it("keys by cwd/config — a different command reconnects rather than reusing", async () => {
63
113
  connectOneServer.mockImplementation(async (s) => connected(s.id));
package/dist/src/mcp/__tests__/forensics-tools.test.js CHANGED
@@ -27,6 +27,7 @@ const fakeSummary = (id) => ({
27
27
  cacheHitRatio: 0,
28
28
  peakSingleCallInput: 100,
29
29
  events: [],
30
+ experience: null,
30
31
  });
31
32
  describe("forensics-tools", () => {
32
33
  it("usage_forensics returns the summary for a unique prefix", async () => {
package/dist/src/mcp/client-pool.d.ts CHANGED
@@ -15,8 +15,15 @@
15
15
  *
16
16
  * Self-healing: a server that fails to connect is evicted (not cached as a
17
17
  * rejection), so a later turn retries. A live client whose child process dies
18
- * later is evicted when one of its tool calls hits a transport/connection error,
19
- * so the next turn reconnects fresh.
18
+ * later is evicted when one of its tool calls hits a transport/connection error.
19
+ *
20
+ * In-turn reconnect: a transport that drops MID-TURN (live: muonroi-docs HTTP
21
+ * socket closed after 2 of a 14-call parallel burst, session 41ccfeb2ceee —
22
+ * every remaining call then threw "Attempted to send a request from a closed
23
+ * client") is reconnected and the failing call is retried ONCE against the fresh
24
+ * client, instead of only reconnecting on the NEXT turn. Concurrent failures in
25
+ * the same burst share one reconnect (the pool dedupes by key); eviction is
26
+ * race-safe so a fresh reconnect is never torn down by a sibling's late failure.
20
27
  */
21
28
  import type { McpServerConfig } from "../utils/settings.js";
22
29
  import { type McpBuildOptions, type McpToolBundle } from "./runtime.js";
package/dist/src/mcp/client-pool.js CHANGED
@@ -15,8 +15,15 @@
15
15
  *
16
16
  * Self-healing: a server that fails to connect is evicted (not cached as a
17
17
  * rejection), so a later turn retries. A live client whose child process dies
18
- * later is evicted when one of its tool calls hits a transport/connection error,
19
- * so the next turn reconnects fresh.
18
+ * later is evicted when one of its tool calls hits a transport/connection error.
19
+ *
20
+ * In-turn reconnect: a transport that drops MID-TURN (live: muonroi-docs HTTP
21
+ * socket closed after 2 of a 14-call parallel burst, session 41ccfeb2ceee —
22
+ * every remaining call then threw "Attempted to send a request from a closed
23
+ * client") is reconnected and the failing call is retried ONCE against the fresh
24
+ * client, instead of only reconnecting on the NEXT turn. Concurrent failures in
25
+ * the same burst share one reconnect (the pool dedupes by key); eviction is
26
+ * race-safe so a fresh reconnect is never torn down by a sibling's late failure.
20
27
  */
21
28
  import { connectOneServer, getMcpBuildDeadlineMs, } from "./runtime.js";
22
29
  import { validateMcpServerConfig } from "./validate.js";
@@ -38,16 +45,20 @@ function serverKey(s) {
38
45
  cwd: s.cwd ?? process.cwd(),
39
46
  });
40
47
  }
41
- /** Tear down one pooled entry (best-effort) and remove it. */
42
- function evict(key) {
48
+ /**
49
+ * Tear down a pooled entry ONLY if it still holds `dead` (the specific server a
50
+ * failing tool call was bound to). Race-safe under a parallel burst: when 14
51
+ * sibling calls all fail on the same dropped client, the first evicts it and
52
+ * reconnects; the rest find `entry.connected !== dead` (a fresh client, or no
53
+ * entry) and leave the reconnect untouched. Best-effort cleanup of the dead one.
54
+ */
55
+ function evictDeadServer(key, dead) {
43
56
  const entry = pool.get(key);
44
- if (!entry)
57
+ if (!entry || entry.connected !== dead)
45
58
  return;
46
59
  pool.delete(key);
47
- void entry.promise.then((cs) => {
48
- cs.cleanup?.();
49
- void cs.client.close().catch(() => { });
50
- }, () => { });
60
+ dead.cleanup?.();
61
+ void dead.client.close().catch(() => { });
51
62
  }
52
63
  /** Heuristic: does this error mean the MCP transport/child is gone? */
53
64
  function isConnectionError(e) {
@@ -69,22 +80,35 @@ function getOrConnect(server, opts) {
69
80
  const promise = connectOneServer(server, opts);
70
81
  const entry = { key, promise };
71
82
  pool.set(key, entry);
83
+ promise.then(
84
+ // Record the resolved server so evictDeadServer can match by identity.
85
+ (cs) => {
86
+ entry.connected = cs;
87
+ },
72
88
  // Cache a rejection only transiently: evict so the next turn retries rather
73
89
  // than returning the same failed promise forever.
74
- promise.catch(() => {
90
+ () => {
75
91
  if (pool.get(key) === entry)
76
92
  pool.delete(key);
77
93
  });
78
94
  return promise;
79
95
  }
80
96
  /**
81
- * Wrap each tool's execute so a transport/connection failure evicts the pooled
82
- * client (next turn reconnects). The MCP child may die after a successful
83
- * connect; without this the dead client would be reused on every later turn.
97
+ * Wrap each tool's execute so a transport/connection failure is recovered
98
+ * in-turn: evict the dead pooled client (race-safe), reconnect once, and retry
99
+ * the SAME call against the fresh client. Before this, a mid-turn drop only
100
+ * reconnected on the NEXT turn, so the rest of the current turn's batch all
101
+ * failed with "Attempted to send a request from a closed client". The MCP child
102
+ * may also die after a successful connect; the eviction keeps the pool clean for
103
+ * later turns either way.
104
+ *
105
+ * The retry is fired at most ONCE per call (no loop): if the fresh client also
106
+ * drops, or the reconnect itself fails, the original transport error propagates
107
+ * so the model sees a real failure rather than hanging.
84
108
  */
85
- function wrapForSelfHeal(tools, key) {
109
+ function wrapForSelfHeal(cs, key, server, opts) {
86
110
  const out = {};
87
- for (const [name, tool] of Object.entries(tools)) {
111
+ for (const [name, tool] of Object.entries(cs.tools)) {
88
112
  const base = tool.execute;
89
113
  if (typeof base !== "function") {
90
114
  out[name] = tool;
@@ -97,11 +121,25 @@ function wrapForSelfHeal(tools, key) {
97
121
  return await base(args, options);
98
122
  }
99
123
  catch (e) {
100
- if (isConnectionError(e)) {
101
- console.error(`[mcp:pool] '${name}' hit a connection error — evicting cached client so the next turn reconnects`);
102
- evict(key);
124
+ if (!isConnectionError(e))
125
+ throw e;
126
+ console.error(`[mcp:pool] '${name}' hit a connection error — reconnecting '${server.id}' in-turn and retrying once: ${e instanceof Error ? e.message : String(e)}`);
127
+ // Evict THIS dead client (no-op if a sibling already reconnected), then
128
+ // reconnect. getOrConnect dedupes by key, so a burst shares one reconnect.
129
+ evictDeadServer(key, cs);
130
+ let fresh;
131
+ try {
132
+ fresh = await getOrConnect(server, opts);
133
+ }
134
+ catch (reconnectErr) {
135
+ console.error(`[mcp:pool] in-turn reconnect for '${server.id}' failed; surfacing original error: ${reconnectErr instanceof Error ? reconnectErr.message : String(reconnectErr)}`);
136
+ throw e;
103
137
  }
104
- throw e;
138
+ const freshTools = fresh.tools;
139
+ const freshExec = freshTools[name]?.execute;
140
+ if (typeof freshExec !== "function")
141
+ throw e;
142
+ return await freshExec(args, options);
105
143
  }
106
144
  },
107
145
  };
@@ -141,13 +179,14 @@ export async function acquireMcpTools(servers, opts) {
141
179
  await Promise.race([Promise.allSettled(attempts), deadline]);
142
180
  if (deadlineTimer)
143
181
  clearTimeout(deadlineTimer);
144
- for (const slot of slots) {
182
+ for (let i = 0; i < slots.length; i++) {
183
+ const slot = slots[i];
145
184
  if (slot.done) {
146
185
  if (slot.error) {
147
186
  errors.push(`${slot.label}: ${slot.error}`);
148
187
  }
149
188
  else if (slot.result) {
150
- Object.assign(tools, wrapForSelfHeal(slot.result.tools, slot.key));
189
+ Object.assign(tools, wrapForSelfHeal(slot.result, slot.key, enabled[i], opts));
151
190
  }
152
191
  }
153
192
  else {
package/dist/src/orchestrator/compaction.d.ts CHANGED
@@ -23,6 +23,8 @@ export declare const DEFAULT_RESERVE_TOKENS = 16384;
23
23
  export declare const DEFAULT_KEEP_RECENT_TOKENS = 20000;
24
24
  export declare const POST_TURN_MIN_TOKENS = 2000;
25
25
  export declare const COMPACTION_MAX_OUTPUT_TOKENS = 4096;
26
+ export declare const COMPACTION_META_MAX_OUTPUT_TOKENS = 1536;
27
+ export declare function metaCompactionMaxTokens(): number;
26
28
  export declare const TOOL_RESULT_MAX_CHARS_CONFIGURABLE = 8000;
27
29
  export declare const COMPACTION_SUMMARY_HEADER = "[Context checkpoint summary]";
28
30
  export declare function extractUserContent(content: unknown): string;
package/dist/src/orchestrator/compaction.js CHANGED
@@ -10,6 +10,19 @@ export const DEFAULT_RESERVE_TOKENS = 16_384;
10
10
  export const DEFAULT_KEEP_RECENT_TOKENS = 20_000;
11
11
  export const POST_TURN_MIN_TOKENS = 2_000;
12
12
  export const COMPACTION_MAX_OUTPUT_TOKENS = 4_096;
13
+ // Meta-analysis (agent/PIL self-eval) summaries are capped tighter than normal
14
+ // to prevent runaway summaries (session df2dbb878984: 73k input → 14k-char
15
+ // summary). Default 1536 (was a hard 1024) — modestly more fidelity now that
16
+ // anti-mù recovery (layer3 surfacing + the in-process/disk artifact cache)
17
+ // backstops detail loss, still ~2.3x below the 14k-char problem. Tune per machine
18
+ // with MUONROI_META_COMPACT_MAX_TOKENS (clamped 512..COMPACTION_MAX_OUTPUT_TOKENS).
19
+ export const COMPACTION_META_MAX_OUTPUT_TOKENS = 1_536;
20
+ export function metaCompactionMaxTokens() {
21
+ const raw = Number(process.env.MUONROI_META_COMPACT_MAX_TOKENS);
22
+ if (Number.isFinite(raw) && raw >= 512 && raw <= COMPACTION_MAX_OUTPUT_TOKENS)
23
+ return Math.floor(raw);
24
+ return COMPACTION_META_MAX_OUTPUT_TOKENS;
25
+ }
13
26
  export const TOOL_RESULT_MAX_CHARS_CONFIGURABLE = 8000;
14
27
  export const COMPACTION_SUMMARY_HEADER = "[Context checkpoint summary]";
15
28
  const SUMMARIZATION_SYSTEM_PROMPT = `You are a context summarization assistant.
@@ -450,7 +463,7 @@ async function summarizeConversation(provider, modelId, messages, reserveTokens,
450
463
  const userText = messages.map((m) => extractUserContent(m.content)).join("\n");
451
464
  const isMeta = isMetaAnalysisPrompt(userText);
452
465
  const effectiveMax = isMeta
453
- ? Math.min(1024, Math.max(512, Math.floor(reserveTokens * 0.5)))
466
+ ? Math.min(metaCompactionMaxTokens(), Math.max(512, Math.floor(reserveTokens * 0.5)))
454
467
  : Math.min(COMPACTION_MAX_OUTPUT_TOKENS, Math.max(512, Math.floor(reserveTokens * 0.8)));
455
468
  if (previousSummary) {
456
469
  promptParts.push(`Existing summary:\n${previousSummary}`);
package/dist/src/orchestrator/compaction.test.js CHANGED
@@ -1,6 +1,6 @@
1
1
  import { afterAll, beforeAll, describe, expect, it } from "vitest";
2
2
  import { buildEffectiveTranscript } from "../storage/transcript-view.js";
3
- import { COMPACTION_SUMMARY_HEADER, createCompactionSummaryMessage, findCutPoint, prepareCompaction, serializeConversation, shouldCompactContext, } from "./compaction.js";
3
+ import { COMPACTION_META_MAX_OUTPUT_TOKENS, COMPACTION_SUMMARY_HEADER, createCompactionSummaryMessage, findCutPoint, metaCompactionMaxTokens, prepareCompaction, serializeConversation, shouldCompactContext, } from "./compaction.js";
4
4
  import { buildCheckpointReminder } from "./scope-reminder.js";
5
5
  import { __forceFallbackForTests } from "./token-counter.js";
6
6
  // Pin token counts to the chars/4 fallback so cut-point assertions remain stable.
@@ -160,4 +160,28 @@ describe("compaction helpers", () => {
160
160
  expect(r).toContain("tool-artifact");
161
161
  });
162
162
  });
163
+ describe("metaCompactionMaxTokens — meta summary cap (tunable, session 2b7a10219499)", () => {
164
+ it("defaults to 1536 — looser than the old hard 1024, still well below the 14k-char problem", () => {
165
+ delete process.env.MUONROI_META_COMPACT_MAX_TOKENS;
166
+ expect(metaCompactionMaxTokens()).toBe(COMPACTION_META_MAX_OUTPUT_TOKENS);
167
+ expect(COMPACTION_META_MAX_OUTPUT_TOKENS).toBe(1536);
168
+ expect(COMPACTION_META_MAX_OUTPUT_TOKENS).toBeGreaterThan(1024);
169
+ });
170
+ it("honors a valid MUONROI_META_COMPACT_MAX_TOKENS override", () => {
171
+ process.env.MUONROI_META_COMPACT_MAX_TOKENS = "2048";
172
+ try {
173
+ expect(metaCompactionMaxTokens()).toBe(2048);
174
+ }
175
+ finally {
176
+ delete process.env.MUONROI_META_COMPACT_MAX_TOKENS;
177
+ }
178
+ });
179
+ it("clamps out-of-range / garbage overrides to the default", () => {
180
+ for (const bad of ["999999", "100", "-5", "abc", ""]) {
181
+ process.env.MUONROI_META_COMPACT_MAX_TOKENS = bad;
182
+ expect(metaCompactionMaxTokens(), bad).toBe(COMPACTION_META_MAX_OUTPUT_TOKENS);
183
+ }
184
+ delete process.env.MUONROI_META_COMPACT_MAX_TOKENS;
185
+ });
186
+ });
163
187
  //# sourceMappingURL=compaction.test.js.map
package/dist/src/orchestrator/message-processor.js CHANGED
@@ -50,6 +50,7 @@
50
50
  // - O1 (providerOptions shape forensics) — extractProviderOptionsShape
51
51
  // - siliconflow reasoning-strip — turnCaps.sanitizeHistory
52
52
  import { stepCountIs, streamText } from "ai";
53
+ import { recordArtifact } from "../ee/artifact-cache.js";
53
54
  import { getCachedAuthToken, getCachedServerBaseUrl } from "../ee/auth.js";
54
55
  import { routeFeedback, routeModel } from "../ee/bridge.js";
55
56
  import { getDefaultEEClient } from "../ee/intercept.js";
@@ -65,6 +66,7 @@ import { getModelInfo } from "../models/registry.js";
65
66
  import { cheapModelShellLine, injectCheapModelPlaybook, injectCheapModelShellDirective, shouldInjectCheapModelPlaybook, } from "../pil/cheap-model-playbook.js";
66
67
  import { injectCheapModelWorkbook, shouldInjectCheapModelWorkbook } from "../pil/cheap-model-workbooks.js";
67
68
  import { applyPilSuffix, getResponseTaskType, getResponseToolSet, isResponseTool, runPipeline, shouldHaltOnResponseTool, } from "../pil/index.js";
69
+ import { isMetaAnalysisPrompt } from "../pil/layer6-output.js";
68
70
  import { taskTypeToMaxTokens, taskTypeToReasoningEffort, taskTypeToTier } from "../pil/task-tier-map.js";
69
71
  import { getProviderCapabilities } from "../providers/capabilities.js";
70
72
  import { loadKeyForProvider } from "../providers/keychain.js";
@@ -77,6 +79,7 @@ import { reportRouteOutcome } from "../router/decide.js";
77
79
  import { decideStepRouting, getStepRouterConfig } from "../router/step-router.js";
78
80
  import { routerStore } from "../router/store.js";
79
81
  import { getNextMessageSequence, logInteraction, markMessageErrored, markToolCallErrored, persistMessageWriteAhead, persistToolCallWriteAhead, } from "../storage/index.js";
82
+ import { persistSessionExperience } from "../storage/session-experience-store.js";
80
83
  import { createBuiltinTools } from "../tools/registry.js";
81
84
  import { snapshotFromTodoWriteArgs } from "../tools/todo-write-snapshot.js";
82
85
  import { visionToolsNeeded } from "../tools/vision-gate.js";
@@ -101,11 +104,12 @@ import { repairToolCallHook } from "./repair-tool-call.js";
101
104
  import { buildRepetitionReminder, recordAssistantBurst, shouldInjectRepetitionReminder, } from "./repetition-detector.js";
102
105
  import { classifyStreamError } from "./retry-classifier.js";
103
106
  import { forcedFinalize, getSessionLastTask, incSessionStep, parseBudgetOverride, recordSessionLastTask, resetSessionStep, resolveCeiling, } from "./scope-ceiling.js";
104
- import { attachReminderToMessages, buildCheckpointReminder, buildScopeReminder, cadenceForSize, shouldInjectCeilingCrossing, shouldInjectReminder, shouldInjectSoftWarn, } from "./scope-reminder.js";
107
+ import { attachReminderToMessages, buildCheckpointReminder, buildScopeReminder, cadenceForSize, shouldInjectCeilingCrossing, shouldInjectReminder, shouldInjectSoftWarn, shouldPreWarnCompaction, } from "./scope-reminder.js";
108
+ import { formatElisionManifest, getSessionExperienceCounts, recordCompaction, recordElision, } from "./session-experience.js";
105
109
  import { attemptStallRescue, pushStallToolResult } from "./stall-rescue.js";
106
110
  import { createStallWatchdog, STALL_ERROR_MESSAGE } from "./stall-watchdog.js";
107
111
  import { wrapToolSetWithCap } from "./sub-agent-cap.js";
108
- import { compactSubAgentMessages } from "./subagent-compactor.js";
112
+ import { compactSubAgentMessages, cumulativeMessageChars } from "./subagent-compactor.js";
109
113
  import { detectTextEmittedToolCall, parseDsmlToolCalls } from "./text-tool-call-detector.js";
110
114
  import { createToolLoopCapPredicate } from "./tool-loop-cap.js";
111
115
  import { buildToolRepetitionAbortMessage, recordToolError as recordToolRepetitionError, recordToolSuccess as recordToolRepetitionSuccess, } from "./tool-repetition-detector.js";
@@ -1500,6 +1504,14 @@ export class MessageProcessor {
1500
1504
  const _cwd = process.cwd();
1501
1505
  const _sess = undefined; // best-effort; EE artifact still indexable by content + meta.toolCallId
1502
1506
  const persistArtifact = (toolCallId, toolName, fullContent, reason) => {
1507
+ // Local-first: record the FULL output in-process so ee_query can
1508
+ // rehydrate it even if EE is down (the EE extract below caps at 8k
1509
+ // and needs the network; the cache keeps up to 200k, no network).
1510
+ recordArtifact(toolCallId, toolName, fullContent);
1511
+ // Lived-experience telemetry: count this elision so a later
1512
+ // "cảm nhận trong CLI" question answers from data, and so the
1513
+ // post-compaction note can list what it just stubbed.
1514
+ recordElision(toolCallId, toolName, fullContent.length, sn);
1503
1515
  try {
1504
1516
  getDefaultEEClient()
1505
1517
  .extract({
@@ -1522,19 +1534,31 @@ export class MessageProcessor {
1522
1534
  };
1523
1535
  const compacted = compactSubAgentMessages(stripped, {
1524
1536
  thresholdChars: topLevelCompactThreshold,
1525
- keepLastTurns: topLevelCompactKeepLast,
1537
+ // Rec #1 (cheap part): on meta/self-eval turns keep a couple more
1538
+ // trailing tool turns verbatim — those carry the reasoning the
1539
+ // agent is being asked to reflect on, and over-eliding them is
1540
+ // exactly what starves a self-evaluation. One boolean, no new
1541
+ // detection logic (isMetaAnalysisPrompt already gates layer3/5).
1542
+ keepLastTurns: topLevelCompactKeepLast + (isMetaAnalysisPrompt(userMessage) ? 2 : 0),
1526
1543
  label: "top-level",
1527
1544
  envelopeChars,
1528
1545
  contextWindowTokens,
1529
1546
  keepToolIds: keepToolIds.length ? keepToolIds : undefined,
1530
1547
  persistArtifact,
1531
1548
  });
1549
+ if (compacted !== stripped)
1550
+ recordCompaction(sn);
1532
1551
  // Pre-compaction visibility: give the agent one step of notice
1533
1552
  // before B4 actually rewrites history into stubs. This is the
1534
1553
  // advance warning that was missing — agent can now decide to
1535
- // summarize, finish, or request preservation.
1536
- const _preCompactWarnAt = Math.floor(topLevelCompactThreshold * 0.78);
1537
- if (stripped.length > _preCompactWarnAt && compacted === stripped) {
1554
+ // summarize, finish, or request preservation. Fires when we did
1555
+ // NOT compact this step (compacted === stripped, restored by the
1556
+ // compactSubAgentMessages no-op ref contract) AND the prompt is
1557
+ // approaching the threshold. Must compare CHARS (messages +
1558
+ // envelope), not stripped.length (a message count that never
1559
+ // exceeds a char-scaled threshold) — session 2b7a10219499.
1560
+ const _preWarnChars = cumulativeMessageChars(stripped) + envelopeChars;
1561
+ if (compacted === stripped && shouldPreWarnCompaction(_preWarnChars, topLevelCompactThreshold)) {
1538
1562
  const _cp = buildCheckpointReminder(sn, true);
1539
1563
  const _pre = `[pre-compaction warning at step ${sn} — next step(s) will likely rewrite older tool results to stubs (threshold ${topLevelCompactThreshold}, keepLast=${topLevelCompactKeepLast}). ${_cp} Summarize or finish if possible.]`;
1540
1564
  return { messages: attachReminderToMessages(stripped, _pre) };
@@ -1609,7 +1633,15 @@ export class MessageProcessor {
1609
1633
  // "task finished?", "compacted yet?", "EE checkpoint" so agent can self-assess and avoid flying blind
1610
1634
  // even when the top-level summary is not in its immediate focus (sub-agents, long loops).
1611
1635
  const _compactNote = compacted !== stripped
1612
- ? `[context compacted at step ${sn} — older or low-value tool results rewritten to stubs to fit budget. High-value evidence (file reads, bash, your previous responses) is kept verbatim. ${buildCheckpointReminder(sn, true)}]`
1636
+ ? (() => {
1637
+ // Rec #2: turn the generic "high-value elided? use ee_query"
1638
+ // prose into a concrete, actionable manifest of what was just
1639
+ // stubbed (id/tool/size) — sourced from the elisions recorded
1640
+ // by persistArtifact above — so the rehydrate round-trip is
1641
+ // informed, not blind.
1642
+ const _m = formatElisionManifest();
1643
+ return `[context compacted at step ${sn} — older or low-value tool results rewritten to stubs to fit budget. High-value evidence (file reads, bash, your previous responses) is kept verbatim. ${buildCheckpointReminder(sn, true)}${_m ? ` ${_m}` : ""}]`;
1644
+ })()
1613
1645
  : null;
1614
1646
  if (_compactNote) {
1615
1647
  return { messages: attachReminderToMessages(compacted, _compactNote) };
@@ -1670,6 +1702,16 @@ export class MessageProcessor {
1670
1702
  console.error("[Agent:onFinish] failed to emit llm-done", err);
1671
1703
  }
1672
1704
  deps.setCurrentCallId("");
1705
+ // Rec #1 persisted forensics: onFinish fires once per top-level turn,
1706
+ // so flush this session's cumulative experience counts here. Readers
1707
+ // take the latest row per session, so the last turn's row is the
1708
+ // session total. No-ops on missing id / all-zero. Fail-open.
1709
+ try {
1710
+ persistSessionExperience(deps.session?.id ?? null, getSessionExperienceCounts());
1711
+ }
1712
+ catch (err) {
1713
+ console.error("[Agent:onFinish] persistSessionExperience failed", err);
1714
+ }
1673
1715
  },
1674
1716
  });
1675
1717
  let _topTokenIndex = 0;
@@ -100,3 +100,15 @@ export declare function attachReminderToMessages<T>(messages: ReadonlyArray<T>,
100
100
  * Used by prepareStep / sub-agent paths after compaction.
101
101
  */
102
102
  export declare function buildCheckpointReminder(iteration: number, hasEECheckpoint: boolean): string;
103
+ /**
104
+ * Pre-compaction "advance warning" gate. Fires when the prompt is approaching
105
+ * (default ≥78% of) the compaction threshold but compaction has NOT yet run this
106
+ * step — giving the agent one step to PRESERVE / finish before B3/B4 rewrites
107
+ * older tool results into stubs.
108
+ *
109
+ * `promptChars` MUST be the same quantity the compactor thresholds on (cumulative
110
+ * message chars + envelope chars), NOT the message COUNT. The original B4 wiring
111
+ * compared `stripped.length` (a message count, ~tens) against a char-scaled
112
+ * threshold (~156000), so the warning could never fire — session 2b7a10219499.
113
+ *
+ * @param promptChars Prompt size in characters (messages + envelope). A value
+ *   of zero or less never triggers the warning.
+ * @param thresholdChars Compaction threshold in characters. A value of zero or
+ *   less disables the gate entirely.
+ * @param ratio Fraction of `thresholdChars` at which the warning starts to
+ *   fire; the implementation defaults it to 0.78 when omitted.
+ * @returns `true` when both sizes are positive and `promptChars` is at least
+ *   `Math.floor(thresholdChars * ratio)`; otherwise `false`.
+ */
114
+ export declare function shouldPreWarnCompaction(promptChars: number, thresholdChars: number, ratio?: number): boolean;
@@ -218,4 +218,20 @@ export function buildCheckpointReminder(iteration, hasEECheckpoint) {
218
218
  return base;
219
219
  return base.slice(0, 220);
220
220
  }
221
/** Fraction of the compaction threshold at which the advance warning starts firing. */
const DEFAULT_PRE_WARN_RATIO = 0.78;
/**
 * Pre-compaction "advance warning" gate. Fires when the prompt is approaching
 * (default ≥78% of) the compaction threshold but compaction has NOT yet run this
 * step — giving the agent one step to PRESERVE / finish before B3/B4 rewrites
 * older tool results into stubs.
 *
 * `promptChars` MUST be the same quantity the compactor thresholds on (cumulative
 * message chars + envelope chars), NOT the message COUNT. The original B4 wiring
 * compared `stripped.length` (a message count, ~tens) against a char-scaled
 * threshold (~156000), so the warning could never fire — session 2b7a10219499.
 *
 * @param {number} promptChars - Prompt size in characters (messages + envelope).
 *   Zero or negative never triggers the warning.
 * @param {number} thresholdChars - Compaction threshold in characters. Zero or
 *   negative means "no compaction configured" and disables the gate.
 * @param {number} [ratio=0.78] - Fraction of `thresholdChars` at which the
 *   warning starts firing. A non-numeric or non-finite value (null, NaN,
 *   ±Infinity) falls back to the default instead of silently breaking the gate.
 * @returns {boolean} `true` when `promptChars` has reached
 *   `Math.floor(thresholdChars * ratio)`, `false` otherwise.
 */
export function shouldPreWarnCompaction(promptChars, thresholdChars, ratio = DEFAULT_PRE_WARN_RATIO) {
    if (thresholdChars <= 0 || promptChars <= 0) {
        return false;
    }
    // The default parameter only covers `undefined`. Without this guard a
    // `null` ratio coerces to 0 (warning on every step) and NaN/Infinity make
    // the comparison permanently false (warning never fires).
    const effectiveRatio = typeof ratio === "number" && Number.isFinite(ratio)
        ? ratio
        : DEFAULT_PRE_WARN_RATIO;
    return promptChars >= Math.floor(thresholdChars * effectiveRatio);
}
221
237
  //# sourceMappingURL=scope-reminder.js.map
@@ -13,7 +13,7 @@
13
13
  * - Reminder lives in tool_result/system message — never in system prompt
14
14
  */
15
15
  import { afterEach, beforeEach, describe, expect, it } from "vitest";
16
- import { attachReminderToMessages, buildScopeReminder, cadenceForSize, shouldInjectCeilingCrossing, shouldInjectReminder, shouldInjectSoftWarn, } from "./scope-reminder.js";
16
+ import { attachReminderToMessages, buildScopeReminder, cadenceForSize, shouldInjectCeilingCrossing, shouldInjectReminder, shouldInjectSoftWarn, shouldPreWarnCompaction, } from "./scope-reminder.js";
17
17
  describe("cadenceForSize", () => {
18
18
  it("locks 3/5/8 for small/medium/large with hard floor >= 3", () => {
19
19
  expect(cadenceForSize("small")).toBe(3);
@@ -201,4 +201,25 @@ describe("attachReminderToMessages", () => {
201
201
  expect(out).toEqual(messages);
202
202
  });
203
203
  });
204
describe("shouldPreWarnCompaction (regression: session 2b7a10219499 dead pre-warning)", () => {
    // Mirrors the MUONROI_TOP_LEVEL_COMPACT_THRESHOLD_CHARS default.
    const THRESHOLD = 200_000;
    // Asserts the gate's verdict for each prompt size against one threshold.
    const expectGate = (sizes, threshold, expected) => {
        for (const size of sizes) {
            expect(shouldPreWarnCompaction(size, threshold)).toBe(expected);
        }
    };
    it("fires when prompt chars reach >=78% of the threshold (approaching compaction)", () => {
        const warnFloor = Math.floor(THRESHOLD * 0.78);
        expectGate([warnFloor, 190_000], THRESHOLD, true);
    });
    it("does NOT fire while comfortably below the threshold", () => {
        expectGate([100_000, 0], THRESHOLD, false);
    });
    it("guards against the original bug: a message COUNT can never trip a char threshold", () => {
        // The broken wiring fed a message count (tens) into a comparison
        // against a char-scaled threshold; only a char total can cross it.
        const messageCount = 60; // plausible long-session message count
        expectGate([messageCount], THRESHOLD, false);
        expectGate([170_000], THRESHOLD, true);
    });
    it("is inert for a zero/negative threshold (no compaction configured)", () => {
        expectGate([999_999], 0, false);
    });
});
204
225
  //# sourceMappingURL=scope-reminder.test.js.map