@psiclawops/hypermem 0.7.0 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. package/ARCHITECTURE.md +30 -38
  2. package/README.md +83 -35
  3. package/dist/background-indexer.d.ts +14 -3
  4. package/dist/background-indexer.d.ts.map +1 -1
  5. package/dist/background-indexer.js +126 -18
  6. package/dist/budget-policy.d.ts +22 -0
  7. package/dist/budget-policy.d.ts.map +1 -0
  8. package/dist/budget-policy.js +27 -0
  9. package/dist/cache.d.ts +11 -0
  10. package/dist/cache.d.ts.map +1 -1
  11. package/dist/compositor-utils.d.ts +31 -0
  12. package/dist/compositor-utils.d.ts.map +1 -0
  13. package/dist/compositor-utils.js +47 -0
  14. package/dist/compositor.d.ts +163 -1
  15. package/dist/compositor.d.ts.map +1 -1
  16. package/dist/compositor.js +862 -130
  17. package/dist/content-hash.d.ts +43 -0
  18. package/dist/content-hash.d.ts.map +1 -0
  19. package/dist/content-hash.js +75 -0
  20. package/dist/context-store.d.ts +54 -0
  21. package/dist/context-store.d.ts.map +1 -1
  22. package/dist/context-store.js +102 -0
  23. package/dist/contradiction-audit-store.d.ts +54 -0
  24. package/dist/contradiction-audit-store.d.ts.map +1 -0
  25. package/dist/contradiction-audit-store.js +88 -0
  26. package/dist/contradiction-resolution-policy.d.ts +21 -0
  27. package/dist/contradiction-resolution-policy.d.ts.map +1 -0
  28. package/dist/contradiction-resolution-policy.js +17 -0
  29. package/dist/degradation.d.ts +102 -0
  30. package/dist/degradation.d.ts.map +1 -0
  31. package/dist/degradation.js +141 -0
  32. package/dist/dreaming-promoter.d.ts +38 -0
  33. package/dist/dreaming-promoter.d.ts.map +1 -1
  34. package/dist/dreaming-promoter.js +68 -2
  35. package/dist/index.d.ts +68 -6
  36. package/dist/index.d.ts.map +1 -1
  37. package/dist/index.js +402 -26
  38. package/dist/knowledge-lint.d.ts +2 -0
  39. package/dist/knowledge-lint.d.ts.map +1 -1
  40. package/dist/knowledge-lint.js +40 -1
  41. package/dist/library-schema.d.ts +7 -2
  42. package/dist/library-schema.d.ts.map +1 -1
  43. package/dist/library-schema.js +236 -1
  44. package/dist/message-store.d.ts +64 -1
  45. package/dist/message-store.d.ts.map +1 -1
  46. package/dist/message-store.js +137 -1
  47. package/dist/open-domain.js +1 -1
  48. package/dist/proactive-pass.d.ts +2 -2
  49. package/dist/proactive-pass.d.ts.map +1 -1
  50. package/dist/proactive-pass.js +66 -12
  51. package/dist/replay-recovery.d.ts +29 -0
  52. package/dist/replay-recovery.d.ts.map +1 -0
  53. package/dist/replay-recovery.js +82 -0
  54. package/dist/reranker.d.ts +95 -0
  55. package/dist/reranker.d.ts.map +1 -0
  56. package/dist/reranker.js +308 -0
  57. package/dist/schema.d.ts +1 -1
  58. package/dist/schema.d.ts.map +1 -1
  59. package/dist/schema.js +46 -1
  60. package/dist/session-flusher.d.ts +2 -2
  61. package/dist/session-flusher.d.ts.map +1 -1
  62. package/dist/session-flusher.js +1 -1
  63. package/dist/temporal-store.js +2 -2
  64. package/dist/tool-artifact-store.d.ts +98 -0
  65. package/dist/tool-artifact-store.d.ts.map +1 -0
  66. package/dist/tool-artifact-store.js +244 -0
  67. package/dist/topic-detector.js +2 -2
  68. package/dist/topic-store.d.ts +6 -0
  69. package/dist/topic-store.d.ts.map +1 -1
  70. package/dist/topic-store.js +39 -0
  71. package/dist/types.d.ts +233 -1
  72. package/dist/types.d.ts.map +1 -1
  73. package/dist/vector-store.d.ts +2 -1
  74. package/dist/vector-store.d.ts.map +1 -1
  75. package/dist/vector-store.js +3 -0
  76. package/dist/version.d.ts +10 -10
  77. package/dist/version.d.ts.map +1 -1
  78. package/dist/version.js +10 -10
  79. package/package.json +6 -4
@@ -10,6 +10,7 @@
10
10
  * Token-budgeted: never exceeds the budget, prioritizes by configured order.
11
11
  * Provider-neutral internally, translates at the output boundary.
12
12
  */
13
+ import { createHash } from 'node:crypto';
13
14
  import { filterByScope } from './retrieval-policy.js';
14
15
  import { DEFAULT_TRIGGERS, matchTriggers, logRegistryStartup, } from './trigger-registry.js';
15
16
  import { MessageStore } from './message-store.js';
@@ -25,6 +26,9 @@ import { getActiveFOS, matchMOD, renderFOS, renderMOD, renderLightFOS, resolveOu
25
26
  import { KnowledgeStore } from './knowledge-store.js';
26
27
  import { TemporalStore, hasTemporalSignals } from './temporal-store.js';
27
28
  import { isOpenDomainQuery, searchOpenDomain } from './open-domain.js';
29
+ import { TRIM_BUDGET_POLICY, resolveTrimBudgets } from './budget-policy.js';
30
+ import { formatToolChainStub, parseToolChainStub, formatArtifactRef, isArtifactRef } from './degradation.js';
31
+ import { ToolArtifactStore } from './tool-artifact-store.js';
28
32
  /**
29
33
  * Files that OpenClaw's contextInjection injects into the system prompt.
30
34
  * HyperMem must not re-inject these via doc chunk retrieval to avoid duplication.
@@ -34,6 +38,7 @@ export const OPENCLAW_BOOTSTRAP_FILES = new Set([
34
38
  'SOUL.md', 'IDENTITY.md', 'USER.md', 'TOOLS.md',
35
39
  'AGENTS.md', 'HEARTBEAT.md', 'MEMORY.md', 'BOOTSTRAP.md',
36
40
  ]);
41
+ const CACHE_PREFIX_BOUNDARY_SLOT = 'cache-prefix-boundary';
37
42
  /**
38
43
  * Model context window sizes by provider/model string (or partial match).
39
44
  * Used as fallback when tokenBudget is not passed by the runtime.
@@ -68,6 +73,67 @@ const MODEL_CONTEXT_WINDOWS = [
68
73
  { pattern: 'deepseek-v3', tokens: 131_072 },
69
74
  { pattern: 'deepseek', tokens: 131_072 },
70
75
  ];
76
+ const MODEL_MECW = [
77
+ // Claude 200k: effective recall degrades above ~140k; clamp composite budget
78
+ { pattern: 'claude', mecwFloor: 80_000, mecwCeiling: 140_000, preferredHistoryFraction: 0.35, preferredMemoryFraction: 0.45 },
79
+ // Gemini 1M: reliable up to ~180k for grounded retrieval; less for recall
80
+ { pattern: 'gemini', mecwFloor: 100_000, mecwCeiling: 180_000, preferredHistoryFraction: 0.40, preferredMemoryFraction: 0.45 },
81
+ // OpenAI 128k: full window is trustable; use standard fractions
82
+ { pattern: 'gpt', mecwFloor: 128_000, mecwCeiling: 128_000, preferredHistoryFraction: 0.40, preferredMemoryFraction: 0.40 },
83
+ { pattern: 'o3', mecwFloor: 128_000, mecwCeiling: 128_000, preferredHistoryFraction: 0.40, preferredMemoryFraction: 0.40 },
84
+ { pattern: 'o4', mecwFloor: 128_000, mecwCeiling: 128_000, preferredHistoryFraction: 0.40, preferredMemoryFraction: 0.40 },
85
+ // Smaller windows: full window is trustable
86
+ { pattern: 'qwen3', mecwFloor: 262_144, mecwCeiling: 262_144, preferredHistoryFraction: 0.40, preferredMemoryFraction: 0.40 },
87
+ { pattern: 'qwen', mecwFloor: 131_072, mecwCeiling: 131_072, preferredHistoryFraction: 0.40, preferredMemoryFraction: 0.40 },
88
+ { pattern: 'glm', mecwFloor: 131_072, mecwCeiling: 131_072, preferredHistoryFraction: 0.40, preferredMemoryFraction: 0.40 },
89
+ { pattern: 'deepseek', mecwFloor: 131_072, mecwCeiling: 131_072, preferredHistoryFraction: 0.40, preferredMemoryFraction: 0.40 },
90
+ ];
91
+ /**
92
+ * B4: Compute model-aware lane budget fractions.
93
+ *
94
+ * Resolves the effective historyFraction and memoryFraction for a compose pass
95
+ * given the model and its effective budget. Uses the MECW catalog to blend
96
+ * away from fixed fractions when the budget approaches the MECW ceiling,
97
+ * so the compositor allocates proportionally for what the model can actually use.
98
+ *
99
+ * Returns:
100
+ * historyFraction — fraction of effective budget to give history
101
+ * memoryFraction — fraction of effective budget to give memory pool
102
+ * mecwProfile — which MECW entry matched (undefined = no match / full window)
103
+ * mecwApplied — true when MECW adjustment changed the fractions
104
+ * mecwBlend — 0..1 blend factor (0 = below floor, 1 = at/above ceiling)
105
+ */
106
+ export function resolveModelLaneBudgets(model, effectiveBudget, configHistoryFraction, configMemoryFraction) {
107
+ if (!model) {
108
+ return { historyFraction: configHistoryFraction, memoryFraction: configMemoryFraction, mecwProfile: undefined, mecwApplied: false, mecwBlend: 0 };
109
+ }
110
+ const normalized = model.toLowerCase();
111
+ for (const entry of MODEL_MECW) {
112
+ if (!normalized.includes(entry.pattern))
113
+ continue;
114
+ // Budget is at or below the floor — full window is safe, use config fractions
115
+ if (effectiveBudget <= entry.mecwFloor) {
116
+ return { historyFraction: configHistoryFraction, memoryFraction: configMemoryFraction, mecwProfile: entry.pattern, mecwApplied: false, mecwBlend: 0 };
117
+ }
118
+ // Budget is at or above the ceiling — use preferred fractions fully
119
+ if (effectiveBudget >= entry.mecwCeiling) {
120
+ return { historyFraction: entry.preferredHistoryFraction, memoryFraction: entry.preferredMemoryFraction, mecwProfile: entry.pattern, mecwApplied: true, mecwBlend: 1 };
121
+ }
122
+ // Budget is between floor and ceiling — linear blend
123
+ const blend = (effectiveBudget - entry.mecwFloor) / (entry.mecwCeiling - entry.mecwFloor);
124
+ const historyFraction = configHistoryFraction + blend * (entry.preferredHistoryFraction - configHistoryFraction);
125
+ const memoryFraction = configMemoryFraction + blend * (entry.preferredMemoryFraction - configMemoryFraction);
126
+ return {
127
+ historyFraction: Math.round(historyFraction * 1000) / 1000,
128
+ memoryFraction: Math.round(memoryFraction * 1000) / 1000,
129
+ mecwProfile: entry.pattern,
130
+ mecwApplied: true,
131
+ mecwBlend: Math.round(blend * 1000) / 1000,
132
+ };
133
+ }
134
+ // No MECW entry matched — use config fractions unchanged
135
+ return { historyFraction: configHistoryFraction, memoryFraction: configMemoryFraction, mecwProfile: undefined, mecwApplied: false, mecwBlend: 0 };
136
+ }
71
137
  /**
72
138
  * Resolve effective token budget from model string.
73
139
  * Returns the context window for the model, minus the configured reserve fraction
@@ -166,6 +232,74 @@ function computeDynamicReserve(recentMessages, totalWindow, config) {
166
232
  }
167
233
  return { reserve: dynamicFrac, avgTurnCost, dynamic: true, pressureHigh: false };
168
234
  }
235
+ /**
236
+ * Classify a session based on the ratio of tool messages in the recent sample.
237
+ * 'tool-heavy': >= 20% of sampled messages carry tool calls or tool results.
238
+ * 'plain-chat': below that threshold (text-only or occasional tool use).
239
+ *
240
+ * The 20% threshold is intentionally conservative: most tool-heavy agents
241
+ * have tool messages on every assistant turn, so the ratio quickly exceeds
242
+ * the threshold without false-positive risk for light tool users.
243
+ */
244
+ export function classifySessionType(messages) {
245
+ if (messages.length === 0)
246
+ return 'plain-chat';
247
+ const toolCount = messages.filter(m => hasToolContent(m)).length;
248
+ return toolCount / messages.length >= 0.20 ? 'tool-heavy' : 'plain-chat';
249
+ }
250
+ /**
251
+ * Estimate the average token cost per message from a recent message sample.
252
+ * Uses the same estimateMessageTokens heuristic as the compositor budget walk
253
+ * so the returned depth is directly comparable to the historyFillCap check.
254
+ *
255
+ * Returns a conservative floor (100 tokens) when the sample is empty to avoid
256
+ * returning Infinity when historyBudget is divided by density.
257
+ */
258
+ export function estimateObservedMsgDensity(messages) {
259
+ if (messages.length === 0)
260
+ return 100;
261
+ const total = messages.reduce((sum, m) => sum + estimateMessageTokens(m), 0);
262
+ return Math.max(1, Math.ceil(total / messages.length));
263
+ }
264
+ /**
265
+ * Compute an adaptive history depth that pre-fits the session type.
266
+ *
267
+ * For plain-chat sessions: divides historyBudget by observed density to get a
268
+ * depth that fills the budget without overflow, bounded by the default maximum.
269
+ * Recall quality is preserved because the density estimate is honest for
270
+ * text-only turns.
271
+ *
272
+ * For tool-heavy sessions: applies a post-gradient compression factor
273
+ * (TOOL_GRADIENT_DENSITY_FACTOR = 0.30) to the observed pre-gradient density.
274
+ * This accounts for the gradient transform collapsing large tool payloads to
275
+ * prose stubs before the budget-fit walk runs. A tighter depth is chosen so
276
+ * the gradient-compressed messages fit inside historyFillCap without triggering
277
+ * a rescue trim.
278
+ *
279
+ * A 0.85 safety margin is applied to both paths so estimates that are
280
+ * slightly off don't cause immediate overflow on the first warm compose.
281
+ *
282
+ * Min/max bounds ensure the compositor always sees a meaningful window:
283
+ * - plain-chat min: 20 messages (enough for short recent context)
284
+ * - tool-heavy min: 15 messages (recent tool context + a few prior turns)
285
+ * - shared max: config.maxHistoryMessages (never exceed the DB fetch ceiling)
286
+ */
287
+ export function computeAdaptiveHistoryDepth(sessionType, observedDensity, historyBudgetTokens, maxHistoryMessages) {
288
+ const SAFETY_MARGIN = 0.85;
289
+ if (sessionType === 'tool-heavy') {
290
+ // Tool-heavy: post-gradient density is much lower than pre-gradient.
291
+ // Gradient tiers collapse T2/T3 payloads to compact stubs (15-30% of original).
292
+ // Use a blended factor of 0.30 as the expected post-gradient density ratio.
293
+ const TOOL_GRADIENT_DENSITY_FACTOR = 0.30;
294
+ const postGradientDensity = Math.max(50, Math.floor(observedDensity * TOOL_GRADIENT_DENSITY_FACTOR));
295
+ const depth = Math.floor((historyBudgetTokens * SAFETY_MARGIN) / postGradientDensity);
296
+ return Math.min(maxHistoryMessages, Math.max(15, depth));
297
+ }
298
+ // Plain-chat: pre-gradient and post-gradient density are the same.
299
+ // historyBudget / avgMsgCost gives the message count that fills the budget.
300
+ const depth = Math.floor((historyBudgetTokens * SAFETY_MARGIN) / observedDensity);
301
+ return Math.min(maxHistoryMessages, Math.max(20, depth));
302
+ }
169
303
  const DEFAULT_CONFIG = {
170
304
  // Primary budget controls
171
305
  budgetFraction: 0.703,
@@ -277,7 +411,7 @@ function clusterNeutralMessages(messages) {
277
411
  */
278
412
  export function applyToolGradientToWindow(messages, tokenBudget, totalWindowTokens) {
279
413
  const reshaped = applyToolGradient(messages, { totalWindowTokens });
280
- const targetTokens = Math.floor(tokenBudget * 0.65);
414
+ const { softBudget: targetTokens } = resolveTrimBudgets(tokenBudget);
281
415
  const clusters = clusterNeutralMessages(reshaped);
282
416
  let totalTokens = clusters.reduce((sum, cluster) => sum + cluster.tokenCost, 0);
283
417
  let start = 0;
@@ -328,6 +462,30 @@ function estimateMessageTokens(msg) {
328
462
  tokens += 4;
329
463
  return tokens;
330
464
  }
465
+ function isDynamicBoundaryMessage(msg) {
466
+ return Boolean(msg.metadata?.dynamicBoundary);
467
+ }
468
+ function getStablePrefixMessages(messages) {
469
+ const prefix = [];
470
+ for (const msg of messages) {
471
+ if (msg.role !== 'system')
472
+ break;
473
+ if (isDynamicBoundaryMessage(msg))
474
+ break;
475
+ prefix.push(msg);
476
+ }
477
+ return prefix;
478
+ }
479
+ function computeStablePrefixHash(messages) {
480
+ if (messages.length === 0)
481
+ return undefined;
482
+ const hash = createHash('sha256');
483
+ for (const msg of messages) {
484
+ hash.update(msg.textContent ?? '');
485
+ hash.update('\n␞\n');
486
+ }
487
+ return hash.digest('hex');
488
+ }
331
489
  function parseToolArgs(argumentsJson) {
332
490
  try {
333
491
  return JSON.parse(argumentsJson);
@@ -702,12 +860,211 @@ function evictLargeToolResults(messages) {
702
860
  const approxKTokens = Math.round(content.length / 4 / 1000);
703
861
  return {
704
862
  ...result,
705
- content: `[tool result evicted: ~${approxKTokens}k tokens \u2014 use memory_search or re-run if needed]`,
863
+ content: formatToolChainStub({
864
+ name: result.name || 'tool_result',
865
+ id: result.callId || 'unknown',
866
+ status: 'ejected',
867
+ reason: 'eviction_oversize',
868
+ summary: `~${approxKTokens}k tokens, use memory_search or re-run if needed`,
869
+ }),
706
870
  };
707
871
  });
708
872
  return { ...msg, toolResults: evicted };
709
873
  });
710
874
  }
875
+ // ─── C2: Oversized artifact handling ────────────────────────────────────────
876
+ /**
877
+ * C2: Resolve the artifact oversize threshold (in tokens) for the current compose pass.
878
+ *
879
+ * The threshold scales with the effective model budget from B4 so:
880
+ * - Small-window models (16k–32k effective) get a proportionally tighter threshold
881
+ * (threshold = budget × ARTIFACT_OVERSIZE_FRACTION, floor 500, ceiling 8000).
882
+ * - Large-window models (200k+) get a higher ceiling but it still stays bounded
883
+ * so artifacts never fill the lane unconditionally.
884
+ *
885
+ * ARTIFACT_BUDGET_FRACTION: fraction of the soft budget above which a single
886
+ * retrieved artifact/chunk is considered oversized. Default 0.10 (10%).
887
+ *
888
+ * Headroom preservation comes from replacing the oversized artifact with a cheap
889
+ * reference, not from shrinking the threshold itself.
890
+ */
891
+ const ARTIFACT_BUDGET_FRACTION = 0.10; // 10% of soft budget is the raw threshold
892
+ const ARTIFACT_THRESHOLD_FLOOR = 500; // never below 500 tokens (~2k chars)
893
+ const ARTIFACT_THRESHOLD_CEILING = 8_000; // never above 8k tokens (~32k chars)
894
+ export function resolveArtifactOversizeThreshold(effectiveBudget) {
895
+ const { softBudget } = resolveTrimBudgets(effectiveBudget);
896
+ const raw = Math.floor(softBudget * ARTIFACT_BUDGET_FRACTION);
897
+ return Math.min(ARTIFACT_THRESHOLD_CEILING, Math.max(ARTIFACT_THRESHOLD_FLOOR, raw));
898
+ }
899
+ /**
900
+ * C2: Degrade an oversized doc chunk to a canonical ArtifactRef string.
901
+ *
902
+ * When a retrieved chunk's content exceeds the oversize threshold (in tokens),
903
+ * replace it with a fetchable canonical reference instead of injecting raw content.
904
+ * This preserves headroom in the lane instead of filling it with a large payload.
905
+ *
906
+ * Returns:
907
+ * - `null` → content is within the threshold; caller should inject as-is.
908
+ * - `string` → canonical artifact reference; caller should inject this instead of raw content.
909
+ *
910
+ * The sizeTokens reported in the reference is the ACTUAL estimated size so downstream
911
+ * tooling can make informed decisions about whether to fetch.
912
+ */
913
+ export function degradeOversizedDocChunk(chunkId, sourcePath, content, thresholdTokens) {
914
+ const contentTokens = estimateTokens(content);
915
+ if (contentTokens <= thresholdTokens)
916
+ return null;
917
+ const ref = {
918
+ id: chunkId,
919
+ path: sourcePath,
920
+ sizeTokens: contentTokens,
921
+ status: 'degraded',
922
+ reason: 'artifact_oversize',
923
+ fetchHint: 'memory_search or re-read source file',
924
+ };
925
+ return formatArtifactRef(ref);
926
+ }
927
+ /**
928
+ * C2: Resolve oversized artifacts in a history message array.
929
+ *
930
+ * Scans the message array and replaces user/assistant messages whose text content
931
+ * exceeds the model-aware artifact oversize threshold with canonical ArtifactRef
932
+ * strings. System messages, tool-call messages, and tool-result messages are always
933
+ * passed through unchanged.
934
+ *
935
+ * @param messages — neutral message array (already-assembled history window)
936
+ * @param effectiveBudget — effective model budget from B4 (drives the threshold)
937
+ * @returns { messages, refCount, tokensSaved }
938
+ */
939
+ export function resolveOversizedArtifacts(messages, effectiveBudget) {
940
+ const thresholdTokens = resolveArtifactOversizeThreshold(effectiveBudget);
941
+ let refCount = 0;
942
+ let tokensSaved = 0;
943
+ const out = messages.map(msg => {
944
+ // System messages are never degraded (they are in the stable prefix).
945
+ if (msg.role === 'system')
946
+ return msg;
947
+ // Tool content (calls/results) is C1's domain — never touch here.
948
+ if (msg.toolResults || msg.toolCalls)
949
+ return msg;
950
+ const text = msg.textContent ?? '';
951
+ // Already a ref — idempotent; don't re-degrade.
952
+ if (isArtifactRef(text))
953
+ return msg;
954
+ const contentTokens = estimateTokens(text);
955
+ if (contentTokens <= thresholdTokens)
956
+ return msg;
957
+ // Oversized — replace with canonical artifact reference.
958
+ const meta = msg;
959
+ const id = (typeof meta['_artifactId'] === 'string' ? meta['_artifactId'] : null)
960
+ ?? `msg-${createHash('sha1').update(`${msg.role}:${text}`).digest('hex').slice(0, 12)}`;
961
+ const path = (typeof meta['_artifactPath'] === 'string' ? meta['_artifactPath'] : null)
962
+ ?? '/unknown/artifact';
963
+ const ref = {
964
+ id,
965
+ path,
966
+ sizeTokens: contentTokens,
967
+ status: 'degraded',
968
+ reason: 'artifact_oversize',
969
+ fetchHint: 'memory_search',
970
+ };
971
+ const refText = formatArtifactRef(ref);
972
+ const refTokens = estimateTokens(refText);
973
+ tokensSaved += contentTokens - refTokens;
974
+ refCount++;
975
+ return { ...msg, textContent: refText };
976
+ });
977
+ return { messages: out, refCount, tokensSaved };
978
+ }
979
+ /**
980
+ * C1: Centralized tool-chain dependency ejection.
981
+ *
982
+ * Given a set of tool-use message indices that are being ejected from the
983
+ * context window, this function ensures that no orphaned tool-results survive:
984
+ *
985
+ * - For each ejected assistant message carrying toolCalls, collect the set
986
+ * of call IDs being removed.
987
+ * - Walk the remaining messages: if a message's toolResults reference any
988
+ * of those ejected IDs:
989
+ * a) If the message carries ONLY tool-results and no other text, co-eject
990
+ * it (remove it entirely). This is the zero-cost path.
991
+ * b) If the message also carries text content, replace only the dependent
992
+ * toolResults entries with canonical ToolChainStub strings so the
993
+ * message is not silently mutilated.
994
+ *
995
+ * The caller is responsible for removing the ejected messages by index BEFORE
996
+ * or AFTER calling this function; this function operates on the full array and
997
+ * marks the ejected indices for removal, returning the cleaned result.
998
+ *
999
+ * @param messages Full message array (order preserved)
1000
+ * @param ejectIndices Set of indices into `messages` that are being ejected
1001
+ * (these are the tool-use / assistant messages being removed).
1002
+ * @param reason DegradationReason to embed in any canonical stubs.
1003
+ * @returns Cleaned message array + telemetry counters.
1004
+ */
1005
+ export function resolveToolChainEjections(messages, ejectIndices, reason = 'eviction_oversize') {
1006
+ // Collect all tool-call IDs that are being ejected.
1007
+ const ejectedCallIds = new Set();
1008
+ for (const idx of ejectIndices) {
1009
+ const msg = messages[idx];
1010
+ if (!msg)
1011
+ continue;
1012
+ if (msg.toolCalls) {
1013
+ for (const tc of msg.toolCalls) {
1014
+ if (tc.id)
1015
+ ejectedCallIds.add(tc.id);
1016
+ }
1017
+ }
1018
+ }
1019
+ let coEjections = 0;
1020
+ let stubReplacements = 0;
1021
+ // If no call IDs were ejected, nothing to do beyond dropping the ejected messages.
1022
+ if (ejectedCallIds.size === 0) {
1023
+ const result = messages.filter((_, idx) => !ejectIndices.has(idx));
1024
+ return { messages: result, coEjections, stubReplacements };
1025
+ }
1026
+ // Walk all messages and handle dependent tool-results.
1027
+ const transformed = messages.map((msg, idx) => {
1028
+ // Already being ejected — remove.
1029
+ if (ejectIndices.has(idx))
1030
+ return null;
1031
+ if (!msg.toolResults || msg.toolResults.length === 0)
1032
+ return msg;
1033
+ // Determine which results in this message depend on ejected calls.
1034
+ const dependentResultIds = msg.toolResults
1035
+ .map(r => r.callId)
1036
+ .filter((id) => Boolean(id) && ejectedCallIds.has(id));
1037
+ if (dependentResultIds.length === 0)
1038
+ return msg;
1039
+ const dependentSet = new Set(dependentResultIds);
1040
+ // Case (a): The message carries ONLY tool-results and no other text content,
1041
+ // and ALL of its results are dependent on ejected calls.
1042
+ // Co-eject the whole message — zero budget cost, no stub needed.
1043
+ const hasText = Boolean(msg.textContent && msg.textContent.trim().length > 0);
1044
+ const hasNonDependentResults = msg.toolResults.some(r => !dependentSet.has(r.callId));
1045
+ if (!hasText && !hasNonDependentResults) {
1046
+ coEjections++;
1047
+ return null;
1048
+ }
1049
+ // Case (b): Message has text or unrelated results — stub only the dependent entries.
1050
+ const stubbedResults = msg.toolResults.map(result => {
1051
+ if (!result.callId || !dependentSet.has(result.callId))
1052
+ return result;
1053
+ const stubContent = formatToolChainStub({
1054
+ name: result.name || 'tool_result',
1055
+ id: result.callId || 'unknown',
1056
+ status: 'ejected',
1057
+ reason,
1058
+ summary: 'parent tool-use ejected from context window',
1059
+ });
1060
+ stubReplacements++;
1061
+ return { ...result, content: stubContent };
1062
+ });
1063
+ return { ...msg, toolResults: stubbedResults };
1064
+ });
1065
+ const result = transformed.filter((m) => m !== null);
1066
+ return { messages: result, coEjections, stubReplacements };
1067
+ }
711
1068
  /**
712
1069
  * Apply gradient tool treatment to a message array.
713
1070
  *
@@ -812,6 +1169,131 @@ export class Compositor {
812
1169
  get orgRegistry() {
813
1170
  return this._orgRegistry;
814
1171
  }
1172
+ /**
1173
+ * Sprint 2.1: Hydrate tool-artifact stubs in the active turn.
1174
+ *
1175
+ * The active turn is the contiguous trailing block of tool-bearing messages
1176
+ * at the tail of the assembled window (positional, NOT turn_id-based):
1177
+ * - Walk backward from the last message
1178
+ * - Collect tool-bearing messages (toolCalls != null OR toolResults != null)
1179
+ * - Plus the bounding user message that opened the turn
1180
+ * - Stop at the first plain message once at least one tool message was found
1181
+ *
1182
+ * For every toolResult stub with an `artifact=<id>` pointer, look up the
1183
+ * full payload in ToolArtifactStore and replace the stub content in-place.
1184
+ * Uses a single batched `WHERE id IN (...)` lookup (no N+1 queries).
1185
+ * Touches `last_used_at` on every hydrated artifact in a single batch.
1186
+ *
1187
+ * Failure mode: if a lookup returns null (artifact missing), leave the stub
1188
+ * unchanged and increment hydrationMisses.
1189
+ *
1190
+ * Returns diagnostics counters.
1191
+ */
1192
+ hydrateActiveTurnArtifacts(messages, db) {
1193
+ if (messages.length === 0) {
1194
+ return { artifactsHydrated: 0, hydrationBytes: 0, hydrationMisses: 0 };
1195
+ }
1196
+ const store = new ToolArtifactStore(db);
1197
+ // ── 1. Detect active turn (positional, backward walk) ─────────────────────
1198
+ // Collect indices belonging to the active turn.
1199
+ const activeTurnIndices = [];
1200
+ let foundToolBearing = false;
1201
+ for (let i = messages.length - 1; i >= 0; i--) {
1202
+ const msg = messages[i];
1203
+ const isToolBearing = msg.toolCalls != null || msg.toolResults != null;
1204
+ if (isToolBearing) {
1205
+ foundToolBearing = true;
1206
+ activeTurnIndices.push(i);
1207
+ }
1208
+ else if (foundToolBearing) {
1209
+ // First plain message after at least one tool-bearing message — this
1210
+ // is the bounding user message that opened the turn. Include it and stop.
1211
+ activeTurnIndices.push(i);
1212
+ break;
1213
+ }
1214
+ else {
1215
+ // Haven't found any tool-bearing messages yet — still in non-tool tail
1216
+ // (e.g., the last message is a plain user message). No active turn.
1217
+ break;
1218
+ }
1219
+ }
1220
+ if (activeTurnIndices.length === 0 || !foundToolBearing) {
1221
+ return { artifactsHydrated: 0, hydrationBytes: 0, hydrationMisses: 0 };
1222
+ }
1223
+ // ── 2. Collect all artifactIds from stub toolResults in the active turn ───
1224
+ // Map: artifactId -> array of [msgIndex, resultIndex] for in-place replacement
1225
+ const artifactTargets = new Map();
1226
+ for (const msgIdx of activeTurnIndices) {
1227
+ const msg = messages[msgIdx];
1228
+ if (!msg.toolResults)
1229
+ continue;
1230
+ for (let resultIdx = 0; resultIdx < msg.toolResults.length; resultIdx++) {
1231
+ const result = msg.toolResults[resultIdx];
1232
+ const stub = parseToolChainStub(result.content);
1233
+ if (stub && stub.artifactId) {
1234
+ const existing = artifactTargets.get(stub.artifactId) ?? [];
1235
+ existing.push({ msgIdx, resultIdx });
1236
+ artifactTargets.set(stub.artifactId, existing);
1237
+ }
1238
+ }
1239
+ }
1240
+ if (artifactTargets.size === 0) {
1241
+ return { artifactsHydrated: 0, hydrationBytes: 0, hydrationMisses: 0 };
1242
+ }
1243
+ // ── 3. Batch lookup ────────────────────────────────────────────────────────
1244
+ const ids = Array.from(artifactTargets.keys());
1245
+ const placeholders = ids.map(() => '?').join(', ');
1246
+ const rows = db
1247
+ .prepare(`SELECT * FROM tool_artifacts WHERE id IN (${placeholders})`)
1248
+ .all(...ids);
1249
+ // Build id -> payload map
1250
+ const payloadMap = new Map();
1251
+ for (const row of rows) {
1252
+ payloadMap.set(row.id, row.payload);
1253
+ }
1254
+ // ── 4. Hydrate in-place ────────────────────────────────────────────────────
1255
+ let artifactsHydrated = 0;
1256
+ let hydrationBytes = 0;
1257
+ let hydrationMisses = 0;
1258
+ const touchIds = [];
1259
+ for (const [artifactId, targets] of artifactTargets) {
1260
+ const payload = payloadMap.get(artifactId);
1261
+ if (payload == null) {
1262
+ // Graceful miss — stub stays as-is
1263
+ hydrationMisses += targets.length;
1264
+ continue;
1265
+ }
1266
+ for (const { msgIdx, resultIdx } of targets) {
1267
+ const msg = messages[msgIdx];
1268
+ // Safety: if content doesn't look like a stub anymore (defensive idempotency check)
1269
+ const existingContent = msg.toolResults[resultIdx].content;
1270
+ if (!parseToolChainStub(existingContent)) {
1271
+ // Already full content — pass through unchanged
1272
+ continue;
1273
+ }
1274
+ // Replace stub with full payload
1275
+ msg.toolResults[resultIdx] = {
1276
+ ...msg.toolResults[resultIdx],
1277
+ content: payload,
1278
+ };
1279
+ artifactsHydrated++;
1280
+ hydrationBytes += Buffer.byteLength(payload, 'utf8');
1281
+ }
1282
+ touchIds.push(artifactId);
1283
+ }
1284
+ // ── 5. Batch touch last_used_at ───────────────────────────────────────────
1285
+ if (touchIds.length > 0) {
1286
+ const ts = new Date().toISOString();
1287
+ const touchPlaceholders = touchIds.map(() => '?').join(', ');
1288
+ try {
1289
+ db.prepare(`UPDATE tool_artifacts SET last_used_at = ? WHERE id IN (${touchPlaceholders})`).run(ts, ...touchIds);
1290
+ }
1291
+ catch {
1292
+ // Touch is best-effort — hydration still succeeded
1293
+ }
1294
+ }
1295
+ return { artifactsHydrated, hydrationBytes, hydrationMisses };
1296
+ }
815
1297
  /**
816
1298
  * Compose a complete message array for sending to an LLM.
817
1299
  *
@@ -842,6 +1324,11 @@ export class Compositor {
842
1324
  // Particularly effective for low-frequency sessions (heartbeat agents, council
843
1325
  // seats between rounds). TTL on the cache write remains 120s — this is a
844
1326
  // conservative early-exit before the TTL expires, not a TTL extension.
1327
+ //
1328
+ // B2: prevPrefixHash is set when a cached bundle is found but bypassed due to
1329
+ // prefix-input mutation. It is surfaced in the full-compose diagnostics so
1330
+ // callers can confirm the bypass fired correctly.
1331
+ let _prevPrefixHashFromBypass;
845
1332
  if (request.includeHistory !== false && request.skipWindowCache !== true) {
846
1333
  try {
847
1334
  const newestRow = db.prepare('SELECT MAX(id) AS maxId FROM messages WHERE agent_id = ?').get(request.agentId);
@@ -866,7 +1353,28 @@ export class Compositor {
866
1353
  // historyDepth constrains how many messages the caller wants;
867
1354
  // we can't slice a cached bundle safely, so skip cache.
868
1355
  const depthOk = !request.historyDepth;
869
- if (budgetOk && factsOk && libraryOk && contextOk && depthOk) {
1356
+ // B2: Stable-prefix hash check.
1357
+ // If the system/identity slots changed since this cache entry was
1358
+ // written, the stable prefix is stale even if cursor freshness
1359
+ // passes. Compute a cheap input hash from slot contents and compare
1360
+ // against the one stored in the cache meta. If no stored hash exists
1361
+ // (pre-B2 cache entries), fall through to prefix check on the
1362
+ // cached message content itself.
1363
+ let prefixInputOk = true;
1364
+ const _cachedPrefixInputHash = cachedBundle.meta.prefixInputHash;
1365
+ if (_cachedPrefixInputHash) {
1366
+ const _sysSlot = await this.cache.getSlot(request.agentId, request.sessionKey, 'system');
1367
+ const _idSlot = await this.cache.getSlot(request.agentId, request.sessionKey, 'identity');
1368
+ const _incomingInputHash = createHash('sha256')
1369
+ .update(_sysSlot ?? '')
1370
+ .update('\n␞\n')
1371
+ .update(_idSlot ?? '')
1372
+ .digest('hex');
1373
+ if (_incomingInputHash !== _cachedPrefixInputHash) {
1374
+ prefixInputOk = false;
1375
+ }
1376
+ }
1377
+ if (budgetOk && factsOk && libraryOk && contextOk && depthOk && prefixInputOk) {
870
1378
  const cachedSlots = {
871
1379
  system: cachedBundle.meta.slots['system'] ?? 0,
872
1380
  identity: cachedBundle.meta.slots['identity'] ?? 0,
@@ -875,6 +1383,8 @@ export class Compositor {
875
1383
  context: cachedBundle.meta.slots['context'] ?? 0,
876
1384
  library: cachedBundle.meta.slots['library'] ?? 0,
877
1385
  };
1386
+ // Sprint 2.1: hydrate active-turn artifact stubs before converting.
1387
+ const cachedHydration = this.hydrateActiveTurnArtifacts(cachedBundle.messages, db);
878
1388
  return {
879
1389
  messages: toComposeOutputMessages(cachedBundle.messages),
880
1390
  tokenCount: cachedBundle.meta.totalTokens,
@@ -885,10 +1395,17 @@ export class Compositor {
885
1395
  diagnostics: {
886
1396
  ...cachedBundle.meta.diagnostics,
887
1397
  windowCacheHit: true,
1398
+ // Carry forward the stored prefixHash so callers can observe it.
1399
+ prefixHash: cachedBundle.meta.prefixHash ?? cachedBundle.meta.diagnostics.prefixHash,
1400
+ artifactsHydrated: cachedHydration.artifactsHydrated > 0 ? cachedHydration.artifactsHydrated : undefined,
1401
+ hydrationBytes: cachedHydration.hydrationBytes > 0 ? cachedHydration.hydrationBytes : undefined,
1402
+ hydrationMisses: cachedHydration.hydrationMisses > 0 ? cachedHydration.hydrationMisses : undefined,
888
1403
  },
889
1404
  };
890
1405
  }
891
- // Incompatible request — fall through to full compose
1406
+ // Incompatible request — fall through to full compose.
1407
+ // Surface prevPrefixHash so the full compose diagnostics can report it.
1408
+ _prevPrefixHashFromBypass = cachedBundle.meta.prefixHash ?? cachedBundle.meta.diagnostics.prefixHash;
892
1409
  }
893
1410
  }
894
1411
  }
@@ -907,6 +1424,36 @@ export class Compositor {
907
1424
  : [];
908
1425
  const { reserve: dynamicReserve, avgTurnCost, dynamic: isDynamic, pressureHigh } = computeDynamicReserve(sampleMessages, totalWindow, this.config);
909
1426
  const budget = request.tokenBudget || resolveModelBudget(request.model, this.config.defaultTokenBudget, dynamicReserve, this.config.budgetFraction);
1427
+ // B4: Model-aware lane budgets.
1428
+ // Resolve historyFraction and memoryFraction by blending config values toward
1429
+ // model-preferred fractions when the effective budget approaches the MECW ceiling.
1430
+ // This ensures the compositor doesn't allocate more history than the model can
1431
+ // reliably reason over, and adjusts the memory pool proportionally.
1432
+ const _b4ConfigHistoryFraction = this.config.historyFraction ?? 0.40;
1433
+ const _b4ConfigMemoryFraction = this.config.memoryFraction ?? 0.40;
1434
+ const { historyFraction: b4HistoryFraction, memoryFraction: b4MemoryFraction, mecwProfile: b4MecwProfile, mecwApplied: b4MecwApplied, mecwBlend: b4MecwBlend, } = resolveModelLaneBudgets(request.model, budget, _b4ConfigHistoryFraction, _b4ConfigMemoryFraction);
1435
+ // C2: Compute the artifact oversize threshold once per compose pass from the
1436
+ // effective model budget (from B4). Chunk injection paths consult this threshold
1437
+ // to degrade retrieved payloads that would fill the lane instead of injecting them.
1438
+ const c2ArtifactThresholdTokens = resolveArtifactOversizeThreshold(budget);
1439
+ let c2ArtifactDegradations = 0;
1440
+ // Sprint 4: Pre-compose history depth tightening.
1441
+ // Classify the session and compute an adaptive depth from observed message
1442
+ // density. This replaces the old fixed maxHistoryMessages ceiling that over-
1443
+ // fed the compositor for tool-heavy sessions.
1444
+ //
1445
+ // If the caller already passed historyDepth (plugin assemble path), honour it
1446
+ // as an explicit cap — the adaptive depth still applies as a lower bound so
1447
+ // we never request more than the budget can absorb.
1448
+ const s4SessionType = classifySessionType(sampleMessages);
1449
+ const s4ObservedDensity = estimateObservedMsgDensity(sampleMessages);
1450
+ const s4HistoryBudget = Math.floor(budget * b4HistoryFraction);
1451
+ const s4AdaptiveDepth = computeAdaptiveHistoryDepth(s4SessionType, s4ObservedDensity, s4HistoryBudget, this.config.maxHistoryMessages);
1452
+ // Effective depth: caller-provided historyDepth overrides adaptive when it is
1453
+ // the tighter constraint; otherwise use the adaptive depth.
1454
+ const s4EffectiveDepth = request.historyDepth
1455
+ ? Math.min(request.historyDepth, s4AdaptiveDepth)
1456
+ : s4AdaptiveDepth;
910
1457
  let remaining = budget;
911
1458
  // Phase 0 fence enforcement: resolve the compaction fence for this conversation.
912
1459
  // All downstream message queries use this as a lower bound to exclude zombie
@@ -1005,6 +1552,11 @@ export class Compositor {
1005
1552
  }
1006
1553
  // ─── Conversation History ──────────────────────────────────
1007
1554
  let diagCrossTopicKeystones = 0;
1555
+ // Sprint 4: hoisted so diagnostics block can read it regardless of includeHistory branch.
1556
+ let s4RescueTrimFired = false;
1557
+ // C1: total tool-chain degradation counters across history budget-fit and safety-valve passes.
1558
+ let c1CoEjections = 0;
1559
+ let c1StubReplacements = 0;
1008
1560
  // Hoisted: activeTopicId/name resolved inside history block, used for window dual-write (VS-1) and wiki page injection
1009
1561
  let composedActiveTopicId;
1010
1562
  let composedActiveTopicName;
@@ -1049,7 +1601,8 @@ export class Compositor {
1049
1601
  // Hoist resolved topic id+name so the window dual-write and wiki injection sections can access them
1050
1602
  composedActiveTopicId = activeTopicId;
1051
1603
  composedActiveTopicName = activeTopic?.name;
1052
- const rawHistoryMessages = await this.getHistory(request.agentId, request.sessionKey, request.historyDepth || this.config.maxHistoryMessages, store, activeTopicId, fenceMessageId, activeContext);
1604
+ const rawHistoryMessages = await this.getHistory(request.agentId, request.sessionKey, s4EffectiveDepth, // Sprint 4: adaptive depth (replaces fixed maxHistoryMessages)
1605
+ store, activeTopicId, fenceMessageId, activeContext);
1053
1606
  // Deduplicate history by StoredMessage.id (second line of defense after
1054
1607
  // pushHistory() tail-check dedup). Guards against any duplicates that
1055
1608
  // slipped through the warm path — e.g. bootstrap re-runs on existing sessions.
@@ -1073,26 +1626,38 @@ export class Compositor {
1073
1626
  // Replace oversized stale results with stubs so they don't burn budget.
1074
1627
  // Current-turn results (turn age 0) are never evicted.
1075
1628
  const evictedHistory = evictLargeToolResults(transformedHistory);
1629
+ const c2ResolvedHistory = resolveOversizedArtifacts(evictedHistory, budget);
1630
+ c2ArtifactDegradations += c2ResolvedHistory.refCount;
1076
1631
  // ── Budget-fit: walk newest→oldest, drop whole clusters ─────────────
1077
1632
  // Group tool_use + tool_result messages into clusters so they are kept
1078
1633
  // or dropped as a unit. Breaking mid-cluster creates orphaned tool
1079
1634
  // pairs that repairToolPairs has to strip downstream — wasting budget
1080
1635
  // and leaving gaps in conversation continuity.
1081
- const budgetClusters = clusterNeutralMessages(evictedHistory);
1636
+ const budgetClusters = clusterNeutralMessages(c2ResolvedHistory.messages);
1082
1637
  let historyTokens = 0;
1083
1638
  const includedClusters = [];
1084
1639
  // Pre-allocate history budget. historyFraction is a fraction of the
1085
1640
  // effective token budget (post-reserve). Falls back to unbounded fill
1086
1641
  // (remaining) when historyFraction is not set.
1087
- const historyBudget = this.config.historyFraction != null
1088
- ? Math.floor(budget * this.config.historyFraction)
1089
- : remaining;
1642
+ // B4: uses b4HistoryFraction (model-aware, blended from MECW catalog) instead
1643
+ // of raw config.historyFraction so history doesn't overflow MECW ceiling.
1644
+ const historyBudget = Math.floor(budget * b4HistoryFraction);
1090
1645
  const historyFillCap = Math.min(historyBudget, remaining);
1091
1646
  for (let i = budgetClusters.length - 1; i >= 0; i--) {
1092
1647
  const cluster = budgetClusters[i];
1093
1648
  if (historyTokens + cluster.tokenCost > historyFillCap && includedClusters.length > 0) {
1094
- const droppedMsgCount = budgetClusters.slice(0, i + 1).reduce((s, c) => s + c.messages.length, 0);
1095
- warnings.push(`History truncated at cluster ${i + 1}/${budgetClusters.length} (${droppedMsgCount} messages dropped)`);
1649
+ const droppedClusters = budgetClusters.slice(0, i + 1);
1650
+ const droppedMsgCount = droppedClusters.reduce((s, c) => s + c.messages.length, 0);
1651
+ const droppedToolResultCount = droppedClusters.reduce((sum, c) => sum + c.messages.filter(m => (m.toolResults?.length ?? 0) > 0).length, 0);
1652
+ if (droppedToolResultCount > 0) {
1653
+ c1CoEjections += droppedToolResultCount;
1654
+ console.info(`[hypermem:compositor] tool-chain co-eject reason=budget_cluster_drop count=${droppedToolResultCount} messages dropped`);
1655
+ }
1656
+ const c1Note = droppedToolResultCount > 0
1657
+ ? ` [C1: ${droppedToolResultCount} co-ejected reason=budget_cluster_drop]`
1658
+ : '';
1659
+ warnings.push(`History truncated at cluster ${i + 1}/${budgetClusters.length} (${droppedMsgCount} messages dropped)${c1Note}`);
1660
+ s4RescueTrimFired = true;
1096
1661
  break;
1097
1662
  }
1098
1663
  includedClusters.unshift(cluster);
@@ -1204,18 +1769,12 @@ export class Compositor {
1204
1769
  }
1205
1770
  // Memory budget pool: facts, wiki, semantic recall, cross-session, and
1206
1771
  // trigger-fired doc chunks all draw from this shared pool via `remaining`.
1207
- // memoryFraction is a fraction of the effective token budget (post-reserve).
1208
- // Falls back to targetBudgetFraction cap behavior when memoryFraction is not set.
1772
+ // B4: uses b4MemoryFraction (model-aware, blended from MECW catalog) instead
1773
+ // of raw config.memoryFraction so the memory pool scales with what the model
1774
+ // can effectively attend to within its MECW ceiling.
1209
1775
  let memoryBudget;
1210
- if (this.config.memoryFraction != null) {
1211
- memoryBudget = Math.floor(budget * this.config.memoryFraction);
1212
- if (remaining > memoryBudget) {
1213
- remaining = memoryBudget;
1214
- }
1215
- }
1216
- else {
1217
- const targetFraction = this.config.targetBudgetFraction ?? 0.65;
1218
- memoryBudget = Math.floor(budget * targetFraction);
1776
+ {
1777
+ memoryBudget = Math.floor(budget * b4MemoryFraction);
1219
1778
  if (remaining > memoryBudget) {
1220
1779
  remaining = memoryBudget;
1221
1780
  }
@@ -1246,11 +1805,12 @@ export class Compositor {
1246
1805
  }
1247
1806
  }
1248
1807
  }
1249
- // ─── Injected Context Block ────────────────────────────────
1250
- // Facts, knowledge, preferences, semantic recall, and cross-session
1251
- // context are assembled into a single system message injected before
1252
- // conversation history (after system/identity).
1253
- const contextParts = [];
1808
+ // ─── Cache-ordered context assembly ─────────────────────────
1809
+ // Stable, reusable material is lifted above the cache boundary as its
1810
+ // own system messages. Session-volatile material stays in the dynamic
1811
+ // context block below that boundary.
1812
+ const stablePrefixMessages = [];
1813
+ const volatileContextParts = [];
1254
1814
  let contextTokens = 0;
1255
1815
  // ── C1: Content fingerprint dedup set ────────────────────
1256
1816
  // Replaces fragile substring-match dedup across temporal, open-domain,
@@ -1301,7 +1861,7 @@ export class Compositor {
1301
1861
  if (wikiContent) {
1302
1862
  const tokens = estimateTokens(wikiContent);
1303
1863
  if (tokens <= remaining) {
1304
- contextParts.push(wikiContent);
1864
+ volatileContextParts.push(wikiContent);
1305
1865
  contextTokens += tokens;
1306
1866
  remaining -= tokens;
1307
1867
  slots.library += tokens;
@@ -1309,7 +1869,7 @@ export class Compositor {
1309
1869
  else if (remaining > 200) {
1310
1870
  const truncated = this.truncateToTokens(wikiContent, remaining);
1311
1871
  const truncTokens = estimateTokens(truncated);
1312
- contextParts.push(truncated);
1872
+ volatileContextParts.push(truncated);
1313
1873
  contextTokens += truncTokens;
1314
1874
  remaining -= truncTokens;
1315
1875
  slots.library += truncTokens;
@@ -1320,34 +1880,65 @@ export class Compositor {
1320
1880
  // scope: agent — filtered by agentId via filterByScope after fetch
1321
1881
  // Draws from the shared memory budget pool (remaining is pre-capped by memoryBudget).
1322
1882
  if (request.includeFacts !== false && remaining > 500) {
1323
- const factsContent = this.buildFactsFromDb(request.agentId, request.sessionKey, libDb || db);
1324
- if (factsContent !== null) {
1325
- const [content, factCount, scopeFiltered] = factsContent;
1326
- diagFactsIncluded += factCount;
1327
- diagScopeFiltered += scopeFiltered;
1328
- if (content) {
1329
- const tokens = estimateTokens(content);
1883
+ const factSections = this.buildFactSectionsFromDb(request.agentId, request.sessionKey, libDb || db);
1884
+ if (factSections !== null) {
1885
+ const { stableContent, stableCount, volatileContent, volatileCount, filteredCount } = factSections;
1886
+ diagFactsIncluded += stableCount + volatileCount;
1887
+ diagScopeFiltered += filteredCount;
1888
+ if (stableContent) {
1889
+ const stableFactsBlock = `## Stable Facts\n${stableContent}`;
1890
+ const tokens = estimateTokens(stableFactsBlock);
1330
1891
  if (tokens <= remaining) {
1331
- contextParts.push(`## Active Facts\n${content}`);
1892
+ stablePrefixMessages.push({
1893
+ role: 'system',
1894
+ textContent: stableFactsBlock,
1895
+ toolCalls: null,
1896
+ toolResults: null,
1897
+ });
1332
1898
  contextTokens += tokens;
1333
1899
  remaining -= tokens;
1334
- slots.facts = tokens;
1900
+ slots.facts += tokens;
1335
1901
  }
1336
1902
  else if (remaining > 200) {
1337
- const truncated = this.truncateToTokens(content, remaining);
1903
+ const truncated = this.truncateToTokens(stableFactsBlock, remaining);
1338
1904
  const truncTokens = estimateTokens(truncated);
1339
- contextParts.push(`## Active Facts (truncated)\n${truncated}`);
1905
+ stablePrefixMessages.push({
1906
+ role: 'system',
1907
+ textContent: truncated,
1908
+ toolCalls: null,
1909
+ toolResults: null,
1910
+ });
1340
1911
  contextTokens += truncTokens;
1341
1912
  remaining -= truncTokens;
1342
- slots.facts = truncTokens;
1343
- warnings.push('Facts truncated to fit memory budget');
1913
+ slots.facts += truncTokens;
1914
+ warnings.push('Stable facts truncated to fit memory budget');
1344
1915
  }
1345
- // C1: Fingerprint each fact line so downstream dedup paths can skip duplicates
1346
- const factLines = content.split('\n');
1347
- for (const line of factLines) {
1348
- if (line.startsWith('- [')) {
1916
+ for (const line of stableContent.split('\n')) {
1917
+ if (line.startsWith('- ['))
1918
+ addFingerprint(line);
1919
+ }
1920
+ }
1921
+ if (volatileContent) {
1922
+ const volatileFactsBlock = `## Active Facts\n${volatileContent}`;
1923
+ const tokens = estimateTokens(volatileFactsBlock);
1924
+ if (tokens <= remaining) {
1925
+ volatileContextParts.push(volatileFactsBlock);
1926
+ contextTokens += tokens;
1927
+ remaining -= tokens;
1928
+ slots.facts += tokens;
1929
+ }
1930
+ else if (remaining > 200) {
1931
+ const truncated = this.truncateToTokens(volatileFactsBlock, remaining);
1932
+ const truncTokens = estimateTokens(truncated);
1933
+ volatileContextParts.push(truncated);
1934
+ contextTokens += truncTokens;
1935
+ remaining -= truncTokens;
1936
+ slots.facts += truncTokens;
1937
+ warnings.push('Active facts truncated to fit memory budget');
1938
+ }
1939
+ for (const line of volatileContent.split('\n')) {
1940
+ if (line.startsWith('- ['))
1349
1941
  addFingerprint(line);
1350
- }
1351
1942
  }
1352
1943
  }
1353
1944
  }
@@ -1365,7 +1956,6 @@ export class Compositor {
1365
1956
  order: 'DESC',
1366
1957
  });
1367
1958
  if (temporalFacts.length > 0) {
1368
- // C1: Use fingerprint dedup instead of fragile substring match
1369
1959
  const beforeCount = temporalFacts.length;
1370
1960
  const novel = temporalFacts.filter(f => !isDuplicate(f.content));
1371
1961
  diagFingerprintDedups += beforeCount - novel.length;
@@ -1380,9 +1970,9 @@ export class Compositor {
1380
1970
  .join('\n');
1381
1971
  const temporalSection = `## Temporal Context\n${temporalBlock}`;
1382
1972
  const tempTokens = estimateTokens(temporalSection);
1383
- const tempBudget = Math.floor(remaining * 0.20); // Cap at 20% of remaining
1973
+ const tempBudget = Math.floor(remaining * 0.20);
1384
1974
  if (tempTokens <= tempBudget) {
1385
- contextParts.push(temporalSection);
1975
+ volatileContextParts.push(temporalSection);
1386
1976
  contextTokens += tempTokens;
1387
1977
  remaining -= tempTokens;
1388
1978
  slots.facts = (slots.facts ?? 0) + tempTokens;
@@ -1390,7 +1980,7 @@ export class Compositor {
1390
1980
  else {
1391
1981
  const truncated = this.truncateToTokens(temporalSection, tempBudget);
1392
1982
  const truncTokens = estimateTokens(truncated);
1393
- contextParts.push(truncated);
1983
+ volatileContextParts.push(truncated);
1394
1984
  contextTokens += truncTokens;
1395
1985
  remaining -= truncTokens;
1396
1986
  slots.facts = (slots.facts ?? 0) + truncTokens;
@@ -1409,8 +1999,6 @@ export class Compositor {
1409
1999
  // questions. Primary fix for LoCoMo open-domain F1 gap (0.133 baseline).
1410
2000
  if (request.includeSemanticRecall !== false && queryText && isOpenDomainQuery(queryText) && db && remaining > 300) {
1411
2001
  try {
1412
- // searchOpenDomain still does intra-result dedup. Existing-context dedup
1413
- // now happens here via fingerprints so we keep one dedup path.
1414
2002
  const rawOdResults = searchOpenDomain(db, queryText, '', 10);
1415
2003
  const beforeOd = rawOdResults.length;
1416
2004
  const odResults = rawOdResults.filter(r => !isDuplicate(r.content));
@@ -1431,9 +2019,9 @@ export class Compositor {
1431
2019
  .join('\n');
1432
2020
  const odSection = `## Open Domain Context\n${odBlock}`;
1433
2021
  const odTokens = estimateTokens(odSection);
1434
- const odBudget = Math.floor(remaining * 0.20); // Cap at 20% of remaining
2022
+ const odBudget = Math.floor(remaining * 0.20);
1435
2023
  if (odTokens <= odBudget) {
1436
- contextParts.push(odSection);
2024
+ volatileContextParts.push(odSection);
1437
2025
  contextTokens += odTokens;
1438
2026
  remaining -= odTokens;
1439
2027
  slots.facts = (slots.facts ?? 0) + odTokens;
@@ -1441,7 +2029,7 @@ export class Compositor {
1441
2029
  else {
1442
2030
  const truncated = this.truncateToTokens(odSection, odBudget);
1443
2031
  const truncTokens = estimateTokens(truncated);
1444
- contextParts.push(truncated);
2032
+ volatileContextParts.push(truncated);
1445
2033
  contextTokens += truncTokens;
1446
2034
  remaining -= truncTokens;
1447
2035
  slots.facts = (slots.facts ?? 0) + truncTokens;
@@ -1458,17 +2046,28 @@ export class Compositor {
1458
2046
  if (request.includeLibrary !== false && remaining > 500 && libDb) {
1459
2047
  const knowledgeContent = this.buildKnowledgeFromDb(request.agentId, libDb);
1460
2048
  if (knowledgeContent) {
1461
- const tokens = estimateTokens(knowledgeContent);
1462
- if (tokens <= remaining * 0.2) { // Cap knowledge at 20% of remaining
1463
- contextParts.push(`## Knowledge\n${knowledgeContent}`);
2049
+ const stableKnowledgeBlock = `## Knowledge\n${knowledgeContent}`;
2050
+ const tokens = estimateTokens(stableKnowledgeBlock);
2051
+ if (tokens <= remaining * 0.2) {
2052
+ stablePrefixMessages.push({
2053
+ role: 'system',
2054
+ textContent: stableKnowledgeBlock,
2055
+ toolCalls: null,
2056
+ toolResults: null,
2057
+ });
1464
2058
  contextTokens += tokens;
1465
2059
  remaining -= tokens;
1466
2060
  slots.library += tokens;
1467
2061
  }
1468
2062
  else {
1469
- const truncated = this.truncateToTokens(knowledgeContent, Math.floor(remaining * 0.2));
2063
+ const truncated = this.truncateToTokens(stableKnowledgeBlock, Math.floor(remaining * 0.2));
1470
2064
  const truncTokens = estimateTokens(truncated);
1471
- contextParts.push(`## Knowledge (truncated)\n${truncated}`);
2065
+ stablePrefixMessages.push({
2066
+ role: 'system',
2067
+ textContent: truncated,
2068
+ toolCalls: null,
2069
+ toolResults: null,
2070
+ });
1472
2071
  contextTokens += truncTokens;
1473
2072
  remaining -= truncTokens;
1474
2073
  slots.library += truncTokens;
@@ -1481,9 +2080,15 @@ export class Compositor {
1481
2080
  if (request.includeLibrary !== false && remaining > 300 && libDb) {
1482
2081
  const prefsContent = this.buildPreferencesFromDb(request.agentId, libDb);
1483
2082
  if (prefsContent) {
1484
- const tokens = estimateTokens(prefsContent);
1485
- if (tokens <= remaining * 0.1) { // Cap preferences at 10% of remaining
1486
- contextParts.push(`## User Preferences\n${prefsContent}`);
2083
+ const stablePrefsBlock = `## User Preferences\n${prefsContent}`;
2084
+ const tokens = estimateTokens(stablePrefsBlock);
2085
+ if (tokens <= remaining * 0.1) {
2086
+ stablePrefixMessages.push({
2087
+ role: 'system',
2088
+ textContent: stablePrefsBlock,
2089
+ toolCalls: null,
2090
+ toolResults: null,
2091
+ });
1487
2092
  contextTokens += tokens;
1488
2093
  remaining -= tokens;
1489
2094
  slots.library += tokens;
@@ -1518,7 +2123,7 @@ export class Compositor {
1518
2123
  );
1519
2124
  if (semanticContent) {
1520
2125
  const tokens = estimateTokens(semanticContent);
1521
- contextParts.push(`## Related Memory\n${semanticContent}`);
2126
+ volatileContextParts.push(`## Related Memory\n${semanticContent}`);
1522
2127
  contextTokens += tokens;
1523
2128
  remaining -= tokens;
1524
2129
  // Semantic recall draws from multiple sources, attribute to context
@@ -1614,14 +2219,24 @@ export class Compositor {
1614
2219
  const chunkLines = [];
1615
2220
  let chunkTokens = 0;
1616
2221
  for (const chunk of chunks) {
1617
- if (chunkTokens + chunk.tokenEstimate > maxTokens)
1618
- break;
1619
2222
  // Skip chunks from files OpenClaw already injects into the system prompt
1620
2223
  const chunkBasename = chunk.sourcePath.split('/').pop() || '';
1621
2224
  if (OPENCLAW_BOOTSTRAP_FILES.has(chunkBasename))
1622
2225
  continue;
1623
- chunkLines.push(`### ${chunk.sectionPath}\n${chunk.content}`);
1624
- chunkTokens += chunk.tokenEstimate;
2226
+ // C2: degrade oversized chunks to canonical artifact references before
2227
+ // enforcing the per-collection budget gate. Otherwise an oversized raw
2228
+ // chunk gets dropped before the tiny degraded ref ever has a chance to fit.
2229
+ const c2ChunkRef = degradeOversizedDocChunk(chunk.id, chunk.sourcePath, chunk.content, c2ArtifactThresholdTokens);
2230
+ const renderedChunk = c2ChunkRef !== null
2231
+ ? `### ${chunk.sectionPath}\n${c2ChunkRef}`
2232
+ : `### ${chunk.sectionPath}\n${chunk.content}`;
2233
+ const renderedTokens = estimateTokens(renderedChunk);
2234
+ if (chunkTokens + renderedTokens > maxTokens)
2235
+ break;
2236
+ chunkLines.push(renderedChunk);
2237
+ chunkTokens += renderedTokens;
2238
+ if (c2ChunkRef !== null)
2239
+ c2ArtifactDegradations++;
1625
2240
  }
1626
2241
  if (chunkLines.length > 0) {
1627
2242
  const collectionLabel = trigger.collection.split('/').pop() || trigger.collection;
@@ -1638,7 +2253,7 @@ export class Compositor {
1638
2253
  }
1639
2254
  }
1640
2255
  if (docParts.length > 0) {
1641
- contextParts.push(docParts.join('\n\n'));
2256
+ volatileContextParts.push(docParts.join('\n\n'));
1642
2257
  }
1643
2258
  }
1644
2259
  else if (remaining > 400 && (this.vectorStore || libDb)) {
@@ -1653,7 +2268,7 @@ export class Compositor {
1653
2268
  new Promise((_, reject) => setTimeout(() => reject(new Error('fallback_knn_timeout')), 3000)),
1654
2269
  ]);
1655
2270
  if (fallbackContent) {
1656
- contextParts.push(`## Related Memory\n${fallbackContent}`);
2271
+ volatileContextParts.push(`## Related Memory\n${fallbackContent}`);
1657
2272
  const fallbackTokens = estimateTokens(fallbackContent);
1658
2273
  contextTokens += fallbackTokens;
1659
2274
  remaining -= fallbackTokens;
@@ -1681,13 +2296,20 @@ export class Compositor {
1681
2296
  let spawnTokens = 0;
1682
2297
  const maxSpawnTokens = Math.floor(remaining * 0.15);
1683
2298
  for (const chunk of spawnChunks) {
1684
- if (spawnTokens + chunk.tokenEstimate > maxSpawnTokens)
2299
+ // C2: degrade oversized spawn chunks before enforcing the lane budget,
2300
+ // so a bounded reference can fit even when the raw chunk cannot.
2301
+ const c2SpawnRef = degradeOversizedDocChunk(chunk.id, chunk.sourcePath, chunk.content, c2ArtifactThresholdTokens);
2302
+ const renderedChunk = c2SpawnRef ?? chunk.content;
2303
+ const renderedTokens = estimateTokens(renderedChunk);
2304
+ if (spawnTokens + renderedTokens > maxSpawnTokens)
1685
2305
  break;
1686
- spawnLines.push(chunk.content);
1687
- spawnTokens += chunk.tokenEstimate;
2306
+ spawnLines.push(renderedChunk);
2307
+ spawnTokens += renderedTokens;
2308
+ if (c2SpawnRef !== null)
2309
+ c2ArtifactDegradations++;
1688
2310
  }
1689
2311
  if (spawnLines.length > 0) {
1690
- contextParts.push(`## Spawn Context Documents\n${spawnLines.join('\n\n')}`);
2312
+ volatileContextParts.push(`## Spawn Context Documents\n${spawnLines.join('\n\n')}`);
1691
2313
  contextTokens += spawnTokens;
1692
2314
  remaining -= spawnTokens;
1693
2315
  slots.library += spawnTokens;
@@ -1706,7 +2328,7 @@ export class Compositor {
1706
2328
  const tokens = estimateTokens(crossSessionContent);
1707
2329
  const maxContextTokens = Math.min(this.config.maxCrossSessionContext, Math.floor(remaining * 0.2));
1708
2330
  if (tokens <= maxContextTokens) {
1709
- contextParts.push(`## Other Active Sessions\n${crossSessionContent}`);
2331
+ volatileContextParts.push(`## Other Active Sessions\n${crossSessionContent}`);
1710
2332
  contextTokens += tokens;
1711
2333
  remaining -= tokens;
1712
2334
  slots.context += tokens;
@@ -1714,7 +2336,7 @@ export class Compositor {
1714
2336
  else {
1715
2337
  const truncated = this.truncateToTokens(crossSessionContent, maxContextTokens);
1716
2338
  const truncTokens = estimateTokens(truncated);
1717
- contextParts.push(`## Other Active Sessions (truncated)\n${truncated}`);
2339
+ volatileContextParts.push(`## Other Active Sessions (truncated)\n${truncated}`);
1718
2340
  contextTokens += truncTokens;
1719
2341
  remaining -= truncTokens;
1720
2342
  slots.context += truncTokens;
@@ -1730,68 +2352,92 @@ export class Compositor {
1730
2352
  if (actionSummary) {
1731
2353
  const actionTokens = Math.ceil(actionSummary.length / 4);
1732
2354
  if (actionTokens <= remaining) {
1733
- contextParts.push(actionSummary);
2355
+ volatileContextParts.push(actionSummary);
1734
2356
  contextTokens += actionTokens;
1735
2357
  remaining -= actionTokens;
1736
2358
  slots.context += actionTokens;
1737
2359
  }
1738
2360
  }
1739
2361
  }
2362
+ const firstNonSystem = messages.findIndex(m => m.role !== 'system');
2363
+ const stableInsertIdx = firstNonSystem === -1 ? messages.length : firstNonSystem;
2364
+ if (stablePrefixMessages.length > 0) {
2365
+ messages.splice(stableInsertIdx, 0, ...stablePrefixMessages);
2366
+ }
1740
2367
  // ── Inject assembled context block ──────────────────────
1741
- const assembledContextBlock = contextParts.length > 0 ? contextParts.join('\n\n') : undefined;
2368
+ const assembledContextBlock = volatileContextParts.length > 0 ? volatileContextParts.join('\n\n') : undefined;
1742
2369
  if (assembledContextBlock) {
1743
2370
  const contextMsg = {
1744
2371
  role: 'system',
1745
2372
  textContent: assembledContextBlock,
1746
2373
  toolCalls: null,
1747
2374
  toolResults: null,
1748
- // DYNAMIC_BOUNDARY: this slot is session-specific (facts, recall, episodes).
1749
- // It must NOT be included in any prompt caching boundary that spans static content.
1750
- // The provider translator will insert a cache_control ephemeral marker BEFORE
1751
- // this message so providers can cache everything up to identity/system as static context.
1752
- metadata: { dynamicBoundary: true },
2375
+ // CACHE_PREFIX_BOUNDARY_SLOT: this message starts the volatile side of the
2376
+ // prompt. Everything above it is stable-prefix material eligible for reuse;
2377
+ // everything at or below it is per-session / per-turn context.
2378
+ metadata: { dynamicBoundary: true, cacheBoundarySlot: CACHE_PREFIX_BOUNDARY_SLOT },
1753
2379
  };
1754
- // Insert after system/identity, before history
1755
- // Insert context after all system/identity messages, before conversation history.
1756
- // findIndex returns -1 when all messages are system-role — handle explicitly.
1757
- const firstNonSystem = messages.findIndex(m => m.role !== 'system');
1758
- const insertIdx = firstNonSystem === -1 ? messages.length : firstNonSystem;
1759
- messages.splice(insertIdx, 0, contextMsg);
1760
- }
1761
- // ─── Safety Valve: Post-Assembly Budget Check ───────────────────
2380
+ messages.splice(stableInsertIdx + stablePrefixMessages.length, 0, contextMsg);
2381
+ }
2382
+ const stablePrefix = getStablePrefixMessages(messages);
2383
+ const prefixSegmentCount = stablePrefix.length;
2384
+ const prefixTokens = stablePrefix.reduce((sum, msg) => sum + estimateMessageTokens(msg), 0);
2385
+ const volatileHistoryTokens = messages.slice(prefixSegmentCount)
2386
+ .reduce((sum, msg) => sum + estimateMessageTokens(msg), 0);
2387
+ const prefixHash = computeStablePrefixHash(stablePrefix);
2388
+ // ─── Safety Valve: Post-Assembly Budget Check (C1-aware) ──────────────
1762
2389
  // Re-estimate total tokens after all slots are assembled. If the
1763
2390
  // composition exceeds tokenBudget * 1.05 (5% tolerance for estimation
1764
2391
  // drift), trim history messages from the oldest until we're under budget.
1765
2392
  // History is the most compressible slot — system/identity are never
1766
2393
  // truncated, and context (facts/recall/episodes) is more valuable per-token.
2394
+ //
2395
+ // C1: When an assistant message with toolCalls is ejected, its dependent
2396
+ // tool-result messages are co-ejected or stubbed via resolveToolChainEjections.
2397
+ // This ensures no orphaned tool-results survive above the stable-prefix
2398
+ // boundary and eliminates the downstream repairToolPairs cleanup cost.
1767
2399
  const estimatedTotal = messages.reduce((sum, m) => sum + estimateMessageTokens(m), 0);
1768
2400
  const hardCeiling = Math.floor(budget * 1.05);
1769
2401
  if (estimatedTotal > hardCeiling) {
1770
2402
  const overage = estimatedTotal - budget;
1771
2403
  let trimmed = 0;
1772
2404
  let trimCount = 0;
1773
- // Find history messages (non-system, after system/identity block)
1774
- // Walk forward from the first non-system message, trimming oldest history first
2405
+ // Collect indices of messages to eject before mutating the array.
2406
+ // Walk forward from the first non-system message, trimming oldest first.
1775
2407
  const firstNonSystemIdx = messages.findIndex(m => m.role !== 'system');
2408
+ const ejectIndices = new Set();
1776
2409
  if (firstNonSystemIdx >= 0) {
1777
2410
  let i = firstNonSystemIdx;
1778
2411
  while (i < messages.length && trimmed < overage) {
1779
- // Don't trim the last user message (current prompt)
2412
+ // Don't trim the last user message (current prompt).
1780
2413
  if (i === messages.length - 1 && messages[i].role === 'user')
1781
2414
  break;
1782
2415
  const msgTokens = estimateMessageTokens(messages[i]);
1783
- messages.splice(i, 1);
2416
+ ejectIndices.add(i);
1784
2417
  trimmed += msgTokens;
1785
2418
  trimCount++;
1786
- // Don't increment i — splice shifts everything down
2419
+ i++;
1787
2420
  }
1788
2421
  }
1789
- if (trimCount > 0) {
2422
+ if (ejectIndices.size > 0) {
2423
+ // C1: centralized ejection — resolves dependent tool-results atomically.
2424
+ const ejectionResult = resolveToolChainEjections(messages, ejectIndices, 'eviction_oversize');
2425
+ // Replace in-place so the rest of the compose path sees the clean array.
2426
+ messages.length = 0;
2427
+ messages.push(...ejectionResult.messages);
2428
+ c1CoEjections += ejectionResult.coEjections;
2429
+ c1StubReplacements += ejectionResult.stubReplacements;
1790
2430
  slots.history = Math.max(0, slots.history - trimmed);
1791
2431
  remaining += trimmed;
1792
- warnings.push(`Safety valve: trimmed ${trimCount} oldest history messages (${trimmed} tokens) to fit budget`);
2432
+ const c1Note = (ejectionResult.coEjections + ejectionResult.stubReplacements > 0)
2433
+ ? ` [C1: ${ejectionResult.coEjections} co-ejected, ${ejectionResult.stubReplacements} stubbed]`
2434
+ : '';
2435
+ warnings.push(`Safety valve: trimmed ${trimCount} oldest history messages (${trimmed} tokens) to fit budget${c1Note}`);
1793
2436
  }
1794
2437
  }
2438
+ // ─── Sprint 2.1: Hydrate active-turn artifact stubs ────────────────────
2439
+ // Must run on NeutralMessages[] BEFORE provider translation.
2440
+ const hydrationResult = this.hydrateActiveTurnArtifacts(messages, db);
1795
2441
  // ─── Translate to provider format (unless caller wants neutral) ───
1796
2442
  // When skipProviderTranslation is set, return NeutralMessages directly.
1797
2443
  // The context engine plugin uses this: the OpenClaw runtime handles its
@@ -1861,7 +2507,7 @@ export class Compositor {
1861
2507
  }
1862
2508
  // W3: Build compose diagnostics
1863
2509
  let zeroResultReason;
1864
- if (contextParts.length === 0) {
2510
+ if (volatileContextParts.length === 0 && stablePrefixMessages.length === 0) {
1865
2511
  if (diagScopeFiltered > 0 && diagFactsIncluded === 0 && diagSemanticResults === 0) {
1866
2512
  zeroResultReason = 'scope_filtered_all';
1867
2513
  }
@@ -1897,6 +2543,37 @@ export class Compositor {
1897
2543
  fingerprintDedups: diagFingerprintDedups,
1898
2544
  fingerprintCollisions: diagFingerprintCollisions,
1899
2545
  windowCacheHit: false,
2546
+ prefixSegmentCount,
2547
+ prefixTokens,
2548
+ prefixHash,
2549
+ // B2: Surface the previous cached prefixHash when this full compose was
2550
+ // triggered by a cache bypass (stable-prefix mutation detected).
2551
+ prevPrefixHash: _prevPrefixHashFromBypass,
2552
+ volatileHistoryTokens,
2553
+ // Sprint 4 fields
2554
+ sessionType: s4SessionType,
2555
+ historyDepthChosen: s4EffectiveDepth,
2556
+ estimatedMsgDensityTokens: s4ObservedDensity,
2557
+ rescueTrimFired: s4RescueTrimFired,
2558
+ // B4: model-aware lane budget diagnostics
2559
+ mecwProfile: b4MecwProfile,
2560
+ mecwApplied: b4MecwApplied,
2561
+ mecwBlend: b4MecwBlend,
2562
+ effectiveHistoryFraction: b4HistoryFraction,
2563
+ effectiveMemoryFraction: b4MemoryFraction,
2564
+ trimSoftTarget: TRIM_BUDGET_POLICY.trimSoftTarget,
2565
+ trimGrowthThreshold: TRIM_BUDGET_POLICY.trimGrowthThreshold,
2566
+ trimHeadroomFraction: TRIM_BUDGET_POLICY.trimHeadroomFraction,
2567
+ // C1: tool-chain ejection telemetry
2568
+ toolChainCoEjections: c1CoEjections > 0 ? c1CoEjections : undefined,
2569
+ toolChainStubReplacements: c1StubReplacements > 0 ? c1StubReplacements : undefined,
2570
+ // C2: artifact oversize degradation telemetry
2571
+ artifactDegradations: c2ArtifactDegradations > 0 ? c2ArtifactDegradations : undefined,
2572
+ artifactOversizeThresholdTokens: c2ArtifactThresholdTokens,
2573
+ // Sprint 2.1: tool artifact hydration telemetry
2574
+ artifactsHydrated: hydrationResult.artifactsHydrated > 0 ? hydrationResult.artifactsHydrated : undefined,
2575
+ hydrationBytes: hydrationResult.hydrationBytes > 0 ? hydrationResult.hydrationBytes : undefined,
2576
+ hydrationMisses: hydrationResult.hydrationMisses > 0 ? hydrationResult.hydrationMisses : undefined,
1900
2577
  };
1901
2578
  if (pressureHigh) {
1902
2579
  warnings.push(`SESSION_PRESSURE_HIGH: avg_turn_cost=${avgTurnCost} tokens, dynamic reserve capped at ${Math.round(dynamicReserve * 100)}%`);
@@ -1913,6 +2590,14 @@ export class Compositor {
1913
2590
  // VS-1: Dual-write, session-scoped key for backwards compat;
1914
2591
  // topic-scoped key for per-topic window retrieval when activeTopicId is set.
1915
2592
  try {
2593
+ // B2: Compute a cheap prefix input hash from the system + identity slot
2594
+ // contents that fed the stable prefix. Stored in WindowCacheMeta so the
2595
+ // C4 fast-exit can detect prefix mutations without re-running full compose.
2596
+ const _prefixInputHash = createHash('sha256')
2597
+ .update(systemContent ?? '')
2598
+ .update('\n␞\n')
2599
+ .update(identityContent ?? '')
2600
+ .digest('hex');
1916
2601
  await this.cache.setWindow(request.agentId, request.sessionKey, messages, 120);
1917
2602
  await this.cache.setWindowMeta(request.agentId, request.sessionKey, {
1918
2603
  slots: slots,
@@ -1920,6 +2605,8 @@ export class Compositor {
1920
2605
  warnings,
1921
2606
  diagnostics,
1922
2607
  composedAt,
2608
+ prefixHash,
2609
+ prefixInputHash: _prefixInputHash,
1923
2610
  }, 120);
1924
2611
  }
1925
2612
  catch {
@@ -1972,7 +2659,7 @@ export class Compositor {
1972
2659
  // Cursor write is best-effort
1973
2660
  }
1974
2661
  }
1975
- console.log(`[hypermem:compose] agent=${request.agentId} triggers=${diagTriggerHits} fallback=${diagTriggerFallbackUsed} facts=${diagFactsIncluded} semantic=${diagSemanticResults} chunks=${diagDocChunkCollections} scopeFiltered=${diagScopeFiltered} mode=${diagRetrievalMode} crossTopicKeystones=${diagCrossTopicKeystones}`);
2662
+ console.log(`[hypermem:compose] agent=${request.agentId} triggers=${diagTriggerHits} fallback=${diagTriggerFallbackUsed} facts=${diagFactsIncluded} semantic=${diagSemanticResults} chunks=${diagDocChunkCollections} scopeFiltered=${diagScopeFiltered} mode=${diagRetrievalMode} crossTopicKeystones=${diagCrossTopicKeystones} c2_degradations=${c2ArtifactDegradations} c2_threshold=${c2ArtifactThresholdTokens}`);
1976
2663
  return {
1977
2664
  messages: outputMessages,
1978
2665
  tokenCount: totalTokens,
@@ -2080,7 +2767,7 @@ export class Compositor {
2080
2767
  },
2081
2768
  });
2082
2769
  }
2083
- async refreshRedisGradient(agentId, sessionKey, db, tokenBudget) {
2770
+ async refreshRedisGradient(agentId, sessionKey, db, tokenBudget, historyDepth) {
2084
2771
  const store = new MessageStore(db);
2085
2772
  const conversation = store.getConversation(sessionKey);
2086
2773
  if (!conversation)
@@ -2105,28 +2792,38 @@ export class Compositor {
2105
2792
  // Fence lookup is best-effort
2106
2793
  }
2107
2794
  // Phase 3: prefer DAG walk from context head
2795
+ const refreshHistoryLimit = Math.min(this.config.maxHistoryMessages, Math.max(1, historyDepth ?? this.config.maxHistoryMessages));
2108
2796
  let rawHistory;
2109
2797
  if (activeContext?.headMessageId) {
2110
- rawHistory = store.getHistoryByDAGWalk(activeContext.headMessageId, this.config.maxHistoryMessages);
2798
+ rawHistory = store.getHistoryByDAGWalk(activeContext.headMessageId, refreshHistoryLimit);
2111
2799
  if (rawHistory.length === 0) {
2112
- rawHistory = store.getRecentMessages(conversation.id, this.config.maxHistoryMessages, gradientFenceMessageId);
2800
+ rawHistory = store.getRecentMessages(conversation.id, refreshHistoryLimit, gradientFenceMessageId);
2113
2801
  }
2114
2802
  }
2115
2803
  else {
2116
- rawHistory = store.getRecentMessages(conversation.id, this.config.maxHistoryMessages, gradientFenceMessageId);
2117
- }
2804
+ rawHistory = store.getRecentMessages(conversation.id, refreshHistoryLimit, gradientFenceMessageId);
2805
+ }
2806
+ // Sprint 3 (AfterTurn Rebuild/Trim Loop Fix): cap gradient total-window tokens
2807
+ // at the same 65% target that assemble.normal trims to. Previously this was
2808
+ // tokenBudget/0.80 (≈1.25×budget), which made applyToolGradient preserve more
2809
+ // content than the trim target allowed — causing assemble.normal to always trim
2810
+ // on the next turn even in the steady-state path. Aligning the gradient cap to
2811
+ // the trim target means the rebuilt window already fits within the assemble
2812
+ // envelope by construction.
2813
+ const { softBudget: gradientAssembleBudget } = resolveTrimBudgets(tokenBudget ?? 0);
2118
2814
  const transformedHistory = applyToolGradient(rawHistory, {
2119
2815
  totalWindowTokens: tokenBudget && tokenBudget > 0
2120
- ? Math.max(tokenBudget, Math.floor(tokenBudget / 0.80))
2816
+ ? gradientAssembleBudget
2121
2817
  : TOOL_PLANNING_BASELINE_WINDOW,
2122
2818
  });
2123
2819
  // If a token budget is provided, trim the gradient-compressed window to fit
2124
- // before writing to Redis. Without this, up to maxHistoryMessages messages
2125
- // land in Redis regardless of size, and trimHistoryToTokenBudget fires
2126
- // on every subsequent assemble() causing per-turn churn.
2820
+ // before writing to Redis. The cap uses the same GRADIENT_ASSEMBLE_TARGET
2821
+ // (0.65) so the window written to Redis sits inside the assemble.normal trim
2822
+ // envelope. The next assemble() will find the window already within budget
2823
+ // and skip the trim entirely in the steady-state path.
2127
2824
  let historyToWrite = transformedHistory;
2128
2825
  if (tokenBudget && tokenBudget > 0) {
2129
- const budgetCap = Math.floor(tokenBudget * 0.8);
2826
+ const budgetCap = gradientAssembleBudget;
2130
2827
  let runningTokens = 0;
2131
2828
  const clusters = clusterNeutralMessages(transformedHistory);
2132
2829
  const cappedClusters = [];
@@ -2146,7 +2843,7 @@ export class Compositor {
2146
2843
  `for ${agentId}/${sessionKey} (budgetCap=${budgetCap}, tokenCost=${runningTokens})`);
2147
2844
  }
2148
2845
  }
2149
- await this.cache.replaceHistory(agentId, sessionKey, historyToWrite, this.config.maxHistoryMessages);
2846
+ await this.cache.replaceHistory(agentId, sessionKey, historyToWrite, refreshHistoryLimit);
2150
2847
  }
2151
2848
  // ─── Slot Content Resolution ─────────────────────────────────
2152
2849
  /**
@@ -2210,6 +2907,19 @@ export class Compositor {
2210
2907
  * Returns [content, factCount, scopeFilteredCount] or null if DB unavailable.
2211
2908
  */
2212
2909
  buildFactsFromDb(agentId, sessionKey, db) {
2910
+ const sections = this.buildFactSectionsFromDb(agentId, sessionKey, db);
2911
+ if (!sections)
2912
+ return null;
2913
+ const combined = [sections.stableContent, sections.volatileContent]
2914
+ .filter((value) => Boolean(value))
2915
+ .join('\n');
2916
+ return [
2917
+ combined || null,
2918
+ sections.stableCount + sections.volatileCount,
2919
+ sections.filteredCount,
2920
+ ];
2921
+ }
2922
+ buildFactSectionsFromDb(agentId, sessionKey, db) {
2213
2923
  if (!db)
2214
2924
  return null;
2215
2925
  const tableExists = db.prepare("SELECT count(*) as cnt FROM sqlite_master WHERE type='table' AND name='facts'").get();
@@ -2225,30 +2935,52 @@ export class Compositor {
2225
2935
  ORDER BY confidence DESC, decay_score ASC
2226
2936
  LIMIT ?
2227
2937
  `).all(agentId, this.config.maxFacts);
2228
- if (rawRows.length === 0)
2229
- return [null, 0, 0];
2230
- // W1: Apply scope filter — enforce retrieval access control
2938
+ if (rawRows.length === 0) {
2939
+ return {
2940
+ stableContent: null,
2941
+ stableCount: 0,
2942
+ volatileContent: null,
2943
+ volatileCount: 0,
2944
+ filteredCount: 0,
2945
+ };
2946
+ }
2231
2947
  const ctx = { agentId, sessionKey };
2232
2948
  const { allowed, filteredCount } = filterByScope(rawRows.map(r => ({
2233
2949
  ...r,
2234
2950
  agentId: r.agent_id,
2235
2951
  sessionKey: r.session_key,
2236
2952
  })), ctx);
2237
- if (allowed.length === 0)
2238
- return [null, 0, filteredCount];
2239
- const content = allowed
2240
- .map(r => {
2241
- // Session attribution: label facts from a different session so the model
2242
- // can distinguish current-session context from cross-session facts.
2243
- // Shows last 8 chars of session key as a stable short identifier.
2244
- const fromOtherSession = r.sessionKey && r.sessionKey !== sessionKey;
2245
- const sessionSuffix = fromOtherSession
2246
- ? `, session:${r.sessionKey.slice(-8)}`
2247
- : '';
2248
- return `- [${r.domain || 'general'}${sessionSuffix}] ${r.content}`;
2249
- })
2250
- .join('\n');
2251
- return [content, allowed.length, filteredCount];
2953
+ if (allowed.length === 0) {
2954
+ return {
2955
+ stableContent: null,
2956
+ stableCount: 0,
2957
+ volatileContent: null,
2958
+ volatileCount: 0,
2959
+ filteredCount,
2960
+ };
2961
+ }
2962
+ const formatRows = (rows) => {
2963
+ if (rows.length === 0)
2964
+ return null;
2965
+ return rows
2966
+ .map(r => {
2967
+ const fromOtherSession = r.sessionKey && r.sessionKey !== sessionKey;
2968
+ const sessionSuffix = fromOtherSession
2969
+ ? `, session:${r.sessionKey.slice(-8)}`
2970
+ : '';
2971
+ return `- [${r.domain || 'general'}${sessionSuffix}] ${r.content}`;
2972
+ })
2973
+ .join('\n');
2974
+ };
2975
+ const stableRows = allowed.filter(r => r.scope !== 'session' && (!r.sessionKey || r.sessionKey !== sessionKey));
2976
+ const volatileRows = allowed.filter(r => !stableRows.includes(r));
2977
+ return {
2978
+ stableContent: formatRows(stableRows),
2979
+ stableCount: stableRows.length,
2980
+ volatileContent: formatRows(volatileRows),
2981
+ volatileCount: volatileRows.length,
2982
+ filteredCount,
2983
+ };
2252
2984
  }
2253
2985
  /**
2254
2986
  * Build knowledge content from library DB.