@psiclawops/hypermem 0.5.4 → 0.5.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -24,6 +24,15 @@ import { getActiveFOS, matchMOD, renderFOS, renderMOD, renderLightFOS, resolveOu
24
24
  import { KnowledgeStore } from './knowledge-store.js';
25
25
  import { TemporalStore, hasTemporalSignals } from './temporal-store.js';
26
26
  import { isOpenDomainQuery, searchOpenDomain } from './open-domain.js';
27
+ /**
28
+ * Files that OpenClaw's contextInjection injects into the system prompt.
29
+ * HyperMem must not re-inject these via doc chunk retrieval to avoid duplication.
30
+ * Exported so plugin and other consumers can share the same dedup set.
31
+ */
32
+ export const OPENCLAW_BOOTSTRAP_FILES = new Set([
33
+ 'SOUL.md', 'IDENTITY.md', 'USER.md', 'TOOLS.md',
34
+ 'AGENTS.md', 'HEARTBEAT.md', 'MEMORY.md', 'BOOTSTRAP.md',
35
+ ]);
27
36
  /**
28
37
  * Model context window sizes by provider/model string (or partial match).
29
38
  * Used as fallback when tokenBudget is not passed by the runtime.
@@ -65,7 +74,26 @@ const MODEL_CONTEXT_WINDOWS = [
65
74
  * Default reserve: 25% (leaves 75% for input context).
66
75
  * Falls back to defaultTokenBudget if no model match.
67
76
  */
68
- function resolveModelBudget(model, defaultBudget, reserve = 0.15) {
77
+ /**
78
+ * Resolve effective input token budget for a model.
79
+ *
80
+ * Priority:
81
+ * 1. If budgetFraction is set AND model window is detected: window × budgetFraction × (1 - reserve)
82
+ * 2. If model window detected but no budgetFraction: window × (1 - reserve)
83
+ * 3. Fallback to defaultTokenBudget (absolute number)
84
+ */
85
+ function resolveModelBudget(model, defaultBudget, reserve = 0.15, budgetFraction) {
86
+ const window = resolveModelWindow(model, defaultBudget);
87
+ // If we detected an actual model window (not the fallback derivation)
88
+ if (model && budgetFraction != null) {
89
+ const normalized = model.toLowerCase();
90
+ for (const entry of MODEL_CONTEXT_WINDOWS) {
91
+ if (normalized.includes(entry.pattern)) {
92
+ return Math.floor(entry.tokens * budgetFraction * (1 - reserve));
93
+ }
94
+ }
95
+ }
96
+ // Original path: detected window × (1 - reserve), or absolute fallback
69
97
  if (!model)
70
98
  return defaultBudget;
71
99
  const normalized = model.toLowerCase();
@@ -102,12 +130,19 @@ function resolveModelWindow(model, defaultBudget) {
102
130
  * emit a warning or trigger checkpointing.
103
131
  */
104
132
  function computeDynamicReserve(recentMessages, totalWindow, config) {
105
- const base = config.contextWindowReserve ?? 0.15;
133
+ const base = config.reserveFraction ?? config.contextWindowReserve ?? 0.25;
106
134
  const horizon = config.dynamicReserveTurnHorizon ?? 5;
107
135
  const max = config.dynamicReserveMax ?? 0.50;
108
136
  const enabled = config.dynamicReserveEnabled ?? true;
109
- if (!enabled || recentMessages.length === 0 || totalWindow <= 0) {
110
- return { reserve: base, avgTurnCost: 0, dynamic: false, pressureHigh: false };
137
+ // Cold sessions (no message history) use a minimal floor so the full window
138
+ // stays available. The static reserveFraction applies only once the session
139
+ // has messages and dynamic sampling can compute a meaningful estimate.
140
+ const COLD_SESSION_FLOOR = 0.15;
141
+ if (!enabled || totalWindow <= 0) {
142
+ return { reserve: COLD_SESSION_FLOOR, avgTurnCost: 0, dynamic: false, pressureHigh: false };
143
+ }
144
+ if (recentMessages.length === 0) {
145
+ return { reserve: COLD_SESSION_FLOOR, avgTurnCost: 0, dynamic: false, pressureHigh: false };
111
146
  }
112
147
  // Sample the last 20 user+assistant messages for turn cost estimation.
113
148
  // Tool messages are excluded — they're already compressed by the gradient
@@ -131,17 +166,26 @@ function computeDynamicReserve(recentMessages, totalWindow, config) {
131
166
  return { reserve: dynamicFrac, avgTurnCost, dynamic: true, pressureHigh: false };
132
167
  }
133
168
  const DEFAULT_CONFIG = {
169
+ // Primary budget controls
170
+ budgetFraction: 0.703,
171
+ reserveFraction: 0.25,
172
+ historyFraction: 0.40,
173
+ memoryFraction: 0.40,
174
+ // Absolute fallback
134
175
  defaultTokenBudget: 90000,
176
+ // History internals
135
177
  maxHistoryMessages: 250,
136
- maxFacts: 28,
137
- maxCrossSessionContext: 6000,
138
- maxRecentToolPairs: 3,
139
- maxProseToolPairs: 10,
140
178
  warmHistoryBudgetFraction: 0.4,
141
179
  keystoneHistoryFraction: 0.2,
142
180
  keystoneMaxMessages: 15,
143
181
  keystoneMinSignificance: 0.5,
144
- contextWindowReserve: 0.15,
182
+ // Memory internals
183
+ maxFacts: 28,
184
+ maxCrossSessionContext: 6000,
185
+ // Tool gradient (internal)
186
+ maxRecentToolPairs: 3,
187
+ maxProseToolPairs: 10,
188
+ // Dynamic reserve
145
189
  dynamicReserveTurnHorizon: 5,
146
190
  dynamicReserveMax: 0.50,
147
191
  dynamicReserveEnabled: true,
@@ -328,7 +372,7 @@ function stripSecurityPreamble(content) {
328
372
  return stripped.trim().length > 20 ? stripped.trim() : content;
329
373
  }
330
374
  // Minimum floor: if trimming would leave less than 30% of original content, return a
331
- // stripped sentinel instead of a misleading fragment. A partial result that looks
375
  // stripped sentinel instead of a misleading fragment. A partial result that looks
332
376
  // complete is worse than a clear signal that the result was dropped.
333
377
  // Applied only in applyTierPayloadCap (pressure-driven trimming), not in structural
334
378
  // truncation paths where head+tail is always semantically useful.
@@ -598,9 +642,9 @@ function applyTierPayloadCap(msg, perResultCap, perTurnCap, usedSoFar = 0, maxTa
598
642
  // render the truncated result as: [security notice] + [middle marker] + [last line].
599
643
  const stripped = stripSecurityPreamble(content);
600
644
  // Floor check (TUNE-015): if the cap would leave less than 30% of the stripped content
601
- // AND less than 2000 chars absolute, return a sentinel instead of a misleading fragment.
645
+ // AND less than 2000 chars absolute, return a sentinel instead of a misleading fragment.
602
646
  // Partial results that look complete are worse than a clear dropped-result signal.
603
- // The absolute floor prevents the sentinel from firing on large natural truncations
647
+ // The absolute floor prevents the sentinel from firing on large natural truncations
604
648
  // (e.g., 110k → 16k is a meaningful slice, not a misleading fragment).
605
649
  if (perResultCap < stripped.length * TOOL_GRADIENT_MIN_USEFUL_FRACTION && perResultCap < 2_000) {
606
650
  content = `[result too large for current context budget \u2014 ${stripped.length} chars stripped]`;
@@ -784,6 +828,73 @@ export class Compositor {
784
828
  async compose(request, db, libraryDb) {
785
829
  const store = new MessageStore(db);
786
830
  const libDb = libraryDb || this.libraryDb;
831
+ const toComposeOutputMessages = (inputMessages) => {
832
+ // When skipProviderTranslation is set, compose returns the neutral window
833
+ // typed as ProviderMessage[] by contract. The runtime translates later.
834
+ return request.skipProviderTranslation
835
+ ? inputMessages
836
+ : toProviderFormat(inputMessages, request.provider ?? request.model ?? null);
837
+ };
838
+ // ── C4: Window cache fast-exit ────────────────────────────
839
+ // If nothing has changed since the last compose (cursor.lastSentId >= newest
840
+ // message id in the DB), skip the full pipeline and return the cached window.
841
+ // Particularly effective for low-frequency sessions (heartbeat agents, council
842
+ // seats between rounds). TTL on the cache write remains 120s — this is a
843
+ // conservative early-exit before the TTL expires, not a TTL extension.
844
+ if (request.includeHistory !== false && request.skipWindowCache !== true) {
845
+ try {
846
+ const newestRow = db.prepare('SELECT MAX(id) AS maxId FROM messages WHERE agent_id = ?').get(request.agentId);
847
+ const newestMsgId = newestRow?.maxId;
848
+ if (newestMsgId != null) {
849
+ const cachedBundle = await this.cache.getFreshWindowBundle(request.agentId, request.sessionKey, newestMsgId);
850
+ if (cachedBundle) {
851
+ // Validate the cached bundle is compatible with this request.
852
+ // A mismatch on any of these means we must do a full compose:
853
+ // - tokenBudget: cached total exceeds the requested cap
854
+ // - slot flags: caller disabled slots that the cache populated
855
+ // - historyDepth: caller wants fewer messages than the cache holds
856
+ const cachedTotal = cachedBundle.meta.totalTokens;
857
+ const budgetOk = !request.tokenBudget ||
858
+ cachedTotal <= request.tokenBudget * 1.05;
859
+ const factsOk = request.includeFacts !== false ||
860
+ (cachedBundle.meta.slots['facts'] ?? 0) === 0;
861
+ const libraryOk = request.includeLibrary !== false ||
862
+ (cachedBundle.meta.slots['library'] ?? 0) === 0;
863
+ const contextOk = request.includeContext !== false ||
864
+ (cachedBundle.meta.slots['context'] ?? 0) === 0;
865
+ // historyDepth constrains how many messages the caller wants;
866
+ // we can't slice a cached bundle safely, so skip cache.
867
+ const depthOk = !request.historyDepth;
868
+ if (budgetOk && factsOk && libraryOk && contextOk && depthOk) {
869
+ const cachedSlots = {
870
+ system: cachedBundle.meta.slots['system'] ?? 0,
871
+ identity: cachedBundle.meta.slots['identity'] ?? 0,
872
+ history: cachedBundle.meta.slots['history'] ?? 0,
873
+ facts: cachedBundle.meta.slots['facts'] ?? 0,
874
+ context: cachedBundle.meta.slots['context'] ?? 0,
875
+ library: cachedBundle.meta.slots['library'] ?? 0,
876
+ };
877
+ return {
878
+ messages: toComposeOutputMessages(cachedBundle.messages),
879
+ tokenCount: cachedBundle.meta.totalTokens,
880
+ slots: cachedSlots,
881
+ truncated: false,
882
+ hasWarnings: cachedBundle.meta.warnings.length > 0,
883
+ warnings: cachedBundle.meta.warnings,
884
+ diagnostics: {
885
+ ...cachedBundle.meta.diagnostics,
886
+ windowCacheHit: true,
887
+ },
888
+ };
889
+ }
890
+ // Incompatible request — fall through to full compose
891
+ }
892
+ }
893
+ }
894
+ catch {
895
+ // Cache fast-exit is best-effort, fall through to full compose
896
+ }
897
+ }
787
898
  // Dynamic reserve: use a lightweight SQLite sample to estimate avg turn cost
788
899
  // BEFORE assembling the full context. This gives us the reserve fraction we
789
900
  // need to compute the effective token budget at the start of compose.
@@ -794,7 +905,7 @@ export class Compositor {
794
905
  ? store.getRecentMessages(sampleConv.id, 40)
795
906
  : [];
796
907
  const { reserve: dynamicReserve, avgTurnCost, dynamic: isDynamic, pressureHigh } = computeDynamicReserve(sampleMessages, totalWindow, this.config);
797
- const budget = request.tokenBudget || resolveModelBudget(request.model, this.config.defaultTokenBudget, dynamicReserve);
908
+ const budget = request.tokenBudget || resolveModelBudget(request.model, this.config.defaultTokenBudget, dynamicReserve, this.config.budgetFraction);
798
909
  let remaining = budget;
799
910
  const warnings = [];
800
911
  const slots = {
@@ -838,7 +949,7 @@ export class Compositor {
838
949
  if (remaining > 100 && request.includeLibrary !== false) {
839
950
  const fosEnabled = this.config?.enableFOS !== false;
840
951
  const modEnabled = this.config?.enableMOD !== false;
841
- const outputTier = resolveOutputTier((this.config?.outputProfile ?? this.config?.outputStandard), fosEnabled, modEnabled);
952
+ const outputTier = resolveOutputTier((this.config?.hyperformProfile ?? this.config?.outputProfile ?? this.config?.outputStandard), fosEnabled, modEnabled);
842
953
  const stableOutputParts = [];
843
954
  if (outputTier.tier === 'light') {
844
955
  stableOutputParts.push(renderLightFOS().join('\n'));
@@ -945,9 +1056,16 @@ export class Compositor {
945
1056
  const budgetClusters = clusterNeutralMessages(evictedHistory);
946
1057
  let historyTokens = 0;
947
1058
  const includedClusters = [];
1059
+ // Pre-allocate history budget. historyFraction is a fraction of the
1060
+ // effective token budget (post-reserve). Falls back to unbounded fill
1061
+ // (remaining) when historyFraction is not set.
1062
+ const historyBudget = this.config.historyFraction != null
1063
+ ? Math.floor(budget * this.config.historyFraction)
1064
+ : remaining;
1065
+ const historyFillCap = Math.min(historyBudget, remaining);
948
1066
  for (let i = budgetClusters.length - 1; i >= 0; i--) {
949
1067
  const cluster = budgetClusters[i];
950
- if (historyTokens + cluster.tokenCost > remaining && includedClusters.length > 0) {
1068
+ if (historyTokens + cluster.tokenCost > historyFillCap && includedClusters.length > 0) {
951
1069
  const droppedMsgCount = budgetClusters.slice(0, i + 1).reduce((s, c) => s + c.messages.length, 0);
952
1070
  warnings.push(`History truncated at cluster ${i + 1}/${budgetClusters.length} (${droppedMsgCount} messages dropped)`);
953
1071
  break;
@@ -1059,13 +1177,23 @@ export class Compositor {
1059
1177
  slots.history = historyTokens;
1060
1178
  remaining -= historyTokens;
1061
1179
  }
1062
- // targetBudgetFraction cap: limit total context slots to a fraction of the
1063
- // effective budget. This gives operators a single knob to make the system
1064
- // lighter without tuning individual slot fractions.
1065
- const targetFraction = this.config.targetBudgetFraction ?? 0.65;
1066
- const contextCap = Math.floor(budget * targetFraction);
1067
- if (remaining > contextCap) {
1068
- remaining = contextCap;
1180
+ // Memory budget pool: facts, wiki, semantic recall, cross-session, and
1181
+ // trigger-fired doc chunks all draw from this shared pool via `remaining`.
1182
+ // memoryFraction is a fraction of the effective token budget (post-reserve).
1183
+ // Falls back to targetBudgetFraction cap behavior when memoryFraction is not set.
1184
+ let memoryBudget;
1185
+ if (this.config.memoryFraction != null) {
1186
+ memoryBudget = Math.floor(budget * this.config.memoryFraction);
1187
+ if (remaining > memoryBudget) {
1188
+ remaining = memoryBudget;
1189
+ }
1190
+ }
1191
+ else {
1192
+ const targetFraction = this.config.targetBudgetFraction ?? 0.65;
1193
+ memoryBudget = Math.floor(budget * targetFraction);
1194
+ if (remaining > memoryBudget) {
1195
+ remaining = memoryBudget;
1196
+ }
1069
1197
  }
1070
1198
  // T1.3: Ghost message suppression.
1071
1199
  // If the last message in the included history is a warm-seeded user message
@@ -1099,6 +1227,13 @@ export class Compositor {
1099
1227
  // conversation history (after system/identity).
1100
1228
  const contextParts = [];
1101
1229
  let contextTokens = 0;
1230
+ // ── C1: Content fingerprint dedup set ────────────────────
1231
+ // Replaces fragile substring-match dedup across temporal, open-domain,
1232
+ // semantic recall, and cross-session paths. O(1) lookup on a normalized
1233
+ // 120-char prefix catches rephrased duplicates the old 60-char includes()
1234
+ // match missed without needing a hash.
1235
+ const contextFingerprints = new Set();
1236
+ const fingerprintEntries = new Map();
1102
1237
  // ── Compose-level diagnostics tracking vars ──────────────
1103
1238
  let diagTriggerHits = 0;
1104
1239
  let diagTriggerFallbackUsed = false;
@@ -1106,23 +1241,48 @@ export class Compositor {
1106
1241
  let diagSemanticResults = 0;
1107
1242
  let diagDocChunkCollections = 0;
1108
1243
  let diagScopeFiltered = 0;
1244
+ let diagFingerprintDedups = 0;
1245
+ let diagFingerprintCollisions = 0;
1109
1246
  let diagRetrievalMode = 'none';
1247
+ function normalizeFingerprintText(text) {
1248
+ return text.toLowerCase().replace(/\s+/g, ' ').trim();
1249
+ }
1250
+ function contentFingerprint(text) {
1251
+ return normalizeFingerprintText(text).slice(0, 120);
1252
+ }
1253
+ function addFingerprint(text) {
1254
+ const normalized = normalizeFingerprintText(text);
1255
+ const fingerprint = normalized.slice(0, 120);
1256
+ contextFingerprints.add(fingerprint);
1257
+ const entries = fingerprintEntries.get(fingerprint) ?? new Set();
1258
+ entries.add(normalized);
1259
+ fingerprintEntries.set(fingerprint, entries);
1260
+ }
1261
+ function isDuplicate(text) {
1262
+ const normalized = normalizeFingerprintText(text);
1263
+ const fingerprint = normalized.slice(0, 120);
1264
+ if (!contextFingerprints.has(fingerprint))
1265
+ return false;
1266
+ const entries = fingerprintEntries.get(fingerprint);
1267
+ if (entries && !entries.has(normalized))
1268
+ diagFingerprintCollisions += 1;
1269
+ return true;
1270
+ }
1110
1271
  // ── Wiki Page (L4: Library — active topic synthesis) ──────
1111
1272
  // Inject synthesized wiki page for the active topic before general knowledge.
1112
- // Token budget: capped at 15% of remaining.
1273
+ // Draws from the shared memory budget pool (remaining is pre-capped by memoryBudget).
1113
1274
  if (request.includeLibrary !== false && remaining > 300 && libDb && composedActiveTopicName) {
1114
1275
  const wikiContent = this.buildWikiPageContext(request.agentId, composedActiveTopicName, libDb);
1115
1276
  if (wikiContent) {
1116
1277
  const tokens = estimateTokens(wikiContent);
1117
- const cap = Math.floor(remaining * 0.15);
1118
- if (tokens <= cap) {
1278
+ if (tokens <= remaining) {
1119
1279
  contextParts.push(wikiContent);
1120
1280
  contextTokens += tokens;
1121
1281
  remaining -= tokens;
1122
1282
  slots.library += tokens;
1123
1283
  }
1124
- else {
1125
- const truncated = this.truncateToTokens(wikiContent, cap);
1284
+ else if (remaining > 200) {
1285
+ const truncated = this.truncateToTokens(wikiContent, remaining);
1126
1286
  const truncTokens = estimateTokens(truncated);
1127
1287
  contextParts.push(truncated);
1128
1288
  contextTokens += truncTokens;
@@ -1133,6 +1293,7 @@ export class Compositor {
1133
1293
  }
1134
1294
  // ── Facts (L4: Library) ──────────────────────────────────
1135
1295
  // scope: agent — filtered by agentId via filterByScope after fetch
1296
+ // Draws from the shared memory budget pool (remaining is pre-capped by memoryBudget).
1136
1297
  if (request.includeFacts !== false && remaining > 500) {
1137
1298
  const factsContent = this.buildFactsFromDb(request.agentId, request.sessionKey, libDb || db);
1138
1299
  if (factsContent !== null) {
@@ -1141,21 +1302,27 @@ export class Compositor {
1141
1302
  diagScopeFiltered += scopeFiltered;
1142
1303
  if (content) {
1143
1304
  const tokens = estimateTokens(content);
1144
- if (tokens <= remaining * 0.25) { // Cap facts at 25% of remaining (W4: was 0.3)
1305
+ if (tokens <= remaining) {
1145
1306
  contextParts.push(`## Active Facts\n${content}`);
1146
1307
  contextTokens += tokens;
1147
1308
  remaining -= tokens;
1148
1309
  slots.facts = tokens;
1149
1310
  }
1150
- else {
1151
- // Truncate to budget
1152
- const truncated = this.truncateToTokens(content, Math.floor(remaining * 0.25));
1311
+ else if (remaining > 200) {
1312
+ const truncated = this.truncateToTokens(content, remaining);
1153
1313
  const truncTokens = estimateTokens(truncated);
1154
1314
  contextParts.push(`## Active Facts (truncated)\n${truncated}`);
1155
1315
  contextTokens += truncTokens;
1156
1316
  remaining -= truncTokens;
1157
1317
  slots.facts = truncTokens;
1158
- warnings.push('Facts truncated to fit budget');
1318
+ warnings.push('Facts truncated to fit memory budget');
1319
+ }
1320
+ // C1: Fingerprint each fact line so downstream dedup paths can skip duplicates
1321
+ const factLines = content.split('\n');
1322
+ for (const line of factLines) {
1323
+ if (line.startsWith('- [')) {
1324
+ addFingerprint(line);
1325
+ }
1159
1326
  }
1160
1327
  }
1161
1328
  }
@@ -1173,14 +1340,17 @@ export class Compositor {
1173
1340
  order: 'DESC',
1174
1341
  });
1175
1342
  if (temporalFacts.length > 0) {
1176
- // Deduplicate against facts already in context
1177
- const existingContent = contextParts.join('\n');
1178
- const novel = temporalFacts.filter(f => !existingContent.includes(f.content.slice(0, 60)));
1343
+ // C1: Use fingerprint dedup instead of fragile substring match
1344
+ const beforeCount = temporalFacts.length;
1345
+ const novel = temporalFacts.filter(f => !isDuplicate(f.content));
1346
+ diagFingerprintDedups += beforeCount - novel.length;
1179
1347
  if (novel.length > 0) {
1180
1348
  const temporalBlock = novel
1181
1349
  .map(f => {
1182
1350
  const ts = new Date(f.occurredAt).toISOString().slice(0, 10);
1183
- return `[${ts}] ${f.content}`;
1351
+ const line = `[${ts}] ${f.content}`;
1352
+ addFingerprint(f.content);
1353
+ return line;
1184
1354
  })
1185
1355
  .join('\n');
1186
1356
  const temporalSection = `## Temporal Context\n${temporalBlock}`;
@@ -1214,11 +1384,16 @@ export class Compositor {
1214
1384
  // questions. Primary fix for LoCoMo open-domain F1 gap (0.133 baseline).
1215
1385
  if (request.includeSemanticRecall !== false && queryText && isOpenDomainQuery(queryText) && db && remaining > 300) {
1216
1386
  try {
1217
- const existingContent = contextParts.join('\n');
1218
- const odResults = searchOpenDomain(db, queryText, existingContent, 10);
1387
+ // searchOpenDomain still does intra-result dedup. Existing-context dedup
1388
+ // now happens here via fingerprints so we keep one dedup path.
1389
+ const rawOdResults = searchOpenDomain(db, queryText, '', 10);
1390
+ const beforeOd = rawOdResults.length;
1391
+ const odResults = rawOdResults.filter(r => !isDuplicate(r.content));
1392
+ diagFingerprintDedups += beforeOd - odResults.length;
1219
1393
  if (odResults.length > 0) {
1220
1394
  const odBlock = odResults
1221
1395
  .map(r => {
1396
+ addFingerprint(r.content);
1222
1397
  const ts = r.createdAt
1223
1398
  ? new Date(r.createdAt).toISOString().slice(0, 10)
1224
1399
  : '';
@@ -1314,7 +1489,8 @@ export class Compositor {
1314
1489
  // Redis lookup is best-effort — fall through to Ollama
1315
1490
  }
1316
1491
  const semanticContent = await this.buildSemanticRecall(lastUserMsg, request.agentId, Math.floor(remaining * 0.12), // Cap at 12% of remaining (W4: was 0.15)
1317
- libDb || undefined, precomputedEmbedding);
1492
+ libDb || undefined, precomputedEmbedding, contextFingerprints // C2: skip results already in Active Facts
1493
+ );
1318
1494
  if (semanticContent) {
1319
1495
  const tokens = estimateTokens(semanticContent);
1320
1496
  contextParts.push(`## Related Memory\n${semanticContent}`);
@@ -1388,14 +1564,19 @@ export class Compositor {
1388
1564
  const bLen = Math.max(...matchedKeywords.filter(kw => b.toLowerCase().includes(kw.toLowerCase()) || kw.toLowerCase().includes(b.toLowerCase().slice(0, 5))).map(kw => kw.length), 0);
1389
1565
  return bLen - aLen; // Most specific match first
1390
1566
  });
1567
+ // Sanitize FTS5 terms: quote each word, strip internal quotes, add prefix wildcard.
1568
+ // Matches the pattern used in the keystone history FTS path.
1569
+ const sanitizeFtsTerm = (w) => `"${w.replace(/"/g, '')}"*`;
1391
1570
  const ftsTerms = sortedWords.length > 0
1392
- ? sortedWords.slice(0, 6).map(w => `${w}*`).join(' OR ')
1571
+ ? sortedWords.slice(0, 6).map(sanitizeFtsTerm).join(' OR ')
1393
1572
  : matchedKeywords
1394
1573
  .sort((a, b) => b.length - a.length)
1395
1574
  .slice(0, 3)
1396
- .map(kw => `${kw}*`)
1575
+ .map(sanitizeFtsTerm)
1397
1576
  .join(' OR ');
1398
- const ftsKeyword = ftsTerms || lastMsg.split(/\s+/).slice(0, 3).join(' ');
1577
+ // Fallback uses raw message words — also sanitize to prevent FTS5 syntax errors.
1578
+ const ftsKeyword = ftsTerms || lastMsg.split(/\s+/).slice(0, 3)
1579
+ .map(sanitizeFtsTerm).join(' OR ');
1399
1580
  const chunks = docChunkStore.queryChunks({
1400
1581
  collection: trigger.collection,
1401
1582
  agentId: request.agentId,
@@ -1410,6 +1591,10 @@ export class Compositor {
1410
1591
  for (const chunk of chunks) {
1411
1592
  if (chunkTokens + chunk.tokenEstimate > maxTokens)
1412
1593
  break;
1594
+ // Skip chunks from files OpenClaw already injects into the system prompt
1595
+ const chunkBasename = chunk.sourcePath.split('/').pop() || '';
1596
+ if (OPENCLAW_BOOTSTRAP_FILES.has(chunkBasename))
1597
+ continue;
1413
1598
  chunkLines.push(`### ${chunk.sectionPath}\n${chunk.content}`);
1414
1599
  chunkTokens += chunk.tokenEstimate;
1415
1600
  }
@@ -1438,7 +1623,8 @@ export class Compositor {
1438
1623
  // If refactored to run both paths, cap combined semantic budget to avoid double-recall.
1439
1624
  try {
1440
1625
  const fallbackContent = await Promise.race([
1441
- this.buildSemanticRecall(lastMsg, request.agentId, Math.floor(remaining * 0.10), libDb || undefined),
1626
+ this.buildSemanticRecall(lastMsg, request.agentId, Math.floor(remaining * 0.10), libDb || undefined, undefined, contextFingerprints // C2: skip results already in Active Facts
1627
+ ),
1442
1628
  new Promise((_, reject) => setTimeout(() => reject(new Error('fallback_knn_timeout')), 3000)),
1443
1629
  ]);
1444
1630
  if (fallbackContent) {
@@ -1489,7 +1675,8 @@ export class Compositor {
1489
1675
  }
1490
1676
  // ── Cross-Session Context (L2: Messages) ─────────────────
1491
1677
  if (request.includeContext !== false && remaining > 500) {
1492
- const crossSessionContent = this.buildCrossSessionContext(request.agentId, request.sessionKey, db, libDb);
1678
+ const crossSessionContent = this.buildCrossSessionContext(request.agentId, request.sessionKey, db, libDb, contextFingerprints // C3: skip entries already in facts/semantic recall
1679
+ );
1493
1680
  if (crossSessionContent) {
1494
1681
  const tokens = estimateTokens(crossSessionContent);
1495
1682
  const maxContextTokens = Math.min(this.config.maxCrossSessionContext, Math.floor(remaining * 0.2));
@@ -1584,9 +1771,7 @@ export class Compositor {
1584
1771
  // When skipProviderTranslation is set, return NeutralMessages directly.
1585
1772
  // The context engine plugin uses this: the OpenClaw runtime handles its
1586
1773
  // own provider translation, so double-translating corrupts tool calls.
1587
- const outputMessages = request.skipProviderTranslation
1588
- ? messages
1589
- : toProviderFormat(messages, request.provider ?? request.model ?? null);
1774
+ const outputMessages = toComposeOutputMessages(messages);
1590
1775
  // T1.3: Strip warm-replay provenance flags before output.
1591
1776
  // _warmed is an internal tag added by warmSession() to mark messages
1592
1777
  // seeded from SQLite into Redis. It must not leak into provider submissions
@@ -1615,68 +1800,6 @@ export class Compositor {
1615
1800
  slots.history = (slots.history ?? 0) + delta;
1616
1801
  }
1617
1802
  }
1618
- // ─── Write Window Cache ─────────────────────────────
1619
- // Cache the composed message array so the plugin can serve it directly
1620
- // on the next assemble() call without re-running the full compose pipeline.
1621
- // Short TTL (120s) — invalidated by afterTurn when new messages arrive.
1622
- //
1623
- // VS-1: Dual-write — session-scoped key for backwards compat;
1624
- // topic-scoped key for per-topic window retrieval when activeTopicId is set.
1625
- try {
1626
- await this.cache.setWindow(request.agentId, request.sessionKey, messages, 120);
1627
- }
1628
- catch {
1629
- // Window cache write is best-effort
1630
- }
1631
- // VS-1: Topic-scoped window dual-write
1632
- if (composedActiveTopicId) {
1633
- try {
1634
- await this.cache.setTopicWindow(request.agentId, request.sessionKey, composedActiveTopicId, messages, 120);
1635
- }
1636
- catch {
1637
- // Topic window write is best-effort
1638
- }
1639
- }
1640
- // ─── Write Session Cursor ─────────────────────────────────
1641
- // Record the newest message included in the submission window.
1642
- // Background indexer uses this to find unprocessed high-signal content.
1643
- if (request.includeHistory !== false && slots.history > 0) {
1644
- try {
1645
- const historyMsgs = messages.filter(m => m.role !== 'system');
1646
- const lastHistoryMsg = historyMsgs.length > 0 ? historyMsgs[historyMsgs.length - 1] : null;
1647
- if (lastHistoryMsg) {
1648
- const sm = lastHistoryMsg;
1649
- if (sm.id != null && sm.messageIndex != null) {
1650
- const cursor = {
1651
- lastSentId: sm.id,
1652
- lastSentIndex: sm.messageIndex,
1653
- lastSentAt: new Date().toISOString(),
1654
- windowSize: historyMsgs.length,
1655
- tokenCount: totalTokens,
1656
- };
1657
- await this.cache.setCursor(request.agentId, request.sessionKey, cursor);
1658
- // Dual-write cursor to SQLite for durability across Redis eviction (P1.3)
1659
- try {
1660
- db.prepare(`
1661
- UPDATE conversations
1662
- SET cursor_last_sent_id = ?,
1663
- cursor_last_sent_index = ?,
1664
- cursor_last_sent_at = ?,
1665
- cursor_window_size = ?,
1666
- cursor_token_count = ?
1667
- WHERE session_key = ?
1668
- `).run(cursor.lastSentId, cursor.lastSentIndex, cursor.lastSentAt, cursor.windowSize, cursor.tokenCount, request.sessionKey);
1669
- }
1670
- catch {
1671
- // SQLite cursor write is best-effort — don't block compose
1672
- }
1673
- }
1674
- }
1675
- }
1676
- catch {
1677
- // Cursor write is best-effort
1678
- }
1679
- }
1680
1803
  // ─── Compaction Fence Update ──────────────────────────────
1681
1804
  // Record the oldest message ID that the LLM can see in this compose
1682
1805
  // cycle. Everything below this ID becomes eligible for compaction.
@@ -1746,6 +1869,9 @@ export class Compositor {
1746
1869
  avgTurnCostTokens: avgTurnCost,
1747
1870
  dynamicReserveActive: isDynamic,
1748
1871
  sessionPressureHigh: pressureHigh,
1872
+ fingerprintDedups: diagFingerprintDedups,
1873
+ fingerprintCollisions: diagFingerprintCollisions,
1874
+ windowCacheHit: false,
1749
1875
  };
1750
1876
  if (pressureHigh) {
1751
1877
  warnings.push(`SESSION_PRESSURE_HIGH: avg_turn_cost=${avgTurnCost} tokens, dynamic reserve capped at ${Math.round(dynamicReserve * 100)}%`);
@@ -1753,6 +1879,74 @@ export class Compositor {
1753
1879
  else if (dynamicReserve > 0.40) {
1754
1880
  console.info(`[hypermem:compositor] dynamic_reserve=${Math.round(dynamicReserve * 100)}% avg_turn_cost=${Math.round(avgTurnCost / 1000)}k horizon=${this.config.dynamicReserveTurnHorizon ?? 5}`);
1755
1881
  }
1882
+ const composedAt = new Date().toISOString();
1883
+ // ─── Write Window Cache ─────────────────────────────
1884
+ // Cache the composed message array so the plugin can serve it directly
1885
+ // on the next assemble() call without re-running the full compose pipeline.
1886
+ // Short TTL (120s). External L4 mutations should set skipWindowCache=true.
1887
+ //
1888
+ // VS-1: Dual-write, session-scoped key for backwards compat;
1889
+ // topic-scoped key for per-topic window retrieval when activeTopicId is set.
1890
+ try {
1891
+ await this.cache.setWindow(request.agentId, request.sessionKey, messages, 120);
1892
+ await this.cache.setWindowMeta(request.agentId, request.sessionKey, {
1893
+ slots: slots,
1894
+ totalTokens,
1895
+ warnings,
1896
+ diagnostics,
1897
+ composedAt,
1898
+ }, 120);
1899
+ }
1900
+ catch {
1901
+ // Window cache write is best-effort
1902
+ }
1903
+ if (composedActiveTopicId) {
1904
+ try {
1905
+ await this.cache.setTopicWindow(request.agentId, request.sessionKey, composedActiveTopicId, messages, 120);
1906
+ }
1907
+ catch {
1908
+ // Topic window write is best-effort
1909
+ }
1910
+ }
1911
+ // ─── Write Session Cursor ─────────────────────────────────
1912
+ // Record the newest message included in the submission window.
1913
+ // Background indexer uses this to find unprocessed high-signal content.
1914
+ if (request.includeHistory !== false && slots.history > 0) {
1915
+ try {
1916
+ const historyMsgs = messages.filter(m => m.role !== 'system');
1917
+ const lastHistoryMsg = historyMsgs.length > 0 ? historyMsgs[historyMsgs.length - 1] : null;
1918
+ if (lastHistoryMsg) {
1919
+ const sm = lastHistoryMsg;
1920
+ if (sm.id != null && sm.messageIndex != null) {
1921
+ const cursor = {
1922
+ lastSentId: sm.id,
1923
+ lastSentIndex: sm.messageIndex,
1924
+ lastSentAt: composedAt,
1925
+ windowSize: historyMsgs.length,
1926
+ tokenCount: totalTokens,
1927
+ };
1928
+ await this.cache.setCursor(request.agentId, request.sessionKey, cursor);
1929
+ try {
1930
+ db.prepare(`
1931
+ UPDATE conversations
1932
+ SET cursor_last_sent_id = ?,
1933
+ cursor_last_sent_index = ?,
1934
+ cursor_last_sent_at = ?,
1935
+ cursor_window_size = ?,
1936
+ cursor_token_count = ?
1937
+ WHERE session_key = ?
1938
+ `).run(cursor.lastSentId, cursor.lastSentIndex, cursor.lastSentAt, cursor.windowSize, cursor.tokenCount, request.sessionKey);
1939
+ }
1940
+ catch {
1941
+ // SQLite cursor write is best-effort, don't block compose
1942
+ }
1943
+ }
1944
+ }
1945
+ }
1946
+ catch {
1947
+ // Cursor write is best-effort
1948
+ }
1949
+ }
1756
1950
  console.log(`[hypermem:compose] agent=${request.agentId} triggers=${diagTriggerHits} fallback=${diagTriggerFallbackUsed} facts=${diagFactsIncluded} semantic=${diagSemanticResults} chunks=${diagDocChunkCollections} scopeFiltered=${diagScopeFiltered} mode=${diagRetrievalMode} crossTopicKeystones=${diagCrossTopicKeystones}`);
1757
1951
  return {
1758
1952
  messages: outputMessages,
@@ -1781,7 +1975,7 @@ export class Compositor {
1781
1975
  // Warm budget uses the same reserve fraction as compose() so warm history
1782
1976
  // never pre-fills more than compose() would actually allow.
1783
1977
  const reserve = this.config.contextWindowReserve ?? 0.15;
1784
- const effectiveBudget = resolveModelBudget(opts?.model, this.config.defaultTokenBudget, reserve);
1978
+ const effectiveBudget = resolveModelBudget(opts?.model, this.config.defaultTokenBudget, reserve, this.config.budgetFraction);
1785
1979
  const warmBudget = Math.floor(effectiveBudget * (this.config.warmHistoryBudgetFraction ?? 0.4));
1786
1980
  const rawHistory = store.getRecentMessages(conversation.id, this.config.maxHistoryMessages);
1787
1981
  const transformedForWarm = applyToolGradient(rawHistory, {
@@ -1808,6 +2002,10 @@ export class Compositor {
1808
2002
  // compose() calls buildFactsFromDb() and buildCrossSessionContext() directly
1809
2003
  // from SQLite on every turn (~0.3ms each) — faster than a Redis GET round-trip.
1810
2004
  // Caching them here would create stale entries that compose() ignores anyway.
2005
+ // Invalidate the window cache so the next compose rebuilds with the fresh
2006
+ // system/identity slots. Without this, the fast-exit returns a stale bundle
2007
+ // that predates the warm and reports identity=0.
2008
+ await this.cache.invalidateWindow(agentId, sessionKey);
1811
2009
  await this.cache.warmSession(agentId, sessionKey, {
1812
2010
  system: opts?.systemPrompt,
1813
2011
  identity: opts?.identity,
@@ -2048,10 +2246,15 @@ export class Compositor {
2048
2246
  * @param precomputedEmbedding — optional pre-computed embedding for the query.
2049
2247
  * When provided, the Ollama call inside VectorStore.search() is skipped.
2050
2248
  */
2051
- async buildSemanticRecall(userMessage, agentId, maxTokens, libraryDb, precomputedEmbedding) {
2249
+ async buildSemanticRecall(userMessage, agentId, maxTokens, libraryDb, precomputedEmbedding, existingFingerprints // C2: skip results already in Active Facts
2250
+ ) {
2052
2251
  const libDb = libraryDb || this.libraryDb;
2053
2252
  if (!libDb && !this.vectorStore)
2054
2253
  return null;
2254
+ // Inline fingerprint helper (mirrors compose-scope version; C2 dedup only used here)
2255
+ const fpCheck = existingFingerprints
2256
+ ? (text) => existingFingerprints.has(text.toLowerCase().replace(/\s+/g, ' ').trim().slice(0, 120))
2257
+ : () => false;
2055
2258
  // Use hybrid search when library DB is available
2056
2259
  if (libDb) {
2057
2260
  const results = await hybridSearch(libDb, this.vectorStore, userMessage, {
@@ -2108,6 +2311,10 @@ export class Compositor {
2108
2311
  // (score >= 0.04) for episodes to make it into assembled context.
2109
2312
  if (result.sourceTable === 'episodes' && result.score < 0.04)
2110
2313
  continue;
2314
+ // C2: Skip results whose content is already fingerprinted (e.g. in Active Facts)
2315
+ // Dedup count is not tracked separately here — compose-level counter covers the other paths.
2316
+ if (fpCheck(result.content))
2317
+ continue;
2111
2318
  const label = this.formatHybridResult(result);
2112
2319
  const lineTokens = estimateTokens(label);
2113
2320
  if (tokens + lineTokens > maxTokens)
@@ -2181,7 +2388,8 @@ export class Compositor {
2181
2388
  * Build cross-session context by finding recent activity
2182
2389
  * in other sessions for this agent.
2183
2390
  */
2184
- buildCrossSessionContext(agentId, currentSessionKey, db, _libraryDb) {
2391
+ buildCrossSessionContext(agentId, currentSessionKey, db, _libraryDb, existingFingerprints // C3: skip entries already in facts/semantic recall
2392
+ ) {
2185
2393
  const conversation = db.prepare('SELECT id FROM conversations WHERE session_key = ?').get(currentSessionKey);
2186
2394
  if (!conversation)
2187
2395
  return null;
@@ -2199,11 +2407,18 @@ export class Compositor {
2199
2407
  `).all(agentId, conversation.id);
2200
2408
  if (rows.length === 0)
2201
2409
  return null;
2202
- const lines = rows.map(r => {
2410
+ const fpCheck = existingFingerprints
2411
+ ? (text) => existingFingerprints.has(text.toLowerCase().replace(/\s+/g, ' ').trim().slice(0, 120))
2412
+ : () => false;
2413
+ const lines = [];
2414
+ for (const r of rows) {
2415
+ // C3: Skip cross-session entries whose content fingerprint already appears in context
2416
+ if (fpCheck(r.text_content))
2417
+ continue;
2203
2418
  const preview = r.text_content.substring(0, 200);
2204
- return `- [${r.channel_type}/${r.role} @ ${r.created_at}] ${preview}`;
2205
- });
2206
- return lines.join('\n');
2419
+ lines.push(`- [${r.channel_type}/${r.role} @ ${r.created_at}] ${preview}`);
2420
+ }
2421
+ return lines.length > 0 ? lines.join('\n') : null;
2207
2422
  }
2208
2423
  // ─── Utilities ───────────────────────────────────────────────
2209
2424
  /**