@psiclawops/hypermem 0.5.5 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. package/README.md +108 -62
  2. package/dist/background-indexer.d.ts +18 -0
  3. package/dist/background-indexer.d.ts.map +1 -1
  4. package/dist/background-indexer.js +131 -20
  5. package/dist/cache.d.ts +24 -1
  6. package/dist/cache.d.ts.map +1 -1
  7. package/dist/cache.js +77 -3
  8. package/dist/compositor.d.ts +6 -0
  9. package/dist/compositor.d.ts.map +1 -1
  10. package/dist/compositor.js +471 -129
  11. package/dist/context-backfill.d.ts +46 -0
  12. package/dist/context-backfill.d.ts.map +1 -0
  13. package/dist/context-backfill.js +113 -0
  14. package/dist/context-store.d.ts +77 -0
  15. package/dist/context-store.d.ts.map +1 -0
  16. package/dist/context-store.js +177 -0
  17. package/dist/cross-agent.d.ts +12 -0
  18. package/dist/cross-agent.d.ts.map +1 -1
  19. package/dist/cross-agent.js +31 -19
  20. package/dist/db.d.ts.map +1 -1
  21. package/dist/db.js +8 -0
  22. package/dist/index.d.ts +5 -3
  23. package/dist/index.d.ts.map +1 -1
  24. package/dist/index.js +26 -7
  25. package/dist/knowledge-lint.js +4 -4
  26. package/dist/message-store.d.ts +31 -2
  27. package/dist/message-store.d.ts.map +1 -1
  28. package/dist/message-store.js +131 -17
  29. package/dist/preference-store.d.ts +1 -1
  30. package/dist/preference-store.js +1 -1
  31. package/dist/profiles.d.ts +4 -2
  32. package/dist/profiles.d.ts.map +1 -1
  33. package/dist/profiles.js +72 -37
  34. package/dist/repair-tool-pairs.d.ts.map +1 -1
  35. package/dist/repair-tool-pairs.js +73 -2
  36. package/dist/schema.d.ts +1 -1
  37. package/dist/schema.d.ts.map +1 -1
  38. package/dist/schema.js +27 -1
  39. package/dist/seed.d.ts +1 -1
  40. package/dist/seed.js +1 -1
  41. package/dist/session-flusher.d.ts +2 -2
  42. package/dist/session-flusher.js +2 -2
  43. package/dist/spawn-context.d.ts +1 -1
  44. package/dist/spawn-context.js +1 -1
  45. package/dist/topic-synthesizer.d.ts.map +1 -1
  46. package/dist/topic-synthesizer.js +4 -3
  47. package/dist/trigger-registry.d.ts +1 -1
  48. package/dist/trigger-registry.js +4 -4
  49. package/dist/types.d.ts +74 -32
  50. package/dist/types.d.ts.map +1 -1
  51. package/dist/vector-store.d.ts +10 -1
  52. package/dist/vector-store.d.ts.map +1 -1
  53. package/dist/vector-store.js +353 -0
  54. package/dist/version.d.ts +5 -5
  55. package/dist/version.js +5 -5
  56. package/package.json +4 -2
@@ -17,13 +17,23 @@ import { SessionTopicMap } from './session-topic-map.js';
17
17
  import { toProviderFormat } from './provider-translator.js';
18
18
  import { DocChunkStore } from './doc-chunk-store.js';
19
19
  import { hybridSearch } from './hybrid-retrieval.js';
20
- import { ensureCompactionFenceSchema, updateCompactionFence } from './compaction-fence.js';
20
+ import { ensureCompactionFenceSchema, updateCompactionFence, getCompactionFence } from './compaction-fence.js';
21
+ import { getActiveContext } from './context-store.js';
21
22
  import { rankKeystones, scoreKeystone } from './keystone-scorer.js';
22
23
  import { buildOrgRegistryFromDb, defaultOrgRegistry } from './cross-agent.js';
23
24
  import { getActiveFOS, matchMOD, renderFOS, renderMOD, renderLightFOS, resolveOutputTier, buildActionVerificationSummary } from './fos-mod.js';
24
25
  import { KnowledgeStore } from './knowledge-store.js';
25
26
  import { TemporalStore, hasTemporalSignals } from './temporal-store.js';
26
27
  import { isOpenDomainQuery, searchOpenDomain } from './open-domain.js';
28
+ /**
29
+ * Files that OpenClaw's contextInjection injects into the system prompt.
30
+ * HyperMem must not re-inject these via doc chunk retrieval to avoid duplication.
31
+ * Exported so plugin and other consumers can share the same dedup set.
32
+ */
33
+ export const OPENCLAW_BOOTSTRAP_FILES = new Set([
34
+ 'SOUL.md', 'IDENTITY.md', 'USER.md', 'TOOLS.md',
35
+ 'AGENTS.md', 'HEARTBEAT.md', 'MEMORY.md', 'BOOTSTRAP.md',
36
+ ]);
27
37
  /**
28
38
  * Model context window sizes by provider/model string (or partial match).
29
39
  * Used as fallback when tokenBudget is not passed by the runtime.
@@ -65,7 +75,26 @@ const MODEL_CONTEXT_WINDOWS = [
65
75
  * Default reserve: 15% when the caller passes none (leaves 85% for input context).
66
76
  * Falls back to defaultTokenBudget if no model match.
67
77
  */
68
- function resolveModelBudget(model, defaultBudget, reserve = 0.15) {
78
+ /**
79
+ * Resolve effective input token budget for a model.
80
+ *
81
+ * Priority:
82
+ * 1. If budgetFraction is set AND model window is detected: window × budgetFraction × (1 - reserve)
83
+ * 2. If model window detected but no budgetFraction: window × (1 - reserve)
84
+ * 3. Fallback to defaultTokenBudget (absolute number)
85
+ */
86
+ function resolveModelBudget(model, defaultBudget, reserve = 0.15, budgetFraction) {
87
+ const window = resolveModelWindow(model, defaultBudget);
88
+ // budgetFraction path: applies only when the model name matches a MODEL_CONTEXT_WINDOWS entry
89
+ if (model && budgetFraction != null) {
90
+ const normalized = model.toLowerCase();
91
+ for (const entry of MODEL_CONTEXT_WINDOWS) {
92
+ if (normalized.includes(entry.pattern)) {
93
+ return Math.floor(entry.tokens * budgetFraction * (1 - reserve));
94
+ }
95
+ }
96
+ }
97
+ // Original path: detected window × (1 - reserve), or absolute fallback
69
98
  if (!model)
70
99
  return defaultBudget;
71
100
  const normalized = model.toLowerCase();
@@ -102,12 +131,19 @@ function resolveModelWindow(model, defaultBudget) {
102
131
  * emit a warning or trigger checkpointing.
103
132
  */
104
133
  function computeDynamicReserve(recentMessages, totalWindow, config) {
105
- const base = config.contextWindowReserve ?? 0.15;
134
+ const base = config.reserveFraction ?? config.contextWindowReserve ?? 0.25;
106
135
  const horizon = config.dynamicReserveTurnHorizon ?? 5;
107
136
  const max = config.dynamicReserveMax ?? 0.50;
108
137
  const enabled = config.dynamicReserveEnabled ?? true;
109
- if (!enabled || recentMessages.length === 0 || totalWindow <= 0) {
110
- return { reserve: base, avgTurnCost: 0, dynamic: false, pressureHigh: false };
138
+ // Cold sessions (no message history) use a minimal floor so most of the window
139
+ // stays available. The same floor (not the configured base) is also returned when
140
+ // dynamic reserve is disabled or totalWindow <= 0; base can only apply past these guards.
141
+ const COLD_SESSION_FLOOR = 0.15;
142
+ if (!enabled || totalWindow <= 0) {
143
+ return { reserve: COLD_SESSION_FLOOR, avgTurnCost: 0, dynamic: false, pressureHigh: false };
144
+ }
145
+ if (recentMessages.length === 0) {
146
+ return { reserve: COLD_SESSION_FLOOR, avgTurnCost: 0, dynamic: false, pressureHigh: false };
111
147
  }
112
148
  // Sample the last 20 user+assistant messages for turn cost estimation.
113
149
  // Tool messages are excluded — they're already compressed by the gradient
@@ -131,17 +167,26 @@ function computeDynamicReserve(recentMessages, totalWindow, config) {
131
167
  return { reserve: dynamicFrac, avgTurnCost, dynamic: true, pressureHigh: false };
132
168
  }
133
169
  const DEFAULT_CONFIG = {
170
+ // Primary budget controls
171
+ budgetFraction: 0.703,
172
+ reserveFraction: 0.25,
173
+ historyFraction: 0.40,
174
+ memoryFraction: 0.40,
175
+ // Absolute fallback
134
176
  defaultTokenBudget: 90000,
177
+ // History internals
135
178
  maxHistoryMessages: 250,
136
- maxFacts: 28,
137
- maxCrossSessionContext: 6000,
138
- maxRecentToolPairs: 3,
139
- maxProseToolPairs: 10,
140
179
  warmHistoryBudgetFraction: 0.4,
141
180
  keystoneHistoryFraction: 0.2,
142
181
  keystoneMaxMessages: 15,
143
182
  keystoneMinSignificance: 0.5,
144
- contextWindowReserve: 0.15,
183
+ // Memory internals
184
+ maxFacts: 28,
185
+ maxCrossSessionContext: 6000,
186
+ // Tool gradient (internal)
187
+ maxRecentToolPairs: 3,
188
+ maxProseToolPairs: 10,
189
+ // Dynamic reserve
145
190
  dynamicReserveTurnHorizon: 5,
146
191
  dynamicReserveMax: 0.50,
147
192
  dynamicReserveEnabled: true,
@@ -784,6 +829,73 @@ export class Compositor {
784
829
  async compose(request, db, libraryDb) {
785
830
  const store = new MessageStore(db);
786
831
  const libDb = libraryDb || this.libraryDb;
832
+ const toComposeOutputMessages = (inputMessages) => {
833
+ // When skipProviderTranslation is set, compose returns the neutral window
834
+ // typed as ProviderMessage[] by contract. The runtime translates later.
835
+ return request.skipProviderTranslation
836
+ ? inputMessages
837
+ : toProviderFormat(inputMessages, request.provider ?? request.model ?? null);
838
+ };
839
+ // ── C4: Window cache fast-exit ────────────────────────────
840
+ // If nothing has changed since the last compose (cursor.lastSentId >= newest
841
+ // message id in the DB), skip the full pipeline and return the cached window.
842
+ // Particularly effective for low-frequency sessions (heartbeat agents, council
843
+ // seats between rounds). TTL on the cache write remains 120s — this is a
844
+ // conservative early-exit before the TTL expires, not a TTL extension.
845
+ if (request.includeHistory !== false && request.skipWindowCache !== true) {
846
+ try {
847
+ const newestRow = db.prepare('SELECT MAX(id) AS maxId FROM messages WHERE agent_id = ?').get(request.agentId);
848
+ const newestMsgId = newestRow?.maxId;
849
+ if (newestMsgId != null) {
850
+ const cachedBundle = await this.cache.getFreshWindowBundle(request.agentId, request.sessionKey, newestMsgId);
851
+ if (cachedBundle) {
852
+ // Validate the cached bundle is compatible with this request.
853
+ // A mismatch on any of these means we must do a full compose:
854
+ // - tokenBudget: cached total exceeds the requested cap by more than 5%
855
+ // - slot flags: caller disabled slots that the cache populated
856
+ // - historyDepth: caller passed a historyDepth (a cached bundle cannot be sliced)
857
+ const cachedTotal = cachedBundle.meta.totalTokens;
858
+ const budgetOk = !request.tokenBudget ||
859
+ cachedTotal <= request.tokenBudget * 1.05;
860
+ const factsOk = request.includeFacts !== false ||
861
+ (cachedBundle.meta.slots['facts'] ?? 0) === 0;
862
+ const libraryOk = request.includeLibrary !== false ||
863
+ (cachedBundle.meta.slots['library'] ?? 0) === 0;
864
+ const contextOk = request.includeContext !== false ||
865
+ (cachedBundle.meta.slots['context'] ?? 0) === 0;
866
+ // historyDepth constrains how many messages the caller wants;
867
+ // we can't slice a cached bundle safely, so skip cache.
868
+ const depthOk = !request.historyDepth;
869
+ if (budgetOk && factsOk && libraryOk && contextOk && depthOk) {
870
+ const cachedSlots = {
871
+ system: cachedBundle.meta.slots['system'] ?? 0,
872
+ identity: cachedBundle.meta.slots['identity'] ?? 0,
873
+ history: cachedBundle.meta.slots['history'] ?? 0,
874
+ facts: cachedBundle.meta.slots['facts'] ?? 0,
875
+ context: cachedBundle.meta.slots['context'] ?? 0,
876
+ library: cachedBundle.meta.slots['library'] ?? 0,
877
+ };
878
+ return {
879
+ messages: toComposeOutputMessages(cachedBundle.messages),
880
+ tokenCount: cachedBundle.meta.totalTokens,
881
+ slots: cachedSlots,
882
+ truncated: false,
883
+ hasWarnings: cachedBundle.meta.warnings.length > 0,
884
+ warnings: cachedBundle.meta.warnings,
885
+ diagnostics: {
886
+ ...cachedBundle.meta.diagnostics,
887
+ windowCacheHit: true,
888
+ },
889
+ };
890
+ }
891
+ // Incompatible request — fall through to full compose
892
+ }
893
+ }
894
+ }
895
+ catch {
896
+ // Cache fast-exit is best-effort, fall through to full compose
897
+ }
898
+ }
787
899
  // Dynamic reserve: use a lightweight SQLite sample to estimate avg turn cost
788
900
  // BEFORE assembling the full context. This gives us the reserve fraction we
789
901
  // need to compute the effective token budget at the start of compose.
@@ -794,8 +906,23 @@ export class Compositor {
794
906
  ? store.getRecentMessages(sampleConv.id, 40)
795
907
  : [];
796
908
  const { reserve: dynamicReserve, avgTurnCost, dynamic: isDynamic, pressureHigh } = computeDynamicReserve(sampleMessages, totalWindow, this.config);
797
- const budget = request.tokenBudget || resolveModelBudget(request.model, this.config.defaultTokenBudget, dynamicReserve);
909
+ const budget = request.tokenBudget || resolveModelBudget(request.model, this.config.defaultTokenBudget, dynamicReserve, this.config.budgetFraction);
798
910
  let remaining = budget;
911
+ // Phase 0 fence enforcement: resolve the compaction fence for this conversation.
912
+ // All downstream message queries use this as a lower bound to exclude zombie
913
+ // messages below the fence that should have been compacted.
914
+ let fenceMessageId;
915
+ if (sampleConv) {
916
+ try {
917
+ ensureCompactionFenceSchema(db);
918
+ const fence = getCompactionFence(db, sampleConv.id);
919
+ if (fence)
920
+ fenceMessageId = fence.fenceMessageId;
921
+ }
922
+ catch {
923
+ // Fence lookup is best-effort — never fail composition
924
+ }
925
+ }
799
926
  const warnings = [];
800
927
  const slots = {
801
928
  system: 0,
@@ -838,7 +965,7 @@ export class Compositor {
838
965
  if (remaining > 100 && request.includeLibrary !== false) {
839
966
  const fosEnabled = this.config?.enableFOS !== false;
840
967
  const modEnabled = this.config?.enableMOD !== false;
841
- const outputTier = resolveOutputTier((this.config?.outputProfile ?? this.config?.outputStandard), fosEnabled, modEnabled);
968
+ const outputTier = resolveOutputTier((this.config?.hyperformProfile ?? this.config?.outputProfile ?? this.config?.outputStandard), fosEnabled, modEnabled);
842
969
  const stableOutputParts = [];
843
970
  if (outputTier.tier === 'light') {
844
971
  stableOutputParts.push(renderLightFOS().join('\n'));
@@ -882,6 +1009,15 @@ export class Compositor {
882
1009
  let composedActiveTopicId;
883
1010
  let composedActiveTopicName;
884
1011
  if (request.includeHistory !== false) {
1012
+ // Phase 3 (Turn DAG): resolve active context for DAG-native reads.
1013
+ // This is the primary branch-scoping mechanism; fence remains as transitional safety.
1014
+ let activeContext = null;
1015
+ try {
1016
+ activeContext = getActiveContext(db, request.agentId, request.sessionKey);
1017
+ }
1018
+ catch {
1019
+ // Context resolution is best-effort — fall back to fence-based reads
1020
+ }
885
1021
  // P3.4: Look up the active topic for this session (non-fatal)
886
1022
  let activeTopicId;
887
1023
  let activeTopic;
@@ -913,7 +1049,7 @@ export class Compositor {
913
1049
  // Hoist resolved topic id+name so the window dual-write and wiki injection sections can access them
914
1050
  composedActiveTopicId = activeTopicId;
915
1051
  composedActiveTopicName = activeTopic?.name;
916
- const rawHistoryMessages = await this.getHistory(request.agentId, request.sessionKey, request.historyDepth || this.config.maxHistoryMessages, store, activeTopicId);
1052
+ const rawHistoryMessages = await this.getHistory(request.agentId, request.sessionKey, request.historyDepth || this.config.maxHistoryMessages, store, activeTopicId, fenceMessageId, activeContext);
917
1053
  // Deduplicate history by StoredMessage.id (second line of defense after
918
1054
  // pushHistory() tail-check dedup). Guards against any duplicates that
919
1055
  // slipped through the warm path — e.g. bootstrap re-runs on existing sessions.
@@ -945,9 +1081,16 @@ export class Compositor {
945
1081
  const budgetClusters = clusterNeutralMessages(evictedHistory);
946
1082
  let historyTokens = 0;
947
1083
  const includedClusters = [];
1084
+ // Pre-allocate history budget. historyFraction is a fraction of the
1085
+ // effective token budget (post-reserve). Falls back to unbounded fill
1086
+ // (remaining) when historyFraction is not set.
1087
+ const historyBudget = this.config.historyFraction != null
1088
+ ? Math.floor(budget * this.config.historyFraction)
1089
+ : remaining;
1090
+ const historyFillCap = Math.min(historyBudget, remaining);
948
1091
  for (let i = budgetClusters.length - 1; i >= 0; i--) {
949
1092
  const cluster = budgetClusters[i];
950
- if (historyTokens + cluster.tokenCost > remaining && includedClusters.length > 0) {
1093
+ if (historyTokens + cluster.tokenCost > historyFillCap && includedClusters.length > 0) {
951
1094
  const droppedMsgCount = budgetClusters.slice(0, i + 1).reduce((s, c) => s + c.messages.length, 0);
952
1095
  warnings.push(`History truncated at cluster ${i + 1}/${budgetClusters.length} (${droppedMsgCount} messages dropped)`);
953
1096
  break;
@@ -966,7 +1109,7 @@ export class Compositor {
966
1109
  let keystoneMessages = [];
967
1110
  let keystoneTokens = 0;
968
1111
  if (request.includeKeystones !== false && includedHistory.length >= 30 && keystoneFraction > 0) {
969
- const keystoneResult = await this.buildKeystones(db, request.agentId, includedHistory, historyTokens, keystoneFraction, keystoneMaxMsgs, request.prompt, libDb || undefined);
1112
+ const keystoneResult = await this.buildKeystones(db, request.agentId, includedHistory, historyTokens, keystoneFraction, keystoneMaxMsgs, request.prompt, libDb || undefined, fenceMessageId, activeContext);
970
1113
  if (keystoneResult) {
971
1114
  keystoneMessages = keystoneResult.keystoneMessages;
972
1115
  keystoneTokens = keystoneResult.keystoneTokens;
@@ -984,7 +1127,7 @@ export class Compositor {
984
1127
  let crossTopicTokens = 0;
985
1128
  if (request.includeKeystones !== false && activeTopic && this.vectorStore) {
986
1129
  try {
987
- const rawCrossTopicKeystones = await this.getKeystonesByTopic(request.agentId, request.sessionKey, activeTopic, includedHistory, db, 3);
1130
+ const rawCrossTopicKeystones = await this.getKeystonesByTopic(request.agentId, request.sessionKey, activeTopic, includedHistory, db, 3, fenceMessageId, activeContext);
988
1131
  if (rawCrossTopicKeystones.length > 0) {
989
1132
  // Token budget: cap the full cross-topic block at 15% of remaining,
990
1133
  // including the header line.
@@ -1059,13 +1202,23 @@ export class Compositor {
1059
1202
  slots.history = historyTokens;
1060
1203
  remaining -= historyTokens;
1061
1204
  }
1062
- // targetBudgetFraction cap: limit total context slots to a fraction of the
1063
- // effective budget. This gives operators a single knob to make the system
1064
- // lighter without tuning individual slot fractions.
1065
- const targetFraction = this.config.targetBudgetFraction ?? 0.65;
1066
- const contextCap = Math.floor(budget * targetFraction);
1067
- if (remaining > contextCap) {
1068
- remaining = contextCap;
1205
+ // Memory budget pool: facts, wiki, semantic recall, cross-session, and
1206
+ // trigger-fired doc chunks all draw from this shared pool via `remaining`.
1207
+ // memoryFraction is a fraction of the effective token budget (post-reserve).
1208
+ // Falls back to targetBudgetFraction cap behavior when memoryFraction is not set.
1209
+ let memoryBudget;
1210
+ if (this.config.memoryFraction != null) {
1211
+ memoryBudget = Math.floor(budget * this.config.memoryFraction);
1212
+ if (remaining > memoryBudget) {
1213
+ remaining = memoryBudget;
1214
+ }
1215
+ }
1216
+ else {
1217
+ const targetFraction = this.config.targetBudgetFraction ?? 0.65;
1218
+ memoryBudget = Math.floor(budget * targetFraction);
1219
+ if (remaining > memoryBudget) {
1220
+ remaining = memoryBudget;
1221
+ }
1069
1222
  }
1070
1223
  // T1.3: Ghost message suppression.
1071
1224
  // If the last message in the included history is a warm-seeded user message
@@ -1099,6 +1252,13 @@ export class Compositor {
1099
1252
  // conversation history (after system/identity).
1100
1253
  const contextParts = [];
1101
1254
  let contextTokens = 0;
1255
+ // ── C1: Content fingerprint dedup set ────────────────────
1256
+ // Replaces fragile substring-match dedup across temporal, open-domain,
1257
+ // semantic recall, and cross-session paths. O(1) lookup on a normalized
1258
+ // 120-char prefix catches case/whitespace variants the old 60-char includes()
1259
+ // match missed without needing a hash.
1260
+ const contextFingerprints = new Set();
1261
+ const fingerprintEntries = new Map();
1102
1262
  // ── Compose-level diagnostics tracking vars ──────────────
1103
1263
  let diagTriggerHits = 0;
1104
1264
  let diagTriggerFallbackUsed = false;
@@ -1106,23 +1266,48 @@ export class Compositor {
1106
1266
  let diagSemanticResults = 0;
1107
1267
  let diagDocChunkCollections = 0;
1108
1268
  let diagScopeFiltered = 0;
1269
+ let diagFingerprintDedups = 0;
1270
+ let diagFingerprintCollisions = 0;
1109
1271
  let diagRetrievalMode = 'none';
1272
+ function normalizeFingerprintText(text) {
1273
+ return text.toLowerCase().replace(/\s+/g, ' ').trim();
1274
+ }
1275
+ function contentFingerprint(text) {
1276
+ return normalizeFingerprintText(text).slice(0, 120);
1277
+ }
1278
+ function addFingerprint(text) {
1279
+ const normalized = normalizeFingerprintText(text);
1280
+ const fingerprint = normalized.slice(0, 120);
1281
+ contextFingerprints.add(fingerprint);
1282
+ const entries = fingerprintEntries.get(fingerprint) ?? new Set();
1283
+ entries.add(normalized);
1284
+ fingerprintEntries.set(fingerprint, entries);
1285
+ }
1286
+ function isDuplicate(text) {
1287
+ const normalized = normalizeFingerprintText(text);
1288
+ const fingerprint = normalized.slice(0, 120);
1289
+ if (!contextFingerprints.has(fingerprint))
1290
+ return false;
1291
+ const entries = fingerprintEntries.get(fingerprint);
1292
+ if (entries && !entries.has(normalized))
1293
+ diagFingerprintCollisions += 1;
1294
+ return true;
1295
+ }
1110
1296
  // ── Wiki Page (L4: Library — active topic synthesis) ──────
1111
1297
  // Inject synthesized wiki page for the active topic before general knowledge.
1112
- // Token budget: capped at 15% of remaining.
1298
+ // Draws from the shared memory budget pool (remaining is pre-capped by memoryBudget).
1113
1299
  if (request.includeLibrary !== false && remaining > 300 && libDb && composedActiveTopicName) {
1114
1300
  const wikiContent = this.buildWikiPageContext(request.agentId, composedActiveTopicName, libDb);
1115
1301
  if (wikiContent) {
1116
1302
  const tokens = estimateTokens(wikiContent);
1117
- const cap = Math.floor(remaining * 0.15);
1118
- if (tokens <= cap) {
1303
+ if (tokens <= remaining) {
1119
1304
  contextParts.push(wikiContent);
1120
1305
  contextTokens += tokens;
1121
1306
  remaining -= tokens;
1122
1307
  slots.library += tokens;
1123
1308
  }
1124
- else {
1125
- const truncated = this.truncateToTokens(wikiContent, cap);
1309
+ else if (remaining > 200) {
1310
+ const truncated = this.truncateToTokens(wikiContent, remaining);
1126
1311
  const truncTokens = estimateTokens(truncated);
1127
1312
  contextParts.push(truncated);
1128
1313
  contextTokens += truncTokens;
@@ -1133,6 +1318,7 @@ export class Compositor {
1133
1318
  }
1134
1319
  // ── Facts (L4: Library) ──────────────────────────────────
1135
1320
  // scope: agent — filtered by agentId via filterByScope after fetch
1321
+ // Draws from the shared memory budget pool (remaining is pre-capped by memoryBudget).
1136
1322
  if (request.includeFacts !== false && remaining > 500) {
1137
1323
  const factsContent = this.buildFactsFromDb(request.agentId, request.sessionKey, libDb || db);
1138
1324
  if (factsContent !== null) {
@@ -1141,21 +1327,27 @@ export class Compositor {
1141
1327
  diagScopeFiltered += scopeFiltered;
1142
1328
  if (content) {
1143
1329
  const tokens = estimateTokens(content);
1144
- if (tokens <= remaining * 0.25) { // Cap facts at 25% of remaining (W4: was 0.3)
1330
+ if (tokens <= remaining) {
1145
1331
  contextParts.push(`## Active Facts\n${content}`);
1146
1332
  contextTokens += tokens;
1147
1333
  remaining -= tokens;
1148
1334
  slots.facts = tokens;
1149
1335
  }
1150
- else {
1151
- // Truncate to budget
1152
- const truncated = this.truncateToTokens(content, Math.floor(remaining * 0.25));
1336
+ else if (remaining > 200) {
1337
+ const truncated = this.truncateToTokens(content, remaining);
1153
1338
  const truncTokens = estimateTokens(truncated);
1154
1339
  contextParts.push(`## Active Facts (truncated)\n${truncated}`);
1155
1340
  contextTokens += truncTokens;
1156
1341
  remaining -= truncTokens;
1157
1342
  slots.facts = truncTokens;
1158
- warnings.push('Facts truncated to fit budget');
1343
+ warnings.push('Facts truncated to fit memory budget');
1344
+ }
1345
+ // C1: Fingerprint each fact line so downstream dedup paths can skip duplicates
1346
+ const factLines = content.split('\n');
1347
+ for (const line of factLines) {
1348
+ if (line.startsWith('- [')) {
1349
+ addFingerprint(line);
1350
+ }
1159
1351
  }
1160
1352
  }
1161
1353
  }
@@ -1173,14 +1365,17 @@ export class Compositor {
1173
1365
  order: 'DESC',
1174
1366
  });
1175
1367
  if (temporalFacts.length > 0) {
1176
- // Deduplicate against facts already in context
1177
- const existingContent = contextParts.join('\n');
1178
- const novel = temporalFacts.filter(f => !existingContent.includes(f.content.slice(0, 60)));
1368
+ // C1: Use fingerprint dedup instead of fragile substring match
1369
+ const beforeCount = temporalFacts.length;
1370
+ const novel = temporalFacts.filter(f => !isDuplicate(f.content));
1371
+ diagFingerprintDedups += beforeCount - novel.length;
1179
1372
  if (novel.length > 0) {
1180
1373
  const temporalBlock = novel
1181
1374
  .map(f => {
1182
1375
  const ts = new Date(f.occurredAt).toISOString().slice(0, 10);
1183
- return `[${ts}] ${f.content}`;
1376
+ const line = `[${ts}] ${f.content}`;
1377
+ addFingerprint(f.content);
1378
+ return line;
1184
1379
  })
1185
1380
  .join('\n');
1186
1381
  const temporalSection = `## Temporal Context\n${temporalBlock}`;
@@ -1214,11 +1409,16 @@ export class Compositor {
1214
1409
  // questions. Primary fix for LoCoMo open-domain F1 gap (0.133 baseline).
1215
1410
  if (request.includeSemanticRecall !== false && queryText && isOpenDomainQuery(queryText) && db && remaining > 300) {
1216
1411
  try {
1217
- const existingContent = contextParts.join('\n');
1218
- const odResults = searchOpenDomain(db, queryText, existingContent, 10);
1412
+ // searchOpenDomain still does intra-result dedup. Existing-context dedup
1413
+ // now happens here via fingerprints so we keep one dedup path.
1414
+ const rawOdResults = searchOpenDomain(db, queryText, '', 10);
1415
+ const beforeOd = rawOdResults.length;
1416
+ const odResults = rawOdResults.filter(r => !isDuplicate(r.content));
1417
+ diagFingerprintDedups += beforeOd - odResults.length;
1219
1418
  if (odResults.length > 0) {
1220
1419
  const odBlock = odResults
1221
1420
  .map(r => {
1421
+ addFingerprint(r.content);
1222
1422
  const ts = r.createdAt
1223
1423
  ? new Date(r.createdAt).toISOString().slice(0, 10)
1224
1424
  : '';
@@ -1314,7 +1514,8 @@ export class Compositor {
1314
1514
  // Redis lookup is best-effort — fall through to Ollama
1315
1515
  }
1316
1516
  const semanticContent = await this.buildSemanticRecall(lastUserMsg, request.agentId, Math.floor(remaining * 0.12), // Cap at 12% of remaining (W4: was 0.15)
1317
- libDb || undefined, precomputedEmbedding);
1517
+ libDb || undefined, precomputedEmbedding, contextFingerprints // C2: skip results already in Active Facts
1518
+ );
1318
1519
  if (semanticContent) {
1319
1520
  const tokens = estimateTokens(semanticContent);
1320
1521
  contextParts.push(`## Related Memory\n${semanticContent}`);
@@ -1388,14 +1589,19 @@ export class Compositor {
1388
1589
  const bLen = Math.max(...matchedKeywords.filter(kw => b.toLowerCase().includes(kw.toLowerCase()) || kw.toLowerCase().includes(b.toLowerCase().slice(0, 5))).map(kw => kw.length), 0);
1389
1590
  return bLen - aLen; // Most specific match first
1390
1591
  });
1592
+ // Sanitize FTS5 terms: quote each word, strip internal quotes, add prefix wildcard.
1593
+ // Matches the pattern used in the keystone history FTS path.
1594
+ const sanitizeFtsTerm = (w) => `"${w.replace(/"/g, '')}"*`;
1391
1595
  const ftsTerms = sortedWords.length > 0
1392
- ? sortedWords.slice(0, 6).map(w => `${w}*`).join(' OR ')
1596
+ ? sortedWords.slice(0, 6).map(sanitizeFtsTerm).join(' OR ')
1393
1597
  : matchedKeywords
1394
1598
  .sort((a, b) => b.length - a.length)
1395
1599
  .slice(0, 3)
1396
- .map(kw => `${kw}*`)
1600
+ .map(sanitizeFtsTerm)
1397
1601
  .join(' OR ');
1398
- const ftsKeyword = ftsTerms || lastMsg.split(/\s+/).slice(0, 3).join(' ');
1602
+ // Fallback uses raw message words — also sanitize to prevent FTS5 syntax errors.
1603
+ const ftsKeyword = ftsTerms || lastMsg.split(/\s+/).slice(0, 3)
1604
+ .map(sanitizeFtsTerm).join(' OR ');
1399
1605
  const chunks = docChunkStore.queryChunks({
1400
1606
  collection: trigger.collection,
1401
1607
  agentId: request.agentId,
@@ -1410,6 +1616,10 @@ export class Compositor {
1410
1616
  for (const chunk of chunks) {
1411
1617
  if (chunkTokens + chunk.tokenEstimate > maxTokens)
1412
1618
  break;
1619
+ // Skip chunks from files OpenClaw already injects into the system prompt
1620
+ const chunkBasename = chunk.sourcePath.split('/').pop() || '';
1621
+ if (OPENCLAW_BOOTSTRAP_FILES.has(chunkBasename))
1622
+ continue;
1413
1623
  chunkLines.push(`### ${chunk.sectionPath}\n${chunk.content}`);
1414
1624
  chunkTokens += chunk.tokenEstimate;
1415
1625
  }
@@ -1438,7 +1648,8 @@ export class Compositor {
1438
1648
  // If refactored to run both paths, cap combined semantic budget to avoid double-recall.
1439
1649
  try {
1440
1650
  const fallbackContent = await Promise.race([
1441
- this.buildSemanticRecall(lastMsg, request.agentId, Math.floor(remaining * 0.10), libDb || undefined),
1651
+ this.buildSemanticRecall(lastMsg, request.agentId, Math.floor(remaining * 0.10), libDb || undefined, undefined, contextFingerprints // C2: skip results already in Active Facts
1652
+ ),
1442
1653
  new Promise((_, reject) => setTimeout(() => reject(new Error('fallback_knn_timeout')), 3000)),
1443
1654
  ]);
1444
1655
  if (fallbackContent) {
@@ -1489,7 +1700,8 @@ export class Compositor {
1489
1700
  }
1490
1701
  // ── Cross-Session Context (L2: Messages) ─────────────────
1491
1702
  if (request.includeContext !== false && remaining > 500) {
1492
- const crossSessionContent = this.buildCrossSessionContext(request.agentId, request.sessionKey, db, libDb);
1703
+ const crossSessionContent = this.buildCrossSessionContext(request.agentId, request.sessionKey, db, libDb, contextFingerprints // C3: skip entries already in facts/semantic recall
1704
+ );
1493
1705
  if (crossSessionContent) {
1494
1706
  const tokens = estimateTokens(crossSessionContent);
1495
1707
  const maxContextTokens = Math.min(this.config.maxCrossSessionContext, Math.floor(remaining * 0.2));
@@ -1584,9 +1796,7 @@ export class Compositor {
1584
1796
  // When skipProviderTranslation is set, return NeutralMessages directly.
1585
1797
  // The context engine plugin uses this: the OpenClaw runtime handles its
1586
1798
  // own provider translation, so double-translating corrupts tool calls.
1587
- const outputMessages = request.skipProviderTranslation
1588
- ? messages
1589
- : toProviderFormat(messages, request.provider ?? request.model ?? null);
1799
+ const outputMessages = toComposeOutputMessages(messages);
1590
1800
  // T1.3: Strip warm-replay provenance flags before output.
1591
1801
  // _warmed is an internal tag added by warmSession() to mark messages
1592
1802
  // seeded from SQLite into Redis. It must not leak into provider submissions
@@ -1615,68 +1825,6 @@ export class Compositor {
1615
1825
  slots.history = (slots.history ?? 0) + delta;
1616
1826
  }
1617
1827
  }
1618
- // ─── Write Window Cache ─────────────────────────────
1619
- // Cache the composed message array so the plugin can serve it directly
1620
- // on the next assemble() call without re-running the full compose pipeline.
1621
- // Short TTL (120s) — invalidated by afterTurn when new messages arrive.
1622
- //
1623
- // VS-1: Dual-write — session-scoped key for backwards compat;
1624
- // topic-scoped key for per-topic window retrieval when activeTopicId is set.
1625
- try {
1626
- await this.cache.setWindow(request.agentId, request.sessionKey, messages, 120);
1627
- }
1628
- catch {
1629
- // Window cache write is best-effort
1630
- }
1631
- // VS-1: Topic-scoped window dual-write
1632
- if (composedActiveTopicId) {
1633
- try {
1634
- await this.cache.setTopicWindow(request.agentId, request.sessionKey, composedActiveTopicId, messages, 120);
1635
- }
1636
- catch {
1637
- // Topic window write is best-effort
1638
- }
1639
- }
1640
- // ─── Write Session Cursor ─────────────────────────────────
1641
- // Record the newest message included in the submission window.
1642
- // Background indexer uses this to find unprocessed high-signal content.
1643
- if (request.includeHistory !== false && slots.history > 0) {
1644
- try {
1645
- const historyMsgs = messages.filter(m => m.role !== 'system');
1646
- const lastHistoryMsg = historyMsgs.length > 0 ? historyMsgs[historyMsgs.length - 1] : null;
1647
- if (lastHistoryMsg) {
1648
- const sm = lastHistoryMsg;
1649
- if (sm.id != null && sm.messageIndex != null) {
1650
- const cursor = {
1651
- lastSentId: sm.id,
1652
- lastSentIndex: sm.messageIndex,
1653
- lastSentAt: new Date().toISOString(),
1654
- windowSize: historyMsgs.length,
1655
- tokenCount: totalTokens,
1656
- };
1657
- await this.cache.setCursor(request.agentId, request.sessionKey, cursor);
1658
- // Dual-write cursor to SQLite for durability across Redis eviction (P1.3)
1659
- try {
1660
- db.prepare(`
1661
- UPDATE conversations
1662
- SET cursor_last_sent_id = ?,
1663
- cursor_last_sent_index = ?,
1664
- cursor_last_sent_at = ?,
1665
- cursor_window_size = ?,
1666
- cursor_token_count = ?
1667
- WHERE session_key = ?
1668
- `).run(cursor.lastSentId, cursor.lastSentIndex, cursor.lastSentAt, cursor.windowSize, cursor.tokenCount, request.sessionKey);
1669
- }
1670
- catch {
1671
- // SQLite cursor write is best-effort — don't block compose
1672
- }
1673
- }
1674
- }
1675
- }
1676
- catch {
1677
- // Cursor write is best-effort
1678
- }
1679
- }
1680
1828
  // ─── Compaction Fence Update ──────────────────────────────
1681
1829
  // Record the oldest message ID that the LLM can see in this compose
1682
1830
  // cycle. Everything below this ID becomes eligible for compaction.
@@ -1746,6 +1894,9 @@ export class Compositor {
1746
1894
  avgTurnCostTokens: avgTurnCost,
1747
1895
  dynamicReserveActive: isDynamic,
1748
1896
  sessionPressureHigh: pressureHigh,
1897
+ fingerprintDedups: diagFingerprintDedups,
1898
+ fingerprintCollisions: diagFingerprintCollisions,
1899
+ windowCacheHit: false,
1749
1900
  };
1750
1901
  if (pressureHigh) {
1751
1902
  warnings.push(`SESSION_PRESSURE_HIGH: avg_turn_cost=${avgTurnCost} tokens, dynamic reserve capped at ${Math.round(dynamicReserve * 100)}%`);
@@ -1753,6 +1904,74 @@ export class Compositor {
1753
1904
  else if (dynamicReserve > 0.40) {
1754
1905
  console.info(`[hypermem:compositor] dynamic_reserve=${Math.round(dynamicReserve * 100)}% avg_turn_cost=${Math.round(avgTurnCost / 1000)}k horizon=${this.config.dynamicReserveTurnHorizon ?? 5}`);
1755
1906
  }
1907
+ const composedAt = new Date().toISOString();
1908
+ // ─── Write Window Cache ─────────────────────────────
1909
+ // Cache the composed message array so the plugin can serve it directly
1910
+ // on the next assemble() call without re-running the full compose pipeline.
1911
+ // Short TTL (120s). External L4 mutations should set skipWindowCache=true.
1912
+ //
1913
+ // VS-1: Dual-write, session-scoped key for backwards compat;
1914
+ // topic-scoped key for per-topic window retrieval when activeTopicId is set.
1915
+ try {
1916
+ await this.cache.setWindow(request.agentId, request.sessionKey, messages, 120);
1917
+ await this.cache.setWindowMeta(request.agentId, request.sessionKey, {
1918
+ slots: slots,
1919
+ totalTokens,
1920
+ warnings,
1921
+ diagnostics,
1922
+ composedAt,
1923
+ }, 120);
1924
+ }
1925
+ catch {
1926
+ // Window cache write is best-effort
1927
+ }
1928
+ if (composedActiveTopicId) {
1929
+ try {
1930
+ await this.cache.setTopicWindow(request.agentId, request.sessionKey, composedActiveTopicId, messages, 120);
1931
+ }
1932
+ catch {
1933
+ // Topic window write is best-effort
1934
+ }
1935
+ }
1936
+ // ─── Write Session Cursor ─────────────────────────────────
1937
+ // Record the newest message included in the submission window.
1938
+ // Background indexer uses this to find unprocessed high-signal content.
1939
+ if (request.includeHistory !== false && slots.history > 0) {
1940
+ try {
1941
+ const historyMsgs = messages.filter(m => m.role !== 'system');
1942
+ const lastHistoryMsg = historyMsgs.length > 0 ? historyMsgs[historyMsgs.length - 1] : null;
1943
+ if (lastHistoryMsg) {
1944
+ const sm = lastHistoryMsg;
1945
+ if (sm.id != null && sm.messageIndex != null) {
1946
+ const cursor = {
1947
+ lastSentId: sm.id,
1948
+ lastSentIndex: sm.messageIndex,
1949
+ lastSentAt: composedAt,
1950
+ windowSize: historyMsgs.length,
1951
+ tokenCount: totalTokens,
1952
+ };
1953
+ await this.cache.setCursor(request.agentId, request.sessionKey, cursor);
1954
+ try {
1955
+ db.prepare(`
1956
+ UPDATE conversations
1957
+ SET cursor_last_sent_id = ?,
1958
+ cursor_last_sent_index = ?,
1959
+ cursor_last_sent_at = ?,
1960
+ cursor_window_size = ?,
1961
+ cursor_token_count = ?
1962
+ WHERE session_key = ?
1963
+ `).run(cursor.lastSentId, cursor.lastSentIndex, cursor.lastSentAt, cursor.windowSize, cursor.tokenCount, request.sessionKey);
1964
+ }
1965
+ catch {
1966
+ // SQLite cursor write is best-effort, don't block compose
1967
+ }
1968
+ }
1969
+ }
1970
+ }
1971
+ catch {
1972
+ // Cursor write is best-effort
1973
+ }
1974
+ }
1756
1975
  console.log(`[hypermem:compose] agent=${request.agentId} triggers=${diagTriggerHits} fallback=${diagTriggerFallbackUsed} facts=${diagFactsIncluded} semantic=${diagSemanticResults} chunks=${diagDocChunkCollections} scopeFiltered=${diagScopeFiltered} mode=${diagRetrievalMode} crossTopicKeystones=${diagCrossTopicKeystones}`);
1757
1976
  return {
1758
1977
  messages: outputMessages,
@@ -1774,6 +1993,27 @@ export class Compositor {
1774
1993
  const conversation = store.getConversation(sessionKey);
1775
1994
  if (!conversation)
1776
1995
  return;
1996
+ // Phase 3 (Turn DAG): resolve active context for DAG-native warm preload.
1997
+ // Uses context.head_message_id to walk only the active branch.
1998
+ let activeContext = null;
1999
+ try {
2000
+ activeContext = getActiveContext(db, agentId, sessionKey);
2001
+ }
2002
+ catch {
2003
+ // Context resolution is best-effort
2004
+ }
2005
+ // Phase 0 fence enforcement: resolve compaction fence for warm bootstrap.
2006
+ // Fence remains as transitional safety — primary scoping is via DAG walk.
2007
+ let warmFenceMessageId;
2008
+ try {
2009
+ ensureCompactionFenceSchema(db);
2010
+ const fence = getCompactionFence(db, conversation.id);
2011
+ if (fence)
2012
+ warmFenceMessageId = fence.fenceMessageId;
2013
+ }
2014
+ catch {
2015
+ // Fence lookup is best-effort
2016
+ }
1777
2017
  // Fetch a generous pool from SQLite, apply gradient transform, then
1778
2018
  // token-budget-cap the warm set. This replaces the old WARM_BOOTSTRAP_CAP
1779
2019
  // message-count constant which was a blunt instrument — 100 messages of
@@ -1781,9 +2021,21 @@ export class Compositor {
1781
2021
  // Warm budget uses the same reserve fraction as compose() so warm history
1782
2022
  // never pre-fills more than compose() would actually allow.
1783
2023
  const reserve = this.config.contextWindowReserve ?? 0.15;
1784
- const effectiveBudget = resolveModelBudget(opts?.model, this.config.defaultTokenBudget, reserve);
2024
+ const effectiveBudget = resolveModelBudget(opts?.model, this.config.defaultTokenBudget, reserve, this.config.budgetFraction);
1785
2025
  const warmBudget = Math.floor(effectiveBudget * (this.config.warmHistoryBudgetFraction ?? 0.4));
1786
- const rawHistory = store.getRecentMessages(conversation.id, this.config.maxHistoryMessages);
2026
+ // Phase 3 (Turn DAG): prefer DAG walk from context head for warm preload.
2027
+ // This ensures only active-branch messages enter the warm cache.
2028
+ let rawHistory;
2029
+ if (activeContext?.headMessageId) {
2030
+ rawHistory = store.getHistoryByDAGWalk(activeContext.headMessageId, this.config.maxHistoryMessages);
2031
+ // DAG walk may return empty for legacy data — fall back to fence-scoped query
2032
+ if (rawHistory.length === 0) {
2033
+ rawHistory = store.getRecentMessages(conversation.id, this.config.maxHistoryMessages, warmFenceMessageId);
2034
+ }
2035
+ }
2036
+ else {
2037
+ rawHistory = store.getRecentMessages(conversation.id, this.config.maxHistoryMessages, warmFenceMessageId);
2038
+ }
1787
2039
  const transformedForWarm = applyToolGradient(rawHistory, {
1788
2040
  totalWindowTokens: resolveModelWindow(opts?.model, this.config.defaultTokenBudget),
1789
2041
  });
@@ -1808,6 +2060,10 @@ export class Compositor {
1808
2060
  // compose() calls buildFactsFromDb() and buildCrossSessionContext() directly
1809
2061
  // from SQLite on every turn (~0.3ms each) — faster than a Redis GET round-trip.
1810
2062
  // Caching them here would create stale entries that compose() ignores anyway.
2063
+ // Invalidate the window cache so the next compose rebuilds with the fresh
2064
+ // system/identity slots. Without this, the fast-exit returns a stale bundle
2065
+ // that predates the warm and reports identity=0.
2066
+ await this.cache.invalidateWindow(agentId, sessionKey);
1811
2067
  await this.cache.warmSession(agentId, sessionKey, {
1812
2068
  system: opts?.systemPrompt,
1813
2069
  identity: opts?.identity,
@@ -1829,7 +2085,36 @@ export class Compositor {
1829
2085
  const conversation = store.getConversation(sessionKey);
1830
2086
  if (!conversation)
1831
2087
  return;
1832
- const rawHistory = store.getRecentMessages(conversation.id, this.config.maxHistoryMessages);
2088
+ // Phase 3 (Turn DAG): resolve active context for DAG-native gradient refresh
2089
+ let activeContext = null;
2090
+ try {
2091
+ activeContext = getActiveContext(db, agentId, sessionKey);
2092
+ }
2093
+ catch {
2094
+ // Context resolution is best-effort
2095
+ }
2096
+ // Phase 0 fence enforcement for gradient refresh (transitional safety)
2097
+ let gradientFenceMessageId;
2098
+ try {
2099
+ ensureCompactionFenceSchema(db);
2100
+ const fence = getCompactionFence(db, conversation.id);
2101
+ if (fence)
2102
+ gradientFenceMessageId = fence.fenceMessageId;
2103
+ }
2104
+ catch {
2105
+ // Fence lookup is best-effort
2106
+ }
2107
+ // Phase 3: prefer DAG walk from context head
2108
+ let rawHistory;
2109
+ if (activeContext?.headMessageId) {
2110
+ rawHistory = store.getHistoryByDAGWalk(activeContext.headMessageId, this.config.maxHistoryMessages);
2111
+ if (rawHistory.length === 0) {
2112
+ rawHistory = store.getRecentMessages(conversation.id, this.config.maxHistoryMessages, gradientFenceMessageId);
2113
+ }
2114
+ }
2115
+ else {
2116
+ rawHistory = store.getRecentMessages(conversation.id, this.config.maxHistoryMessages, gradientFenceMessageId);
2117
+ }
1833
2118
  const transformedHistory = applyToolGradient(rawHistory, {
1834
2119
  totalWindowTokens: tokenBudget && tokenBudget > 0
1835
2120
  ? Math.max(tokenBudget, Math.floor(tokenBudget / 0.80))
@@ -1890,21 +2175,30 @@ export class Compositor {
1890
2175
  * The Redis path is unaffected — Redis doesn't index by topic, so topic
1891
2176
  * filtering only applies to the SQLite fallback.
1892
2177
  */
1893
- async getHistory(agentId, sessionKey, limit, store, topicId) {
2178
+ async getHistory(agentId, sessionKey, limit, store, topicId, fenceMessageId, activeContext) {
1894
2179
  // Pass limit through to Redis — this is the correct enforcement point.
1895
2180
  // Previously getHistory() ignored the limit on the Redis path (LRANGE 0 -1),
1896
2181
  // meaning historyDepth in the compose request had no effect on hot sessions.
1897
2182
  const cached = await this.cache.getHistory(agentId, sessionKey, limit);
1898
2183
  if (cached.length > 0)
1899
2184
  return cached;
2185
+ // Phase 3 (Turn DAG): walk from context.head_message_id backward through
2186
+ // parent_id links. This is the primary correctness mechanism — the fence
2187
+ // remains as transitional safety only.
2188
+ if (activeContext?.headMessageId) {
2189
+ const dagMessages = store.getHistoryByDAGWalk(activeContext.headMessageId, limit);
2190
+ if (dagMessages.length > 0)
2191
+ return dagMessages;
2192
+ // DAG walk returned empty (e.g., legacy data without parent chains) — fall through
2193
+ }
1900
2194
  const conversation = store.getConversation(sessionKey);
1901
2195
  if (!conversation)
1902
2196
  return [];
1903
2197
  if (topicId) {
1904
2198
  // P3.4: Option B — active topic messages + legacy NULL messages
1905
- return store.getRecentMessagesByTopic(conversation.id, topicId, limit);
2199
+ return store.getRecentMessagesByTopic(conversation.id, topicId, limit, fenceMessageId);
1906
2200
  }
1907
- return store.getRecentMessages(conversation.id, limit);
2201
+ return store.getRecentMessages(conversation.id, limit, fenceMessageId);
1908
2202
  }
1909
2203
  // ─── L4 Library Builders ─────────────────────────────────────
1910
2204
  /**
@@ -2048,10 +2342,15 @@ export class Compositor {
2048
2342
  * @param precomputedEmbedding — optional pre-computed embedding for the query.
2049
2343
  * When provided, the Ollama call inside VectorStore.search() is skipped.
2050
2344
  */
2051
- async buildSemanticRecall(userMessage, agentId, maxTokens, libraryDb, precomputedEmbedding) {
2345
+ async buildSemanticRecall(userMessage, agentId, maxTokens, libraryDb, precomputedEmbedding, existingFingerprints // C2: skip results already in Active Facts
2346
+ ) {
2052
2347
  const libDb = libraryDb || this.libraryDb;
2053
2348
  if (!libDb && !this.vectorStore)
2054
2349
  return null;
2350
+ // Inline fingerprint helper (mirrors compose-scope version; C2 dedup only used here)
2351
+ const fpCheck = existingFingerprints
2352
+ ? (text) => existingFingerprints.has(text.toLowerCase().replace(/\s+/g, ' ').trim().slice(0, 120))
2353
+ : () => false;
2055
2354
  // Use hybrid search when library DB is available
2056
2355
  if (libDb) {
2057
2356
  const results = await hybridSearch(libDb, this.vectorStore, userMessage, {
@@ -2108,6 +2407,10 @@ export class Compositor {
2108
2407
  // (score >= 0.04) for episodes to make it into assembled context.
2109
2408
  if (result.sourceTable === 'episodes' && result.score < 0.04)
2110
2409
  continue;
2410
+ // C2: Skip results whose content is already fingerprinted (e.g. in Active Facts)
2411
+ // Dedup count is not tracked separately here — compose-level counter covers the other paths.
2412
+ if (fpCheck(result.content))
2413
+ continue;
2111
2414
  const label = this.formatHybridResult(result);
2112
2415
  const lineTokens = estimateTokens(label);
2113
2416
  if (tokens + lineTokens > maxTokens)
@@ -2181,7 +2484,11 @@ export class Compositor {
2181
2484
  * Build cross-session context by finding recent activity
2182
2485
  * in other sessions for this agent.
2183
2486
  */
2184
- buildCrossSessionContext(agentId, currentSessionKey, db, _libraryDb) {
2487
+ // TODO Phase 1: buildCrossSessionContext queries OTHER conversations. Each has its
2488
+ // own compaction fence. Per-conversation fence filtering should be added here so
2489
+ // zombie messages from other sessions don't leak into cross-session context.
2490
+ buildCrossSessionContext(agentId, currentSessionKey, db, _libraryDb, existingFingerprints // C3: skip entries already in facts/semantic recall
2491
+ ) {
2185
2492
  const conversation = db.prepare('SELECT id FROM conversations WHERE session_key = ?').get(currentSessionKey);
2186
2493
  if (!conversation)
2187
2494
  return null;
@@ -2199,11 +2506,18 @@ export class Compositor {
2199
2506
  `).all(agentId, conversation.id);
2200
2507
  if (rows.length === 0)
2201
2508
  return null;
2202
- const lines = rows.map(r => {
2509
+ const fpCheck = existingFingerprints
2510
+ ? (text) => existingFingerprints.has(text.toLowerCase().replace(/\s+/g, ' ').trim().slice(0, 120))
2511
+ : () => false;
2512
+ const lines = [];
2513
+ for (const r of rows) {
2514
+ // C3: Skip cross-session entries whose content fingerprint already appears in context
2515
+ if (fpCheck(r.text_content))
2516
+ continue;
2203
2517
  const preview = r.text_content.substring(0, 200);
2204
- return `- [${r.channel_type}/${r.role} @ ${r.created_at}] ${preview}`;
2205
- });
2206
- return lines.join('\n');
2518
+ lines.push(`- [${r.channel_type}/${r.role} @ ${r.created_at}] ${preview}`);
2519
+ }
2520
+ return lines.length > 0 ? lines.join('\n') : null;
2207
2521
  }
2208
2522
  // ─── Utilities ───────────────────────────────────────────────
2209
2523
  /**
@@ -2244,7 +2558,7 @@ export class Compositor {
2244
2558
  * Returns null if keystones cannot be injected (no cutoff ID found,
2245
2559
  * no candidates, or all errors).
2246
2560
  */
2247
- async buildKeystones(db, agentId, includedHistory, historyTokens, keystoneFraction, keystoneMaxMsgs, prompt, libraryDb) {
2561
+ async buildKeystones(db, agentId, includedHistory, historyTokens, keystoneFraction, keystoneMaxMsgs, prompt, libraryDb, fenceMessageId, activeContext) {
2248
2562
  const keystoneBudget = Math.floor(historyTokens * keystoneFraction);
2249
2563
  if (keystoneBudget <= 0)
2250
2564
  return null;
@@ -2300,6 +2614,14 @@ export class Compositor {
2300
2614
  // Episodes query is best-effort
2301
2615
  }
2302
2616
  }
2617
+ const fenceClause = fenceMessageId != null ? 'AND m.id >= ?' : '';
2618
+ // Phase 3 (Turn DAG): prefer context_id scoping over conversation_id+fence
2619
+ const contextClause = activeContext ? 'AND m.context_id = ?' : '';
2620
+ const baseParams = [conversationId, cutoffId];
2621
+ if (fenceMessageId != null)
2622
+ baseParams.push(fenceMessageId);
2623
+ if (activeContext)
2624
+ baseParams.push(activeContext.id);
2303
2625
  const baseQuery = `
2304
2626
  SELECT
2305
2627
  m.id,
@@ -2310,6 +2632,8 @@ export class Compositor {
2310
2632
  FROM messages m
2311
2633
  WHERE m.conversation_id = ?
2312
2634
  AND m.id < ?
2635
+ ${fenceClause}
2636
+ ${contextClause}
2313
2637
  AND m.text_content IS NOT NULL
2314
2638
  AND m.is_heartbeat = 0
2315
2639
  AND m.text_content != ''
@@ -2324,6 +2648,12 @@ export class Compositor {
2324
2648
  .join(' OR ');
2325
2649
  if (ftsTerms) {
2326
2650
  try {
2651
+ const ftsParams = [conversationId, cutoffId];
2652
+ if (fenceMessageId != null)
2653
+ ftsParams.push(fenceMessageId);
2654
+ if (activeContext)
2655
+ ftsParams.push(activeContext.id);
2656
+ ftsParams.push(ftsTerms);
2327
2657
  candidateRows = db.prepare(`
2328
2658
  SELECT
2329
2659
  m.id,
@@ -2334,6 +2664,8 @@ export class Compositor {
2334
2664
  FROM messages m
2335
2665
  WHERE m.conversation_id = ?
2336
2666
  AND m.id < ?
2667
+ ${fenceClause}
2668
+ ${contextClause}
2337
2669
  AND m.text_content IS NOT NULL
2338
2670
  AND m.is_heartbeat = 0
2339
2671
  AND m.text_content != ''
@@ -2343,19 +2675,19 @@ export class Compositor {
2343
2675
  LIMIT 100
2344
2676
  )
2345
2677
  LIMIT 200
2346
- `).all(conversationId, cutoffId, ftsTerms);
2678
+ `).all(...ftsParams);
2347
2679
  }
2348
2680
  catch {
2349
2681
  // FTS query may fail on special characters — fall back to base query
2350
- candidateRows = db.prepare(baseQuery).all(conversationId, cutoffId);
2682
+ candidateRows = db.prepare(baseQuery).all(...baseParams);
2351
2683
  }
2352
2684
  }
2353
2685
  else {
2354
- candidateRows = db.prepare(baseQuery).all(conversationId, cutoffId);
2686
+ candidateRows = db.prepare(baseQuery).all(...baseParams);
2355
2687
  }
2356
2688
  }
2357
2689
  else {
2358
- candidateRows = db.prepare(baseQuery).all(conversationId, cutoffId);
2690
+ candidateRows = db.prepare(baseQuery).all(...baseParams);
2359
2691
  }
2360
2692
  if (candidateRows.length === 0)
2361
2693
  return null;
@@ -2437,7 +2769,7 @@ export class Compositor {
2437
2769
  * @param maxKeystones - Max cross-topic keystones to return (default 3)
2438
2770
  * @returns Scored keystones sorted by score DESC, deduplicated by message id
2439
2771
  */
2440
- async getKeystonesByTopic(agentId, sessionKey, activeTopic, currentMessages, db, maxKeystones = 3) {
2772
+ async getKeystonesByTopic(agentId, sessionKey, activeTopic, currentMessages, db, maxKeystones = 3, fenceMessageId, activeContext) {
2441
2773
  const otherTopics = db.prepare(`
2442
2774
  SELECT id, name
2443
2775
  FROM topics
@@ -2458,6 +2790,14 @@ export class Compositor {
2458
2790
  for (const topic of otherTopics) {
2459
2791
  let topicMessages;
2460
2792
  try {
2793
+ const topicFenceClause = fenceMessageId != null ? 'AND m.id >= ?' : '';
2794
+ // Phase 3 (Turn DAG): constrain cross-topic queries to active context_id
2795
+ const topicContextClause = activeContext ? 'AND m.context_id = ?' : '';
2796
+ const topicParams = [sessionKey, agentId, topic.id];
2797
+ if (fenceMessageId != null)
2798
+ topicParams.push(fenceMessageId);
2799
+ if (activeContext)
2800
+ topicParams.push(activeContext.id);
2461
2801
  topicMessages = db.prepare(`
2462
2802
  SELECT m.id, m.message_index, m.role, m.text_content, m.created_at
2463
2803
  FROM messages m
@@ -2465,12 +2805,14 @@ export class Compositor {
2465
2805
  WHERE c.session_key = ?
2466
2806
  AND c.agent_id = ?
2467
2807
  AND m.topic_id = ?
2808
+ ${topicFenceClause}
2809
+ ${topicContextClause}
2468
2810
  AND m.text_content IS NOT NULL
2469
2811
  AND m.text_content != ''
2470
2812
  AND m.is_heartbeat = 0
2471
2813
  ORDER BY m.message_index DESC
2472
2814
  LIMIT 50
2473
- `).all(sessionKey, agentId, topic.id);
2815
+ `).all(...topicParams);
2474
2816
  }
2475
2817
  catch {
2476
2818
  // Corrupt topic data — skip this topic, never throw