@psiclawops/hypermem 0.5.4 → 0.5.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -24,6 +24,15 @@ import { getActiveFOS, matchMOD, renderFOS, renderMOD, renderLightFOS, resolveOu
24
24
  import { KnowledgeStore } from './knowledge-store.js';
25
25
  import { TemporalStore, hasTemporalSignals } from './temporal-store.js';
26
26
  import { isOpenDomainQuery, searchOpenDomain } from './open-domain.js';
27
+ /**
28
+ * Files that OpenClaw's contextInjection injects into the system prompt.
29
+ * HyperMem must not re-inject these via doc chunk retrieval to avoid duplication.
30
+ * Exported so plugin and other consumers can share the same dedup set.
31
+ */
32
+ export const OPENCLAW_BOOTSTRAP_FILES = new Set([
33
+ 'SOUL.md', 'IDENTITY.md', 'USER.md', 'TOOLS.md',
34
+ 'AGENTS.md', 'HEARTBEAT.md', 'MEMORY.md', 'BOOTSTRAP.md',
35
+ ]);
27
36
  /**
28
37
  * Model context window sizes by provider/model string (or partial match).
29
38
  * Used as fallback when tokenBudget is not passed by the runtime.
@@ -65,7 +74,26 @@ const MODEL_CONTEXT_WINDOWS = [
65
74
  * Default reserve: 25% (leaves 75% for input context).
66
75
  * Falls back to defaultTokenBudget if no model match.
67
76
  */
68
- function resolveModelBudget(model, defaultBudget, reserve = 0.15) {
77
+ /**
78
+ * Resolve effective input token budget for a model.
79
+ *
80
+ * Priority:
81
+ * 1. If budgetFraction is set AND model window is detected: window × budgetFraction × (1 - reserve)
82
+ * 2. If model window detected but no budgetFraction: window × (1 - reserve)
83
+ * 3. Fallback to defaultTokenBudget (absolute number)
84
+ */
85
+ function resolveModelBudget(model, defaultBudget, reserve = 0.15, budgetFraction) {
86
+ const window = resolveModelWindow(model, defaultBudget);
87
+ // If we detected an actual model window (not the fallback derivation)
88
+ if (model && budgetFraction != null) {
89
+ const normalized = model.toLowerCase();
90
+ for (const entry of MODEL_CONTEXT_WINDOWS) {
91
+ if (normalized.includes(entry.pattern)) {
92
+ return Math.floor(entry.tokens * budgetFraction * (1 - reserve));
93
+ }
94
+ }
95
+ }
96
+ // Original path: detected window × (1 - reserve), or absolute fallback
69
97
  if (!model)
70
98
  return defaultBudget;
71
99
  const normalized = model.toLowerCase();
@@ -102,12 +130,19 @@ function resolveModelWindow(model, defaultBudget) {
102
130
  * emit a warning or trigger checkpointing.
103
131
  */
104
132
  function computeDynamicReserve(recentMessages, totalWindow, config) {
105
- const base = config.contextWindowReserve ?? 0.15;
133
+ const base = config.reserveFraction ?? config.contextWindowReserve ?? 0.25;
106
134
  const horizon = config.dynamicReserveTurnHorizon ?? 5;
107
135
  const max = config.dynamicReserveMax ?? 0.50;
108
136
  const enabled = config.dynamicReserveEnabled ?? true;
109
- if (!enabled || recentMessages.length === 0 || totalWindow <= 0) {
110
- return { reserve: base, avgTurnCost: 0, dynamic: false, pressureHigh: false };
137
+ // Cold sessions (no message history) use a minimal floor so the full window
138
+ // stays available. The static reserveFraction applies only once the session
139
+ // has messages and dynamic sampling can compute a meaningful estimate.
140
+ const COLD_SESSION_FLOOR = 0.15;
141
+ if (!enabled || totalWindow <= 0) {
142
+ return { reserve: COLD_SESSION_FLOOR, avgTurnCost: 0, dynamic: false, pressureHigh: false };
143
+ }
144
+ if (recentMessages.length === 0) {
145
+ return { reserve: COLD_SESSION_FLOOR, avgTurnCost: 0, dynamic: false, pressureHigh: false };
111
146
  }
112
147
  // Sample the last 20 user+assistant messages for turn cost estimation.
113
148
  // Tool messages are excluded — they're already compressed by the gradient
@@ -131,17 +166,26 @@ function computeDynamicReserve(recentMessages, totalWindow, config) {
131
166
  return { reserve: dynamicFrac, avgTurnCost, dynamic: true, pressureHigh: false };
132
167
  }
133
168
  const DEFAULT_CONFIG = {
169
+ // Primary budget controls
170
+ budgetFraction: 0.703,
171
+ reserveFraction: 0.25,
172
+ historyFraction: 0.40,
173
+ memoryFraction: 0.40,
174
+ // Absolute fallback
134
175
  defaultTokenBudget: 90000,
176
+ // History internals
135
177
  maxHistoryMessages: 250,
136
- maxFacts: 28,
137
- maxCrossSessionContext: 6000,
138
- maxRecentToolPairs: 3,
139
- maxProseToolPairs: 10,
140
178
  warmHistoryBudgetFraction: 0.4,
141
179
  keystoneHistoryFraction: 0.2,
142
180
  keystoneMaxMessages: 15,
143
181
  keystoneMinSignificance: 0.5,
144
- contextWindowReserve: 0.15,
182
+ // Memory internals
183
+ maxFacts: 28,
184
+ maxCrossSessionContext: 6000,
185
+ // Tool gradient (internal)
186
+ maxRecentToolPairs: 3,
187
+ maxProseToolPairs: 10,
188
+ // Dynamic reserve
145
189
  dynamicReserveTurnHorizon: 5,
146
190
  dynamicReserveMax: 0.50,
147
191
  dynamicReserveEnabled: true,
@@ -328,7 +372,7 @@ function stripSecurityPreamble(content) {
328
372
  return stripped.trim().length > 20 ? stripped.trim() : content;
329
373
  }
330
374
  // Minimum floor: if trimming would leave less than 30% of original content, return a
331
- // stripped sentinel instead of a misleading fragment. A partial result that looks
375
  // stripped sentinel instead of a misleading fragment. A partial result that looks
332
376
  // complete is worse than a clear signal that the result was dropped.
333
377
  // Applied only in applyTierPayloadCap (pressure-driven trimming), not in structural
334
378
  // truncation paths where head+tail is always semantically useful.
@@ -598,9 +642,9 @@ function applyTierPayloadCap(msg, perResultCap, perTurnCap, usedSoFar = 0, maxTa
598
642
  // render the truncated result as: [security notice] + [middle marker] + [last line].
599
643
  const stripped = stripSecurityPreamble(content);
600
644
  // Floor check (TUNE-015): if the cap would leave less than 30% of the stripped content
601
- // AND less than 2000 chars absolute, return a sentinel instead of a misleading fragment.
645
+ // AND less than 2000 chars absolute, return a sentinel instead of a misleading fragment.
602
646
  // Partial results that look complete are worse than a clear dropped-result signal.
603
- // The absolute floor prevents the sentinel from firing on large natural truncations
647
+ // The absolute floor prevents the sentinel from firing on large natural truncations
604
648
  // (e.g., 110k → 16k is a meaningful slice, not a misleading fragment).
605
649
  if (perResultCap < stripped.length * TOOL_GRADIENT_MIN_USEFUL_FRACTION && perResultCap < 2_000) {
606
650
  content = `[result too large for current context budget \u2014 ${stripped.length} chars stripped]`;
@@ -784,6 +828,73 @@ export class Compositor {
784
828
  async compose(request, db, libraryDb) {
785
829
  const store = new MessageStore(db);
786
830
  const libDb = libraryDb || this.libraryDb;
831
+ const toComposeOutputMessages = (inputMessages) => {
832
+ // When skipProviderTranslation is set, compose returns the neutral window
833
+ // typed as ProviderMessage[] by contract. The runtime translates later.
834
+ return request.skipProviderTranslation
835
+ ? inputMessages
836
+ : toProviderFormat(inputMessages, request.provider ?? request.model ?? null);
837
+ };
838
+ // ── C4: Window cache fast-exit ────────────────────────────
839
+ // If nothing has changed since the last compose (cursor.lastSentId >= newest
840
+ // message id in the DB), skip the full pipeline and return the cached window.
841
+ // Particularly effective for low-frequency sessions (heartbeat agents, council
842
+ // seats between rounds). TTL on the cache write remains 120s — this is a
843
+ // conservative early-exit before the TTL expires, not a TTL extension.
844
+ if (request.includeHistory !== false && request.skipWindowCache !== true) {
845
+ try {
846
+ const newestRow = db.prepare('SELECT MAX(id) AS maxId FROM messages WHERE agent_id = ?').get(request.agentId);
847
+ const newestMsgId = newestRow?.maxId;
848
+ if (newestMsgId != null) {
849
+ const cachedBundle = await this.cache.getFreshWindowBundle(request.agentId, request.sessionKey, newestMsgId);
850
+ if (cachedBundle) {
851
+ // Validate the cached bundle is compatible with this request.
852
+ // A mismatch on any of these means we must do a full compose:
853
+ // - tokenBudget: cached total exceeds the requested cap
854
+ // - slot flags: caller disabled slots that the cache populated
855
+ // - historyDepth: caller wants fewer messages than the cache holds
856
+ const cachedTotal = cachedBundle.meta.totalTokens;
857
+ const budgetOk = !request.tokenBudget ||
858
+ cachedTotal <= request.tokenBudget * 1.05;
859
+ const factsOk = request.includeFacts !== false ||
860
+ (cachedBundle.meta.slots['facts'] ?? 0) === 0;
861
+ const libraryOk = request.includeLibrary !== false ||
862
+ (cachedBundle.meta.slots['library'] ?? 0) === 0;
863
+ const contextOk = request.includeContext !== false ||
864
+ (cachedBundle.meta.slots['context'] ?? 0) === 0;
865
+ // historyDepth constrains how many messages the caller wants;
866
+ // we can't slice a cached bundle safely, so skip cache.
867
+ const depthOk = !request.historyDepth;
868
+ if (budgetOk && factsOk && libraryOk && contextOk && depthOk) {
869
+ const cachedSlots = {
870
+ system: cachedBundle.meta.slots['system'] ?? 0,
871
+ identity: cachedBundle.meta.slots['identity'] ?? 0,
872
+ history: cachedBundle.meta.slots['history'] ?? 0,
873
+ facts: cachedBundle.meta.slots['facts'] ?? 0,
874
+ context: cachedBundle.meta.slots['context'] ?? 0,
875
+ library: cachedBundle.meta.slots['library'] ?? 0,
876
+ };
877
+ return {
878
+ messages: toComposeOutputMessages(cachedBundle.messages),
879
+ tokenCount: cachedBundle.meta.totalTokens,
880
+ slots: cachedSlots,
881
+ truncated: false,
882
+ hasWarnings: cachedBundle.meta.warnings.length > 0,
883
+ warnings: cachedBundle.meta.warnings,
884
+ diagnostics: {
885
+ ...cachedBundle.meta.diagnostics,
886
+ windowCacheHit: true,
887
+ },
888
+ };
889
+ }
890
+ // Incompatible request — fall through to full compose
891
+ }
892
+ }
893
+ }
894
+ catch {
895
+ // Cache fast-exit is best-effort, fall through to full compose
896
+ }
897
+ }
787
898
  // Dynamic reserve: use a lightweight SQLite sample to estimate avg turn cost
788
899
  // BEFORE assembling the full context. This gives us the reserve fraction we
789
900
  // need to compute the effective token budget at the start of compose.
@@ -794,7 +905,7 @@ export class Compositor {
794
905
  ? store.getRecentMessages(sampleConv.id, 40)
795
906
  : [];
796
907
  const { reserve: dynamicReserve, avgTurnCost, dynamic: isDynamic, pressureHigh } = computeDynamicReserve(sampleMessages, totalWindow, this.config);
797
- const budget = request.tokenBudget || resolveModelBudget(request.model, this.config.defaultTokenBudget, dynamicReserve);
908
+ const budget = request.tokenBudget || resolveModelBudget(request.model, this.config.defaultTokenBudget, dynamicReserve, this.config.budgetFraction);
798
909
  let remaining = budget;
799
910
  const warnings = [];
800
911
  const slots = {
@@ -838,7 +949,7 @@ export class Compositor {
838
949
  if (remaining > 100 && request.includeLibrary !== false) {
839
950
  const fosEnabled = this.config?.enableFOS !== false;
840
951
  const modEnabled = this.config?.enableMOD !== false;
841
- const outputTier = resolveOutputTier((this.config?.outputProfile ?? this.config?.outputStandard), fosEnabled, modEnabled);
952
+ const outputTier = resolveOutputTier((this.config?.hyperformProfile ?? this.config?.outputProfile ?? this.config?.outputStandard), fosEnabled, modEnabled);
842
953
  const stableOutputParts = [];
843
954
  if (outputTier.tier === 'light') {
844
955
  stableOutputParts.push(renderLightFOS().join('\n'));
@@ -945,9 +1056,16 @@ export class Compositor {
945
1056
  const budgetClusters = clusterNeutralMessages(evictedHistory);
946
1057
  let historyTokens = 0;
947
1058
  const includedClusters = [];
1059
+ // Pre-allocate history budget. historyFraction is a fraction of the
1060
+ // effective token budget (post-reserve). Falls back to unbounded fill
1061
+ // (remaining) when historyFraction is not set.
1062
+ const historyBudget = this.config.historyFraction != null
1063
+ ? Math.floor(budget * this.config.historyFraction)
1064
+ : remaining;
1065
+ const historyFillCap = Math.min(historyBudget, remaining);
948
1066
  for (let i = budgetClusters.length - 1; i >= 0; i--) {
949
1067
  const cluster = budgetClusters[i];
950
- if (historyTokens + cluster.tokenCost > remaining && includedClusters.length > 0) {
1068
+ if (historyTokens + cluster.tokenCost > historyFillCap && includedClusters.length > 0) {
951
1069
  const droppedMsgCount = budgetClusters.slice(0, i + 1).reduce((s, c) => s + c.messages.length, 0);
952
1070
  warnings.push(`History truncated at cluster ${i + 1}/${budgetClusters.length} (${droppedMsgCount} messages dropped)`);
953
1071
  break;
@@ -1059,13 +1177,23 @@ export class Compositor {
1059
1177
  slots.history = historyTokens;
1060
1178
  remaining -= historyTokens;
1061
1179
  }
1062
- // targetBudgetFraction cap: limit total context slots to a fraction of the
1063
- // effective budget. This gives operators a single knob to make the system
1064
- // lighter without tuning individual slot fractions.
1065
- const targetFraction = this.config.targetBudgetFraction ?? 0.65;
1066
- const contextCap = Math.floor(budget * targetFraction);
1067
- if (remaining > contextCap) {
1068
- remaining = contextCap;
1180
+ // Memory budget pool: facts, wiki, semantic recall, cross-session, and
1181
+ // trigger-fired doc chunks all draw from this shared pool via `remaining`.
1182
+ // memoryFraction is a fraction of the effective token budget (post-reserve).
1183
+ // Falls back to targetBudgetFraction cap behavior when memoryFraction is not set.
1184
+ let memoryBudget;
1185
+ if (this.config.memoryFraction != null) {
1186
+ memoryBudget = Math.floor(budget * this.config.memoryFraction);
1187
+ if (remaining > memoryBudget) {
1188
+ remaining = memoryBudget;
1189
+ }
1190
+ }
1191
+ else {
1192
+ const targetFraction = this.config.targetBudgetFraction ?? 0.65;
1193
+ memoryBudget = Math.floor(budget * targetFraction);
1194
+ if (remaining > memoryBudget) {
1195
+ remaining = memoryBudget;
1196
+ }
1069
1197
  }
1070
1198
  // T1.3: Ghost message suppression.
1071
1199
  // If the last message in the included history is a warm-seeded user message
@@ -1099,6 +1227,13 @@ export class Compositor {
1099
1227
  // conversation history (after system/identity).
1100
1228
  const contextParts = [];
1101
1229
  let contextTokens = 0;
1230
+ // ── C1: Content fingerprint dedup set ────────────────────
1231
+ // Replaces fragile substring-match dedup across temporal, open-domain,
1232
+ // semantic recall, and cross-session paths. O(1) lookup on a normalized
1233
+ // 120-char prefix catches rephrased duplicates the old 60-char includes()
1234
+ // match missed without needing a hash.
1235
+ const contextFingerprints = new Set();
1236
+ const fingerprintEntries = new Map();
1102
1237
  // ── Compose-level diagnostics tracking vars ──────────────
1103
1238
  let diagTriggerHits = 0;
1104
1239
  let diagTriggerFallbackUsed = false;
@@ -1106,23 +1241,48 @@ export class Compositor {
1106
1241
  let diagSemanticResults = 0;
1107
1242
  let diagDocChunkCollections = 0;
1108
1243
  let diagScopeFiltered = 0;
1244
+ let diagFingerprintDedups = 0;
1245
+ let diagFingerprintCollisions = 0;
1109
1246
  let diagRetrievalMode = 'none';
1247
+ function normalizeFingerprintText(text) {
1248
+ return text.toLowerCase().replace(/\s+/g, ' ').trim();
1249
+ }
1250
+ function contentFingerprint(text) {
1251
+ return normalizeFingerprintText(text).slice(0, 120);
1252
+ }
1253
+ function addFingerprint(text) {
1254
+ const normalized = normalizeFingerprintText(text);
1255
+ const fingerprint = normalized.slice(0, 120);
1256
+ contextFingerprints.add(fingerprint);
1257
+ const entries = fingerprintEntries.get(fingerprint) ?? new Set();
1258
+ entries.add(normalized);
1259
+ fingerprintEntries.set(fingerprint, entries);
1260
+ }
1261
+ function isDuplicate(text) {
1262
+ const normalized = normalizeFingerprintText(text);
1263
+ const fingerprint = normalized.slice(0, 120);
1264
+ if (!contextFingerprints.has(fingerprint))
1265
+ return false;
1266
+ const entries = fingerprintEntries.get(fingerprint);
1267
+ if (entries && !entries.has(normalized))
1268
+ diagFingerprintCollisions += 1;
1269
+ return true;
1270
+ }
1110
1271
  // ── Wiki Page (L4: Library — active topic synthesis) ──────
1111
1272
  // Inject synthesized wiki page for the active topic before general knowledge.
1112
- // Token budget: capped at 15% of remaining.
1273
+ // Draws from the shared memory budget pool (remaining is pre-capped by memoryBudget).
1113
1274
  if (request.includeLibrary !== false && remaining > 300 && libDb && composedActiveTopicName) {
1114
1275
  const wikiContent = this.buildWikiPageContext(request.agentId, composedActiveTopicName, libDb);
1115
1276
  if (wikiContent) {
1116
1277
  const tokens = estimateTokens(wikiContent);
1117
- const cap = Math.floor(remaining * 0.15);
1118
- if (tokens <= cap) {
1278
+ if (tokens <= remaining) {
1119
1279
  contextParts.push(wikiContent);
1120
1280
  contextTokens += tokens;
1121
1281
  remaining -= tokens;
1122
1282
  slots.library += tokens;
1123
1283
  }
1124
- else {
1125
- const truncated = this.truncateToTokens(wikiContent, cap);
1284
+ else if (remaining > 200) {
1285
+ const truncated = this.truncateToTokens(wikiContent, remaining);
1126
1286
  const truncTokens = estimateTokens(truncated);
1127
1287
  contextParts.push(truncated);
1128
1288
  contextTokens += truncTokens;
@@ -1133,6 +1293,7 @@ export class Compositor {
1133
1293
  }
1134
1294
  // ── Facts (L4: Library) ──────────────────────────────────
1135
1295
  // scope: agent — filtered by agentId via filterByScope after fetch
1296
+ // Draws from the shared memory budget pool (remaining is pre-capped by memoryBudget).
1136
1297
  if (request.includeFacts !== false && remaining > 500) {
1137
1298
  const factsContent = this.buildFactsFromDb(request.agentId, request.sessionKey, libDb || db);
1138
1299
  if (factsContent !== null) {
@@ -1141,21 +1302,27 @@ export class Compositor {
1141
1302
  diagScopeFiltered += scopeFiltered;
1142
1303
  if (content) {
1143
1304
  const tokens = estimateTokens(content);
1144
- if (tokens <= remaining * 0.25) { // Cap facts at 25% of remaining (W4: was 0.3)
1305
+ if (tokens <= remaining) {
1145
1306
  contextParts.push(`## Active Facts\n${content}`);
1146
1307
  contextTokens += tokens;
1147
1308
  remaining -= tokens;
1148
1309
  slots.facts = tokens;
1149
1310
  }
1150
- else {
1151
- // Truncate to budget
1152
- const truncated = this.truncateToTokens(content, Math.floor(remaining * 0.25));
1311
+ else if (remaining > 200) {
1312
+ const truncated = this.truncateToTokens(content, remaining);
1153
1313
  const truncTokens = estimateTokens(truncated);
1154
1314
  contextParts.push(`## Active Facts (truncated)\n${truncated}`);
1155
1315
  contextTokens += truncTokens;
1156
1316
  remaining -= truncTokens;
1157
1317
  slots.facts = truncTokens;
1158
- warnings.push('Facts truncated to fit budget');
1318
+ warnings.push('Facts truncated to fit memory budget');
1319
+ }
1320
+ // C1: Fingerprint each fact line so downstream dedup paths can skip duplicates
1321
+ const factLines = content.split('\n');
1322
+ for (const line of factLines) {
1323
+ if (line.startsWith('- [')) {
1324
+ addFingerprint(line);
1325
+ }
1159
1326
  }
1160
1327
  }
1161
1328
  }
@@ -1173,14 +1340,17 @@ export class Compositor {
1173
1340
  order: 'DESC',
1174
1341
  });
1175
1342
  if (temporalFacts.length > 0) {
1176
- // Deduplicate against facts already in context
1177
- const existingContent = contextParts.join('\n');
1178
- const novel = temporalFacts.filter(f => !existingContent.includes(f.content.slice(0, 60)));
1343
+ // C1: Use fingerprint dedup instead of fragile substring match
1344
+ const beforeCount = temporalFacts.length;
1345
+ const novel = temporalFacts.filter(f => !isDuplicate(f.content));
1346
+ diagFingerprintDedups += beforeCount - novel.length;
1179
1347
  if (novel.length > 0) {
1180
1348
  const temporalBlock = novel
1181
1349
  .map(f => {
1182
1350
  const ts = new Date(f.occurredAt).toISOString().slice(0, 10);
1183
- return `[${ts}] ${f.content}`;
1351
+ const line = `[${ts}] ${f.content}`;
1352
+ addFingerprint(f.content);
1353
+ return line;
1184
1354
  })
1185
1355
  .join('\n');
1186
1356
  const temporalSection = `## Temporal Context\n${temporalBlock}`;
@@ -1214,11 +1384,16 @@ export class Compositor {
1214
1384
  // questions. Primary fix for LoCoMo open-domain F1 gap (0.133 baseline).
1215
1385
  if (request.includeSemanticRecall !== false && queryText && isOpenDomainQuery(queryText) && db && remaining > 300) {
1216
1386
  try {
1217
- const existingContent = contextParts.join('\n');
1218
- const odResults = searchOpenDomain(db, queryText, existingContent, 10);
1387
+ // searchOpenDomain still does intra-result dedup. Existing-context dedup
1388
+ // now happens here via fingerprints so we keep one dedup path.
1389
+ const rawOdResults = searchOpenDomain(db, queryText, '', 10);
1390
+ const beforeOd = rawOdResults.length;
1391
+ const odResults = rawOdResults.filter(r => !isDuplicate(r.content));
1392
+ diagFingerprintDedups += beforeOd - odResults.length;
1219
1393
  if (odResults.length > 0) {
1220
1394
  const odBlock = odResults
1221
1395
  .map(r => {
1396
+ addFingerprint(r.content);
1222
1397
  const ts = r.createdAt
1223
1398
  ? new Date(r.createdAt).toISOString().slice(0, 10)
1224
1399
  : '';
@@ -1314,7 +1489,8 @@ export class Compositor {
1314
1489
  // Redis lookup is best-effort — fall through to Ollama
1315
1490
  }
1316
1491
  const semanticContent = await this.buildSemanticRecall(lastUserMsg, request.agentId, Math.floor(remaining * 0.12), // Cap at 12% of remaining (W4: was 0.15)
1317
- libDb || undefined, precomputedEmbedding);
1492
+ libDb || undefined, precomputedEmbedding, contextFingerprints // C2: skip results already in Active Facts
1493
+ );
1318
1494
  if (semanticContent) {
1319
1495
  const tokens = estimateTokens(semanticContent);
1320
1496
  contextParts.push(`## Related Memory\n${semanticContent}`);
@@ -1388,14 +1564,19 @@ export class Compositor {
1388
1564
  const bLen = Math.max(...matchedKeywords.filter(kw => b.toLowerCase().includes(kw.toLowerCase()) || kw.toLowerCase().includes(b.toLowerCase().slice(0, 5))).map(kw => kw.length), 0);
1389
1565
  return bLen - aLen; // Most specific match first
1390
1566
  });
1567
+ // Sanitize FTS5 terms: quote each word, strip internal quotes, add prefix wildcard.
1568
+ // Matches the pattern used in the keystone history FTS path.
1569
+ const sanitizeFtsTerm = (w) => `"${w.replace(/"/g, '')}"*`;
1391
1570
  const ftsTerms = sortedWords.length > 0
1392
- ? sortedWords.slice(0, 6).map(w => `${w}*`).join(' OR ')
1571
+ ? sortedWords.slice(0, 6).map(sanitizeFtsTerm).join(' OR ')
1393
1572
  : matchedKeywords
1394
1573
  .sort((a, b) => b.length - a.length)
1395
1574
  .slice(0, 3)
1396
- .map(kw => `${kw}*`)
1575
+ .map(sanitizeFtsTerm)
1397
1576
  .join(' OR ');
1398
- const ftsKeyword = ftsTerms || lastMsg.split(/\s+/).slice(0, 3).join(' ');
1577
+ // Fallback uses raw message words — also sanitize to prevent FTS5 syntax errors.
1578
+ const ftsKeyword = ftsTerms || lastMsg.split(/\s+/).slice(0, 3)
1579
+ .map(sanitizeFtsTerm).join(' OR ');
1399
1580
  const chunks = docChunkStore.queryChunks({
1400
1581
  collection: trigger.collection,
1401
1582
  agentId: request.agentId,
@@ -1410,6 +1591,10 @@ export class Compositor {
1410
1591
  for (const chunk of chunks) {
1411
1592
  if (chunkTokens + chunk.tokenEstimate > maxTokens)
1412
1593
  break;
1594
+ // Skip chunks from files OpenClaw already injects into the system prompt
1595
+ const chunkBasename = chunk.sourcePath.split('/').pop() || '';
1596
+ if (OPENCLAW_BOOTSTRAP_FILES.has(chunkBasename))
1597
+ continue;
1413
1598
  chunkLines.push(`### ${chunk.sectionPath}\n${chunk.content}`);
1414
1599
  chunkTokens += chunk.tokenEstimate;
1415
1600
  }
@@ -1438,7 +1623,8 @@ export class Compositor {
1438
1623
  // If refactored to run both paths, cap combined semantic budget to avoid double-recall.
1439
1624
  try {
1440
1625
  const fallbackContent = await Promise.race([
1441
- this.buildSemanticRecall(lastMsg, request.agentId, Math.floor(remaining * 0.10), libDb || undefined),
1626
+ this.buildSemanticRecall(lastMsg, request.agentId, Math.floor(remaining * 0.10), libDb || undefined, undefined, contextFingerprints // C2: skip results already in Active Facts
1627
+ ),
1442
1628
  new Promise((_, reject) => setTimeout(() => reject(new Error('fallback_knn_timeout')), 3000)),
1443
1629
  ]);
1444
1630
  if (fallbackContent) {
@@ -1489,7 +1675,8 @@ export class Compositor {
1489
1675
  }
1490
1676
  // ── Cross-Session Context (L2: Messages) ─────────────────
1491
1677
  if (request.includeContext !== false && remaining > 500) {
1492
- const crossSessionContent = this.buildCrossSessionContext(request.agentId, request.sessionKey, db, libDb);
1678
+ const crossSessionContent = this.buildCrossSessionContext(request.agentId, request.sessionKey, db, libDb, contextFingerprints // C3: skip entries already in facts/semantic recall
1679
+ );
1493
1680
  if (crossSessionContent) {
1494
1681
  const tokens = estimateTokens(crossSessionContent);
1495
1682
  const maxContextTokens = Math.min(this.config.maxCrossSessionContext, Math.floor(remaining * 0.2));
@@ -1584,9 +1771,7 @@ export class Compositor {
1584
1771
  // When skipProviderTranslation is set, return NeutralMessages directly.
1585
1772
  // The context engine plugin uses this: the OpenClaw runtime handles its
1586
1773
  // own provider translation, so double-translating corrupts tool calls.
1587
- const outputMessages = request.skipProviderTranslation
1588
- ? messages
1589
- : toProviderFormat(messages, request.provider ?? request.model ?? null);
1774
+ const outputMessages = toComposeOutputMessages(messages);
1590
1775
  // T1.3: Strip warm-replay provenance flags before output.
1591
1776
  // _warmed is an internal tag added by warmSession() to mark messages
1592
1777
  // seeded from SQLite into Redis. It must not leak into provider submissions
@@ -1615,68 +1800,6 @@ export class Compositor {
1615
1800
  slots.history = (slots.history ?? 0) + delta;
1616
1801
  }
1617
1802
  }
1618
- // ─── Write Window Cache ─────────────────────────────
1619
- // Cache the composed message array so the plugin can serve it directly
1620
- // on the next assemble() call without re-running the full compose pipeline.
1621
- // Short TTL (120s) — invalidated by afterTurn when new messages arrive.
1622
- //
1623
- // VS-1: Dual-write — session-scoped key for backwards compat;
1624
- // topic-scoped key for per-topic window retrieval when activeTopicId is set.
1625
- try {
1626
- await this.cache.setWindow(request.agentId, request.sessionKey, messages, 120);
1627
- }
1628
- catch {
1629
- // Window cache write is best-effort
1630
- }
1631
- // VS-1: Topic-scoped window dual-write
1632
- if (composedActiveTopicId) {
1633
- try {
1634
- await this.cache.setTopicWindow(request.agentId, request.sessionKey, composedActiveTopicId, messages, 120);
1635
- }
1636
- catch {
1637
- // Topic window write is best-effort
1638
- }
1639
- }
1640
- // ─── Write Session Cursor ─────────────────────────────────
1641
- // Record the newest message included in the submission window.
1642
- // Background indexer uses this to find unprocessed high-signal content.
1643
- if (request.includeHistory !== false && slots.history > 0) {
1644
- try {
1645
- const historyMsgs = messages.filter(m => m.role !== 'system');
1646
- const lastHistoryMsg = historyMsgs.length > 0 ? historyMsgs[historyMsgs.length - 1] : null;
1647
- if (lastHistoryMsg) {
1648
- const sm = lastHistoryMsg;
1649
- if (sm.id != null && sm.messageIndex != null) {
1650
- const cursor = {
1651
- lastSentId: sm.id,
1652
- lastSentIndex: sm.messageIndex,
1653
- lastSentAt: new Date().toISOString(),
1654
- windowSize: historyMsgs.length,
1655
- tokenCount: totalTokens,
1656
- };
1657
- await this.cache.setCursor(request.agentId, request.sessionKey, cursor);
1658
- // Dual-write cursor to SQLite for durability across Redis eviction (P1.3)
1659
- try {
1660
- db.prepare(`
1661
- UPDATE conversations
1662
- SET cursor_last_sent_id = ?,
1663
- cursor_last_sent_index = ?,
1664
- cursor_last_sent_at = ?,
1665
- cursor_window_size = ?,
1666
- cursor_token_count = ?
1667
- WHERE session_key = ?
1668
- `).run(cursor.lastSentId, cursor.lastSentIndex, cursor.lastSentAt, cursor.windowSize, cursor.tokenCount, request.sessionKey);
1669
- }
1670
- catch {
1671
- // SQLite cursor write is best-effort — don't block compose
1672
- }
1673
- }
1674
- }
1675
- }
1676
- catch {
1677
- // Cursor write is best-effort
1678
- }
1679
- }
1680
1803
  // ─── Compaction Fence Update ──────────────────────────────
1681
1804
  // Record the oldest message ID that the LLM can see in this compose
1682
1805
  // cycle. Everything below this ID becomes eligible for compaction.
@@ -1746,6 +1869,9 @@ export class Compositor {
1746
1869
  avgTurnCostTokens: avgTurnCost,
1747
1870
  dynamicReserveActive: isDynamic,
1748
1871
  sessionPressureHigh: pressureHigh,
1872
+ fingerprintDedups: diagFingerprintDedups,
1873
+ fingerprintCollisions: diagFingerprintCollisions,
1874
+ windowCacheHit: false,
1749
1875
  };
1750
1876
  if (pressureHigh) {
1751
1877
  warnings.push(`SESSION_PRESSURE_HIGH: avg_turn_cost=${avgTurnCost} tokens, dynamic reserve capped at ${Math.round(dynamicReserve * 100)}%`);
@@ -1753,6 +1879,74 @@ export class Compositor {
1753
1879
  else if (dynamicReserve > 0.40) {
1754
1880
  console.info(`[hypermem:compositor] dynamic_reserve=${Math.round(dynamicReserve * 100)}% avg_turn_cost=${Math.round(avgTurnCost / 1000)}k horizon=${this.config.dynamicReserveTurnHorizon ?? 5}`);
1755
1881
  }
1882
+ const composedAt = new Date().toISOString();
1883
+ // ─── Write Window Cache ─────────────────────────────
1884
+ // Cache the composed message array so the plugin can serve it directly
1885
+ // on the next assemble() call without re-running the full compose pipeline.
1886
+ // Short TTL (120s). External L4 mutations should set skipWindowCache=true.
1887
+ //
1888
+ // VS-1: Dual-write, session-scoped key for backwards compat;
1889
+ // topic-scoped key for per-topic window retrieval when activeTopicId is set.
1890
+ try {
1891
+ await this.cache.setWindow(request.agentId, request.sessionKey, messages, 120);
1892
+ await this.cache.setWindowMeta(request.agentId, request.sessionKey, {
1893
+ slots: slots,
1894
+ totalTokens,
1895
+ warnings,
1896
+ diagnostics,
1897
+ composedAt,
1898
+ }, 120);
1899
+ }
1900
+ catch {
1901
+ // Window cache write is best-effort
1902
+ }
1903
+ if (composedActiveTopicId) {
1904
+ try {
1905
+ await this.cache.setTopicWindow(request.agentId, request.sessionKey, composedActiveTopicId, messages, 120);
1906
+ }
1907
+ catch {
1908
+ // Topic window write is best-effort
1909
+ }
1910
+ }
1911
+ // ─── Write Session Cursor ─────────────────────────────────
1912
+ // Record the newest message included in the submission window.
1913
+ // Background indexer uses this to find unprocessed high-signal content.
1914
+ if (request.includeHistory !== false && slots.history > 0) {
1915
+ try {
1916
+ const historyMsgs = messages.filter(m => m.role !== 'system');
1917
+ const lastHistoryMsg = historyMsgs.length > 0 ? historyMsgs[historyMsgs.length - 1] : null;
1918
+ if (lastHistoryMsg) {
1919
+ const sm = lastHistoryMsg;
1920
+ if (sm.id != null && sm.messageIndex != null) {
1921
+ const cursor = {
1922
+ lastSentId: sm.id,
1923
+ lastSentIndex: sm.messageIndex,
1924
+ lastSentAt: composedAt,
1925
+ windowSize: historyMsgs.length,
1926
+ tokenCount: totalTokens,
1927
+ };
1928
+ await this.cache.setCursor(request.agentId, request.sessionKey, cursor);
1929
+ try {
1930
+ db.prepare(`
1931
+ UPDATE conversations
1932
+ SET cursor_last_sent_id = ?,
1933
+ cursor_last_sent_index = ?,
1934
+ cursor_last_sent_at = ?,
1935
+ cursor_window_size = ?,
1936
+ cursor_token_count = ?
1937
+ WHERE session_key = ?
1938
+ `).run(cursor.lastSentId, cursor.lastSentIndex, cursor.lastSentAt, cursor.windowSize, cursor.tokenCount, request.sessionKey);
1939
+ }
1940
+ catch {
1941
+ // SQLite cursor write is best-effort, don't block compose
1942
+ }
1943
+ }
1944
+ }
1945
+ }
1946
+ catch {
1947
+ // Cursor write is best-effort
1948
+ }
1949
+ }
1756
1950
  console.log(`[hypermem:compose] agent=${request.agentId} triggers=${diagTriggerHits} fallback=${diagTriggerFallbackUsed} facts=${diagFactsIncluded} semantic=${diagSemanticResults} chunks=${diagDocChunkCollections} scopeFiltered=${diagScopeFiltered} mode=${diagRetrievalMode} crossTopicKeystones=${diagCrossTopicKeystones}`);
1757
1951
  return {
1758
1952
  messages: outputMessages,
@@ -1781,7 +1975,7 @@ export class Compositor {
1781
1975
  // Warm budget uses the same reserve fraction as compose() so warm history
1782
1976
  // never pre-fills more than compose() would actually allow.
1783
1977
  const reserve = this.config.contextWindowReserve ?? 0.15;
1784
- const effectiveBudget = resolveModelBudget(opts?.model, this.config.defaultTokenBudget, reserve);
1978
+ const effectiveBudget = resolveModelBudget(opts?.model, this.config.defaultTokenBudget, reserve, this.config.budgetFraction);
1785
1979
  const warmBudget = Math.floor(effectiveBudget * (this.config.warmHistoryBudgetFraction ?? 0.4));
1786
1980
  const rawHistory = store.getRecentMessages(conversation.id, this.config.maxHistoryMessages);
1787
1981
  const transformedForWarm = applyToolGradient(rawHistory, {
@@ -1808,6 +2002,10 @@ export class Compositor {
1808
2002
  // compose() calls buildFactsFromDb() and buildCrossSessionContext() directly
1809
2003
  // from SQLite on every turn (~0.3ms each) — faster than a Redis GET round-trip.
1810
2004
  // Caching them here would create stale entries that compose() ignores anyway.
2005
+ // Invalidate the window cache so the next compose rebuilds with the fresh
2006
+ // system/identity slots. Without this, the fast-exit returns a stale bundle
2007
+ // that predates the warm and reports identity=0.
2008
+ await this.cache.invalidateWindow(agentId, sessionKey);
1811
2009
  await this.cache.warmSession(agentId, sessionKey, {
1812
2010
  system: opts?.systemPrompt,
1813
2011
  identity: opts?.identity,
@@ -2048,10 +2246,15 @@ export class Compositor {
2048
2246
  * @param precomputedEmbedding — optional pre-computed embedding for the query.
2049
2247
  * When provided, the Ollama call inside VectorStore.search() is skipped.
2050
2248
  */
2051
- async buildSemanticRecall(userMessage, agentId, maxTokens, libraryDb, precomputedEmbedding) {
2249
+ async buildSemanticRecall(userMessage, agentId, maxTokens, libraryDb, precomputedEmbedding, existingFingerprints // C2: skip results already in Active Facts
2250
+ ) {
2052
2251
  const libDb = libraryDb || this.libraryDb;
2053
2252
  if (!libDb && !this.vectorStore)
2054
2253
  return null;
2254
+ // Inline fingerprint helper (mirrors compose-scope version; C2 dedup only used here)
2255
+ const fpCheck = existingFingerprints
2256
+ ? (text) => existingFingerprints.has(text.toLowerCase().replace(/\s+/g, ' ').trim().slice(0, 120))
2257
+ : () => false;
2055
2258
  // Use hybrid search when library DB is available
2056
2259
  if (libDb) {
2057
2260
  const results = await hybridSearch(libDb, this.vectorStore, userMessage, {
@@ -2108,6 +2311,10 @@ export class Compositor {
2108
2311
  // (score >= 0.04) for episodes to make it into assembled context.
2109
2312
  if (result.sourceTable === 'episodes' && result.score < 0.04)
2110
2313
  continue;
2314
+ // C2: Skip results whose content is already fingerprinted (e.g. in Active Facts)
2315
+ // Dedup count is not tracked separately here — compose-level counter covers the other paths.
2316
+ if (fpCheck(result.content))
2317
+ continue;
2111
2318
  const label = this.formatHybridResult(result);
2112
2319
  const lineTokens = estimateTokens(label);
2113
2320
  if (tokens + lineTokens > maxTokens)
@@ -2181,7 +2388,8 @@ export class Compositor {
2181
2388
  * Build cross-session context by finding recent activity
2182
2389
  * in other sessions for this agent.
2183
2390
  */
2184
- buildCrossSessionContext(agentId, currentSessionKey, db, _libraryDb) {
2391
+ buildCrossSessionContext(agentId, currentSessionKey, db, _libraryDb, existingFingerprints // C3: skip entries already in facts/semantic recall
2392
+ ) {
2185
2393
  const conversation = db.prepare('SELECT id FROM conversations WHERE session_key = ?').get(currentSessionKey);
2186
2394
  if (!conversation)
2187
2395
  return null;
@@ -2199,11 +2407,18 @@ export class Compositor {
2199
2407
  `).all(agentId, conversation.id);
2200
2408
  if (rows.length === 0)
2201
2409
  return null;
2202
- const lines = rows.map(r => {
2410
+ const fpCheck = existingFingerprints
2411
+ ? (text) => existingFingerprints.has(text.toLowerCase().replace(/\s+/g, ' ').trim().slice(0, 120))
2412
+ : () => false;
2413
+ const lines = [];
2414
+ for (const r of rows) {
2415
+ // C3: Skip cross-session entries whose content fingerprint already appears in context
2416
+ if (fpCheck(r.text_content))
2417
+ continue;
2203
2418
  const preview = r.text_content.substring(0, 200);
2204
- return `- [${r.channel_type}/${r.role} @ ${r.created_at}] ${preview}`;
2205
- });
2206
- return lines.join('\n');
2419
+ lines.push(`- [${r.channel_type}/${r.role} @ ${r.created_at}] ${preview}`);
2420
+ }
2421
+ return lines.length > 0 ? lines.join('\n') : null;
2207
2422
  }
2208
2423
  // ─── Utilities ───────────────────────────────────────────────
2209
2424
  /**