npm - @psiclawops/hypermem - Versions diffs - 0.7.0 → 0.8.0 - Mend

@psiclawops/hypermem 0.7.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (87) hide show

package/ARCHITECTURE.md +31 -39
package/README.md +20 -14
package/bin/hypermem-status.mjs +1 -1
package/dist/background-indexer.d.ts +14 -3
package/dist/background-indexer.d.ts.map +1 -1
package/dist/background-indexer.js +135 -27
package/dist/budget-policy.d.ts +22 -0
package/dist/budget-policy.d.ts.map +1 -0
package/dist/budget-policy.js +27 -0
package/dist/cache.d.ts +11 -0
package/dist/cache.d.ts.map +1 -1
package/dist/compositor-utils.d.ts +31 -0
package/dist/compositor-utils.d.ts.map +1 -0
package/dist/compositor-utils.js +47 -0
package/dist/compositor.d.ts +163 -1
package/dist/compositor.d.ts.map +1 -1
package/dist/compositor.js +862 -130
package/dist/content-hash.d.ts +43 -0
package/dist/content-hash.d.ts.map +1 -0
package/dist/content-hash.js +75 -0
package/dist/context-store.d.ts +54 -0
package/dist/context-store.d.ts.map +1 -1
package/dist/context-store.js +102 -0
package/dist/contradiction-audit-store.d.ts +54 -0
package/dist/contradiction-audit-store.d.ts.map +1 -0
package/dist/contradiction-audit-store.js +88 -0
package/dist/contradiction-resolution-policy.d.ts +21 -0
package/dist/contradiction-resolution-policy.d.ts.map +1 -0
package/dist/contradiction-resolution-policy.js +17 -0
package/dist/cross-agent.d.ts +1 -1
package/dist/cross-agent.js +17 -17
package/dist/degradation.d.ts +102 -0
package/dist/degradation.d.ts.map +1 -0
package/dist/degradation.js +141 -0
package/dist/dreaming-promoter.d.ts +39 -1
package/dist/dreaming-promoter.d.ts.map +1 -1
package/dist/dreaming-promoter.js +70 -4
package/dist/index.d.ts +70 -8
package/dist/index.d.ts.map +1 -1
package/dist/index.js +405 -29
package/dist/knowledge-lint.d.ts +2 -0
package/dist/knowledge-lint.d.ts.map +1 -1
package/dist/knowledge-lint.js +40 -1
package/dist/library-schema.d.ts +7 -2
package/dist/library-schema.d.ts.map +1 -1
package/dist/library-schema.js +236 -1
package/dist/message-store.d.ts +64 -1
package/dist/message-store.d.ts.map +1 -1
package/dist/message-store.js +137 -1
package/dist/proactive-pass.d.ts +2 -2
package/dist/proactive-pass.d.ts.map +1 -1
package/dist/proactive-pass.js +66 -12
package/dist/replay-recovery.d.ts +29 -0
package/dist/replay-recovery.d.ts.map +1 -0
package/dist/replay-recovery.js +82 -0
package/dist/reranker.d.ts +95 -0
package/dist/reranker.d.ts.map +1 -0
package/dist/reranker.js +308 -0
package/dist/schema.d.ts +1 -1
package/dist/schema.d.ts.map +1 -1
package/dist/schema.js +46 -1
package/dist/seed.d.ts +1 -1
package/dist/seed.js +1 -1
package/dist/session-flusher.d.ts +4 -4
package/dist/session-flusher.d.ts.map +1 -1
package/dist/session-flusher.js +3 -3
package/dist/spawn-context.d.ts +1 -1
package/dist/spawn-context.js +1 -1
package/dist/tool-artifact-store.d.ts +98 -0
package/dist/tool-artifact-store.d.ts.map +1 -0
package/dist/tool-artifact-store.js +244 -0
package/dist/topic-detector.js +2 -2
package/dist/topic-store.d.ts +6 -0
package/dist/topic-store.d.ts.map +1 -1
package/dist/topic-store.js +39 -0
package/dist/topic-synthesizer.js +1 -1
package/dist/trigger-registry.d.ts +1 -1
package/dist/trigger-registry.js +4 -4
package/dist/types.d.ts +235 -3
package/dist/types.d.ts.map +1 -1
package/dist/vector-store.d.ts +2 -1
package/dist/vector-store.d.ts.map +1 -1
package/dist/vector-store.js +3 -0
package/dist/version.d.ts +10 -10
package/dist/version.d.ts.map +1 -1
package/dist/version.js +10 -10
package/package.json +6 -4

package/dist/compositor.js CHANGED Viewed

@@ -10,6 +10,7 @@
  * Token-budgeted: never exceeds the budget, prioritizes by configured order.
  * Provider-neutral internally, translates at the output boundary.
  */
+import { createHash } from 'node:crypto';
 import { filterByScope } from './retrieval-policy.js';
 import { DEFAULT_TRIGGERS, matchTriggers, logRegistryStartup, } from './trigger-registry.js';
 import { MessageStore } from './message-store.js';
@@ -25,6 +26,9 @@ import { getActiveFOS, matchMOD, renderFOS, renderMOD, renderLightFOS, resolveOu
 import { KnowledgeStore } from './knowledge-store.js';
 import { TemporalStore, hasTemporalSignals } from './temporal-store.js';
 import { isOpenDomainQuery, searchOpenDomain } from './open-domain.js';
+import { TRIM_BUDGET_POLICY, resolveTrimBudgets } from './budget-policy.js';
+import { formatToolChainStub, parseToolChainStub, formatArtifactRef, isArtifactRef } from './degradation.js';
+import { ToolArtifactStore } from './tool-artifact-store.js';
 /**
  * Files that OpenClaw's contextInjection injects into the system prompt.
  * HyperMem must not re-inject these via doc chunk retrieval to avoid duplication.
@@ -34,6 +38,7 @@ export const OPENCLAW_BOOTSTRAP_FILES = new Set([
     'SOUL.md', 'IDENTITY.md', 'USER.md', 'TOOLS.md',
     'AGENTS.md', 'HEARTBEAT.md', 'MEMORY.md', 'BOOTSTRAP.md',
 ]);
+const CACHE_PREFIX_BOUNDARY_SLOT = 'cache-prefix-boundary'; // Slot-name constant; its consumers are outside this hunk — presumably marks where the cacheable prefix ends (confirm).
 /**
  * Model context window sizes by provider/model string (or partial match).
  * Used as fallback when tokenBudget is not passed by the runtime.
@@ -68,6 +73,67 @@ const MODEL_CONTEXT_WINDOWS = [
     { pattern: 'deepseek-v3', tokens: 131_072 },
     { pattern: 'deepseek', tokens: 131_072 },
 ];
+const MODEL_MECW = [ // Scanned in order by resolveModelLaneBudgets: the first entry whose `pattern` is a substring of the lower-cased model id wins.
+    // Claude 200k: effective recall degrades above ~140k; clamp composite budget
+    { pattern: 'claude', mecwFloor: 80_000, mecwCeiling: 140_000, preferredHistoryFraction: 0.35, preferredMemoryFraction: 0.45 },
+    // Gemini 1M: reliable up to ~180k for grounded retrieval; less for recall
+    { pattern: 'gemini', mecwFloor: 100_000, mecwCeiling: 180_000, preferredHistoryFraction: 0.40, preferredMemoryFraction: 0.45 },
+    // OpenAI 128k: full window is trustable; use standard fractions (floor == ceiling, so no blend range)
+    { pattern: 'gpt', mecwFloor: 128_000, mecwCeiling: 128_000, preferredHistoryFraction: 0.40, preferredMemoryFraction: 0.40 },
+    { pattern: 'o3', mecwFloor: 128_000, mecwCeiling: 128_000, preferredHistoryFraction: 0.40, preferredMemoryFraction: 0.40 },
+    { pattern: 'o4', mecwFloor: 128_000, mecwCeiling: 128_000, preferredHistoryFraction: 0.40, preferredMemoryFraction: 0.40 },
+    // Remaining families: floor == ceiling as well, so the full listed window is treated as trustable
+    { pattern: 'qwen3', mecwFloor: 262_144, mecwCeiling: 262_144, preferredHistoryFraction: 0.40, preferredMemoryFraction: 0.40 }, // must stay ahead of 'qwen': first substring match wins
+    { pattern: 'qwen', mecwFloor: 131_072, mecwCeiling: 131_072, preferredHistoryFraction: 0.40, preferredMemoryFraction: 0.40 },
+    { pattern: 'glm', mecwFloor: 131_072, mecwCeiling: 131_072, preferredHistoryFraction: 0.40, preferredMemoryFraction: 0.40 },
+    { pattern: 'deepseek', mecwFloor: 131_072, mecwCeiling: 131_072, preferredHistoryFraction: 0.40, preferredMemoryFraction: 0.40 },
+];
+/**
+ * B4: Compute model-aware lane budget fractions.
+ *
+ * Resolves the effective historyFraction and memoryFraction for a compose pass
+ * given the model and its effective budget. Uses the MECW catalog to blend
+ * away from fixed fractions when the budget approaches the MECW ceiling,
+ * so the compositor allocates proportionally for what the model can actually use.
+ *
+ * Matching is a case-insensitive substring test against MODEL_MECW; the first
+ * matching entry decides the result and later entries are never consulted.
+ *
+ * @param model                 — model id string; a falsy value returns the config fractions unchanged
+ * @param effectiveBudget       — token budget compared against the entry's mecwFloor / mecwCeiling
+ * @param configHistoryFraction — configured history fraction (returned as-is at or below the floor)
+ * @param configMemoryFraction  — configured memory fraction (returned as-is at or below the floor)
+ *
+ * Returns:
+ *   historyFraction — fraction of effective budget to give history
+ *   memoryFraction  — fraction of effective budget to give memory pool
+ *   mecwProfile     — pattern of the MECW entry that matched (undefined = no model / no match)
+ *   mecwApplied     — true whenever the budget is above the matched floor (blend or full
+ *                     preferred fractions), even if the resulting numbers equal the config ones
+ *   mecwBlend       — 0..1 blend factor (0 = at/below floor or no match, 1 = at/above ceiling)
+ *
+ * Fractions and blend are rounded to 3 decimals on the linear-blend path only.
+ */
+export function resolveModelLaneBudgets(model, effectiveBudget, configHistoryFraction, configMemoryFraction) {
+    if (!model) {
+        return { historyFraction: configHistoryFraction, memoryFraction: configMemoryFraction, mecwProfile: undefined, mecwApplied: false, mecwBlend: 0 };
+    }
+    const normalized = model.toLowerCase();
+    for (const entry of MODEL_MECW) {
+        if (!normalized.includes(entry.pattern))
+            continue;
+        // Budget is at or below the floor — full window is safe, use config fractions
+        if (effectiveBudget <= entry.mecwFloor) {
+            return { historyFraction: configHistoryFraction, memoryFraction: configMemoryFraction, mecwProfile: entry.pattern, mecwApplied: false, mecwBlend: 0 };
+        }
+        // Budget is at or above the ceiling — use preferred fractions fully
+        if (effectiveBudget >= entry.mecwCeiling) {
+            return { historyFraction: entry.preferredHistoryFraction, memoryFraction: entry.preferredMemoryFraction, mecwProfile: entry.pattern, mecwApplied: true, mecwBlend: 1 };
+        }
+        // Budget is between floor and ceiling — linear blend
+        const blend = (effectiveBudget - entry.mecwFloor) / (entry.mecwCeiling - entry.mecwFloor); // floor < budget < ceiling here, so the divisor is > 0
+        const historyFraction = configHistoryFraction + blend * (entry.preferredHistoryFraction - configHistoryFraction);
+        const memoryFraction = configMemoryFraction + blend * (entry.preferredMemoryFraction - configMemoryFraction);
+        return {
+            historyFraction: Math.round(historyFraction * 1000) / 1000,
+            memoryFraction: Math.round(memoryFraction * 1000) / 1000,
+            mecwProfile: entry.pattern,
+            mecwApplied: true,
+            mecwBlend: Math.round(blend * 1000) / 1000,
+        };
+    }
+    // No MECW entry matched — use config fractions unchanged
+    return { historyFraction: configHistoryFraction, memoryFraction: configMemoryFraction, mecwProfile: undefined, mecwApplied: false, mecwBlend: 0 };
+}
 /**
  * Resolve effective token budget from model string.
  * Returns the context window for the model, minus the configured reserve fraction
@@ -166,6 +232,74 @@ function computeDynamicReserve(recentMessages, totalWindow, config) {
     }
     return { reserve: dynamicFrac, avgTurnCost, dynamic: true, pressureHigh: false };
 }
+/**
+ * Classify a session based on the ratio of tool messages in the recent sample.
+ * 'tool-heavy': >= 20% of sampled messages carry tool calls or tool results.
+ * 'plain-chat': below that threshold (text-only or occasional tool use).
+ *
+ * The 20% threshold is intentionally conservative: most tool-heavy agents
+ * have tool messages on every assistant turn, so the ratio quickly exceeds
+ * the threshold without false-positive risk for light tool users.
+ *
+ * Whether a message counts as a tool message is decided by hasToolContent,
+ * which is defined elsewhere in this file.
+ *
+ * @param messages — sample of recent messages; an empty sample is 'plain-chat'
+ * @returns 'tool-heavy' | 'plain-chat'
+ */
+export function classifySessionType(messages) {
+    if (messages.length === 0)
+        return 'plain-chat';
+    const toolCount = messages.filter(m => hasToolContent(m)).length;
+    return toolCount / messages.length >= 0.20 ? 'tool-heavy' : 'plain-chat';
+}
+/**
+ * Estimate the average token cost per message from a recent message sample.
+ * Uses the same estimateMessageTokens heuristic as the compositor budget walk
+ * so the returned depth is directly comparable to the historyFillCap check.
+ *
+ * Returns a conservative floor (100 tokens) when the sample is empty to avoid
+ * returning Infinity when historyBudget is divided by density.
+ * For a non-empty sample the mean is rounded up and never reported below 1,
+ * so the result is always a positive integer that is safe to divide by.
+ *
+ * @param messages — sample of recent messages
+ * @returns average estimated tokens per message (integer, >= 1)
+ */
+export function estimateObservedMsgDensity(messages) {
+    if (messages.length === 0)
+        return 100;
+    const total = messages.reduce((sum, m) => sum + estimateMessageTokens(m), 0);
+    return Math.max(1, Math.ceil(total / messages.length));
+}
+/**
+ * Compute an adaptive history depth that pre-fits the session type.
+ *
+ * For plain-chat sessions: divides historyBudget by observed density to get a
+ * depth that fills the budget without overflow, bounded by the default maximum.
+ * Recall quality is preserved because the density estimate is honest for
+ * text-only turns.
+ *
+ * For tool-heavy sessions: applies a post-gradient compression factor
+ * (TOOL_GRADIENT_DENSITY_FACTOR = 0.30) to the observed pre-gradient density.
+ * This accounts for the gradient transform collapsing large tool payloads to
+ * prose stubs before the budget-fit walk runs. A tighter depth is chosen so
+ * the gradient-compressed messages fit inside historyFillCap without triggering
+ * a rescue trim. The compressed density is never taken below 50 tokens.
+ *
+ * A 0.85 safety margin is applied to both paths so estimates that are
+ * slightly off don't cause immediate overflow on the first warm compose.
+ *
+ * Min/max bounds ensure the compositor always sees a meaningful window:
+ *   - plain-chat min: 20 messages (enough for short recent context)
+ *   - tool-heavy min: 15 messages (recent tool context + a few prior turns)
+ *   - shared max: the maxHistoryMessages argument (never exceed the DB fetch ceiling)
+ * The max is applied last, so it wins if it is smaller than the minimum.
+ *
+ * @param sessionType         — 'tool-heavy' selects the compressed-density path; any other value takes the plain-chat path
+ * @param observedDensity     — average tokens per message; expected to be > 0 (estimateObservedMsgDensity never returns less than 1)
+ * @param historyBudgetTokens — token budget available to history
+ * @param maxHistoryMessages  — upper bound on the returned depth
+ * @returns number of history messages to fetch
+ */
+export function computeAdaptiveHistoryDepth(sessionType, observedDensity, historyBudgetTokens, maxHistoryMessages) {
+    const SAFETY_MARGIN = 0.85;
+    if (sessionType === 'tool-heavy') {
+        // Tool-heavy: post-gradient density is much lower than pre-gradient.
+        // Gradient tiers collapse T2/T3 payloads to compact stubs (15-30% of original).
+        // Use a blended factor of 0.30 as the expected post-gradient density ratio.
+        const TOOL_GRADIENT_DENSITY_FACTOR = 0.30;
+        const postGradientDensity = Math.max(50, Math.floor(observedDensity * TOOL_GRADIENT_DENSITY_FACTOR));
+        const depth = Math.floor((historyBudgetTokens * SAFETY_MARGIN) / postGradientDensity);
+        return Math.min(maxHistoryMessages, Math.max(15, depth));
+    }
+    // Plain-chat: pre-gradient and post-gradient density are the same.
+    // historyBudget / avgMsgCost gives the message count that fills the budget.
+    const depth = Math.floor((historyBudgetTokens * SAFETY_MARGIN) / observedDensity);
+    return Math.min(maxHistoryMessages, Math.max(20, depth));
+}
 const DEFAULT_CONFIG = {
     // Primary budget controls
     budgetFraction: 0.703,
@@ -277,7 +411,7 @@ function clusterNeutralMessages(messages) {
  */
 export function applyToolGradientToWindow(messages, tokenBudget, totalWindowTokens) {
     const reshaped = applyToolGradient(messages, { totalWindowTokens });
-    const targetTokens = Math.floor(tokenBudget * 0.65);
+    const { softBudget: targetTokens } = resolveTrimBudgets(tokenBudget);
     const clusters = clusterNeutralMessages(reshaped);
     let totalTokens = clusters.reduce((sum, cluster) => sum + cluster.tokenCost, 0);
     let start = 0;
@@ -328,6 +462,30 @@ function estimateMessageTokens(msg) {
     tokens += 4;
     return tokens;
 }
+function isDynamicBoundaryMessage(msg) { // True when msg.metadata.dynamicBoundary is truthy; a missing metadata object yields false.
+    return Boolean(msg.metadata?.dynamicBoundary);
+}
+function getStablePrefixMessages(messages) { // Returns the leading run of system messages, ending before the first non-system or dynamic-boundary message.
+    const prefix = [];
+    for (const msg of messages) {
+        if (msg.role !== 'system')
+            break; // the prefix is contiguous from index 0: the first non-system message ends it
+        if (isDynamicBoundaryMessage(msg))
+            break; // the boundary message itself is not part of the prefix
+        prefix.push(msg);
+    }
+    return prefix;
+}
+function computeStablePrefixHash(messages) { // SHA-256 hex digest over the messages' textContent, in order; undefined when the array is empty.
+    if (messages.length === 0)
+        return undefined;
+    const hash = createHash('sha256');
+    for (const msg of messages) {
+        hash.update(msg.textContent ?? ''); // only textContent is hashed; nullish text counts as ''
+        hash.update('\n␞\n'); // separator fed after every message — presumably so adjacent texts cannot run together
+    }
+    return hash.digest('hex');
+}
 function parseToolArgs(argumentsJson) {
     try {
         return JSON.parse(argumentsJson);
@@ -702,12 +860,211 @@ function evictLargeToolResults(messages) {
             const approxKTokens = Math.round(content.length / 4 / 1000);
             return {
                 ...result,
-                content: `[tool result evicted: ~${approxKTokens}k tokens \u2014 use memory_search or re-run if needed]`,
+                content: formatToolChainStub({
+                    name: result.name || 'tool_result',
+                    id: result.callId || 'unknown',
+                    status: 'ejected',
+                    reason: 'eviction_oversize',
+                    summary: `~${approxKTokens}k tokens, use memory_search or re-run if needed`,
+                }),
             };
         });
         return { ...msg, toolResults: evicted };
     });
 }
+// ─── C2: Oversized artifact handling ────────────────────────────────────────
+/**
+ * C2: Resolve the artifact oversize threshold (in tokens) for the current compose pass.
+ *
+ * The threshold scales with the effective model budget from B4 so:
+ *   - Small-window models (16k–32k effective) get a proportionally tighter threshold
+ *     (threshold = softBudget × ARTIFACT_BUDGET_FRACTION, floor 500, ceiling 8000,
+ *     where softBudget comes from resolveTrimBudgets(effectiveBudget)).
+ *   - Large-window models (200k+) get a higher ceiling but it still stays bounded
+ *     so artifacts never fill the lane unconditionally.
+ *
+ * ARTIFACT_BUDGET_FRACTION: fraction of the soft budget above which a single
+ * retrieved artifact/chunk is considered oversized. Default 0.10 (10%).
+ *
+ * Headroom preservation comes from replacing the oversized artifact with a cheap
+ * reference, not from shrinking the threshold itself.
+ *
+ * @param effectiveBudget — effective token budget for the compose pass
+ * @returns threshold in tokens, clamped to [ARTIFACT_THRESHOLD_FLOOR, ARTIFACT_THRESHOLD_CEILING]
+ */
+const ARTIFACT_BUDGET_FRACTION = 0.10; // 10% of soft budget is the raw threshold
+const ARTIFACT_THRESHOLD_FLOOR = 500; // never below 500 tokens (~2k chars)
+const ARTIFACT_THRESHOLD_CEILING = 8_000; // never above 8k tokens (~32k chars)
+export function resolveArtifactOversizeThreshold(effectiveBudget) {
+    const { softBudget } = resolveTrimBudgets(effectiveBudget);
+    const raw = Math.floor(softBudget * ARTIFACT_BUDGET_FRACTION);
+    return Math.min(ARTIFACT_THRESHOLD_CEILING, Math.max(ARTIFACT_THRESHOLD_FLOOR, raw));
+}
+/**
+ * C2: Degrade an oversized doc chunk to a canonical ArtifactRef string.
+ *
+ * When a retrieved chunk's content exceeds the oversize threshold (in tokens),
+ * replace it with a fetchable canonical reference instead of injecting raw content.
+ * This preserves headroom in the lane instead of filling it with a large payload.
+ * Content exactly at the threshold is still considered within bounds.
+ *
+ * @param chunkId         — identifier stored as the reference's `id`
+ * @param sourcePath      — path stored as the reference's `path`
+ * @param content         — chunk text; its size is measured with estimateTokens
+ * @param thresholdTokens — oversize threshold in tokens
+ *
+ * Returns:
+ *   - `null`  → content is within the threshold; caller should inject as-is.
+ *   - `string` → canonical artifact reference; caller should inject this instead of raw content.
+ *
+ * The sizeTokens reported in the reference is the ACTUAL estimated size so downstream
+ * tooling can make informed decisions about whether to fetch.
+ */
+export function degradeOversizedDocChunk(chunkId, sourcePath, content, thresholdTokens) {
+    const contentTokens = estimateTokens(content);
+    if (contentTokens <= thresholdTokens)
+        return null;
+    const ref = {
+        id: chunkId,
+        path: sourcePath,
+        sizeTokens: contentTokens,
+        status: 'degraded',
+        reason: 'artifact_oversize',
+        fetchHint: 'memory_search or re-read source file',
+    };
+    return formatArtifactRef(ref);
+}
+/**
+ * C2: Resolve oversized artifacts in a history message array.
+ *
+ * Scans the message array and replaces the text of any non-system, non-tool
+ * message whose text content exceeds the model-aware artifact oversize threshold
+ * with a canonical ArtifactRef string. System messages and messages carrying a
+ * truthy `toolCalls` or `toolResults` property (an empty array counts) are
+ * always passed through unchanged, as are messages whose text is already a ref.
+ *
+ * The input array and its messages are not mutated: degraded messages are
+ * shallow copies with `textContent` replaced.
+ *
+ * Reference fields: `id` is the message's string `_artifactId` when present,
+ * otherwise `msg-` plus the first 12 hex chars of SHA-1 over `role:text`;
+ * `path` is the string `_artifactPath` when present, otherwise '/unknown/artifact'.
+ *
+ * @param messages — neutral message array (already-assembled history window)
+ * @param effectiveBudget — effective model budget from B4 (drives the threshold)
+ * @returns { messages, refCount, tokensSaved } — tokensSaved is the estimated
+ *          token size of the replaced text minus that of the reference text
+ */
+export function resolveOversizedArtifacts(messages, effectiveBudget) {
+    const thresholdTokens = resolveArtifactOversizeThreshold(effectiveBudget);
+    let refCount = 0;
+    let tokensSaved = 0;
+    const out = messages.map(msg => {
+        // System messages are never degraded (they are in the stable prefix).
+        if (msg.role === 'system')
+            return msg;
+        // Tool content (calls/results) is C1's domain — never touch here.
+        if (msg.toolResults || msg.toolCalls)
+            return msg;
+        const text = msg.textContent ?? '';
+        // Already a ref — idempotent; don't re-degrade.
+        if (isArtifactRef(text))
+            return msg;
+        const contentTokens = estimateTokens(text);
+        if (contentTokens <= thresholdTokens)
+            return msg;
+        // Oversized — replace with canonical artifact reference.
+        const meta = msg; // alias used for bracket access to the optional `_artifactId` / `_artifactPath` fields
+        const id = (typeof meta['_artifactId'] === 'string' ? meta['_artifactId'] : null)
+            ?? `msg-${createHash('sha1').update(`${msg.role}:${text}`).digest('hex').slice(0, 12)}`;
+        const path = (typeof meta['_artifactPath'] === 'string' ? meta['_artifactPath'] : null)
+            ?? '/unknown/artifact';
+        const ref = {
+            id,
+            path,
+            sizeTokens: contentTokens,
+            status: 'degraded',
+            reason: 'artifact_oversize',
+            fetchHint: 'memory_search',
+        };
+        const refText = formatArtifactRef(ref);
+        const refTokens = estimateTokens(refText);
+        tokensSaved += contentTokens - refTokens;
+        refCount++;
+        return { ...msg, textContent: refText };
+    });
+    return { messages: out, refCount, tokensSaved };
+}
+/**
+ * C1: Centralized tool-chain dependency ejection.
+ *
+ * Given a set of tool-use message indices that are being ejected from the
+ * context window, this function ensures that no orphaned tool-results survive:
+ *
+ *   - For each ejected assistant message carrying toolCalls, collect the set
+ *     of call IDs being removed.
+ *   - Walk the remaining messages: if a message's toolResults reference any
+ *     of those ejected IDs:
+ *       a) If the message has no non-blank text AND every one of its
+ *          tool-results depends on an ejected call, co-eject it (remove it
+ *          entirely). This is the zero-cost path.
+ *       b) Otherwise (it carries text, or some results are unrelated), replace
+ *          only the dependent toolResults entries with canonical ToolChainStub
+ *          strings so the message is not silently mutilated.
+ *
+ * This function operates on the full array: `ejectIndices` are positions in
+ * `messages` as passed in, and the returned array already has those messages
+ * removed. Dropping them beforehand would shift the indices. The input array
+ * and its messages are not mutated; stubbed messages are shallow copies.
+ *
+ * @param messages       Full message array (order preserved)
+ * @param ejectIndices   Set of indices into `messages` that are being ejected
+ *                       (these are the tool-use / assistant messages being removed).
+ *                       Must be iterable and support `.has(index)`.
+ * @param reason         DegradationReason to embed in any canonical stubs.
+ * @returns              { messages, coEjections, stubReplacements } — the cleaned
+ *                       message array plus telemetry counters (whole messages
+ *                       co-ejected, and individual results replaced by stubs).
+ */
+export function resolveToolChainEjections(messages, ejectIndices, reason = 'eviction_oversize') {
+    // Collect all tool-call IDs that are being ejected.
+    const ejectedCallIds = new Set();
+    for (const idx of ejectIndices) {
+        const msg = messages[idx];
+        if (!msg)
+            continue;
+        if (msg.toolCalls) {
+            for (const tc of msg.toolCalls) {
+                if (tc.id)
+                    ejectedCallIds.add(tc.id);
+            }
+        }
+    }
+    let coEjections = 0;
+    let stubReplacements = 0;
+    // If no call IDs were ejected, nothing to do beyond dropping the ejected messages.
+    if (ejectedCallIds.size === 0) {
+        const result = messages.filter((_, idx) => !ejectIndices.has(idx));
+        return { messages: result, coEjections, stubReplacements };
+    }
+    // Walk all messages and handle dependent tool-results.
+    const transformed = messages.map((msg, idx) => {
+        // Already being ejected — remove.
+        if (ejectIndices.has(idx))
+            return null;
+        if (!msg.toolResults || msg.toolResults.length === 0)
+            return msg;
+        // Determine which results in this message depend on ejected calls.
+        const dependentResultIds = msg.toolResults
+            .map(r => r.callId)
+            .filter((id) => Boolean(id) && ejectedCallIds.has(id));
+        if (dependentResultIds.length === 0)
+            return msg;
+        const dependentSet = new Set(dependentResultIds);
+        // Case (a): The message carries ONLY tool-results and no other text content,
+        // and ALL of its results are dependent on ejected calls.
+        // Co-eject the whole message — zero budget cost, no stub needed.
+        const hasText = Boolean(msg.textContent && msg.textContent.trim().length > 0);
+        const hasNonDependentResults = msg.toolResults.some(r => !dependentSet.has(r.callId));
+        if (!hasText && !hasNonDependentResults) {
+            coEjections++;
+            return null;
+        }
+        // Case (b): Message has text or unrelated results — stub only the dependent entries.
+        const stubbedResults = msg.toolResults.map(result => {
+            if (!result.callId || !dependentSet.has(result.callId))
+                return result;
+            const stubContent = formatToolChainStub({
+                name: result.name || 'tool_result',
+                id: result.callId || 'unknown',
+                status: 'ejected',
+                reason,
+                summary: 'parent tool-use ejected from context window',
+            });
+            stubReplacements++;
+            return { ...result, content: stubContent };
+        });
+        return { ...msg, toolResults: stubbedResults };
+    });
+    const result = transformed.filter((m) => m !== null);
+    return { messages: result, coEjections, stubReplacements };
+}
 /**
  * Apply gradient tool treatment to a message array.
  *
@@ -812,6 +1169,131 @@ export class Compositor {
     get orgRegistry() {
         return this._orgRegistry;
     }
+    /**
+     * Sprint 2.1: Hydrate tool-artifact stubs in the active turn.
+     *
+     * The active turn is the contiguous trailing block of tool-bearing messages
+     * at the tail of the assembled window (positional, NOT turn_id-based):
+     *   - Walk backward from the last message
+     *   - Collect tool-bearing messages (toolCalls != null OR toolResults != null)
+     *   - Plus the bounding message that opened the turn: the first non-tool
+     *     message met on the backward walk (its role is not checked here)
+     *   - Stop at the first plain message once at least one tool message was found
+     *   - If the very last message is plain, there is no active turn
+     *
+     * For every toolResult stub with an `artifact=<id>` pointer, read the full
+     * payload from the `tool_artifacts` table and replace the stub content in-place.
+     * Uses a single batched `WHERE id IN (...)` lookup (no N+1 queries).
+     * Touches `last_used_at` on every hydrated artifact in a single batch.
+     *
+     * The input is mutated: matching entries of `msg.toolResults` are replaced
+     * directly on the message objects passed in.
+     *
+     * Failure mode: if no row is found for an id, or its payload is null/undefined,
+     * leave the stub unchanged and increment hydrationMisses (once per stub).
+     * A failure of the `last_used_at` update is swallowed.
+     *
+     * NOTE(review): the SQL binds one parameter per distinct artifact id; a very
+     * large active turn could approach the driver's bound-parameter limit — confirm.
+     *
+     * @param messages — assembled message window (mutated in place)
+     * @param db       — database handle exposing prepare(sql).all(...) / .run(...)
+     * @returns { artifactsHydrated, hydrationBytes, hydrationMisses } —
+     *          hydrationBytes is the UTF-8 byte size of the injected payloads
+     */
+    hydrateActiveTurnArtifacts(messages, db) {
+        if (messages.length === 0) {
+            return { artifactsHydrated: 0, hydrationBytes: 0, hydrationMisses: 0 };
+        }
+        const store = new ToolArtifactStore(db); // NOTE(review): never referenced again in this method; the lookups below query `db` directly — confirm whether the constructor is needed for a side effect
+        // ── 1. Detect active turn (positional, backward walk) ─────────────────────
+        // Collect indices belonging to the active turn.
+        const activeTurnIndices = [];
+        let foundToolBearing = false;
+        for (let i = messages.length - 1; i >= 0; i--) {
+            const msg = messages[i];
+            const isToolBearing = msg.toolCalls != null || msg.toolResults != null;
+            if (isToolBearing) {
+                foundToolBearing = true;
+                activeTurnIndices.push(i);
+            }
+            else if (foundToolBearing) {
+                // First plain message after at least one tool-bearing message — this
+                // is the bounding user message that opened the turn. Include it and stop.
+                activeTurnIndices.push(i);
+                break;
+            }
+            else {
+                // Haven't found any tool-bearing messages yet — still in non-tool tail
+                // (e.g., the last message is a plain user message). No active turn.
+                break;
+            }
+        }
+        if (activeTurnIndices.length === 0 || !foundToolBearing) {
+            return { artifactsHydrated: 0, hydrationBytes: 0, hydrationMisses: 0 };
+        }
+        // ── 2. Collect all artifactIds from stub toolResults in the active turn ───
+        // Map: artifactId -> array of [msgIndex, resultIndex] for in-place replacement
+        const artifactTargets = new Map();
+        for (const msgIdx of activeTurnIndices) {
+            const msg = messages[msgIdx];
+            if (!msg.toolResults)
+                continue;
+            for (let resultIdx = 0; resultIdx < msg.toolResults.length; resultIdx++) {
+                const result = msg.toolResults[resultIdx];
+                const stub = parseToolChainStub(result.content);
+                if (stub && stub.artifactId) {
+                    const existing = artifactTargets.get(stub.artifactId) ?? [];
+                    existing.push({ msgIdx, resultIdx });
+                    artifactTargets.set(stub.artifactId, existing);
+                }
+            }
+        }
+        if (artifactTargets.size === 0) {
+            return { artifactsHydrated: 0, hydrationBytes: 0, hydrationMisses: 0 };
+        }
+        // ── 3. Batch lookup ────────────────────────────────────────────────────────
+        const ids = Array.from(artifactTargets.keys());
+        const placeholders = ids.map(() => '?').join(', '); // only '?' markers are interpolated; the ids themselves are bound as parameters
+        const rows = db
+            .prepare(`SELECT * FROM tool_artifacts WHERE id IN (${placeholders})`)
+            .all(...ids);
+        // Build id -> payload map
+        const payloadMap = new Map();
+        for (const row of rows) {
+            payloadMap.set(row.id, row.payload);
+        }
+        // ── 4. Hydrate in-place ────────────────────────────────────────────────────
+        let artifactsHydrated = 0;
+        let hydrationBytes = 0;
+        let hydrationMisses = 0;
+        const touchIds = [];
+        for (const [artifactId, targets] of artifactTargets) {
+            const payload = payloadMap.get(artifactId);
+            if (payload == null) {
+                // Graceful miss — stub stays as-is
+                hydrationMisses += targets.length;
+                continue;
+            }
+            for (const { msgIdx, resultIdx } of targets) {
+                const msg = messages[msgIdx];
+                // Safety: if content doesn't look like a stub anymore (defensive idempotency check)
+                const existingContent = msg.toolResults[resultIdx].content;
+                if (!parseToolChainStub(existingContent)) {
+                    // Already full content — pass through unchanged
+                    continue;
+                }
+                // Replace stub with full payload
+                msg.toolResults[resultIdx] = {
+                    ...msg.toolResults[resultIdx],
+                    content: payload,
+                };
+                artifactsHydrated++;
+                hydrationBytes += Buffer.byteLength(payload, 'utf8');
+            }
+            touchIds.push(artifactId); // queued once per artifact that had a payload, even if every target above was skipped
+        }
+        // ── 5. Batch touch last_used_at ───────────────────────────────────────────
+        if (touchIds.length > 0) {
+            const ts = new Date().toISOString();
+            const touchPlaceholders = touchIds.map(() => '?').join(', ');
+            try {
+                db.prepare(`UPDATE tool_artifacts SET last_used_at = ? WHERE id IN (${touchPlaceholders})`).run(ts, ...touchIds);
+            }
+            catch {
+                // Touch is best-effort — hydration still succeeded
+            }
+        }
+        return { artifactsHydrated, hydrationBytes, hydrationMisses };
+    }
     /**
      * Compose a complete message array for sending to an LLM.
      *
@@ -842,6 +1324,11 @@ export class Compositor {
         // Particularly effective for low-frequency sessions (heartbeat agents, council
         // seats between rounds). TTL on the cache write remains 120s — this is a
         // conservative early-exit before the TTL expires, not a TTL extension.
+        //
+        // B2: prevPrefixHash is set when a cached bundle is found but bypassed because
+        // a compatibility check failed (including prefix-input mutation). It is surfaced
+        // in the full-compose diagnostics so callers can confirm the bypass fired correctly.
+        let _prevPrefixHashFromBypass;
         if (request.includeHistory !== false && request.skipWindowCache !== true) {
             try {
                 const newestRow = db.prepare('SELECT MAX(id) AS maxId FROM messages WHERE agent_id = ?').get(request.agentId);
@@ -866,7 +1353,28 @@ export class Compositor {
                         // historyDepth constrains how many messages the caller wants;
                         // we can't slice a cached bundle safely, so skip cache.
                         const depthOk = !request.historyDepth;
-                        if (budgetOk && factsOk && libraryOk && contextOk && depthOk) {
+                        // B2: Stable-prefix hash check.
+                        // If the system/identity slots changed since this cache entry was
+                        // written, the stable prefix is stale even if cursor freshness
+                        // passes. Compute a cheap input hash from slot contents and compare
+                        // against the one stored in the cache meta. If no stored hash exists
+                        // (pre-B2 cache entries), the comparison is skipped and the
+                        // cached bundle is treated as prefix-compatible.
+                        let prefixInputOk = true;
+                        const _cachedPrefixInputHash = cachedBundle.meta.prefixInputHash;
+                        if (_cachedPrefixInputHash) {
+                            const _sysSlot = await this.cache.getSlot(request.agentId, request.sessionKey, 'system');
+                            const _idSlot = await this.cache.getSlot(request.agentId, request.sessionKey, 'identity');
+                            const _incomingInputHash = createHash('sha256')
+                                .update(_sysSlot ?? '')
+                                .update('\n␞\n')
+                                .update(_idSlot ?? '')
+                                .digest('hex');
+                            if (_incomingInputHash !== _cachedPrefixInputHash) {
+                                prefixInputOk = false;
+                            }
+                        }
+                        if (budgetOk && factsOk && libraryOk && contextOk && depthOk && prefixInputOk) {
                             const cachedSlots = {
                                 system: cachedBundle.meta.slots['system'] ?? 0,
                                 identity: cachedBundle.meta.slots['identity'] ?? 0,
@@ -875,6 +1383,8 @@ export class Compositor {
                                 context: cachedBundle.meta.slots['context'] ?? 0,
                                 library: cachedBundle.meta.slots['library'] ?? 0,
                             };
+                            // Sprint 2.1: hydrate active-turn artifact stubs before converting.
+                            const cachedHydration = this.hydrateActiveTurnArtifacts(cachedBundle.messages, db);
                             return {
                                 messages: toComposeOutputMessages(cachedBundle.messages),
                                 tokenCount: cachedBundle.meta.totalTokens,
@@ -885,10 +1395,17 @@ export class Compositor {
                                 diagnostics: {
                                     ...cachedBundle.meta.diagnostics,
                                     windowCacheHit: true,
+                                    // Carry forward the stored prefixHash so callers can observe it.
+                                    prefixHash: cachedBundle.meta.prefixHash ?? cachedBundle.meta.diagnostics.prefixHash,
+                                    artifactsHydrated: cachedHydration.artifactsHydrated > 0 ? cachedHydration.artifactsHydrated : undefined,
+                                    hydrationBytes: cachedHydration.hydrationBytes > 0 ? cachedHydration.hydrationBytes : undefined,
+                                    hydrationMisses: cachedHydration.hydrationMisses > 0 ? cachedHydration.hydrationMisses : undefined,
                                 },
                             };
                         }
-                        // Incompatible request — fall through to full compose
+                        // Incompatible request — fall through to full compose.
+                        // Surface prevPrefixHash so the full compose diagnostics can report it.
+                        _prevPrefixHashFromBypass = cachedBundle.meta.prefixHash ?? cachedBundle.meta.diagnostics.prefixHash;
                     }
                 }
             }
@@ -907,6 +1424,36 @@ export class Compositor {
             : [];
         const { reserve: dynamicReserve, avgTurnCost, dynamic: isDynamic, pressureHigh } = computeDynamicReserve(sampleMessages, totalWindow, this.config);
         const budget = request.tokenBudget || resolveModelBudget(request.model, this.config.defaultTokenBudget, dynamicReserve, this.config.budgetFraction);
+        // B4: Model-aware lane budgets.
+        // Resolve historyFraction and memoryFraction by blending config values toward
+        // model-preferred fractions when the effective budget approaches the MECW ceiling.
+        // This ensures the compositor doesn't allocate more history than the model can
+        // reliably reason over, and adjusts the memory pool proportionally.
+        const _b4ConfigHistoryFraction = this.config.historyFraction ?? 0.40;
+        const _b4ConfigMemoryFraction = this.config.memoryFraction ?? 0.40;
+        const { historyFraction: b4HistoryFraction, memoryFraction: b4MemoryFraction, mecwProfile: b4MecwProfile, mecwApplied: b4MecwApplied, mecwBlend: b4MecwBlend, } = resolveModelLaneBudgets(request.model, budget, _b4ConfigHistoryFraction, _b4ConfigMemoryFraction);
+        // C2: Compute the artifact oversize threshold once per compose pass from the
+        // effective model budget (from B4). Chunk injection paths consult this threshold
+        // to degrade retrieved payloads that would fill the lane instead of injecting them.
+        const c2ArtifactThresholdTokens = resolveArtifactOversizeThreshold(budget);
+        let c2ArtifactDegradations = 0;
+        // Sprint 4: Pre-compose history depth tightening.
+        // Classify the session and compute an adaptive depth from observed message
+        // density. This replaces the old fixed maxHistoryMessages ceiling that over-
+        // fed the compositor for tool-heavy sessions.
+        //
+        // If the caller already passed historyDepth (plugin assemble path), honour it
+        // as an explicit cap — the adaptive depth still applies as an upper bound so
+        // we never request more than the budget can absorb.
+        const s4SessionType = classifySessionType(sampleMessages);
+        const s4ObservedDensity = estimateObservedMsgDensity(sampleMessages);
+        const s4HistoryBudget = Math.floor(budget * b4HistoryFraction);
+        const s4AdaptiveDepth = computeAdaptiveHistoryDepth(s4SessionType, s4ObservedDensity, s4HistoryBudget, this.config.maxHistoryMessages);
+        // Effective depth: caller-provided historyDepth overrides adaptive when it is
+        // the tighter constraint; otherwise use the adaptive depth.
+        const s4EffectiveDepth = request.historyDepth
+            ? Math.min(request.historyDepth, s4AdaptiveDepth)
+            : s4AdaptiveDepth;
         let remaining = budget;
         // Phase 0 fence enforcement: resolve the compaction fence for this conversation.
         // All downstream message queries use this as a lower bound to exclude zombie
@@ -1005,6 +1552,11 @@ export class Compositor {
         }
         // ─── Conversation History ──────────────────────────────────
         let diagCrossTopicKeystones = 0;
+        // Sprint 4: hoisted so diagnostics block can read it regardless of includeHistory branch.
+        let s4RescueTrimFired = false;
+        // C1: total tool-chain degradation counters across history budget-fit and safety-valve passes.
+        let c1CoEjections = 0;
+        let c1StubReplacements = 0;
         // Hoisted: activeTopicId/name resolved inside history block, used for window dual-write (VS-1) and wiki page injection
         let composedActiveTopicId;
         let composedActiveTopicName;
@@ -1049,7 +1601,8 @@ export class Compositor {
             // Hoist resolved topic id+name so the window dual-write and wiki injection sections can access them
             composedActiveTopicId = activeTopicId;
             composedActiveTopicName = activeTopic?.name;
-            const rawHistoryMessages = await this.getHistory(request.agentId, request.sessionKey, request.historyDepth || this.config.maxHistoryMessages, store, activeTopicId, fenceMessageId, activeContext);
+            const rawHistoryMessages = await this.getHistory(request.agentId, request.sessionKey, s4EffectiveDepth, // Sprint 4: adaptive depth (replaces fixed maxHistoryMessages)
+            store, activeTopicId, fenceMessageId, activeContext);
             // Deduplicate history by StoredMessage.id (second line of defense after
             // pushHistory() tail-check dedup). Guards against any duplicates that
             // slipped through the warm path — e.g. bootstrap re-runs on existing sessions.
@@ -1073,26 +1626,38 @@ export class Compositor {
             // Replace oversized stale results with stubs so they don't burn budget.
             // Current-turn results (turn age 0) are never evicted.
             const evictedHistory = evictLargeToolResults(transformedHistory);
+            const c2ResolvedHistory = resolveOversizedArtifacts(evictedHistory, budget);
+            c2ArtifactDegradations += c2ResolvedHistory.refCount;
             // ── Budget-fit: walk newest→oldest, drop whole clusters ─────────────
             // Group tool_use + tool_result messages into clusters so they are kept
             // or dropped as a unit. Breaking mid-cluster creates orphaned tool
             // pairs that repairToolPairs has to strip downstream — wasting budget
             // and leaving gaps in conversation continuity.
-            const budgetClusters = clusterNeutralMessages(evictedHistory);
+            const budgetClusters = clusterNeutralMessages(c2ResolvedHistory.messages);
             let historyTokens = 0;
             const includedClusters = [];
             // Pre-allocate history budget. historyFraction is a fraction of the
             // effective token budget (post-reserve). Falls back to unbounded fill
             // (remaining) when historyFraction is not set.
-            const historyBudget = this.config.historyFraction != null
-                ? Math.floor(budget * this.config.historyFraction)
-                : remaining;
+            // B4: uses b4HistoryFraction (model-aware, blended from MECW catalog) instead
+            // of raw config.historyFraction so history doesn't overflow MECW ceiling.
+            const historyBudget = Math.floor(budget * b4HistoryFraction);
             const historyFillCap = Math.min(historyBudget, remaining);
             for (let i = budgetClusters.length - 1; i >= 0; i--) {
                 const cluster = budgetClusters[i];
                 if (historyTokens + cluster.tokenCost > historyFillCap && includedClusters.length > 0) {
-                    const droppedMsgCount = budgetClusters.slice(0, i + 1).reduce((s, c) => s + c.messages.length, 0);
-                    warnings.push(`History truncated at cluster ${i + 1}/${budgetClusters.length} (${droppedMsgCount} messages dropped)`);
+                    const droppedClusters = budgetClusters.slice(0, i + 1);
+                    const droppedMsgCount = droppedClusters.reduce((s, c) => s + c.messages.length, 0);
+                    const droppedToolResultCount = droppedClusters.reduce((sum, c) => sum + c.messages.filter(m => (m.toolResults?.length ?? 0) > 0).length, 0);
+                    if (droppedToolResultCount > 0) {
+                        c1CoEjections += droppedToolResultCount;
+                        console.info(`[hypermem:compositor] tool-chain co-eject reason=budget_cluster_drop count=${droppedToolResultCount} messages dropped`);
+                    }
+                    const c1Note = droppedToolResultCount > 0
+                        ? ` [C1: ${droppedToolResultCount} co-ejected reason=budget_cluster_drop]`
+                        : '';
+                    warnings.push(`History truncated at cluster ${i + 1}/${budgetClusters.length} (${droppedMsgCount} messages dropped)${c1Note}`);
+                    s4RescueTrimFired = true;
                     break;
                 }
                 includedClusters.unshift(cluster);
@@ -1204,18 +1769,12 @@ export class Compositor {
             }
             // Memory budget pool: facts, wiki, semantic recall, cross-session, and
             // trigger-fired doc chunks all draw from this shared pool via `remaining`.
-            // memoryFraction is a fraction of the effective token budget (post-reserve).
-            // Falls back to targetBudgetFraction cap behavior when memoryFraction is not set.
+            // B4: uses b4MemoryFraction (model-aware, blended from MECW catalog) instead
+            // of raw config.memoryFraction so the memory pool scales with what the model
+            // can effectively attend to within its MECW ceiling.
             let memoryBudget;
-            if (this.config.memoryFraction != null) {
-                memoryBudget = Math.floor(budget * this.config.memoryFraction);
-                if (remaining > memoryBudget) {
-                    remaining = memoryBudget;
-                }
-            }
-            else {
-                const targetFraction = this.config.targetBudgetFraction ?? 0.65;
-                memoryBudget = Math.floor(budget * targetFraction);
+            {
+                memoryBudget = Math.floor(budget * b4MemoryFraction);
                 if (remaining > memoryBudget) {
                     remaining = memoryBudget;
                 }
@@ -1246,11 +1805,12 @@ export class Compositor {
                 }
             }
         }
-        // ─── Injected Context Block ────────────────────────────────
-        // Facts, knowledge, preferences, semantic recall, and cross-session
-        // context are assembled into a single system message injected before
-        // conversation history (after system/identity).
-        const contextParts = [];
+        // ─── Cache-ordered context assembly ─────────────────────────
+        // Stable, reusable material is lifted above the cache boundary as its
+        // own system messages. Session-volatile material stays in the dynamic
+        // context block below that boundary.
+        const stablePrefixMessages = [];
+        const volatileContextParts = [];
         let contextTokens = 0;
         // ── C1: Content fingerprint dedup set ────────────────────
         // Replaces fragile substring-match dedup across temporal, open-domain,
@@ -1301,7 +1861,7 @@ export class Compositor {
             if (wikiContent) {
                 const tokens = estimateTokens(wikiContent);
                 if (tokens <= remaining) {
-                    contextParts.push(wikiContent);
+                    volatileContextParts.push(wikiContent);
                     contextTokens += tokens;
                     remaining -= tokens;
                     slots.library += tokens;
@@ -1309,7 +1869,7 @@ export class Compositor {
                 else if (remaining > 200) {
                     const truncated = this.truncateToTokens(wikiContent, remaining);
                     const truncTokens = estimateTokens(truncated);
-                    contextParts.push(truncated);
+                    volatileContextParts.push(truncated);
                     contextTokens += truncTokens;
                     remaining -= truncTokens;
                     slots.library += truncTokens;
@@ -1320,34 +1880,65 @@ export class Compositor {
         // scope: agent — filtered by agentId via filterByScope after fetch
         // Draws from the shared memory budget pool (remaining is pre-capped by memoryBudget).
         if (request.includeFacts !== false && remaining > 500) {
-            const factsContent = this.buildFactsFromDb(request.agentId, request.sessionKey, libDb || db);
-            if (factsContent !== null) {
-                const [content, factCount, scopeFiltered] = factsContent;
-                diagFactsIncluded += factCount;
-                diagScopeFiltered += scopeFiltered;
-                if (content) {
-                    const tokens = estimateTokens(content);
+            const factSections = this.buildFactSectionsFromDb(request.agentId, request.sessionKey, libDb || db);
+            if (factSections !== null) {
+                const { stableContent, stableCount, volatileContent, volatileCount, filteredCount } = factSections;
+                diagFactsIncluded += stableCount + volatileCount;
+                diagScopeFiltered += filteredCount;
+                if (stableContent) {
+                    const stableFactsBlock = `## Stable Facts\n${stableContent}`;
+                    const tokens = estimateTokens(stableFactsBlock);
                     if (tokens <= remaining) {
-                        contextParts.push(`## Active Facts\n${content}`);
+                        stablePrefixMessages.push({
+                            role: 'system',
+                            textContent: stableFactsBlock,
+                            toolCalls: null,
+                            toolResults: null,
+                        });
                         contextTokens += tokens;
                         remaining -= tokens;
-                        slots.facts = tokens;
+                        slots.facts += tokens;
                     }
                     else if (remaining > 200) {
-                        const truncated = this.truncateToTokens(content, remaining);
+                        const truncated = this.truncateToTokens(stableFactsBlock, remaining);
                         const truncTokens = estimateTokens(truncated);
-                        contextParts.push(`## Active Facts (truncated)\n${truncated}`);
+                        stablePrefixMessages.push({
+                            role: 'system',
+                            textContent: truncated,
+                            toolCalls: null,
+                            toolResults: null,
+                        });
                         contextTokens += truncTokens;
                         remaining -= truncTokens;
-                        slots.facts = truncTokens;
-                        warnings.push('Facts truncated to fit memory budget');
+                        slots.facts += truncTokens;
+                        warnings.push('Stable facts truncated to fit memory budget');
                     }
-                    // C1: Fingerprint each fact line so downstream dedup paths can skip duplicates
-                    const factLines = content.split('\n');
-                    for (const line of factLines) {
-                        if (line.startsWith('- [')) {
+                    for (const line of stableContent.split('\n')) {
+                        if (line.startsWith('- ['))
+                            addFingerprint(line);
+                    }
+                }
+                if (volatileContent) {
+                    const volatileFactsBlock = `## Active Facts\n${volatileContent}`;
+                    const tokens = estimateTokens(volatileFactsBlock);
+                    if (tokens <= remaining) {
+                        volatileContextParts.push(volatileFactsBlock);
+                        contextTokens += tokens;
+                        remaining -= tokens;
+                        slots.facts += tokens;
+                    }
+                    else if (remaining > 200) {
+                        const truncated = this.truncateToTokens(volatileFactsBlock, remaining);
+                        const truncTokens = estimateTokens(truncated);
+                        volatileContextParts.push(truncated);
+                        contextTokens += truncTokens;
+                        remaining -= truncTokens;
+                        slots.facts += truncTokens;
+                        warnings.push('Active facts truncated to fit memory budget');
+                    }
+                    for (const line of volatileContent.split('\n')) {
+                        if (line.startsWith('- ['))
                             addFingerprint(line);
-                        }
                     }
                 }
             }
@@ -1365,7 +1956,6 @@ export class Compositor {
                         order: 'DESC',
                     });
                     if (temporalFacts.length > 0) {
-                        // C1: Use fingerprint dedup instead of fragile substring match
                         const beforeCount = temporalFacts.length;
                         const novel = temporalFacts.filter(f => !isDuplicate(f.content));
                         diagFingerprintDedups += beforeCount - novel.length;
@@ -1380,9 +1970,9 @@ export class Compositor {
                                 .join('\n');
                             const temporalSection = `## Temporal Context\n${temporalBlock}`;
                             const tempTokens = estimateTokens(temporalSection);
-                            const tempBudget = Math.floor(remaining * 0.20); // Cap at 20% of remaining
+                            const tempBudget = Math.floor(remaining * 0.20);
                             if (tempTokens <= tempBudget) {
-                                contextParts.push(temporalSection);
+                                volatileContextParts.push(temporalSection);
                                 contextTokens += tempTokens;
                                 remaining -= tempTokens;
                                 slots.facts = (slots.facts ?? 0) + tempTokens;
@@ -1390,7 +1980,7 @@ export class Compositor {
                             else {
                                 const truncated = this.truncateToTokens(temporalSection, tempBudget);
                                 const truncTokens = estimateTokens(truncated);
-                                contextParts.push(truncated);
+                                volatileContextParts.push(truncated);
                                 contextTokens += truncTokens;
                                 remaining -= truncTokens;
                                 slots.facts = (slots.facts ?? 0) + truncTokens;
@@ -1409,8 +1999,6 @@ export class Compositor {
             // questions. Primary fix for LoCoMo open-domain F1 gap (0.133 baseline).
             if (request.includeSemanticRecall !== false && queryText && isOpenDomainQuery(queryText) && db && remaining > 300) {
                 try {
-                    // searchOpenDomain still does intra-result dedup. Existing-context dedup
-                    // now happens here via fingerprints so we keep one dedup path.
                     const rawOdResults = searchOpenDomain(db, queryText, '', 10);
                     const beforeOd = rawOdResults.length;
                     const odResults = rawOdResults.filter(r => !isDuplicate(r.content));
@@ -1431,9 +2019,9 @@ export class Compositor {
                             .join('\n');
                         const odSection = `## Open Domain Context\n${odBlock}`;
                         const odTokens = estimateTokens(odSection);
-                        const odBudget = Math.floor(remaining * 0.20); // Cap at 20% of remaining
+                        const odBudget = Math.floor(remaining * 0.20);
                         if (odTokens <= odBudget) {
-                            contextParts.push(odSection);
+                            volatileContextParts.push(odSection);
                             contextTokens += odTokens;
                             remaining -= odTokens;
                             slots.facts = (slots.facts ?? 0) + odTokens;
@@ -1441,7 +2029,7 @@ export class Compositor {
                         else {
                             const truncated = this.truncateToTokens(odSection, odBudget);
                             const truncTokens = estimateTokens(truncated);
-                            contextParts.push(truncated);
+                            volatileContextParts.push(truncated);
                             contextTokens += truncTokens;
                             remaining -= truncTokens;
                             slots.facts = (slots.facts ?? 0) + truncTokens;
@@ -1458,17 +2046,28 @@ export class Compositor {
         if (request.includeLibrary !== false && remaining > 500 && libDb) {
             const knowledgeContent = this.buildKnowledgeFromDb(request.agentId, libDb);
             if (knowledgeContent) {
-                const tokens = estimateTokens(knowledgeContent);
-                if (tokens <= remaining * 0.2) { // Cap knowledge at 20% of remaining
-                    contextParts.push(`## Knowledge\n${knowledgeContent}`);
+                const stableKnowledgeBlock = `## Knowledge\n${knowledgeContent}`;
+                const tokens = estimateTokens(stableKnowledgeBlock);
+                if (tokens <= remaining * 0.2) {
+                    stablePrefixMessages.push({
+                        role: 'system',
+                        textContent: stableKnowledgeBlock,
+                        toolCalls: null,
+                        toolResults: null,
+                    });
                     contextTokens += tokens;
                     remaining -= tokens;
                     slots.library += tokens;
                 }
                 else {
-                    const truncated = this.truncateToTokens(knowledgeContent, Math.floor(remaining * 0.2));
+                    const truncated = this.truncateToTokens(stableKnowledgeBlock, Math.floor(remaining * 0.2));
                     const truncTokens = estimateTokens(truncated);
-                    contextParts.push(`## Knowledge (truncated)\n${truncated}`);
+                    stablePrefixMessages.push({
+                        role: 'system',
+                        textContent: truncated,
+                        toolCalls: null,
+                        toolResults: null,
+                    });
                     contextTokens += truncTokens;
                     remaining -= truncTokens;
                     slots.library += truncTokens;
@@ -1481,9 +2080,15 @@ export class Compositor {
         if (request.includeLibrary !== false && remaining > 300 && libDb) {
             const prefsContent = this.buildPreferencesFromDb(request.agentId, libDb);
             if (prefsContent) {
-                const tokens = estimateTokens(prefsContent);
-                if (tokens <= remaining * 0.1) { // Cap preferences at 10% of remaining
-                    contextParts.push(`## User Preferences\n${prefsContent}`);
+                const stablePrefsBlock = `## User Preferences\n${prefsContent}`;
+                const tokens = estimateTokens(stablePrefsBlock);
+                if (tokens <= remaining * 0.1) {
+                    stablePrefixMessages.push({
+                        role: 'system',
+                        textContent: stablePrefsBlock,
+                        toolCalls: null,
+                        toolResults: null,
+                    });
                     contextTokens += tokens;
                     remaining -= tokens;
                     slots.library += tokens;
@@ -1518,7 +2123,7 @@ export class Compositor {
                     );
                     if (semanticContent) {
                         const tokens = estimateTokens(semanticContent);
-                        contextParts.push(`## Related Memory\n${semanticContent}`);
+                        volatileContextParts.push(`## Related Memory\n${semanticContent}`);
                         contextTokens += tokens;
                         remaining -= tokens;
                         // Semantic recall draws from multiple sources, attribute to context
@@ -1614,14 +2219,24 @@ export class Compositor {
                         const chunkLines = [];
                         let chunkTokens = 0;
                         for (const chunk of chunks) {
-                            if (chunkTokens + chunk.tokenEstimate > maxTokens)
-                                break;
                             // Skip chunks from files OpenClaw already injects into the system prompt
                             const chunkBasename = chunk.sourcePath.split('/').pop() || '';
                             if (OPENCLAW_BOOTSTRAP_FILES.has(chunkBasename))
                                 continue;
-                            chunkLines.push(`### ${chunk.sectionPath}\n${chunk.content}`);
-                            chunkTokens += chunk.tokenEstimate;
+                            // C2: degrade oversized chunks to canonical artifact references before
+                            // enforcing the per-collection budget gate. Otherwise an oversized raw
+                            // chunk gets dropped before the tiny degraded ref ever has a chance to fit.
+                            const c2ChunkRef = degradeOversizedDocChunk(chunk.id, chunk.sourcePath, chunk.content, c2ArtifactThresholdTokens);
+                            const renderedChunk = c2ChunkRef !== null
+                                ? `### ${chunk.sectionPath}\n${c2ChunkRef}`
+                                : `### ${chunk.sectionPath}\n${chunk.content}`;
+                            const renderedTokens = estimateTokens(renderedChunk);
+                            if (chunkTokens + renderedTokens > maxTokens)
+                                break;
+                            chunkLines.push(renderedChunk);
+                            chunkTokens += renderedTokens;
+                            if (c2ChunkRef !== null)
+                                c2ArtifactDegradations++;
                         }
                         if (chunkLines.length > 0) {
                             const collectionLabel = trigger.collection.split('/').pop() || trigger.collection;
@@ -1638,7 +2253,7 @@ export class Compositor {
                     }
                 }
                 if (docParts.length > 0) {
-                    contextParts.push(docParts.join('\n\n'));
+                    volatileContextParts.push(docParts.join('\n\n'));
                 }
             }
             else if (remaining > 400 && (this.vectorStore || libDb)) {
@@ -1653,7 +2268,7 @@ export class Compositor {
                         new Promise((_, reject) => setTimeout(() => reject(new Error('fallback_knn_timeout')), 3000)),
                     ]);
                     if (fallbackContent) {
-                        contextParts.push(`## Related Memory\n${fallbackContent}`);
+                        volatileContextParts.push(`## Related Memory\n${fallbackContent}`);
                         const fallbackTokens = estimateTokens(fallbackContent);
                         contextTokens += fallbackTokens;
                         remaining -= fallbackTokens;
@@ -1681,13 +2296,20 @@ export class Compositor {
                     let spawnTokens = 0;
                     const maxSpawnTokens = Math.floor(remaining * 0.15);
                     for (const chunk of spawnChunks) {
-                        if (spawnTokens + chunk.tokenEstimate > maxSpawnTokens)
+                        // C2: degrade oversized spawn chunks before enforcing the lane budget,
+                        // so a bounded reference can fit even when the raw chunk cannot.
+                        const c2SpawnRef = degradeOversizedDocChunk(chunk.id, chunk.sourcePath, chunk.content, c2ArtifactThresholdTokens);
+                        const renderedChunk = c2SpawnRef ?? chunk.content;
+                        const renderedTokens = estimateTokens(renderedChunk);
+                        if (spawnTokens + renderedTokens > maxSpawnTokens)
                             break;
-                        spawnLines.push(chunk.content);
-                        spawnTokens += chunk.tokenEstimate;
+                        spawnLines.push(renderedChunk);
+                        spawnTokens += renderedTokens;
+                        if (c2SpawnRef !== null)
+                            c2ArtifactDegradations++;
                     }
                     if (spawnLines.length > 0) {
-                        contextParts.push(`## Spawn Context Documents\n${spawnLines.join('\n\n')}`);
+                        volatileContextParts.push(`## Spawn Context Documents\n${spawnLines.join('\n\n')}`);
                         contextTokens += spawnTokens;
                         remaining -= spawnTokens;
                         slots.library += spawnTokens;
@@ -1706,7 +2328,7 @@ export class Compositor {
                 const tokens = estimateTokens(crossSessionContent);
                 const maxContextTokens = Math.min(this.config.maxCrossSessionContext, Math.floor(remaining * 0.2));
                 if (tokens <= maxContextTokens) {
-                    contextParts.push(`## Other Active Sessions\n${crossSessionContent}`);
+                    volatileContextParts.push(`## Other Active Sessions\n${crossSessionContent}`);
                     contextTokens += tokens;
                     remaining -= tokens;
                     slots.context += tokens;
@@ -1714,7 +2336,7 @@ export class Compositor {
                 else {
                     const truncated = this.truncateToTokens(crossSessionContent, maxContextTokens);
                     const truncTokens = estimateTokens(truncated);
-                    contextParts.push(`## Other Active Sessions (truncated)\n${truncated}`);
+                    volatileContextParts.push(`## Other Active Sessions (truncated)\n${truncated}`);
                     contextTokens += truncTokens;
                     remaining -= truncTokens;
                     slots.context += truncTokens;
@@ -1730,68 +2352,92 @@ export class Compositor {
             if (actionSummary) {
                 const actionTokens = Math.ceil(actionSummary.length / 4);
                 if (actionTokens <= remaining) {
-                    contextParts.push(actionSummary);
+                    volatileContextParts.push(actionSummary);
                     contextTokens += actionTokens;
                     remaining -= actionTokens;
                     slots.context += actionTokens;
                 }
             }
         }
+        const firstNonSystem = messages.findIndex(m => m.role !== 'system');
+        const stableInsertIdx = firstNonSystem === -1 ? messages.length : firstNonSystem;
+        if (stablePrefixMessages.length > 0) {
+            messages.splice(stableInsertIdx, 0, ...stablePrefixMessages);
+        }
         // ── Inject assembled context block ──────────────────────
-        const assembledContextBlock = contextParts.length > 0 ? contextParts.join('\n\n') : undefined;
+        const assembledContextBlock = volatileContextParts.length > 0 ? volatileContextParts.join('\n\n') : undefined;
         if (assembledContextBlock) {
             const contextMsg = {
                 role: 'system',
                 textContent: assembledContextBlock,
                 toolCalls: null,
                 toolResults: null,
-                // DYNAMIC_BOUNDARY: this slot is session-specific (facts, recall, episodes).
-                // It must NOT be included in any prompt caching boundary that spans static content.
-                // The provider translator will insert a cache_control ephemeral marker BEFORE
-                // this message so providers can cache everything up to identity/system as static context.
-                metadata: { dynamicBoundary: true },
+                // CACHE_PREFIX_BOUNDARY_SLOT: this message starts the volatile side of the
+                // prompt. Everything above it is stable-prefix material eligible for reuse;
+                // everything at or below it is per-session / per-turn context.
+                metadata: { dynamicBoundary: true, cacheBoundarySlot: CACHE_PREFIX_BOUNDARY_SLOT },
             };
-            // Insert after system/identity, before history
-            // Insert context after all system/identity messages, before conversation history.
-            // findIndex returns -1 when all messages are system-role — handle explicitly.
-            const firstNonSystem = messages.findIndex(m => m.role !== 'system');
-            const insertIdx = firstNonSystem === -1 ? messages.length : firstNonSystem;
-            messages.splice(insertIdx, 0, contextMsg);
-        }
-        // ─── Safety Valve: Post-Assembly Budget Check ───────────────────
+            messages.splice(stableInsertIdx + stablePrefixMessages.length, 0, contextMsg);
+        }
+        const stablePrefix = getStablePrefixMessages(messages);
+        const prefixSegmentCount = stablePrefix.length;
+        const prefixTokens = stablePrefix.reduce((sum, msg) => sum + estimateMessageTokens(msg), 0);
+        const volatileHistoryTokens = messages.slice(prefixSegmentCount)
+            .reduce((sum, msg) => sum + estimateMessageTokens(msg), 0);
+        const prefixHash = computeStablePrefixHash(stablePrefix);
+        // ─── Safety Valve: Post-Assembly Budget Check (C1-aware) ──────────────
         // Re-estimate total tokens after all slots are assembled. If the
         // composition exceeds tokenBudget * 1.05 (5% tolerance for estimation
         // drift), trim history messages from the oldest until we're under budget.
         // History is the most compressible slot — system/identity are never
         // truncated, and context (facts/recall/episodes) is more valuable per-token.
+        //
+        // C1: When an assistant message with toolCalls is ejected, its dependent
+        // tool-result messages are co-ejected or stubbed via resolveToolChainEjections.
+        // This ensures no orphaned tool-results survive above the stable-prefix
+        // boundary and eliminates the downstream repairToolPairs cleanup cost.
         const estimatedTotal = messages.reduce((sum, m) => sum + estimateMessageTokens(m), 0);
         const hardCeiling = Math.floor(budget * 1.05);
         if (estimatedTotal > hardCeiling) {
             const overage = estimatedTotal - budget;
             let trimmed = 0;
             let trimCount = 0;
-            // Find history messages (non-system, after system/identity block)
-            // Walk forward from the first non-system message, trimming oldest history first
+            // Collect indices of messages to eject before mutating the array.
+            // Walk forward from the first non-system message, trimming oldest first.
             const firstNonSystemIdx = messages.findIndex(m => m.role !== 'system');
+            const ejectIndices = new Set();
             if (firstNonSystemIdx >= 0) {
                 let i = firstNonSystemIdx;
                 while (i < messages.length && trimmed < overage) {
-                    // Don't trim the last user message (current prompt)
+                    // Don't trim the last user message (current prompt).
                     if (i === messages.length - 1 && messages[i].role === 'user')
                         break;
                     const msgTokens = estimateMessageTokens(messages[i]);
-                    messages.splice(i, 1);
+                    ejectIndices.add(i);
                     trimmed += msgTokens;
                     trimCount++;
-                    // Don't increment i — splice shifts everything down
+                    i++;
                 }
             }
-            if (trimCount > 0) {
+            if (ejectIndices.size > 0) {
+                // C1: centralized ejection — resolves dependent tool-results atomically.
+                const ejectionResult = resolveToolChainEjections(messages, ejectIndices, 'eviction_oversize');
+                // Replace in-place so the rest of the compose path sees the clean array.
+                messages.length = 0;
+                messages.push(...ejectionResult.messages);
+                c1CoEjections += ejectionResult.coEjections;
+                c1StubReplacements += ejectionResult.stubReplacements;
                 slots.history = Math.max(0, slots.history - trimmed);
                 remaining += trimmed;
-                warnings.push(`Safety valve: trimmed ${trimCount} oldest history messages (${trimmed} tokens) to fit budget`);
+                const c1Note = (ejectionResult.coEjections + ejectionResult.stubReplacements > 0)
+                    ? ` [C1: ${ejectionResult.coEjections} co-ejected, ${ejectionResult.stubReplacements} stubbed]`
+                    : '';
+                warnings.push(`Safety valve: trimmed ${trimCount} oldest history messages (${trimmed} tokens) to fit budget${c1Note}`);
             }
         }
+        // ─── Sprint 2.1: Hydrate active-turn artifact stubs ────────────────────
+        // Must run on NeutralMessages[] BEFORE provider translation.
+        const hydrationResult = this.hydrateActiveTurnArtifacts(messages, db);
         // ─── Translate to provider format (unless caller wants neutral) ───
         // When skipProviderTranslation is set, return NeutralMessages directly.
         // The context engine plugin uses this: the OpenClaw runtime handles its
@@ -1861,7 +2507,7 @@ export class Compositor {
         }
         // W3: Build compose diagnostics
         let zeroResultReason;
-        if (contextParts.length === 0) {
+        if (volatileContextParts.length === 0 && stablePrefixMessages.length === 0) {
             if (diagScopeFiltered > 0 && diagFactsIncluded === 0 && diagSemanticResults === 0) {
                 zeroResultReason = 'scope_filtered_all';
             }
@@ -1897,6 +2543,37 @@ export class Compositor {
             fingerprintDedups: diagFingerprintDedups,
             fingerprintCollisions: diagFingerprintCollisions,
             windowCacheHit: false,
+            prefixSegmentCount,
+            prefixTokens,
+            prefixHash,
+            // B2: Surface the previous cached prefixHash when this full compose was
+            // triggered by a cache bypass (stable-prefix mutation detected).
+            prevPrefixHash: _prevPrefixHashFromBypass,
+            volatileHistoryTokens,
+            // Sprint 4 fields
+            sessionType: s4SessionType,
+            historyDepthChosen: s4EffectiveDepth,
+            estimatedMsgDensityTokens: s4ObservedDensity,
+            rescueTrimFired: s4RescueTrimFired,
+            // B4: model-aware lane budget diagnostics
+            mecwProfile: b4MecwProfile,
+            mecwApplied: b4MecwApplied,
+            mecwBlend: b4MecwBlend,
+            effectiveHistoryFraction: b4HistoryFraction,
+            effectiveMemoryFraction: b4MemoryFraction,
+            trimSoftTarget: TRIM_BUDGET_POLICY.trimSoftTarget,
+            trimGrowthThreshold: TRIM_BUDGET_POLICY.trimGrowthThreshold,
+            trimHeadroomFraction: TRIM_BUDGET_POLICY.trimHeadroomFraction,
+            // C1: tool-chain ejection telemetry
+            toolChainCoEjections: c1CoEjections > 0 ? c1CoEjections : undefined,
+            toolChainStubReplacements: c1StubReplacements > 0 ? c1StubReplacements : undefined,
+            // C2: artifact oversize degradation telemetry
+            artifactDegradations: c2ArtifactDegradations > 0 ? c2ArtifactDegradations : undefined,
+            artifactOversizeThresholdTokens: c2ArtifactThresholdTokens,
+            // Sprint 2.1: tool artifact hydration telemetry
+            artifactsHydrated: hydrationResult.artifactsHydrated > 0 ? hydrationResult.artifactsHydrated : undefined,
+            hydrationBytes: hydrationResult.hydrationBytes > 0 ? hydrationResult.hydrationBytes : undefined,
+            hydrationMisses: hydrationResult.hydrationMisses > 0 ? hydrationResult.hydrationMisses : undefined,
         };
         if (pressureHigh) {
             warnings.push(`SESSION_PRESSURE_HIGH: avg_turn_cost=${avgTurnCost} tokens, dynamic reserve capped at ${Math.round(dynamicReserve * 100)}%`);
@@ -1913,6 +2590,14 @@ export class Compositor {
         // VS-1: Dual-write, session-scoped key for backwards compat;
         // topic-scoped key for per-topic window retrieval when activeTopicId is set.
         try {
+            // B2: Compute a cheap prefix input hash from the system + identity slot
+            // contents that fed the stable prefix. Stored in WindowCacheMeta so the
+            // C4 fast-exit can detect prefix mutations without re-running full compose.
+            const _prefixInputHash = createHash('sha256')
+                .update(systemContent ?? '')
+                .update('\n␞\n')
+                .update(identityContent ?? '')
+                .digest('hex');
             await this.cache.setWindow(request.agentId, request.sessionKey, messages, 120);
             await this.cache.setWindowMeta(request.agentId, request.sessionKey, {
                 slots: slots,
@@ -1920,6 +2605,8 @@ export class Compositor {
                 warnings,
                 diagnostics,
                 composedAt,
+                prefixHash,
+                prefixInputHash: _prefixInputHash,
             }, 120);
         }
         catch {
@@ -1972,7 +2659,7 @@ export class Compositor {
                 // Cursor write is best-effort
             }
         }
-        console.log(`[hypermem:compose] agent=${request.agentId} triggers=${diagTriggerHits} fallback=${diagTriggerFallbackUsed} facts=${diagFactsIncluded} semantic=${diagSemanticResults} chunks=${diagDocChunkCollections} scopeFiltered=${diagScopeFiltered} mode=${diagRetrievalMode} crossTopicKeystones=${diagCrossTopicKeystones}`);
+        console.log(`[hypermem:compose] agent=${request.agentId} triggers=${diagTriggerHits} fallback=${diagTriggerFallbackUsed} facts=${diagFactsIncluded} semantic=${diagSemanticResults} chunks=${diagDocChunkCollections} scopeFiltered=${diagScopeFiltered} mode=${diagRetrievalMode} crossTopicKeystones=${diagCrossTopicKeystones} c2_degradations=${c2ArtifactDegradations} c2_threshold=${c2ArtifactThresholdTokens}`);
         return {
             messages: outputMessages,
             tokenCount: totalTokens,
@@ -2080,7 +2767,7 @@ export class Compositor {
             },
         });
     }
-    async refreshRedisGradient(agentId, sessionKey, db, tokenBudget) {
+    async refreshRedisGradient(agentId, sessionKey, db, tokenBudget, historyDepth) { // historyDepth is optional and bounds how many history messages are reloaded and written back
         const store = new MessageStore(db);
         const conversation = store.getConversation(sessionKey);
         if (!conversation)
@@ -2105,28 +2792,38 @@ export class Compositor {
             // Fence lookup is best-effort
         }
         // Phase 3: prefer DAG walk from context head
+        const refreshHistoryLimit = Math.min(this.config.maxHistoryMessages, Math.max(1, historyDepth ?? this.config.maxHistoryMessages)); // clamp to [1, maxHistoryMessages]; a nullish historyDepth falls back to maxHistoryMessages
         let rawHistory;
         if (activeContext?.headMessageId) {
-            rawHistory = store.getHistoryByDAGWalk(activeContext.headMessageId, this.config.maxHistoryMessages);
+            rawHistory = store.getHistoryByDAGWalk(activeContext.headMessageId, refreshHistoryLimit);
             if (rawHistory.length === 0) { // DAG walk produced nothing: fall back to the recent-messages query
-                rawHistory = store.getRecentMessages(conversation.id, this.config.maxHistoryMessages, gradientFenceMessageId);
+                rawHistory = store.getRecentMessages(conversation.id, refreshHistoryLimit, gradientFenceMessageId);
             }
         }
         else {
-            rawHistory = store.getRecentMessages(conversation.id, this.config.maxHistoryMessages, gradientFenceMessageId);
-        }
+            rawHistory = store.getRecentMessages(conversation.id, refreshHistoryLimit, gradientFenceMessageId);
+        }
+        // Sprint 3 (AfterTurn Rebuild/Trim Loop Fix): cap gradient total-window tokens
+        // at the same 65% target that assemble.normal trims to. Previously this was
+        // tokenBudget/0.80 (≈1.25×budget), which made applyToolGradient preserve more
+        // content than the trim target allowed — causing assemble.normal to always trim
+        // on the next turn even in the steady-state path. Aligning the gradient cap to
+        // the trim target means the rebuilt window already fits within the assemble
+        // envelope by construction.
+        const { softBudget: gradientAssembleBudget } = resolveTrimBudgets(tokenBudget ?? 0); // a nullish tokenBudget is passed as 0; the result is only used below when tokenBudget is positive
         const transformedHistory = applyToolGradient(rawHistory, {
             totalWindowTokens: tokenBudget && tokenBudget > 0
-                ? Math.max(tokenBudget, Math.floor(tokenBudget / 0.80))
+                ? gradientAssembleBudget
                 : TOOL_PLANNING_BASELINE_WINDOW,
         });
         // If a token budget is provided, trim the gradient-compressed window to fit
-        // before writing to Redis. Without this, up to maxHistoryMessages messages
-        // land in Redis regardless of size, and trimHistoryToTokenBudget fires
-        // on every subsequent assemble() causing per-turn churn.
+        // before writing to Redis. The cap uses the same GRADIENT_ASSEMBLE_TARGET
+        // (0.65) so the window written to Redis sits inside the assemble.normal trim
+        // envelope. The next assemble() will find the window already within budget
+        // and skip the trim entirely in the steady-state path.
         let historyToWrite = transformedHistory;
         if (tokenBudget && tokenBudget > 0) {
-            const budgetCap = Math.floor(tokenBudget * 0.8);
+            const budgetCap = gradientAssembleBudget; // same value that was handed to applyToolGradient as totalWindowTokens
             let runningTokens = 0;
             const clusters = clusterNeutralMessages(transformedHistory);
             const cappedClusters = [];
@@ -2146,7 +2843,7 @@ export class Compositor {
                     `for ${agentId}/${sessionKey} (budgetCap=${budgetCap}, tokenCost=${runningTokens})`);
             }
         }
-        await this.cache.replaceHistory(agentId, sessionKey, historyToWrite, this.config.maxHistoryMessages);
+        await this.cache.replaceHistory(agentId, sessionKey, historyToWrite, refreshHistoryLimit); // the cache write uses the same clamped limit as the reload above
     }
     // ─── Slot Content Resolution ─────────────────────────────────
     /**
@@ -2210,6 +2907,19 @@ export class Compositor {
      * Returns [content, factCount, scopeFilteredCount] or null if DB unavailable.
      */
     buildFactsFromDb(agentId, sessionKey, db) {
+        const sections = this.buildFactSectionsFromDb(agentId, sessionKey, db); // delegate: facts arrive already split into stable and volatile sections
+        if (!sections)
+            return null; // no sections object (buildFactSectionsFromDb returns null when db is missing)
+        const combined = [sections.stableContent, sections.volatileContent]
+            .filter((value) => Boolean(value)) // drop null or empty section text
+            .join('\n'); // stable section first, then volatile
+        return [
+            combined || null, // content: null when neither section produced text
+            sections.stableCount + sections.volatileCount, // factCount: total across both sections
+            sections.filteredCount, // scopeFilteredCount, passed through unchanged
+        ];
+    }
+    buildFactSectionsFromDb(agentId, sessionKey, db) { // splits scope-allowed facts into a stable section (scope is not session, and sessionKey absent or different from the current one) and a volatile section (all remaining rows)
         if (!db)
             return null; // no db handle: the caller receives null instead of a sections object
         const tableExists = db.prepare("SELECT count(*) as cnt FROM sqlite_master WHERE type='table' AND name='facts'").get();
@@ -2225,30 +2935,52 @@ export class Compositor {
       ORDER BY confidence DESC, decay_score ASC
       LIMIT ?
     `).all(agentId, this.config.maxFacts);
-        if (rawRows.length === 0)
-            return [null, 0, 0];
-        // W1: Apply scope filter — enforce retrieval access control
+        if (rawRows.length === 0) {
+            return {
+                stableContent: null,
+                stableCount: 0,
+                volatileContent: null,
+                volatileCount: 0,
+                filteredCount: 0,
+            };
+        }
         const ctx = { agentId, sessionKey };
         const { allowed, filteredCount } = filterByScope(rawRows.map(r => ({
             ...r,
             agentId: r.agent_id,
             sessionKey: r.session_key,
         })), ctx);
-        if (allowed.length === 0)
-            return [null, 0, filteredCount];
-        const content = allowed
-            .map(r => {
-            // Session attribution: label facts from a different session so the model
-            // can distinguish current-session context from cross-session facts.
-            // Shows last 8 chars of session key as a stable short identifier.
-            const fromOtherSession = r.sessionKey && r.sessionKey !== sessionKey;
-            const sessionSuffix = fromOtherSession
-                ? `, session:${r.sessionKey.slice(-8)}`
-                : '';
-            return `- [${r.domain || 'general'}${sessionSuffix}] ${r.content}`;
-        })
-            .join('\n');
-        return [content, allowed.length, filteredCount];
+        if (allowed.length === 0) {
+            return {
+                stableContent: null,
+                stableCount: 0,
+                volatileContent: null,
+                volatileCount: 0,
+                filteredCount,
+            };
+        }
+        const formatRows = (rows) => {
+            if (rows.length === 0)
+                return null;
+            return rows
+                .map(r => {
+                const fromOtherSession = r.sessionKey && r.sessionKey !== sessionKey;
+                const sessionSuffix = fromOtherSession
+                    ? `, session:${r.sessionKey.slice(-8)}`
+                    : '';
+                return `- [${r.domain || 'general'}${sessionSuffix}] ${r.content}`;
+            })
+                .join('\n');
+        };
+        const stableRows = allowed.filter(r => r.scope !== 'session' && (!r.sessionKey || r.sessionKey !== sessionKey));
+        const volatileRows = allowed.filter(r => !stableRows.includes(r));
+        return {
+            stableContent: formatRows(stableRows),
+            stableCount: stableRows.length,
+            volatileContent: formatRows(volatileRows),
+            volatileCount: volatileRows.length,
+            filteredCount,
+        };
     }
     /**
      * Build knowledge content from library DB.