npm - @psiclawops/hypermem - Versions diffs - 0.8.5 → 0.9.0 - Mend

@psiclawops/hypermem 0.8.5 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (87) hide show

package/CHANGELOG.md +26 -0
package/INSTALL.md +132 -9
package/README.md +119 -272
package/bench/README.md +42 -0
package/bench/data-access-bench.mjs +380 -0
package/bin/hypermem-bench.mjs +2 -0
package/bin/hypermem-doctor.mjs +412 -0
package/bin/hypermem-model-audit.mjs +339 -0
package/bin/hypermem-status.mjs +491 -70
package/dist/adaptive-lifecycle.d.ts +81 -0
package/dist/adaptive-lifecycle.d.ts.map +1 -0
package/dist/adaptive-lifecycle.js +190 -0
package/dist/budget-policy.d.ts +1 -1
package/dist/budget-policy.d.ts.map +1 -1
package/dist/budget-policy.js +10 -5
package/dist/cache.d.ts +1 -0
package/dist/cache.d.ts.map +1 -1
package/dist/cache.js +2 -0
package/dist/composition-snapshot-integrity.d.ts +36 -0
package/dist/composition-snapshot-integrity.d.ts.map +1 -0
package/dist/composition-snapshot-integrity.js +131 -0
package/dist/composition-snapshot-runtime.d.ts +59 -0
package/dist/composition-snapshot-runtime.d.ts.map +1 -0
package/dist/composition-snapshot-runtime.js +250 -0
package/dist/composition-snapshot-store.d.ts +44 -0
package/dist/composition-snapshot-store.d.ts.map +1 -0
package/dist/composition-snapshot-store.js +117 -0
package/dist/compositor.d.ts +125 -1
package/dist/compositor.d.ts.map +1 -1
package/dist/compositor.js +692 -44
package/dist/doc-chunk-store.d.ts +19 -0
package/dist/doc-chunk-store.d.ts.map +1 -1
package/dist/doc-chunk-store.js +56 -6
package/dist/hybrid-retrieval.d.ts +38 -0
package/dist/hybrid-retrieval.d.ts.map +1 -1
package/dist/hybrid-retrieval.js +86 -1
package/dist/index.d.ts +12 -3
package/dist/index.d.ts.map +1 -1
package/dist/index.js +28 -2
package/dist/knowledge-store.d.ts +4 -1
package/dist/knowledge-store.d.ts.map +1 -1
package/dist/knowledge-store.js +27 -4
package/dist/library-schema.d.ts +12 -8
package/dist/library-schema.d.ts.map +1 -1
package/dist/library-schema.js +22 -8
package/dist/message-store.d.ts.map +1 -1
package/dist/message-store.js +7 -3
package/dist/metrics-dashboard.d.ts +18 -1
package/dist/metrics-dashboard.d.ts.map +1 -1
package/dist/metrics-dashboard.js +52 -14
package/dist/reranker.d.ts +1 -1
package/dist/reranker.js +2 -2
package/dist/schema.d.ts +1 -1
package/dist/schema.d.ts.map +1 -1
package/dist/schema.js +28 -1
package/dist/seed.d.ts.map +1 -1
package/dist/seed.js +2 -0
package/dist/topic-synthesizer.d.ts +20 -0
package/dist/topic-synthesizer.d.ts.map +1 -1
package/dist/topic-synthesizer.js +113 -3
package/dist/trigger-registry.d.ts.map +1 -1
package/dist/trigger-registry.js +10 -2
package/dist/types.d.ts +271 -1
package/dist/types.d.ts.map +1 -1
package/dist/version.d.ts +7 -7
package/dist/version.d.ts.map +1 -1
package/dist/version.js +17 -7
package/docs/DIAGNOSTICS.md +205 -0
package/docs/INTEGRATION_VALIDATION.md +186 -0
package/docs/MIGRATION.md +9 -6
package/docs/MIGRATION_GUIDE.md +125 -101
package/docs/ROADMAP.md +238 -20
package/docs/TUNING.md +19 -5
package/install.sh +152 -401
package/memory-plugin/LICENSE +190 -0
package/memory-plugin/README.md +20 -0
package/memory-plugin/dist/index.js +50 -0
package/memory-plugin/package.json +2 -2
package/package.json +18 -4
package/plugin/LICENSE +190 -0
package/plugin/README.md +20 -0
package/plugin/dist/index.d.ts +29 -0
package/plugin/dist/index.d.ts.map +1 -1
package/plugin/dist/index.js +288 -23
package/plugin/dist/index.js.map +1 -1
package/plugin/package.json +2 -2
package/scripts/install-runtime.mjs +12 -1

package/dist/compositor.js CHANGED Viewed

@@ -15,11 +15,11 @@ import { filterByScope } from './retrieval-policy.js';
 import { DEFAULT_TRIGGERS, matchTriggers, logRegistryStartup, } from './trigger-registry.js';
 import { MessageStore } from './message-store.js';
 import { SessionTopicMap } from './session-topic-map.js';
-import { toProviderFormat } from './provider-translator.js';
+import { toProviderFormat, detectProvider as s4DetectProvider } from './provider-translator.js';
 import { DocChunkStore } from './doc-chunk-store.js';
 import { hybridSearch } from './hybrid-retrieval.js';
-import { ensureCompactionFenceSchema, updateCompactionFence, getCompactionFence } from './compaction-fence.js';
-import { getActiveContext } from './context-store.js';
+import { ensureCompactionFenceSchema, updateCompactionFence, getCompactionFence, getCompactionEligibility } from './compaction-fence.js';
+import { getActiveContext, getOrCreateActiveContext } from './context-store.js';
 import { rankKeystones, scoreKeystone } from './keystone-scorer.js';
 import { buildOrgRegistryFromDb, defaultOrgRegistry } from './cross-agent.js';
 import { getActiveFOS, matchMOD, renderFOS, renderMOD, renderLightFOS, resolveOutputTier, buildActionVerificationSummary } from './fos-mod.js';
@@ -27,8 +27,11 @@ import { KnowledgeStore } from './knowledge-store.js';
 import { TemporalStore, hasTemporalSignals } from './temporal-store.js';
 import { isOpenDomainQuery, searchOpenDomain } from './open-domain.js';
 import { TRIM_BUDGET_POLICY, resolveTrimBudgets } from './budget-policy.js';
+import { resolveAdaptiveLifecyclePolicy } from './adaptive-lifecycle.js';
 import { formatToolChainStub, parseToolChainStub, formatArtifactRef, isArtifactRef } from './degradation.js';
 import { ToolArtifactStore } from './tool-artifact-store.js';
+import { insertCompositionSnapshot, getLatestValidCompositionSnapshot, listCompositionSnapshots, MAX_WARM_RESTORE_REPAIR_DEPTH, } from './composition-snapshot-store.js';
+import { buildCompositionSnapshotSlots, restoreWarmSnapshotState, WARM_RESTORE_MEASUREMENT_GATES, } from './composition-snapshot-runtime.js';
 /**
  * Files that OpenClaw's contextInjection injects into the system prompt.
  * HyperMem must not re-inject these via doc chunk retrieval to avoid duplication.
@@ -300,6 +303,71 @@ export function computeAdaptiveHistoryDepth(sessionType, observedDensity, histor
     const depth = Math.floor((historyBudgetTokens * SAFETY_MARGIN) / observedDensity);
     return Math.min(maxHistoryMessages, Math.max(20, depth));
 }
+// ─── Sprint 3: Unified Pressure Signal ───────────────────────────────────────────────────────
+/**
+ * Canonical pressure labels shared across compose and compaction paths.
+ * Use these constants when setting the `pressureSource` field so all consumers
+ * can filter logs with a stable string without guessing spellings.
+ */
+export const PRESSURE_SOURCE = {
+    /** Compose path: pressure derived from (budget - remaining) after full slot assembly. */
+    COMPOSE_POST_ASSEMBLY: 'compose:post-assembly',
+    /** Compose path: pressure measured immediately before semantic recall runs. */
+    COMPOSE_PRE_RECALL: 'compose:pre-recall',
+    /** Compaction path: pressure from Redis token estimate / effectiveBudget. */
+    COMPACT_REDIS_ESTIMATE: 'compact:redis-estimate',
+    /** Compaction path: pressure from runtime-reported currentTokenCount / effectiveBudget. */
+    COMPACT_RUNTIME_TOTAL: 'compact:runtime-total',
+    /** Tool-loop assemble path: pressure from in-memory working message array / effectiveBudget. */
+    TOOLLOOP_RUNTIME_ARRAY: 'toolloop:runtime-array',
+};
+/**
+ * Compute a unified pressure fraction so compose and compaction paths report
+ * the same numeric concept without drift.
+ *
+ * The fraction is not clamped or capped (it may exceed 1.0) — callers get the raw value so they can
+ * decide their own thresholds without us hardcoding them here.
+ *
+ * @param usedTokens  Tokens consumed (numerator).
+ * @param budgetTokens  Effective budget (denominator). Anything not > 0 yields a fraction of 0 instead of dividing.
+ * @param source  Label from PRESSURE_SOURCE for telemetry (metadata only).
+ * @returns { fraction, pct, source } where fraction = usedTokens / budgetTokens,
+ *          pct = Math.round(fraction * 100), source = canonical label.
+ */
+export function computeUnifiedPressure(usedTokens, budgetTokens, source) {
+    const fraction = budgetTokens > 0 ? usedTokens / budgetTokens : 0;
+    const pct = Math.round(fraction * 100);
+    return { fraction, pct, source };
+}
+/**
+ * 0.9.0: adaptive lifecycle scales semantic-recall breadth in compose.
+ *
+ * Base fractions match the historical compositor constants so that a steady
+ * (multiplier=1.0) call reproduces prior behavior exactly. Candidate limit is
+ * clamped so even a critical-pressure pass keeps a usable retrieval window
+ * and a /new surge does not blow up hybrid search cost.
+ */
+export const RECALL_BREADTH_BASE = Object.freeze({
+    mainBudgetFraction: 0.12,
+    fallbackBudgetFraction: 0.10,
+    candidateLimit: 10,
+    candidateLimitMin: 6,
+    candidateLimitMax: 16,
+});
+/**
+ * Apply the adaptive lifecycle smartRecallMultiplier to recall breadth.
+ * Pure helper — does not read state or mutate anything. Steady multiplier=1
+ * preserves the historical (0.12, 0.10, limit=10) recall envelope.
+ */
+export function scaleRecallBreadth(remainingTokens, multiplier) {
+    const safeMultiplier = Number.isFinite(multiplier) && multiplier > 0 ? multiplier : 1;
+    const remaining = Math.max(0, Math.floor(remainingTokens || 0));
+    const mainBudgetTokens = Math.max(0, Math.floor(remaining * RECALL_BREADTH_BASE.mainBudgetFraction * safeMultiplier));
+    const fallbackBudgetTokens = Math.max(0, Math.floor(remaining * RECALL_BREADTH_BASE.fallbackBudgetFraction * safeMultiplier));
+    const limitRaw = Math.ceil(RECALL_BREADTH_BASE.candidateLimit * safeMultiplier);
+    const candidateLimit = Math.min(RECALL_BREADTH_BASE.candidateLimitMax, Math.max(RECALL_BREADTH_BASE.candidateLimitMin, limitRaw));
+    return { mainBudgetTokens, fallbackBudgetTokens, candidateLimit, multiplier: safeMultiplier };
+}
 const DEFAULT_CONFIG = {
     // Primary budget controls
     budgetFraction: 0.703,
@@ -397,6 +465,82 @@ function clusterNeutralMessages(messages) {
     }
     return clusters;
 }
+export function orderClustersForAdaptiveEviction(clusters, policy, opts = {}) {
+    const plan = policy.evictionPlan;
+    const protectedIndices = new Set();
+    // Protect the most-recent user-role cluster (current-user-turn proxy when
+    // the prompt is appended via history rather than as a separate message).
+    for (let i = clusters.length - 1; i >= 0; i--) {
+        if (clusters[i].messages.some(m => m.role === 'user')) {
+            protectedIndices.add(i);
+            break;
+        }
+    }
+    // Protect dynamicBoundary clusters and pure-system clusters.
+    for (let i = 0; i < clusters.length; i++) {
+        const cluster = clusters[i];
+        const hasDynamicBoundary = cluster.messages.some(m => {
+            const meta = m.metadata;
+            return meta?.dynamicBoundary === true;
+        });
+        if (hasDynamicBoundary)
+            protectedIndices.add(i);
+        if (cluster.messages.length > 0 && cluster.messages.every(m => m.role === 'system')) {
+            protectedIndices.add(i);
+        }
+    }
+    const totalMessages = clusters.reduce((sum, cluster) => sum + cluster.messages.length, 0);
+    const stampedMessages = clusters.reduce((sum, cluster) => sum + cluster.messages.filter(m => typeof m.topicId === 'string').length, 0);
+    const topicIdCoveragePct = totalMessages > 0
+        ? Math.round((stampedMessages / totalMessages) * 10000) / 100
+        : 0;
+    const topicAwareDropOrder = [];
+    const activeId = opts.activeTopicId;
+    if (plan.preferTopicAwareDrop && activeId) {
+        for (let i = 0; i < clusters.length; i++) {
+            if (protectedIndices.has(i))
+                continue;
+            const cluster = clusters[i];
+            // Tool clusters are handled by ballast reduction; skip from
+            // topic-aware drop preference to keep tool chains atomic.
+            const hasToolContent = cluster.messages.some(m => (m.toolCalls && m.toolCalls.length > 0)
+                || (m.toolResults && m.toolResults.length > 0));
+            if (hasToolContent)
+                continue;
+            // Inactive-topic predicate: every message in the cluster carries a
+            // topicId distinct from the active topic. Messages without topicId
+            // (legacy/unscoped) are not promoted to drop candidates so we don't
+            // regress sessions that pre-date topic stamping.
+            const tids = cluster.messages.map(m => m.topicId);
+            if (tids.length === 0)
+                continue;
+            const allInactive = tids.every(tid => typeof tid === 'string' && tid !== activeId);
+            if (allInactive)
+                topicAwareDropOrder.push(i);
+        }
+    }
+    let bypassReason;
+    if (!activeId)
+        bypassReason = 'no-active-topic';
+    else if (stampedMessages === 0)
+        bypassReason = 'no-stamped-clusters';
+    else if (!plan.preferTopicAwareDrop)
+        bypassReason = 'band-not-topic-aware';
+    else if (topicAwareDropOrder.length === 0)
+        bypassReason = 'no-eligible-inactive-topic-clusters';
+    return {
+        preferTopicAwareDrop: plan.preferTopicAwareDrop,
+        topicAwareDropOrder,
+        protectedIndices,
+        telemetry: {
+            topicAwareEligibleClusters: topicAwareDropOrder.length,
+            topicAwareDroppedClusters: 0,
+            protectedClusters: protectedIndices.size,
+            topicIdCoveragePct,
+            bypassReason,
+        },
+    };
+}
 /**
  * Public reshape helper: apply tool gradient then trim to fit within a token budget.
  *
@@ -896,6 +1040,9 @@ export function resolveArtifactOversizeThreshold(effectiveBudget) {
     const raw = Math.floor(softBudget * ARTIFACT_BUDGET_FRACTION);
     return Math.min(ARTIFACT_THRESHOLD_CEILING, Math.max(ARTIFACT_THRESHOLD_FLOOR, raw));
 }
+function isExplicitNewSessionPrompt(prompt) {
+    return /^\/new(?:\s|$)/i.test((prompt ?? '').trim());
+}
 /**
  * C2: Degrade an oversized doc chunk to a canonical ArtifactRef string.
  *
@@ -1127,6 +1274,10 @@ export class Compositor {
     vectorStore;
     libraryDb;
     triggerRegistry;
+    reranker;
+    rerankerMinCandidates;
+    rerankerMaxDocuments;
+    rerankerTopK;
     /** Cached org registry loaded from fleet_agents at construction time. */
     _orgRegistry;
     constructor(deps, config) {
@@ -1134,6 +1285,10 @@ export class Compositor {
         this.vectorStore = deps.vectorStore || null;
         this.libraryDb = deps.libraryDb || null;
         this.triggerRegistry = deps.triggerRegistry || DEFAULT_TRIGGERS;
+        this.reranker = deps.reranker ?? null;
+        this.rerankerMinCandidates = deps.rerankerMinCandidates ?? 2;
+        this.rerankerMaxDocuments = deps.rerankerMaxDocuments;
+        this.rerankerTopK = deps.rerankerTopK;
         // Load org registry from DB on init; fall back to hardcoded if DB empty.
         this._orgRegistry = this.libraryDb
             ? buildOrgRegistryFromDb(this.libraryDb)
@@ -1151,6 +1306,13 @@ export class Compositor {
     setVectorStore(vs) {
         this.vectorStore = vs;
     }
+    /**
+     * Set or replace the reranker after construction.
+     * Called by hypermem.create() once the reranker config has been resolved.
+     */
+    setReranker(rr) {
+        this.reranker = rr;
+    }
     /**
      * Hot-reload the org registry from the fleet_agents table.
      * Call after fleet membership changes (new agent, org restructure)
@@ -1455,6 +1617,41 @@ export class Compositor {
             ? Math.min(request.historyDepth, s4AdaptiveDepth)
             : s4AdaptiveDepth;
         let remaining = budget;
+        // 0.9.0: resolve an early adaptive lifecycle posture for the
+        // compose-window cluster-drop pass. Pressure is estimated from the
+        // SQLite sample over the effective budget so the eviction-order
+        // decision routes through the same band classifier the rest of the
+        // 0.9.0 paths already use — no parallel pressure constants here.
+        const s09SampleTokens = sampleMessages.reduce((sum, m) => sum + estimateMessageTokens(m), 0);
+        const s09EvictionPressure = computeUnifiedPressure(s09SampleTokens, budget, PRESSURE_SOURCE.COMPOSE_PRE_RECALL);
+        let s09ObservedUserTurnCount = sampleMessages.filter(m => m.role === 'user').length;
+        const s09ForkedContextSeed = request.forkedContext?.enabled ? request.forkedContext : undefined;
+        const s09ForkedParentPressure = typeof s09ForkedContextSeed?.parentPressureFraction === 'number'
+            && Number.isFinite(s09ForkedContextSeed.parentPressureFraction)
+            ? s09ForkedContextSeed.parentPressureFraction
+            : undefined;
+        const s09EvictionPolicyPressure = s09ForkedContextSeed
+            && s09ObservedUserTurnCount === 0
+            && s09ForkedParentPressure != null
+            ? s09ForkedParentPressure
+            : s09EvictionPressure.fraction;
+        const evictionLifecyclePolicy = resolveAdaptiveLifecyclePolicy({
+            pressureFraction: s09EvictionPolicyPressure,
+            userTurnCount: s09ObservedUserTurnCount,
+            explicitNewSession: isExplicitNewSessionPrompt(request.prompt ?? null),
+            forkedContext: Boolean(s09ForkedContextSeed),
+            forkedParentPressureFraction: s09ForkedParentPressure,
+            forkedParentUserTurnCount: s09ForkedContextSeed?.parentUserTurnCount,
+        });
+        let adaptiveEvictionTopicAwareEligibleClusters = 0;
+        let adaptiveEvictionTopicAwareDroppedClusters = 0;
+        let adaptiveEvictionProtectedClusters = 0;
+        let adaptiveEvictionTopicIdCoveragePct = 0;
+        let adaptiveEvictionBypassReason;
+        let composeTopicSource = 'none';
+        let composeTopicState = 'history-disabled';
+        let composeTopicMessageCount = 0;
+        let composeTopicStampedMessageCount = 0;
         // Phase 0 fence enforcement: resolve the compaction fence for this conversation.
         // All downstream message queries use this as a lower bound to exclude zombie
         // messages below the fence that should have been compacted.
@@ -1506,6 +1703,27 @@ export class Compositor {
             slots.identity = tokens;
             remaining -= tokens;
         }
+        const repairNoticeContent = await this.getSlotContent(request.agentId, request.sessionKey, 'repair_notice', db);
+        // ─── Warm-Restore Repair Notice (never suppressed) ─────────
+        // If a session was reconstructed from a snapshot, the repair notice must
+        // stay above restored conversation content even under budget pressure.
+        // This mirrors the system/identity invariant: history and memory slots may
+        // be trimmed, but the provenance notice is not optional operational state.
+        if (repairNoticeContent) {
+            const tokens = estimateTokens(repairNoticeContent);
+            messages.push({
+                role: 'system',
+                textContent: repairNoticeContent,
+                toolCalls: null,
+                toolResults: null,
+                metadata: { warmRestoreRepairNotice: true },
+            });
+            slots.system += tokens;
+            remaining -= tokens;
+            if (remaining < 0) {
+                warnings.push('Warm-restore repair notice exceeded budget but was retained as non-suppressible system context');
+            }
+        }
         // ─── Stable Output Profile Prefix ──────────────────────────
         // Keep deterministic output instructions on the static side of the cache
         // boundary so Anthropic and OpenAI warm-prefix caching can reuse them.
@@ -1577,8 +1795,10 @@ export class Compositor {
                 try {
                     const topicMap = new SessionTopicMap(db);
                     activeTopic = topicMap.getActiveTopic(request.sessionKey) || undefined;
-                    if (activeTopic)
+                    if (activeTopic) {
                         activeTopicId = activeTopic.id;
+                        composeTopicSource = 'session-topic-map';
+                    }
                 }
                 catch {
                     // Topic lookup is best-effort — fall back to full history
@@ -1586,6 +1806,7 @@ export class Compositor {
             }
             else {
                 activeTopicId = request.topicId;
+                composeTopicSource = 'request-topic-id';
                 try {
                     activeTopic = db.prepare(`
             SELECT id, name
@@ -1616,6 +1837,9 @@ export class Compositor {
                 }
                 return true;
             });
+            s09ObservedUserTurnCount = Math.max(s09ObservedUserTurnCount, historyMessages.filter(m => m.role === 'user').length);
+            composeTopicMessageCount = historyMessages.length;
+            composeTopicStampedMessageCount = historyMessages.filter(m => typeof m.topicId === 'string').length;
             // ── Transform-first: apply gradient tool treatment BEFORE budget math ──
             // All tool payloads are in their final form before any token estimation.
             // This ensures estimateMessageTokens() measures actual submission cost,
@@ -1643,25 +1867,84 @@ export class Compositor {
             // of raw config.historyFraction so history doesn't overflow MECW ceiling.
             const historyBudget = Math.floor(budget * b4HistoryFraction);
             const historyFillCap = Math.min(historyBudget, remaining);
+            // 0.9.0: adaptive eviction ordering. For elevated/high/critical bands,
+            // drop inactive-topic non-tool clusters first when an active topic is
+            // known. Bootstrap/warmup/steady reproduce the historical newest-first
+            // sweep exactly (preferTopicAwareDrop=false → evictedByPlan stays empty).
+            const adaptiveOrdering = orderClustersForAdaptiveEviction(budgetClusters, evictionLifecyclePolicy, { activeTopicId });
+            adaptiveEvictionTopicAwareEligibleClusters = adaptiveOrdering.telemetry.topicAwareEligibleClusters;
+            adaptiveEvictionProtectedClusters = adaptiveOrdering.telemetry.protectedClusters;
+            adaptiveEvictionTopicIdCoveragePct = adaptiveOrdering.telemetry.topicIdCoveragePct;
+            adaptiveEvictionBypassReason = adaptiveOrdering.telemetry.bypassReason;
+            if (!activeTopicId)
+                composeTopicState = 'no-active-topic';
+            else if (composeTopicStampedMessageCount === 0)
+                composeTopicState = 'active-topic-missing-stamped-history';
+            else
+                composeTopicState = 'active-topic-ready';
+            const evictedByPlan = new Set();
+            let projectedTokens = budgetClusters.reduce((s, c) => s + c.tokenCost, 0);
+            if (adaptiveOrdering.preferTopicAwareDrop
+                && adaptiveOrdering.topicAwareDropOrder.length > 0
+                && projectedTokens <= historyFillCap) {
+                adaptiveEvictionBypassReason = 'within-budget';
+            }
+            if (adaptiveOrdering.preferTopicAwareDrop
+                && adaptiveOrdering.topicAwareDropOrder.length > 0
+                && projectedTokens > historyFillCap) {
+                for (const idx of adaptiveOrdering.topicAwareDropOrder) {
+                    if (projectedTokens <= historyFillCap)
+                        break;
+                    if (adaptiveOrdering.protectedIndices.has(idx))
+                        continue;
+                    evictedByPlan.add(idx);
+                    projectedTokens -= budgetClusters[idx].tokenCost;
+                }
+                adaptiveEvictionTopicAwareDroppedClusters = evictedByPlan.size;
+            }
+            let truncationCutIndex = -1;
             for (let i = budgetClusters.length - 1; i >= 0; i--) {
+                if (evictedByPlan.has(i))
+                    continue;
                 const cluster = budgetClusters[i];
                 if (historyTokens + cluster.tokenCost > historyFillCap && includedClusters.length > 0) {
-                    const droppedClusters = budgetClusters.slice(0, i + 1);
-                    const droppedMsgCount = droppedClusters.reduce((s, c) => s + c.messages.length, 0);
-                    const droppedToolResultCount = droppedClusters.reduce((sum, c) => sum + c.messages.filter(m => (m.toolResults?.length ?? 0) > 0).length, 0);
-                    if (droppedToolResultCount > 0) {
-                        c1CoEjections += droppedToolResultCount;
-                        console.info(`[hypermem:compositor] tool-chain co-eject reason=budget_cluster_drop count=${droppedToolResultCount} messages dropped`);
+                    truncationCutIndex = i;
+                    break;
+                }
+                includedClusters.unshift(cluster);
+                historyTokens += cluster.tokenCost;
+            }
+            if (truncationCutIndex >= 0 || evictedByPlan.size > 0) {
+                const droppedIndices = [];
+                if (truncationCutIndex >= 0) {
+                    for (let i = 0; i <= truncationCutIndex; i++) {
+                        if (!evictedByPlan.has(i))
+                            droppedIndices.push(i);
                     }
+                }
+                for (const idx of evictedByPlan)
+                    droppedIndices.push(idx);
+                const droppedClusters = droppedIndices.map(i => budgetClusters[i]);
+                const droppedMsgCount = droppedClusters.reduce((s, c) => s + c.messages.length, 0);
+                const droppedToolResultCount = droppedClusters.reduce((sum, c) => sum + c.messages.filter(m => (m.toolResults?.length ?? 0) > 0).length, 0);
+                if (droppedToolResultCount > 0) {
+                    c1CoEjections += droppedToolResultCount;
+                    console.info(`[hypermem:compositor] tool-chain co-eject reason=budget_cluster_drop count=${droppedToolResultCount} messages dropped`);
+                }
+                if (droppedMsgCount > 0) {
                     const c1Note = droppedToolResultCount > 0
                         ? ` [C1: ${droppedToolResultCount} co-ejected reason=budget_cluster_drop]`
                         : '';
-                    warnings.push(`History truncated at cluster ${i + 1}/${budgetClusters.length} (${droppedMsgCount} messages dropped)${c1Note}`);
-                    s4RescueTrimFired = true;
-                    break;
+                    const planNote = evictedByPlan.size > 0
+                        ? ` [adaptive: band=${evictionLifecyclePolicy.band} topic-aware-dropped=${evictedByPlan.size}]`
+                        : '';
+                    const cutLabel = truncationCutIndex >= 0
+                        ? `${truncationCutIndex + 1}/${budgetClusters.length}`
+                        : `0/${budgetClusters.length}`;
+                    warnings.push(`History truncated at cluster ${cutLabel} (${droppedMsgCount} messages dropped)${c1Note}${planNote}`);
+                    if (truncationCutIndex >= 0)
+                        s4RescueTrimFired = true;
                 }
-                includedClusters.unshift(cluster);
-                historyTokens += cluster.tokenCost;
             }
             const includedHistory = includedClusters.flatMap(c => c.messages);
             // ── Keystone History Slot (P2.1) ──────────────────────────────────
@@ -1829,6 +2112,10 @@ export class Compositor {
         let diagFingerprintDedups = 0;
         let diagFingerprintCollisions = 0;
         let diagRetrievalMode = 'none';
+        // Sprint 1: reranker telemetry captured from hybridSearch via onRerankerTelemetry
+        let diagRerankerStatus;
+        let diagRerankerCandidates;
+        let diagRerankerProvider;
         function normalizeFingerprintText(text) {
             return text.toLowerCase().replace(/\s+/g, ' ').trim();
         }
@@ -2104,6 +2391,26 @@ export class Compositor {
         // Use request.prompt as the retrieval query when available — it is the
         // live current-turn text. Falling back to getLastUserMessage(messages)
         // reads from the already-assembled history, which is one turn stale.
+        // 0.9.0: resolve adaptive lifecycle policy immediately before semantic recall
+        // so smartRecallMultiplier scales the recall token budget and candidate limit
+        // from the same policy object that compose diagnostics later report.
+        const composePreRecallPressure = computeUnifiedPressure(contextTokens, budget, PRESSURE_SOURCE.COMPOSE_PRE_RECALL);
+        const s09ComposePolicyPressure = s09ForkedContextSeed
+            && s09ObservedUserTurnCount === 0
+            && s09ForkedParentPressure != null
+            ? s09ForkedParentPressure
+            : composePreRecallPressure.fraction;
+        const composeLifecyclePolicy = resolveAdaptiveLifecyclePolicy({
+            pressureFraction: s09ComposePolicyPressure,
+            userTurnCount: s09ObservedUserTurnCount,
+            explicitNewSession: isExplicitNewSessionPrompt(request.prompt ?? this.getLastUserMessage(messages)),
+            forkedContext: Boolean(s09ForkedContextSeed),
+            forkedParentPressureFraction: s09ForkedParentPressure,
+            forkedParentUserTurnCount: s09ForkedContextSeed?.parentUserTurnCount,
+        });
+        const recallBreadth = scaleRecallBreadth(remaining, composeLifecyclePolicy.smartRecallMultiplier);
+        let diagAdaptiveRecallBudgetTokens;
+        let diagAdaptiveRecallCandidateLimit;
         if (request.includeSemanticRecall !== false && remaining > 500 && (this.vectorStore || libDb)) {
             const lastUserMsg = request.prompt?.trim() || this.getLastUserMessage(messages);
             if (lastUserMsg) {
@@ -2118,9 +2425,17 @@ export class Compositor {
                     catch {
                         // Redis lookup is best-effort — fall through to Ollama
                     }
-                    const semanticContent = await this.buildSemanticRecall(lastUserMsg, request.agentId, Math.floor(remaining * 0.12), // Cap at 12% of remaining (W4: was 0.15)
-                    libDb || undefined, precomputedEmbedding, contextFingerprints // C2: skip results already in Active Facts
-                    );
+                    diagAdaptiveRecallBudgetTokens = recallBreadth.mainBudgetTokens;
+                    diagAdaptiveRecallCandidateLimit = recallBreadth.candidateLimit;
+                    const semanticContent = await this.buildSemanticRecall(lastUserMsg, request.agentId,
+                    // 0.9.0: recall token budget = base 0.12 of remaining * lifecycle multiplier.
+                    recallBreadth.mainBudgetTokens, libDb || undefined, precomputedEmbedding, contextFingerprints, // C2: skip results already in Active Facts
+                    // Sprint 1: capture reranker telemetry at assemble level
+                    (ev) => {
+                        diagRerankerStatus = ev.status;
+                        diagRerankerCandidates = ev.candidates;
+                        diagRerankerProvider = ev.provider;
+                    }, recallBreadth.candidateLimit);
                     if (semanticContent) {
                         const tokens = estimateTokens(semanticContent);
                         volatileContextParts.push(`## Related Memory\n${semanticContent}`);
@@ -2256,15 +2571,21 @@ export class Compositor {
                     volatileContextParts.push(docParts.join('\n\n'));
                 }
             }
-            else if (remaining > 400 && (this.vectorStore || libDb)) {
+            else if (request.includeSemanticRecall !== false && remaining > 400 && (this.vectorStore || libDb)) {
                 // Trigger-miss fallback: no trigger fired — attempt bounded semantic retrieval
                 // so there is never a silent zero-memory path on doc chunks.
                 // INVARIANT: this block is mutually exclusive with triggered-retrieval above.
                 // If refactored to run both paths, cap combined semantic budget to avoid double-recall.
                 try {
+                    // 0.9.0: trigger-miss fallback uses the same lifecycle-scaled breadth so
+                    // a /new surge widens fallback recall and high/critical pressure narrows it.
+                    if (diagAdaptiveRecallBudgetTokens === undefined) {
+                        diagAdaptiveRecallBudgetTokens = recallBreadth.fallbackBudgetTokens;
+                        diagAdaptiveRecallCandidateLimit = recallBreadth.candidateLimit;
+                    }
                     const fallbackContent = await Promise.race([
-                        this.buildSemanticRecall(lastMsg, request.agentId, Math.floor(remaining * 0.10), libDb || undefined, undefined, contextFingerprints // C2: skip results already in Active Facts
-                        ),
+                        this.buildSemanticRecall(lastMsg, request.agentId, recallBreadth.fallbackBudgetTokens, libDb || undefined, undefined, contextFingerprints, // C2: skip results already in Active Facts
+                        undefined, recallBreadth.candidateLimit),
                         new Promise((_, reject) => setTimeout(() => reject(new Error('fallback_knn_timeout')), 3000)),
                     ]);
                     if (fallbackContent) {
@@ -2365,7 +2686,23 @@ export class Compositor {
             messages.splice(stableInsertIdx, 0, ...stablePrefixMessages);
         }
         // ── Inject assembled context block ──────────────────────
+        // Sprint 4: Prompt-tail placement.
+        // Volatile context (active facts, temporal, open-domain, semantic recall,
+        // doc chunks, cross-session) moves AFTER all history messages so that
+        // query-shaped material lands near the user turn rather than buried mid-prompt.
+        //
+        // Layout after Sprint 4:
+        //   [stable prefix: system, identity, FOS/MOD, stable facts, knowledge, prefs]
+        //   [history: keystones, cross-topic, recent conversation messages]
+        //   [volatile context block ← here, at the tail]   ← Sprint 4 reorder
+        //   [last user message]
+        //
+        // The cache boundary (dynamicBoundary: true) stays on this block so the
+        // Anthropic/OpenAI cache-prefix logic still fires correctly — everything
+        // ABOVE this message is the stable prefix eligible for caching.
         const assembledContextBlock = volatileContextParts.length > 0 ? volatileContextParts.join('\n\n') : undefined;
+        let s4VolatileContextPosition;
+        let s4MessagesBeforeVolatile;
         if (assembledContextBlock) {
             const contextMsg = {
                 role: 'system',
@@ -2377,7 +2714,23 @@ export class Compositor {
                 // everything at or below it is per-session / per-turn context.
                 metadata: { dynamicBoundary: true, cacheBoundarySlot: CACHE_PREFIX_BOUNDARY_SLOT },
             };
-            messages.splice(stableInsertIdx + stablePrefixMessages.length, 0, contextMsg);
+            // Sprint 4: Insert at tail (end of messages array), AFTER history.
+            // The last user message (if any) should remain the final message, so we
+            // insert the volatile block just before the last user message.
+            const lastMsgIdx = messages.length - 1;
+            const lastMsg = lastMsgIdx >= 0 ? messages[lastMsgIdx] : undefined;
+            if (lastMsg && lastMsg.role === 'user') {
+                // Insert volatile block before the last user message so user turn stays last
+                messages.splice(lastMsgIdx, 0, contextMsg);
+                s4VolatileContextPosition = lastMsgIdx;
+                s4MessagesBeforeVolatile = lastMsgIdx;
+            }
+            else {
+                // No trailing user message — append at end
+                messages.push(contextMsg);
+                s4VolatileContextPosition = messages.length - 1;
+                s4MessagesBeforeVolatile = messages.length - 1;
+            }
         }
         const stablePrefix = getStablePrefixMessages(messages);
         const prefixSegmentCount = stablePrefix.length;
@@ -2404,6 +2757,9 @@ export class Compositor {
             let trimCount = 0;
             // Collect indices of messages to eject before mutating the array.
             // Walk forward from the first non-system message, trimming oldest first.
+            // Sprint 4: Skip the volatile context block (dynamicBoundary: true) — it
+            // is query-shaped content that should not be evicted during the safety
+            // valve pass. The stable prefix system messages are also protected (role=system).
             const firstNonSystemIdx = messages.findIndex(m => m.role !== 'system');
             const ejectIndices = new Set();
             if (firstNonSystemIdx >= 0) {
@@ -2412,6 +2768,12 @@ export class Compositor {
                     // Don't trim the last user message (current prompt).
                     if (i === messages.length - 1 && messages[i].role === 'user')
                         break;
+                    // Sprint 4: Don't trim the volatile context block (dynamicBoundary marker).
+                    const meta = messages[i].metadata;
+                    if (meta?.dynamicBoundary) {
+                        i++;
+                        continue;
+                    }
                     const msgTokens = estimateMessageTokens(messages[i]);
                     ejectIndices.add(i);
                     trimmed += msgTokens;
@@ -2455,6 +2817,8 @@ export class Compositor {
             }
         }
         const totalTokens = budget - remaining;
+        // Sprint 3: Unified pressure signal — compose path
+        const s3Pressure = computeUnifiedPressure(totalTokens, budget, PRESSURE_SOURCE.COMPOSE_POST_ASSEMBLY);
         // ─── Slot reconciliation ─────────────────────────────────────────────────
         // totalTokens = budget - remaining is the authoritative spend figure.
         // The slot accounting can drift from this due to history trim (which
@@ -2475,10 +2839,33 @@ export class Compositor {
         // Record the oldest message ID that the LLM can see in this compose
         // cycle. Everything below this ID becomes eligible for compaction.
         // If history was included, query the DB for the oldest included message.
+        //
+        // Sprint 1: Capture compaction eligibility counts BEFORE updating the fence
+        // so we can report how many messages were eligible at the start of this pass.
+        let diagCompactionEligibleCount;
+        let diagCompactionEligibleRatio;
+        let diagCompactionProcessedCount;
         if (request.includeHistory !== false && slots.history > 0) {
             try {
                 const conversation = store.getConversation(request.sessionKey);
                 if (conversation) {
+                    // Sprint 1: read eligibility BEFORE advancing the fence
+                    try {
+                        ensureCompactionFenceSchema(db);
+                        const eligibilityBefore = getCompactionEligibility(db, conversation.id);
+                        if (eligibilityBefore.fence !== null) {
+                            // Total messages in the conversation, fenced or not (denominator for ratio)
+                            const totalRow = db.prepare('SELECT COUNT(*) AS cnt FROM messages WHERE conversation_id = ?').get(conversation.id);
+                            const totalMessages = totalRow?.cnt ?? 0;
+                            diagCompactionEligibleCount = eligibilityBefore.eligibleCount;
+                            diagCompactionEligibleRatio = totalMessages > 0
+                                ? Math.round((eligibilityBefore.eligibleCount / totalMessages) * 1000) / 1000
+                                : 0;
+                        }
+                    }
+                    catch {
+                        // Eligibility query is best-effort
+                    }
                     // The compositor included N history messages (after truncation).
                     // Count how many non-system messages are in the output to determine
                     // how far back we reached.
@@ -2494,8 +2881,18 @@ export class Compositor {
               LIMIT 1 OFFSET ?
             `).get(conversation.id, historyMsgCount - 1);
                         if (oldestIncluded) {
-                            ensureCompactionFenceSchema(db);
                             updateCompactionFence(db, conversation.id, oldestIncluded.id, { minTailMessages: 8 });
+                            // Sprint 1: count how many messages moved from eligible -> fence-protected
+                            // (i.e. they are now above the updated fence)
+                            try {
+                                const eligibilityAfter = getCompactionEligibility(db, conversation.id);
+                                if (diagCompactionEligibleCount !== undefined) {
+                                    diagCompactionProcessedCount = Math.max(0, diagCompactionEligibleCount - eligibilityAfter.eligibleCount);
+                                }
+                            }
+                            catch {
+                                // After-eligibility query is best-effort
+                            }
                         }
                     }
                 }
@@ -2526,6 +2923,70 @@ export class Compositor {
                 zeroResultReason = 'empty_corpus';
             }
         }
+        // ── Sprint 4: Explicit budget lanes ───────────────────────────────────────────────
+        // Compute allocated token lanes for this compose pass.
+        // Budget = effective input budget (post-reserve).
+        // Filled values reflect actual spend after slot fill and safety-valve trim.
+        const s4HistoryLane = Math.floor(budget * b4HistoryFraction);
+        const s4MemoryLane = Math.floor(budget * b4MemoryFraction);
+        const s4StableFilledTokens = (slots.system ?? 0) + (slots.identity ?? 0);
+        const s4HistoryFilledTokens = slots.history ?? 0;
+        const s4MemoryFilledTokens = (slots.facts ?? 0) + (slots.context ?? 0) + (slots.library ?? 0);
+        const s4TotalFilled = s4StableFilledTokens + s4HistoryFilledTokens + s4MemoryFilledTokens;
+        const budgetLanes = {
+            effectiveBudget: budget,
+            stablePrefix: slots.system + slots.identity,
+            history: s4HistoryLane,
+            memory: s4MemoryLane,
+            historyFraction: b4HistoryFraction,
+            memoryFraction: b4MemoryFraction,
+            overhead: Math.max(0, budget - s4TotalFilled),
+            filled: {
+                stablePrefix: s4StableFilledTokens,
+                history: s4HistoryFilledTokens,
+                memory: s4MemoryFilledTokens,
+            },
+        };
+        // ── Sprint 4: OpenAI prefix-cache diagnostics ────────────────────────────────────
+        // Expose prefix-boundary information for OpenAI providers so operators
+        // can tune prompt layout for cache hit rate without guesswork.
+        // Non-fatal — never block compose.
+        let openaiPrefixCacheDiag;
+        try {
+            const s4Provider = s4DetectProvider(request.provider ?? request.model);
+            if (s4Provider === 'openai' || s4Provider === 'openai-responses') {
+                const totalWindowTokens = messages.reduce((sum, m) => sum + estimateMessageTokens(m), 0);
+                const cacheableFraction = totalWindowTokens > 0
+                    ? Math.round((prefixTokens / totalWindowTokens) * 1000) / 1000
+                    : 0;
+                // Sprint 4: volatileAtTail is true when the volatile context block is
+                // positioned AFTER any history (or, vacuously, when no history exists and
+                // the block sits just before the final user turn). In both cases nothing
+                // but the current user message follows the boundary, which is the
+                // cacheable layout. When assembledContextBlock is missing we report
+                // false since there is nothing to place at tail.
+                let s4VolatileAtTail = false;
+                if (s4VolatileContextPosition !== undefined) {
+                    // Any messages after the boundary must be user turns only (no history).
+                    const tail = messages.slice(s4VolatileContextPosition + 1);
+                    s4VolatileAtTail = tail.every(m => m.role === 'user')
+                        && s4VolatileContextPosition >= prefixSegmentCount;
+                }
+                openaiPrefixCacheDiag = {
+                    stablePrefixMessageCount: prefixSegmentCount,
+                    stablePrefixTokens: prefixTokens,
+                    volatileAtTail: s4VolatileAtTail,
+                    cacheableFraction,
+                    prefixHash,
+                };
+            }
+        }
+        catch {
+            // Provider detection is best-effort — never block compose
+        }
+        // 0.9.0: lifecycle policy was resolved pre-recall and used to scale recall
+        // breadth. Diagnostics surface the same object so reported band/multiplier
+        // matches what actually controlled retrieval this compose pass.
         const diagnostics = {
             triggerHits: diagTriggerHits,
             triggerFallbackUsed: diagTriggerFallbackUsed,
@@ -2555,6 +3016,14 @@ export class Compositor {
             historyDepthChosen: s4EffectiveDepth,
             estimatedMsgDensityTokens: s4ObservedDensity,
             rescueTrimFired: s4RescueTrimFired,
+            // Sprint 4: prompt-tail placement diagnostics
+            budgetLanes,
+            volatileContextPosition: s4VolatileContextPosition,
+            messagesBeforeVolatile: s4MessagesBeforeVolatile,
+            openaiPrefixCacheDiag,
+            // Sprint 3: unified pressure signal
+            sessionPressureFraction: s3Pressure.fraction,
+            pressureSource: s3Pressure.source,
             // B4: model-aware lane budget diagnostics
             mecwProfile: b4MecwProfile,
             mecwApplied: b4MecwApplied,
@@ -2564,6 +3033,37 @@ export class Compositor {
             trimSoftTarget: TRIM_BUDGET_POLICY.trimSoftTarget,
             trimGrowthThreshold: TRIM_BUDGET_POLICY.trimGrowthThreshold,
             trimHeadroomFraction: TRIM_BUDGET_POLICY.trimHeadroomFraction,
+            // 0.9.0: adaptive lifecycle diagnostics for compose.preRecall
+            adaptiveLifecycleBand: composeLifecyclePolicy.band,
+            adaptiveLifecyclePressurePct: composeLifecyclePolicy.pressurePct,
+            adaptiveWarmHistoryBudgetFraction: composeLifecyclePolicy.warmHistoryBudgetFraction,
+            adaptiveSmartRecallMultiplier: composeLifecyclePolicy.smartRecallMultiplier,
+            adaptiveTrimSoftTarget: composeLifecyclePolicy.trimSoftTarget,
+            adaptiveCompactionTargetFraction: composeLifecyclePolicy.compactionTargetFraction,
+            adaptiveBreadcrumbPackage: composeLifecyclePolicy.emitBreadcrumbPackage,
+            adaptiveTopicCentroidEviction: composeLifecyclePolicy.enableTopicCentroidEviction,
+            adaptiveProactiveCompaction: composeLifecyclePolicy.triggerProactiveCompaction,
+            adaptiveLifecycleReasons: composeLifecyclePolicy.reasons,
+            adaptiveRecallBudgetTokens: diagAdaptiveRecallBudgetTokens,
+            adaptiveRecallCandidateLimit: diagAdaptiveRecallCandidateLimit,
+            adaptiveEvictionLifecycleBand: evictionLifecyclePolicy.band,
+            adaptiveEvictionPressurePct: evictionLifecyclePolicy.pressurePct,
+            adaptiveEvictionTopicAwareEligibleClusters,
+            adaptiveEvictionTopicAwareDroppedClusters,
+            adaptiveEvictionProtectedClusters,
+            adaptiveEvictionTopicIdCoveragePct,
+            adaptiveEvictionBypassReason,
+            composeTopicSource,
+            composeTopicState,
+            composeTopicMessageCount,
+            composeTopicStampedMessageCount,
+            composeTopicTelemetryStatus: 'emitted',
+            adaptiveLifecycleBandDiverged: evictionLifecyclePolicy.band !== composeLifecyclePolicy.band,
+            adaptiveForkedContext: s09ForkedContextSeed ? true : undefined,
+            adaptiveForkedParentPressurePct: s09ForkedParentPressure != null
+                ? Math.round(s09ForkedParentPressure * 100)
+                : undefined,
+            adaptiveForkedParentUserTurns: s09ForkedContextSeed?.parentUserTurnCount,
             // C1: tool-chain ejection telemetry
             toolChainCoEjections: c1CoEjections > 0 ? c1CoEjections : undefined,
             toolChainStubReplacements: c1StubReplacements > 0 ? c1StubReplacements : undefined,
@@ -2574,6 +3074,23 @@ export class Compositor {
             artifactsHydrated: hydrationResult.artifactsHydrated > 0 ? hydrationResult.artifactsHydrated : undefined,
             hydrationBytes: hydrationResult.hydrationBytes > 0 ? hydrationResult.hydrationBytes : undefined,
             hydrationMisses: hydrationResult.hydrationMisses > 0 ? hydrationResult.hydrationMisses : undefined,
+            // Sprint 1: observability layer
+            rerankerStatus: diagRerankerStatus,
+            rerankerCandidates: diagRerankerCandidates,
+            rerankerProvider: diagRerankerProvider,
+            // Sprint 1: named slot spans (allocated vs filled, overflow flag)
+            slotSpans: {
+                system: { allocated: slots.system, filled: slots.system, overflow: false },
+                identity: { allocated: slots.identity, filled: slots.identity, overflow: false },
+                history: { allocated: Math.floor(budget * b4HistoryFraction), filled: slots.history, overflow: slots.history > Math.floor(budget * b4HistoryFraction) },
+                facts: { allocated: Math.floor(budget * b4MemoryFraction), filled: slots.facts, overflow: false },
+                context: { allocated: Math.floor(budget * b4MemoryFraction), filled: slots.context, overflow: false },
+                library: { allocated: Math.floor(budget * b4MemoryFraction), filled: slots.library, overflow: false },
+            },
+            // Sprint 1: compaction eligibility
+            compactionEligibleCount: diagCompactionEligibleCount,
+            compactionEligibleRatio: diagCompactionEligibleRatio,
+            compactionProcessedCount: diagCompactionProcessedCount,
         };
         if (pressureHigh) {
             warnings.push(`SESSION_PRESSURE_HIGH: avg_turn_cost=${avgTurnCost} tokens, dynamic reserve capped at ${Math.round(dynamicReserve * 100)}%`);
@@ -2659,6 +3176,33 @@ export class Compositor {
                 // Cursor write is best-effort
             }
         }
+        try {
+            const conversation = sampleConv ?? store.getConversation(request.sessionKey);
+            if (conversation) {
+                const snapshotContext = getOrCreateActiveContext(db, request.agentId, request.sessionKey, conversation.id);
+                const repairNoticeContent = await this.cache.getSlot(request.agentId, request.sessionKey, 'repair_notice');
+                insertCompositionSnapshot(db, {
+                    contextId: snapshotContext.id,
+                    headMessageId: snapshotContext.headMessageId ?? null,
+                    model: request.model ?? request.provider ?? 'unknown',
+                    contextWindow: totalWindow,
+                    totalTokens,
+                    fillPct: totalWindow > 0 ? Math.round((totalTokens / totalWindow) * 10000) / 10000 : 0,
+                    snapshotKind: 'composed_window',
+                    repairDepth: repairNoticeContent ? MAX_WARM_RESTORE_REPAIR_DEPTH : 0,
+                    slots: buildCompositionSnapshotSlots({
+                        system: systemContent,
+                        identity: identityContent,
+                        repairNotice: repairNoticeContent,
+                        messages,
+                        contextBlock: assembledContextBlock,
+                    }),
+                });
+            }
+        }
+        catch (error) {
+            console.warn(`[hypermem:compositor] composition snapshot write skipped: ${error.message}`);
+        }
         console.log(`[hypermem:compose] agent=${request.agentId} triggers=${diagTriggerHits} fallback=${diagTriggerFallbackUsed} facts=${diagFactsIncluded} semantic=${diagSemanticResults} chunks=${diagDocChunkCollections} scopeFiltered=${diagScopeFiltered} mode=${diagRetrievalMode} crossTopicKeystones=${diagCrossTopicKeystones} c2_degradations=${c2ArtifactDegradations} c2_threshold=${c2ArtifactThresholdTokens}`);
         return {
             messages: outputMessages,
@@ -2684,10 +3228,15 @@ export class Compositor {
         // Uses context.head_message_id to walk only the active branch.
         let activeContext = null;
         try {
-            activeContext = getActiveContext(db, agentId, sessionKey);
+            activeContext = getOrCreateActiveContext(db, agentId, sessionKey, conversation.id);
         }
         catch {
-            // Context resolution is best-effort
+            try {
+                activeContext = getActiveContext(db, agentId, sessionKey);
+            }
+            catch {
+                // Context resolution is best-effort
+            }
         }
         // Phase 0 fence enforcement: resolve compaction fence for warm bootstrap.
         // Fence remains as transitional safety — primary scoping is via DAG walk.
@@ -2701,6 +3250,97 @@ export class Compositor {
         catch {
             // Fence lookup is best-effort
         }
+        const warmMeta = {
+            agentId,
+            sessionKey,
+            provider: conversation.provider,
+            model: conversation.model,
+            channelType: conversation.channelType,
+            tokenCount: conversation.tokenCountIn + conversation.tokenCountOut,
+            lastActive: conversation.updatedAt,
+            status: conversation.status,
+        };
+        if (activeContext) {
+            const warnSnapshotVerifyFallback = (reason, detail) => {
+                const detailSuffix = detail ? ` ${detail}` : '';
+                console.warn(`[hypermem:compositor] warm snapshot verify fallback session=${sessionKey} reason=${reason} verify_fallback_count=1 cold_rewarm_count=1${detailSuffix}`);
+            };
+            try {
+                const snapshotCandidates = listCompositionSnapshots(db, activeContext.id, 2);
+                const latestSnapshot = getLatestValidCompositionSnapshot(db, activeContext.id);
+                if (latestSnapshot?.verification.slots) {
+                    const targetModel = opts?.model ?? conversation.model ?? 'unknown';
+                    const sourceModel = latestSnapshot.snapshot.model;
+                    const sourceProvider = s4DetectProvider(sourceModel);
+                    const targetProvider = s4DetectProvider(conversation.provider ?? targetModel);
+                    const restored = restoreWarmSnapshotState(latestSnapshot.verification.slots, {
+                        sourceProvider,
+                        targetProvider,
+                    });
+                    if (restored) {
+                        if (!restored.diagnostics.rolloutGatePassed) {
+                            const gateSummary = restored.diagnostics.rolloutGateViolations
+                                .map(violation => `${violation.gate}=${violation.actual}/${violation.max}`)
+                                .join(', ');
+                            console.warn(`[hypermem:compositor] warm snapshot rollout gate blocked session=${sessionKey} snapshot=${latestSnapshot.snapshot.id} violations=${JSON.stringify(gateSummary)} verify_fallback_count=${latestSnapshot.fallbackUsed ? 1 : 0} cold_rewarm_count=1`);
+                            warnSnapshotVerifyFallback('rollout_gate_blocked', `snapshot=${latestSnapshot.snapshot.id} violations=${JSON.stringify(gateSummary)}`);
+                        }
+                        else {
+                            if (latestSnapshot.fallbackUsed) {
+                                console.warn(`[hypermem:compositor] warm snapshot verify fallback session=${sessionKey} restored_snapshot=${latestSnapshot.snapshot.id} verify_fallback_count=1 cold_rewarm_count=0 reason=latest_snapshot_invalid_or_unverifiable`);
+                            }
+                            const repairNoticeLines = [
+                                `Repair notice: this session is a repaired continuation from snapshot ${latestSnapshot.snapshot.id}.`,
+                                `Source model: ${sourceModel}. Target model: ${targetModel}.`,
+                                `Source provider: ${sourceProvider}. Target provider: ${targetProvider}.`,
+                                `Cross-model boundary: ${sourceModel !== targetModel ? 'yes' : 'no'}.`,
+                                `Cross-provider boundary: ${restored.diagnostics.crossProviderBoundary ? 'yes' : 'no'}.`,
+                                `Repair depth: ${MAX_WARM_RESTORE_REPAIR_DEPTH}.`
+                            ];
+                            if (latestSnapshot.fallbackUsed) {
+                                repairNoticeLines.push('Snapshot verify fallback count: 1.');
+                            }
+                            if (restored.diagnostics.quotedAssistantTurns > 0) {
+                                repairNoticeLines.push(`Quoted foreign-provider assistant turns: ${restored.diagnostics.quotedAssistantTurns}.`);
+                            }
+                            if (restored.diagnostics.toolPairParityViolations > 0) {
+                                repairNoticeLines.push(`Tool-pair parity gaps flagged: ${restored.diagnostics.toolPairParityViolations}.`);
+                            }
+                            if (restored.diagnostics.requiredSlotDrops.length > 0) {
+                                repairNoticeLines.push(`Required-slot gaps flagged: ${restored.diagnostics.requiredSlotDrops.join(', ')}.`);
+                            }
+                            const tokenParityDriftExceeded = restored.diagnostics.tokenParityDriftP95 > WARM_RESTORE_MEASUREMENT_GATES.tokenParityDriftP95Max
+                                || restored.diagnostics.tokenParityDriftP99 > WARM_RESTORE_MEASUREMENT_GATES.tokenParityDriftP99Max;
+                            if (tokenParityDriftExceeded
+                                || restored.diagnostics.requiredSlotDropRate > WARM_RESTORE_MEASUREMENT_GATES.requiredSlotDropRateMax
+                                || restored.diagnostics.stablePrefixBoundaryViolations > WARM_RESTORE_MEASUREMENT_GATES.stablePrefixBoundaryViolationsMax
+                                || restored.diagnostics.toolPairParityViolations > WARM_RESTORE_MEASUREMENT_GATES.toolPairParityViolationsMax
+                                || restored.diagnostics.continuityCriticalBoundaryTransformRate > WARM_RESTORE_MEASUREMENT_GATES.continuityCriticalBoundaryTransformRateMax) {
+                                repairNoticeLines.push(`Warm-restore instrumentation gap: token parity drift p95=${restored.diagnostics.tokenParityDriftP95.toFixed(4)}, p99=${restored.diagnostics.tokenParityDriftP99.toFixed(4)}, stable_prefix violations=${restored.diagnostics.stablePrefixBoundaryViolations}, continuity-critical transform rate=${restored.diagnostics.continuityCriticalBoundaryTransformRate.toFixed(4)}.`);
+                            }
+                            const repairNoticeContent = repairNoticeLines.join(' ');
+                            await this.cache.invalidateWindow(agentId, sessionKey);
+                            await this.cache.warmSession(agentId, sessionKey, {
+                                system: restored.system ?? opts?.systemPrompt,
+                                identity: restored.identity ?? opts?.identity,
+                                repairNotice: repairNoticeContent,
+                                history: restored.history,
+                                meta: warmMeta,
+                            });
+                            console.info(`[hypermem:compositor] warm snapshot restore session=${sessionKey} snapshot=${latestSnapshot.snapshot.id} fallback=${latestSnapshot.fallbackUsed} cross_provider=${restored.diagnostics.crossProviderBoundary} quoted_assistant_turns=${restored.diagnostics.quotedAssistantTurns} tool_pair_gaps=${restored.diagnostics.toolPairParityViolations} rollout_gate_passed=${restored.diagnostics.rolloutGatePassed} token_parity_drift_p95=${restored.diagnostics.tokenParityDriftP95.toFixed(4)} token_parity_drift_p99=${restored.diagnostics.tokenParityDriftP99.toFixed(4)}`);
+                            return;
+                        }
+                    }
+                    warnSnapshotVerifyFallback('restore_unusable', `snapshot_count=${snapshotCandidates.length}`);
+                }
+                else if (snapshotCandidates.length > 0) {
+                    warnSnapshotVerifyFallback('no_valid_snapshot', `snapshot_count=${snapshotCandidates.length}`);
+                }
+            }
+            catch (error) {
+                warnSnapshotVerifyFallback('restore_exception', `error=${JSON.stringify(error.message)}`);
+            }
+        }
         // Fetch a generous pool from SQLite, apply gradient transform, then
         // token-budget-cap the warm set. This replaces the old WARM_BOOTSTRAP_CAP
         // message-count constant which was a blunt instrument — 100 messages of
@@ -2742,7 +3382,6 @@ export class Compositor {
             history.unshift(tagged);
             warmTokens += cost;
         }
-        const libDb = opts?.libraryDb || this.libraryDb;
         // Note: facts and context are intentionally NOT cached here.
         // compose() calls buildFactsFromDb() and buildCrossSessionContext() directly
         // from SQLite on every turn (~0.3ms each) — faster than a Redis GET round-trip.
@@ -2755,19 +3394,10 @@ export class Compositor {
             system: opts?.systemPrompt,
             identity: opts?.identity,
             history,
-            meta: {
-                agentId,
-                sessionKey,
-                provider: conversation.provider,
-                model: conversation.model,
-                channelType: conversation.channelType,
-                tokenCount: conversation.tokenCountIn + conversation.tokenCountOut,
-                lastActive: conversation.updatedAt,
-                status: conversation.status,
-            },
+            meta: warmMeta,
         });
     }
-    async refreshRedisGradient(agentId, sessionKey, db, tokenBudget, historyDepth) {
+    async refreshRedisGradient(agentId, sessionKey, db, tokenBudget, historyDepth, trimSoftTarget) {
         const store = new MessageStore(db);
         const conversation = store.getConversation(sessionKey);
         if (!conversation)
@@ -2810,7 +3440,7 @@ export class Compositor {
         // on the next turn even in the steady-state path. Aligning the gradient cap to
         // the trim target means the rebuilt window already fits within the assemble
         // envelope by construction.
-        const { softBudget: gradientAssembleBudget } = resolveTrimBudgets(tokenBudget ?? 0);
+        const { softBudget: gradientAssembleBudget } = resolveTrimBudgets(tokenBudget ?? 0, { trimSoftTarget });
         const transformedHistory = applyToolGradient(rawHistory, {
             totalWindowTokens: tokenBudget && tokenBudget > 0
                 ? gradientAssembleBudget
@@ -3074,11 +3704,20 @@ export class Compositor {
      * @param precomputedEmbedding — optional pre-computed embedding for the query.
      *   When provided, the Ollama call inside VectorStore.search() is skipped.
      */
-    async buildSemanticRecall(userMessage, agentId, maxTokens, libraryDb, precomputedEmbedding, existingFingerprints // C2: skip results already in Active Facts
-    ) {
+    async buildSemanticRecall(userMessage, agentId, maxTokens, libraryDb, precomputedEmbedding, existingFingerprints, // C2: skip results already in Active Facts
+    onRerankerTelemetry, // Sprint 1: surface reranker status at assemble level
+    resultLimit) {
         const libDb = libraryDb || this.libraryDb;
         if (!libDb && !this.vectorStore)
             return null;
+        // 0.9.0: clamp the lifecycle-scaled candidate limit. Caller already clamps
+        // via scaleRecallBreadth; this is a defensive floor so direct callers (none
+        // outside compose today) cannot accidentally request 0 results.
+        const hybridLimit = Math.max(RECALL_BREADTH_BASE.candidateLimitMin, Math.min(RECALL_BREADTH_BASE.candidateLimitMax, Math.floor(resultLimit && resultLimit > 0 ? resultLimit : RECALL_BREADTH_BASE.candidateLimit)));
+        // KNN-only legacy fallback historically used 8 — keep it slightly below the
+        // hybrid limit to preserve prior behavior at multiplier=1, while still
+        // scaling with the same adaptive limit.
+        const knnFallbackLimit = Math.max(RECALL_BREADTH_BASE.candidateLimitMin, Math.min(RECALL_BREADTH_BASE.candidateLimitMax, hybridLimit - 2));
         // Inline fingerprint helper (mirrors compose-scope version; C2 dedup only used here)
         const fpCheck = existingFingerprints
             ? (text) => existingFingerprints.has(text.toLowerCase().replace(/\s+/g, ' ').trim().slice(0, 120))
@@ -3087,10 +3726,16 @@ export class Compositor {
         if (libDb) {
             const results = await hybridSearch(libDb, this.vectorStore, userMessage, {
                 tables: ['facts', 'knowledge', 'episodes'],
-                limit: 10,
+                limit: hybridLimit,
                 agentId,
                 maxKnnDistance: 1.2,
                 precomputedEmbedding,
+                reranker: this.reranker,
+                rerankerMinCandidates: this.rerankerMinCandidates,
+                rerankerMaxDocuments: this.rerankerMaxDocuments,
+                rerankerTopK: this.rerankerTopK,
+                // Sprint 1: thread reranker telemetry into compose diagnostics
+                onRerankerTelemetry,
             });
             if (results.length === 0)
                 return null;
@@ -3157,7 +3802,7 @@ export class Compositor {
             return null;
         const results = await this.vectorStore.search(userMessage, {
             tables: ['facts', 'knowledge', 'episodes'],
-            limit: 8,
+            limit: knnFallbackLimit,
             maxDistance: 1.2,
             precomputedEmbedding,
         });
@@ -3347,8 +3992,11 @@ export class Compositor {
                 }
             }
             const fenceClause = fenceMessageId != null ? 'AND m.id >= ?' : '';
-            // Phase 3 (Turn DAG): prefer context_id scoping over conversation_id+fence
-            const contextClause = activeContext ? 'AND m.context_id = ?' : '';
+            // Phase 3 (Turn DAG): prefer context_id scoping, but keep legacy NULL
+            // rows eligible. Warmed or migrated sessions can have an active context
+            // while older messages predate context_id backfill; excluding NULL rows
+            // disables within-session keystone recall for those conversations.
+            const contextClause = activeContext ? 'AND (m.context_id = ? OR m.context_id IS NULL)' : '';
             const baseParams = [conversationId, cutoffId];
             if (fenceMessageId != null)
                 baseParams.push(fenceMessageId);