@psiclawops/hypermem 0.8.4 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99)
  1. package/CHANGELOG.md +33 -0
  2. package/INSTALL.md +203 -23
  3. package/README.md +139 -216
  4. package/bench/README.md +42 -0
  5. package/bench/data-access-bench.mjs +380 -0
  6. package/bin/hypermem-bench.mjs +2 -0
  7. package/bin/hypermem-doctor.mjs +412 -0
  8. package/bin/hypermem-model-audit.mjs +339 -0
  9. package/bin/hypermem-status.mjs +491 -70
  10. package/dist/adaptive-lifecycle.d.ts +81 -0
  11. package/dist/adaptive-lifecycle.d.ts.map +1 -0
  12. package/dist/adaptive-lifecycle.js +190 -0
  13. package/dist/background-indexer.js +9 -9
  14. package/dist/budget-policy.d.ts +1 -1
  15. package/dist/budget-policy.d.ts.map +1 -1
  16. package/dist/budget-policy.js +10 -5
  17. package/dist/cache.d.ts +4 -0
  18. package/dist/cache.d.ts.map +1 -1
  19. package/dist/cache.js +2 -0
  20. package/dist/composition-snapshot-integrity.d.ts +36 -0
  21. package/dist/composition-snapshot-integrity.d.ts.map +1 -0
  22. package/dist/composition-snapshot-integrity.js +131 -0
  23. package/dist/composition-snapshot-runtime.d.ts +59 -0
  24. package/dist/composition-snapshot-runtime.d.ts.map +1 -0
  25. package/dist/composition-snapshot-runtime.js +250 -0
  26. package/dist/composition-snapshot-store.d.ts +44 -0
  27. package/dist/composition-snapshot-store.d.ts.map +1 -0
  28. package/dist/composition-snapshot-store.js +117 -0
  29. package/dist/compositor.d.ts +125 -1
  30. package/dist/compositor.d.ts.map +1 -1
  31. package/dist/compositor.js +692 -44
  32. package/dist/cross-agent.d.ts +1 -1
  33. package/dist/cross-agent.js +17 -17
  34. package/dist/doc-chunk-store.d.ts +19 -0
  35. package/dist/doc-chunk-store.d.ts.map +1 -1
  36. package/dist/doc-chunk-store.js +56 -6
  37. package/dist/dreaming-promoter.d.ts +1 -1
  38. package/dist/dreaming-promoter.js +2 -2
  39. package/dist/hybrid-retrieval.d.ts +38 -0
  40. package/dist/hybrid-retrieval.d.ts.map +1 -1
  41. package/dist/hybrid-retrieval.js +86 -1
  42. package/dist/index.d.ts +15 -6
  43. package/dist/index.d.ts.map +1 -1
  44. package/dist/index.js +33 -7
  45. package/dist/knowledge-store.d.ts +4 -1
  46. package/dist/knowledge-store.d.ts.map +1 -1
  47. package/dist/knowledge-store.js +27 -4
  48. package/dist/library-schema.d.ts +12 -8
  49. package/dist/library-schema.d.ts.map +1 -1
  50. package/dist/library-schema.js +22 -8
  51. package/dist/message-store.d.ts.map +1 -1
  52. package/dist/message-store.js +7 -3
  53. package/dist/metrics-dashboard.d.ts +18 -1
  54. package/dist/metrics-dashboard.d.ts.map +1 -1
  55. package/dist/metrics-dashboard.js +52 -14
  56. package/dist/reranker.d.ts +1 -1
  57. package/dist/reranker.js +2 -2
  58. package/dist/schema.d.ts +1 -1
  59. package/dist/schema.d.ts.map +1 -1
  60. package/dist/schema.js +28 -1
  61. package/dist/seed.d.ts +1 -1
  62. package/dist/seed.d.ts.map +1 -1
  63. package/dist/seed.js +3 -1
  64. package/dist/session-flusher.d.ts +2 -2
  65. package/dist/session-flusher.js +2 -2
  66. package/dist/spawn-context.d.ts +1 -1
  67. package/dist/spawn-context.js +1 -1
  68. package/dist/topic-store.js +5 -5
  69. package/dist/topic-synthesizer.d.ts +20 -0
  70. package/dist/topic-synthesizer.d.ts.map +1 -1
  71. package/dist/topic-synthesizer.js +114 -4
  72. package/dist/trigger-registry.d.ts +1 -1
  73. package/dist/trigger-registry.d.ts.map +1 -1
  74. package/dist/trigger-registry.js +14 -6
  75. package/dist/types.d.ts +273 -3
  76. package/dist/types.d.ts.map +1 -1
  77. package/dist/version.d.ts +7 -7
  78. package/dist/version.d.ts.map +1 -1
  79. package/dist/version.js +17 -7
  80. package/docs/DIAGNOSTICS.md +205 -0
  81. package/docs/INTEGRATION_VALIDATION.md +186 -0
  82. package/docs/MIGRATION.md +9 -6
  83. package/docs/MIGRATION_GUIDE.md +125 -101
  84. package/docs/ROADMAP.md +238 -20
  85. package/docs/TUNING.md +30 -6
  86. package/install.sh +159 -408
  87. package/memory-plugin/LICENSE +190 -0
  88. package/memory-plugin/README.md +20 -0
  89. package/memory-plugin/dist/index.js +50 -0
  90. package/memory-plugin/package.json +2 -2
  91. package/package.json +18 -4
  92. package/plugin/LICENSE +190 -0
  93. package/plugin/README.md +20 -0
  94. package/plugin/dist/index.d.ts +55 -0
  95. package/plugin/dist/index.d.ts.map +1 -1
  96. package/plugin/dist/index.js +362 -42
  97. package/plugin/dist/index.js.map +1 -1
  98. package/plugin/package.json +2 -2
  99. package/scripts/install-runtime.mjs +13 -3
@@ -22,12 +22,15 @@
22
22
  import { definePluginEntry } from 'openclaw/plugin-sdk/plugin-entry';
23
23
  import { buildPluginConfigSchema } from 'openclaw/plugin-sdk/core';
24
24
  import { z } from 'zod';
25
- import { detectTopicShift, stripMessageMetadata, SessionTopicMap, applyToolGradientToWindow, OPENCLAW_BOOTSTRAP_FILES, rotateSessionContext, TRIM_SOFT_TARGET, TRIM_GROWTH_THRESHOLD, TRIM_HEADROOM_FRACTION, resolveTrimBudgets, formatToolChainStub, decideReplayRecovery, isReplayState, } from '@psiclawops/hypermem';
25
+ import { detectTopicShift, stripMessageMetadata, SessionTopicMap, applyToolGradientToWindow, OPENCLAW_BOOTSTRAP_FILES, rotateSessionContext, TRIM_SOFT_TARGET, TRIM_GROWTH_THRESHOLD, TRIM_HEADROOM_FRACTION, resolveTrimBudgets, resolveAdaptiveLifecyclePolicy, formatToolChainStub, decideReplayRecovery, isReplayState, recordOutputMetrics,
26
+ // Sprint 3: unified pressure signal
27
+ computeUnifiedPressure, PRESSURE_SOURCE, } from '@psiclawops/hypermem';
26
28
  import { evictStaleContent } from '@psiclawops/hypermem/image-eviction';
27
29
  import { repairToolPairs } from '@psiclawops/hypermem';
28
30
  import os from 'os';
29
31
  import path from 'path';
30
32
  import fs from 'fs/promises';
33
+ import { randomUUID } from 'node:crypto';
31
34
  import { fileURLToPath } from 'url';
32
35
  import fsSync from 'fs';
33
36
  let _telemetryStream = null;
@@ -107,6 +110,24 @@ function degradationTelemetry(fields) {
107
110
  // Telemetry must never throw
108
111
  }
109
112
  }
113
+ function lifecyclePolicyTelemetry(fields) {
114
+ if (!telemetryEnabled())
115
+ return;
116
+ const stream = getTelemetryStream();
117
+ if (!stream)
118
+ return;
119
+ try {
120
+ const record = {
121
+ event: 'lifecycle-policy',
122
+ ts: new Date().toISOString(),
123
+ ...fields,
124
+ };
125
+ stream.write(JSON.stringify(record) + '\n');
126
+ }
127
+ catch {
128
+ // Telemetry must never throw
129
+ }
130
+ }
110
131
  function nextTurnId() {
111
132
  _telemetryTurnCounter = (_telemetryTurnCounter + 1) >>> 0;
112
133
  return `${Date.now().toString(36)}-${_telemetryTurnCounter.toString(36)}`;
@@ -279,6 +300,7 @@ export const __telemetryForTests = {
279
300
  assembleTrace,
280
301
  degradationTelemetry,
281
302
  guardTelemetry,
303
+ lifecyclePolicyTelemetry,
282
304
  nextTurnId,
283
305
  beginTrimOwnerTurn,
284
306
  endTrimOwnerTurn,
@@ -400,6 +422,48 @@ export function resolveEffectiveBudget(args) {
400
422
  source: 'fallback contextWindowSize',
401
423
  };
402
424
  }
425
+ export function resolveModelIdentity(model) {
426
+ const modelKey = normalizeModelKey(model);
427
+ if (!modelKey) {
428
+ return {
429
+ rawModel: model ?? null,
430
+ modelKey: null,
431
+ provider: null,
432
+ modelId: null,
433
+ };
434
+ }
435
+ const slash = modelKey.indexOf('/');
436
+ return {
437
+ rawModel: model ?? null,
438
+ modelKey,
439
+ provider: slash > 0 ? modelKey.slice(0, slash) : null,
440
+ modelId: slash > 0 && slash < modelKey.length - 1 ? modelKey.slice(slash + 1) : modelKey,
441
+ };
442
+ }
443
+ export function diffModelState(previous, current) {
444
+ const previousIdentity = previous?.modelKey || previous?.provider || previous?.modelId
445
+ ? {
446
+ rawModel: previous.model ?? null,
447
+ modelKey: previous.modelKey ?? normalizeModelKey(previous.model),
448
+ provider: previous.provider ?? resolveModelIdentity(previous.model).provider,
449
+ modelId: previous.modelId ?? resolveModelIdentity(previous.model).modelId,
450
+ }
451
+ : resolveModelIdentity(previous?.model);
452
+ const currentIdentity = resolveModelIdentity(current.model);
453
+ const previousBudget = previous?.tokenBudget;
454
+ const currentBudget = current.tokenBudget;
455
+ const budgetChanged = previousBudget != null && currentBudget != null && previousBudget !== currentBudget;
456
+ return {
457
+ previousIdentity,
458
+ currentIdentity,
459
+ modelChanged: previousIdentity.modelKey !== currentIdentity.modelKey,
460
+ providerChanged: previousIdentity.provider !== currentIdentity.provider,
461
+ modelIdChanged: previousIdentity.modelId !== currentIdentity.modelId,
462
+ budgetChanged,
463
+ budgetDownshift: previousBudget != null && currentBudget != null && currentBudget < previousBudget,
464
+ budgetUplift: previousBudget != null && currentBudget != null && currentBudget > previousBudget,
465
+ };
466
+ }
403
467
  function normalizeModelKey(model) {
404
468
  if (!model)
405
469
  return null;
@@ -422,6 +486,7 @@ function resolveConfiguredWindow(model) {
422
486
  // Subagent warming mode: 'full' | 'light' | 'off'. Default: 'light'.
423
487
  // Controls how much HyperMem context is injected into subagent sessions.
424
488
  let _subagentWarming = 'light';
489
+ const FORKED_CONTEXT_META_SLOT = 'forkedContextMeta';
425
490
  // Cache replay threshold: 15min default. Set to 0 in user config to disable.
426
491
  let _cacheReplayThresholdMs = 900_000;
427
492
  // ─── System overhead cache ────────────────────────────────────
@@ -537,6 +602,8 @@ async function loadUserConfig() {
537
602
  merged.eviction = { ...merged.eviction, ..._pluginConfig.eviction };
538
603
  if (_pluginConfig.embedding)
539
604
  merged.embedding = { ...merged.embedding, ..._pluginConfig.embedding };
605
+ if (_pluginConfig.reranker)
606
+ merged.reranker = { ...merged.reranker, ..._pluginConfig.reranker };
540
607
  if (Object.keys(fileConfig).length > 0 && Object.keys(_pluginConfig).filter(k => k !== 'hyperMemPath' && k !== 'dataDir').length > 0) {
541
608
  console.log('[hypermem-plugin] Note: migrating config.json keys to plugins.entries.hypercompositor.config in openclaw.json is recommended');
542
609
  }
@@ -621,15 +688,19 @@ async function getHyperMem() {
621
688
  `effective history budget: ${_contextWindowSize - reservedTokens} tokens`);
622
689
  verboseLog(`[hypermem-plugin] warmCacheReplayThresholdMs=${_cacheReplayThresholdMs}`);
623
690
  verboseLog(`[hypermem-plugin] contextWindowOverrides keys=${Object.keys(_contextWindowOverrides).join(', ') || '(none)'}`);
691
+ const cacheConfig = userConfig.cache;
624
692
  const instance = await HyperMem.create({
625
693
  dataDir: _pluginConfig.dataDir ?? path.join(os.homedir(), '.openclaw/hypermem'),
626
694
  cache: {
627
- keyPrefix: 'hm:',
628
- sessionTTL: 14400, // 4h for system/identity/meta slots
629
- historyTTL: 86400, // 24h for history ages out, not count-trimmed
695
+ keyPrefix: cacheConfig?.keyPrefix ?? 'hm:',
696
+ sessionTTL: cacheConfig?.sessionTTL ?? 14400, // 4h default for system/identity/meta slots
697
+ historyTTL: cacheConfig?.historyTTL ?? 86400, // 24h default for history/cursor hot cache
630
698
  },
631
699
  ...(userConfig.compositor ? { compositor: userConfig.compositor } : {}),
632
700
  ...(_embeddingConfig ? { embedding: _embeddingConfig } : {}),
701
+ ...(userConfig.reranker
702
+ ? { reranker: userConfig.reranker }
703
+ : {}),
633
704
  });
634
705
  _hm = instance;
635
706
  // Wire up fleet store and background indexer from dynamic module
@@ -748,6 +819,33 @@ function resolveAssistantTokenCount(msg, runtimeContext) {
748
819
  }
749
820
  return undefined;
750
821
  }
822
+ function resolveAssistantOutputTokenCount(msg, runtimeContext) {
823
+ const usage = msg.usage;
824
+ if (usage && typeof usage === 'object') {
825
+ const candidates = [
826
+ usage.output,
827
+ usage.outputTokens,
828
+ usage.output_tokens,
829
+ usage.completionTokens,
830
+ usage.completion_tokens,
831
+ usage.totalTokens,
832
+ usage.total_tokens,
833
+ usage.total,
834
+ ];
835
+ for (const candidate of candidates) {
836
+ if (typeof candidate === 'number' && Number.isFinite(candidate) && candidate > 0) {
837
+ return Math.floor(candidate);
838
+ }
839
+ }
840
+ }
841
+ const runtimeTokenCount = runtimeContext?.currentTokenCount;
842
+ if (typeof runtimeTokenCount === 'number' && Number.isFinite(runtimeTokenCount) && runtimeTokenCount > 0) {
843
+ return Math.floor(runtimeTokenCount);
844
+ }
845
+ const text = extractTextFromInboundContent(msg.content);
846
+ const tokenEstimate = Math.ceil(text.length / 4);
847
+ return tokenEstimate > 0 ? tokenEstimate : undefined;
848
+ }
751
849
  function collectNeutralToolPairStats(messages) {
752
850
  const callIds = new Set();
753
851
  const resultIds = new Set();
@@ -1316,10 +1414,10 @@ function createHyperMemEngine() {
1316
1414
  // Non-fatal: missing files are silently skipped.
1317
1415
  let identityBlock;
1318
1416
  try {
1319
- // Council agents live at workspace-council/<agentId>/
1417
+ // Council agents live at workspace/<agentId>/
1320
1418
  // Other agents at workspace/<agentId>/ — try council path first
1321
1419
  const homedir = os.homedir();
1322
- const councilPath = path.join(homedir, '.openclaw', 'workspace-council', agentId);
1420
+ const councilPath = path.join(homedir, '.openclaw', 'workspace', agentId);
1323
1421
  const workspacePath = path.join(homedir, '.openclaw', 'workspace', agentId);
1324
1422
  let wsPath = councilPath;
1325
1423
  try {
@@ -1351,7 +1449,7 @@ function createHyperMemEngine() {
1351
1449
  let _wsPathForSeed;
1352
1450
  try {
1353
1451
  const homedir2 = os.homedir();
1354
- const councilPath2 = path.join(homedir2, '.openclaw', 'workspace-council', agentId);
1452
+ const councilPath2 = path.join(homedir2, '.openclaw', 'workspace', agentId);
1355
1453
  const workspacePath2 = path.join(homedir2, '.openclaw', 'workspace', agentId);
1356
1454
  try {
1357
1455
  await fs.access(councilPath2);
@@ -1386,7 +1484,7 @@ function createHyperMemEngine() {
1386
1484
  // Post-warm pressure check: if messages.db had accumulated history,
1387
1485
  // warm() may have loaded the session straight to 80%+. Pre-trim now
1388
1486
  // so the first turn has headroom instead of starting saturated.
1389
- // This is the "restart at 98%" failure mode reported by Helm 2026-04-05:
1487
+ // This is the "restart at 98%" failure mode reported by Eve 2026-04-05:
1390
1488
  // JSONL truncation + Redis flush isn't enough if messages.db is still full
1391
1489
  // and warm() reloads it. Trim here closes the loop.
1392
1490
  try {
@@ -1733,7 +1831,9 @@ function createHyperMemEngine() {
1733
1831
  });
1734
1832
  const replayMarkerText = replayRecovery.emittedText;
1735
1833
  const preTrimTokens = runtimeTokens;
1736
- const pressure = preTrimTokens / effectiveBudget;
1834
+ // Sprint 3: unified pressure signal tool-loop assemble path
1835
+ const s3ToolLoopPressure = computeUnifiedPressure(preTrimTokens, effectiveBudget, PRESSURE_SOURCE.TOOLLOOP_RUNTIME_ARRAY);
1836
+ const pressure = s3ToolLoopPressure.fraction;
1737
1837
  // Pressure-tiered trim targets use a single authority: the working
1738
1838
  // message array. Redis drift is logged as an anomaly, never used as
1739
1839
  // a trim trigger. Replay recovery gets its own explicit bounded mode
@@ -1885,17 +1985,17 @@ function createHyperMemEngine() {
1885
1985
  const kept = keptClusters.flat();
1886
1986
  const keptCount = processedConvMsgs.length - kept.length;
1887
1987
  if (keptCount > 0) {
1888
- console.log(`[hypermem-plugin] tool-loop trim: pressure=${(pressure * 100).toFixed(1)}% → ` +
1988
+ console.log(`[hypermem-plugin] tool-loop trim: pressure=${s3ToolLoopPressure.pct}% source=${s3ToolLoopPressure.source} → ` +
1889
1989
  `target=${(trimTarget * 100).toFixed(0)}% (redis=${trimmed} msgs, messages=${keptCount} dropped)`);
1890
1990
  trimmedMessages = [...systemMsgs, ...kept];
1891
1991
  }
1892
1992
  else if (trimmed > 0) {
1893
- console.log(`[hypermem-plugin] tool-loop trim: pressure=${(pressure * 100).toFixed(1)}% → ` +
1993
+ console.log(`[hypermem-plugin] tool-loop trim: pressure=${s3ToolLoopPressure.pct}% source=${s3ToolLoopPressure.source} → ` +
1894
1994
  `target=${(trimTarget * 100).toFixed(0)}% (redis=${trimmed} msgs)`);
1895
1995
  }
1896
1996
  }
1897
1997
  else if (trimmed > 0) {
1898
- console.log(`[hypermem-plugin] tool-loop trim: pressure=${(pressure * 100).toFixed(1)}% → ` +
1998
+ console.log(`[hypermem-plugin] tool-loop trim: pressure=${s3ToolLoopPressure.pct}% source=${s3ToolLoopPressure.source} → ` +
1899
1999
  `target=${(trimTarget * 100).toFixed(0)}% (redis=${trimmed} msgs)`);
1900
2000
  }
1901
2001
  // Apply tool gradient to compress large tool results before returning.
@@ -2088,23 +2188,32 @@ function createHyperMemEngine() {
2088
2188
  console.warn('[hypermem-plugin] assemble: Redis trim failed (non-fatal):', trimErr.message);
2089
2189
  }
2090
2190
  // ── Budget downshift: proactive reshape pass ───────────────────────────────────────
2091
- // If this session previously composed at a higher token budget (e.g. gpt-5.4
2092
- // claude-sonnet model switch), the Redis window is still sized for the old
2093
- // budget. trimHistoryToTokenBudget above trims by count but skips tool
2094
- // gradient logic. A downshift >10% triggers a full reshape: apply tool
2095
- // gradient at the new budget + trim, then write back before compose runs.
2096
- // This prevents several turns of compaction churn after a model switch.
2097
- //
2098
- // Bug fix: previously read from getWindow() which is always null here
2099
- // (afterTurn invalidates it every turn). Also fixed: was doing setWindow()
2100
- // then invalidateWindow() which is a write-then-delete no-op. Now reads
2101
- // from history list and writes back via replaceHistory().
2191
+ // Detect provider/model identity changes as well as raw budget changes.
2192
+ // Provider routing matters operationally because the same model family can
2193
+ // land on a different effective context window, for example Copilot Sonnet
2194
+ // vs direct Anthropic Sonnet. Only budget downshifts trigger the demoted
2195
+ // reshape guard, but verbose logs now show provider/model swaps even when
2196
+ // the effective budget stays flat or increases.
2102
2197
  let lastState = null;
2103
2198
  try {
2104
2199
  lastState = await hm.cache.getModelState(agentId, sk);
2105
2200
  const DOWNSHIFT_THRESHOLD = 0.10;
2106
- const isDownshift = lastState &&
2107
- (lastState.tokenBudget - effectiveBudget) / lastState.tokenBudget > DOWNSHIFT_THRESHOLD;
2201
+ const modelDelta = diffModelState(lastState, {
2202
+ model,
2203
+ tokenBudget: effectiveBudget,
2204
+ });
2205
+ const downshiftFraction = lastState?.tokenBudget
2206
+ ? (lastState.tokenBudget - effectiveBudget) / lastState.tokenBudget
2207
+ : 0;
2208
+ const isDownshift = modelDelta.budgetDownshift && downshiftFraction > DOWNSHIFT_THRESHOLD;
2209
+ if (lastState && (modelDelta.modelChanged || modelDelta.budgetChanged)) {
2210
+ verboseLog(`[hypermem-plugin] model state change: ` +
2211
+ `prev=${modelDelta.previousIdentity.modelKey ?? 'unknown'} ` +
2212
+ `next=${modelDelta.currentIdentity.modelKey ?? 'unknown'} ` +
2213
+ `providerChanged=${modelDelta.providerChanged} ` +
2214
+ `modelIdChanged=${modelDelta.modelIdChanged} ` +
2215
+ `budget=${lastState.tokenBudget}->${effectiveBudget}`);
2216
+ }
2108
2217
  if (isDownshift && !_deferToolPruning) {
2109
2218
  // Sprint 2.2a: demote reshape to guard telemetry.
2110
2219
  //
@@ -2154,6 +2263,7 @@ function createHyperMemEngine() {
2154
2263
  path: 'replay',
2155
2264
  toolLoop: isToolLoop,
2156
2265
  msgCount: messages.length,
2266
+ composeTopicTelemetryStatus: 'intentionally-omitted',
2157
2267
  });
2158
2268
  }
2159
2269
  }
@@ -2166,6 +2276,20 @@ function createHyperMemEngine() {
2166
2276
  // Subagent light mode: skip library/wiki/semantic/keystones/doc chunks.
2167
2277
  // Keeps: system, identity, history, active facts, output profile, tool gradient.
2168
2278
  const subagentLight = isSubagent && _subagentWarming === 'light';
2279
+ let forkedContext;
2280
+ if (isSubagent) {
2281
+ try {
2282
+ const rawForkedContext = await hm.cache.getSlot(agentId, sk, FORKED_CONTEXT_META_SLOT);
2283
+ if (rawForkedContext) {
2284
+ const parsed = JSON.parse(rawForkedContext);
2285
+ if (parsed?.enabled === true)
2286
+ forkedContext = parsed;
2287
+ }
2288
+ }
2289
+ catch {
2290
+ // Fork metadata is advisory; fall back to normal subagent lifecycle.
2291
+ }
2292
+ }
2169
2293
  const request = {
2170
2294
  agentId,
2171
2295
  sessionKey: sk,
@@ -2180,6 +2304,7 @@ function createHyperMemEngine() {
2180
2304
  includeSemanticRecall: subagentLight ? false : undefined, // skip vector/FTS recall
2181
2305
  includeKeystones: subagentLight ? false : undefined, // skip keystone history injection
2182
2306
  prompt,
2307
+ forkedContext,
2183
2308
  skipProviderTranslation: true, // runtime handles provider translation
2184
2309
  };
2185
2310
  const result = await hm.compose(request);
@@ -2195,6 +2320,63 @@ function createHyperMemEngine() {
2195
2320
  replayState: replayRecovery.emittedMarker?.state,
2196
2321
  replayReason: replayRecovery.emittedMarker?.reason,
2197
2322
  });
2323
+ // Sprint 1: emit assemble-level trace with full observability fields
2324
+ // after a full compose (not replay). Surfaces prefix stability,
2325
+ // reranker outcome, slot spans, and compaction eligibility.
2326
+ if (telemetryEnabled() && !cachedContextBlock) {
2327
+ const diag = result.diagnostics;
2328
+ // prefixChanged: compare current prefixHash against prevPrefixHash
2329
+ // (surfaced by the compositor when a cache bypass detected prefix mutation).
2330
+ // When no previous hash is available (first turn), leave prefixChanged undefined.
2331
+ let prefixChanged;
2332
+ if (diag?.prefixHash && diag?.prevPrefixHash) {
2333
+ prefixChanged = diag.prefixHash !== diag.prevPrefixHash;
2334
+ }
2335
+ assembleTrace({
2336
+ agentId,
2337
+ sessionKey: sk,
2338
+ turnId: _asmTurnId,
2339
+ path: isSubagent ? 'subagent' : 'cold',
2340
+ toolLoop: isToolLoop,
2341
+ msgCount: result.messages.length,
2342
+ prefixChanged,
2343
+ prefixHash: diag?.prefixHash,
2344
+ rerankerStatus: diag?.rerankerStatus,
2345
+ rerankerCandidates: diag?.rerankerCandidates,
2346
+ rerankerProvider: diag?.rerankerProvider,
2347
+ slotSpans: diag?.slotSpans,
2348
+ compactionEligibleCount: diag?.compactionEligibleCount,
2349
+ compactionEligibleRatio: diag?.compactionEligibleRatio,
2350
+ compactionProcessedCount: diag?.compactionProcessedCount,
2351
+ composeTopicSource: diag?.composeTopicSource,
2352
+ composeTopicState: diag?.composeTopicState,
2353
+ composeTopicMessageCount: diag?.composeTopicMessageCount,
2354
+ composeTopicStampedMessageCount: diag?.composeTopicStampedMessageCount,
2355
+ composeTopicTelemetryStatus: diag?.composeTopicTelemetryStatus,
2356
+ });
2357
+ if (diag?.adaptiveLifecycleBand) {
2358
+ lifecyclePolicyTelemetry({
2359
+ path: 'compose.preRecall',
2360
+ agentId,
2361
+ sessionKey: sk,
2362
+ band: diag.adaptiveLifecycleBand,
2363
+ pressurePct: diag.adaptiveLifecyclePressurePct,
2364
+ trimSoftTarget: diag.adaptiveTrimSoftTarget,
2365
+ reasons: diag.adaptiveLifecycleReasons,
2366
+ });
2367
+ }
2368
+ if (diag?.adaptiveEvictionLifecycleBand) {
2369
+ lifecyclePolicyTelemetry({
2370
+ path: 'compose.eviction',
2371
+ agentId,
2372
+ sessionKey: sk,
2373
+ band: diag.adaptiveEvictionLifecycleBand,
2374
+ pressurePct: diag.adaptiveEvictionPressurePct,
2375
+ trimSoftTarget: diag.adaptiveTrimSoftTarget,
2376
+ reasons: diag.adaptiveLifecycleBandDiverged ? ['diverged-from-preRecall'] : undefined,
2377
+ });
2378
+ }
2379
+ }
2198
2380
  // Use cached contextBlock if available (cache replay), otherwise use fresh result.
2199
2381
  // After a full compose, write the new contextBlock to cache for the next turn.
2200
2382
  if (cachedContextBlock) {
@@ -2267,10 +2449,17 @@ ${replayRecovery.emittedText}`
2267
2449
  const runtimeSystemTokens = getOverheadFallback(tier);
2268
2450
  _overheadCache.set(sk, contextBlockTokens + runtimeSystemTokens);
2269
2451
  await persistReplayRecoveryState(hm, agentId, sk, replayRecovery.nextState);
2452
+ if (forkedContext) {
2453
+ await hm.cache.setSlot(agentId, sk, FORKED_CONTEXT_META_SLOT, '').catch(() => { });
2454
+ }
2270
2455
  // Update model state for downshift detection on next turn
2271
2456
  try {
2457
+ const modelIdentity = resolveModelIdentity(model);
2272
2458
  await hm.cache.setModelState(agentId, sk, {
2273
2459
  model: model ?? 'unknown',
2460
+ modelKey: modelIdentity.modelKey ?? undefined,
2461
+ provider: modelIdentity.provider ?? undefined,
2462
+ modelId: modelIdentity.modelId ?? undefined,
2274
2463
  tokenBudget: effectiveBudget,
2275
2464
  composedAt: new Date().toISOString(),
2276
2465
  historyDepth,
@@ -2351,6 +2540,9 @@ ${replayRecovery.emittedText}`
2351
2540
  // budget the history is competing for. We trim history to make room.
2352
2541
  const effectiveBudget = computeEffectiveBudget(tokenBudget, model);
2353
2542
  const tokensBefore = await estimateWindowTokens(hm, agentId, sk);
2543
+ // Sprint 3: Unified pressure signal — compact path (Redis estimate)
2544
+ const s3CompactPressure = computeUnifiedPressure(tokensBefore, effectiveBudget, PRESSURE_SOURCE.COMPACT_REDIS_ESTIMATE);
2545
+ console.log(`[hypermem-plugin] compact: pressure=${s3CompactPressure.pct}% source=${s3CompactPressure.source} tokens=${tokensBefore}/${effectiveBudget}`);
2354
2546
  // Target depth for both Redis trimming and JSONL truncation.
2355
2547
  // Target 50% of budget capacity, assume ~500 tokens/message average.
2356
2548
  const targetDepth = Math.max(20, Math.floor((effectiveBudget * 0.5) / 500));
@@ -2364,6 +2556,10 @@ ${replayRecovery.emittedText}`
2364
2556
  // Also triggered when reshape ran recently but the session is still
2365
2557
  // critically full — bypass the reshape guard in that case.
2366
2558
  const NUCLEAR_THRESHOLD = 0.85;
2559
+ // Sprint 3: runtime-total pressure for nuclear check uses its own source label
2560
+ const s3NuclearPressure = currentTokenCount != null
2561
+ ? computeUnifiedPressure(currentTokenCount, effectiveBudget, PRESSURE_SOURCE.COMPACT_RUNTIME_TOTAL)
2562
+ : s3CompactPressure;
2367
2563
  const isNuclear = currentTokenCount != null && currentTokenCount > effectiveBudget * NUCLEAR_THRESHOLD;
2368
2564
  if (isNuclear) {
2369
2565
  // Cut deep: target 20% of normal depth = ~25 messages for a 128k session.
@@ -2382,11 +2578,11 @@ ${replayRecovery.emittedText}`
2382
2578
  postTokens: tokensAfter,
2383
2579
  removed: nuclearRemoved,
2384
2580
  cacheInvalidated: true,
2385
- reason: `currentTokenCount=${currentTokenCount}/${effectiveBudget}`,
2581
+ reason: `${s3NuclearPressure.source}:${s3NuclearPressure.pct}% currentTokenCount=${currentTokenCount}/${effectiveBudget}`,
2386
2582
  });
2387
2583
  }
2388
- console.log(`[hypermem-plugin] compact: NUCLEAR — session at ${currentTokenCount}/${effectiveBudget} tokens ` +
2389
- `(${Math.round((currentTokenCount / effectiveBudget) * 100)}% full), ` +
2584
+ console.log(`[hypermem-plugin] compact: NUCLEAR — pressure=${s3NuclearPressure.pct}% source=${s3NuclearPressure.source} ` +
2585
+ `session at ${currentTokenCount}/${effectiveBudget} tokens, ` +
2390
2586
  `deep-trimmed JSONL to ${nuclearDepth} messages, Redis ${tokensBefore}→${tokensAfter} tokens`);
2391
2587
  return { ok: true, compacted: true, result: { tokensBefore, tokensAfter } };
2392
2588
  }
@@ -2473,10 +2669,10 @@ ${replayRecovery.emittedText}`
2473
2669
  postTokens: tokensAfter,
2474
2670
  removed: historyTrimmed,
2475
2671
  cacheInvalidated: true,
2476
- reason: `over-budget tokensBefore=${tokensBefore}/${effectiveBudget}`,
2672
+ reason: `${s3CompactPressure.source}:${s3CompactPressure.pct}% over-budget tokensBefore=${tokensBefore}/${effectiveBudget}`,
2477
2673
  });
2478
2674
  }
2479
- console.log(`[hypermem-plugin] compact: trimmed ${tokensBefore} → ${tokensAfter} tokens (budget: ${effectiveBudget})`);
2675
+ console.log(`[hypermem-plugin] compact: trimmed ${tokensBefore} → ${tokensAfter} tokens (budget: ${effectiveBudget}, pressure=${s3CompactPressure.pct}% source=${s3CompactPressure.source})`);
2480
2676
  // Density-aware JSONL truncation: derive target depth from actual avg tokens/message
2481
2677
  // rather than assuming a fixed 500 tokens/message. This prevents a large-message
2482
2678
  // session (e.g. 145 msgs × 882 tok = 128k) from bypassing the 1.5x guard and
@@ -2553,8 +2749,54 @@ ${replayRecovery.emittedText}`
2553
2749
  });
2554
2750
  }
2555
2751
  }
2752
+ try {
2753
+ const lastAssistantMessage = [...newMessages].reverse().find(m => m.role === 'assistant');
2754
+ if (lastAssistantMessage) {
2755
+ const modelState = await hm.cache.getModelState(agentId, sk).catch(() => null);
2756
+ const promptCacheUsage = runtimeContext?.promptCache?.lastCallUsage;
2757
+ const outputTokens = resolveAssistantOutputTokenCount(lastAssistantMessage, runtimeContext) ?? 1;
2758
+ const inputTokens = typeof promptCacheUsage?.input === 'number'
2759
+ ? Math.floor(promptCacheUsage.input)
2760
+ : typeof runtimeContext?.currentTokenCount === 'number'
2761
+ ? Math.floor(runtimeContext.currentTokenCount)
2762
+ : null;
2763
+ const cacheReadTokens = typeof promptCacheUsage?.cacheRead === 'number'
2764
+ ? Math.floor(promptCacheUsage.cacheRead)
2765
+ : null;
2766
+ const modelId = typeof lastAssistantMessage.model === 'string'
2767
+ ? lastAssistantMessage.model
2768
+ : modelState?.modelId ?? modelState?.model ?? 'unknown';
2769
+ const provider = typeof lastAssistantMessage.provider === 'string'
2770
+ ? lastAssistantMessage.provider
2771
+ : modelState?.provider ?? 'unknown';
2772
+ const taskType = typeof runtimeContext?.taskType === 'string'
2773
+ ? runtimeContext.taskType ?? null
2774
+ : null;
2775
+ recordOutputMetrics(hm.dbManager.getLibraryDb(), {
2776
+ id: `turn-metric-${agentId}-${Date.now()}-${randomUUID()}`,
2777
+ timestamp: new Date().toISOString(),
2778
+ agent_id: agentId,
2779
+ session_key: sk,
2780
+ model_id: modelId,
2781
+ provider,
2782
+ fos_version: null,
2783
+ mod_version: null,
2784
+ mod_id: null,
2785
+ task_type: taskType,
2786
+ output_tokens: outputTokens,
2787
+ input_tokens: inputTokens,
2788
+ cache_read_tokens: cacheReadTokens,
2789
+ corrections_fired: [],
2790
+ latency_ms: null,
2791
+ });
2792
+ }
2793
+ }
2794
+ catch {
2795
+ // Non-fatal telemetry path
2796
+ }
2556
2797
  // P3.1: Topic detection on the inbound user message
2557
2798
  // Non-fatal: topic detection never blocks afterTurn
2799
+ let adaptiveTopicShiftConfidence;
2558
2800
  try {
2559
2801
  const inboundUserMsg = newMessages
2560
2802
  .map(m => m)
@@ -2571,6 +2813,7 @@ ${replayRecovery.emittedText}`
2571
2813
  const topicMap = new SessionTopicMap(db);
2572
2814
  const activeTopic = topicMap.getActiveTopic(sk);
2573
2815
  const signal = detectTopicShift(neutralUser, contextMessages, activeTopic?.id ?? null);
2816
+ adaptiveTopicShiftConfidence = signal.confidence;
2574
2817
  if (signal.isNewTopic && signal.topicName) {
2575
2818
  const newTopicId = topicMap.createTopic(sk, signal.topicName);
2576
2819
  // New topic starts with count 1 (the message that triggered the shift)
@@ -2610,13 +2853,36 @@ ${replayRecovery.emittedText}`
2610
2853
  // gradient-compressed window to budget before writing to Redis. Without
2611
2854
  // this, afterTurn writes up to 250 messages regardless of budget, causing
2612
2855
  // trimHistoryToTokenBudget to fire and trim ~200 messages on every
2613
- // subsequent assemble() — the churn loop seen in Helm's logs.
2856
+ // subsequent assemble() — the churn loop seen in Eve's logs.
2614
2857
  if (hm.cache.isConnected) {
2615
2858
  try {
2616
2859
  const modelState = await hm.cache.getModelState(agentId, sk);
2617
2860
  const gradientBudget = modelState?.tokenBudget;
2618
2861
  const gradientDepth = modelState?.historyDepth;
2619
- await hm.refreshRedisGradient(agentId, sk, gradientBudget, gradientDepth);
2862
+ const inboundUserMsg = newMessages
2863
+ .map(m => m)
2864
+ .find(m => m.role === 'user');
2865
+ const inboundUserText = inboundUserMsg
2866
+ ? stripMessageMetadata(extractTextFromInboundContent(inboundUserMsg.content))
2867
+ : '';
2868
+ const lifecyclePolicy = resolveAdaptiveLifecyclePolicy({
2869
+ usedTokens: estimateMessageArrayTokens(messages),
2870
+ effectiveBudget: gradientBudget,
2871
+ userTurnCount: messages.filter(m => m.role === 'user').length,
2872
+ explicitNewSession: /^\/new(?:\s|$)/i.test(inboundUserText.trim()),
2873
+ topicShiftConfidence: adaptiveTopicShiftConfidence,
2874
+ });
2875
+ lifecyclePolicyTelemetry({
2876
+ path: 'afterTurn.gradient',
2877
+ agentId,
2878
+ sessionKey: sk,
2879
+ band: lifecyclePolicy.band,
2880
+ pressurePct: lifecyclePolicy.pressurePct,
2881
+ topicShiftConfidence: adaptiveTopicShiftConfidence,
2882
+ trimSoftTarget: lifecyclePolicy.trimSoftTarget,
2883
+ reasons: lifecyclePolicy.reasons,
2884
+ });
2885
+ await hm.refreshRedisGradient(agentId, sk, gradientBudget, gradientDepth, lifecyclePolicy.trimSoftTarget);
2620
2886
  }
2621
2887
  catch (refreshErr) {
2622
2888
  console.warn('[hypermem-plugin] afterTurn: refreshRedisGradient failed (non-fatal):', refreshErr.message);
@@ -2634,7 +2900,7 @@ ${replayRecovery.emittedText}`
2634
2900
  // If a session just finished a turn at >80% pressure, the NEXT turn's
2635
2901
  // incoming tool results (parallel web searches, large exec output, etc.)
2636
2902
  // will hit a window with no headroom — the ingestion wave failure mode
2637
- // (reported by Helm, 2026-04-05). Pre-trim here so the tool-loop
2903
+ // (reported by Eve, 2026-04-05). Pre-trim here so the tool-loop
2638
2904
  // assemble() path starts the next turn with meaningful space.
2639
2905
  //
2640
2906
  // Uses modelState.tokenBudget if cached; skips if unavailable (non-fatal).
@@ -2786,7 +3052,12 @@ ${replayRecovery.emittedText}`
2786
3052
  * subagentWarming config ('full' | 'light' | 'off').
2787
3053
  * Returns a rollback handle to clean up if spawn fails.
2788
3054
  */
2789
- async prepareSubagentSpawn({ parentSessionKey, childSessionKey }) {
3055
+ async prepareSubagentSpawn(params) {
3056
+ const { parentSessionKey, childSessionKey } = params;
3057
+ const forkParams = params;
3058
+ const contextMode = forkParams.contextMode;
3059
+ const parentSessionId = forkParams.parentSessionId;
3060
+ const childSessionId = forkParams.childSessionId;
2790
3061
  if (_subagentWarming === 'off') {
2791
3062
  return undefined;
2792
3063
  }
@@ -2794,7 +3065,12 @@ ${replayRecovery.emittedText}`
2794
3065
  const hm = await getHyperMem();
2795
3066
  const parentAgentId = extractAgentId(parentSessionKey);
2796
3067
  const childAgentId = extractAgentId(childSessionKey);
2797
- // Seed child with parent's active facts
3068
+ const isForkedContext = contextMode === 'fork';
3069
+ let parentHistoryMessages = 0;
3070
+ let parentUserTurnCount = 0;
3071
+ let parentPressureFraction;
3072
+ // Seed child with parent's active facts. This preserves the historical
3073
+ // slot for compatibility; facts still primarily come from L4 by agent id.
2798
3074
  const facts = hm.getActiveFacts(parentAgentId, { limit: 50 });
2799
3075
  if (facts && facts.length > 0) {
2800
3076
  const factBlock = facts
@@ -2802,22 +3078,48 @@ ${replayRecovery.emittedText}`
2802
3078
  .join('\n');
2803
3079
  await hm.cache.setSlot(childAgentId, childSessionKey, 'parentFacts', factBlock);
2804
3080
  }
2805
- // For 'full' warming, also seed recent history context
2806
- if (_subagentWarming === 'full') {
2807
- const history = await hm.cache.getHistory(parentAgentId, parentSessionKey);
2808
- if (history && history.length > 0) {
2809
- const recentHistory = history.slice(-10);
2810
- await hm.cache.setSlot(childAgentId, childSessionKey, 'parentHistory', JSON.stringify(recentHistory));
3081
+ const history = await hm.cache.getHistory(parentAgentId, parentSessionKey);
3082
+ if (history && history.length > 0) {
3083
+ const maxSeededHistory = _subagentWarming === 'full' ? 25 : 12;
3084
+ const recentHistory = history.slice(-maxSeededHistory);
3085
+ parentHistoryMessages = recentHistory.length;
3086
+ parentUserTurnCount = recentHistory.filter(m => m.role === 'user').length;
3087
+ const parentTokens = estimateMessageArrayTokens(recentHistory);
3088
+ const parentModelState = await hm.cache.getModelState(parentAgentId, parentSessionKey).catch(() => null);
3089
+ const parentBudget = parentModelState?.tokenBudget && parentModelState.tokenBudget > 0
3090
+ ? parentModelState.tokenBudget
3091
+ : undefined;
3092
+ parentPressureFraction = parentBudget ? parentTokens / parentBudget : undefined;
3093
+ if (isForkedContext || _subagentWarming === 'full') {
3094
+ await hm.cache.replaceHistory(childAgentId, childSessionKey, recentHistory, maxSeededHistory);
3095
+ await hm.cache.invalidateWindow(childAgentId, childSessionKey).catch(() => { });
2811
3096
  }
3097
+ await hm.cache.setSlot(childAgentId, childSessionKey, 'parentHistory', JSON.stringify(recentHistory));
3098
+ }
3099
+ if (isForkedContext) {
3100
+ const forkedMeta = {
3101
+ enabled: true,
3102
+ parentSessionKey,
3103
+ parentSessionId,
3104
+ childSessionId,
3105
+ parentPressureFraction,
3106
+ parentUserTurnCount,
3107
+ parentHistoryMessages,
3108
+ };
3109
+ await hm.cache.setSlot(childAgentId, childSessionKey, FORKED_CONTEXT_META_SLOT, JSON.stringify(forkedMeta));
2812
3110
  }
2813
3111
  console.log(`[hypermem-plugin] prepareSubagentSpawn: seeded ${childSessionKey} ` +
2814
- `from ${parentSessionKey} (warming=${_subagentWarming})`);
3112
+ `from ${parentSessionKey} (warming=${_subagentWarming}, contextMode=${contextMode ?? 'isolated'}, ` +
3113
+ `history=${parentHistoryMessages})`);
2815
3114
  return {
2816
3115
  async rollback() {
2817
3116
  try {
2818
3117
  const hm = await getHyperMem();
2819
3118
  await hm.cache.setSlot(childAgentId, childSessionKey, 'parentFacts', '');
2820
3119
  await hm.cache.setSlot(childAgentId, childSessionKey, 'parentHistory', '');
3120
+ await hm.cache.setSlot(childAgentId, childSessionKey, FORKED_CONTEXT_META_SLOT, '');
3121
+ await hm.cache.replaceHistory(childAgentId, childSessionKey, [], 0);
3122
+ await hm.cache.invalidateWindow(childAgentId, childSessionKey).catch(() => { });
2821
3123
  }
2822
3124
  catch {
2823
3125
  // Rollback is best-effort
@@ -2843,6 +3145,7 @@ ${replayRecovery.emittedText}`
2843
3145
  await Promise.all([
2844
3146
  hm.cache.setSlot(childAgentId, childSessionKey, 'parentFacts', ''),
2845
3147
  hm.cache.setSlot(childAgentId, childSessionKey, 'parentHistory', ''),
3148
+ hm.cache.setSlot(childAgentId, childSessionKey, FORKED_CONTEXT_META_SLOT, ''),
2846
3149
  hm.cache.setSlot(childAgentId, childSessionKey, 'assemblyContextBlock', ''),
2847
3150
  hm.cache.setSlot(childAgentId, childSessionKey, 'assemblyContextAt', '0'),
2848
3151
  hm.cache.invalidateWindow(childAgentId, childSessionKey).catch(() => { }),
@@ -3054,6 +3357,23 @@ const hypercompositorConfigSchema = z.object({
3054
3357
  timeout: z.number().int().positive().optional(),
3055
3358
  batchSize: z.number().int().positive().optional(),
3056
3359
  }).optional(),
3360
+ /**
3361
+ * Optional reranker config. When omitted or provider is 'none', the
3362
+ * compositor runs with RRF-only ordering. See INSTALL.md → Reranker.
3363
+ */
3364
+ reranker: z.object({
3365
+ provider: z.enum(['zeroentropy', 'openrouter', 'local', 'none']),
3366
+ minCandidates: z.number().int().nonnegative().optional(),
3367
+ maxDocuments: z.number().int().positive().optional(),
3368
+ topK: z.number().int().positive().optional(),
3369
+ timeoutMs: z.number().int().positive().optional(),
3370
+ zeroEntropyApiKey: z.string().optional(),
3371
+ zeroEntropyModel: z.string().optional(),
3372
+ openrouterApiKey: z.string().optional(),
3373
+ openrouterModel: z.string().optional(),
3374
+ ollamaUrl: z.string().optional(),
3375
+ ollamaModel: z.string().optional(),
3376
+ }).optional(),
3057
3377
  });
3058
3378
  // ─── Plugin Entry ───────────────────────────────────────────────
3059
3379
  const engine = createHyperMemEngine();