@psiclawops/hypermem 0.8.5 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. package/CHANGELOG.md +26 -0
  2. package/INSTALL.md +132 -9
  3. package/README.md +119 -272
  4. package/bench/README.md +42 -0
  5. package/bench/data-access-bench.mjs +380 -0
  6. package/bin/hypermem-bench.mjs +2 -0
  7. package/bin/hypermem-doctor.mjs +412 -0
  8. package/bin/hypermem-model-audit.mjs +339 -0
  9. package/bin/hypermem-status.mjs +491 -70
  10. package/dist/adaptive-lifecycle.d.ts +81 -0
  11. package/dist/adaptive-lifecycle.d.ts.map +1 -0
  12. package/dist/adaptive-lifecycle.js +190 -0
  13. package/dist/budget-policy.d.ts +1 -1
  14. package/dist/budget-policy.d.ts.map +1 -1
  15. package/dist/budget-policy.js +10 -5
  16. package/dist/cache.d.ts +1 -0
  17. package/dist/cache.d.ts.map +1 -1
  18. package/dist/cache.js +2 -0
  19. package/dist/composition-snapshot-integrity.d.ts +36 -0
  20. package/dist/composition-snapshot-integrity.d.ts.map +1 -0
  21. package/dist/composition-snapshot-integrity.js +131 -0
  22. package/dist/composition-snapshot-runtime.d.ts +59 -0
  23. package/dist/composition-snapshot-runtime.d.ts.map +1 -0
  24. package/dist/composition-snapshot-runtime.js +250 -0
  25. package/dist/composition-snapshot-store.d.ts +44 -0
  26. package/dist/composition-snapshot-store.d.ts.map +1 -0
  27. package/dist/composition-snapshot-store.js +117 -0
  28. package/dist/compositor.d.ts +125 -1
  29. package/dist/compositor.d.ts.map +1 -1
  30. package/dist/compositor.js +692 -44
  31. package/dist/doc-chunk-store.d.ts +19 -0
  32. package/dist/doc-chunk-store.d.ts.map +1 -1
  33. package/dist/doc-chunk-store.js +56 -6
  34. package/dist/hybrid-retrieval.d.ts +38 -0
  35. package/dist/hybrid-retrieval.d.ts.map +1 -1
  36. package/dist/hybrid-retrieval.js +86 -1
  37. package/dist/index.d.ts +12 -3
  38. package/dist/index.d.ts.map +1 -1
  39. package/dist/index.js +28 -2
  40. package/dist/knowledge-store.d.ts +4 -1
  41. package/dist/knowledge-store.d.ts.map +1 -1
  42. package/dist/knowledge-store.js +27 -4
  43. package/dist/library-schema.d.ts +12 -8
  44. package/dist/library-schema.d.ts.map +1 -1
  45. package/dist/library-schema.js +22 -8
  46. package/dist/message-store.d.ts.map +1 -1
  47. package/dist/message-store.js +7 -3
  48. package/dist/metrics-dashboard.d.ts +18 -1
  49. package/dist/metrics-dashboard.d.ts.map +1 -1
  50. package/dist/metrics-dashboard.js +52 -14
  51. package/dist/reranker.d.ts +1 -1
  52. package/dist/reranker.js +2 -2
  53. package/dist/schema.d.ts +1 -1
  54. package/dist/schema.d.ts.map +1 -1
  55. package/dist/schema.js +28 -1
  56. package/dist/seed.d.ts.map +1 -1
  57. package/dist/seed.js +2 -0
  58. package/dist/topic-synthesizer.d.ts +20 -0
  59. package/dist/topic-synthesizer.d.ts.map +1 -1
  60. package/dist/topic-synthesizer.js +113 -3
  61. package/dist/trigger-registry.d.ts.map +1 -1
  62. package/dist/trigger-registry.js +10 -2
  63. package/dist/types.d.ts +271 -1
  64. package/dist/types.d.ts.map +1 -1
  65. package/dist/version.d.ts +7 -7
  66. package/dist/version.d.ts.map +1 -1
  67. package/dist/version.js +17 -7
  68. package/docs/DIAGNOSTICS.md +205 -0
  69. package/docs/INTEGRATION_VALIDATION.md +186 -0
  70. package/docs/MIGRATION.md +9 -6
  71. package/docs/MIGRATION_GUIDE.md +125 -101
  72. package/docs/ROADMAP.md +238 -20
  73. package/docs/TUNING.md +19 -5
  74. package/install.sh +152 -401
  75. package/memory-plugin/LICENSE +190 -0
  76. package/memory-plugin/README.md +20 -0
  77. package/memory-plugin/dist/index.js +50 -0
  78. package/memory-plugin/package.json +2 -2
  79. package/package.json +18 -4
  80. package/plugin/LICENSE +190 -0
  81. package/plugin/README.md +20 -0
  82. package/plugin/dist/index.d.ts +29 -0
  83. package/plugin/dist/index.d.ts.map +1 -1
  84. package/plugin/dist/index.js +288 -23
  85. package/plugin/dist/index.js.map +1 -1
  86. package/plugin/package.json +2 -2
  87. package/scripts/install-runtime.mjs +12 -1
@@ -22,12 +22,15 @@
22
22
  import { definePluginEntry } from 'openclaw/plugin-sdk/plugin-entry';
23
23
  import { buildPluginConfigSchema } from 'openclaw/plugin-sdk/core';
24
24
  import { z } from 'zod';
25
- import { detectTopicShift, stripMessageMetadata, SessionTopicMap, applyToolGradientToWindow, OPENCLAW_BOOTSTRAP_FILES, rotateSessionContext, TRIM_SOFT_TARGET, TRIM_GROWTH_THRESHOLD, TRIM_HEADROOM_FRACTION, resolveTrimBudgets, formatToolChainStub, decideReplayRecovery, isReplayState, } from '@psiclawops/hypermem';
25
+ import { detectTopicShift, stripMessageMetadata, SessionTopicMap, applyToolGradientToWindow, OPENCLAW_BOOTSTRAP_FILES, rotateSessionContext, TRIM_SOFT_TARGET, TRIM_GROWTH_THRESHOLD, TRIM_HEADROOM_FRACTION, resolveTrimBudgets, resolveAdaptiveLifecyclePolicy, formatToolChainStub, decideReplayRecovery, isReplayState, recordOutputMetrics,
26
+ // Sprint 3: unified pressure signal
27
+ computeUnifiedPressure, PRESSURE_SOURCE, } from '@psiclawops/hypermem';
26
28
  import { evictStaleContent } from '@psiclawops/hypermem/image-eviction';
27
29
  import { repairToolPairs } from '@psiclawops/hypermem';
28
30
  import os from 'os';
29
31
  import path from 'path';
30
32
  import fs from 'fs/promises';
33
+ import { randomUUID } from 'node:crypto';
31
34
  import { fileURLToPath } from 'url';
32
35
  import fsSync from 'fs';
33
36
  let _telemetryStream = null;
@@ -107,6 +110,24 @@ function degradationTelemetry(fields) {
107
110
  // Telemetry must never throw
108
111
  }
109
112
  }
113
/**
 * Emit one `lifecycle-policy` telemetry record as a single JSON line.
 *
 * Does nothing when telemetry is disabled or when no telemetry stream is
 * available. Serialization and write failures are swallowed on purpose.
 *
 * @param {object} fields - Properties spread into the record after `event`
 *   and `ts`; a key named `event` or `ts` in `fields` overrides the default.
 * @returns {void}
 */
function lifecyclePolicyTelemetry(fields) {
    // Only ask for the stream once telemetry is known to be enabled.
    const sink = telemetryEnabled() ? getTelemetryStream() : null;
    if (!sink) {
        return;
    }
    try {
        const line = JSON.stringify({
            event: 'lifecycle-policy',
            ts: new Date().toISOString(),
            ...fields,
        });
        sink.write(line + '\n');
    }
    catch {
        // Telemetry must never throw
    }
}
110
131
  function nextTurnId() {
111
132
  _telemetryTurnCounter = (_telemetryTurnCounter + 1) >>> 0;
112
133
  return `${Date.now().toString(36)}-${_telemetryTurnCounter.toString(36)}`;
@@ -279,6 +300,7 @@ export const __telemetryForTests = {
279
300
  assembleTrace,
280
301
  degradationTelemetry,
281
302
  guardTelemetry,
303
+ lifecyclePolicyTelemetry,
282
304
  nextTurnId,
283
305
  beginTrimOwnerTurn,
284
306
  endTrimOwnerTurn,
@@ -464,6 +486,7 @@ function resolveConfiguredWindow(model) {
464
486
  // Subagent warming mode: 'full' | 'light' | 'off'. Default: 'light'.
465
487
  // Controls how much HyperMem context is injected into subagent sessions.
466
488
  let _subagentWarming = 'light';
489
+ const FORKED_CONTEXT_META_SLOT = 'forkedContextMeta';
467
490
  // Cache replay threshold: 15min default. Set to 0 in user config to disable.
468
491
  let _cacheReplayThresholdMs = 900_000;
469
492
  // ─── System overhead cache ────────────────────────────────────
@@ -579,6 +602,8 @@ async function loadUserConfig() {
579
602
  merged.eviction = { ...merged.eviction, ..._pluginConfig.eviction };
580
603
  if (_pluginConfig.embedding)
581
604
  merged.embedding = { ...merged.embedding, ..._pluginConfig.embedding };
605
+ if (_pluginConfig.reranker)
606
+ merged.reranker = { ...merged.reranker, ..._pluginConfig.reranker };
582
607
  if (Object.keys(fileConfig).length > 0 && Object.keys(_pluginConfig).filter(k => k !== 'hyperMemPath' && k !== 'dataDir').length > 0) {
583
608
  console.log('[hypermem-plugin] Note: migrating config.json keys to plugins.entries.hypercompositor.config in openclaw.json is recommended');
584
609
  }
@@ -663,15 +688,19 @@ async function getHyperMem() {
663
688
  `effective history budget: ${_contextWindowSize - reservedTokens} tokens`);
664
689
  verboseLog(`[hypermem-plugin] warmCacheReplayThresholdMs=${_cacheReplayThresholdMs}`);
665
690
  verboseLog(`[hypermem-plugin] contextWindowOverrides keys=${Object.keys(_contextWindowOverrides).join(', ') || '(none)'}`);
691
+ const cacheConfig = userConfig.cache;
666
692
  const instance = await HyperMem.create({
667
693
  dataDir: _pluginConfig.dataDir ?? path.join(os.homedir(), '.openclaw/hypermem'),
668
694
  cache: {
669
- keyPrefix: 'hm:',
670
- sessionTTL: 14400, // 4h for system/identity/meta slots
671
- historyTTL: 86400, // 24h for history ages out, not count-trimmed
695
+ keyPrefix: cacheConfig?.keyPrefix ?? 'hm:',
696
+ sessionTTL: cacheConfig?.sessionTTL ?? 14400, // 4h default for system/identity/meta slots
697
+ historyTTL: cacheConfig?.historyTTL ?? 86400, // 24h default for history/cursor hot cache
672
698
  },
673
699
  ...(userConfig.compositor ? { compositor: userConfig.compositor } : {}),
674
700
  ...(_embeddingConfig ? { embedding: _embeddingConfig } : {}),
701
+ ...(userConfig.reranker
702
+ ? { reranker: userConfig.reranker }
703
+ : {}),
675
704
  });
676
705
  _hm = instance;
677
706
  // Wire up fleet store and background indexer from dynamic module
@@ -790,6 +819,33 @@ function resolveAssistantTokenCount(msg, runtimeContext) {
790
819
  }
791
820
  return undefined;
792
821
  }
822
/**
 * Resolve how many tokens the assistant generated for a single message.
 *
 * Resolution order:
 *   1. The first positive, finite number among the explicit output /
 *      completion fields of `msg.usage` (floored to an integer).
 *   2. A rough estimate from the message text at ~4 characters per token.
 *
 * Total-token fields (`totalTokens`, `total_tokens`, `total`) and
 * `runtimeContext.currentTokenCount` are deliberately NOT used: they include
 * prompt/context tokens, so reporting them as output would inflate the
 * recorded output size.
 *
 * @param {object} msg - Assistant message; `msg.usage` and `msg.content` are read.
 * @param {object} [runtimeContext] - Accepted for call-site compatibility; not
 *   consulted, because it carries no output-only token figure.
 * @returns {number|undefined} Output token count, or `undefined` when there is
 *   neither a reported output count nor any text to estimate from.
 */
function resolveAssistantOutputTokenCount(msg, runtimeContext) {
    const usage = msg.usage;
    if (usage && typeof usage === 'object') {
        // Output-only counters, in the naming variants different providers use.
        const outputCandidates = [
            usage.output,
            usage.outputTokens,
            usage.output_tokens,
            usage.completionTokens,
            usage.completion_tokens,
        ];
        const reported = outputCandidates.find((value) => typeof value === 'number' && Number.isFinite(value) && value > 0);
        if (reported !== undefined) {
            return Math.floor(reported);
        }
    }
    // No provider-reported output count: estimate from the generated text.
    const text = extractTextFromInboundContent(msg.content);
    const tokenEstimate = Math.ceil(text.length / 4);
    return tokenEstimate > 0 ? tokenEstimate : undefined;
}
793
849
  function collectNeutralToolPairStats(messages) {
794
850
  const callIds = new Set();
795
851
  const resultIds = new Set();
@@ -1775,7 +1831,9 @@ function createHyperMemEngine() {
1775
1831
  });
1776
1832
  const replayMarkerText = replayRecovery.emittedText;
1777
1833
  const preTrimTokens = runtimeTokens;
1778
- const pressure = preTrimTokens / effectiveBudget;
1834
+ // Sprint 3: unified pressure signal — tool-loop assemble path (runtime message array)
1835
+ const s3ToolLoopPressure = computeUnifiedPressure(preTrimTokens, effectiveBudget, PRESSURE_SOURCE.TOOLLOOP_RUNTIME_ARRAY);
1836
+ const pressure = s3ToolLoopPressure.fraction;
1779
1837
  // Pressure-tiered trim targets use a single authority: the working
1780
1838
  // message array. Redis drift is logged as an anomaly, never used as
1781
1839
  // a trim trigger. Replay recovery gets its own explicit bounded mode
@@ -1927,17 +1985,17 @@ function createHyperMemEngine() {
1927
1985
  const kept = keptClusters.flat();
1928
1986
  const keptCount = processedConvMsgs.length - kept.length;
1929
1987
  if (keptCount > 0) {
1930
- console.log(`[hypermem-plugin] tool-loop trim: pressure=${(pressure * 100).toFixed(1)}% → ` +
1988
+ console.log(`[hypermem-plugin] tool-loop trim: pressure=${s3ToolLoopPressure.pct}% source=${s3ToolLoopPressure.source} → ` +
1931
1989
  `target=${(trimTarget * 100).toFixed(0)}% (redis=${trimmed} msgs, messages=${keptCount} dropped)`);
1932
1990
  trimmedMessages = [...systemMsgs, ...kept];
1933
1991
  }
1934
1992
  else if (trimmed > 0) {
1935
- console.log(`[hypermem-plugin] tool-loop trim: pressure=${(pressure * 100).toFixed(1)}% → ` +
1993
+ console.log(`[hypermem-plugin] tool-loop trim: pressure=${s3ToolLoopPressure.pct}% source=${s3ToolLoopPressure.source} → ` +
1936
1994
  `target=${(trimTarget * 100).toFixed(0)}% (redis=${trimmed} msgs)`);
1937
1995
  }
1938
1996
  }
1939
1997
  else if (trimmed > 0) {
1940
- console.log(`[hypermem-plugin] tool-loop trim: pressure=${(pressure * 100).toFixed(1)}% → ` +
1998
+ console.log(`[hypermem-plugin] tool-loop trim: pressure=${s3ToolLoopPressure.pct}% source=${s3ToolLoopPressure.source} → ` +
1941
1999
  `target=${(trimTarget * 100).toFixed(0)}% (redis=${trimmed} msgs)`);
1942
2000
  }
1943
2001
  // Apply tool gradient to compress large tool results before returning.
@@ -2205,6 +2263,7 @@ function createHyperMemEngine() {
2205
2263
  path: 'replay',
2206
2264
  toolLoop: isToolLoop,
2207
2265
  msgCount: messages.length,
2266
+ composeTopicTelemetryStatus: 'intentionally-omitted',
2208
2267
  });
2209
2268
  }
2210
2269
  }
@@ -2217,6 +2276,20 @@ function createHyperMemEngine() {
2217
2276
  // Subagent light mode: skip library/wiki/semantic/keystones/doc chunks.
2218
2277
  // Keeps: system, identity, history, active facts, output profile, tool gradient.
2219
2278
  const subagentLight = isSubagent && _subagentWarming === 'light';
2279
+ let forkedContext;
2280
+ if (isSubagent) {
2281
+ try {
2282
+ const rawForkedContext = await hm.cache.getSlot(agentId, sk, FORKED_CONTEXT_META_SLOT);
2283
+ if (rawForkedContext) {
2284
+ const parsed = JSON.parse(rawForkedContext);
2285
+ if (parsed?.enabled === true)
2286
+ forkedContext = parsed;
2287
+ }
2288
+ }
2289
+ catch {
2290
+ // Fork metadata is advisory; fall back to normal subagent lifecycle.
2291
+ }
2292
+ }
2220
2293
  const request = {
2221
2294
  agentId,
2222
2295
  sessionKey: sk,
@@ -2231,6 +2304,7 @@ function createHyperMemEngine() {
2231
2304
  includeSemanticRecall: subagentLight ? false : undefined, // skip vector/FTS recall
2232
2305
  includeKeystones: subagentLight ? false : undefined, // skip keystone history injection
2233
2306
  prompt,
2307
+ forkedContext,
2234
2308
  skipProviderTranslation: true, // runtime handles provider translation
2235
2309
  };
2236
2310
  const result = await hm.compose(request);
@@ -2246,6 +2320,63 @@ function createHyperMemEngine() {
2246
2320
  replayState: replayRecovery.emittedMarker?.state,
2247
2321
  replayReason: replayRecovery.emittedMarker?.reason,
2248
2322
  });
2323
+ // Sprint 1: emit assemble-level trace with full observability fields
2324
+ // after a full compose (not replay). Surfaces prefix stability,
2325
+ // reranker outcome, slot spans, and compaction eligibility.
2326
+ if (telemetryEnabled() && !cachedContextBlock) {
2327
+ const diag = result.diagnostics;
2328
+ // prefixChanged: compare current prefixHash against prevPrefixHash
2329
+ // (surfaced by the compositor when a cache bypass detected prefix mutation).
2330
+ // When no previous hash is available (first turn), leave prefixChanged undefined.
2331
+ let prefixChanged;
2332
+ if (diag?.prefixHash && diag?.prevPrefixHash) {
2333
+ prefixChanged = diag.prefixHash !== diag.prevPrefixHash;
2334
+ }
2335
+ assembleTrace({
2336
+ agentId,
2337
+ sessionKey: sk,
2338
+ turnId: _asmTurnId,
2339
+ path: isSubagent ? 'subagent' : 'cold',
2340
+ toolLoop: isToolLoop,
2341
+ msgCount: result.messages.length,
2342
+ prefixChanged,
2343
+ prefixHash: diag?.prefixHash,
2344
+ rerankerStatus: diag?.rerankerStatus,
2345
+ rerankerCandidates: diag?.rerankerCandidates,
2346
+ rerankerProvider: diag?.rerankerProvider,
2347
+ slotSpans: diag?.slotSpans,
2348
+ compactionEligibleCount: diag?.compactionEligibleCount,
2349
+ compactionEligibleRatio: diag?.compactionEligibleRatio,
2350
+ compactionProcessedCount: diag?.compactionProcessedCount,
2351
+ composeTopicSource: diag?.composeTopicSource,
2352
+ composeTopicState: diag?.composeTopicState,
2353
+ composeTopicMessageCount: diag?.composeTopicMessageCount,
2354
+ composeTopicStampedMessageCount: diag?.composeTopicStampedMessageCount,
2355
+ composeTopicTelemetryStatus: diag?.composeTopicTelemetryStatus,
2356
+ });
2357
+ if (diag?.adaptiveLifecycleBand) {
2358
+ lifecyclePolicyTelemetry({
2359
+ path: 'compose.preRecall',
2360
+ agentId,
2361
+ sessionKey: sk,
2362
+ band: diag.adaptiveLifecycleBand,
2363
+ pressurePct: diag.adaptiveLifecyclePressurePct,
2364
+ trimSoftTarget: diag.adaptiveTrimSoftTarget,
2365
+ reasons: diag.adaptiveLifecycleReasons,
2366
+ });
2367
+ }
2368
+ if (diag?.adaptiveEvictionLifecycleBand) {
2369
+ lifecyclePolicyTelemetry({
2370
+ path: 'compose.eviction',
2371
+ agentId,
2372
+ sessionKey: sk,
2373
+ band: diag.adaptiveEvictionLifecycleBand,
2374
+ pressurePct: diag.adaptiveEvictionPressurePct,
2375
+ trimSoftTarget: diag.adaptiveTrimSoftTarget,
2376
+ reasons: diag.adaptiveLifecycleBandDiverged ? ['diverged-from-preRecall'] : undefined,
2377
+ });
2378
+ }
2379
+ }
2249
2380
  // Use cached contextBlock if available (cache replay), otherwise use fresh result.
2250
2381
  // After a full compose, write the new contextBlock to cache for the next turn.
2251
2382
  if (cachedContextBlock) {
@@ -2318,6 +2449,9 @@ ${replayRecovery.emittedText}`
2318
2449
  const runtimeSystemTokens = getOverheadFallback(tier);
2319
2450
  _overheadCache.set(sk, contextBlockTokens + runtimeSystemTokens);
2320
2451
  await persistReplayRecoveryState(hm, agentId, sk, replayRecovery.nextState);
2452
+ if (forkedContext) {
2453
+ await hm.cache.setSlot(agentId, sk, FORKED_CONTEXT_META_SLOT, '').catch(() => { });
2454
+ }
2321
2455
  // Update model state for downshift detection on next turn
2322
2456
  try {
2323
2457
  const modelIdentity = resolveModelIdentity(model);
@@ -2406,6 +2540,9 @@ ${replayRecovery.emittedText}`
2406
2540
  // budget the history is competing for. We trim history to make room.
2407
2541
  const effectiveBudget = computeEffectiveBudget(tokenBudget, model);
2408
2542
  const tokensBefore = await estimateWindowTokens(hm, agentId, sk);
2543
+ // Sprint 3: Unified pressure signal — compact path (Redis estimate)
2544
+ const s3CompactPressure = computeUnifiedPressure(tokensBefore, effectiveBudget, PRESSURE_SOURCE.COMPACT_REDIS_ESTIMATE);
2545
+ console.log(`[hypermem-plugin] compact: pressure=${s3CompactPressure.pct}% source=${s3CompactPressure.source} tokens=${tokensBefore}/${effectiveBudget}`);
2409
2546
  // Target depth for both Redis trimming and JSONL truncation.
2410
2547
  // Target 50% of budget capacity, assume ~500 tokens/message average.
2411
2548
  const targetDepth = Math.max(20, Math.floor((effectiveBudget * 0.5) / 500));
@@ -2419,6 +2556,10 @@ ${replayRecovery.emittedText}`
2419
2556
  // Also triggered when reshape ran recently but the session is still
2420
2557
  // critically full — bypass the reshape guard in that case.
2421
2558
  const NUCLEAR_THRESHOLD = 0.85;
2559
+ // Sprint 3: runtime-total pressure for nuclear check uses its own source label
2560
+ const s3NuclearPressure = currentTokenCount != null
2561
+ ? computeUnifiedPressure(currentTokenCount, effectiveBudget, PRESSURE_SOURCE.COMPACT_RUNTIME_TOTAL)
2562
+ : s3CompactPressure;
2422
2563
  const isNuclear = currentTokenCount != null && currentTokenCount > effectiveBudget * NUCLEAR_THRESHOLD;
2423
2564
  if (isNuclear) {
2424
2565
  // Cut deep: target 20% of normal depth = ~25 messages for a 128k session.
@@ -2437,11 +2578,11 @@ ${replayRecovery.emittedText}`
2437
2578
  postTokens: tokensAfter,
2438
2579
  removed: nuclearRemoved,
2439
2580
  cacheInvalidated: true,
2440
- reason: `currentTokenCount=${currentTokenCount}/${effectiveBudget}`,
2581
+ reason: `${s3NuclearPressure.source}:${s3NuclearPressure.pct}% currentTokenCount=${currentTokenCount}/${effectiveBudget}`,
2441
2582
  });
2442
2583
  }
2443
- console.log(`[hypermem-plugin] compact: NUCLEAR — session at ${currentTokenCount}/${effectiveBudget} tokens ` +
2444
- `(${Math.round((currentTokenCount / effectiveBudget) * 100)}% full), ` +
2584
+ console.log(`[hypermem-plugin] compact: NUCLEAR — pressure=${s3NuclearPressure.pct}% source=${s3NuclearPressure.source} ` +
2585
+ `session at ${currentTokenCount}/${effectiveBudget} tokens, ` +
2445
2586
  `deep-trimmed JSONL to ${nuclearDepth} messages, Redis ${tokensBefore}→${tokensAfter} tokens`);
2446
2587
  return { ok: true, compacted: true, result: { tokensBefore, tokensAfter } };
2447
2588
  }
@@ -2528,10 +2669,10 @@ ${replayRecovery.emittedText}`
2528
2669
  postTokens: tokensAfter,
2529
2670
  removed: historyTrimmed,
2530
2671
  cacheInvalidated: true,
2531
- reason: `over-budget tokensBefore=${tokensBefore}/${effectiveBudget}`,
2672
+ reason: `${s3CompactPressure.source}:${s3CompactPressure.pct}% over-budget tokensBefore=${tokensBefore}/${effectiveBudget}`,
2532
2673
  });
2533
2674
  }
2534
- console.log(`[hypermem-plugin] compact: trimmed ${tokensBefore} → ${tokensAfter} tokens (budget: ${effectiveBudget})`);
2675
+ console.log(`[hypermem-plugin] compact: trimmed ${tokensBefore} → ${tokensAfter} tokens (budget: ${effectiveBudget}, pressure=${s3CompactPressure.pct}% source=${s3CompactPressure.source})`);
2535
2676
  // Density-aware JSONL truncation: derive target depth from actual avg tokens/message
2536
2677
  // rather than assuming a fixed 500 tokens/message. This prevents a large-message
2537
2678
  // session (e.g. 145 msgs × 882 tok = 128k) from bypassing the 1.5x guard and
@@ -2608,8 +2749,54 @@ ${replayRecovery.emittedText}`
2608
2749
  });
2609
2750
  }
2610
2751
  }
2752
+ try {
2753
+ const lastAssistantMessage = [...newMessages].reverse().find(m => m.role === 'assistant');
2754
+ if (lastAssistantMessage) {
2755
+ const modelState = await hm.cache.getModelState(agentId, sk).catch(() => null);
2756
+ const promptCacheUsage = runtimeContext?.promptCache?.lastCallUsage;
2757
+ const outputTokens = resolveAssistantOutputTokenCount(lastAssistantMessage, runtimeContext) ?? 1;
2758
+ const inputTokens = typeof promptCacheUsage?.input === 'number'
2759
+ ? Math.floor(promptCacheUsage.input)
2760
+ : typeof runtimeContext?.currentTokenCount === 'number'
2761
+ ? Math.floor(runtimeContext.currentTokenCount)
2762
+ : null;
2763
+ const cacheReadTokens = typeof promptCacheUsage?.cacheRead === 'number'
2764
+ ? Math.floor(promptCacheUsage.cacheRead)
2765
+ : null;
2766
+ const modelId = typeof lastAssistantMessage.model === 'string'
2767
+ ? lastAssistantMessage.model
2768
+ : modelState?.modelId ?? modelState?.model ?? 'unknown';
2769
+ const provider = typeof lastAssistantMessage.provider === 'string'
2770
+ ? lastAssistantMessage.provider
2771
+ : modelState?.provider ?? 'unknown';
2772
+ const taskType = typeof runtimeContext?.taskType === 'string'
2773
+ ? runtimeContext.taskType ?? null
2774
+ : null;
2775
+ recordOutputMetrics(hm.dbManager.getLibraryDb(), {
2776
+ id: `turn-metric-${agentId}-${Date.now()}-${randomUUID()}`,
2777
+ timestamp: new Date().toISOString(),
2778
+ agent_id: agentId,
2779
+ session_key: sk,
2780
+ model_id: modelId,
2781
+ provider,
2782
+ fos_version: null,
2783
+ mod_version: null,
2784
+ mod_id: null,
2785
+ task_type: taskType,
2786
+ output_tokens: outputTokens,
2787
+ input_tokens: inputTokens,
2788
+ cache_read_tokens: cacheReadTokens,
2789
+ corrections_fired: [],
2790
+ latency_ms: null,
2791
+ });
2792
+ }
2793
+ }
2794
+ catch {
2795
+ // Non-fatal telemetry path
2796
+ }
2611
2797
  // P3.1: Topic detection on the inbound user message
2612
2798
  // Non-fatal: topic detection never blocks afterTurn
2799
+ let adaptiveTopicShiftConfidence;
2613
2800
  try {
2614
2801
  const inboundUserMsg = newMessages
2615
2802
  .map(m => m)
@@ -2626,6 +2813,7 @@ ${replayRecovery.emittedText}`
2626
2813
  const topicMap = new SessionTopicMap(db);
2627
2814
  const activeTopic = topicMap.getActiveTopic(sk);
2628
2815
  const signal = detectTopicShift(neutralUser, contextMessages, activeTopic?.id ?? null);
2816
+ adaptiveTopicShiftConfidence = signal.confidence;
2629
2817
  if (signal.isNewTopic && signal.topicName) {
2630
2818
  const newTopicId = topicMap.createTopic(sk, signal.topicName);
2631
2819
  // New topic starts with count 1 (the message that triggered the shift)
@@ -2671,7 +2859,30 @@ ${replayRecovery.emittedText}`
2671
2859
  const modelState = await hm.cache.getModelState(agentId, sk);
2672
2860
  const gradientBudget = modelState?.tokenBudget;
2673
2861
  const gradientDepth = modelState?.historyDepth;
2674
- await hm.refreshRedisGradient(agentId, sk, gradientBudget, gradientDepth);
2862
+ const inboundUserMsg = newMessages
2863
+ .map(m => m)
2864
+ .find(m => m.role === 'user');
2865
+ const inboundUserText = inboundUserMsg
2866
+ ? stripMessageMetadata(extractTextFromInboundContent(inboundUserMsg.content))
2867
+ : '';
2868
+ const lifecyclePolicy = resolveAdaptiveLifecyclePolicy({
2869
+ usedTokens: estimateMessageArrayTokens(messages),
2870
+ effectiveBudget: gradientBudget,
2871
+ userTurnCount: messages.filter(m => m.role === 'user').length,
2872
+ explicitNewSession: /^\/new(?:\s|$)/i.test(inboundUserText.trim()),
2873
+ topicShiftConfidence: adaptiveTopicShiftConfidence,
2874
+ });
2875
+ lifecyclePolicyTelemetry({
2876
+ path: 'afterTurn.gradient',
2877
+ agentId,
2878
+ sessionKey: sk,
2879
+ band: lifecyclePolicy.band,
2880
+ pressurePct: lifecyclePolicy.pressurePct,
2881
+ topicShiftConfidence: adaptiveTopicShiftConfidence,
2882
+ trimSoftTarget: lifecyclePolicy.trimSoftTarget,
2883
+ reasons: lifecyclePolicy.reasons,
2884
+ });
2885
+ await hm.refreshRedisGradient(agentId, sk, gradientBudget, gradientDepth, lifecyclePolicy.trimSoftTarget);
2675
2886
  }
2676
2887
  catch (refreshErr) {
2677
2888
  console.warn('[hypermem-plugin] afterTurn: refreshRedisGradient failed (non-fatal):', refreshErr.message);
@@ -2841,7 +3052,12 @@ ${replayRecovery.emittedText}`
2841
3052
  * subagentWarming config ('full' | 'light' | 'off').
2842
3053
  * Returns a rollback handle to clean up if spawn fails.
2843
3054
  */
2844
- async prepareSubagentSpawn({ parentSessionKey, childSessionKey }) {
3055
+ async prepareSubagentSpawn(params) {
3056
+ const { parentSessionKey, childSessionKey } = params;
3057
+ const forkParams = params;
3058
+ const contextMode = forkParams.contextMode;
3059
+ const parentSessionId = forkParams.parentSessionId;
3060
+ const childSessionId = forkParams.childSessionId;
2845
3061
  if (_subagentWarming === 'off') {
2846
3062
  return undefined;
2847
3063
  }
@@ -2849,7 +3065,12 @@ ${replayRecovery.emittedText}`
2849
3065
  const hm = await getHyperMem();
2850
3066
  const parentAgentId = extractAgentId(parentSessionKey);
2851
3067
  const childAgentId = extractAgentId(childSessionKey);
2852
- // Seed child with parent's active facts
3068
+ const isForkedContext = contextMode === 'fork';
3069
+ let parentHistoryMessages = 0;
3070
+ let parentUserTurnCount = 0;
3071
+ let parentPressureFraction;
3072
+ // Seed child with parent's active facts. This preserves the historical
3073
+ // slot for compatibility; facts still primarily come from L4 by agent id.
2853
3074
  const facts = hm.getActiveFacts(parentAgentId, { limit: 50 });
2854
3075
  if (facts && facts.length > 0) {
2855
3076
  const factBlock = facts
@@ -2857,22 +3078,48 @@ ${replayRecovery.emittedText}`
2857
3078
  .join('\n');
2858
3079
  await hm.cache.setSlot(childAgentId, childSessionKey, 'parentFacts', factBlock);
2859
3080
  }
2860
- // For 'full' warming, also seed recent history context
2861
- if (_subagentWarming === 'full') {
2862
- const history = await hm.cache.getHistory(parentAgentId, parentSessionKey);
2863
- if (history && history.length > 0) {
2864
- const recentHistory = history.slice(-10);
2865
- await hm.cache.setSlot(childAgentId, childSessionKey, 'parentHistory', JSON.stringify(recentHistory));
3081
+ const history = await hm.cache.getHistory(parentAgentId, parentSessionKey);
3082
+ if (history && history.length > 0) {
3083
+ const maxSeededHistory = _subagentWarming === 'full' ? 25 : 12;
3084
+ const recentHistory = history.slice(-maxSeededHistory);
3085
+ parentHistoryMessages = recentHistory.length;
3086
+ parentUserTurnCount = recentHistory.filter(m => m.role === 'user').length;
3087
+ const parentTokens = estimateMessageArrayTokens(recentHistory);
3088
+ const parentModelState = await hm.cache.getModelState(parentAgentId, parentSessionKey).catch(() => null);
3089
+ const parentBudget = parentModelState?.tokenBudget && parentModelState.tokenBudget > 0
3090
+ ? parentModelState.tokenBudget
3091
+ : undefined;
3092
+ parentPressureFraction = parentBudget ? parentTokens / parentBudget : undefined;
3093
+ if (isForkedContext || _subagentWarming === 'full') {
3094
+ await hm.cache.replaceHistory(childAgentId, childSessionKey, recentHistory, maxSeededHistory);
3095
+ await hm.cache.invalidateWindow(childAgentId, childSessionKey).catch(() => { });
2866
3096
  }
3097
+ await hm.cache.setSlot(childAgentId, childSessionKey, 'parentHistory', JSON.stringify(recentHistory));
3098
+ }
3099
+ if (isForkedContext) {
3100
+ const forkedMeta = {
3101
+ enabled: true,
3102
+ parentSessionKey,
3103
+ parentSessionId,
3104
+ childSessionId,
3105
+ parentPressureFraction,
3106
+ parentUserTurnCount,
3107
+ parentHistoryMessages,
3108
+ };
3109
+ await hm.cache.setSlot(childAgentId, childSessionKey, FORKED_CONTEXT_META_SLOT, JSON.stringify(forkedMeta));
2867
3110
  }
2868
3111
  console.log(`[hypermem-plugin] prepareSubagentSpawn: seeded ${childSessionKey} ` +
2869
- `from ${parentSessionKey} (warming=${_subagentWarming})`);
3112
+ `from ${parentSessionKey} (warming=${_subagentWarming}, contextMode=${contextMode ?? 'isolated'}, ` +
3113
+ `history=${parentHistoryMessages})`);
2870
3114
  return {
2871
3115
  async rollback() {
2872
3116
  try {
2873
3117
  const hm = await getHyperMem();
2874
3118
  await hm.cache.setSlot(childAgentId, childSessionKey, 'parentFacts', '');
2875
3119
  await hm.cache.setSlot(childAgentId, childSessionKey, 'parentHistory', '');
3120
+ await hm.cache.setSlot(childAgentId, childSessionKey, FORKED_CONTEXT_META_SLOT, '');
3121
+ await hm.cache.replaceHistory(childAgentId, childSessionKey, [], 0);
3122
+ await hm.cache.invalidateWindow(childAgentId, childSessionKey).catch(() => { });
2876
3123
  }
2877
3124
  catch {
2878
3125
  // Rollback is best-effort
@@ -2898,6 +3145,7 @@ ${replayRecovery.emittedText}`
2898
3145
  await Promise.all([
2899
3146
  hm.cache.setSlot(childAgentId, childSessionKey, 'parentFacts', ''),
2900
3147
  hm.cache.setSlot(childAgentId, childSessionKey, 'parentHistory', ''),
3148
+ hm.cache.setSlot(childAgentId, childSessionKey, FORKED_CONTEXT_META_SLOT, ''),
2901
3149
  hm.cache.setSlot(childAgentId, childSessionKey, 'assemblyContextBlock', ''),
2902
3150
  hm.cache.setSlot(childAgentId, childSessionKey, 'assemblyContextAt', '0'),
2903
3151
  hm.cache.invalidateWindow(childAgentId, childSessionKey).catch(() => { }),
@@ -3109,6 +3357,23 @@ const hypercompositorConfigSchema = z.object({
3109
3357
  timeout: z.number().int().positive().optional(),
3110
3358
  batchSize: z.number().int().positive().optional(),
3111
3359
  }).optional(),
3360
+ /**
3361
+ * Optional reranker config. When omitted or provider is 'none', the
3362
+ * compositor runs with RRF-only ordering. See INSTALL.md → Reranker.
3363
+ */
3364
+ reranker: z.object({
3365
+ provider: z.enum(['zeroentropy', 'openrouter', 'local', 'none']),
3366
+ minCandidates: z.number().int().nonnegative().optional(),
3367
+ maxDocuments: z.number().int().positive().optional(),
3368
+ topK: z.number().int().positive().optional(),
3369
+ timeoutMs: z.number().int().positive().optional(),
3370
+ zeroEntropyApiKey: z.string().optional(),
3371
+ zeroEntropyModel: z.string().optional(),
3372
+ openrouterApiKey: z.string().optional(),
3373
+ openrouterModel: z.string().optional(),
3374
+ ollamaUrl: z.string().optional(),
3375
+ ollamaModel: z.string().optional(),
3376
+ }).optional(),
3112
3377
  });
3113
3378
  // ─── Plugin Entry ───────────────────────────────────────────────
3114
3379
  const engine = createHyperMemEngine();