@psiclawops/hypermem 0.8.4 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99)
  1. package/CHANGELOG.md +33 -0
  2. package/INSTALL.md +203 -23
  3. package/README.md +139 -216
  4. package/bench/README.md +42 -0
  5. package/bench/data-access-bench.mjs +380 -0
  6. package/bin/hypermem-bench.mjs +2 -0
  7. package/bin/hypermem-doctor.mjs +412 -0
  8. package/bin/hypermem-model-audit.mjs +339 -0
  9. package/bin/hypermem-status.mjs +491 -70
  10. package/dist/adaptive-lifecycle.d.ts +81 -0
  11. package/dist/adaptive-lifecycle.d.ts.map +1 -0
  12. package/dist/adaptive-lifecycle.js +190 -0
  13. package/dist/background-indexer.js +9 -9
  14. package/dist/budget-policy.d.ts +1 -1
  15. package/dist/budget-policy.d.ts.map +1 -1
  16. package/dist/budget-policy.js +10 -5
  17. package/dist/cache.d.ts +4 -0
  18. package/dist/cache.d.ts.map +1 -1
  19. package/dist/cache.js +2 -0
  20. package/dist/composition-snapshot-integrity.d.ts +36 -0
  21. package/dist/composition-snapshot-integrity.d.ts.map +1 -0
  22. package/dist/composition-snapshot-integrity.js +131 -0
  23. package/dist/composition-snapshot-runtime.d.ts +59 -0
  24. package/dist/composition-snapshot-runtime.d.ts.map +1 -0
  25. package/dist/composition-snapshot-runtime.js +250 -0
  26. package/dist/composition-snapshot-store.d.ts +44 -0
  27. package/dist/composition-snapshot-store.d.ts.map +1 -0
  28. package/dist/composition-snapshot-store.js +117 -0
  29. package/dist/compositor.d.ts +125 -1
  30. package/dist/compositor.d.ts.map +1 -1
  31. package/dist/compositor.js +692 -44
  32. package/dist/cross-agent.d.ts +1 -1
  33. package/dist/cross-agent.js +17 -17
  34. package/dist/doc-chunk-store.d.ts +19 -0
  35. package/dist/doc-chunk-store.d.ts.map +1 -1
  36. package/dist/doc-chunk-store.js +56 -6
  37. package/dist/dreaming-promoter.d.ts +1 -1
  38. package/dist/dreaming-promoter.js +2 -2
  39. package/dist/hybrid-retrieval.d.ts +38 -0
  40. package/dist/hybrid-retrieval.d.ts.map +1 -1
  41. package/dist/hybrid-retrieval.js +86 -1
  42. package/dist/index.d.ts +15 -6
  43. package/dist/index.d.ts.map +1 -1
  44. package/dist/index.js +33 -7
  45. package/dist/knowledge-store.d.ts +4 -1
  46. package/dist/knowledge-store.d.ts.map +1 -1
  47. package/dist/knowledge-store.js +27 -4
  48. package/dist/library-schema.d.ts +12 -8
  49. package/dist/library-schema.d.ts.map +1 -1
  50. package/dist/library-schema.js +22 -8
  51. package/dist/message-store.d.ts.map +1 -1
  52. package/dist/message-store.js +7 -3
  53. package/dist/metrics-dashboard.d.ts +18 -1
  54. package/dist/metrics-dashboard.d.ts.map +1 -1
  55. package/dist/metrics-dashboard.js +52 -14
  56. package/dist/reranker.d.ts +1 -1
  57. package/dist/reranker.js +2 -2
  58. package/dist/schema.d.ts +1 -1
  59. package/dist/schema.d.ts.map +1 -1
  60. package/dist/schema.js +28 -1
  61. package/dist/seed.d.ts +1 -1
  62. package/dist/seed.d.ts.map +1 -1
  63. package/dist/seed.js +3 -1
  64. package/dist/session-flusher.d.ts +2 -2
  65. package/dist/session-flusher.js +2 -2
  66. package/dist/spawn-context.d.ts +1 -1
  67. package/dist/spawn-context.js +1 -1
  68. package/dist/topic-store.js +5 -5
  69. package/dist/topic-synthesizer.d.ts +20 -0
  70. package/dist/topic-synthesizer.d.ts.map +1 -1
  71. package/dist/topic-synthesizer.js +114 -4
  72. package/dist/trigger-registry.d.ts +1 -1
  73. package/dist/trigger-registry.d.ts.map +1 -1
  74. package/dist/trigger-registry.js +14 -6
  75. package/dist/types.d.ts +273 -3
  76. package/dist/types.d.ts.map +1 -1
  77. package/dist/version.d.ts +7 -7
  78. package/dist/version.d.ts.map +1 -1
  79. package/dist/version.js +17 -7
  80. package/docs/DIAGNOSTICS.md +205 -0
  81. package/docs/INTEGRATION_VALIDATION.md +186 -0
  82. package/docs/MIGRATION.md +9 -6
  83. package/docs/MIGRATION_GUIDE.md +125 -101
  84. package/docs/ROADMAP.md +238 -20
  85. package/docs/TUNING.md +30 -6
  86. package/install.sh +159 -408
  87. package/memory-plugin/LICENSE +190 -0
  88. package/memory-plugin/README.md +20 -0
  89. package/memory-plugin/dist/index.js +50 -0
  90. package/memory-plugin/package.json +2 -2
  91. package/package.json +18 -4
  92. package/plugin/LICENSE +190 -0
  93. package/plugin/README.md +20 -0
  94. package/plugin/dist/index.d.ts +55 -0
  95. package/plugin/dist/index.d.ts.map +1 -1
  96. package/plugin/dist/index.js +362 -42
  97. package/plugin/dist/index.js.map +1 -1
  98. package/plugin/package.json +2 -2
  99. package/scripts/install-runtime.mjs +13 -3
@@ -22,12 +22,15 @@
22
22
  import { definePluginEntry } from 'openclaw/plugin-sdk/plugin-entry';
23
23
  import { buildPluginConfigSchema } from 'openclaw/plugin-sdk/core';
24
24
  import { z } from 'zod';
25
- import { detectTopicShift, stripMessageMetadata, SessionTopicMap, applyToolGradientToWindow, OPENCLAW_BOOTSTRAP_FILES, rotateSessionContext, TRIM_SOFT_TARGET, TRIM_GROWTH_THRESHOLD, TRIM_HEADROOM_FRACTION, resolveTrimBudgets, formatToolChainStub, decideReplayRecovery, isReplayState, } from '@psiclawops/hypermem';
25
+ import { detectTopicShift, stripMessageMetadata, SessionTopicMap, applyToolGradientToWindow, OPENCLAW_BOOTSTRAP_FILES, rotateSessionContext, TRIM_SOFT_TARGET, TRIM_GROWTH_THRESHOLD, TRIM_HEADROOM_FRACTION, resolveTrimBudgets, resolveAdaptiveLifecyclePolicy, formatToolChainStub, decideReplayRecovery, isReplayState, recordOutputMetrics,
26
+ // Sprint 3: unified pressure signal
27
+ computeUnifiedPressure, PRESSURE_SOURCE, } from '@psiclawops/hypermem';
26
28
  import { evictStaleContent } from '@psiclawops/hypermem/image-eviction';
27
29
  import { repairToolPairs } from '@psiclawops/hypermem';
28
30
  import os from 'os';
29
31
  import path from 'path';
30
32
  import fs from 'fs/promises';
33
+ import { randomUUID } from 'node:crypto';
31
34
  import { fileURLToPath } from 'url';
32
35
  import fsSync from 'fs';
33
36
  let _telemetryStream = null;
@@ -107,6 +110,24 @@ function degradationTelemetry(fields) {
107
110
  // Telemetry must never throw
108
111
  }
109
112
  }
113
+ function lifecyclePolicyTelemetry(fields) {
114
+ if (!telemetryEnabled())
115
+ return;
116
+ const stream = getTelemetryStream();
117
+ if (!stream)
118
+ return;
119
+ try {
120
+ const record = {
121
+ event: 'lifecycle-policy',
122
+ ts: new Date().toISOString(),
123
+ ...fields,
124
+ };
125
+ stream.write(JSON.stringify(record) + '\n');
126
+ }
127
+ catch {
128
+ // Telemetry must never throw
129
+ }
130
+ }
110
131
  function nextTurnId() {
111
132
  _telemetryTurnCounter = (_telemetryTurnCounter + 1) >>> 0;
112
133
  return `${Date.now().toString(36)}-${_telemetryTurnCounter.toString(36)}`;
@@ -279,6 +300,7 @@ export const __telemetryForTests = {
279
300
  assembleTrace,
280
301
  degradationTelemetry,
281
302
  guardTelemetry,
303
+ lifecyclePolicyTelemetry,
282
304
  nextTurnId,
283
305
  beginTrimOwnerTurn,
284
306
  endTrimOwnerTurn,
@@ -400,6 +422,48 @@ export function resolveEffectiveBudget(args) {
400
422
  source: 'fallback contextWindowSize',
401
423
  };
402
424
  }
425
+ export function resolveModelIdentity(model) {
426
+ const modelKey = normalizeModelKey(model);
427
+ if (!modelKey) {
428
+ return {
429
+ rawModel: model ?? null,
430
+ modelKey: null,
431
+ provider: null,
432
+ modelId: null,
433
+ };
434
+ }
435
+ const slash = modelKey.indexOf('/');
436
+ return {
437
+ rawModel: model ?? null,
438
+ modelKey,
439
+ provider: slash > 0 ? modelKey.slice(0, slash) : null,
440
+ modelId: slash > 0 && slash < modelKey.length - 1 ? modelKey.slice(slash + 1) : modelKey,
441
+ };
442
+ }
443
+ export function diffModelState(previous, current) {
444
+ const previousIdentity = previous?.modelKey || previous?.provider || previous?.modelId
445
+ ? {
446
+ rawModel: previous.model ?? null,
447
+ modelKey: previous.modelKey ?? normalizeModelKey(previous.model),
448
+ provider: previous.provider ?? resolveModelIdentity(previous.model).provider,
449
+ modelId: previous.modelId ?? resolveModelIdentity(previous.model).modelId,
450
+ }
451
+ : resolveModelIdentity(previous?.model);
452
+ const currentIdentity = resolveModelIdentity(current.model);
453
+ const previousBudget = previous?.tokenBudget;
454
+ const currentBudget = current.tokenBudget;
455
+ const budgetChanged = previousBudget != null && currentBudget != null && previousBudget !== currentBudget;
456
+ return {
457
+ previousIdentity,
458
+ currentIdentity,
459
+ modelChanged: previousIdentity.modelKey !== currentIdentity.modelKey,
460
+ providerChanged: previousIdentity.provider !== currentIdentity.provider,
461
+ modelIdChanged: previousIdentity.modelId !== currentIdentity.modelId,
462
+ budgetChanged,
463
+ budgetDownshift: previousBudget != null && currentBudget != null && currentBudget < previousBudget,
464
+ budgetUplift: previousBudget != null && currentBudget != null && currentBudget > previousBudget,
465
+ };
466
+ }
403
467
  function normalizeModelKey(model) {
404
468
  if (!model)
405
469
  return null;
@@ -422,6 +486,7 @@ function resolveConfiguredWindow(model) {
422
486
  // Subagent warming mode: 'full' | 'light' | 'off'. Default: 'light'.
423
487
  // Controls how much HyperMem context is injected into subagent sessions.
424
488
  let _subagentWarming = 'light';
489
+ const FORKED_CONTEXT_META_SLOT = 'forkedContextMeta';
425
490
  // Cache replay threshold: 15min default. Set to 0 in user config to disable.
426
491
  let _cacheReplayThresholdMs = 900_000;
427
492
  // ─── System overhead cache ────────────────────────────────────
@@ -537,6 +602,8 @@ async function loadUserConfig() {
537
602
  merged.eviction = { ...merged.eviction, ..._pluginConfig.eviction };
538
603
  if (_pluginConfig.embedding)
539
604
  merged.embedding = { ...merged.embedding, ..._pluginConfig.embedding };
605
+ if (_pluginConfig.reranker)
606
+ merged.reranker = { ...merged.reranker, ..._pluginConfig.reranker };
540
607
  if (Object.keys(fileConfig).length > 0 && Object.keys(_pluginConfig).filter(k => k !== 'hyperMemPath' && k !== 'dataDir').length > 0) {
541
608
  console.log('[hypermem-plugin] Note: migrating config.json keys to plugins.entries.hypercompositor.config in openclaw.json is recommended');
542
609
  }
@@ -621,15 +688,19 @@ async function getHyperMem() {
621
688
  `effective history budget: ${_contextWindowSize - reservedTokens} tokens`);
622
689
  verboseLog(`[hypermem-plugin] warmCacheReplayThresholdMs=${_cacheReplayThresholdMs}`);
623
690
  verboseLog(`[hypermem-plugin] contextWindowOverrides keys=${Object.keys(_contextWindowOverrides).join(', ') || '(none)'}`);
691
+ const cacheConfig = userConfig.cache;
624
692
  const instance = await HyperMem.create({
625
693
  dataDir: _pluginConfig.dataDir ?? path.join(os.homedir(), '.openclaw/hypermem'),
626
694
  cache: {
627
- keyPrefix: 'hm:',
628
- sessionTTL: 14400, // 4h for system/identity/meta slots
629
- historyTTL: 86400, // 24h for history ages out, not count-trimmed
695
+ keyPrefix: cacheConfig?.keyPrefix ?? 'hm:',
696
+ sessionTTL: cacheConfig?.sessionTTL ?? 14400, // 4h default for system/identity/meta slots
697
+ historyTTL: cacheConfig?.historyTTL ?? 86400, // 24h default for history/cursor hot cache
630
698
  },
631
699
  ...(userConfig.compositor ? { compositor: userConfig.compositor } : {}),
632
700
  ...(_embeddingConfig ? { embedding: _embeddingConfig } : {}),
701
+ ...(userConfig.reranker
702
+ ? { reranker: userConfig.reranker }
703
+ : {}),
633
704
  });
634
705
  _hm = instance;
635
706
  // Wire up fleet store and background indexer from dynamic module
@@ -748,6 +819,33 @@ function resolveAssistantTokenCount(msg, runtimeContext) {
748
819
  }
749
820
  return undefined;
750
821
  }
822
+ function resolveAssistantOutputTokenCount(msg, runtimeContext) {
823
+ const usage = msg.usage;
824
+ if (usage && typeof usage === 'object') {
825
+ const candidates = [
826
+ usage.output,
827
+ usage.outputTokens,
828
+ usage.output_tokens,
829
+ usage.completionTokens,
830
+ usage.completion_tokens,
831
+ usage.totalTokens,
832
+ usage.total_tokens,
833
+ usage.total,
834
+ ];
835
+ for (const candidate of candidates) {
836
+ if (typeof candidate === 'number' && Number.isFinite(candidate) && candidate > 0) {
837
+ return Math.floor(candidate);
838
+ }
839
+ }
840
+ }
841
+ const runtimeTokenCount = runtimeContext?.currentTokenCount;
842
+ if (typeof runtimeTokenCount === 'number' && Number.isFinite(runtimeTokenCount) && runtimeTokenCount > 0) {
843
+ return Math.floor(runtimeTokenCount);
844
+ }
845
+ const text = extractTextFromInboundContent(msg.content);
846
+ const tokenEstimate = Math.ceil(text.length / 4);
847
+ return tokenEstimate > 0 ? tokenEstimate : undefined;
848
+ }
751
849
  function collectNeutralToolPairStats(messages) {
752
850
  const callIds = new Set();
753
851
  const resultIds = new Set();
@@ -1316,10 +1414,10 @@ function createHyperMemEngine() {
1316
1414
  // Non-fatal: missing files are silently skipped.
1317
1415
  let identityBlock;
1318
1416
  try {
1319
- // Council agents live at workspace-council/<agentId>/
1417
+ // Council agents live at workspace/<agentId>/
1320
1418
  // Other agents at workspace/<agentId>/ — try council path first
1321
1419
  const homedir = os.homedir();
1322
- const councilPath = path.join(homedir, '.openclaw', 'workspace-council', agentId);
1420
+ const councilPath = path.join(homedir, '.openclaw', 'workspace', agentId);
1323
1421
  const workspacePath = path.join(homedir, '.openclaw', 'workspace', agentId);
1324
1422
  let wsPath = councilPath;
1325
1423
  try {
@@ -1351,7 +1449,7 @@ function createHyperMemEngine() {
1351
1449
  let _wsPathForSeed;
1352
1450
  try {
1353
1451
  const homedir2 = os.homedir();
1354
- const councilPath2 = path.join(homedir2, '.openclaw', 'workspace-council', agentId);
1452
+ const councilPath2 = path.join(homedir2, '.openclaw', 'workspace', agentId);
1355
1453
  const workspacePath2 = path.join(homedir2, '.openclaw', 'workspace', agentId);
1356
1454
  try {
1357
1455
  await fs.access(councilPath2);
@@ -1386,7 +1484,7 @@ function createHyperMemEngine() {
1386
1484
  // Post-warm pressure check: if messages.db had accumulated history,
1387
1485
  // warm() may have loaded the session straight to 80%+. Pre-trim now
1388
1486
  // so the first turn has headroom instead of starting saturated.
1389
- // This is the "restart at 98%" failure mode reported by Helm 2026-04-05:
1487
+ // This is the "restart at 98%" failure mode reported by Eve 2026-04-05:
1390
1488
  // JSONL truncation + Redis flush isn't enough if messages.db is still full
1391
1489
  // and warm() reloads it. Trim here closes the loop.
1392
1490
  try {
@@ -1733,7 +1831,9 @@ function createHyperMemEngine() {
1733
1831
  });
1734
1832
  const replayMarkerText = replayRecovery.emittedText;
1735
1833
  const preTrimTokens = runtimeTokens;
1736
- const pressure = preTrimTokens / effectiveBudget;
1834
+ // Sprint 3: unified pressure signal tool-loop assemble path
1835
+ const s3ToolLoopPressure = computeUnifiedPressure(preTrimTokens, effectiveBudget, PRESSURE_SOURCE.TOOLLOOP_RUNTIME_ARRAY);
1836
+ const pressure = s3ToolLoopPressure.fraction;
1737
1837
  // Pressure-tiered trim targets use a single authority: the working
1738
1838
  // message array. Redis drift is logged as an anomaly, never used as
1739
1839
  // a trim trigger. Replay recovery gets its own explicit bounded mode
@@ -1885,17 +1985,17 @@ function createHyperMemEngine() {
1885
1985
  const kept = keptClusters.flat();
1886
1986
  const keptCount = processedConvMsgs.length - kept.length;
1887
1987
  if (keptCount > 0) {
1888
- console.log(`[hypermem-plugin] tool-loop trim: pressure=${(pressure * 100).toFixed(1)}% → ` +
1988
+ console.log(`[hypermem-plugin] tool-loop trim: pressure=${s3ToolLoopPressure.pct}% source=${s3ToolLoopPressure.source} → ` +
1889
1989
  `target=${(trimTarget * 100).toFixed(0)}% (redis=${trimmed} msgs, messages=${keptCount} dropped)`);
1890
1990
  trimmedMessages = [...systemMsgs, ...kept];
1891
1991
  }
1892
1992
  else if (trimmed > 0) {
1893
- console.log(`[hypermem-plugin] tool-loop trim: pressure=${(pressure * 100).toFixed(1)}% → ` +
1993
+ console.log(`[hypermem-plugin] tool-loop trim: pressure=${s3ToolLoopPressure.pct}% source=${s3ToolLoopPressure.source} → ` +
1894
1994
  `target=${(trimTarget * 100).toFixed(0)}% (redis=${trimmed} msgs)`);
1895
1995
  }
1896
1996
  }
1897
1997
  else if (trimmed > 0) {
1898
- console.log(`[hypermem-plugin] tool-loop trim: pressure=${(pressure * 100).toFixed(1)}% → ` +
1998
+ console.log(`[hypermem-plugin] tool-loop trim: pressure=${s3ToolLoopPressure.pct}% source=${s3ToolLoopPressure.source} → ` +
1899
1999
  `target=${(trimTarget * 100).toFixed(0)}% (redis=${trimmed} msgs)`);
1900
2000
  }
1901
2001
  // Apply tool gradient to compress large tool results before returning.
@@ -2088,23 +2188,32 @@ function createHyperMemEngine() {
2088
2188
  console.warn('[hypermem-plugin] assemble: Redis trim failed (non-fatal):', trimErr.message);
2089
2189
  }
2090
2190
  // ── Budget downshift: proactive reshape pass ───────────────────────────────────────
2091
- // If this session previously composed at a higher token budget (e.g. gpt-5.4
2092
- // claude-sonnet model switch), the Redis window is still sized for the old
2093
- // budget. trimHistoryToTokenBudget above trims by count but skips tool
2094
- // gradient logic. A downshift >10% triggers a full reshape: apply tool
2095
- // gradient at the new budget + trim, then write back before compose runs.
2096
- // This prevents several turns of compaction churn after a model switch.
2097
- //
2098
- // Bug fix: previously read from getWindow() which is always null here
2099
- // (afterTurn invalidates it every turn). Also fixed: was doing setWindow()
2100
- // then invalidateWindow() which is a write-then-delete no-op. Now reads
2101
- // from history list and writes back via replaceHistory().
2191
+ // Detect provider/model identity changes as well as raw budget changes.
2192
+ // Provider routing matters operationally because the same model family can
2193
+ // land on a different effective context window, for example Copilot Sonnet
2194
+ // vs direct Anthropic Sonnet. Only budget downshifts trigger the demoted
2195
+ // reshape guard, but verbose logs now show provider/model swaps even when
2196
+ // the effective budget stays flat or increases.
2102
2197
  let lastState = null;
2103
2198
  try {
2104
2199
  lastState = await hm.cache.getModelState(agentId, sk);
2105
2200
  const DOWNSHIFT_THRESHOLD = 0.10;
2106
- const isDownshift = lastState &&
2107
- (lastState.tokenBudget - effectiveBudget) / lastState.tokenBudget > DOWNSHIFT_THRESHOLD;
2201
+ const modelDelta = diffModelState(lastState, {
2202
+ model,
2203
+ tokenBudget: effectiveBudget,
2204
+ });
2205
+ const downshiftFraction = lastState?.tokenBudget
2206
+ ? (lastState.tokenBudget - effectiveBudget) / lastState.tokenBudget
2207
+ : 0;
2208
+ const isDownshift = modelDelta.budgetDownshift && downshiftFraction > DOWNSHIFT_THRESHOLD;
2209
+ if (lastState && (modelDelta.modelChanged || modelDelta.budgetChanged)) {
2210
+ verboseLog(`[hypermem-plugin] model state change: ` +
2211
+ `prev=${modelDelta.previousIdentity.modelKey ?? 'unknown'} ` +
2212
+ `next=${modelDelta.currentIdentity.modelKey ?? 'unknown'} ` +
2213
+ `providerChanged=${modelDelta.providerChanged} ` +
2214
+ `modelIdChanged=${modelDelta.modelIdChanged} ` +
2215
+ `budget=${lastState.tokenBudget}->${effectiveBudget}`);
2216
+ }
2108
2217
  if (isDownshift && !_deferToolPruning) {
2109
2218
  // Sprint 2.2a: demote reshape to guard telemetry.
2110
2219
  //
@@ -2154,6 +2263,7 @@ function createHyperMemEngine() {
2154
2263
  path: 'replay',
2155
2264
  toolLoop: isToolLoop,
2156
2265
  msgCount: messages.length,
2266
+ composeTopicTelemetryStatus: 'intentionally-omitted',
2157
2267
  });
2158
2268
  }
2159
2269
  }
@@ -2166,6 +2276,20 @@ function createHyperMemEngine() {
2166
2276
  // Subagent light mode: skip library/wiki/semantic/keystones/doc chunks.
2167
2277
  // Keeps: system, identity, history, active facts, output profile, tool gradient.
2168
2278
  const subagentLight = isSubagent && _subagentWarming === 'light';
2279
+ let forkedContext;
2280
+ if (isSubagent) {
2281
+ try {
2282
+ const rawForkedContext = await hm.cache.getSlot(agentId, sk, FORKED_CONTEXT_META_SLOT);
2283
+ if (rawForkedContext) {
2284
+ const parsed = JSON.parse(rawForkedContext);
2285
+ if (parsed?.enabled === true)
2286
+ forkedContext = parsed;
2287
+ }
2288
+ }
2289
+ catch {
2290
+ // Fork metadata is advisory; fall back to normal subagent lifecycle.
2291
+ }
2292
+ }
2169
2293
  const request = {
2170
2294
  agentId,
2171
2295
  sessionKey: sk,
@@ -2180,6 +2304,7 @@ function createHyperMemEngine() {
2180
2304
  includeSemanticRecall: subagentLight ? false : undefined, // skip vector/FTS recall
2181
2305
  includeKeystones: subagentLight ? false : undefined, // skip keystone history injection
2182
2306
  prompt,
2307
+ forkedContext,
2183
2308
  skipProviderTranslation: true, // runtime handles provider translation
2184
2309
  };
2185
2310
  const result = await hm.compose(request);
@@ -2195,6 +2320,63 @@ function createHyperMemEngine() {
2195
2320
  replayState: replayRecovery.emittedMarker?.state,
2196
2321
  replayReason: replayRecovery.emittedMarker?.reason,
2197
2322
  });
2323
+ // Sprint 1: emit assemble-level trace with full observability fields
2324
+ // after a full compose (not replay). Surfaces prefix stability,
2325
+ // reranker outcome, slot spans, and compaction eligibility.
2326
+ if (telemetryEnabled() && !cachedContextBlock) {
2327
+ const diag = result.diagnostics;
2328
+ // prefixChanged: compare current prefixHash against prevPrefixHash
2329
+ // (surfaced by the compositor when a cache bypass detected prefix mutation).
2330
+ // When no previous hash is available (first turn), leave prefixChanged undefined.
2331
+ let prefixChanged;
2332
+ if (diag?.prefixHash && diag?.prevPrefixHash) {
2333
+ prefixChanged = diag.prefixHash !== diag.prevPrefixHash;
2334
+ }
2335
+ assembleTrace({
2336
+ agentId,
2337
+ sessionKey: sk,
2338
+ turnId: _asmTurnId,
2339
+ path: isSubagent ? 'subagent' : 'cold',
2340
+ toolLoop: isToolLoop,
2341
+ msgCount: result.messages.length,
2342
+ prefixChanged,
2343
+ prefixHash: diag?.prefixHash,
2344
+ rerankerStatus: diag?.rerankerStatus,
2345
+ rerankerCandidates: diag?.rerankerCandidates,
2346
+ rerankerProvider: diag?.rerankerProvider,
2347
+ slotSpans: diag?.slotSpans,
2348
+ compactionEligibleCount: diag?.compactionEligibleCount,
2349
+ compactionEligibleRatio: diag?.compactionEligibleRatio,
2350
+ compactionProcessedCount: diag?.compactionProcessedCount,
2351
+ composeTopicSource: diag?.composeTopicSource,
2352
+ composeTopicState: diag?.composeTopicState,
2353
+ composeTopicMessageCount: diag?.composeTopicMessageCount,
2354
+ composeTopicStampedMessageCount: diag?.composeTopicStampedMessageCount,
2355
+ composeTopicTelemetryStatus: diag?.composeTopicTelemetryStatus,
2356
+ });
2357
+ if (diag?.adaptiveLifecycleBand) {
2358
+ lifecyclePolicyTelemetry({
2359
+ path: 'compose.preRecall',
2360
+ agentId,
2361
+ sessionKey: sk,
2362
+ band: diag.adaptiveLifecycleBand,
2363
+ pressurePct: diag.adaptiveLifecyclePressurePct,
2364
+ trimSoftTarget: diag.adaptiveTrimSoftTarget,
2365
+ reasons: diag.adaptiveLifecycleReasons,
2366
+ });
2367
+ }
2368
+ if (diag?.adaptiveEvictionLifecycleBand) {
2369
+ lifecyclePolicyTelemetry({
2370
+ path: 'compose.eviction',
2371
+ agentId,
2372
+ sessionKey: sk,
2373
+ band: diag.adaptiveEvictionLifecycleBand,
2374
+ pressurePct: diag.adaptiveEvictionPressurePct,
2375
+ trimSoftTarget: diag.adaptiveTrimSoftTarget,
2376
+ reasons: diag.adaptiveLifecycleBandDiverged ? ['diverged-from-preRecall'] : undefined,
2377
+ });
2378
+ }
2379
+ }
2198
2380
  // Use cached contextBlock if available (cache replay), otherwise use fresh result.
2199
2381
  // After a full compose, write the new contextBlock to cache for the next turn.
2200
2382
  if (cachedContextBlock) {
@@ -2267,10 +2449,17 @@ ${replayRecovery.emittedText}`
2267
2449
  const runtimeSystemTokens = getOverheadFallback(tier);
2268
2450
  _overheadCache.set(sk, contextBlockTokens + runtimeSystemTokens);
2269
2451
  await persistReplayRecoveryState(hm, agentId, sk, replayRecovery.nextState);
2452
+ if (forkedContext) {
2453
+ await hm.cache.setSlot(agentId, sk, FORKED_CONTEXT_META_SLOT, '').catch(() => { });
2454
+ }
2270
2455
  // Update model state for downshift detection on next turn
2271
2456
  try {
2457
+ const modelIdentity = resolveModelIdentity(model);
2272
2458
  await hm.cache.setModelState(agentId, sk, {
2273
2459
  model: model ?? 'unknown',
2460
+ modelKey: modelIdentity.modelKey ?? undefined,
2461
+ provider: modelIdentity.provider ?? undefined,
2462
+ modelId: modelIdentity.modelId ?? undefined,
2274
2463
  tokenBudget: effectiveBudget,
2275
2464
  composedAt: new Date().toISOString(),
2276
2465
  historyDepth,
@@ -2351,6 +2540,9 @@ ${replayRecovery.emittedText}`
2351
2540
  // budget the history is competing for. We trim history to make room.
2352
2541
  const effectiveBudget = computeEffectiveBudget(tokenBudget, model);
2353
2542
  const tokensBefore = await estimateWindowTokens(hm, agentId, sk);
2543
+ // Sprint 3: Unified pressure signal — compact path (Redis estimate)
2544
+ const s3CompactPressure = computeUnifiedPressure(tokensBefore, effectiveBudget, PRESSURE_SOURCE.COMPACT_REDIS_ESTIMATE);
2545
+ console.log(`[hypermem-plugin] compact: pressure=${s3CompactPressure.pct}% source=${s3CompactPressure.source} tokens=${tokensBefore}/${effectiveBudget}`);
2354
2546
  // Target depth for both Redis trimming and JSONL truncation.
2355
2547
  // Target 50% of budget capacity, assume ~500 tokens/message average.
2356
2548
  const targetDepth = Math.max(20, Math.floor((effectiveBudget * 0.5) / 500));
@@ -2364,6 +2556,10 @@ ${replayRecovery.emittedText}`
2364
2556
  // Also triggered when reshape ran recently but the session is still
2365
2557
  // critically full — bypass the reshape guard in that case.
2366
2558
  const NUCLEAR_THRESHOLD = 0.85;
2559
+ // Sprint 3: runtime-total pressure for nuclear check uses its own source label
2560
+ const s3NuclearPressure = currentTokenCount != null
2561
+ ? computeUnifiedPressure(currentTokenCount, effectiveBudget, PRESSURE_SOURCE.COMPACT_RUNTIME_TOTAL)
2562
+ : s3CompactPressure;
2367
2563
  const isNuclear = currentTokenCount != null && currentTokenCount > effectiveBudget * NUCLEAR_THRESHOLD;
2368
2564
  if (isNuclear) {
2369
2565
  // Cut deep: target 20% of normal depth = ~25 messages for a 128k session.
@@ -2382,11 +2578,11 @@ ${replayRecovery.emittedText}`
2382
2578
  postTokens: tokensAfter,
2383
2579
  removed: nuclearRemoved,
2384
2580
  cacheInvalidated: true,
2385
- reason: `currentTokenCount=${currentTokenCount}/${effectiveBudget}`,
2581
+ reason: `${s3NuclearPressure.source}:${s3NuclearPressure.pct}% currentTokenCount=${currentTokenCount}/${effectiveBudget}`,
2386
2582
  });
2387
2583
  }
2388
- console.log(`[hypermem-plugin] compact: NUCLEAR — session at ${currentTokenCount}/${effectiveBudget} tokens ` +
2389
- `(${Math.round((currentTokenCount / effectiveBudget) * 100)}% full), ` +
2584
+ console.log(`[hypermem-plugin] compact: NUCLEAR — pressure=${s3NuclearPressure.pct}% source=${s3NuclearPressure.source} ` +
2585
+ `session at ${currentTokenCount}/${effectiveBudget} tokens, ` +
2390
2586
  `deep-trimmed JSONL to ${nuclearDepth} messages, Redis ${tokensBefore}→${tokensAfter} tokens`);
2391
2587
  return { ok: true, compacted: true, result: { tokensBefore, tokensAfter } };
2392
2588
  }
@@ -2473,10 +2669,10 @@ ${replayRecovery.emittedText}`
2473
2669
  postTokens: tokensAfter,
2474
2670
  removed: historyTrimmed,
2475
2671
  cacheInvalidated: true,
2476
- reason: `over-budget tokensBefore=${tokensBefore}/${effectiveBudget}`,
2672
+ reason: `${s3CompactPressure.source}:${s3CompactPressure.pct}% over-budget tokensBefore=${tokensBefore}/${effectiveBudget}`,
2477
2673
  });
2478
2674
  }
2479
- console.log(`[hypermem-plugin] compact: trimmed ${tokensBefore} → ${tokensAfter} tokens (budget: ${effectiveBudget})`);
2675
+ console.log(`[hypermem-plugin] compact: trimmed ${tokensBefore} → ${tokensAfter} tokens (budget: ${effectiveBudget}, pressure=${s3CompactPressure.pct}% source=${s3CompactPressure.source})`);
2480
2676
  // Density-aware JSONL truncation: derive target depth from actual avg tokens/message
2481
2677
  // rather than assuming a fixed 500 tokens/message. This prevents a large-message
2482
2678
  // session (e.g. 145 msgs × 882 tok = 128k) from bypassing the 1.5x guard and
@@ -2553,8 +2749,54 @@ ${replayRecovery.emittedText}`
2553
2749
  });
2554
2750
  }
2555
2751
  }
2752
+ try {
2753
+ const lastAssistantMessage = [...newMessages].reverse().find(m => m.role === 'assistant');
2754
+ if (lastAssistantMessage) {
2755
+ const modelState = await hm.cache.getModelState(agentId, sk).catch(() => null);
2756
+ const promptCacheUsage = runtimeContext?.promptCache?.lastCallUsage;
2757
+ const outputTokens = resolveAssistantOutputTokenCount(lastAssistantMessage, runtimeContext) ?? 1;
2758
+ const inputTokens = typeof promptCacheUsage?.input === 'number'
2759
+ ? Math.floor(promptCacheUsage.input)
2760
+ : typeof runtimeContext?.currentTokenCount === 'number'
2761
+ ? Math.floor(runtimeContext.currentTokenCount)
2762
+ : null;
2763
+ const cacheReadTokens = typeof promptCacheUsage?.cacheRead === 'number'
2764
+ ? Math.floor(promptCacheUsage.cacheRead)
2765
+ : null;
2766
+ const modelId = typeof lastAssistantMessage.model === 'string'
2767
+ ? lastAssistantMessage.model
2768
+ : modelState?.modelId ?? modelState?.model ?? 'unknown';
2769
+ const provider = typeof lastAssistantMessage.provider === 'string'
2770
+ ? lastAssistantMessage.provider
2771
+ : modelState?.provider ?? 'unknown';
2772
+ const taskType = typeof runtimeContext?.taskType === 'string'
2773
+ ? runtimeContext.taskType ?? null
2774
+ : null;
2775
+ recordOutputMetrics(hm.dbManager.getLibraryDb(), {
2776
+ id: `turn-metric-${agentId}-${Date.now()}-${randomUUID()}`,
2777
+ timestamp: new Date().toISOString(),
2778
+ agent_id: agentId,
2779
+ session_key: sk,
2780
+ model_id: modelId,
2781
+ provider,
2782
+ fos_version: null,
2783
+ mod_version: null,
2784
+ mod_id: null,
2785
+ task_type: taskType,
2786
+ output_tokens: outputTokens,
2787
+ input_tokens: inputTokens,
2788
+ cache_read_tokens: cacheReadTokens,
2789
+ corrections_fired: [],
2790
+ latency_ms: null,
2791
+ });
2792
+ }
2793
+ }
2794
+ catch {
2795
+ // Non-fatal telemetry path
2796
+ }
2556
2797
  // P3.1: Topic detection on the inbound user message
2557
2798
  // Non-fatal: topic detection never blocks afterTurn
2799
+ let adaptiveTopicShiftConfidence;
2558
2800
  try {
2559
2801
  const inboundUserMsg = newMessages
2560
2802
  .map(m => m)
@@ -2571,6 +2813,7 @@ ${replayRecovery.emittedText}`
2571
2813
  const topicMap = new SessionTopicMap(db);
2572
2814
  const activeTopic = topicMap.getActiveTopic(sk);
2573
2815
  const signal = detectTopicShift(neutralUser, contextMessages, activeTopic?.id ?? null);
2816
+ adaptiveTopicShiftConfidence = signal.confidence;
2574
2817
  if (signal.isNewTopic && signal.topicName) {
2575
2818
  const newTopicId = topicMap.createTopic(sk, signal.topicName);
2576
2819
  // New topic starts with count 1 (the message that triggered the shift)
@@ -2610,13 +2853,36 @@ ${replayRecovery.emittedText}`
2610
2853
  // gradient-compressed window to budget before writing to Redis. Without
2611
2854
  // this, afterTurn writes up to 250 messages regardless of budget, causing
2612
2855
  // trimHistoryToTokenBudget to fire and trim ~200 messages on every
2613
- // subsequent assemble() — the churn loop seen in Helm's logs.
2856
+ // subsequent assemble() — the churn loop seen in Eve's logs.
2614
2857
  if (hm.cache.isConnected) {
2615
2858
  try {
2616
2859
  const modelState = await hm.cache.getModelState(agentId, sk);
2617
2860
  const gradientBudget = modelState?.tokenBudget;
2618
2861
  const gradientDepth = modelState?.historyDepth;
2619
- await hm.refreshRedisGradient(agentId, sk, gradientBudget, gradientDepth);
2862
+ const inboundUserMsg = newMessages
2863
+ .map(m => m)
2864
+ .find(m => m.role === 'user');
2865
+ const inboundUserText = inboundUserMsg
2866
+ ? stripMessageMetadata(extractTextFromInboundContent(inboundUserMsg.content))
2867
+ : '';
2868
+ const lifecyclePolicy = resolveAdaptiveLifecyclePolicy({
2869
+ usedTokens: estimateMessageArrayTokens(messages),
2870
+ effectiveBudget: gradientBudget,
2871
+ userTurnCount: messages.filter(m => m.role === 'user').length,
2872
+ explicitNewSession: /^\/new(?:\s|$)/i.test(inboundUserText.trim()),
2873
+ topicShiftConfidence: adaptiveTopicShiftConfidence,
2874
+ });
2875
+ lifecyclePolicyTelemetry({
2876
+ path: 'afterTurn.gradient',
2877
+ agentId,
2878
+ sessionKey: sk,
2879
+ band: lifecyclePolicy.band,
2880
+ pressurePct: lifecyclePolicy.pressurePct,
2881
+ topicShiftConfidence: adaptiveTopicShiftConfidence,
2882
+ trimSoftTarget: lifecyclePolicy.trimSoftTarget,
2883
+ reasons: lifecyclePolicy.reasons,
2884
+ });
2885
+ await hm.refreshRedisGradient(agentId, sk, gradientBudget, gradientDepth, lifecyclePolicy.trimSoftTarget);
2620
2886
  }
2621
2887
  catch (refreshErr) {
2622
2888
  console.warn('[hypermem-plugin] afterTurn: refreshRedisGradient failed (non-fatal):', refreshErr.message);
@@ -2634,7 +2900,7 @@ ${replayRecovery.emittedText}`
2634
2900
  // If a session just finished a turn at >80% pressure, the NEXT turn's
2635
2901
  // incoming tool results (parallel web searches, large exec output, etc.)
2636
2902
  // will hit a window with no headroom — the ingestion wave failure mode
2637
- // (reported by Helm, 2026-04-05). Pre-trim here so the tool-loop
2903
+ // (reported by Eve, 2026-04-05). Pre-trim here so the tool-loop
2638
2904
  // assemble() path starts the next turn with meaningful space.
2639
2905
  //
2640
2906
  // Uses modelState.tokenBudget if cached; skips if unavailable (non-fatal).
@@ -2786,7 +3052,12 @@ ${replayRecovery.emittedText}`
2786
3052
  * subagentWarming config ('full' | 'light' | 'off').
2787
3053
  * Returns a rollback handle to clean up if spawn fails.
2788
3054
  */
2789
- async prepareSubagentSpawn({ parentSessionKey, childSessionKey }) {
3055
+ async prepareSubagentSpawn(params) {
3056
+ const { parentSessionKey, childSessionKey } = params;
3057
+ const forkParams = params;
3058
+ const contextMode = forkParams.contextMode;
3059
+ const parentSessionId = forkParams.parentSessionId;
3060
+ const childSessionId = forkParams.childSessionId;
2790
3061
  if (_subagentWarming === 'off') {
2791
3062
  return undefined;
2792
3063
  }
@@ -2794,7 +3065,12 @@ ${replayRecovery.emittedText}`
2794
3065
  const hm = await getHyperMem();
2795
3066
  const parentAgentId = extractAgentId(parentSessionKey);
2796
3067
  const childAgentId = extractAgentId(childSessionKey);
2797
- // Seed child with parent's active facts
3068
+ const isForkedContext = contextMode === 'fork';
3069
+ let parentHistoryMessages = 0;
3070
+ let parentUserTurnCount = 0;
3071
+ let parentPressureFraction;
3072
+ // Seed child with parent's active facts. This preserves the historical
3073
+ // slot for compatibility; facts still primarily come from L4 by agent id.
2798
3074
  const facts = hm.getActiveFacts(parentAgentId, { limit: 50 });
2799
3075
  if (facts && facts.length > 0) {
2800
3076
  const factBlock = facts
@@ -2802,22 +3078,48 @@ ${replayRecovery.emittedText}`
2802
3078
  .join('\n');
2803
3079
  await hm.cache.setSlot(childAgentId, childSessionKey, 'parentFacts', factBlock);
2804
3080
  }
2805
- // For 'full' warming, also seed recent history context
2806
- if (_subagentWarming === 'full') {
2807
- const history = await hm.cache.getHistory(parentAgentId, parentSessionKey);
2808
- if (history && history.length > 0) {
2809
- const recentHistory = history.slice(-10);
2810
- await hm.cache.setSlot(childAgentId, childSessionKey, 'parentHistory', JSON.stringify(recentHistory));
3081
+ const history = await hm.cache.getHistory(parentAgentId, parentSessionKey);
3082
+ if (history && history.length > 0) {
3083
+ const maxSeededHistory = _subagentWarming === 'full' ? 25 : 12;
3084
+ const recentHistory = history.slice(-maxSeededHistory);
3085
+ parentHistoryMessages = recentHistory.length;
3086
+ parentUserTurnCount = recentHistory.filter(m => m.role === 'user').length;
3087
+ const parentTokens = estimateMessageArrayTokens(recentHistory);
3088
+ const parentModelState = await hm.cache.getModelState(parentAgentId, parentSessionKey).catch(() => null);
3089
+ const parentBudget = parentModelState?.tokenBudget && parentModelState.tokenBudget > 0
3090
+ ? parentModelState.tokenBudget
3091
+ : undefined;
3092
+ parentPressureFraction = parentBudget ? parentTokens / parentBudget : undefined;
3093
+ if (isForkedContext || _subagentWarming === 'full') {
3094
+ await hm.cache.replaceHistory(childAgentId, childSessionKey, recentHistory, maxSeededHistory);
3095
+ await hm.cache.invalidateWindow(childAgentId, childSessionKey).catch(() => { });
2811
3096
  }
3097
+ await hm.cache.setSlot(childAgentId, childSessionKey, 'parentHistory', JSON.stringify(recentHistory));
3098
+ }
3099
+ if (isForkedContext) {
3100
+ const forkedMeta = {
3101
+ enabled: true,
3102
+ parentSessionKey,
3103
+ parentSessionId,
3104
+ childSessionId,
3105
+ parentPressureFraction,
3106
+ parentUserTurnCount,
3107
+ parentHistoryMessages,
3108
+ };
3109
+ await hm.cache.setSlot(childAgentId, childSessionKey, FORKED_CONTEXT_META_SLOT, JSON.stringify(forkedMeta));
2812
3110
  }
2813
3111
  console.log(`[hypermem-plugin] prepareSubagentSpawn: seeded ${childSessionKey} ` +
2814
- `from ${parentSessionKey} (warming=${_subagentWarming})`);
3112
+ `from ${parentSessionKey} (warming=${_subagentWarming}, contextMode=${contextMode ?? 'isolated'}, ` +
3113
+ `history=${parentHistoryMessages})`);
2815
3114
  return {
2816
3115
  async rollback() {
2817
3116
  try {
2818
3117
  const hm = await getHyperMem();
2819
3118
  await hm.cache.setSlot(childAgentId, childSessionKey, 'parentFacts', '');
2820
3119
  await hm.cache.setSlot(childAgentId, childSessionKey, 'parentHistory', '');
3120
+ await hm.cache.setSlot(childAgentId, childSessionKey, FORKED_CONTEXT_META_SLOT, '');
3121
+ await hm.cache.replaceHistory(childAgentId, childSessionKey, [], 0);
3122
+ await hm.cache.invalidateWindow(childAgentId, childSessionKey).catch(() => { });
2821
3123
  }
2822
3124
  catch {
2823
3125
  // Rollback is best-effort
@@ -2843,6 +3145,7 @@ ${replayRecovery.emittedText}`
2843
3145
  await Promise.all([
2844
3146
  hm.cache.setSlot(childAgentId, childSessionKey, 'parentFacts', ''),
2845
3147
  hm.cache.setSlot(childAgentId, childSessionKey, 'parentHistory', ''),
3148
+ hm.cache.setSlot(childAgentId, childSessionKey, FORKED_CONTEXT_META_SLOT, ''),
2846
3149
  hm.cache.setSlot(childAgentId, childSessionKey, 'assemblyContextBlock', ''),
2847
3150
  hm.cache.setSlot(childAgentId, childSessionKey, 'assemblyContextAt', '0'),
2848
3151
  hm.cache.invalidateWindow(childAgentId, childSessionKey).catch(() => { }),
@@ -3054,6 +3357,23 @@ const hypercompositorConfigSchema = z.object({
3054
3357
  timeout: z.number().int().positive().optional(),
3055
3358
  batchSize: z.number().int().positive().optional(),
3056
3359
  }).optional(),
3360
+ /**
3361
+ * Optional reranker config. When omitted or provider is 'none', the
3362
+ * compositor runs with RRF-only ordering. See INSTALL.md → Reranker.
3363
+ */
3364
+ reranker: z.object({
3365
+ provider: z.enum(['zeroentropy', 'openrouter', 'local', 'none']),
3366
+ minCandidates: z.number().int().nonnegative().optional(),
3367
+ maxDocuments: z.number().int().positive().optional(),
3368
+ topK: z.number().int().positive().optional(),
3369
+ timeoutMs: z.number().int().positive().optional(),
3370
+ zeroEntropyApiKey: z.string().optional(),
3371
+ zeroEntropyModel: z.string().optional(),
3372
+ openrouterApiKey: z.string().optional(),
3373
+ openrouterModel: z.string().optional(),
3374
+ ollamaUrl: z.string().optional(),
3375
+ ollamaModel: z.string().optional(),
3376
+ }).optional(),
3057
3377
  });
3058
3378
  // ─── Plugin Entry ───────────────────────────────────────────────
3059
3379
  const engine = createHyperMemEngine();