@psiclawops/hypermem 0.8.5 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. package/CHANGELOG.md +26 -0
  2. package/INSTALL.md +132 -9
  3. package/README.md +119 -272
  4. package/bench/README.md +42 -0
  5. package/bench/data-access-bench.mjs +380 -0
  6. package/bin/hypermem-bench.mjs +2 -0
  7. package/bin/hypermem-doctor.mjs +412 -0
  8. package/bin/hypermem-model-audit.mjs +339 -0
  9. package/bin/hypermem-status.mjs +491 -70
  10. package/dist/adaptive-lifecycle.d.ts +81 -0
  11. package/dist/adaptive-lifecycle.d.ts.map +1 -0
  12. package/dist/adaptive-lifecycle.js +190 -0
  13. package/dist/budget-policy.d.ts +1 -1
  14. package/dist/budget-policy.d.ts.map +1 -1
  15. package/dist/budget-policy.js +10 -5
  16. package/dist/cache.d.ts +1 -0
  17. package/dist/cache.d.ts.map +1 -1
  18. package/dist/cache.js +2 -0
  19. package/dist/composition-snapshot-integrity.d.ts +36 -0
  20. package/dist/composition-snapshot-integrity.d.ts.map +1 -0
  21. package/dist/composition-snapshot-integrity.js +131 -0
  22. package/dist/composition-snapshot-runtime.d.ts +59 -0
  23. package/dist/composition-snapshot-runtime.d.ts.map +1 -0
  24. package/dist/composition-snapshot-runtime.js +250 -0
  25. package/dist/composition-snapshot-store.d.ts +44 -0
  26. package/dist/composition-snapshot-store.d.ts.map +1 -0
  27. package/dist/composition-snapshot-store.js +117 -0
  28. package/dist/compositor.d.ts +125 -1
  29. package/dist/compositor.d.ts.map +1 -1
  30. package/dist/compositor.js +692 -44
  31. package/dist/doc-chunk-store.d.ts +19 -0
  32. package/dist/doc-chunk-store.d.ts.map +1 -1
  33. package/dist/doc-chunk-store.js +56 -6
  34. package/dist/hybrid-retrieval.d.ts +38 -0
  35. package/dist/hybrid-retrieval.d.ts.map +1 -1
  36. package/dist/hybrid-retrieval.js +86 -1
  37. package/dist/index.d.ts +12 -3
  38. package/dist/index.d.ts.map +1 -1
  39. package/dist/index.js +28 -2
  40. package/dist/knowledge-store.d.ts +4 -1
  41. package/dist/knowledge-store.d.ts.map +1 -1
  42. package/dist/knowledge-store.js +27 -4
  43. package/dist/library-schema.d.ts +12 -8
  44. package/dist/library-schema.d.ts.map +1 -1
  45. package/dist/library-schema.js +22 -8
  46. package/dist/message-store.d.ts.map +1 -1
  47. package/dist/message-store.js +7 -3
  48. package/dist/metrics-dashboard.d.ts +18 -1
  49. package/dist/metrics-dashboard.d.ts.map +1 -1
  50. package/dist/metrics-dashboard.js +52 -14
  51. package/dist/reranker.d.ts +1 -1
  52. package/dist/reranker.js +2 -2
  53. package/dist/schema.d.ts +1 -1
  54. package/dist/schema.d.ts.map +1 -1
  55. package/dist/schema.js +28 -1
  56. package/dist/seed.d.ts.map +1 -1
  57. package/dist/seed.js +2 -0
  58. package/dist/topic-synthesizer.d.ts +20 -0
  59. package/dist/topic-synthesizer.d.ts.map +1 -1
  60. package/dist/topic-synthesizer.js +113 -3
  61. package/dist/trigger-registry.d.ts.map +1 -1
  62. package/dist/trigger-registry.js +10 -2
  63. package/dist/types.d.ts +271 -1
  64. package/dist/types.d.ts.map +1 -1
  65. package/dist/version.d.ts +7 -7
  66. package/dist/version.d.ts.map +1 -1
  67. package/dist/version.js +17 -7
  68. package/docs/DIAGNOSTICS.md +205 -0
  69. package/docs/INTEGRATION_VALIDATION.md +186 -0
  70. package/docs/MIGRATION.md +9 -6
  71. package/docs/MIGRATION_GUIDE.md +125 -101
  72. package/docs/ROADMAP.md +238 -20
  73. package/docs/TUNING.md +19 -5
  74. package/install.sh +152 -401
  75. package/memory-plugin/LICENSE +190 -0
  76. package/memory-plugin/README.md +20 -0
  77. package/memory-plugin/dist/index.js +50 -0
  78. package/memory-plugin/package.json +2 -2
  79. package/package.json +18 -4
  80. package/plugin/LICENSE +190 -0
  81. package/plugin/README.md +20 -0
  82. package/plugin/dist/index.d.ts +29 -0
  83. package/plugin/dist/index.d.ts.map +1 -1
  84. package/plugin/dist/index.js +288 -23
  85. package/plugin/dist/index.js.map +1 -1
  86. package/plugin/package.json +2 -2
  87. package/scripts/install-runtime.mjs +12 -1
@@ -15,11 +15,11 @@ import { filterByScope } from './retrieval-policy.js';
15
15
  import { DEFAULT_TRIGGERS, matchTriggers, logRegistryStartup, } from './trigger-registry.js';
16
16
  import { MessageStore } from './message-store.js';
17
17
  import { SessionTopicMap } from './session-topic-map.js';
18
- import { toProviderFormat } from './provider-translator.js';
18
+ import { toProviderFormat, detectProvider as s4DetectProvider } from './provider-translator.js';
19
19
  import { DocChunkStore } from './doc-chunk-store.js';
20
20
  import { hybridSearch } from './hybrid-retrieval.js';
21
- import { ensureCompactionFenceSchema, updateCompactionFence, getCompactionFence } from './compaction-fence.js';
22
- import { getActiveContext } from './context-store.js';
21
+ import { ensureCompactionFenceSchema, updateCompactionFence, getCompactionFence, getCompactionEligibility } from './compaction-fence.js';
22
+ import { getActiveContext, getOrCreateActiveContext } from './context-store.js';
23
23
  import { rankKeystones, scoreKeystone } from './keystone-scorer.js';
24
24
  import { buildOrgRegistryFromDb, defaultOrgRegistry } from './cross-agent.js';
25
25
  import { getActiveFOS, matchMOD, renderFOS, renderMOD, renderLightFOS, resolveOutputTier, buildActionVerificationSummary } from './fos-mod.js';
@@ -27,8 +27,11 @@ import { KnowledgeStore } from './knowledge-store.js';
27
27
  import { TemporalStore, hasTemporalSignals } from './temporal-store.js';
28
28
  import { isOpenDomainQuery, searchOpenDomain } from './open-domain.js';
29
29
  import { TRIM_BUDGET_POLICY, resolveTrimBudgets } from './budget-policy.js';
30
+ import { resolveAdaptiveLifecyclePolicy } from './adaptive-lifecycle.js';
30
31
  import { formatToolChainStub, parseToolChainStub, formatArtifactRef, isArtifactRef } from './degradation.js';
31
32
  import { ToolArtifactStore } from './tool-artifact-store.js';
33
+ import { insertCompositionSnapshot, getLatestValidCompositionSnapshot, listCompositionSnapshots, MAX_WARM_RESTORE_REPAIR_DEPTH, } from './composition-snapshot-store.js';
34
+ import { buildCompositionSnapshotSlots, restoreWarmSnapshotState, WARM_RESTORE_MEASUREMENT_GATES, } from './composition-snapshot-runtime.js';
32
35
  /**
33
36
  * Files that OpenClaw's contextInjection injects into the system prompt.
34
37
  * HyperMem must not re-inject these via doc chunk retrieval to avoid duplication.
@@ -300,6 +303,71 @@ export function computeAdaptiveHistoryDepth(sessionType, observedDensity, histor
300
303
  const depth = Math.floor((historyBudgetTokens * SAFETY_MARGIN) / observedDensity);
301
304
  return Math.min(maxHistoryMessages, Math.max(20, depth));
302
305
  }
// ─── Sprint 3: Unified Pressure Signal ───────────────────────────────────────────────────────
/**
 * Canonical pressure labels shared across compose and compaction paths.
 * Use these constants when setting the `pressureSource` field so all consumers
 * can filter logs with a stable string without guessing spellings.
 *
 * The object is frozen so the label set cannot be changed at runtime; in an
 * ES module (strict mode) an attempted write throws a TypeError.
 */
export const PRESSURE_SOURCE = Object.freeze({
    /** Compose path: pressure derived from (budget - remaining) after full slot assembly. */
    COMPOSE_POST_ASSEMBLY: 'compose:post-assembly',
    /** Compose path: pressure measured immediately before semantic recall runs. */
    COMPOSE_PRE_RECALL: 'compose:pre-recall',
    /** Compaction path: pressure from Redis token estimate / effectiveBudget. */
    COMPACT_REDIS_ESTIMATE: 'compact:redis-estimate',
    /** Compaction path: pressure from runtime-reported currentTokenCount / effectiveBudget. */
    COMPACT_RUNTIME_TOTAL: 'compact:runtime-total',
    /** Tool-loop assemble path: pressure from in-memory working message array / effectiveBudget. */
    TOOLLOOP_RUNTIME_ARRAY: 'toolloop:runtime-array',
});
/**
 * Compute a unified pressure fraction so compose and compaction paths report
 * the same numeric concept without drift.
 *
 * The fraction is floored at 0 and is never NaN: a negative or NaN ratio, and
 * a budget that is not greater than 0, all report 0. There is no upper clamp —
 * values above 1 mean the budget is exceeded, and callers get the raw fraction
 * so they can decide their own thresholds without us hardcoding them here.
 *
 * @param usedTokens   Tokens consumed (numerator).
 * @param budgetTokens Effective budget (denominator). Values that are not > 0 yield a fraction of 0.
 * @param source       Label from PRESSURE_SOURCE for telemetry (metadata only).
 * @returns { fraction, pct, source } where fraction = max(0, usedTokens / budgetTokens),
 *          pct = Math.round(fraction * 100), source = the label passed in.
 */
export function computeUnifiedPressure(usedTokens, budgetTokens, source) {
    const ratio = budgetTokens > 0 ? usedTokens / budgetTokens : 0;
    // `ratio > 0` is false for negatives, -0 and NaN, so all of them collapse to 0.
    const fraction = ratio > 0 ? ratio : 0;
    const pct = Math.round(fraction * 100);
    return { fraction, pct, source };
}
/**
 * 0.9.0: baseline semantic-recall envelope that the adaptive lifecycle scales
 * during compose.
 *
 * The two fractions are shares of the remaining token budget (main recall and
 * trigger-miss fallback recall). `candidateLimit` is the unscaled candidate
 * count; `candidateLimitMin` / `candidateLimitMax` bound it after scaling so
 * the retrieval window never collapses and never grows without limit.
 */
export const RECALL_BREADTH_BASE = Object.freeze({
    mainBudgetFraction: 0.12,
    fallbackBudgetFraction: 0.10,
    candidateLimit: 10,
    candidateLimitMin: 6,
    candidateLimitMax: 16,
});
/**
 * Scale the recall envelope by an adaptive-lifecycle multiplier.
 *
 * Pure: reads only its arguments and RECALL_BREADTH_BASE. A multiplier that is
 * not a finite number greater than 0 is treated as 1; a falsy or negative
 * `remainingTokens` is treated as 0.
 *
 * @param remainingTokens Tokens still available in the compose budget.
 * @param multiplier      Lifecycle smartRecallMultiplier.
 * @returns { mainBudgetTokens, fallbackBudgetTokens, candidateLimit, multiplier }
 *          where `multiplier` is the value actually applied.
 */
export function scaleRecallBreadth(remainingTokens, multiplier) {
    const {
        mainBudgetFraction,
        fallbackBudgetFraction,
        candidateLimit: baseLimit,
        candidateLimitMin: floorLimit,
        candidateLimitMax: ceilingLimit,
    } = RECALL_BREADTH_BASE;
    let effectiveMultiplier = 1;
    if (Number.isFinite(multiplier) && multiplier > 0) {
        effectiveMultiplier = multiplier;
    }
    const tokenPool = Math.max(0, Math.floor(remainingTokens || 0));
    // Whole-token share of the pool for one base fraction, after scaling.
    const budgetFor = (fraction) => Math.max(0, Math.floor(tokenPool * fraction * effectiveMultiplier));
    const scaledLimit = Math.ceil(baseLimit * effectiveMultiplier);
    const atLeastFloor = Math.max(floorLimit, scaledLimit);
    return {
        mainBudgetTokens: budgetFor(mainBudgetFraction),
        fallbackBudgetTokens: budgetFor(fallbackBudgetFraction),
        candidateLimit: Math.min(ceilingLimit, atLeastFloor),
        multiplier: effectiveMultiplier,
    };
}
303
371
  const DEFAULT_CONFIG = {
304
372
  // Primary budget controls
305
373
  budgetFraction: 0.703,
@@ -397,6 +465,82 @@ function clusterNeutralMessages(messages) {
397
465
  }
398
466
  return clusters;
399
467
  }
/**
 * Plan topic-aware eviction over history clusters without dropping anything.
 *
 * Protected clusters (never offered for topic-aware drop):
 *   - the newest cluster containing a user-role message,
 *   - any cluster with a message whose metadata.dynamicBoundary === true,
 *   - any non-empty cluster made up only of system-role messages.
 *
 * A cluster is eligible for topic-aware drop only when the plan asks for it,
 * an active topic id is supplied, the cluster is unprotected, carries no tool
 * calls or tool results, and every message has a string topicId different
 * from the active one. Messages without a topicId keep their cluster out of
 * the drop list.
 *
 * @param clusters Array of clusters, each exposing a `messages` array.
 * @param policy   Lifecycle policy; only `policy.evictionPlan.preferTopicAwareDrop` is read.
 * @param opts     Optional `{ activeTopicId }`.
 * @returns { preferTopicAwareDrop, topicAwareDropOrder, protectedIndices, telemetry }
 *          — `topicAwareDropOrder` lists eligible cluster indices in ascending
 *          order; `telemetry.topicAwareDroppedClusters` is always 0 here.
 */
export function orderClustersForAdaptiveEviction(clusters, policy, opts = {}) {
    const { evictionPlan } = policy;
    const activeTopic = opts.activeTopicId;
    const isStamped = (message) => typeof message.topicId === 'string';
    const carriesToolPayload = (message) => (message.toolCalls && message.toolCalls.length > 0)
        || (message.toolResults && message.toolResults.length > 0);
    const protectedIndices = new Set();
    // Walk back from the newest cluster to find the current-user-turn proxy.
    let newestUserIdx = clusters.length - 1;
    while (newestUserIdx >= 0 && !clusters[newestUserIdx].messages.some(m => m.role === 'user')) {
        newestUserIdx -= 1;
    }
    if (newestUserIdx >= 0) {
        protectedIndices.add(newestUserIdx);
    }
    // One forward pass: collect remaining protections and topic-stamp coverage counts.
    let totalMessages = 0;
    let stampedMessages = 0;
    for (const [idx, { messages }] of clusters.entries()) {
        const marksBoundary = messages.some(m => m.metadata?.dynamicBoundary === true);
        const pureSystem = messages.length > 0 && messages.every(m => m.role === 'system');
        if (marksBoundary || pureSystem) {
            protectedIndices.add(idx);
        }
        totalMessages += messages.length;
        stampedMessages += messages.filter(isStamped).length;
    }
    // Percentage rounded to two decimal places.
    let topicIdCoveragePct = 0;
    if (totalMessages > 0) {
        topicIdCoveragePct = Math.round((stampedMessages / totalMessages) * 10000) / 100;
    }
    const topicAwareDropOrder = [];
    if (evictionPlan.preferTopicAwareDrop && activeTopic) {
        clusters.forEach(({ messages }, idx) => {
            if (protectedIndices.has(idx)) {
                return;
            }
            // Tool chains stay atomic; they are not offered for topic-aware drop.
            if (messages.some(carriesToolPayload)) {
                return;
            }
            if (messages.length === 0) {
                return;
            }
            const whollyInactive = messages.every(m => isStamped(m) && m.topicId !== activeTopic);
            if (whollyInactive) {
                topicAwareDropOrder.push(idx);
            }
        });
    }
    // First matching reason wins; undefined means topic-aware drop is usable.
    const resolveBypassReason = () => {
        if (!activeTopic) {
            return 'no-active-topic';
        }
        if (stampedMessages === 0) {
            return 'no-stamped-clusters';
        }
        if (!evictionPlan.preferTopicAwareDrop) {
            return 'band-not-topic-aware';
        }
        if (topicAwareDropOrder.length === 0) {
            return 'no-eligible-inactive-topic-clusters';
        }
        return undefined;
    };
    const bypassReason = resolveBypassReason();
    return {
        preferTopicAwareDrop: evictionPlan.preferTopicAwareDrop,
        topicAwareDropOrder,
        protectedIndices,
        telemetry: {
            topicAwareEligibleClusters: topicAwareDropOrder.length,
            topicAwareDroppedClusters: 0,
            protectedClusters: protectedIndices.size,
            topicIdCoveragePct,
            bypassReason,
        },
    };
}
400
544
  /**
401
545
  * Public reshape helper: apply tool gradient then trim to fit within a token budget.
402
546
  *
@@ -896,6 +1040,9 @@ export function resolveArtifactOversizeThreshold(effectiveBudget) {
896
1040
  const raw = Math.floor(softBudget * ARTIFACT_BUDGET_FRACTION);
897
1041
  return Math.min(ARTIFACT_THRESHOLD_CEILING, Math.max(ARTIFACT_THRESHOLD_FLOOR, raw));
898
1042
  }
/**
 * Report whether a prompt is an explicit `/new` session command.
 *
 * Matches, case-insensitively, a trimmed prompt that is exactly `/new` or
 * starts with `/new` followed by whitespace.
 *
 * @param prompt Prompt text; may be null or undefined.
 * @returns true only for a string prompt that matches; false otherwise.
 */
function isExplicitNewSessionPrompt(prompt) {
    // Anything that is not a string cannot be a slash command. Answering false
    // here avoids the TypeError that calling .trim() on such a value would raise.
    if (typeof prompt !== 'string') {
        return false;
    }
    return /^\/new(?:\s|$)/i.test(prompt.trim());
}
899
1046
  /**
900
1047
  * C2: Degrade an oversized doc chunk to a canonical ArtifactRef string.
901
1048
  *
@@ -1127,6 +1274,10 @@ export class Compositor {
1127
1274
  vectorStore;
1128
1275
  libraryDb;
1129
1276
  triggerRegistry;
1277
+ reranker;
1278
+ rerankerMinCandidates;
1279
+ rerankerMaxDocuments;
1280
+ rerankerTopK;
1130
1281
  /** Cached org registry loaded from fleet_agents at construction time. */
1131
1282
  _orgRegistry;
1132
1283
  constructor(deps, config) {
@@ -1134,6 +1285,10 @@ export class Compositor {
1134
1285
  this.vectorStore = deps.vectorStore || null;
1135
1286
  this.libraryDb = deps.libraryDb || null;
1136
1287
  this.triggerRegistry = deps.triggerRegistry || DEFAULT_TRIGGERS;
1288
+ this.reranker = deps.reranker ?? null;
1289
+ this.rerankerMinCandidates = deps.rerankerMinCandidates ?? 2;
1290
+ this.rerankerMaxDocuments = deps.rerankerMaxDocuments;
1291
+ this.rerankerTopK = deps.rerankerTopK;
1137
1292
  // Load org registry from DB on init; fall back to hardcoded if DB empty.
1138
1293
  this._orgRegistry = this.libraryDb
1139
1294
  ? buildOrgRegistryFromDb(this.libraryDb)
@@ -1151,6 +1306,13 @@ export class Compositor {
1151
1306
  setVectorStore(vs) {
1152
1307
  this.vectorStore = vs;
1153
1308
  }
1309
+ /**
1310
+ * Set or replace the reranker after construction.
1311
+ * Called by hypermem.create() once the reranker config has been resolved.
+ *
+ * @param rr Value assigned to `this.reranker`, overwriting whatever the
+ *           constructor stored from `deps.reranker`.
1312
+ */
1313
+ setReranker(rr) {
1314
+ this.reranker = rr;
1315
+ }
1154
1316
  /**
1155
1317
  * Hot-reload the org registry from the fleet_agents table.
1156
1318
  * Call after fleet membership changes (new agent, org restructure)
@@ -1455,6 +1617,41 @@ export class Compositor {
1455
1617
  ? Math.min(request.historyDepth, s4AdaptiveDepth)
1456
1618
  : s4AdaptiveDepth;
1457
1619
  let remaining = budget;
1620
+ // 0.9.0: resolve an early adaptive lifecycle posture for the
1621
+ // compose-window cluster-drop pass. Pressure is estimated from the
1622
+ // SQLite sample over the effective budget so the eviction-order
1623
+ // decision routes through the same band classifier the rest of the
1624
+ // 0.9.0 paths already use — no parallel pressure constants here.
1625
+ const s09SampleTokens = sampleMessages.reduce((sum, m) => sum + estimateMessageTokens(m), 0);
1626
+ const s09EvictionPressure = computeUnifiedPressure(s09SampleTokens, budget, PRESSURE_SOURCE.COMPOSE_PRE_RECALL);
1627
+ let s09ObservedUserTurnCount = sampleMessages.filter(m => m.role === 'user').length;
1628
+ const s09ForkedContextSeed = request.forkedContext?.enabled ? request.forkedContext : undefined;
1629
+ const s09ForkedParentPressure = typeof s09ForkedContextSeed?.parentPressureFraction === 'number'
1630
+ && Number.isFinite(s09ForkedContextSeed.parentPressureFraction)
1631
+ ? s09ForkedContextSeed.parentPressureFraction
1632
+ : undefined;
1633
+ const s09EvictionPolicyPressure = s09ForkedContextSeed
1634
+ && s09ObservedUserTurnCount === 0
1635
+ && s09ForkedParentPressure != null
1636
+ ? s09ForkedParentPressure
1637
+ : s09EvictionPressure.fraction;
1638
+ const evictionLifecyclePolicy = resolveAdaptiveLifecyclePolicy({
1639
+ pressureFraction: s09EvictionPolicyPressure,
1640
+ userTurnCount: s09ObservedUserTurnCount,
1641
+ explicitNewSession: isExplicitNewSessionPrompt(request.prompt ?? null),
1642
+ forkedContext: Boolean(s09ForkedContextSeed),
1643
+ forkedParentPressureFraction: s09ForkedParentPressure,
1644
+ forkedParentUserTurnCount: s09ForkedContextSeed?.parentUserTurnCount,
1645
+ });
1646
+ let adaptiveEvictionTopicAwareEligibleClusters = 0;
1647
+ let adaptiveEvictionTopicAwareDroppedClusters = 0;
1648
+ let adaptiveEvictionProtectedClusters = 0;
1649
+ let adaptiveEvictionTopicIdCoveragePct = 0;
1650
+ let adaptiveEvictionBypassReason;
1651
+ let composeTopicSource = 'none';
1652
+ let composeTopicState = 'history-disabled';
1653
+ let composeTopicMessageCount = 0;
1654
+ let composeTopicStampedMessageCount = 0;
1458
1655
  // Phase 0 fence enforcement: resolve the compaction fence for this conversation.
1459
1656
  // All downstream message queries use this as a lower bound to exclude zombie
1460
1657
  // messages below the fence that should have been compacted.
@@ -1506,6 +1703,27 @@ export class Compositor {
1506
1703
  slots.identity = tokens;
1507
1704
  remaining -= tokens;
1508
1705
  }
1706
+ const repairNoticeContent = await this.getSlotContent(request.agentId, request.sessionKey, 'repair_notice', db);
1707
+ // ─── Warm-Restore Repair Notice (never suppressed) ─────────
1708
+ // If a session was reconstructed from a snapshot, the repair notice must
1709
+ // stay above restored conversation content even under budget pressure.
1710
+ // This mirrors the system/identity invariant: history and memory slots may
1711
+ // be trimmed, but the provenance notice is not optional operational state.
1712
+ if (repairNoticeContent) {
1713
+ const tokens = estimateTokens(repairNoticeContent);
1714
+ messages.push({
1715
+ role: 'system',
1716
+ textContent: repairNoticeContent,
1717
+ toolCalls: null,
1718
+ toolResults: null,
1719
+ metadata: { warmRestoreRepairNotice: true },
1720
+ });
1721
+ slots.system += tokens;
1722
+ remaining -= tokens;
1723
+ if (remaining < 0) {
1724
+ warnings.push('Warm-restore repair notice exceeded budget but was retained as non-suppressible system context');
1725
+ }
1726
+ }
1509
1727
  // ─── Stable Output Profile Prefix ──────────────────────────
1510
1728
  // Keep deterministic output instructions on the static side of the cache
1511
1729
  // boundary so Anthropic and OpenAI warm-prefix caching can reuse them.
@@ -1577,8 +1795,10 @@ export class Compositor {
1577
1795
  try {
1578
1796
  const topicMap = new SessionTopicMap(db);
1579
1797
  activeTopic = topicMap.getActiveTopic(request.sessionKey) || undefined;
1580
- if (activeTopic)
1798
+ if (activeTopic) {
1581
1799
  activeTopicId = activeTopic.id;
1800
+ composeTopicSource = 'session-topic-map';
1801
+ }
1582
1802
  }
1583
1803
  catch {
1584
1804
  // Topic lookup is best-effort — fall back to full history
@@ -1586,6 +1806,7 @@ export class Compositor {
1586
1806
  }
1587
1807
  else {
1588
1808
  activeTopicId = request.topicId;
1809
+ composeTopicSource = 'request-topic-id';
1589
1810
  try {
1590
1811
  activeTopic = db.prepare(`
1591
1812
  SELECT id, name
@@ -1616,6 +1837,9 @@ export class Compositor {
1616
1837
  }
1617
1838
  return true;
1618
1839
  });
1840
+ s09ObservedUserTurnCount = Math.max(s09ObservedUserTurnCount, historyMessages.filter(m => m.role === 'user').length);
1841
+ composeTopicMessageCount = historyMessages.length;
1842
+ composeTopicStampedMessageCount = historyMessages.filter(m => typeof m.topicId === 'string').length;
1619
1843
  // ── Transform-first: apply gradient tool treatment BEFORE budget math ──
1620
1844
  // All tool payloads are in their final form before any token estimation.
1621
1845
  // This ensures estimateMessageTokens() measures actual submission cost,
@@ -1643,25 +1867,84 @@ export class Compositor {
1643
1867
  // of raw config.historyFraction so history doesn't overflow MECW ceiling.
1644
1868
  const historyBudget = Math.floor(budget * b4HistoryFraction);
1645
1869
  const historyFillCap = Math.min(historyBudget, remaining);
1870
+ // 0.9.0: adaptive eviction ordering. For elevated/high/critical bands,
1871
+ // drop inactive-topic non-tool clusters first when an active topic is
1872
+ // known. Bootstrap/warmup/steady reproduce the historical newest-first
1873
+ // sweep exactly (preferTopicAwareDrop=false → evictedByPlan stays empty).
1874
+ const adaptiveOrdering = orderClustersForAdaptiveEviction(budgetClusters, evictionLifecyclePolicy, { activeTopicId });
1875
+ adaptiveEvictionTopicAwareEligibleClusters = adaptiveOrdering.telemetry.topicAwareEligibleClusters;
1876
+ adaptiveEvictionProtectedClusters = adaptiveOrdering.telemetry.protectedClusters;
1877
+ adaptiveEvictionTopicIdCoveragePct = adaptiveOrdering.telemetry.topicIdCoveragePct;
1878
+ adaptiveEvictionBypassReason = adaptiveOrdering.telemetry.bypassReason;
1879
+ if (!activeTopicId)
1880
+ composeTopicState = 'no-active-topic';
1881
+ else if (composeTopicStampedMessageCount === 0)
1882
+ composeTopicState = 'active-topic-missing-stamped-history';
1883
+ else
1884
+ composeTopicState = 'active-topic-ready';
1885
+ const evictedByPlan = new Set();
1886
+ let projectedTokens = budgetClusters.reduce((s, c) => s + c.tokenCost, 0);
1887
+ if (adaptiveOrdering.preferTopicAwareDrop
1888
+ && adaptiveOrdering.topicAwareDropOrder.length > 0
1889
+ && projectedTokens <= historyFillCap) {
1890
+ adaptiveEvictionBypassReason = 'within-budget';
1891
+ }
1892
+ if (adaptiveOrdering.preferTopicAwareDrop
1893
+ && adaptiveOrdering.topicAwareDropOrder.length > 0
1894
+ && projectedTokens > historyFillCap) {
1895
+ for (const idx of adaptiveOrdering.topicAwareDropOrder) {
1896
+ if (projectedTokens <= historyFillCap)
1897
+ break;
1898
+ if (adaptiveOrdering.protectedIndices.has(idx))
1899
+ continue;
1900
+ evictedByPlan.add(idx);
1901
+ projectedTokens -= budgetClusters[idx].tokenCost;
1902
+ }
1903
+ adaptiveEvictionTopicAwareDroppedClusters = evictedByPlan.size;
1904
+ }
1905
+ let truncationCutIndex = -1;
1646
1906
  for (let i = budgetClusters.length - 1; i >= 0; i--) {
1907
+ if (evictedByPlan.has(i))
1908
+ continue;
1647
1909
  const cluster = budgetClusters[i];
1648
1910
  if (historyTokens + cluster.tokenCost > historyFillCap && includedClusters.length > 0) {
1649
- const droppedClusters = budgetClusters.slice(0, i + 1);
1650
- const droppedMsgCount = droppedClusters.reduce((s, c) => s + c.messages.length, 0);
1651
- const droppedToolResultCount = droppedClusters.reduce((sum, c) => sum + c.messages.filter(m => (m.toolResults?.length ?? 0) > 0).length, 0);
1652
- if (droppedToolResultCount > 0) {
1653
- c1CoEjections += droppedToolResultCount;
1654
- console.info(`[hypermem:compositor] tool-chain co-eject reason=budget_cluster_drop count=${droppedToolResultCount} messages dropped`);
1911
+ truncationCutIndex = i;
1912
+ break;
1913
+ }
1914
+ includedClusters.unshift(cluster);
1915
+ historyTokens += cluster.tokenCost;
1916
+ }
1917
+ if (truncationCutIndex >= 0 || evictedByPlan.size > 0) {
1918
+ const droppedIndices = [];
1919
+ if (truncationCutIndex >= 0) {
1920
+ for (let i = 0; i <= truncationCutIndex; i++) {
1921
+ if (!evictedByPlan.has(i))
1922
+ droppedIndices.push(i);
1655
1923
  }
1924
+ }
1925
+ for (const idx of evictedByPlan)
1926
+ droppedIndices.push(idx);
1927
+ const droppedClusters = droppedIndices.map(i => budgetClusters[i]);
1928
+ const droppedMsgCount = droppedClusters.reduce((s, c) => s + c.messages.length, 0);
1929
+ const droppedToolResultCount = droppedClusters.reduce((sum, c) => sum + c.messages.filter(m => (m.toolResults?.length ?? 0) > 0).length, 0);
1930
+ if (droppedToolResultCount > 0) {
1931
+ c1CoEjections += droppedToolResultCount;
1932
+ console.info(`[hypermem:compositor] tool-chain co-eject reason=budget_cluster_drop count=${droppedToolResultCount} messages dropped`);
1933
+ }
1934
+ if (droppedMsgCount > 0) {
1656
1935
  const c1Note = droppedToolResultCount > 0
1657
1936
  ? ` [C1: ${droppedToolResultCount} co-ejected reason=budget_cluster_drop]`
1658
1937
  : '';
1659
- warnings.push(`History truncated at cluster ${i + 1}/${budgetClusters.length} (${droppedMsgCount} messages dropped)${c1Note}`);
1660
- s4RescueTrimFired = true;
1661
- break;
1938
+ const planNote = evictedByPlan.size > 0
1939
+ ? ` [adaptive: band=${evictionLifecyclePolicy.band} topic-aware-dropped=${evictedByPlan.size}]`
1940
+ : '';
1941
+ const cutLabel = truncationCutIndex >= 0
1942
+ ? `${truncationCutIndex + 1}/${budgetClusters.length}`
1943
+ : `0/${budgetClusters.length}`;
1944
+ warnings.push(`History truncated at cluster ${cutLabel} (${droppedMsgCount} messages dropped)${c1Note}${planNote}`);
1945
+ if (truncationCutIndex >= 0)
1946
+ s4RescueTrimFired = true;
1662
1947
  }
1663
- includedClusters.unshift(cluster);
1664
- historyTokens += cluster.tokenCost;
1665
1948
  }
1666
1949
  const includedHistory = includedClusters.flatMap(c => c.messages);
1667
1950
  // ── Keystone History Slot (P2.1) ──────────────────────────────────
@@ -1829,6 +2112,10 @@ export class Compositor {
1829
2112
  let diagFingerprintDedups = 0;
1830
2113
  let diagFingerprintCollisions = 0;
1831
2114
  let diagRetrievalMode = 'none';
2115
+ // Sprint 1: reranker telemetry captured from hybridSearch via onRerankerTelemetry
2116
+ let diagRerankerStatus;
2117
+ let diagRerankerCandidates;
2118
+ let diagRerankerProvider;
1832
2119
  function normalizeFingerprintText(text) {
1833
2120
  return text.toLowerCase().replace(/\s+/g, ' ').trim();
1834
2121
  }
@@ -2104,6 +2391,26 @@ export class Compositor {
2104
2391
  // Use request.prompt as the retrieval query when available — it is the
2105
2392
  // live current-turn text. Falling back to getLastUserMessage(messages)
2106
2393
  // reads from the already-assembled history, which is one turn stale.
2394
+ // 0.9.0: resolve adaptive lifecycle policy immediately before semantic recall
2395
+ // so smartRecallMultiplier scales the recall token budget and candidate limit
2396
+ // from the same policy object that compose diagnostics later report.
2397
+ const composePreRecallPressure = computeUnifiedPressure(contextTokens, budget, PRESSURE_SOURCE.COMPOSE_PRE_RECALL);
2398
+ const s09ComposePolicyPressure = s09ForkedContextSeed
2399
+ && s09ObservedUserTurnCount === 0
2400
+ && s09ForkedParentPressure != null
2401
+ ? s09ForkedParentPressure
2402
+ : composePreRecallPressure.fraction;
2403
+ const composeLifecyclePolicy = resolveAdaptiveLifecyclePolicy({
2404
+ pressureFraction: s09ComposePolicyPressure,
2405
+ userTurnCount: s09ObservedUserTurnCount,
2406
+ explicitNewSession: isExplicitNewSessionPrompt(request.prompt ?? this.getLastUserMessage(messages)),
2407
+ forkedContext: Boolean(s09ForkedContextSeed),
2408
+ forkedParentPressureFraction: s09ForkedParentPressure,
2409
+ forkedParentUserTurnCount: s09ForkedContextSeed?.parentUserTurnCount,
2410
+ });
2411
+ const recallBreadth = scaleRecallBreadth(remaining, composeLifecyclePolicy.smartRecallMultiplier);
2412
+ let diagAdaptiveRecallBudgetTokens;
2413
+ let diagAdaptiveRecallCandidateLimit;
2107
2414
  if (request.includeSemanticRecall !== false && remaining > 500 && (this.vectorStore || libDb)) {
2108
2415
  const lastUserMsg = request.prompt?.trim() || this.getLastUserMessage(messages);
2109
2416
  if (lastUserMsg) {
@@ -2118,9 +2425,17 @@ export class Compositor {
2118
2425
  catch {
2119
2426
  // Redis lookup is best-effort — fall through to Ollama
2120
2427
  }
2121
- const semanticContent = await this.buildSemanticRecall(lastUserMsg, request.agentId, Math.floor(remaining * 0.12), // Cap at 12% of remaining (W4: was 0.15)
2122
- libDb || undefined, precomputedEmbedding, contextFingerprints // C2: skip results already in Active Facts
2123
- );
2428
+ diagAdaptiveRecallBudgetTokens = recallBreadth.mainBudgetTokens;
2429
+ diagAdaptiveRecallCandidateLimit = recallBreadth.candidateLimit;
2430
+ const semanticContent = await this.buildSemanticRecall(lastUserMsg, request.agentId,
2431
+ // 0.9.0: recall token budget = base 0.12 of remaining * lifecycle multiplier.
2432
+ recallBreadth.mainBudgetTokens, libDb || undefined, precomputedEmbedding, contextFingerprints, // C2: skip results already in Active Facts
2433
+ // Sprint 1: capture reranker telemetry at assemble level
2434
+ (ev) => {
2435
+ diagRerankerStatus = ev.status;
2436
+ diagRerankerCandidates = ev.candidates;
2437
+ diagRerankerProvider = ev.provider;
2438
+ }, recallBreadth.candidateLimit);
2124
2439
  if (semanticContent) {
2125
2440
  const tokens = estimateTokens(semanticContent);
2126
2441
  volatileContextParts.push(`## Related Memory\n${semanticContent}`);
@@ -2256,15 +2571,21 @@ export class Compositor {
2256
2571
  volatileContextParts.push(docParts.join('\n\n'));
2257
2572
  }
2258
2573
  }
2259
- else if (remaining > 400 && (this.vectorStore || libDb)) {
2574
+ else if (request.includeSemanticRecall !== false && remaining > 400 && (this.vectorStore || libDb)) {
2260
2575
  // Trigger-miss fallback: no trigger fired — attempt bounded semantic retrieval
2261
2576
  // so there is never a silent zero-memory path on doc chunks.
2262
2577
  // INVARIANT: this block is mutually exclusive with triggered-retrieval above.
2263
2578
  // If refactored to run both paths, cap combined semantic budget to avoid double-recall.
2264
2579
  try {
2580
+ // 0.9.0: trigger-miss fallback uses the same lifecycle-scaled breadth so
2581
+ // a /new surge widens fallback recall and high/critical pressure narrows it.
2582
+ if (diagAdaptiveRecallBudgetTokens === undefined) {
2583
+ diagAdaptiveRecallBudgetTokens = recallBreadth.fallbackBudgetTokens;
2584
+ diagAdaptiveRecallCandidateLimit = recallBreadth.candidateLimit;
2585
+ }
2265
2586
  const fallbackContent = await Promise.race([
2266
- this.buildSemanticRecall(lastMsg, request.agentId, Math.floor(remaining * 0.10), libDb || undefined, undefined, contextFingerprints // C2: skip results already in Active Facts
2267
- ),
2587
+ this.buildSemanticRecall(lastMsg, request.agentId, recallBreadth.fallbackBudgetTokens, libDb || undefined, undefined, contextFingerprints, // C2: skip results already in Active Facts
2588
+ undefined, recallBreadth.candidateLimit),
2268
2589
  new Promise((_, reject) => setTimeout(() => reject(new Error('fallback_knn_timeout')), 3000)),
2269
2590
  ]);
2270
2591
  if (fallbackContent) {
@@ -2365,7 +2686,23 @@ export class Compositor {
2365
2686
  messages.splice(stableInsertIdx, 0, ...stablePrefixMessages);
2366
2687
  }
2367
2688
  // ── Inject assembled context block ──────────────────────
2689
+ // Sprint 4: Prompt-tail placement.
2690
+ // Volatile context (active facts, temporal, open-domain, semantic recall,
2691
+ // doc chunks, cross-session) moves AFTER all history messages so that
2692
+ // query-shaped material lands near the user turn rather than buried mid-prompt.
2693
+ //
2694
+ // Layout after Sprint 4:
2695
+ // [stable prefix: system, identity, FOS/MOD, stable facts, knowledge, prefs]
2696
+ // [history: keystones, cross-topic, recent conversation messages]
2697
+ // [volatile context block ← here, at the tail] ← Sprint 4 reorder
2698
+ // [last user message]
2699
+ //
2700
+ // The cache boundary (dynamicBoundary: true) stays on this block so the
2701
+ // Anthropic/OpenAI cache-prefix logic still fires correctly — everything
2702
+ // ABOVE this message is the stable prefix eligible for caching.
2368
2703
  const assembledContextBlock = volatileContextParts.length > 0 ? volatileContextParts.join('\n\n') : undefined;
2704
+ let s4VolatileContextPosition;
2705
+ let s4MessagesBeforeVolatile;
2369
2706
  if (assembledContextBlock) {
2370
2707
  const contextMsg = {
2371
2708
  role: 'system',
@@ -2377,7 +2714,23 @@ export class Compositor {
2377
2714
  // everything at or below it is per-session / per-turn context.
2378
2715
  metadata: { dynamicBoundary: true, cacheBoundarySlot: CACHE_PREFIX_BOUNDARY_SLOT },
2379
2716
  };
2380
- messages.splice(stableInsertIdx + stablePrefixMessages.length, 0, contextMsg);
2717
+ // Sprint 4: Insert at tail (end of messages array), AFTER history.
2718
+ // The last user message (if any) should remain the final message, so we
2719
+ // insert the volatile block just before the last user message.
2720
+ const lastMsgIdx = messages.length - 1;
2721
+ const lastMsg = lastMsgIdx >= 0 ? messages[lastMsgIdx] : undefined;
2722
+ if (lastMsg && lastMsg.role === 'user') {
2723
+ // Insert volatile block before the last user message so user turn stays last
2724
+ messages.splice(lastMsgIdx, 0, contextMsg);
2725
+ s4VolatileContextPosition = lastMsgIdx;
2726
+ s4MessagesBeforeVolatile = lastMsgIdx;
2727
+ }
2728
+ else {
2729
+ // No trailing user message — append at end
2730
+ messages.push(contextMsg);
2731
+ s4VolatileContextPosition = messages.length - 1;
2732
+ s4MessagesBeforeVolatile = messages.length - 1;
2733
+ }
2381
2734
  }
2382
2735
  const stablePrefix = getStablePrefixMessages(messages);
2383
2736
  const prefixSegmentCount = stablePrefix.length;
@@ -2404,6 +2757,9 @@ export class Compositor {
2404
2757
  let trimCount = 0;
2405
2758
  // Collect indices of messages to eject before mutating the array.
2406
2759
  // Walk forward from the first non-system message, trimming oldest first.
2760
+ // Sprint 4: Skip the volatile context block (dynamicBoundary: true) — it
2761
+ // is query-shaped content that should not be evicted during the safety
2762
+ // valve pass. The stable prefix system messages are also protected (role=system).
2407
2763
  const firstNonSystemIdx = messages.findIndex(m => m.role !== 'system');
2408
2764
  const ejectIndices = new Set();
2409
2765
  if (firstNonSystemIdx >= 0) {
@@ -2412,6 +2768,12 @@ export class Compositor {
2412
2768
  // Don't trim the last user message (current prompt).
2413
2769
  if (i === messages.length - 1 && messages[i].role === 'user')
2414
2770
  break;
2771
+ // Sprint 4: Don't trim the volatile context block (dynamicBoundary marker).
2772
+ const meta = messages[i].metadata;
2773
+ if (meta?.dynamicBoundary) {
2774
+ i++;
2775
+ continue;
2776
+ }
2415
2777
  const msgTokens = estimateMessageTokens(messages[i]);
2416
2778
  ejectIndices.add(i);
2417
2779
  trimmed += msgTokens;
@@ -2455,6 +2817,8 @@ export class Compositor {
2455
2817
  }
2456
2818
  }
2457
2819
  const totalTokens = budget - remaining;
2820
+ // Sprint 3: Unified pressure signal — compose path
2821
+ const s3Pressure = computeUnifiedPressure(totalTokens, budget, PRESSURE_SOURCE.COMPOSE_POST_ASSEMBLY);
2458
2822
  // ─── Slot reconciliation ─────────────────────────────────────────────────
2459
2823
  // totalTokens = budget - remaining is the authoritative spend figure.
2460
2824
  // The slot accounting can drift from this due to history trim (which
@@ -2475,10 +2839,33 @@ export class Compositor {
2475
2839
  // Record the oldest message ID that the LLM can see in this compose
2476
2840
  // cycle. Everything below this ID becomes eligible for compaction.
2477
2841
  // If history was included, query the DB for the oldest included message.
2842
+ //
2843
+ // Sprint 1: Capture compaction eligibility counts BEFORE updating the fence
2844
+ // so we can report how many messages were eligible at the start of this pass.
2845
+ let diagCompactionEligibleCount;
2846
+ let diagCompactionEligibleRatio;
2847
+ let diagCompactionProcessedCount;
2478
2848
  if (request.includeHistory !== false && slots.history > 0) {
2479
2849
  try {
2480
2850
  const conversation = store.getConversation(request.sessionKey);
2481
2851
  if (conversation) {
2852
+ // Sprint 1: read eligibility BEFORE advancing the fence
2853
+ try {
2854
+ ensureCompactionFenceSchema(db);
2855
+ const eligibilityBefore = getCompactionEligibility(db, conversation.id);
2856
+ if (eligibilityBefore.fence !== null) {
2857
+ // Total messages in this conversation, not only those below the fence (denominator for ratio)
2858
+ const totalRow = db.prepare('SELECT COUNT(*) AS cnt FROM messages WHERE conversation_id = ?').get(conversation.id);
2859
+ const totalMessages = totalRow?.cnt ?? 0;
2860
+ diagCompactionEligibleCount = eligibilityBefore.eligibleCount;
2861
+ diagCompactionEligibleRatio = totalMessages > 0
2862
+ ? Math.round((eligibilityBefore.eligibleCount / totalMessages) * 1000) / 1000
2863
+ : 0;
2864
+ }
2865
+ }
2866
+ catch {
2867
+ // Eligibility query is best-effort
2868
+ }
2482
2869
  // The compositor included N history messages (after truncation).
2483
2870
  // Count how many non-system messages are in the output to determine
2484
2871
  // how far back we reached.
@@ -2494,8 +2881,18 @@ export class Compositor {
2494
2881
  LIMIT 1 OFFSET ?
2495
2882
  `).get(conversation.id, historyMsgCount - 1);
2496
2883
  if (oldestIncluded) {
2497
- ensureCompactionFenceSchema(db);
2498
2884
  updateCompactionFence(db, conversation.id, oldestIncluded.id, { minTailMessages: 8 });
2885
+ // Sprint 1: count how many messages moved from eligible -> fence-protected
2886
+ // (i.e. they are now above the updated fence)
2887
+ try {
2888
+ const eligibilityAfter = getCompactionEligibility(db, conversation.id);
2889
+ if (diagCompactionEligibleCount !== undefined) {
2890
+ diagCompactionProcessedCount = Math.max(0, diagCompactionEligibleCount - eligibilityAfter.eligibleCount);
2891
+ }
2892
+ }
2893
+ catch {
2894
+ // After-eligibility query is best-effort
2895
+ }
2499
2896
  }
2500
2897
  }
2501
2898
  }
@@ -2526,6 +2923,70 @@ export class Compositor {
2526
2923
  zeroResultReason = 'empty_corpus';
2527
2924
  }
2528
2925
  }
2926
+ // ── Sprint 4: Explicit budget lanes ───────────────────────────────────────────────
2927
+ // Compute allocated token lanes for this compose pass.
2928
+ // Budget = effective input budget (post-reserve).
2929
+ // Filled values reflect actual spend after slot fill and safety-valve trim.
2930
+ const s4HistoryLane = Math.floor(budget * b4HistoryFraction);
2931
+ const s4MemoryLane = Math.floor(budget * b4MemoryFraction);
2932
+ const s4StableFilledTokens = (slots.system ?? 0) + (slots.identity ?? 0);
2933
+ const s4HistoryFilledTokens = slots.history ?? 0;
2934
+ const s4MemoryFilledTokens = (slots.facts ?? 0) + (slots.context ?? 0) + (slots.library ?? 0);
2935
+ const s4TotalFilled = s4StableFilledTokens + s4HistoryFilledTokens + s4MemoryFilledTokens;
2936
+ const budgetLanes = {
2937
+ effectiveBudget: budget,
2938
+ stablePrefix: slots.system + slots.identity,
2939
+ history: s4HistoryLane,
2940
+ memory: s4MemoryLane,
2941
+ historyFraction: b4HistoryFraction,
2942
+ memoryFraction: b4MemoryFraction,
2943
+ overhead: Math.max(0, budget - s4TotalFilled),
2944
+ filled: {
2945
+ stablePrefix: s4StableFilledTokens,
2946
+ history: s4HistoryFilledTokens,
2947
+ memory: s4MemoryFilledTokens,
2948
+ },
2949
+ };
2950
+ // ── Sprint 4: OpenAI prefix-cache diagnostics ────────────────────────────────────
2951
+ // Expose prefix-boundary information for OpenAI providers so operators
2952
+ // can tune prompt layout for cache hit rate without guesswork.
2953
+ // Non-fatal — never block compose.
2954
+ let openaiPrefixCacheDiag;
2955
+ try {
2956
+ const s4Provider = s4DetectProvider(request.provider ?? request.model);
2957
+ if (s4Provider === 'openai' || s4Provider === 'openai-responses') {
2958
+ const totalWindowTokens = messages.reduce((sum, m) => sum + estimateMessageTokens(m), 0);
2959
+ const cacheableFraction = totalWindowTokens > 0
2960
+ ? Math.round((prefixTokens / totalWindowTokens) * 1000) / 1000
2961
+ : 0;
2962
+ // Sprint 4: volatileAtTail is true when the volatile context block is
2963
+ // positioned AFTER any history (or, vacuously, when no history exists and
2964
+ // the block sits just before the final user turn). In both cases nothing
2965
+ // but the current user message follows the boundary, which is the
2966
+ // cacheable layout. When assembledContextBlock is missing we report
2967
+ // false since there is nothing to place at tail.
2968
+ let s4VolatileAtTail = false;
2969
+ if (s4VolatileContextPosition !== undefined) {
2970
+ // Any messages after the boundary must be user turns only (no history).
2971
+ const tail = messages.slice(s4VolatileContextPosition + 1);
2972
+ s4VolatileAtTail = tail.every(m => m.role === 'user')
2973
+ && s4VolatileContextPosition >= prefixSegmentCount;
2974
+ }
2975
+ openaiPrefixCacheDiag = {
2976
+ stablePrefixMessageCount: prefixSegmentCount,
2977
+ stablePrefixTokens: prefixTokens,
2978
+ volatileAtTail: s4VolatileAtTail,
2979
+ cacheableFraction,
2980
+ prefixHash,
2981
+ };
2982
+ }
2983
+ }
2984
+ catch {
2985
+ // Provider detection is best-effort — never block compose
2986
+ }
2987
+ // 0.9.0: lifecycle policy was resolved pre-recall and used to scale recall
2988
+ // breadth. Diagnostics surface the same object so reported band/multiplier
2989
+ // matches what actually controlled retrieval this compose pass.
2529
2990
  const diagnostics = {
2530
2991
  triggerHits: diagTriggerHits,
2531
2992
  triggerFallbackUsed: diagTriggerFallbackUsed,
@@ -2555,6 +3016,14 @@ export class Compositor {
2555
3016
  historyDepthChosen: s4EffectiveDepth,
2556
3017
  estimatedMsgDensityTokens: s4ObservedDensity,
2557
3018
  rescueTrimFired: s4RescueTrimFired,
3019
+ // Sprint 4: prompt-tail placement diagnostics
3020
+ budgetLanes,
3021
+ volatileContextPosition: s4VolatileContextPosition,
3022
+ messagesBeforeVolatile: s4MessagesBeforeVolatile,
3023
+ openaiPrefixCacheDiag,
3024
+ // Sprint 3: unified pressure signal
3025
+ sessionPressureFraction: s3Pressure.fraction,
3026
+ pressureSource: s3Pressure.source,
2558
3027
  // B4: model-aware lane budget diagnostics
2559
3028
  mecwProfile: b4MecwProfile,
2560
3029
  mecwApplied: b4MecwApplied,
@@ -2564,6 +3033,37 @@ export class Compositor {
2564
3033
  trimSoftTarget: TRIM_BUDGET_POLICY.trimSoftTarget,
2565
3034
  trimGrowthThreshold: TRIM_BUDGET_POLICY.trimGrowthThreshold,
2566
3035
  trimHeadroomFraction: TRIM_BUDGET_POLICY.trimHeadroomFraction,
3036
+ // 0.9.0: adaptive lifecycle diagnostics for compose.preRecall
3037
+ adaptiveLifecycleBand: composeLifecyclePolicy.band,
3038
+ adaptiveLifecyclePressurePct: composeLifecyclePolicy.pressurePct,
3039
+ adaptiveWarmHistoryBudgetFraction: composeLifecyclePolicy.warmHistoryBudgetFraction,
3040
+ adaptiveSmartRecallMultiplier: composeLifecyclePolicy.smartRecallMultiplier,
3041
+ adaptiveTrimSoftTarget: composeLifecyclePolicy.trimSoftTarget,
3042
+ adaptiveCompactionTargetFraction: composeLifecyclePolicy.compactionTargetFraction,
3043
+ adaptiveBreadcrumbPackage: composeLifecyclePolicy.emitBreadcrumbPackage,
3044
+ adaptiveTopicCentroidEviction: composeLifecyclePolicy.enableTopicCentroidEviction,
3045
+ adaptiveProactiveCompaction: composeLifecyclePolicy.triggerProactiveCompaction,
3046
+ adaptiveLifecycleReasons: composeLifecyclePolicy.reasons,
3047
+ adaptiveRecallBudgetTokens: diagAdaptiveRecallBudgetTokens,
3048
+ adaptiveRecallCandidateLimit: diagAdaptiveRecallCandidateLimit,
3049
+ adaptiveEvictionLifecycleBand: evictionLifecyclePolicy.band,
3050
+ adaptiveEvictionPressurePct: evictionLifecyclePolicy.pressurePct,
3051
+ adaptiveEvictionTopicAwareEligibleClusters,
3052
+ adaptiveEvictionTopicAwareDroppedClusters,
3053
+ adaptiveEvictionProtectedClusters,
3054
+ adaptiveEvictionTopicIdCoveragePct,
3055
+ adaptiveEvictionBypassReason,
3056
+ composeTopicSource,
3057
+ composeTopicState,
3058
+ composeTopicMessageCount,
3059
+ composeTopicStampedMessageCount,
3060
+ composeTopicTelemetryStatus: 'emitted',
3061
+ adaptiveLifecycleBandDiverged: evictionLifecyclePolicy.band !== composeLifecyclePolicy.band,
3062
+ adaptiveForkedContext: s09ForkedContextSeed ? true : undefined,
3063
+ adaptiveForkedParentPressurePct: s09ForkedParentPressure != null
3064
+ ? Math.round(s09ForkedParentPressure * 100)
3065
+ : undefined,
3066
+ adaptiveForkedParentUserTurns: s09ForkedContextSeed?.parentUserTurnCount,
2567
3067
  // C1: tool-chain ejection telemetry
2568
3068
  toolChainCoEjections: c1CoEjections > 0 ? c1CoEjections : undefined,
2569
3069
  toolChainStubReplacements: c1StubReplacements > 0 ? c1StubReplacements : undefined,
@@ -2574,6 +3074,23 @@ export class Compositor {
2574
3074
  artifactsHydrated: hydrationResult.artifactsHydrated > 0 ? hydrationResult.artifactsHydrated : undefined,
2575
3075
  hydrationBytes: hydrationResult.hydrationBytes > 0 ? hydrationResult.hydrationBytes : undefined,
2576
3076
  hydrationMisses: hydrationResult.hydrationMisses > 0 ? hydrationResult.hydrationMisses : undefined,
3077
+ // Sprint 1: observability layer
3078
+ rerankerStatus: diagRerankerStatus,
3079
+ rerankerCandidates: diagRerankerCandidates,
3080
+ rerankerProvider: diagRerankerProvider,
3081
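+ // Sprint 1: named slot spans (allocated vs filled, overflow flag); facts/context/library each report the shared memory-lane allocation, so do not sum their allocated values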
3082
+ slotSpans: {
3083
+ system: { allocated: slots.system, filled: slots.system, overflow: false },
3084
+ identity: { allocated: slots.identity, filled: slots.identity, overflow: false },
3085
+ history: { allocated: Math.floor(budget * b4HistoryFraction), filled: slots.history, overflow: slots.history > Math.floor(budget * b4HistoryFraction) },
3086
+ facts: { allocated: Math.floor(budget * b4MemoryFraction), filled: slots.facts, overflow: false },
3087
+ context: { allocated: Math.floor(budget * b4MemoryFraction), filled: slots.context, overflow: false },
3088
+ library: { allocated: Math.floor(budget * b4MemoryFraction), filled: slots.library, overflow: false },
3089
+ },
3090
+ // Sprint 1: compaction eligibility
3091
+ compactionEligibleCount: diagCompactionEligibleCount,
3092
+ compactionEligibleRatio: diagCompactionEligibleRatio,
3093
+ compactionProcessedCount: diagCompactionProcessedCount,
2577
3094
  };
2578
3095
  if (pressureHigh) {
2579
3096
  warnings.push(`SESSION_PRESSURE_HIGH: avg_turn_cost=${avgTurnCost} tokens, dynamic reserve capped at ${Math.round(dynamicReserve * 100)}%`);
@@ -2659,6 +3176,33 @@ export class Compositor {
2659
3176
  // Cursor write is best-effort
2660
3177
  }
2661
3178
  }
3179
+ try {
3180
+ const conversation = sampleConv ?? store.getConversation(request.sessionKey);
3181
+ if (conversation) {
3182
+ const snapshotContext = getOrCreateActiveContext(db, request.agentId, request.sessionKey, conversation.id);
3183
+ const repairNoticeContent = await this.cache.getSlot(request.agentId, request.sessionKey, 'repair_notice');
3184
+ insertCompositionSnapshot(db, {
3185
+ contextId: snapshotContext.id,
3186
+ headMessageId: snapshotContext.headMessageId ?? null,
3187
+ model: request.model ?? request.provider ?? 'unknown',
3188
+ contextWindow: totalWindow,
3189
+ totalTokens,
3190
+ fillPct: totalWindow > 0 ? Math.round((totalTokens / totalWindow) * 10000) / 10000 : 0,
3191
+ snapshotKind: 'composed_window',
3192
+ repairDepth: repairNoticeContent ? MAX_WARM_RESTORE_REPAIR_DEPTH : 0,
3193
+ slots: buildCompositionSnapshotSlots({
3194
+ system: systemContent,
3195
+ identity: identityContent,
3196
+ repairNotice: repairNoticeContent,
3197
+ messages,
3198
+ contextBlock: assembledContextBlock,
3199
+ }),
3200
+ });
3201
+ }
3202
+ }
3203
+ catch (error) {
3204
+ console.warn(`[hypermem:compositor] composition snapshot write skipped: ${error.message}`);
3205
+ }
2662
3206
  console.log(`[hypermem:compose] agent=${request.agentId} triggers=${diagTriggerHits} fallback=${diagTriggerFallbackUsed} facts=${diagFactsIncluded} semantic=${diagSemanticResults} chunks=${diagDocChunkCollections} scopeFiltered=${diagScopeFiltered} mode=${diagRetrievalMode} crossTopicKeystones=${diagCrossTopicKeystones} c2_degradations=${c2ArtifactDegradations} c2_threshold=${c2ArtifactThresholdTokens}`);
2663
3207
  return {
2664
3208
  messages: outputMessages,
@@ -2684,10 +3228,15 @@ export class Compositor {
2684
3228
  // Uses context.head_message_id to walk only the active branch.
2685
3229
  let activeContext = null;
2686
3230
  try {
2687
- activeContext = getActiveContext(db, agentId, sessionKey);
3231
+ activeContext = getOrCreateActiveContext(db, agentId, sessionKey, conversation.id);
2688
3232
  }
2689
3233
  catch {
2690
- // Context resolution is best-effort
3234
+ try {
3235
+ activeContext = getActiveContext(db, agentId, sessionKey);
3236
+ }
3237
+ catch {
3238
+ // Context resolution is best-effort
3239
+ }
2691
3240
  }
2692
3241
  // Phase 0 fence enforcement: resolve compaction fence for warm bootstrap.
2693
3242
  // Fence remains as transitional safety — primary scoping is via DAG walk.
@@ -2701,6 +3250,97 @@ export class Compositor {
2701
3250
  catch {
2702
3251
  // Fence lookup is best-effort
2703
3252
  }
3253
+ const warmMeta = {
3254
+ agentId,
3255
+ sessionKey,
3256
+ provider: conversation.provider,
3257
+ model: conversation.model,
3258
+ channelType: conversation.channelType,
3259
+ tokenCount: conversation.tokenCountIn + conversation.tokenCountOut,
3260
+ lastActive: conversation.updatedAt,
3261
+ status: conversation.status,
3262
+ };
3263
+ if (activeContext) {
3264
+ const warnSnapshotVerifyFallback = (reason, detail) => {
3265
+ const detailSuffix = detail ? ` ${detail}` : '';
3266
+ console.warn(`[hypermem:compositor] warm snapshot verify fallback session=${sessionKey} reason=${reason} verify_fallback_count=1 cold_rewarm_count=1${detailSuffix}`);
3267
+ };
3268
+ try {
3269
+ const snapshotCandidates = listCompositionSnapshots(db, activeContext.id, 2);
3270
+ const latestSnapshot = getLatestValidCompositionSnapshot(db, activeContext.id);
3271
+ if (latestSnapshot?.verification.slots) {
3272
+ const targetModel = opts?.model ?? conversation.model ?? 'unknown';
3273
+ const sourceModel = latestSnapshot.snapshot.model;
3274
+ const sourceProvider = s4DetectProvider(sourceModel);
3275
+ const targetProvider = s4DetectProvider(conversation.provider ?? targetModel);
3276
+ const restored = restoreWarmSnapshotState(latestSnapshot.verification.slots, {
3277
+ sourceProvider,
3278
+ targetProvider,
3279
+ });
3280
+ if (restored) {
3281
+ if (!restored.diagnostics.rolloutGatePassed) {
3282
+ const gateSummary = restored.diagnostics.rolloutGateViolations
3283
+ .map(violation => `${violation.gate}=${violation.actual}/${violation.max}`)
3284
+ .join(', ');
3285
+ console.warn(`[hypermem:compositor] warm snapshot rollout gate blocked session=${sessionKey} snapshot=${latestSnapshot.snapshot.id} violations=${JSON.stringify(gateSummary)} verify_fallback_count=${latestSnapshot.fallbackUsed ? 1 : 0} cold_rewarm_count=1`);
3286
+ warnSnapshotVerifyFallback('rollout_gate_blocked', `snapshot=${latestSnapshot.snapshot.id} violations=${JSON.stringify(gateSummary)}`);
3287
+ }
3288
+ else {
3289
+ if (latestSnapshot.fallbackUsed) {
3290
+ console.warn(`[hypermem:compositor] warm snapshot verify fallback session=${sessionKey} restored_snapshot=${latestSnapshot.snapshot.id} verify_fallback_count=1 cold_rewarm_count=0 reason=latest_snapshot_invalid_or_unverifiable`);
3291
+ }
3292
+ const repairNoticeLines = [
3293
+ `Repair notice: this session is a repaired continuation from snapshot ${latestSnapshot.snapshot.id}.`,
3294
+ `Source model: ${sourceModel}. Target model: ${targetModel}.`,
3295
+ `Source provider: ${sourceProvider}. Target provider: ${targetProvider}.`,
3296
+ `Cross-model boundary: ${sourceModel !== targetModel ? 'yes' : 'no'}.`,
3297
+ `Cross-provider boundary: ${restored.diagnostics.crossProviderBoundary ? 'yes' : 'no'}.`,
3298
+ `Repair depth: ${MAX_WARM_RESTORE_REPAIR_DEPTH}.`
3299
+ ];
3300
+ if (latestSnapshot.fallbackUsed) {
3301
+ repairNoticeLines.push('Snapshot verify fallback count: 1.');
3302
+ }
3303
+ if (restored.diagnostics.quotedAssistantTurns > 0) {
3304
+ repairNoticeLines.push(`Quoted foreign-provider assistant turns: ${restored.diagnostics.quotedAssistantTurns}.`);
3305
+ }
3306
+ if (restored.diagnostics.toolPairParityViolations > 0) {
3307
+ repairNoticeLines.push(`Tool-pair parity gaps flagged: ${restored.diagnostics.toolPairParityViolations}.`);
3308
+ }
3309
+ if (restored.diagnostics.requiredSlotDrops.length > 0) {
3310
+ repairNoticeLines.push(`Required-slot gaps flagged: ${restored.diagnostics.requiredSlotDrops.join(', ')}.`);
3311
+ }
3312
+ const tokenParityDriftExceeded = restored.diagnostics.tokenParityDriftP95 > WARM_RESTORE_MEASUREMENT_GATES.tokenParityDriftP95Max
3313
+ || restored.diagnostics.tokenParityDriftP99 > WARM_RESTORE_MEASUREMENT_GATES.tokenParityDriftP99Max;
3314
+ if (tokenParityDriftExceeded
3315
+ || restored.diagnostics.requiredSlotDropRate > WARM_RESTORE_MEASUREMENT_GATES.requiredSlotDropRateMax
3316
+ || restored.diagnostics.stablePrefixBoundaryViolations > WARM_RESTORE_MEASUREMENT_GATES.stablePrefixBoundaryViolationsMax
3317
+ || restored.diagnostics.toolPairParityViolations > WARM_RESTORE_MEASUREMENT_GATES.toolPairParityViolationsMax
3318
+ || restored.diagnostics.continuityCriticalBoundaryTransformRate > WARM_RESTORE_MEASUREMENT_GATES.continuityCriticalBoundaryTransformRateMax) {
3319
+ repairNoticeLines.push(`Warm-restore instrumentation gap: token parity drift p95=${restored.diagnostics.tokenParityDriftP95.toFixed(4)}, p99=${restored.diagnostics.tokenParityDriftP99.toFixed(4)}, stable_prefix violations=${restored.diagnostics.stablePrefixBoundaryViolations}, continuity-critical transform rate=${restored.diagnostics.continuityCriticalBoundaryTransformRate.toFixed(4)}.`);
3320
+ }
3321
+ const repairNoticeContent = repairNoticeLines.join(' ');
3322
+ await this.cache.invalidateWindow(agentId, sessionKey);
3323
+ await this.cache.warmSession(agentId, sessionKey, {
3324
+ system: restored.system ?? opts?.systemPrompt,
3325
+ identity: restored.identity ?? opts?.identity,
3326
+ repairNotice: repairNoticeContent,
3327
+ history: restored.history,
3328
+ meta: warmMeta,
3329
+ });
3330
+ console.info(`[hypermem:compositor] warm snapshot restore session=${sessionKey} snapshot=${latestSnapshot.snapshot.id} fallback=${latestSnapshot.fallbackUsed} cross_provider=${restored.diagnostics.crossProviderBoundary} quoted_assistant_turns=${restored.diagnostics.quotedAssistantTurns} tool_pair_gaps=${restored.diagnostics.toolPairParityViolations} rollout_gate_passed=${restored.diagnostics.rolloutGatePassed} token_parity_drift_p95=${restored.diagnostics.tokenParityDriftP95.toFixed(4)} token_parity_drift_p99=${restored.diagnostics.tokenParityDriftP99.toFixed(4)}`);
3331
+ return;
3332
+ }
3333
+ }
3334
+ warnSnapshotVerifyFallback('restore_unusable', `snapshot_count=${snapshotCandidates.length}`);
3335
+ }
3336
+ else if (snapshotCandidates.length > 0) {
3337
+ warnSnapshotVerifyFallback('no_valid_snapshot', `snapshot_count=${snapshotCandidates.length}`);
3338
+ }
3339
+ }
3340
+ catch (error) {
3341
+ warnSnapshotVerifyFallback('restore_exception', `error=${JSON.stringify(error.message)}`);
3342
+ }
3343
+ }
2704
3344
  // Fetch a generous pool from SQLite, apply gradient transform, then
2705
3345
  // token-budget-cap the warm set. This replaces the old WARM_BOOTSTRAP_CAP
2706
3346
  // message-count constant which was a blunt instrument — 100 messages of
@@ -2742,7 +3382,6 @@ export class Compositor {
2742
3382
  history.unshift(tagged);
2743
3383
  warmTokens += cost;
2744
3384
  }
2745
- const libDb = opts?.libraryDb || this.libraryDb;
2746
3385
  // Note: facts and context are intentionally NOT cached here.
2747
3386
  // compose() calls buildFactsFromDb() and buildCrossSessionContext() directly
2748
3387
  // from SQLite on every turn (~0.3ms each) — faster than a Redis GET round-trip.
@@ -2755,19 +3394,10 @@ export class Compositor {
2755
3394
  system: opts?.systemPrompt,
2756
3395
  identity: opts?.identity,
2757
3396
  history,
2758
- meta: {
2759
- agentId,
2760
- sessionKey,
2761
- provider: conversation.provider,
2762
- model: conversation.model,
2763
- channelType: conversation.channelType,
2764
- tokenCount: conversation.tokenCountIn + conversation.tokenCountOut,
2765
- lastActive: conversation.updatedAt,
2766
- status: conversation.status,
2767
- },
3397
+ meta: warmMeta,
2768
3398
  });
2769
3399
  }
2770
- async refreshRedisGradient(agentId, sessionKey, db, tokenBudget, historyDepth) {
3400
+ async refreshRedisGradient(agentId, sessionKey, db, tokenBudget, historyDepth, trimSoftTarget) {
2771
3401
  const store = new MessageStore(db);
2772
3402
  const conversation = store.getConversation(sessionKey);
2773
3403
  if (!conversation)
@@ -2810,7 +3440,7 @@ export class Compositor {
2810
3440
  // on the next turn even in the steady-state path. Aligning the gradient cap to
2811
3441
  // the trim target means the rebuilt window already fits within the assemble
2812
3442
  // envelope by construction.
2813
- const { softBudget: gradientAssembleBudget } = resolveTrimBudgets(tokenBudget ?? 0);
3443
+ const { softBudget: gradientAssembleBudget } = resolveTrimBudgets(tokenBudget ?? 0, { trimSoftTarget });
2814
3444
  const transformedHistory = applyToolGradient(rawHistory, {
2815
3445
  totalWindowTokens: tokenBudget && tokenBudget > 0
2816
3446
  ? gradientAssembleBudget
@@ -3074,11 +3704,20 @@ export class Compositor {
3074
3704
  * @param precomputedEmbedding — optional pre-computed embedding for the query.
3075
3705
  * When provided, the Ollama call inside VectorStore.search() is skipped.
3076
3706
  */
3077
- async buildSemanticRecall(userMessage, agentId, maxTokens, libraryDb, precomputedEmbedding, existingFingerprints // C2: skip results already in Active Facts
3078
- ) {
3707
+ async buildSemanticRecall(userMessage, agentId, maxTokens, libraryDb, precomputedEmbedding, existingFingerprints, // C2: skip results already in Active Facts
3708
+ onRerankerTelemetry, // Sprint 1: surface reranker status at assemble level
3709
+ resultLimit) {
3079
3710
  const libDb = libraryDb || this.libraryDb;
3080
3711
  if (!libDb && !this.vectorStore)
3081
3712
  return null;
3713
+ // 0.9.0: clamp the lifecycle-scaled candidate limit. Caller already clamps
3714
+ // via scaleRecallBreadth; this is a defensive floor so direct callers (none
3715
+ // outside compose today) cannot accidentally request 0 results.
3716
+ const hybridLimit = Math.max(RECALL_BREADTH_BASE.candidateLimitMin, Math.min(RECALL_BREADTH_BASE.candidateLimitMax, Math.floor(resultLimit && resultLimit > 0 ? resultLimit : RECALL_BREADTH_BASE.candidateLimit)));
3717
+ // KNN-only legacy fallback historically used 8 — keep it slightly below the
3718
+ // hybrid limit to preserve prior behavior at multiplier=1, while still
3719
+ // scaling with the same adaptive limit.
3720
+ const knnFallbackLimit = Math.max(RECALL_BREADTH_BASE.candidateLimitMin, Math.min(RECALL_BREADTH_BASE.candidateLimitMax, hybridLimit - 2));
3082
3721
  // Inline fingerprint helper (mirrors compose-scope version; C2 dedup only used here)
3083
3722
  const fpCheck = existingFingerprints
3084
3723
  ? (text) => existingFingerprints.has(text.toLowerCase().replace(/\s+/g, ' ').trim().slice(0, 120))
@@ -3087,10 +3726,16 @@ export class Compositor {
3087
3726
  if (libDb) {
3088
3727
  const results = await hybridSearch(libDb, this.vectorStore, userMessage, {
3089
3728
  tables: ['facts', 'knowledge', 'episodes'],
3090
- limit: 10,
3729
+ limit: hybridLimit,
3091
3730
  agentId,
3092
3731
  maxKnnDistance: 1.2,
3093
3732
  precomputedEmbedding,
3733
+ reranker: this.reranker,
3734
+ rerankerMinCandidates: this.rerankerMinCandidates,
3735
+ rerankerMaxDocuments: this.rerankerMaxDocuments,
3736
+ rerankerTopK: this.rerankerTopK,
3737
+ // Sprint 1: thread reranker telemetry into compose diagnostics
3738
+ onRerankerTelemetry,
3094
3739
  });
3095
3740
  if (results.length === 0)
3096
3741
  return null;
@@ -3157,7 +3802,7 @@ export class Compositor {
3157
3802
  return null;
3158
3803
  const results = await this.vectorStore.search(userMessage, {
3159
3804
  tables: ['facts', 'knowledge', 'episodes'],
3160
- limit: 8,
3805
+ limit: knnFallbackLimit,
3161
3806
  maxDistance: 1.2,
3162
3807
  precomputedEmbedding,
3163
3808
  });
@@ -3347,8 +3992,11 @@ export class Compositor {
3347
3992
  }
3348
3993
  }
3349
3994
  const fenceClause = fenceMessageId != null ? 'AND m.id >= ?' : '';
3350
- // Phase 3 (Turn DAG): prefer context_id scoping over conversation_id+fence
3351
- const contextClause = activeContext ? 'AND m.context_id = ?' : '';
3995
+ // Phase 3 (Turn DAG): prefer context_id scoping, but keep legacy NULL
3996
+ // rows eligible. Warmed or migrated sessions can have an active context
3997
+ // while older messages predate context_id backfill; excluding NULL rows
3998
+ // disables within-session keystone recall for those conversations.
3999
+ const contextClause = activeContext ? 'AND (m.context_id = ? OR m.context_id IS NULL)' : '';
3352
4000
  const baseParams = [conversationId, cutoffId];
3353
4001
  if (fenceMessageId != null)
3354
4002
  baseParams.push(fenceMessageId);