@psiclawops/hypermem 0.8.5 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +26 -0
- package/INSTALL.md +132 -9
- package/README.md +119 -272
- package/bench/README.md +42 -0
- package/bench/data-access-bench.mjs +380 -0
- package/bin/hypermem-bench.mjs +2 -0
- package/bin/hypermem-doctor.mjs +412 -0
- package/bin/hypermem-model-audit.mjs +339 -0
- package/bin/hypermem-status.mjs +491 -70
- package/dist/adaptive-lifecycle.d.ts +81 -0
- package/dist/adaptive-lifecycle.d.ts.map +1 -0
- package/dist/adaptive-lifecycle.js +190 -0
- package/dist/budget-policy.d.ts +1 -1
- package/dist/budget-policy.d.ts.map +1 -1
- package/dist/budget-policy.js +10 -5
- package/dist/cache.d.ts +1 -0
- package/dist/cache.d.ts.map +1 -1
- package/dist/cache.js +2 -0
- package/dist/composition-snapshot-integrity.d.ts +36 -0
- package/dist/composition-snapshot-integrity.d.ts.map +1 -0
- package/dist/composition-snapshot-integrity.js +131 -0
- package/dist/composition-snapshot-runtime.d.ts +59 -0
- package/dist/composition-snapshot-runtime.d.ts.map +1 -0
- package/dist/composition-snapshot-runtime.js +250 -0
- package/dist/composition-snapshot-store.d.ts +44 -0
- package/dist/composition-snapshot-store.d.ts.map +1 -0
- package/dist/composition-snapshot-store.js +117 -0
- package/dist/compositor.d.ts +125 -1
- package/dist/compositor.d.ts.map +1 -1
- package/dist/compositor.js +692 -44
- package/dist/doc-chunk-store.d.ts +19 -0
- package/dist/doc-chunk-store.d.ts.map +1 -1
- package/dist/doc-chunk-store.js +56 -6
- package/dist/hybrid-retrieval.d.ts +38 -0
- package/dist/hybrid-retrieval.d.ts.map +1 -1
- package/dist/hybrid-retrieval.js +86 -1
- package/dist/index.d.ts +12 -3
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +28 -2
- package/dist/knowledge-store.d.ts +4 -1
- package/dist/knowledge-store.d.ts.map +1 -1
- package/dist/knowledge-store.js +27 -4
- package/dist/library-schema.d.ts +12 -8
- package/dist/library-schema.d.ts.map +1 -1
- package/dist/library-schema.js +22 -8
- package/dist/message-store.d.ts.map +1 -1
- package/dist/message-store.js +7 -3
- package/dist/metrics-dashboard.d.ts +18 -1
- package/dist/metrics-dashboard.d.ts.map +1 -1
- package/dist/metrics-dashboard.js +52 -14
- package/dist/reranker.d.ts +1 -1
- package/dist/reranker.js +2 -2
- package/dist/schema.d.ts +1 -1
- package/dist/schema.d.ts.map +1 -1
- package/dist/schema.js +28 -1
- package/dist/seed.d.ts.map +1 -1
- package/dist/seed.js +2 -0
- package/dist/topic-synthesizer.d.ts +20 -0
- package/dist/topic-synthesizer.d.ts.map +1 -1
- package/dist/topic-synthesizer.js +113 -3
- package/dist/trigger-registry.d.ts.map +1 -1
- package/dist/trigger-registry.js +10 -2
- package/dist/types.d.ts +271 -1
- package/dist/types.d.ts.map +1 -1
- package/dist/version.d.ts +7 -7
- package/dist/version.d.ts.map +1 -1
- package/dist/version.js +17 -7
- package/docs/DIAGNOSTICS.md +205 -0
- package/docs/INTEGRATION_VALIDATION.md +186 -0
- package/docs/MIGRATION.md +9 -6
- package/docs/MIGRATION_GUIDE.md +125 -101
- package/docs/ROADMAP.md +238 -20
- package/docs/TUNING.md +19 -5
- package/install.sh +152 -401
- package/memory-plugin/LICENSE +190 -0
- package/memory-plugin/README.md +20 -0
- package/memory-plugin/dist/index.js +50 -0
- package/memory-plugin/package.json +2 -2
- package/package.json +18 -4
- package/plugin/LICENSE +190 -0
- package/plugin/README.md +20 -0
- package/plugin/dist/index.d.ts +29 -0
- package/plugin/dist/index.d.ts.map +1 -1
- package/plugin/dist/index.js +288 -23
- package/plugin/dist/index.js.map +1 -1
- package/plugin/package.json +2 -2
- package/scripts/install-runtime.mjs +12 -1
package/dist/compositor.js
CHANGED
|
@@ -15,11 +15,11 @@ import { filterByScope } from './retrieval-policy.js';
|
|
|
15
15
|
import { DEFAULT_TRIGGERS, matchTriggers, logRegistryStartup, } from './trigger-registry.js';
|
|
16
16
|
import { MessageStore } from './message-store.js';
|
|
17
17
|
import { SessionTopicMap } from './session-topic-map.js';
|
|
18
|
-
import { toProviderFormat } from './provider-translator.js';
|
|
18
|
+
import { toProviderFormat, detectProvider as s4DetectProvider } from './provider-translator.js';
|
|
19
19
|
import { DocChunkStore } from './doc-chunk-store.js';
|
|
20
20
|
import { hybridSearch } from './hybrid-retrieval.js';
|
|
21
|
-
import { ensureCompactionFenceSchema, updateCompactionFence, getCompactionFence } from './compaction-fence.js';
|
|
22
|
-
import { getActiveContext } from './context-store.js';
|
|
21
|
+
import { ensureCompactionFenceSchema, updateCompactionFence, getCompactionFence, getCompactionEligibility } from './compaction-fence.js';
|
|
22
|
+
import { getActiveContext, getOrCreateActiveContext } from './context-store.js';
|
|
23
23
|
import { rankKeystones, scoreKeystone } from './keystone-scorer.js';
|
|
24
24
|
import { buildOrgRegistryFromDb, defaultOrgRegistry } from './cross-agent.js';
|
|
25
25
|
import { getActiveFOS, matchMOD, renderFOS, renderMOD, renderLightFOS, resolveOutputTier, buildActionVerificationSummary } from './fos-mod.js';
|
|
@@ -27,8 +27,11 @@ import { KnowledgeStore } from './knowledge-store.js';
|
|
|
27
27
|
import { TemporalStore, hasTemporalSignals } from './temporal-store.js';
|
|
28
28
|
import { isOpenDomainQuery, searchOpenDomain } from './open-domain.js';
|
|
29
29
|
import { TRIM_BUDGET_POLICY, resolveTrimBudgets } from './budget-policy.js';
|
|
30
|
+
import { resolveAdaptiveLifecyclePolicy } from './adaptive-lifecycle.js';
|
|
30
31
|
import { formatToolChainStub, parseToolChainStub, formatArtifactRef, isArtifactRef } from './degradation.js';
|
|
31
32
|
import { ToolArtifactStore } from './tool-artifact-store.js';
|
|
33
|
+
import { insertCompositionSnapshot, getLatestValidCompositionSnapshot, listCompositionSnapshots, MAX_WARM_RESTORE_REPAIR_DEPTH, } from './composition-snapshot-store.js';
|
|
34
|
+
import { buildCompositionSnapshotSlots, restoreWarmSnapshotState, WARM_RESTORE_MEASUREMENT_GATES, } from './composition-snapshot-runtime.js';
|
|
32
35
|
/**
|
|
33
36
|
* Files that OpenClaw's contextInjection injects into the system prompt.
|
|
34
37
|
* HyperMem must not re-inject these via doc chunk retrieval to avoid duplication.
|
|
@@ -300,6 +303,71 @@ export function computeAdaptiveHistoryDepth(sessionType, observedDensity, histor
|
|
|
300
303
|
const depth = Math.floor((historyBudgetTokens * SAFETY_MARGIN) / observedDensity);
|
|
301
304
|
return Math.min(maxHistoryMessages, Math.max(20, depth));
|
|
302
305
|
}
|
|
306
|
+
// ─── Sprint 3: Unified Pressure Signal ───────────────────────────────────────────────────────
|
|
307
|
+
/**
 * Canonical pressure labels shared across compose and compaction paths.
 * Use these constants when setting the `pressureSource` field so all consumers
 * can filter logs with a stable string without guessing spellings.
 *
 * The map is frozen so no consumer can rewrite a label at runtime and
 * silently fork the telemetry vocabulary.
 */
export const PRESSURE_SOURCE = Object.freeze({
    /** Compose path: pressure derived from (budget - remaining) after full slot assembly. */
    COMPOSE_POST_ASSEMBLY: 'compose:post-assembly',
    /** Compose path: pressure measured immediately before semantic recall runs. */
    COMPOSE_PRE_RECALL: 'compose:pre-recall',
    /** Compaction path: pressure from Redis token estimate / effectiveBudget. */
    COMPACT_REDIS_ESTIMATE: 'compact:redis-estimate',
    /** Compaction path: pressure from runtime-reported currentTokenCount / effectiveBudget. */
    COMPACT_RUNTIME_TOTAL: 'compact:runtime-total',
    /** Tool-loop assemble path: pressure from in-memory working message array / effectiveBudget. */
    TOOLLOOP_RUNTIME_ARRAY: 'toolloop:runtime-array',
});
|
|
324
|
+
/**
 * Compute a unified pressure fraction so compose and compaction paths report
 * the same numeric concept without drift.
 *
 * The fraction is floored at 0: a negative ratio, a NaN ratio (non-numeric
 * input), or a non-positive budget all report zero pressure. There is no
 * upper clamp — callers get the raw over-budget fraction so they can decide
 * their own thresholds without us hardcoding them here.
 *
 * @param usedTokens   Tokens consumed (numerator).
 * @param budgetTokens Effective budget (denominator). Must be > 0; otherwise
 *                     the reported pressure is 0.
 * @param source       Label from PRESSURE_SOURCE for telemetry (metadata only).
 * @returns { fraction, pct, source } where fraction = usedTokens / budgetTokens
 *          (floored at 0), pct = Math.round(fraction * 100), source = the
 *          label passed in, unchanged.
 */
export function computeUnifiedPressure(usedTokens, budgetTokens, source) {
    const ratio = budgetTokens > 0 ? usedTokens / budgetTokens : 0;
    // `ratio > 0` is false for NaN, negatives and -0, so all of them collapse
    // to a plain 0 instead of leaking into band classification and telemetry.
    const fraction = ratio > 0 ? ratio : 0;
    const pct = Math.round(fraction * 100);
    return { fraction, pct, source };
}
|
|
342
|
+
/**
 * 0.9.0: adaptive lifecycle scales semantic-recall breadth in compose.
 *
 * Base fractions match the historical compositor constants so that a steady
 * (multiplier=1.0) call reproduces prior behavior exactly. Candidate limit is
 * clamped so even a critical-pressure pass keeps a usable retrieval window
 * and a /new surge does not blow up hybrid search cost.
 */
export const RECALL_BREADTH_BASE = Object.freeze({
    mainBudgetFraction: 0.12,
    fallbackBudgetFraction: 0.10,
    candidateLimit: 10,
    candidateLimitMin: 6,
    candidateLimitMax: 16,
});
/**
 * Apply the adaptive lifecycle smartRecallMultiplier to recall breadth.
 * Pure helper — does not read state or mutate anything. Steady multiplier=1
 * preserves the historical (0.12, 0.10, limit=10) recall envelope.
 *
 * @param remainingTokens Tokens still available in the compose budget. Values
 *                        that do not coerce to a finite number (NaN, Infinity,
 *                        undefined, non-numeric strings) are treated as 0, and
 *                        negatives are floored at 0.
 * @param multiplier      Lifecycle smartRecallMultiplier. Anything that is not
 *                        a finite number > 0 falls back to 1.
 * @returns { mainBudgetTokens, fallbackBudgetTokens, candidateLimit, multiplier }
 *          where the token budgets are whole numbers >= 0, candidateLimit is
 *          clamped to [candidateLimitMin, candidateLimitMax], and multiplier is
 *          the sanitized multiplier that was actually applied.
 */
export function scaleRecallBreadth(remainingTokens, multiplier) {
    const safeMultiplier = Number.isFinite(multiplier) && multiplier > 0 ? multiplier : 1;
    // Number() mirrors the coercion Math.floor would have applied, but lets us
    // reject NaN/Infinity up front so they cannot turn into NaN/Infinity budgets.
    const numericRemaining = Number(remainingTokens);
    const remaining = Number.isFinite(numericRemaining)
        ? Math.max(0, Math.floor(numericRemaining))
        : 0;
    const mainBudgetTokens = Math.max(0, Math.floor(remaining * RECALL_BREADTH_BASE.mainBudgetFraction * safeMultiplier));
    const fallbackBudgetTokens = Math.max(0, Math.floor(remaining * RECALL_BREADTH_BASE.fallbackBudgetFraction * safeMultiplier));
    const limitRaw = Math.ceil(RECALL_BREADTH_BASE.candidateLimit * safeMultiplier);
    const candidateLimit = Math.min(RECALL_BREADTH_BASE.candidateLimitMax, Math.max(RECALL_BREADTH_BASE.candidateLimitMin, limitRaw));
    return { mainBudgetTokens, fallbackBudgetTokens, candidateLimit, multiplier: safeMultiplier };
}
|
|
303
371
|
const DEFAULT_CONFIG = {
|
|
304
372
|
// Primary budget controls
|
|
305
373
|
budgetFraction: 0.703,
|
|
@@ -397,6 +465,82 @@ function clusterNeutralMessages(messages) {
|
|
|
397
465
|
}
|
|
398
466
|
return clusters;
|
|
399
467
|
}
|
|
468
|
+
/**
 * Plan the adaptive eviction order for a list of history clusters.
 *
 * Pure helper: it only reads `clusters`, `policy` and `opts` and returns a
 * plan; it never drops anything itself.
 *
 * Protected clusters (never offered as topic-aware drop candidates):
 *   - the most recent cluster containing a user-role message,
 *   - any cluster with a message whose metadata.dynamicBoundary === true,
 *   - any non-empty cluster made up solely of system-role messages.
 *
 * @param clusters Array of `{ messages: [...] }` clusters, oldest first.
 * @param policy   Lifecycle policy; only `policy.evictionPlan.preferTopicAwareDrop`
 *                 is read. A missing policy or evictionPlan is treated as
 *                 "not topic-aware" instead of throwing.
 * @param opts     Optional `{ activeTopicId }`. A nullish `opts` behaves like `{}`.
 * @returns { preferTopicAwareDrop, topicAwareDropOrder, protectedIndices, telemetry }
 *          where topicAwareDropOrder lists eligible cluster indices in ascending
 *          order, protectedIndices is a Set of cluster indices, and telemetry
 *          carries counts, topicId coverage (percent, two decimals) and an
 *          optional bypassReason explaining why no topic-aware candidates apply.
 */
export function orderClustersForAdaptiveEviction(clusters, policy, opts = {}) {
    // Degrade to the historical (non-topic-aware) plan when the caller hands
    // us a partial policy, rather than crashing the compose pass.
    const preferTopicAwareDrop = policy?.evictionPlan?.preferTopicAwareDrop ?? false;
    const activeId = opts?.activeTopicId;
    const carriesToolContent = (m) => (m.toolCalls && m.toolCalls.length > 0)
        || (m.toolResults && m.toolResults.length > 0);
    const protectedIndices = new Set();
    // Protect the most-recent user-role cluster (current-user-turn proxy when
    // the prompt is appended via history rather than as a separate message).
    for (let i = clusters.length - 1; i >= 0; i--) {
        if (clusters[i].messages.some(m => m.role === 'user')) {
            protectedIndices.add(i);
            break;
        }
    }
    // Protect dynamicBoundary clusters and pure-system clusters, and gather the
    // topicId coverage counters in the same sweep.
    let totalMessages = 0;
    let stampedMessages = 0;
    for (let i = 0; i < clusters.length; i++) {
        const { messages } = clusters[i];
        totalMessages += messages.length;
        stampedMessages += messages.filter(m => typeof m.topicId === 'string').length;
        if (messages.some(m => m.metadata?.dynamicBoundary === true)) {
            protectedIndices.add(i);
        }
        if (messages.length > 0 && messages.every(m => m.role === 'system')) {
            protectedIndices.add(i);
        }
    }
    const topicIdCoveragePct = totalMessages > 0
        ? Math.round((stampedMessages / totalMessages) * 10000) / 100
        : 0;
    const topicAwareDropOrder = [];
    if (preferTopicAwareDrop && activeId) {
        for (let i = 0; i < clusters.length; i++) {
            if (protectedIndices.has(i)) {
                continue;
            }
            const { messages } = clusters[i];
            // Tool clusters are handled by ballast reduction; skip from
            // topic-aware drop preference to keep tool chains atomic.
            if (messages.some(carriesToolContent)) {
                continue;
            }
            if (messages.length === 0) {
                continue;
            }
            // Inactive-topic predicate: every message in the cluster carries a
            // topicId distinct from the active topic. Messages without topicId
            // (legacy/unscoped) are not promoted to drop candidates so we don't
            // regress sessions that pre-date topic stamping.
            const allInactive = messages.every(m => typeof m.topicId === 'string' && m.topicId !== activeId);
            if (allInactive) {
                topicAwareDropOrder.push(i);
            }
        }
    }
    // First matching reason wins; undefined means topic-aware candidates exist.
    let bypassReason;
    if (!activeId) {
        bypassReason = 'no-active-topic';
    }
    else if (stampedMessages === 0) {
        bypassReason = 'no-stamped-clusters';
    }
    else if (!preferTopicAwareDrop) {
        bypassReason = 'band-not-topic-aware';
    }
    else if (topicAwareDropOrder.length === 0) {
        bypassReason = 'no-eligible-inactive-topic-clusters';
    }
    return {
        preferTopicAwareDrop,
        topicAwareDropOrder,
        protectedIndices,
        telemetry: {
            topicAwareEligibleClusters: topicAwareDropOrder.length,
            // Always 0 here: this helper only plans; it performs no drops.
            topicAwareDroppedClusters: 0,
            protectedClusters: protectedIndices.size,
            topicIdCoveragePct,
            bypassReason,
        },
    };
}
|
|
400
544
|
/**
|
|
401
545
|
* Public reshape helper: apply tool gradient then trim to fit within a token budget.
|
|
402
546
|
*
|
|
@@ -896,6 +1040,9 @@ export function resolveArtifactOversizeThreshold(effectiveBudget) {
|
|
|
896
1040
|
const raw = Math.floor(softBudget * ARTIFACT_BUDGET_FRACTION);
|
|
897
1041
|
return Math.min(ARTIFACT_THRESHOLD_CEILING, Math.max(ARTIFACT_THRESHOLD_FLOOR, raw));
|
|
898
1042
|
}
|
|
1043
|
+
/**
 * Report whether a prompt is an explicit `/new` command.
 *
 * Matches case-insensitively after trimming surrounding whitespace; `/new`
 * must be the whole prompt or be followed by whitespace, so `/newer` does
 * not count.
 *
 * @param prompt Candidate prompt text. Anything that is not a string
 *               (null, undefined, numbers, objects) is never a `/new` prompt.
 * @returns true when the trimmed prompt starts with the `/new` command.
 */
function isExplicitNewSessionPrompt(prompt) {
    // Guard on type rather than nullishness so a non-string prompt cannot
    // throw on `.trim()` in the middle of a compose pass.
    if (typeof prompt !== 'string') {
        return false;
    }
    return /^\/new(?:\s|$)/i.test(prompt.trim());
}
|
|
899
1046
|
/**
|
|
900
1047
|
* C2: Degrade an oversized doc chunk to a canonical ArtifactRef string.
|
|
901
1048
|
*
|
|
@@ -1127,6 +1274,10 @@ export class Compositor {
|
|
|
1127
1274
|
vectorStore;
|
|
1128
1275
|
libraryDb;
|
|
1129
1276
|
triggerRegistry;
|
|
1277
|
+
reranker;
|
|
1278
|
+
rerankerMinCandidates;
|
|
1279
|
+
rerankerMaxDocuments;
|
|
1280
|
+
rerankerTopK;
|
|
1130
1281
|
/** Cached org registry loaded from fleet_agents at construction time. */
|
|
1131
1282
|
_orgRegistry;
|
|
1132
1283
|
constructor(deps, config) {
|
|
@@ -1134,6 +1285,10 @@ export class Compositor {
|
|
|
1134
1285
|
this.vectorStore = deps.vectorStore || null;
|
|
1135
1286
|
this.libraryDb = deps.libraryDb || null;
|
|
1136
1287
|
this.triggerRegistry = deps.triggerRegistry || DEFAULT_TRIGGERS;
|
|
1288
|
+
this.reranker = deps.reranker ?? null;
|
|
1289
|
+
this.rerankerMinCandidates = deps.rerankerMinCandidates ?? 2;
|
|
1290
|
+
this.rerankerMaxDocuments = deps.rerankerMaxDocuments;
|
|
1291
|
+
this.rerankerTopK = deps.rerankerTopK;
|
|
1137
1292
|
// Load org registry from DB on init; fall back to hardcoded if DB empty.
|
|
1138
1293
|
this._orgRegistry = this.libraryDb
|
|
1139
1294
|
? buildOrgRegistryFromDb(this.libraryDb)
|
|
@@ -1151,6 +1306,13 @@ export class Compositor {
|
|
|
1151
1306
|
setVectorStore(vs) {
|
|
1152
1307
|
this.vectorStore = vs;
|
|
1153
1308
|
}
|
|
1309
|
+
/**
|
|
1310
|
+
* Set or replace the reranker after construction.
|
|
1311
|
+
* Called by hypermem.create() once the reranker config has been resolved.
|
|
1312
|
+
*/
|
|
1313
|
+
setReranker(rr) {
|
|
1314
|
+
this.reranker = rr;
|
|
1315
|
+
}
|
|
1154
1316
|
/**
|
|
1155
1317
|
* Hot-reload the org registry from the fleet_agents table.
|
|
1156
1318
|
* Call after fleet membership changes (new agent, org restructure)
|
|
@@ -1455,6 +1617,41 @@ export class Compositor {
|
|
|
1455
1617
|
? Math.min(request.historyDepth, s4AdaptiveDepth)
|
|
1456
1618
|
: s4AdaptiveDepth;
|
|
1457
1619
|
let remaining = budget;
|
|
1620
|
+
// 0.9.0: resolve an early adaptive lifecycle posture for the
|
|
1621
|
+
// compose-window cluster-drop pass. Pressure is estimated from the
|
|
1622
|
+
// SQLite sample over the effective budget so the eviction-order
|
|
1623
|
+
// decision routes through the same band classifier the rest of the
|
|
1624
|
+
// 0.9.0 paths already use — no parallel pressure constants here.
|
|
1625
|
+
const s09SampleTokens = sampleMessages.reduce((sum, m) => sum + estimateMessageTokens(m), 0);
|
|
1626
|
+
const s09EvictionPressure = computeUnifiedPressure(s09SampleTokens, budget, PRESSURE_SOURCE.COMPOSE_PRE_RECALL);
|
|
1627
|
+
let s09ObservedUserTurnCount = sampleMessages.filter(m => m.role === 'user').length;
|
|
1628
|
+
const s09ForkedContextSeed = request.forkedContext?.enabled ? request.forkedContext : undefined;
|
|
1629
|
+
const s09ForkedParentPressure = typeof s09ForkedContextSeed?.parentPressureFraction === 'number'
|
|
1630
|
+
&& Number.isFinite(s09ForkedContextSeed.parentPressureFraction)
|
|
1631
|
+
? s09ForkedContextSeed.parentPressureFraction
|
|
1632
|
+
: undefined;
|
|
1633
|
+
const s09EvictionPolicyPressure = s09ForkedContextSeed
|
|
1634
|
+
&& s09ObservedUserTurnCount === 0
|
|
1635
|
+
&& s09ForkedParentPressure != null
|
|
1636
|
+
? s09ForkedParentPressure
|
|
1637
|
+
: s09EvictionPressure.fraction;
|
|
1638
|
+
const evictionLifecyclePolicy = resolveAdaptiveLifecyclePolicy({
|
|
1639
|
+
pressureFraction: s09EvictionPolicyPressure,
|
|
1640
|
+
userTurnCount: s09ObservedUserTurnCount,
|
|
1641
|
+
explicitNewSession: isExplicitNewSessionPrompt(request.prompt ?? null),
|
|
1642
|
+
forkedContext: Boolean(s09ForkedContextSeed),
|
|
1643
|
+
forkedParentPressureFraction: s09ForkedParentPressure,
|
|
1644
|
+
forkedParentUserTurnCount: s09ForkedContextSeed?.parentUserTurnCount,
|
|
1645
|
+
});
|
|
1646
|
+
let adaptiveEvictionTopicAwareEligibleClusters = 0;
|
|
1647
|
+
let adaptiveEvictionTopicAwareDroppedClusters = 0;
|
|
1648
|
+
let adaptiveEvictionProtectedClusters = 0;
|
|
1649
|
+
let adaptiveEvictionTopicIdCoveragePct = 0;
|
|
1650
|
+
let adaptiveEvictionBypassReason;
|
|
1651
|
+
let composeTopicSource = 'none';
|
|
1652
|
+
let composeTopicState = 'history-disabled';
|
|
1653
|
+
let composeTopicMessageCount = 0;
|
|
1654
|
+
let composeTopicStampedMessageCount = 0;
|
|
1458
1655
|
// Phase 0 fence enforcement: resolve the compaction fence for this conversation.
|
|
1459
1656
|
// All downstream message queries use this as a lower bound to exclude zombie
|
|
1460
1657
|
// messages below the fence that should have been compacted.
|
|
@@ -1506,6 +1703,27 @@ export class Compositor {
|
|
|
1506
1703
|
slots.identity = tokens;
|
|
1507
1704
|
remaining -= tokens;
|
|
1508
1705
|
}
|
|
1706
|
+
const repairNoticeContent = await this.getSlotContent(request.agentId, request.sessionKey, 'repair_notice', db);
|
|
1707
|
+
// ─── Warm-Restore Repair Notice (never suppressed) ─────────
|
|
1708
|
+
// If a session was reconstructed from a snapshot, the repair notice must
|
|
1709
|
+
// stay above restored conversation content even under budget pressure.
|
|
1710
|
+
// This mirrors the system/identity invariant: history and memory slots may
|
|
1711
|
+
// be trimmed, but the provenance notice is not optional operational state.
|
|
1712
|
+
if (repairNoticeContent) {
|
|
1713
|
+
const tokens = estimateTokens(repairNoticeContent);
|
|
1714
|
+
messages.push({
|
|
1715
|
+
role: 'system',
|
|
1716
|
+
textContent: repairNoticeContent,
|
|
1717
|
+
toolCalls: null,
|
|
1718
|
+
toolResults: null,
|
|
1719
|
+
metadata: { warmRestoreRepairNotice: true },
|
|
1720
|
+
});
|
|
1721
|
+
slots.system += tokens;
|
|
1722
|
+
remaining -= tokens;
|
|
1723
|
+
if (remaining < 0) {
|
|
1724
|
+
warnings.push('Warm-restore repair notice exceeded budget but was retained as non-suppressible system context');
|
|
1725
|
+
}
|
|
1726
|
+
}
|
|
1509
1727
|
// ─── Stable Output Profile Prefix ──────────────────────────
|
|
1510
1728
|
// Keep deterministic output instructions on the static side of the cache
|
|
1511
1729
|
// boundary so Anthropic and OpenAI warm-prefix caching can reuse them.
|
|
@@ -1577,8 +1795,10 @@ export class Compositor {
|
|
|
1577
1795
|
try {
|
|
1578
1796
|
const topicMap = new SessionTopicMap(db);
|
|
1579
1797
|
activeTopic = topicMap.getActiveTopic(request.sessionKey) || undefined;
|
|
1580
|
-
if (activeTopic)
|
|
1798
|
+
if (activeTopic) {
|
|
1581
1799
|
activeTopicId = activeTopic.id;
|
|
1800
|
+
composeTopicSource = 'session-topic-map';
|
|
1801
|
+
}
|
|
1582
1802
|
}
|
|
1583
1803
|
catch {
|
|
1584
1804
|
// Topic lookup is best-effort — fall back to full history
|
|
@@ -1586,6 +1806,7 @@ export class Compositor {
|
|
|
1586
1806
|
}
|
|
1587
1807
|
else {
|
|
1588
1808
|
activeTopicId = request.topicId;
|
|
1809
|
+
composeTopicSource = 'request-topic-id';
|
|
1589
1810
|
try {
|
|
1590
1811
|
activeTopic = db.prepare(`
|
|
1591
1812
|
SELECT id, name
|
|
@@ -1616,6 +1837,9 @@ export class Compositor {
|
|
|
1616
1837
|
}
|
|
1617
1838
|
return true;
|
|
1618
1839
|
});
|
|
1840
|
+
s09ObservedUserTurnCount = Math.max(s09ObservedUserTurnCount, historyMessages.filter(m => m.role === 'user').length);
|
|
1841
|
+
composeTopicMessageCount = historyMessages.length;
|
|
1842
|
+
composeTopicStampedMessageCount = historyMessages.filter(m => typeof m.topicId === 'string').length;
|
|
1619
1843
|
// ── Transform-first: apply gradient tool treatment BEFORE budget math ──
|
|
1620
1844
|
// All tool payloads are in their final form before any token estimation.
|
|
1621
1845
|
// This ensures estimateMessageTokens() measures actual submission cost,
|
|
@@ -1643,25 +1867,84 @@ export class Compositor {
|
|
|
1643
1867
|
// of raw config.historyFraction so history doesn't overflow MECW ceiling.
|
|
1644
1868
|
const historyBudget = Math.floor(budget * b4HistoryFraction);
|
|
1645
1869
|
const historyFillCap = Math.min(historyBudget, remaining);
|
|
1870
|
+
// 0.9.0: adaptive eviction ordering. For elevated/high/critical bands,
|
|
1871
|
+
// drop inactive-topic non-tool clusters first when an active topic is
|
|
1872
|
+
// known. Bootstrap/warmup/steady reproduce the historical newest-first
|
|
1873
|
+
// sweep exactly (preferTopicAwareDrop=false → evictedByPlan stays empty).
|
|
1874
|
+
const adaptiveOrdering = orderClustersForAdaptiveEviction(budgetClusters, evictionLifecyclePolicy, { activeTopicId });
|
|
1875
|
+
adaptiveEvictionTopicAwareEligibleClusters = adaptiveOrdering.telemetry.topicAwareEligibleClusters;
|
|
1876
|
+
adaptiveEvictionProtectedClusters = adaptiveOrdering.telemetry.protectedClusters;
|
|
1877
|
+
adaptiveEvictionTopicIdCoveragePct = adaptiveOrdering.telemetry.topicIdCoveragePct;
|
|
1878
|
+
adaptiveEvictionBypassReason = adaptiveOrdering.telemetry.bypassReason;
|
|
1879
|
+
if (!activeTopicId)
|
|
1880
|
+
composeTopicState = 'no-active-topic';
|
|
1881
|
+
else if (composeTopicStampedMessageCount === 0)
|
|
1882
|
+
composeTopicState = 'active-topic-missing-stamped-history';
|
|
1883
|
+
else
|
|
1884
|
+
composeTopicState = 'active-topic-ready';
|
|
1885
|
+
const evictedByPlan = new Set();
|
|
1886
|
+
let projectedTokens = budgetClusters.reduce((s, c) => s + c.tokenCost, 0);
|
|
1887
|
+
if (adaptiveOrdering.preferTopicAwareDrop
|
|
1888
|
+
&& adaptiveOrdering.topicAwareDropOrder.length > 0
|
|
1889
|
+
&& projectedTokens <= historyFillCap) {
|
|
1890
|
+
adaptiveEvictionBypassReason = 'within-budget';
|
|
1891
|
+
}
|
|
1892
|
+
if (adaptiveOrdering.preferTopicAwareDrop
|
|
1893
|
+
&& adaptiveOrdering.topicAwareDropOrder.length > 0
|
|
1894
|
+
&& projectedTokens > historyFillCap) {
|
|
1895
|
+
for (const idx of adaptiveOrdering.topicAwareDropOrder) {
|
|
1896
|
+
if (projectedTokens <= historyFillCap)
|
|
1897
|
+
break;
|
|
1898
|
+
if (adaptiveOrdering.protectedIndices.has(idx))
|
|
1899
|
+
continue;
|
|
1900
|
+
evictedByPlan.add(idx);
|
|
1901
|
+
projectedTokens -= budgetClusters[idx].tokenCost;
|
|
1902
|
+
}
|
|
1903
|
+
adaptiveEvictionTopicAwareDroppedClusters = evictedByPlan.size;
|
|
1904
|
+
}
|
|
1905
|
+
let truncationCutIndex = -1;
|
|
1646
1906
|
for (let i = budgetClusters.length - 1; i >= 0; i--) {
|
|
1907
|
+
if (evictedByPlan.has(i))
|
|
1908
|
+
continue;
|
|
1647
1909
|
const cluster = budgetClusters[i];
|
|
1648
1910
|
if (historyTokens + cluster.tokenCost > historyFillCap && includedClusters.length > 0) {
|
|
1649
|
-
|
|
1650
|
-
|
|
1651
|
-
|
|
1652
|
-
|
|
1653
|
-
|
|
1654
|
-
|
|
1911
|
+
truncationCutIndex = i;
|
|
1912
|
+
break;
|
|
1913
|
+
}
|
|
1914
|
+
includedClusters.unshift(cluster);
|
|
1915
|
+
historyTokens += cluster.tokenCost;
|
|
1916
|
+
}
|
|
1917
|
+
if (truncationCutIndex >= 0 || evictedByPlan.size > 0) {
|
|
1918
|
+
const droppedIndices = [];
|
|
1919
|
+
if (truncationCutIndex >= 0) {
|
|
1920
|
+
for (let i = 0; i <= truncationCutIndex; i++) {
|
|
1921
|
+
if (!evictedByPlan.has(i))
|
|
1922
|
+
droppedIndices.push(i);
|
|
1655
1923
|
}
|
|
1924
|
+
}
|
|
1925
|
+
for (const idx of evictedByPlan)
|
|
1926
|
+
droppedIndices.push(idx);
|
|
1927
|
+
const droppedClusters = droppedIndices.map(i => budgetClusters[i]);
|
|
1928
|
+
const droppedMsgCount = droppedClusters.reduce((s, c) => s + c.messages.length, 0);
|
|
1929
|
+
const droppedToolResultCount = droppedClusters.reduce((sum, c) => sum + c.messages.filter(m => (m.toolResults?.length ?? 0) > 0).length, 0);
|
|
1930
|
+
if (droppedToolResultCount > 0) {
|
|
1931
|
+
c1CoEjections += droppedToolResultCount;
|
|
1932
|
+
console.info(`[hypermem:compositor] tool-chain co-eject reason=budget_cluster_drop count=${droppedToolResultCount} messages dropped`);
|
|
1933
|
+
}
|
|
1934
|
+
if (droppedMsgCount > 0) {
|
|
1656
1935
|
const c1Note = droppedToolResultCount > 0
|
|
1657
1936
|
? ` [C1: ${droppedToolResultCount} co-ejected reason=budget_cluster_drop]`
|
|
1658
1937
|
: '';
|
|
1659
|
-
|
|
1660
|
-
|
|
1661
|
-
|
|
1938
|
+
const planNote = evictedByPlan.size > 0
|
|
1939
|
+
? ` [adaptive: band=${evictionLifecyclePolicy.band} topic-aware-dropped=${evictedByPlan.size}]`
|
|
1940
|
+
: '';
|
|
1941
|
+
const cutLabel = truncationCutIndex >= 0
|
|
1942
|
+
? `${truncationCutIndex + 1}/${budgetClusters.length}`
|
|
1943
|
+
: `0/${budgetClusters.length}`;
|
|
1944
|
+
warnings.push(`History truncated at cluster ${cutLabel} (${droppedMsgCount} messages dropped)${c1Note}${planNote}`);
|
|
1945
|
+
if (truncationCutIndex >= 0)
|
|
1946
|
+
s4RescueTrimFired = true;
|
|
1662
1947
|
}
|
|
1663
|
-
includedClusters.unshift(cluster);
|
|
1664
|
-
historyTokens += cluster.tokenCost;
|
|
1665
1948
|
}
|
|
1666
1949
|
const includedHistory = includedClusters.flatMap(c => c.messages);
|
|
1667
1950
|
// ── Keystone History Slot (P2.1) ──────────────────────────────────
|
|
@@ -1829,6 +2112,10 @@ export class Compositor {
|
|
|
1829
2112
|
let diagFingerprintDedups = 0;
|
|
1830
2113
|
let diagFingerprintCollisions = 0;
|
|
1831
2114
|
let diagRetrievalMode = 'none';
|
|
2115
|
+
// Sprint 1: reranker telemetry captured from hybridSearch via onRerankerTelemetry
|
|
2116
|
+
let diagRerankerStatus;
|
|
2117
|
+
let diagRerankerCandidates;
|
|
2118
|
+
let diagRerankerProvider;
|
|
1832
2119
|
function normalizeFingerprintText(text) {
|
|
1833
2120
|
return text.toLowerCase().replace(/\s+/g, ' ').trim();
|
|
1834
2121
|
}
|
|
@@ -2104,6 +2391,26 @@ export class Compositor {
|
|
|
2104
2391
|
// Use request.prompt as the retrieval query when available — it is the
|
|
2105
2392
|
// live current-turn text. Falling back to getLastUserMessage(messages)
|
|
2106
2393
|
// reads from the already-assembled history, which is one turn stale.
|
|
2394
|
+
// 0.9.0: resolve adaptive lifecycle policy immediately before semantic recall
|
|
2395
|
+
// so smartRecallMultiplier scales the recall token budget and candidate limit
|
|
2396
|
+
// from the same policy object that compose diagnostics later report.
|
|
2397
|
+
const composePreRecallPressure = computeUnifiedPressure(contextTokens, budget, PRESSURE_SOURCE.COMPOSE_PRE_RECALL);
|
|
2398
|
+
const s09ComposePolicyPressure = s09ForkedContextSeed
|
|
2399
|
+
&& s09ObservedUserTurnCount === 0
|
|
2400
|
+
&& s09ForkedParentPressure != null
|
|
2401
|
+
? s09ForkedParentPressure
|
|
2402
|
+
: composePreRecallPressure.fraction;
|
|
2403
|
+
const composeLifecyclePolicy = resolveAdaptiveLifecyclePolicy({
|
|
2404
|
+
pressureFraction: s09ComposePolicyPressure,
|
|
2405
|
+
userTurnCount: s09ObservedUserTurnCount,
|
|
2406
|
+
explicitNewSession: isExplicitNewSessionPrompt(request.prompt ?? this.getLastUserMessage(messages)),
|
|
2407
|
+
forkedContext: Boolean(s09ForkedContextSeed),
|
|
2408
|
+
forkedParentPressureFraction: s09ForkedParentPressure,
|
|
2409
|
+
forkedParentUserTurnCount: s09ForkedContextSeed?.parentUserTurnCount,
|
|
2410
|
+
});
|
|
2411
|
+
const recallBreadth = scaleRecallBreadth(remaining, composeLifecyclePolicy.smartRecallMultiplier);
|
|
2412
|
+
let diagAdaptiveRecallBudgetTokens;
|
|
2413
|
+
let diagAdaptiveRecallCandidateLimit;
|
|
2107
2414
|
if (request.includeSemanticRecall !== false && remaining > 500 && (this.vectorStore || libDb)) {
|
|
2108
2415
|
const lastUserMsg = request.prompt?.trim() || this.getLastUserMessage(messages);
|
|
2109
2416
|
if (lastUserMsg) {
|
|
@@ -2118,9 +2425,17 @@ export class Compositor {
|
|
|
2118
2425
|
catch {
|
|
2119
2426
|
// Redis lookup is best-effort — fall through to Ollama
|
|
2120
2427
|
}
|
|
2121
|
-
|
|
2122
|
-
|
|
2123
|
-
|
|
2428
|
+
diagAdaptiveRecallBudgetTokens = recallBreadth.mainBudgetTokens;
|
|
2429
|
+
diagAdaptiveRecallCandidateLimit = recallBreadth.candidateLimit;
|
|
2430
|
+
const semanticContent = await this.buildSemanticRecall(lastUserMsg, request.agentId,
|
|
2431
|
+
// 0.9.0: recall token budget = base 0.12 of remaining * lifecycle multiplier.
|
|
2432
|
+
recallBreadth.mainBudgetTokens, libDb || undefined, precomputedEmbedding, contextFingerprints, // C2: skip results already in Active Facts
|
|
2433
|
+
// Sprint 1: capture reranker telemetry at assemble level
|
|
2434
|
+
(ev) => {
|
|
2435
|
+
diagRerankerStatus = ev.status;
|
|
2436
|
+
diagRerankerCandidates = ev.candidates;
|
|
2437
|
+
diagRerankerProvider = ev.provider;
|
|
2438
|
+
}, recallBreadth.candidateLimit);
|
|
2124
2439
|
if (semanticContent) {
|
|
2125
2440
|
const tokens = estimateTokens(semanticContent);
|
|
2126
2441
|
volatileContextParts.push(`## Related Memory\n${semanticContent}`);
|
|
@@ -2256,15 +2571,21 @@ export class Compositor {
|
|
|
2256
2571
|
volatileContextParts.push(docParts.join('\n\n'));
|
|
2257
2572
|
}
|
|
2258
2573
|
}
|
|
2259
|
-
else if (remaining > 400 && (this.vectorStore || libDb)) {
|
|
2574
|
+
else if (request.includeSemanticRecall !== false && remaining > 400 && (this.vectorStore || libDb)) {
|
|
2260
2575
|
// Trigger-miss fallback: no trigger fired — attempt bounded semantic retrieval
|
|
2261
2576
|
// so there is never a silent zero-memory path on doc chunks.
|
|
2262
2577
|
// INVARIANT: this block is mutually exclusive with triggered-retrieval above.
|
|
2263
2578
|
// If refactored to run both paths, cap combined semantic budget to avoid double-recall.
|
|
2264
2579
|
try {
|
|
2580
|
+
// 0.9.0: trigger-miss fallback uses the same lifecycle-scaled breadth so
|
|
2581
|
+
// a /new surge widens fallback recall and high/critical pressure narrows it.
|
|
2582
|
+
if (diagAdaptiveRecallBudgetTokens === undefined) {
|
|
2583
|
+
diagAdaptiveRecallBudgetTokens = recallBreadth.fallbackBudgetTokens;
|
|
2584
|
+
diagAdaptiveRecallCandidateLimit = recallBreadth.candidateLimit;
|
|
2585
|
+
}
|
|
2265
2586
|
const fallbackContent = await Promise.race([
|
|
2266
|
-
this.buildSemanticRecall(lastMsg, request.agentId,
|
|
2267
|
-
),
|
|
2587
|
+
this.buildSemanticRecall(lastMsg, request.agentId, recallBreadth.fallbackBudgetTokens, libDb || undefined, undefined, contextFingerprints, // C2: skip results already in Active Facts
|
|
2588
|
+
undefined, recallBreadth.candidateLimit),
|
|
2268
2589
|
new Promise((_, reject) => setTimeout(() => reject(new Error('fallback_knn_timeout')), 3000)),
|
|
2269
2590
|
]);
|
|
2270
2591
|
if (fallbackContent) {
|
|
@@ -2365,7 +2686,23 @@ export class Compositor {
|
|
|
2365
2686
|
messages.splice(stableInsertIdx, 0, ...stablePrefixMessages);
|
|
2366
2687
|
}
|
|
2367
2688
|
// ── Inject assembled context block ──────────────────────
|
|
2689
|
+
// Sprint 4: Prompt-tail placement.
|
|
2690
|
+
// Volatile context (active facts, temporal, open-domain, semantic recall,
|
|
2691
|
+
// doc chunks, cross-session) moves AFTER all history messages so that
|
|
2692
|
+
// query-shaped material lands near the user turn rather than buried mid-prompt.
|
|
2693
|
+
//
|
|
2694
|
+
// Layout after Sprint 4:
|
|
2695
|
+
// [stable prefix: system, identity, FOS/MOD, stable facts, knowledge, prefs]
|
|
2696
|
+
// [history: keystones, cross-topic, recent conversation messages]
|
|
2697
|
+
// [volatile context block ← here, at the tail] ← Sprint 4 reorder
|
|
2698
|
+
// [last user message]
|
|
2699
|
+
//
|
|
2700
|
+
// The cache boundary (dynamicBoundary: true) stays on this block so the
|
|
2701
|
+
// Anthropic/OpenAI cache-prefix logic still fires correctly — everything
|
|
2702
|
+
// ABOVE this message is the stable prefix eligible for caching.
|
|
2368
2703
|
const assembledContextBlock = volatileContextParts.length > 0 ? volatileContextParts.join('\n\n') : undefined;
|
|
2704
|
+
let s4VolatileContextPosition;
|
|
2705
|
+
let s4MessagesBeforeVolatile;
|
|
2369
2706
|
if (assembledContextBlock) {
|
|
2370
2707
|
const contextMsg = {
|
|
2371
2708
|
role: 'system',
|
|
@@ -2377,7 +2714,23 @@ export class Compositor {
|
|
|
2377
2714
|
// everything at or below it is per-session / per-turn context.
|
|
2378
2715
|
metadata: { dynamicBoundary: true, cacheBoundarySlot: CACHE_PREFIX_BOUNDARY_SLOT },
|
|
2379
2716
|
};
|
|
2380
|
-
|
|
2717
|
+
// Sprint 4: Insert at tail (end of messages array), AFTER history.
|
|
2718
|
+
// The last user message (if any) should remain the final message, so we
|
|
2719
|
+
// insert the volatile block just before the last user message.
|
|
2720
|
+
const lastMsgIdx = messages.length - 1;
|
|
2721
|
+
const lastMsg = lastMsgIdx >= 0 ? messages[lastMsgIdx] : undefined;
|
|
2722
|
+
if (lastMsg && lastMsg.role === 'user') {
|
|
2723
|
+
// Insert volatile block before the last user message so user turn stays last
|
|
2724
|
+
messages.splice(lastMsgIdx, 0, contextMsg);
|
|
2725
|
+
s4VolatileContextPosition = lastMsgIdx;
|
|
2726
|
+
s4MessagesBeforeVolatile = lastMsgIdx;
|
|
2727
|
+
}
|
|
2728
|
+
else {
|
|
2729
|
+
// No trailing user message — append at end
|
|
2730
|
+
messages.push(contextMsg);
|
|
2731
|
+
s4VolatileContextPosition = messages.length - 1;
|
|
2732
|
+
s4MessagesBeforeVolatile = messages.length - 1;
|
|
2733
|
+
}
|
|
2381
2734
|
}
|
|
2382
2735
|
const stablePrefix = getStablePrefixMessages(messages);
|
|
2383
2736
|
const prefixSegmentCount = stablePrefix.length;
|
|
@@ -2404,6 +2757,9 @@ export class Compositor {
|
|
|
2404
2757
|
let trimCount = 0;
|
|
2405
2758
|
// Collect indices of messages to eject before mutating the array.
|
|
2406
2759
|
// Walk forward from the first non-system message, trimming oldest first.
|
|
2760
|
+
// Sprint 4: Skip the volatile context block (dynamicBoundary: true) — it
|
|
2761
|
+
// is query-shaped content that should not be evicted during the safety
|
|
2762
|
+
// valve pass. The stable prefix system messages are also protected (role=system).
|
|
2407
2763
|
const firstNonSystemIdx = messages.findIndex(m => m.role !== 'system');
|
|
2408
2764
|
const ejectIndices = new Set();
|
|
2409
2765
|
if (firstNonSystemIdx >= 0) {
|
|
@@ -2412,6 +2768,12 @@ export class Compositor {
|
|
|
2412
2768
|
// Don't trim the last user message (current prompt).
|
|
2413
2769
|
if (i === messages.length - 1 && messages[i].role === 'user')
|
|
2414
2770
|
break;
|
|
2771
|
+
// Sprint 4: Don't trim the volatile context block (dynamicBoundary marker).
|
|
2772
|
+
const meta = messages[i].metadata;
|
|
2773
|
+
if (meta?.dynamicBoundary) {
|
|
2774
|
+
i++;
|
|
2775
|
+
continue;
|
|
2776
|
+
}
|
|
2415
2777
|
const msgTokens = estimateMessageTokens(messages[i]);
|
|
2416
2778
|
ejectIndices.add(i);
|
|
2417
2779
|
trimmed += msgTokens;
|
|
@@ -2455,6 +2817,8 @@ export class Compositor {
|
|
|
2455
2817
|
}
|
|
2456
2818
|
}
|
|
2457
2819
|
const totalTokens = budget - remaining;
|
|
2820
|
+
// Sprint 3: Unified pressure signal — compose path
|
|
2821
|
+
const s3Pressure = computeUnifiedPressure(totalTokens, budget, PRESSURE_SOURCE.COMPOSE_POST_ASSEMBLY);
|
|
2458
2822
|
// ─── Slot reconciliation ─────────────────────────────────────────────────
|
|
2459
2823
|
// totalTokens = budget - remaining is the authoritative spend figure.
|
|
2460
2824
|
// The slot accounting can drift from this due to history trim (which
|
|
@@ -2475,10 +2839,33 @@ export class Compositor {
|
|
|
2475
2839
|
// Record the oldest message ID that the LLM can see in this compose
|
|
2476
2840
|
// cycle. Everything below this ID becomes eligible for compaction.
|
|
2477
2841
|
// If history was included, query the DB for the oldest included message.
|
|
2842
|
+
//
|
|
2843
|
+
// Sprint 1: Capture compaction eligibility counts BEFORE updating the fence
|
|
2844
|
+
// so we can report how many messages were eligible at the start of this pass.
|
|
2845
|
+
let diagCompactionEligibleCount;
|
|
2846
|
+
let diagCompactionEligibleRatio;
|
|
2847
|
+
let diagCompactionProcessedCount;
|
|
2478
2848
|
if (request.includeHistory !== false && slots.history > 0) {
|
|
2479
2849
|
try {
|
|
2480
2850
|
const conversation = store.getConversation(request.sessionKey);
|
|
2481
2851
|
if (conversation) {
|
|
2852
|
+
// Sprint 1: read eligibility BEFORE advancing the fence
|
|
2853
|
+
try {
|
|
2854
|
+
ensureCompactionFenceSchema(db);
|
|
2855
|
+
const eligibilityBefore = getCompactionEligibility(db, conversation.id);
|
|
2856
|
+
if (eligibilityBefore.fence !== null) {
|
|
2857
|
+
// Total messages in the conversation, regardless of fence (denominator for ratio)
|
|
2858
|
+
const totalRow = db.prepare('SELECT COUNT(*) AS cnt FROM messages WHERE conversation_id = ?').get(conversation.id);
|
|
2859
|
+
const totalMessages = totalRow?.cnt ?? 0;
|
|
2860
|
+
diagCompactionEligibleCount = eligibilityBefore.eligibleCount;
|
|
2861
|
+
diagCompactionEligibleRatio = totalMessages > 0
|
|
2862
|
+
? Math.round((eligibilityBefore.eligibleCount / totalMessages) * 1000) / 1000
|
|
2863
|
+
: 0;
|
|
2864
|
+
}
|
|
2865
|
+
}
|
|
2866
|
+
catch {
|
|
2867
|
+
// Eligibility query is best-effort
|
|
2868
|
+
}
|
|
2482
2869
|
// The compositor included N history messages (after truncation).
|
|
2483
2870
|
// Count how many non-system messages are in the output to determine
|
|
2484
2871
|
// how far back we reached.
|
|
@@ -2494,8 +2881,18 @@ export class Compositor {
|
|
|
2494
2881
|
LIMIT 1 OFFSET ?
|
|
2495
2882
|
`).get(conversation.id, historyMsgCount - 1);
|
|
2496
2883
|
if (oldestIncluded) {
|
|
2497
|
-
ensureCompactionFenceSchema(db);
|
|
2498
2884
|
updateCompactionFence(db, conversation.id, oldestIncluded.id, { minTailMessages: 8 });
|
|
2885
|
+
// Sprint 1: count how many messages moved from eligible -> fence-protected
|
|
2886
|
+
// (i.e. they are now above the updated fence)
|
|
2887
|
+
try {
|
|
2888
|
+
const eligibilityAfter = getCompactionEligibility(db, conversation.id);
|
|
2889
|
+
if (diagCompactionEligibleCount !== undefined) {
|
|
2890
|
+
diagCompactionProcessedCount = Math.max(0, diagCompactionEligibleCount - eligibilityAfter.eligibleCount);
|
|
2891
|
+
}
|
|
2892
|
+
}
|
|
2893
|
+
catch {
|
|
2894
|
+
// After-eligibility query is best-effort
|
|
2895
|
+
}
|
|
2499
2896
|
}
|
|
2500
2897
|
}
|
|
2501
2898
|
}
|
|
@@ -2526,6 +2923,70 @@ export class Compositor {
|
|
|
2526
2923
|
zeroResultReason = 'empty_corpus';
|
|
2527
2924
|
}
|
|
2528
2925
|
}
|
|
2926
|
+
// ── Sprint 4: Explicit budget lanes ───────────────────────────────────────────────
|
|
2927
|
+
// Compute allocated token lanes for this compose pass.
|
|
2928
|
+
// Budget = effective input budget (post-reserve).
|
|
2929
|
+
// Filled values reflect actual spend after slot fill and safety-valve trim.
|
|
2930
|
+
const s4HistoryLane = Math.floor(budget * b4HistoryFraction);
|
|
2931
|
+
const s4MemoryLane = Math.floor(budget * b4MemoryFraction);
|
|
2932
|
+
const s4StableFilledTokens = (slots.system ?? 0) + (slots.identity ?? 0);
|
|
2933
|
+
const s4HistoryFilledTokens = slots.history ?? 0;
|
|
2934
|
+
const s4MemoryFilledTokens = (slots.facts ?? 0) + (slots.context ?? 0) + (slots.library ?? 0);
|
|
2935
|
+
const s4TotalFilled = s4StableFilledTokens + s4HistoryFilledTokens + s4MemoryFilledTokens;
|
|
2936
|
+
const budgetLanes = {
|
|
2937
|
+
effectiveBudget: budget,
|
|
2938
|
+
stablePrefix: slots.system + slots.identity,
|
|
2939
|
+
history: s4HistoryLane,
|
|
2940
|
+
memory: s4MemoryLane,
|
|
2941
|
+
historyFraction: b4HistoryFraction,
|
|
2942
|
+
memoryFraction: b4MemoryFraction,
|
|
2943
|
+
overhead: Math.max(0, budget - s4TotalFilled),
|
|
2944
|
+
filled: {
|
|
2945
|
+
stablePrefix: s4StableFilledTokens,
|
|
2946
|
+
history: s4HistoryFilledTokens,
|
|
2947
|
+
memory: s4MemoryFilledTokens,
|
|
2948
|
+
},
|
|
2949
|
+
};
|
|
2950
|
+
// ── Sprint 4: OpenAI prefix-cache diagnostics ────────────────────────────────────
|
|
2951
|
+
// Expose prefix-boundary information for OpenAI providers so operators
|
|
2952
|
+
// can tune prompt layout for cache hit rate without guesswork.
|
|
2953
|
+
// Non-fatal — never block compose.
|
|
2954
|
+
let openaiPrefixCacheDiag;
|
|
2955
|
+
try {
|
|
2956
|
+
const s4Provider = s4DetectProvider(request.provider ?? request.model);
|
|
2957
|
+
if (s4Provider === 'openai' || s4Provider === 'openai-responses') {
|
|
2958
|
+
const totalWindowTokens = messages.reduce((sum, m) => sum + estimateMessageTokens(m), 0);
|
|
2959
|
+
const cacheableFraction = totalWindowTokens > 0
|
|
2960
|
+
? Math.round((prefixTokens / totalWindowTokens) * 1000) / 1000
|
|
2961
|
+
: 0;
|
|
2962
|
+
// Sprint 4: volatileAtTail is true when the volatile context block is
|
|
2963
|
+
// positioned AFTER any history (or, vacuously, when no history exists and
|
|
2964
|
+
// the block sits just before the final user turn). In both cases nothing
|
|
2965
|
+
// but the current user message follows the boundary, which is the
|
|
2966
|
+
// cacheable layout. When assembledContextBlock is missing we report
|
|
2967
|
+
// false since there is nothing to place at tail.
|
|
2968
|
+
let s4VolatileAtTail = false;
|
|
2969
|
+
if (s4VolatileContextPosition !== undefined) {
|
|
2970
|
+
// Any messages after the boundary must be user turns only (no history).
|
|
2971
|
+
const tail = messages.slice(s4VolatileContextPosition + 1);
|
|
2972
|
+
s4VolatileAtTail = tail.every(m => m.role === 'user')
|
|
2973
|
+
&& s4VolatileContextPosition >= prefixSegmentCount;
|
|
2974
|
+
}
|
|
2975
|
+
openaiPrefixCacheDiag = {
|
|
2976
|
+
stablePrefixMessageCount: prefixSegmentCount,
|
|
2977
|
+
stablePrefixTokens: prefixTokens,
|
|
2978
|
+
volatileAtTail: s4VolatileAtTail,
|
|
2979
|
+
cacheableFraction,
|
|
2980
|
+
prefixHash,
|
|
2981
|
+
};
|
|
2982
|
+
}
|
|
2983
|
+
}
|
|
2984
|
+
catch {
|
|
2985
|
+
// Provider detection is best-effort — never block compose
|
|
2986
|
+
}
|
|
2987
|
+
// 0.9.0: lifecycle policy was resolved pre-recall and used to scale recall
|
|
2988
|
+
// breadth. Diagnostics surface the same object so reported band/multiplier
|
|
2989
|
+
// matches what actually controlled retrieval this compose pass.
|
|
2529
2990
|
const diagnostics = {
|
|
2530
2991
|
triggerHits: diagTriggerHits,
|
|
2531
2992
|
triggerFallbackUsed: diagTriggerFallbackUsed,
|
|
@@ -2555,6 +3016,14 @@ export class Compositor {
|
|
|
2555
3016
|
historyDepthChosen: s4EffectiveDepth,
|
|
2556
3017
|
estimatedMsgDensityTokens: s4ObservedDensity,
|
|
2557
3018
|
rescueTrimFired: s4RescueTrimFired,
|
|
3019
|
+
// Sprint 4: prompt-tail placement diagnostics
|
|
3020
|
+
budgetLanes,
|
|
3021
|
+
volatileContextPosition: s4VolatileContextPosition,
|
|
3022
|
+
messagesBeforeVolatile: s4MessagesBeforeVolatile,
|
|
3023
|
+
openaiPrefixCacheDiag,
|
|
3024
|
+
// Sprint 3: unified pressure signal
|
|
3025
|
+
sessionPressureFraction: s3Pressure.fraction,
|
|
3026
|
+
pressureSource: s3Pressure.source,
|
|
2558
3027
|
// B4: model-aware lane budget diagnostics
|
|
2559
3028
|
mecwProfile: b4MecwProfile,
|
|
2560
3029
|
mecwApplied: b4MecwApplied,
|
|
@@ -2564,6 +3033,37 @@ export class Compositor {
|
|
|
2564
3033
|
trimSoftTarget: TRIM_BUDGET_POLICY.trimSoftTarget,
|
|
2565
3034
|
trimGrowthThreshold: TRIM_BUDGET_POLICY.trimGrowthThreshold,
|
|
2566
3035
|
trimHeadroomFraction: TRIM_BUDGET_POLICY.trimHeadroomFraction,
|
|
3036
|
+
// 0.9.0: adaptive lifecycle diagnostics for compose.preRecall
|
|
3037
|
+
adaptiveLifecycleBand: composeLifecyclePolicy.band,
|
|
3038
|
+
adaptiveLifecyclePressurePct: composeLifecyclePolicy.pressurePct,
|
|
3039
|
+
adaptiveWarmHistoryBudgetFraction: composeLifecyclePolicy.warmHistoryBudgetFraction,
|
|
3040
|
+
adaptiveSmartRecallMultiplier: composeLifecyclePolicy.smartRecallMultiplier,
|
|
3041
|
+
adaptiveTrimSoftTarget: composeLifecyclePolicy.trimSoftTarget,
|
|
3042
|
+
adaptiveCompactionTargetFraction: composeLifecyclePolicy.compactionTargetFraction,
|
|
3043
|
+
adaptiveBreadcrumbPackage: composeLifecyclePolicy.emitBreadcrumbPackage,
|
|
3044
|
+
adaptiveTopicCentroidEviction: composeLifecyclePolicy.enableTopicCentroidEviction,
|
|
3045
|
+
adaptiveProactiveCompaction: composeLifecyclePolicy.triggerProactiveCompaction,
|
|
3046
|
+
adaptiveLifecycleReasons: composeLifecyclePolicy.reasons,
|
|
3047
|
+
adaptiveRecallBudgetTokens: diagAdaptiveRecallBudgetTokens,
|
|
3048
|
+
adaptiveRecallCandidateLimit: diagAdaptiveRecallCandidateLimit,
|
|
3049
|
+
adaptiveEvictionLifecycleBand: evictionLifecyclePolicy.band,
|
|
3050
|
+
adaptiveEvictionPressurePct: evictionLifecyclePolicy.pressurePct,
|
|
3051
|
+
adaptiveEvictionTopicAwareEligibleClusters,
|
|
3052
|
+
adaptiveEvictionTopicAwareDroppedClusters,
|
|
3053
|
+
adaptiveEvictionProtectedClusters,
|
|
3054
|
+
adaptiveEvictionTopicIdCoveragePct,
|
|
3055
|
+
adaptiveEvictionBypassReason,
|
|
3056
|
+
composeTopicSource,
|
|
3057
|
+
composeTopicState,
|
|
3058
|
+
composeTopicMessageCount,
|
|
3059
|
+
composeTopicStampedMessageCount,
|
|
3060
|
+
composeTopicTelemetryStatus: 'emitted',
|
|
3061
|
+
adaptiveLifecycleBandDiverged: evictionLifecyclePolicy.band !== composeLifecyclePolicy.band,
|
|
3062
|
+
adaptiveForkedContext: s09ForkedContextSeed ? true : undefined,
|
|
3063
|
+
adaptiveForkedParentPressurePct: s09ForkedParentPressure != null
|
|
3064
|
+
? Math.round(s09ForkedParentPressure * 100)
|
|
3065
|
+
: undefined,
|
|
3066
|
+
adaptiveForkedParentUserTurns: s09ForkedContextSeed?.parentUserTurnCount,
|
|
2567
3067
|
// C1: tool-chain ejection telemetry
|
|
2568
3068
|
toolChainCoEjections: c1CoEjections > 0 ? c1CoEjections : undefined,
|
|
2569
3069
|
toolChainStubReplacements: c1StubReplacements > 0 ? c1StubReplacements : undefined,
|
|
@@ -2574,6 +3074,23 @@ export class Compositor {
|
|
|
2574
3074
|
artifactsHydrated: hydrationResult.artifactsHydrated > 0 ? hydrationResult.artifactsHydrated : undefined,
|
|
2575
3075
|
hydrationBytes: hydrationResult.hydrationBytes > 0 ? hydrationResult.hydrationBytes : undefined,
|
|
2576
3076
|
hydrationMisses: hydrationResult.hydrationMisses > 0 ? hydrationResult.hydrationMisses : undefined,
|
|
3077
|
+
// Sprint 1: observability layer
|
|
3078
|
+
rerankerStatus: diagRerankerStatus,
|
|
3079
|
+
rerankerCandidates: diagRerankerCandidates,
|
|
3080
|
+
rerankerProvider: diagRerankerProvider,
|
|
3081
|
+
// Sprint 1: named slot spans (allocated vs filled, overflow flag)
|
|
3082
|
+
slotSpans: {
|
|
3083
|
+
system: { allocated: slots.system, filled: slots.system, overflow: false },
|
|
3084
|
+
identity: { allocated: slots.identity, filled: slots.identity, overflow: false },
|
|
3085
|
+
history: { allocated: Math.floor(budget * b4HistoryFraction), filled: slots.history, overflow: slots.history > Math.floor(budget * b4HistoryFraction) },
|
|
3086
|
+
facts: { allocated: Math.floor(budget * b4MemoryFraction), filled: slots.facts, overflow: false },
|
|
3087
|
+
context: { allocated: Math.floor(budget * b4MemoryFraction), filled: slots.context, overflow: false },
|
|
3088
|
+
library: { allocated: Math.floor(budget * b4MemoryFraction), filled: slots.library, overflow: false },
|
|
3089
|
+
},
|
|
3090
|
+
// Sprint 1: compaction eligibility
|
|
3091
|
+
compactionEligibleCount: diagCompactionEligibleCount,
|
|
3092
|
+
compactionEligibleRatio: diagCompactionEligibleRatio,
|
|
3093
|
+
compactionProcessedCount: diagCompactionProcessedCount,
|
|
2577
3094
|
};
|
|
2578
3095
|
if (pressureHigh) {
|
|
2579
3096
|
warnings.push(`SESSION_PRESSURE_HIGH: avg_turn_cost=${avgTurnCost} tokens, dynamic reserve capped at ${Math.round(dynamicReserve * 100)}%`);
|
|
@@ -2659,6 +3176,33 @@ export class Compositor {
|
|
|
2659
3176
|
// Cursor write is best-effort
|
|
2660
3177
|
}
|
|
2661
3178
|
}
|
|
3179
|
+
try {
|
|
3180
|
+
const conversation = sampleConv ?? store.getConversation(request.sessionKey);
|
|
3181
|
+
if (conversation) {
|
|
3182
|
+
const snapshotContext = getOrCreateActiveContext(db, request.agentId, request.sessionKey, conversation.id);
|
|
3183
|
+
const repairNoticeContent = await this.cache.getSlot(request.agentId, request.sessionKey, 'repair_notice');
|
|
3184
|
+
insertCompositionSnapshot(db, {
|
|
3185
|
+
contextId: snapshotContext.id,
|
|
3186
|
+
headMessageId: snapshotContext.headMessageId ?? null,
|
|
3187
|
+
model: request.model ?? request.provider ?? 'unknown',
|
|
3188
|
+
contextWindow: totalWindow,
|
|
3189
|
+
totalTokens,
|
|
3190
|
+
fillPct: totalWindow > 0 ? Math.round((totalTokens / totalWindow) * 10000) / 10000 : 0,
|
|
3191
|
+
snapshotKind: 'composed_window',
|
|
3192
|
+
repairDepth: repairNoticeContent ? MAX_WARM_RESTORE_REPAIR_DEPTH : 0,
|
|
3193
|
+
slots: buildCompositionSnapshotSlots({
|
|
3194
|
+
system: systemContent,
|
|
3195
|
+
identity: identityContent,
|
|
3196
|
+
repairNotice: repairNoticeContent,
|
|
3197
|
+
messages,
|
|
3198
|
+
contextBlock: assembledContextBlock,
|
|
3199
|
+
}),
|
|
3200
|
+
});
|
|
3201
|
+
}
|
|
3202
|
+
}
|
|
3203
|
+
catch (error) {
|
|
3204
|
+
console.warn(`[hypermem:compositor] composition snapshot write skipped: ${error.message}`);
|
|
3205
|
+
}
|
|
2662
3206
|
console.log(`[hypermem:compose] agent=${request.agentId} triggers=${diagTriggerHits} fallback=${diagTriggerFallbackUsed} facts=${diagFactsIncluded} semantic=${diagSemanticResults} chunks=${diagDocChunkCollections} scopeFiltered=${diagScopeFiltered} mode=${diagRetrievalMode} crossTopicKeystones=${diagCrossTopicKeystones} c2_degradations=${c2ArtifactDegradations} c2_threshold=${c2ArtifactThresholdTokens}`);
|
|
2663
3207
|
return {
|
|
2664
3208
|
messages: outputMessages,
|
|
@@ -2684,10 +3228,15 @@ export class Compositor {
|
|
|
2684
3228
|
// Uses context.head_message_id to walk only the active branch.
|
|
2685
3229
|
let activeContext = null;
|
|
2686
3230
|
try {
|
|
2687
|
-
activeContext =
|
|
3231
|
+
activeContext = getOrCreateActiveContext(db, agentId, sessionKey, conversation.id);
|
|
2688
3232
|
}
|
|
2689
3233
|
catch {
|
|
2690
|
-
|
|
3234
|
+
try {
|
|
3235
|
+
activeContext = getActiveContext(db, agentId, sessionKey);
|
|
3236
|
+
}
|
|
3237
|
+
catch {
|
|
3238
|
+
// Context resolution is best-effort
|
|
3239
|
+
}
|
|
2691
3240
|
}
|
|
2692
3241
|
// Phase 0 fence enforcement: resolve compaction fence for warm bootstrap.
|
|
2693
3242
|
// Fence remains as transitional safety — primary scoping is via DAG walk.
|
|
@@ -2701,6 +3250,97 @@ export class Compositor {
|
|
|
2701
3250
|
catch {
|
|
2702
3251
|
// Fence lookup is best-effort
|
|
2703
3252
|
}
|
|
3253
|
+
const warmMeta = {
|
|
3254
|
+
agentId,
|
|
3255
|
+
sessionKey,
|
|
3256
|
+
provider: conversation.provider,
|
|
3257
|
+
model: conversation.model,
|
|
3258
|
+
channelType: conversation.channelType,
|
|
3259
|
+
tokenCount: conversation.tokenCountIn + conversation.tokenCountOut,
|
|
3260
|
+
lastActive: conversation.updatedAt,
|
|
3261
|
+
status: conversation.status,
|
|
3262
|
+
};
|
|
3263
|
+
if (activeContext) {
|
|
3264
|
+
const warnSnapshotVerifyFallback = (reason, detail) => {
|
|
3265
|
+
const detailSuffix = detail ? ` ${detail}` : '';
|
|
3266
|
+
console.warn(`[hypermem:compositor] warm snapshot verify fallback session=${sessionKey} reason=${reason} verify_fallback_count=1 cold_rewarm_count=1${detailSuffix}`);
|
|
3267
|
+
};
|
|
3268
|
+
try {
|
|
3269
|
+
const snapshotCandidates = listCompositionSnapshots(db, activeContext.id, 2);
|
|
3270
|
+
const latestSnapshot = getLatestValidCompositionSnapshot(db, activeContext.id);
|
|
3271
|
+
if (latestSnapshot?.verification.slots) {
|
|
3272
|
+
const targetModel = opts?.model ?? conversation.model ?? 'unknown';
|
|
3273
|
+
const sourceModel = latestSnapshot.snapshot.model;
|
|
3274
|
+
const sourceProvider = s4DetectProvider(sourceModel);
|
|
3275
|
+
const targetProvider = s4DetectProvider(conversation.provider ?? targetModel);
|
|
3276
|
+
const restored = restoreWarmSnapshotState(latestSnapshot.verification.slots, {
|
|
3277
|
+
sourceProvider,
|
|
3278
|
+
targetProvider,
|
|
3279
|
+
});
|
|
3280
|
+
if (restored) {
|
|
3281
|
+
if (!restored.diagnostics.rolloutGatePassed) {
|
|
3282
|
+
const gateSummary = restored.diagnostics.rolloutGateViolations
|
|
3283
|
+
.map(violation => `${violation.gate}=${violation.actual}/${violation.max}`)
|
|
3284
|
+
.join(', ');
|
|
3285
|
+
console.warn(`[hypermem:compositor] warm snapshot rollout gate blocked session=${sessionKey} snapshot=${latestSnapshot.snapshot.id} violations=${JSON.stringify(gateSummary)} verify_fallback_count=${latestSnapshot.fallbackUsed ? 1 : 0} cold_rewarm_count=1`);
|
|
3286
|
+
warnSnapshotVerifyFallback('rollout_gate_blocked', `snapshot=${latestSnapshot.snapshot.id} violations=${JSON.stringify(gateSummary)}`);
|
|
3287
|
+
}
|
|
3288
|
+
else {
|
|
3289
|
+
if (latestSnapshot.fallbackUsed) {
|
|
3290
|
+
console.warn(`[hypermem:compositor] warm snapshot verify fallback session=${sessionKey} restored_snapshot=${latestSnapshot.snapshot.id} verify_fallback_count=1 cold_rewarm_count=0 reason=latest_snapshot_invalid_or_unverifiable`);
|
|
3291
|
+
}
|
|
3292
|
+
const repairNoticeLines = [
|
|
3293
|
+
`Repair notice: this session is a repaired continuation from snapshot ${latestSnapshot.snapshot.id}.`,
|
|
3294
|
+
`Source model: ${sourceModel}. Target model: ${targetModel}.`,
|
|
3295
|
+
`Source provider: ${sourceProvider}. Target provider: ${targetProvider}.`,
|
|
3296
|
+
`Cross-model boundary: ${sourceModel !== targetModel ? 'yes' : 'no'}.`,
|
|
3297
|
+
`Cross-provider boundary: ${restored.diagnostics.crossProviderBoundary ? 'yes' : 'no'}.`,
|
|
3298
|
+
`Repair depth: ${MAX_WARM_RESTORE_REPAIR_DEPTH}.`
|
|
3299
|
+
];
|
|
3300
|
+
if (latestSnapshot.fallbackUsed) {
|
|
3301
|
+
repairNoticeLines.push('Snapshot verify fallback count: 1.');
|
|
3302
|
+
}
|
|
3303
|
+
if (restored.diagnostics.quotedAssistantTurns > 0) {
|
|
3304
|
+
repairNoticeLines.push(`Quoted foreign-provider assistant turns: ${restored.diagnostics.quotedAssistantTurns}.`);
|
|
3305
|
+
}
|
|
3306
|
+
if (restored.diagnostics.toolPairParityViolations > 0) {
|
|
3307
|
+
repairNoticeLines.push(`Tool-pair parity gaps flagged: ${restored.diagnostics.toolPairParityViolations}.`);
|
|
3308
|
+
}
|
|
3309
|
+
if (restored.diagnostics.requiredSlotDrops.length > 0) {
|
|
3310
|
+
repairNoticeLines.push(`Required-slot gaps flagged: ${restored.diagnostics.requiredSlotDrops.join(', ')}.`);
|
|
3311
|
+
}
|
|
3312
|
+
const tokenParityDriftExceeded = restored.diagnostics.tokenParityDriftP95 > WARM_RESTORE_MEASUREMENT_GATES.tokenParityDriftP95Max
|
|
3313
|
+
|| restored.diagnostics.tokenParityDriftP99 > WARM_RESTORE_MEASUREMENT_GATES.tokenParityDriftP99Max;
|
|
3314
|
+
if (tokenParityDriftExceeded
|
|
3315
|
+
|| restored.diagnostics.requiredSlotDropRate > WARM_RESTORE_MEASUREMENT_GATES.requiredSlotDropRateMax
|
|
3316
|
+
|| restored.diagnostics.stablePrefixBoundaryViolations > WARM_RESTORE_MEASUREMENT_GATES.stablePrefixBoundaryViolationsMax
|
|
3317
|
+
|| restored.diagnostics.toolPairParityViolations > WARM_RESTORE_MEASUREMENT_GATES.toolPairParityViolationsMax
|
|
3318
|
+
|| restored.diagnostics.continuityCriticalBoundaryTransformRate > WARM_RESTORE_MEASUREMENT_GATES.continuityCriticalBoundaryTransformRateMax) {
|
|
3319
|
+
repairNoticeLines.push(`Warm-restore instrumentation gap: token parity drift p95=${restored.diagnostics.tokenParityDriftP95.toFixed(4)}, p99=${restored.diagnostics.tokenParityDriftP99.toFixed(4)}, stable_prefix violations=${restored.diagnostics.stablePrefixBoundaryViolations}, continuity-critical transform rate=${restored.diagnostics.continuityCriticalBoundaryTransformRate.toFixed(4)}.`);
|
|
3320
|
+
}
|
|
3321
|
+
const repairNoticeContent = repairNoticeLines.join(' ');
|
|
3322
|
+
await this.cache.invalidateWindow(agentId, sessionKey);
|
|
3323
|
+
await this.cache.warmSession(agentId, sessionKey, {
|
|
3324
|
+
system: restored.system ?? opts?.systemPrompt,
|
|
3325
|
+
identity: restored.identity ?? opts?.identity,
|
|
3326
|
+
repairNotice: repairNoticeContent,
|
|
3327
|
+
history: restored.history,
|
|
3328
|
+
meta: warmMeta,
|
|
3329
|
+
});
|
|
3330
|
+
console.info(`[hypermem:compositor] warm snapshot restore session=${sessionKey} snapshot=${latestSnapshot.snapshot.id} fallback=${latestSnapshot.fallbackUsed} cross_provider=${restored.diagnostics.crossProviderBoundary} quoted_assistant_turns=${restored.diagnostics.quotedAssistantTurns} tool_pair_gaps=${restored.diagnostics.toolPairParityViolations} rollout_gate_passed=${restored.diagnostics.rolloutGatePassed} token_parity_drift_p95=${restored.diagnostics.tokenParityDriftP95.toFixed(4)} token_parity_drift_p99=${restored.diagnostics.tokenParityDriftP99.toFixed(4)}`);
|
|
3331
|
+
return;
|
|
3332
|
+
}
|
|
3333
|
+
}
|
|
3334
|
+
warnSnapshotVerifyFallback('restore_unusable', `snapshot_count=${snapshotCandidates.length}`);
|
|
3335
|
+
}
|
|
3336
|
+
else if (snapshotCandidates.length > 0) {
|
|
3337
|
+
warnSnapshotVerifyFallback('no_valid_snapshot', `snapshot_count=${snapshotCandidates.length}`);
|
|
3338
|
+
}
|
|
3339
|
+
}
|
|
3340
|
+
catch (error) {
|
|
3341
|
+
warnSnapshotVerifyFallback('restore_exception', `error=${JSON.stringify(error.message)}`);
|
|
3342
|
+
}
|
|
3343
|
+
}
|
|
2704
3344
|
// Fetch a generous pool from SQLite, apply gradient transform, then
|
|
2705
3345
|
// token-budget-cap the warm set. This replaces the old WARM_BOOTSTRAP_CAP
|
|
2706
3346
|
// message-count constant which was a blunt instrument — 100 messages of
|
|
@@ -2742,7 +3382,6 @@ export class Compositor {
|
|
|
2742
3382
|
history.unshift(tagged);
|
|
2743
3383
|
warmTokens += cost;
|
|
2744
3384
|
}
|
|
2745
|
-
const libDb = opts?.libraryDb || this.libraryDb;
|
|
2746
3385
|
// Note: facts and context are intentionally NOT cached here.
|
|
2747
3386
|
// compose() calls buildFactsFromDb() and buildCrossSessionContext() directly
|
|
2748
3387
|
// from SQLite on every turn (~0.3ms each) — faster than a Redis GET round-trip.
|
|
@@ -2755,19 +3394,10 @@ export class Compositor {
|
|
|
2755
3394
|
system: opts?.systemPrompt,
|
|
2756
3395
|
identity: opts?.identity,
|
|
2757
3396
|
history,
|
|
2758
|
-
meta:
|
|
2759
|
-
agentId,
|
|
2760
|
-
sessionKey,
|
|
2761
|
-
provider: conversation.provider,
|
|
2762
|
-
model: conversation.model,
|
|
2763
|
-
channelType: conversation.channelType,
|
|
2764
|
-
tokenCount: conversation.tokenCountIn + conversation.tokenCountOut,
|
|
2765
|
-
lastActive: conversation.updatedAt,
|
|
2766
|
-
status: conversation.status,
|
|
2767
|
-
},
|
|
3397
|
+
meta: warmMeta,
|
|
2768
3398
|
});
|
|
2769
3399
|
}
|
|
2770
|
-
async refreshRedisGradient(agentId, sessionKey, db, tokenBudget, historyDepth) {
|
|
3400
|
+
async refreshRedisGradient(agentId, sessionKey, db, tokenBudget, historyDepth, trimSoftTarget) {
|
|
2771
3401
|
const store = new MessageStore(db);
|
|
2772
3402
|
const conversation = store.getConversation(sessionKey);
|
|
2773
3403
|
if (!conversation)
|
|
@@ -2810,7 +3440,7 @@ export class Compositor {
|
|
|
2810
3440
|
// on the next turn even in the steady-state path. Aligning the gradient cap to
|
|
2811
3441
|
// the trim target means the rebuilt window already fits within the assemble
|
|
2812
3442
|
// envelope by construction.
|
|
2813
|
-
const { softBudget: gradientAssembleBudget } = resolveTrimBudgets(tokenBudget ?? 0);
|
|
3443
|
+
const { softBudget: gradientAssembleBudget } = resolveTrimBudgets(tokenBudget ?? 0, { trimSoftTarget });
|
|
2814
3444
|
const transformedHistory = applyToolGradient(rawHistory, {
|
|
2815
3445
|
totalWindowTokens: tokenBudget && tokenBudget > 0
|
|
2816
3446
|
? gradientAssembleBudget
|
|
@@ -3074,11 +3704,20 @@ export class Compositor {
|
|
|
3074
3704
|
* @param precomputedEmbedding — optional pre-computed embedding for the query.
|
|
3075
3705
|
* When provided, the Ollama call inside VectorStore.search() is skipped.
|
|
3076
3706
|
*/
|
|
3077
|
-
async buildSemanticRecall(userMessage, agentId, maxTokens, libraryDb, precomputedEmbedding, existingFingerprints // C2: skip results already in Active Facts
|
|
3078
|
-
|
|
3707
|
+
async buildSemanticRecall(userMessage, agentId, maxTokens, libraryDb, precomputedEmbedding, existingFingerprints, // C2: skip results already in Active Facts
|
|
3708
|
+
onRerankerTelemetry, // Sprint 1: surface reranker status at assemble level
|
|
3709
|
+
resultLimit) {
|
|
3079
3710
|
const libDb = libraryDb || this.libraryDb;
|
|
3080
3711
|
if (!libDb && !this.vectorStore)
|
|
3081
3712
|
return null;
|
|
3713
|
+
// 0.9.0: clamp the lifecycle-scaled candidate limit. Caller already clamps
|
|
3714
|
+
// via scaleRecallBreadth; this defensively re-clamps to [min, max] so direct callers (none
|
|
3715
|
+
// outside compose today) cannot accidentally request 0 results.
|
|
3716
|
+
const hybridLimit = Math.max(RECALL_BREADTH_BASE.candidateLimitMin, Math.min(RECALL_BREADTH_BASE.candidateLimitMax, Math.floor(resultLimit && resultLimit > 0 ? resultLimit : RECALL_BREADTH_BASE.candidateLimit)));
|
|
3717
|
+
// KNN-only legacy fallback historically used 8 — keep it slightly below the
|
|
3718
|
+
// hybrid limit to preserve prior behavior at multiplier=1, while still
|
|
3719
|
+
// scaling with the same adaptive limit.
|
|
3720
|
+
const knnFallbackLimit = Math.max(RECALL_BREADTH_BASE.candidateLimitMin, Math.min(RECALL_BREADTH_BASE.candidateLimitMax, hybridLimit - 2));
|
|
3082
3721
|
// Inline fingerprint helper (mirrors compose-scope version; C2 dedup only used here)
|
|
3083
3722
|
const fpCheck = existingFingerprints
|
|
3084
3723
|
? (text) => existingFingerprints.has(text.toLowerCase().replace(/\s+/g, ' ').trim().slice(0, 120))
|
|
@@ -3087,10 +3726,16 @@ export class Compositor {
|
|
|
3087
3726
|
if (libDb) {
|
|
3088
3727
|
const results = await hybridSearch(libDb, this.vectorStore, userMessage, {
|
|
3089
3728
|
tables: ['facts', 'knowledge', 'episodes'],
|
|
3090
|
-
limit: 10,
|
|
3729
|
+
limit: hybridLimit,
|
|
3091
3730
|
agentId,
|
|
3092
3731
|
maxKnnDistance: 1.2,
|
|
3093
3732
|
precomputedEmbedding,
|
|
3733
|
+
reranker: this.reranker,
|
|
3734
|
+
rerankerMinCandidates: this.rerankerMinCandidates,
|
|
3735
|
+
rerankerMaxDocuments: this.rerankerMaxDocuments,
|
|
3736
|
+
rerankerTopK: this.rerankerTopK,
|
|
3737
|
+
// Sprint 1: thread reranker telemetry into compose diagnostics
|
|
3738
|
+
onRerankerTelemetry,
|
|
3094
3739
|
});
|
|
3095
3740
|
if (results.length === 0)
|
|
3096
3741
|
return null;
|
|
@@ -3157,7 +3802,7 @@ export class Compositor {
|
|
|
3157
3802
|
return null;
|
|
3158
3803
|
const results = await this.vectorStore.search(userMessage, {
|
|
3159
3804
|
tables: ['facts', 'knowledge', 'episodes'],
|
|
3160
|
-
limit: 8,
|
|
3805
|
+
limit: knnFallbackLimit,
|
|
3161
3806
|
maxDistance: 1.2,
|
|
3162
3807
|
precomputedEmbedding,
|
|
3163
3808
|
});
|
|
@@ -3347,8 +3992,11 @@ export class Compositor {
|
|
|
3347
3992
|
}
|
|
3348
3993
|
}
|
|
3349
3994
|
const fenceClause = fenceMessageId != null ? 'AND m.id >= ?' : '';
|
|
3350
|
-
// Phase 3 (Turn DAG): prefer context_id scoping
|
|
3351
|
-
|
|
3995
|
+
// Phase 3 (Turn DAG): prefer context_id scoping, but keep legacy NULL
|
|
3996
|
+
// rows eligible. Warmed or migrated sessions can have an active context
|
|
3997
|
+
// while older messages predate context_id backfill; excluding NULL rows
|
|
3998
|
+
// disables within-session keystone recall for those conversations.
|
|
3999
|
+
const contextClause = activeContext ? 'AND (m.context_id = ? OR m.context_id IS NULL)' : '';
|
|
3352
4000
|
const baseParams = [conversationId, cutoffId];
|
|
3353
4001
|
if (fenceMessageId != null)
|
|
3354
4002
|
baseParams.push(fenceMessageId);
|