@psiclawops/hypermem 0.8.5 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +26 -0
- package/INSTALL.md +132 -9
- package/README.md +119 -272
- package/bench/README.md +42 -0
- package/bench/data-access-bench.mjs +380 -0
- package/bin/hypermem-bench.mjs +2 -0
- package/bin/hypermem-doctor.mjs +412 -0
- package/bin/hypermem-model-audit.mjs +339 -0
- package/bin/hypermem-status.mjs +491 -70
- package/dist/adaptive-lifecycle.d.ts +81 -0
- package/dist/adaptive-lifecycle.d.ts.map +1 -0
- package/dist/adaptive-lifecycle.js +190 -0
- package/dist/budget-policy.d.ts +1 -1
- package/dist/budget-policy.d.ts.map +1 -1
- package/dist/budget-policy.js +10 -5
- package/dist/cache.d.ts +1 -0
- package/dist/cache.d.ts.map +1 -1
- package/dist/cache.js +2 -0
- package/dist/composition-snapshot-integrity.d.ts +36 -0
- package/dist/composition-snapshot-integrity.d.ts.map +1 -0
- package/dist/composition-snapshot-integrity.js +131 -0
- package/dist/composition-snapshot-runtime.d.ts +59 -0
- package/dist/composition-snapshot-runtime.d.ts.map +1 -0
- package/dist/composition-snapshot-runtime.js +250 -0
- package/dist/composition-snapshot-store.d.ts +44 -0
- package/dist/composition-snapshot-store.d.ts.map +1 -0
- package/dist/composition-snapshot-store.js +117 -0
- package/dist/compositor.d.ts +125 -1
- package/dist/compositor.d.ts.map +1 -1
- package/dist/compositor.js +692 -44
- package/dist/doc-chunk-store.d.ts +19 -0
- package/dist/doc-chunk-store.d.ts.map +1 -1
- package/dist/doc-chunk-store.js +56 -6
- package/dist/hybrid-retrieval.d.ts +38 -0
- package/dist/hybrid-retrieval.d.ts.map +1 -1
- package/dist/hybrid-retrieval.js +86 -1
- package/dist/index.d.ts +12 -3
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +28 -2
- package/dist/knowledge-store.d.ts +4 -1
- package/dist/knowledge-store.d.ts.map +1 -1
- package/dist/knowledge-store.js +27 -4
- package/dist/library-schema.d.ts +12 -8
- package/dist/library-schema.d.ts.map +1 -1
- package/dist/library-schema.js +22 -8
- package/dist/message-store.d.ts.map +1 -1
- package/dist/message-store.js +7 -3
- package/dist/metrics-dashboard.d.ts +18 -1
- package/dist/metrics-dashboard.d.ts.map +1 -1
- package/dist/metrics-dashboard.js +52 -14
- package/dist/reranker.d.ts +1 -1
- package/dist/reranker.js +2 -2
- package/dist/schema.d.ts +1 -1
- package/dist/schema.d.ts.map +1 -1
- package/dist/schema.js +28 -1
- package/dist/seed.d.ts.map +1 -1
- package/dist/seed.js +2 -0
- package/dist/topic-synthesizer.d.ts +20 -0
- package/dist/topic-synthesizer.d.ts.map +1 -1
- package/dist/topic-synthesizer.js +113 -3
- package/dist/trigger-registry.d.ts.map +1 -1
- package/dist/trigger-registry.js +10 -2
- package/dist/types.d.ts +271 -1
- package/dist/types.d.ts.map +1 -1
- package/dist/version.d.ts +7 -7
- package/dist/version.d.ts.map +1 -1
- package/dist/version.js +17 -7
- package/docs/DIAGNOSTICS.md +205 -0
- package/docs/INTEGRATION_VALIDATION.md +186 -0
- package/docs/MIGRATION.md +9 -6
- package/docs/MIGRATION_GUIDE.md +125 -101
- package/docs/ROADMAP.md +238 -20
- package/docs/TUNING.md +19 -5
- package/install.sh +152 -401
- package/memory-plugin/LICENSE +190 -0
- package/memory-plugin/README.md +20 -0
- package/memory-plugin/dist/index.js +50 -0
- package/memory-plugin/package.json +2 -2
- package/package.json +18 -4
- package/plugin/LICENSE +190 -0
- package/plugin/README.md +20 -0
- package/plugin/dist/index.d.ts +29 -0
- package/plugin/dist/index.d.ts.map +1 -1
- package/plugin/dist/index.js +288 -23
- package/plugin/dist/index.js.map +1 -1
- package/plugin/package.json +2 -2
- package/scripts/install-runtime.mjs +12 -1
package/plugin/dist/index.js
CHANGED
|
@@ -22,12 +22,15 @@
|
|
|
22
22
|
import { definePluginEntry } from 'openclaw/plugin-sdk/plugin-entry';
|
|
23
23
|
import { buildPluginConfigSchema } from 'openclaw/plugin-sdk/core';
|
|
24
24
|
import { z } from 'zod';
|
|
25
|
-
import { detectTopicShift, stripMessageMetadata, SessionTopicMap, applyToolGradientToWindow, OPENCLAW_BOOTSTRAP_FILES, rotateSessionContext, TRIM_SOFT_TARGET, TRIM_GROWTH_THRESHOLD, TRIM_HEADROOM_FRACTION, resolveTrimBudgets, formatToolChainStub, decideReplayRecovery, isReplayState,
|
|
25
|
+
import { detectTopicShift, stripMessageMetadata, SessionTopicMap, applyToolGradientToWindow, OPENCLAW_BOOTSTRAP_FILES, rotateSessionContext, TRIM_SOFT_TARGET, TRIM_GROWTH_THRESHOLD, TRIM_HEADROOM_FRACTION, resolveTrimBudgets, resolveAdaptiveLifecyclePolicy, formatToolChainStub, decideReplayRecovery, isReplayState, recordOutputMetrics,
|
|
26
|
+
// Sprint 3: unified pressure signal
|
|
27
|
+
computeUnifiedPressure, PRESSURE_SOURCE, } from '@psiclawops/hypermem';
|
|
26
28
|
import { evictStaleContent } from '@psiclawops/hypermem/image-eviction';
|
|
27
29
|
import { repairToolPairs } from '@psiclawops/hypermem';
|
|
28
30
|
import os from 'os';
|
|
29
31
|
import path from 'path';
|
|
30
32
|
import fs from 'fs/promises';
|
|
33
|
+
import { randomUUID } from 'node:crypto';
|
|
31
34
|
import { fileURLToPath } from 'url';
|
|
32
35
|
import fsSync from 'fs';
|
|
33
36
|
let _telemetryStream = null;
|
|
@@ -107,6 +110,24 @@ function degradationTelemetry(fields) {
|
|
|
107
110
|
// Telemetry must never throw
|
|
108
111
|
}
|
|
109
112
|
}
|
|
113
|
+
/**
 * Write one `lifecycle-policy` telemetry record as a single JSON line.
 *
 * Does nothing when telemetry is disabled or no telemetry stream is
 * available. Any failure while serializing or writing is swallowed.
 *
 * @param {object} fields - Extra properties merged into the record after
 *   `event` and `ts` (so a same-named key in `fields` overrides them).
 * @returns {void}
 */
function lifecyclePolicyTelemetry(fields) {
    const sink = telemetryEnabled() ? getTelemetryStream() : null;
    if (!sink) {
        return;
    }
    try {
        const line = JSON.stringify({
            event: 'lifecycle-policy',
            ts: new Date().toISOString(),
            ...fields,
        });
        sink.write(line + '\n');
    }
    catch {
        // Telemetry must never throw
    }
}
|
|
110
131
|
function nextTurnId() {
|
|
111
132
|
_telemetryTurnCounter = (_telemetryTurnCounter + 1) >>> 0;
|
|
112
133
|
return `${Date.now().toString(36)}-${_telemetryTurnCounter.toString(36)}`;
|
|
@@ -279,6 +300,7 @@ export const __telemetryForTests = {
|
|
|
279
300
|
assembleTrace,
|
|
280
301
|
degradationTelemetry,
|
|
281
302
|
guardTelemetry,
|
|
303
|
+
lifecyclePolicyTelemetry,
|
|
282
304
|
nextTurnId,
|
|
283
305
|
beginTrimOwnerTurn,
|
|
284
306
|
endTrimOwnerTurn,
|
|
@@ -464,6 +486,7 @@ function resolveConfiguredWindow(model) {
|
|
|
464
486
|
// Subagent warming mode: 'full' | 'light' | 'off'. Default: 'light'.
|
|
465
487
|
// Controls how much HyperMem context is injected into subagent sessions.
|
|
466
488
|
let _subagentWarming = 'light';
|
|
489
|
+
const FORKED_CONTEXT_META_SLOT = 'forkedContextMeta';
|
|
467
490
|
// Cache replay threshold: 15min default. Set to 0 in user config to disable.
|
|
468
491
|
let _cacheReplayThresholdMs = 900_000;
|
|
469
492
|
// ─── System overhead cache ────────────────────────────────────
|
|
@@ -579,6 +602,8 @@ async function loadUserConfig() {
|
|
|
579
602
|
merged.eviction = { ...merged.eviction, ..._pluginConfig.eviction };
|
|
580
603
|
if (_pluginConfig.embedding)
|
|
581
604
|
merged.embedding = { ...merged.embedding, ..._pluginConfig.embedding };
|
|
605
|
+
if (_pluginConfig.reranker)
|
|
606
|
+
merged.reranker = { ...merged.reranker, ..._pluginConfig.reranker };
|
|
582
607
|
if (Object.keys(fileConfig).length > 0 && Object.keys(_pluginConfig).filter(k => k !== 'hyperMemPath' && k !== 'dataDir').length > 0) {
|
|
583
608
|
console.log('[hypermem-plugin] Note: migrating config.json keys to plugins.entries.hypercompositor.config in openclaw.json is recommended');
|
|
584
609
|
}
|
|
@@ -663,15 +688,19 @@ async function getHyperMem() {
|
|
|
663
688
|
`effective history budget: ${_contextWindowSize - reservedTokens} tokens`);
|
|
664
689
|
verboseLog(`[hypermem-plugin] warmCacheReplayThresholdMs=${_cacheReplayThresholdMs}`);
|
|
665
690
|
verboseLog(`[hypermem-plugin] contextWindowOverrides keys=${Object.keys(_contextWindowOverrides).join(', ') || '(none)'}`);
|
|
691
|
+
const cacheConfig = userConfig.cache;
|
|
666
692
|
const instance = await HyperMem.create({
|
|
667
693
|
dataDir: _pluginConfig.dataDir ?? path.join(os.homedir(), '.openclaw/hypermem'),
|
|
668
694
|
cache: {
|
|
669
|
-
keyPrefix: 'hm:',
|
|
670
|
-
sessionTTL: 14400, // 4h for system/identity/meta slots
|
|
671
|
-
historyTTL: 86400, // 24h for history
|
|
695
|
+
keyPrefix: cacheConfig?.keyPrefix ?? 'hm:',
|
|
696
|
+
sessionTTL: cacheConfig?.sessionTTL ?? 14400, // 4h default for system/identity/meta slots
|
|
697
|
+
historyTTL: cacheConfig?.historyTTL ?? 86400, // 24h default for history/cursor hot cache
|
|
672
698
|
},
|
|
673
699
|
...(userConfig.compositor ? { compositor: userConfig.compositor } : {}),
|
|
674
700
|
...(_embeddingConfig ? { embedding: _embeddingConfig } : {}),
|
|
701
|
+
...(userConfig.reranker
|
|
702
|
+
? { reranker: userConfig.reranker }
|
|
703
|
+
: {}),
|
|
675
704
|
});
|
|
676
705
|
_hm = instance;
|
|
677
706
|
// Wire up fleet store and background indexer from dynamic module
|
|
@@ -790,6 +819,33 @@ function resolveAssistantTokenCount(msg, runtimeContext) {
|
|
|
790
819
|
}
|
|
791
820
|
return undefined;
|
|
792
821
|
}
|
|
822
|
+
/**
 * Resolve the number of output (completion) tokens for an assistant message.
 *
 * Resolution order:
 *   1. The first usable output-specific counter on `msg.usage`
 *      (`output`, `outputTokens`, `output_tokens`, `completionTokens`,
 *      `completion_tokens`).
 *   2. A rough estimate of one token per four characters of message text.
 *
 * Whole-request totals (`usage.totalTokens`, `usage.total_tokens`,
 * `usage.total`) and `runtimeContext.currentTokenCount` are deliberately NOT
 * used as fallbacks: they include prompt/context tokens, so reporting them as
 * output tokens would inflate the recorded output metric.
 *
 * @param {object} msg - Assistant message; `msg.usage` and `msg.content` are read.
 * @param {object} [runtimeContext] - Kept for signature compatibility; not consulted.
 * @returns {number|undefined} A positive integer token count, or `undefined`
 *   when no counter is present and the message has no text.
 */
function resolveAssistantOutputTokenCount(msg, runtimeContext) {
    const usage = msg.usage;
    if (usage && typeof usage === 'object') {
        const candidates = [
            usage.output,
            usage.outputTokens,
            usage.output_tokens,
            usage.completionTokens,
            usage.completion_tokens,
        ];
        for (const candidate of candidates) {
            if (typeof candidate !== 'number' || !Number.isFinite(candidate)) {
                continue;
            }
            // Floor before the positivity check so a fractional value below 1
            // cannot be reported as a count of 0.
            const whole = Math.floor(candidate);
            if (whole > 0) {
                return whole;
            }
        }
    }
    const text = extractTextFromInboundContent(msg.content);
    const tokenEstimate = Math.ceil(text.length / 4);
    return tokenEstimate > 0 ? tokenEstimate : undefined;
}
|
|
793
849
|
function collectNeutralToolPairStats(messages) {
|
|
794
850
|
const callIds = new Set();
|
|
795
851
|
const resultIds = new Set();
|
|
@@ -1775,7 +1831,9 @@ function createHyperMemEngine() {
|
|
|
1775
1831
|
});
|
|
1776
1832
|
const replayMarkerText = replayRecovery.emittedText;
|
|
1777
1833
|
const preTrimTokens = runtimeTokens;
|
|
1778
|
-
|
|
1834
|
+
// Sprint 3: unified pressure signal — tool-loop assemble path
|
|
1835
|
+
const s3ToolLoopPressure = computeUnifiedPressure(preTrimTokens, effectiveBudget, PRESSURE_SOURCE.TOOLLOOP_RUNTIME_ARRAY);
|
|
1836
|
+
const pressure = s3ToolLoopPressure.fraction;
|
|
1779
1837
|
// Pressure-tiered trim targets use a single authority: the working
|
|
1780
1838
|
// message array. Redis drift is logged as an anomaly, never used as
|
|
1781
1839
|
// a trim trigger. Replay recovery gets its own explicit bounded mode
|
|
@@ -1927,17 +1985,17 @@ function createHyperMemEngine() {
|
|
|
1927
1985
|
const kept = keptClusters.flat();
|
|
1928
1986
|
const keptCount = processedConvMsgs.length - kept.length;
|
|
1929
1987
|
if (keptCount > 0) {
|
|
1930
|
-
console.log(`[hypermem-plugin] tool-loop trim: pressure=${
|
|
1988
|
+
console.log(`[hypermem-plugin] tool-loop trim: pressure=${s3ToolLoopPressure.pct}% source=${s3ToolLoopPressure.source} → ` +
|
|
1931
1989
|
`target=${(trimTarget * 100).toFixed(0)}% (redis=${trimmed} msgs, messages=${keptCount} dropped)`);
|
|
1932
1990
|
trimmedMessages = [...systemMsgs, ...kept];
|
|
1933
1991
|
}
|
|
1934
1992
|
else if (trimmed > 0) {
|
|
1935
|
-
console.log(`[hypermem-plugin] tool-loop trim: pressure=${
|
|
1993
|
+
console.log(`[hypermem-plugin] tool-loop trim: pressure=${s3ToolLoopPressure.pct}% source=${s3ToolLoopPressure.source} → ` +
|
|
1936
1994
|
`target=${(trimTarget * 100).toFixed(0)}% (redis=${trimmed} msgs)`);
|
|
1937
1995
|
}
|
|
1938
1996
|
}
|
|
1939
1997
|
else if (trimmed > 0) {
|
|
1940
|
-
console.log(`[hypermem-plugin] tool-loop trim: pressure=${
|
|
1998
|
+
console.log(`[hypermem-plugin] tool-loop trim: pressure=${s3ToolLoopPressure.pct}% source=${s3ToolLoopPressure.source} → ` +
|
|
1941
1999
|
`target=${(trimTarget * 100).toFixed(0)}% (redis=${trimmed} msgs)`);
|
|
1942
2000
|
}
|
|
1943
2001
|
// Apply tool gradient to compress large tool results before returning.
|
|
@@ -2205,6 +2263,7 @@ function createHyperMemEngine() {
|
|
|
2205
2263
|
path: 'replay',
|
|
2206
2264
|
toolLoop: isToolLoop,
|
|
2207
2265
|
msgCount: messages.length,
|
|
2266
|
+
composeTopicTelemetryStatus: 'intentionally-omitted',
|
|
2208
2267
|
});
|
|
2209
2268
|
}
|
|
2210
2269
|
}
|
|
@@ -2217,6 +2276,20 @@ function createHyperMemEngine() {
|
|
|
2217
2276
|
// Subagent light mode: skip library/wiki/semantic/keystones/doc chunks.
|
|
2218
2277
|
// Keeps: system, identity, history, active facts, output profile, tool gradient.
|
|
2219
2278
|
const subagentLight = isSubagent && _subagentWarming === 'light';
|
|
2279
|
+
let forkedContext;
|
|
2280
|
+
if (isSubagent) {
|
|
2281
|
+
try {
|
|
2282
|
+
const rawForkedContext = await hm.cache.getSlot(agentId, sk, FORKED_CONTEXT_META_SLOT);
|
|
2283
|
+
if (rawForkedContext) {
|
|
2284
|
+
const parsed = JSON.parse(rawForkedContext);
|
|
2285
|
+
if (parsed?.enabled === true)
|
|
2286
|
+
forkedContext = parsed;
|
|
2287
|
+
}
|
|
2288
|
+
}
|
|
2289
|
+
catch {
|
|
2290
|
+
// Fork metadata is advisory; fall back to normal subagent lifecycle.
|
|
2291
|
+
}
|
|
2292
|
+
}
|
|
2220
2293
|
const request = {
|
|
2221
2294
|
agentId,
|
|
2222
2295
|
sessionKey: sk,
|
|
@@ -2231,6 +2304,7 @@ function createHyperMemEngine() {
|
|
|
2231
2304
|
includeSemanticRecall: subagentLight ? false : undefined, // skip vector/FTS recall
|
|
2232
2305
|
includeKeystones: subagentLight ? false : undefined, // skip keystone history injection
|
|
2233
2306
|
prompt,
|
|
2307
|
+
forkedContext,
|
|
2234
2308
|
skipProviderTranslation: true, // runtime handles provider translation
|
|
2235
2309
|
};
|
|
2236
2310
|
const result = await hm.compose(request);
|
|
@@ -2246,6 +2320,63 @@ function createHyperMemEngine() {
|
|
|
2246
2320
|
replayState: replayRecovery.emittedMarker?.state,
|
|
2247
2321
|
replayReason: replayRecovery.emittedMarker?.reason,
|
|
2248
2322
|
});
|
|
2323
|
+
// Sprint 1: emit assemble-level trace with full observability fields
|
|
2324
|
+
// after a full compose (not replay). Surfaces prefix stability,
|
|
2325
|
+
// reranker outcome, slot spans, and compaction eligibility.
|
|
2326
|
+
if (telemetryEnabled() && !cachedContextBlock) {
|
|
2327
|
+
const diag = result.diagnostics;
|
|
2328
|
+
// prefixChanged: compare current prefixHash against prevPrefixHash
|
|
2329
|
+
// (surfaced by the compositor when a cache bypass detected prefix mutation).
|
|
2330
|
+
// When no previous hash is available (first turn), leave prefixChanged undefined.
|
|
2331
|
+
let prefixChanged;
|
|
2332
|
+
if (diag?.prefixHash && diag?.prevPrefixHash) {
|
|
2333
|
+
prefixChanged = diag.prefixHash !== diag.prevPrefixHash;
|
|
2334
|
+
}
|
|
2335
|
+
assembleTrace({
|
|
2336
|
+
agentId,
|
|
2337
|
+
sessionKey: sk,
|
|
2338
|
+
turnId: _asmTurnId,
|
|
2339
|
+
path: isSubagent ? 'subagent' : 'cold',
|
|
2340
|
+
toolLoop: isToolLoop,
|
|
2341
|
+
msgCount: result.messages.length,
|
|
2342
|
+
prefixChanged,
|
|
2343
|
+
prefixHash: diag?.prefixHash,
|
|
2344
|
+
rerankerStatus: diag?.rerankerStatus,
|
|
2345
|
+
rerankerCandidates: diag?.rerankerCandidates,
|
|
2346
|
+
rerankerProvider: diag?.rerankerProvider,
|
|
2347
|
+
slotSpans: diag?.slotSpans,
|
|
2348
|
+
compactionEligibleCount: diag?.compactionEligibleCount,
|
|
2349
|
+
compactionEligibleRatio: diag?.compactionEligibleRatio,
|
|
2350
|
+
compactionProcessedCount: diag?.compactionProcessedCount,
|
|
2351
|
+
composeTopicSource: diag?.composeTopicSource,
|
|
2352
|
+
composeTopicState: diag?.composeTopicState,
|
|
2353
|
+
composeTopicMessageCount: diag?.composeTopicMessageCount,
|
|
2354
|
+
composeTopicStampedMessageCount: diag?.composeTopicStampedMessageCount,
|
|
2355
|
+
composeTopicTelemetryStatus: diag?.composeTopicTelemetryStatus,
|
|
2356
|
+
});
|
|
2357
|
+
if (diag?.adaptiveLifecycleBand) {
|
|
2358
|
+
lifecyclePolicyTelemetry({
|
|
2359
|
+
path: 'compose.preRecall',
|
|
2360
|
+
agentId,
|
|
2361
|
+
sessionKey: sk,
|
|
2362
|
+
band: diag.adaptiveLifecycleBand,
|
|
2363
|
+
pressurePct: diag.adaptiveLifecyclePressurePct,
|
|
2364
|
+
trimSoftTarget: diag.adaptiveTrimSoftTarget,
|
|
2365
|
+
reasons: diag.adaptiveLifecycleReasons,
|
|
2366
|
+
});
|
|
2367
|
+
}
|
|
2368
|
+
if (diag?.adaptiveEvictionLifecycleBand) {
|
|
2369
|
+
lifecyclePolicyTelemetry({
|
|
2370
|
+
path: 'compose.eviction',
|
|
2371
|
+
agentId,
|
|
2372
|
+
sessionKey: sk,
|
|
2373
|
+
band: diag.adaptiveEvictionLifecycleBand,
|
|
2374
|
+
pressurePct: diag.adaptiveEvictionPressurePct,
|
|
2375
|
+
trimSoftTarget: diag.adaptiveTrimSoftTarget,
|
|
2376
|
+
reasons: diag.adaptiveLifecycleBandDiverged ? ['diverged-from-preRecall'] : undefined,
|
|
2377
|
+
});
|
|
2378
|
+
}
|
|
2379
|
+
}
|
|
2249
2380
|
// Use cached contextBlock if available (cache replay), otherwise use fresh result.
|
|
2250
2381
|
// After a full compose, write the new contextBlock to cache for the next turn.
|
|
2251
2382
|
if (cachedContextBlock) {
|
|
@@ -2318,6 +2449,9 @@ ${replayRecovery.emittedText}`
|
|
|
2318
2449
|
const runtimeSystemTokens = getOverheadFallback(tier);
|
|
2319
2450
|
_overheadCache.set(sk, contextBlockTokens + runtimeSystemTokens);
|
|
2320
2451
|
await persistReplayRecoveryState(hm, agentId, sk, replayRecovery.nextState);
|
|
2452
|
+
if (forkedContext) {
|
|
2453
|
+
await hm.cache.setSlot(agentId, sk, FORKED_CONTEXT_META_SLOT, '').catch(() => { });
|
|
2454
|
+
}
|
|
2321
2455
|
// Update model state for downshift detection on next turn
|
|
2322
2456
|
try {
|
|
2323
2457
|
const modelIdentity = resolveModelIdentity(model);
|
|
@@ -2406,6 +2540,9 @@ ${replayRecovery.emittedText}`
|
|
|
2406
2540
|
// budget the history is competing for. We trim history to make room.
|
|
2407
2541
|
const effectiveBudget = computeEffectiveBudget(tokenBudget, model);
|
|
2408
2542
|
const tokensBefore = await estimateWindowTokens(hm, agentId, sk);
|
|
2543
|
+
// Sprint 3: Unified pressure signal — compact path (Redis estimate)
|
|
2544
|
+
const s3CompactPressure = computeUnifiedPressure(tokensBefore, effectiveBudget, PRESSURE_SOURCE.COMPACT_REDIS_ESTIMATE);
|
|
2545
|
+
console.log(`[hypermem-plugin] compact: pressure=${s3CompactPressure.pct}% source=${s3CompactPressure.source} tokens=${tokensBefore}/${effectiveBudget}`);
|
|
2409
2546
|
// Target depth for both Redis trimming and JSONL truncation.
|
|
2410
2547
|
// Target 50% of budget capacity, assume ~500 tokens/message average.
|
|
2411
2548
|
const targetDepth = Math.max(20, Math.floor((effectiveBudget * 0.5) / 500));
|
|
@@ -2419,6 +2556,10 @@ ${replayRecovery.emittedText}`
|
|
|
2419
2556
|
// Also triggered when reshape ran recently but the session is still
|
|
2420
2557
|
// critically full — bypass the reshape guard in that case.
|
|
2421
2558
|
const NUCLEAR_THRESHOLD = 0.85;
|
|
2559
|
+
// Sprint 3: runtime-total pressure for nuclear check uses its own source label
|
|
2560
|
+
const s3NuclearPressure = currentTokenCount != null
|
|
2561
|
+
? computeUnifiedPressure(currentTokenCount, effectiveBudget, PRESSURE_SOURCE.COMPACT_RUNTIME_TOTAL)
|
|
2562
|
+
: s3CompactPressure;
|
|
2422
2563
|
const isNuclear = currentTokenCount != null && currentTokenCount > effectiveBudget * NUCLEAR_THRESHOLD;
|
|
2423
2564
|
if (isNuclear) {
|
|
2424
2565
|
// Cut deep: target 20% of normal depth = ~25 messages for a 128k session.
|
|
@@ -2437,11 +2578,11 @@ ${replayRecovery.emittedText}`
|
|
|
2437
2578
|
postTokens: tokensAfter,
|
|
2438
2579
|
removed: nuclearRemoved,
|
|
2439
2580
|
cacheInvalidated: true,
|
|
2440
|
-
reason:
|
|
2581
|
+
reason: `${s3NuclearPressure.source}:${s3NuclearPressure.pct}% currentTokenCount=${currentTokenCount}/${effectiveBudget}`,
|
|
2441
2582
|
});
|
|
2442
2583
|
}
|
|
2443
|
-
console.log(`[hypermem-plugin] compact: NUCLEAR —
|
|
2444
|
-
`
|
|
2584
|
+
console.log(`[hypermem-plugin] compact: NUCLEAR — pressure=${s3NuclearPressure.pct}% source=${s3NuclearPressure.source} ` +
|
|
2585
|
+
`session at ${currentTokenCount}/${effectiveBudget} tokens, ` +
|
|
2445
2586
|
`deep-trimmed JSONL to ${nuclearDepth} messages, Redis ${tokensBefore}→${tokensAfter} tokens`);
|
|
2446
2587
|
return { ok: true, compacted: true, result: { tokensBefore, tokensAfter } };
|
|
2447
2588
|
}
|
|
@@ -2528,10 +2669,10 @@ ${replayRecovery.emittedText}`
|
|
|
2528
2669
|
postTokens: tokensAfter,
|
|
2529
2670
|
removed: historyTrimmed,
|
|
2530
2671
|
cacheInvalidated: true,
|
|
2531
|
-
reason:
|
|
2672
|
+
reason: `${s3CompactPressure.source}:${s3CompactPressure.pct}% over-budget tokensBefore=${tokensBefore}/${effectiveBudget}`,
|
|
2532
2673
|
});
|
|
2533
2674
|
}
|
|
2534
|
-
console.log(`[hypermem-plugin] compact: trimmed ${tokensBefore} → ${tokensAfter} tokens (budget: ${effectiveBudget})`);
|
|
2675
|
+
console.log(`[hypermem-plugin] compact: trimmed ${tokensBefore} → ${tokensAfter} tokens (budget: ${effectiveBudget}, pressure=${s3CompactPressure.pct}% source=${s3CompactPressure.source})`);
|
|
2535
2676
|
// Density-aware JSONL truncation: derive target depth from actual avg tokens/message
|
|
2536
2677
|
// rather than assuming a fixed 500 tokens/message. This prevents a large-message
|
|
2537
2678
|
// session (e.g. 145 msgs × 882 tok = 128k) from bypassing the 1.5x guard and
|
|
@@ -2608,8 +2749,54 @@ ${replayRecovery.emittedText}`
|
|
|
2608
2749
|
});
|
|
2609
2750
|
}
|
|
2610
2751
|
}
|
|
2752
|
+
try {
|
|
2753
|
+
const lastAssistantMessage = [...newMessages].reverse().find(m => m.role === 'assistant');
|
|
2754
|
+
if (lastAssistantMessage) {
|
|
2755
|
+
const modelState = await hm.cache.getModelState(agentId, sk).catch(() => null);
|
|
2756
|
+
const promptCacheUsage = runtimeContext?.promptCache?.lastCallUsage;
|
|
2757
|
+
const outputTokens = resolveAssistantOutputTokenCount(lastAssistantMessage, runtimeContext) ?? 1;
|
|
2758
|
+
const inputTokens = typeof promptCacheUsage?.input === 'number'
|
|
2759
|
+
? Math.floor(promptCacheUsage.input)
|
|
2760
|
+
: typeof runtimeContext?.currentTokenCount === 'number'
|
|
2761
|
+
? Math.floor(runtimeContext.currentTokenCount)
|
|
2762
|
+
: null;
|
|
2763
|
+
const cacheReadTokens = typeof promptCacheUsage?.cacheRead === 'number'
|
|
2764
|
+
? Math.floor(promptCacheUsage.cacheRead)
|
|
2765
|
+
: null;
|
|
2766
|
+
const modelId = typeof lastAssistantMessage.model === 'string'
|
|
2767
|
+
? lastAssistantMessage.model
|
|
2768
|
+
: modelState?.modelId ?? modelState?.model ?? 'unknown';
|
|
2769
|
+
const provider = typeof lastAssistantMessage.provider === 'string'
|
|
2770
|
+
? lastAssistantMessage.provider
|
|
2771
|
+
: modelState?.provider ?? 'unknown';
|
|
2772
|
+
const taskType = typeof runtimeContext?.taskType === 'string'
|
|
2773
|
+
? runtimeContext.taskType ?? null
|
|
2774
|
+
: null;
|
|
2775
|
+
recordOutputMetrics(hm.dbManager.getLibraryDb(), {
|
|
2776
|
+
id: `turn-metric-${agentId}-${Date.now()}-${randomUUID()}`,
|
|
2777
|
+
timestamp: new Date().toISOString(),
|
|
2778
|
+
agent_id: agentId,
|
|
2779
|
+
session_key: sk,
|
|
2780
|
+
model_id: modelId,
|
|
2781
|
+
provider,
|
|
2782
|
+
fos_version: null,
|
|
2783
|
+
mod_version: null,
|
|
2784
|
+
mod_id: null,
|
|
2785
|
+
task_type: taskType,
|
|
2786
|
+
output_tokens: outputTokens,
|
|
2787
|
+
input_tokens: inputTokens,
|
|
2788
|
+
cache_read_tokens: cacheReadTokens,
|
|
2789
|
+
corrections_fired: [],
|
|
2790
|
+
latency_ms: null,
|
|
2791
|
+
});
|
|
2792
|
+
}
|
|
2793
|
+
}
|
|
2794
|
+
catch {
|
|
2795
|
+
// Non-fatal telemetry path
|
|
2796
|
+
}
|
|
2611
2797
|
// P3.1: Topic detection on the inbound user message
|
|
2612
2798
|
// Non-fatal: topic detection never blocks afterTurn
|
|
2799
|
+
let adaptiveTopicShiftConfidence;
|
|
2613
2800
|
try {
|
|
2614
2801
|
const inboundUserMsg = newMessages
|
|
2615
2802
|
.map(m => m)
|
|
@@ -2626,6 +2813,7 @@ ${replayRecovery.emittedText}`
|
|
|
2626
2813
|
const topicMap = new SessionTopicMap(db);
|
|
2627
2814
|
const activeTopic = topicMap.getActiveTopic(sk);
|
|
2628
2815
|
const signal = detectTopicShift(neutralUser, contextMessages, activeTopic?.id ?? null);
|
|
2816
|
+
adaptiveTopicShiftConfidence = signal.confidence;
|
|
2629
2817
|
if (signal.isNewTopic && signal.topicName) {
|
|
2630
2818
|
const newTopicId = topicMap.createTopic(sk, signal.topicName);
|
|
2631
2819
|
// New topic starts with count 1 (the message that triggered the shift)
|
|
@@ -2671,7 +2859,30 @@ ${replayRecovery.emittedText}`
|
|
|
2671
2859
|
const modelState = await hm.cache.getModelState(agentId, sk);
|
|
2672
2860
|
const gradientBudget = modelState?.tokenBudget;
|
|
2673
2861
|
const gradientDepth = modelState?.historyDepth;
|
|
2674
|
-
|
|
2862
|
+
const inboundUserMsg = newMessages
|
|
2863
|
+
.map(m => m)
|
|
2864
|
+
.find(m => m.role === 'user');
|
|
2865
|
+
const inboundUserText = inboundUserMsg
|
|
2866
|
+
? stripMessageMetadata(extractTextFromInboundContent(inboundUserMsg.content))
|
|
2867
|
+
: '';
|
|
2868
|
+
const lifecyclePolicy = resolveAdaptiveLifecyclePolicy({
|
|
2869
|
+
usedTokens: estimateMessageArrayTokens(messages),
|
|
2870
|
+
effectiveBudget: gradientBudget,
|
|
2871
|
+
userTurnCount: messages.filter(m => m.role === 'user').length,
|
|
2872
|
+
explicitNewSession: /^\/new(?:\s|$)/i.test(inboundUserText.trim()),
|
|
2873
|
+
topicShiftConfidence: adaptiveTopicShiftConfidence,
|
|
2874
|
+
});
|
|
2875
|
+
lifecyclePolicyTelemetry({
|
|
2876
|
+
path: 'afterTurn.gradient',
|
|
2877
|
+
agentId,
|
|
2878
|
+
sessionKey: sk,
|
|
2879
|
+
band: lifecyclePolicy.band,
|
|
2880
|
+
pressurePct: lifecyclePolicy.pressurePct,
|
|
2881
|
+
topicShiftConfidence: adaptiveTopicShiftConfidence,
|
|
2882
|
+
trimSoftTarget: lifecyclePolicy.trimSoftTarget,
|
|
2883
|
+
reasons: lifecyclePolicy.reasons,
|
|
2884
|
+
});
|
|
2885
|
+
await hm.refreshRedisGradient(agentId, sk, gradientBudget, gradientDepth, lifecyclePolicy.trimSoftTarget);
|
|
2675
2886
|
}
|
|
2676
2887
|
catch (refreshErr) {
|
|
2677
2888
|
console.warn('[hypermem-plugin] afterTurn: refreshRedisGradient failed (non-fatal):', refreshErr.message);
|
|
@@ -2841,7 +3052,12 @@ ${replayRecovery.emittedText}`
|
|
|
2841
3052
|
* subagentWarming config ('full' | 'light' | 'off').
|
|
2842
3053
|
* Returns a rollback handle to clean up if spawn fails.
|
|
2843
3054
|
*/
|
|
2844
|
-
async prepareSubagentSpawn(
|
|
3055
|
+
async prepareSubagentSpawn(params) {
|
|
3056
|
+
const { parentSessionKey, childSessionKey } = params;
|
|
3057
|
+
const forkParams = params;
|
|
3058
|
+
const contextMode = forkParams.contextMode;
|
|
3059
|
+
const parentSessionId = forkParams.parentSessionId;
|
|
3060
|
+
const childSessionId = forkParams.childSessionId;
|
|
2845
3061
|
if (_subagentWarming === 'off') {
|
|
2846
3062
|
return undefined;
|
|
2847
3063
|
}
|
|
@@ -2849,7 +3065,12 @@ ${replayRecovery.emittedText}`
|
|
|
2849
3065
|
const hm = await getHyperMem();
|
|
2850
3066
|
const parentAgentId = extractAgentId(parentSessionKey);
|
|
2851
3067
|
const childAgentId = extractAgentId(childSessionKey);
|
|
2852
|
-
|
|
3068
|
+
const isForkedContext = contextMode === 'fork';
|
|
3069
|
+
let parentHistoryMessages = 0;
|
|
3070
|
+
let parentUserTurnCount = 0;
|
|
3071
|
+
let parentPressureFraction;
|
|
3072
|
+
// Seed child with parent's active facts. This preserves the historical
|
|
3073
|
+
// slot for compatibility; facts still primarily come from L4 by agent id.
|
|
2853
3074
|
const facts = hm.getActiveFacts(parentAgentId, { limit: 50 });
|
|
2854
3075
|
if (facts && facts.length > 0) {
|
|
2855
3076
|
const factBlock = facts
|
|
@@ -2857,22 +3078,48 @@ ${replayRecovery.emittedText}`
|
|
|
2857
3078
|
.join('\n');
|
|
2858
3079
|
await hm.cache.setSlot(childAgentId, childSessionKey, 'parentFacts', factBlock);
|
|
2859
3080
|
}
|
|
2860
|
-
|
|
2861
|
-
if (
|
|
2862
|
-
const
|
|
2863
|
-
|
|
2864
|
-
|
|
2865
|
-
|
|
3081
|
+
const history = await hm.cache.getHistory(parentAgentId, parentSessionKey);
|
|
3082
|
+
if (history && history.length > 0) {
|
|
3083
|
+
const maxSeededHistory = _subagentWarming === 'full' ? 25 : 12;
|
|
3084
|
+
const recentHistory = history.slice(-maxSeededHistory);
|
|
3085
|
+
parentHistoryMessages = recentHistory.length;
|
|
3086
|
+
parentUserTurnCount = recentHistory.filter(m => m.role === 'user').length;
|
|
3087
|
+
const parentTokens = estimateMessageArrayTokens(recentHistory);
|
|
3088
|
+
const parentModelState = await hm.cache.getModelState(parentAgentId, parentSessionKey).catch(() => null);
|
|
3089
|
+
const parentBudget = parentModelState?.tokenBudget && parentModelState.tokenBudget > 0
|
|
3090
|
+
? parentModelState.tokenBudget
|
|
3091
|
+
: undefined;
|
|
3092
|
+
parentPressureFraction = parentBudget ? parentTokens / parentBudget : undefined;
|
|
3093
|
+
if (isForkedContext || _subagentWarming === 'full') {
|
|
3094
|
+
await hm.cache.replaceHistory(childAgentId, childSessionKey, recentHistory, maxSeededHistory);
|
|
3095
|
+
await hm.cache.invalidateWindow(childAgentId, childSessionKey).catch(() => { });
|
|
2866
3096
|
}
|
|
3097
|
+
await hm.cache.setSlot(childAgentId, childSessionKey, 'parentHistory', JSON.stringify(recentHistory));
|
|
3098
|
+
}
|
|
3099
|
+
if (isForkedContext) {
|
|
3100
|
+
const forkedMeta = {
|
|
3101
|
+
enabled: true,
|
|
3102
|
+
parentSessionKey,
|
|
3103
|
+
parentSessionId,
|
|
3104
|
+
childSessionId,
|
|
3105
|
+
parentPressureFraction,
|
|
3106
|
+
parentUserTurnCount,
|
|
3107
|
+
parentHistoryMessages,
|
|
3108
|
+
};
|
|
3109
|
+
await hm.cache.setSlot(childAgentId, childSessionKey, FORKED_CONTEXT_META_SLOT, JSON.stringify(forkedMeta));
|
|
2867
3110
|
}
|
|
2868
3111
|
console.log(`[hypermem-plugin] prepareSubagentSpawn: seeded ${childSessionKey} ` +
|
|
2869
|
-
`from ${parentSessionKey} (warming=${_subagentWarming}
|
|
3112
|
+
`from ${parentSessionKey} (warming=${_subagentWarming}, contextMode=${contextMode ?? 'isolated'}, ` +
|
|
3113
|
+
`history=${parentHistoryMessages})`);
|
|
2870
3114
|
return {
|
|
2871
3115
|
async rollback() {
|
|
2872
3116
|
try {
|
|
2873
3117
|
const hm = await getHyperMem();
|
|
2874
3118
|
await hm.cache.setSlot(childAgentId, childSessionKey, 'parentFacts', '');
|
|
2875
3119
|
await hm.cache.setSlot(childAgentId, childSessionKey, 'parentHistory', '');
|
|
3120
|
+
await hm.cache.setSlot(childAgentId, childSessionKey, FORKED_CONTEXT_META_SLOT, '');
|
|
3121
|
+
await hm.cache.replaceHistory(childAgentId, childSessionKey, [], 0);
|
|
3122
|
+
await hm.cache.invalidateWindow(childAgentId, childSessionKey).catch(() => { });
|
|
2876
3123
|
}
|
|
2877
3124
|
catch {
|
|
2878
3125
|
// Rollback is best-effort
|
|
@@ -2898,6 +3145,7 @@ ${replayRecovery.emittedText}`
|
|
|
2898
3145
|
await Promise.all([
|
|
2899
3146
|
hm.cache.setSlot(childAgentId, childSessionKey, 'parentFacts', ''),
|
|
2900
3147
|
hm.cache.setSlot(childAgentId, childSessionKey, 'parentHistory', ''),
|
|
3148
|
+
hm.cache.setSlot(childAgentId, childSessionKey, FORKED_CONTEXT_META_SLOT, ''),
|
|
2901
3149
|
hm.cache.setSlot(childAgentId, childSessionKey, 'assemblyContextBlock', ''),
|
|
2902
3150
|
hm.cache.setSlot(childAgentId, childSessionKey, 'assemblyContextAt', '0'),
|
|
2903
3151
|
hm.cache.invalidateWindow(childAgentId, childSessionKey).catch(() => { }),
|
|
@@ -3109,6 +3357,23 @@ const hypercompositorConfigSchema = z.object({
|
|
|
3109
3357
|
timeout: z.number().int().positive().optional(),
|
|
3110
3358
|
batchSize: z.number().int().positive().optional(),
|
|
3111
3359
|
}).optional(),
|
|
3360
|
+
/**
|
|
3361
|
+
* Optional reranker config. When omitted or provider is 'none', the
|
|
3362
|
+
* compositor runs with RRF-only ordering. See INSTALL.md → Reranker.
|
|
3363
|
+
*/
|
|
3364
|
+
reranker: z.object({
|
|
3365
|
+
provider: z.enum(['zeroentropy', 'openrouter', 'local', 'none']),
|
|
3366
|
+
minCandidates: z.number().int().nonnegative().optional(),
|
|
3367
|
+
maxDocuments: z.number().int().positive().optional(),
|
|
3368
|
+
topK: z.number().int().positive().optional(),
|
|
3369
|
+
timeoutMs: z.number().int().positive().optional(),
|
|
3370
|
+
zeroEntropyApiKey: z.string().optional(),
|
|
3371
|
+
zeroEntropyModel: z.string().optional(),
|
|
3372
|
+
openrouterApiKey: z.string().optional(),
|
|
3373
|
+
openrouterModel: z.string().optional(),
|
|
3374
|
+
ollamaUrl: z.string().optional(),
|
|
3375
|
+
ollamaModel: z.string().optional(),
|
|
3376
|
+
}).optional(),
|
|
3112
3377
|
});
|
|
3113
3378
|
// ─── Plugin Entry ───────────────────────────────────────────────
|
|
3114
3379
|
const engine = createHyperMemEngine();
|