@psiclawops/hypercompositor 0.7.0 → 0.8.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +117 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1389 -587
- package/package.json +7 -6
package/dist/index.js
CHANGED
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
* Implements OpenClaw's ContextEngine interface backed by hypermem's
|
|
5
5
|
* four-layer memory architecture:
|
|
6
6
|
*
|
|
7
|
-
* L1
|
|
7
|
+
* L1 Cache — SQLite `:memory:` hot session working memory
|
|
8
8
|
* L2 Messages — per-agent conversation history (SQLite)
|
|
9
9
|
* L3 Vectors — semantic + keyword search (KNN + FTS5)
|
|
10
10
|
* L4 Library — facts, knowledge, episodes, preferences
|
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
* assemble() → compositor builds context from all four layers
|
|
15
15
|
* compact() → delegate to runtime (ownsCompaction: false)
|
|
16
16
|
* afterTurn() → trigger background indexer (fire-and-forget)
|
|
17
|
-
* bootstrap() → warm
|
|
17
|
+
* bootstrap() → warm hot-cache session, register agent in fleet
|
|
18
18
|
* dispose() → close hypermem connections
|
|
19
19
|
*
|
|
20
20
|
* Session key format expected: "agent:<agentId>:<channel>:<name>"
|
|
@@ -22,22 +22,293 @@
|
|
|
22
22
|
import { definePluginEntry } from 'openclaw/plugin-sdk/plugin-entry';
|
|
23
23
|
import { buildPluginConfigSchema } from 'openclaw/plugin-sdk/core';
|
|
24
24
|
import { z } from 'zod';
|
|
25
|
-
import { detectTopicShift, stripMessageMetadata, SessionTopicMap, applyToolGradientToWindow,
|
|
25
|
+
import { detectTopicShift, stripMessageMetadata, SessionTopicMap, applyToolGradientToWindow, OPENCLAW_BOOTSTRAP_FILES, rotateSessionContext, TRIM_SOFT_TARGET, TRIM_GROWTH_THRESHOLD, TRIM_HEADROOM_FRACTION, resolveTrimBudgets, formatToolChainStub, decideReplayRecovery, isReplayState, } from '@psiclawops/hypermem';
|
|
26
26
|
import { evictStaleContent } from '@psiclawops/hypermem/image-eviction';
|
|
27
27
|
import { repairToolPairs } from '@psiclawops/hypermem';
|
|
28
28
|
import os from 'os';
|
|
29
29
|
import path from 'path';
|
|
30
30
|
import fs from 'fs/promises';
|
|
31
|
-
import { createRequire } from 'module';
|
|
32
31
|
import { fileURLToPath } from 'url';
|
|
32
|
+
import fsSync from 'fs';
|
|
33
|
+
// Telemetry sink state shared by every emitter in this file.
//   _telemetryStream       lazily created append-mode write stream, or null
//   _telemetryStreamFailed latched when the stream cannot be created or errors,
//                          so later calls do not retry
//   _telemetryTurnCounter  counter consumed by nextTurnId()
let _telemetryStream = null;
let _telemetryStreamFailed = false;
let _telemetryTurnCounter = 0;
/**
 * Reports whether telemetry is switched on.
 *
 * @returns {boolean} true only when HYPERMEM_TELEMETRY is exactly '1'.
 */
function telemetryEnabled() {
    return process.env.HYPERMEM_TELEMETRY === '1';
}
/**
 * Returns the shared JSONL telemetry stream, creating it on first use.
 *
 * The target path is HYPERMEM_TELEMETRY_PATH, falling back to
 * './hypermem-telemetry.jsonl'. The file is opened in append mode.
 *
 * @returns {import('fs').WriteStream | null} the stream, or null when creation
 *   failed or the current stream reported an error. Never throws.
 */
function getTelemetryStream() {
    if (_telemetryStream || _telemetryStreamFailed)
        return _telemetryStream;
    try {
        const p = process.env.HYPERMEM_TELEMETRY_PATH || './hypermem-telemetry.jsonl';
        const stream = fsSync.createWriteStream(p, { flags: 'a' });
        stream.on('error', () => {
            // Only the stream that is still installed may latch the failure.
            // A late error from a stream that was already replaced (for example
            // after the test-only reset()) must not discard its successor.
            if (_telemetryStream !== stream)
                return;
            _telemetryStreamFailed = true;
            _telemetryStream = null;
        });
        _telemetryStream = stream;
    }
    catch {
        _telemetryStreamFailed = true;
        _telemetryStream = null;
    }
    return _telemetryStream;
}
|
|
56
|
+
/**
 * Shared writer behind trimTelemetry(), assembleTrace() and
 * degradationTelemetry().
 *
 * Does nothing when telemetry is disabled or no stream is available.
 * Otherwise appends one JSON line of the form
 * `{ event, ts, ...fields }` to the telemetry stream. `fields` is spread
 * last, so the record layout is the same as when each emitter built its own
 * record.
 *
 * @param {string} event - value for the record's `event` property.
 * @param {object} fields - caller-supplied properties merged into the record.
 * @returns {void}
 */
function _writeTelemetryEvent(event, fields) {
    if (!telemetryEnabled())
        return;
    const stream = getTelemetryStream();
    if (!stream)
        return;
    try {
        const record = {
            event,
            ts: new Date().toISOString(),
            ...fields,
        };
        stream.write(JSON.stringify(record) + '\n');
    }
    catch {
        // Telemetry must never throw
    }
}
/**
 * Emits a `trim` telemetry record.
 * @param {object} fields - properties merged into the record.
 */
function trimTelemetry(fields) {
    _writeTelemetryEvent('trim', fields);
}
/**
 * Emits an `assemble` telemetry record.
 * @param {object} fields - properties merged into the record.
 */
function assembleTrace(fields) {
    _writeTelemetryEvent('assemble', fields);
}
/**
 * Emits a `degradation` telemetry record.
 * @param {object} fields - properties merged into the record.
 */
function degradationTelemetry(fields) {
    _writeTelemetryEvent('degradation', fields);
}
|
|
110
|
+
/**
 * Produces the next telemetry turn id.
 *
 * Advances the module-level turn counter (kept as an unsigned 32-bit value)
 * and returns `<timestamp>-<counter>`, both rendered in base 36.
 *
 * @returns {string} the new turn id.
 */
function nextTurnId() {
    const bumped = (_telemetryTurnCounter + 1) >>> 0;
    _telemetryTurnCounter = bumped;
    const stamp = Date.now().toString(36);
    const sequence = bumped.toString(36);
    return `${stamp}-${sequence}`;
}
|
|
114
|
+
// ─── Trim Ownership (Phase A Sprint 2) ───────────────────────────
|
|
115
|
+
//
|
|
116
|
+
// Sprint 2 consolidates trim ownership: the assemble-owned family
|
|
117
|
+
// (assemble.normal, assemble.subagent, assemble.toolLoop) is the single
|
|
118
|
+
// steady-state trim owner. Compact paths (compact.nuclear, compact.history,
|
|
119
|
+
// compact.history2) are exempted — they're exception-only. warmstart,
|
|
120
|
+
// reshape, and afterTurn.secondary are demoted in sub-tasks 2.2 and 2.3.
|
|
121
|
+
//
|
|
122
|
+
// This block adds:
|
|
123
|
+
// 1. A per-session turn context (beginTrimOwnerTurn/endTrimOwnerTurn) scoped
|
|
124
|
+
// by the main assemble() flow.
|
|
125
|
+
// 2. A single shared trimOwner claim helper that lets exactly one **real**
|
|
126
|
+
// steady-state trim claim ownership per turn and throws loudly in
|
|
127
|
+
// development (NODE_ENV='development') when a second real steady-state
|
|
128
|
+
// trim path attempts to claim the same turn.
|
|
129
|
+
// 3. A non-counting guard/noop telemetry helper (same JSONL channel) that
|
|
130
|
+
// demoted paths can emit to preserve visibility of warm-start/reshape
|
|
131
|
+
// without consuming a steady-state owner slot.
|
|
132
|
+
//
|
|
133
|
+
// Sub-task 2.1 only adds the scaffolding + invariant; no existing trim call
|
|
134
|
+
// is removed here. Demotions of warm-start/reshape/afterTurn.secondary land
|
|
135
|
+
// in 2.2 and 2.3.
|
|
136
|
+
// Path labels that contend for the single per-turn owner slot in claimTrimOwner().
const STEADY_STATE_TRIM_PATHS = new Set(['assemble.normal', 'assemble.subagent', 'assemble.toolLoop']);
// Path labels that claimTrimOwner() lets through without claiming the slot.
const COMPACT_TRIM_PATHS = new Set(['compact.nuclear', 'compact.history', 'compact.history2']);
|
|
146
|
+
// ─── Guard-telemetry reason enum (Phase A Sprint 2.2a) ──────────────────
|
|
147
|
+
// Plugin-local, constant-backed union of allowed `reason` values on
|
|
148
|
+
// `event: 'trim-guard'` records. Keeping this bounded prevents ad-hoc
|
|
149
|
+
// numeric/user strings from leaking into the telemetry JSONL channel and
|
|
150
|
+
// makes downstream reporting stable. Do NOT widen this to arbitrary
|
|
151
|
+
// strings — add a new member here first, then reference it at call sites.
|
|
152
|
+
//
|
|
153
|
+
// Scope note: this union is plugin-local (per planner 2.2 §C). It is not
|
|
154
|
+
// re-exported via `src/types.ts` because the shared public types surface
|
|
155
|
+
// must not gain a telemetry-reason enum as part of this sprint.
|
|
156
|
+
// Allowed `reason` values for `trim-guard` telemetry records. Frozen so the
// list cannot be widened at runtime; add a new member here first, then
// reference it at call sites.
const GUARD_TELEMETRY_REASONS = Object.freeze([
    'warmstart-pressure-demoted',
    'reshape-downshift-demoted',
    'duplicate-claim-suppressed',
    'afterturn-secondary-demoted',
    'window-within-budget-skip',
    'pressure-accounting-anomaly',
]);
|
|
164
|
+
// Turn-scoped ownership map (Phase A Sprint 2.2a).
|
|
165
|
+
//
|
|
166
|
+
// Previously keyed by `sessionKey` alone, which clobbered overlapping same-
|
|
167
|
+
// session assemble() flows (Sprint 2.1 security eval, medium finding #1).
|
|
168
|
+
// Now keyed by the composite `sessionKey|turnId` so two concurrent turns on
|
|
169
|
+
// the same session key remain isolated: each `beginTrimOwnerTurn` gets its
|
|
170
|
+
// own slot, `claimTrimOwner` checks the exact turn's slot, and
|
|
171
|
+
// `endTrimOwnerTurn` removes only that turn's slot.
|
|
172
|
+
// Registry of open trim-owner turns, keyed by the composite string built in
// _trimOwnerKey(). Each value starts as `{ turnId }`.
const _trimOwnerTurns = new Map();
/**
 * Builds the registry key for one turn of one session.
 * @returns {string} `<sessionKey>|<turnId>`.
 */
function _trimOwnerKey(sessionKey, turnId) {
    return ''.concat(sessionKey, '|', turnId);
}
/**
 * Opens (or re-opens) the ownership slot for the given session turn.
 * Any existing slot under the same key is replaced by a fresh one.
 */
function beginTrimOwnerTurn(sessionKey, turnId) {
    const slotKey = _trimOwnerKey(sessionKey, turnId);
    const freshSlot = { turnId };
    _trimOwnerTurns.set(slotKey, freshSlot);
}
/**
 * Closes the ownership slot for the given session turn, leaving other
 * turns of the same session untouched.
 */
function endTrimOwnerTurn(sessionKey, turnId) {
    const slotKey = _trimOwnerKey(sessionKey, turnId);
    _trimOwnerTurns.delete(slotKey);
}
|
|
182
|
+
/**
 * Try to take the steady-state trim owner slot of one turn.
 *
 * Outcomes:
 *   - `path` is a compact.* path, or is not one of the steady-state paths:
 *     returns true without touching any slot.
 *   - No slot is open for `sessionKey` + `turnId`: returns true (there is
 *     nothing to enforce).
 *   - The slot is open and unclaimed: records `path` as the owner and
 *     returns true.
 *   - The slot is already claimed: this is a duplicate claim. Under
 *     NODE_ENV='development' an Error is thrown; otherwise the message is
 *     logged with console.warn and false is returned. Callers must skip the
 *     second real trim when they get false.
 *
 * @param {string} sessionKey - session the turn belongs to.
 * @param {string} turnId - id of the turn being trimmed.
 * @param {string} path - label of the trim path making the claim.
 * @returns {boolean} true when the claim succeeds or is exempt, false on a
 *   suppressed duplicate.
 * @throws {Error} on a duplicate claim when NODE_ENV is 'development'.
 */
function claimTrimOwner(sessionKey, turnId, path) {
    // Compact paths and every other non-steady path never contend for the slot.
    const exempt = COMPACT_TRIM_PATHS.has(path) || !STEADY_STATE_TRIM_PATHS.has(path);
    if (exempt) {
        return true;
    }
    const slot = _trimOwnerTurns.get(_trimOwnerKey(sessionKey, turnId));
    if (!slot) {
        // No assemble-turn scope is open for this turn.
        return true;
    }
    if (!slot.claimedPath) {
        slot.claimedPath = path;
        return true;
    }
    const violation = `[hypermem-plugin] trimOwner: duplicate steady-state trim claim in turn ${slot.turnId} (sessionKey=${sessionKey}): first=${slot.claimedPath} second=${path}`;
    const isDevelopment = process.env.NODE_ENV === 'development';
    if (isDevelopment) {
        throw new Error(violation);
    }
    // Outside development the violation is reported, not raised.
    console.warn(violation);
    return false;
}
|
|
233
|
+
/**
 * Guard / no-op telemetry emitter.
 *
 * Appends one `trim-guard` JSON line (`{ event, ts, ...fields }`) to the
 * same telemetry stream the other emitters use. The distinct event name
 * lets reporting tell these records apart from `trim` records.
 *
 * Returns immediately when telemetry is disabled or no stream is available,
 * and swallows any error raised while building or writing the record.
 *
 * @param {object} fields - properties merged into the record.
 * @returns {void}
 */
function guardTelemetry(fields) {
    const sink = telemetryEnabled() ? getTelemetryStream() : null;
    if (!sink) {
        return;
    }
    try {
        const line = JSON.stringify({
            event: 'trim-guard',
            ts: new Date().toISOString(),
            ...fields,
        });
        sink.write(line + '\n');
    }
    catch {
        // Telemetry must never throw
    }
}
|
|
262
|
+
// ─── B3: Batch trim with growth allowance ────────────────────────────────
|
|
263
|
+
// Trim fires only when window usage exceeds the soft target by this fraction.
|
|
264
|
+
// Small natural growth (e.g. a short assistant reply) never triggers a trim;
|
|
265
|
+
// only genuine spikes (model switch, cold-start, multi-tool overrun) do.
|
|
266
|
+
// When trim fires, the target is (softTarget * (1 - headroomFraction)) so the
|
|
267
|
+
// window has room to grow for several turns before the next trim fires.
|
|
268
|
+
//
|
|
269
|
+
// softTarget (0.65): matches refreshRedisGradient → steady state never trims
|
|
270
|
+
// growthThreshold (0.05): 5% overage buffer before trim fires
|
|
271
|
+
// headroomFraction (0.10): trim target = softTarget * 0.90 → ~58.5% of budget
|
|
272
|
+
// Canonical values live in the core package so plugin trim guards and compose
|
|
273
|
+
// paths cannot drift.
|
|
274
|
+
// Test-only: expose emitters so the unit test can exercise them directly
|
|
275
|
+
// without standing up a real session. Wrapped in a getter object so the flag
|
|
276
|
+
// guard still runs (zero-cost when off).
|
|
277
|
+
export const __telemetryForTests = {
    // Telemetry emitters and turn-id generator.
    trimTelemetry,
    assembleTrace,
    degradationTelemetry,
    guardTelemetry,
    nextTurnId,
    // Trim-owner turn scaffolding.
    beginTrimOwnerTurn,
    endTrimOwnerTurn,
    claimTrimOwner,
    // Trim policy values and resolver imported from the core package, so
    // tests assert against the shared definitions.
    TRIM_SOFT_TARGET,
    TRIM_GROWTH_THRESHOLD,
    TRIM_HEADROOM_FRACTION,
    resolveTrimBudgets,
    /**
     * Returns the module to its initial telemetry state: ends the current
     * stream (errors ignored), clears the failure latch and turn counter,
     * and drops every open trim-owner turn.
     */
    reset() {
        try {
            _telemetryStream?.end();
        }
        catch { /* ignore */ }
        _telemetryStream = null;
        _telemetryStreamFailed = false;
        _telemetryTurnCounter = 0;
        _trimOwnerTurns.clear();
    },
};
|
|
33
305
|
// ─── hypermem singleton ────────────────────────────────────────
|
|
34
306
|
// Runtime load is dynamic (hypermem is a sibling package loaded from repo dist,
|
|
35
307
|
// not installed via npm). Types come from the core package devDependency.
|
|
36
308
|
// This pattern keeps the runtime path stable while TypeScript resolves types
|
|
37
309
|
// from the canonical source — no more local shim drift.
|
|
38
|
-
// Resolved at init time: pluginConfig.hyperMemPath >
|
|
310
|
+
// Resolved at init time: pluginConfig.hyperMemPath > import.meta.resolve('@psiclawops/hypermem') > dev fallback
|
|
39
311
|
let HYPERMEM_PATH = '';
|
|
40
|
-
const require = createRequire(import.meta.url);
|
|
41
312
|
let _hm = null;
|
|
42
313
|
let _hmInitPromise = null;
|
|
43
314
|
let _indexer = null;
|
|
@@ -65,6 +336,89 @@ let _evictionConfig;
|
|
|
65
336
|
let _contextWindowSize = 128_000;
|
|
66
337
|
let _contextWindowReserve = 0.25;
|
|
67
338
|
let _deferToolPruning = false;
|
|
339
|
+
let _verboseLogging = false;
|
|
340
|
+
let _contextWindowOverrides = {};
|
|
341
|
+
const _budgetFallbackWarnings = new Set();
|
|
342
|
+
export const CONTEXT_WINDOW_OVERRIDE_KEY_REGEX = /^[^/\s]+\/[^/\s]+$/;
|
|
343
|
+
// Shape of one contextWindowOverrides entry: two optional positive-integer
// fields, at least one of which must be present, and contextTokens may not
// exceed contextWindow when both are given.
const contextWindowOverrideSchema = z
    .object({
        contextTokens: z.number().int().positive().optional(),
        contextWindow: z.number().int().positive().optional(),
    })
    .superRefine(({ contextTokens, contextWindow }, ctx) => {
        const hasTokens = contextTokens != null;
        const hasWindow = contextWindow != null;
        if (!hasTokens && !hasWindow) {
            ctx.addIssue({
                code: z.ZodIssueCode.custom,
                message: 'override must declare contextTokens, contextWindow, or both',
            });
        }
        if (hasTokens && hasWindow && contextTokens > contextWindow) {
            ctx.addIssue({
                code: z.ZodIssueCode.custom,
                message: 'contextTokens must be less than or equal to contextWindow',
            });
        }
    });
|
|
362
|
+
/**
 * Validates a raw `contextWindowOverrides` config value.
 *
 * Anything that is not a non-array object yields an empty result. For an
 * object, each key is trimmed and lower-cased, then checked against
 * CONTEXT_WINDOW_OVERRIDE_KEY_REGEX; each value is parsed with
 * contextWindowOverrideSchema. Entries that fail either check are dropped
 * and described in `warnings`.
 *
 * @param {unknown} raw - the configured value.
 * @returns {{ value: object, warnings: string[] }} accepted overrides keyed
 *   by normalized key, plus one warning per rejected entry.
 */
export function sanitizeContextWindowOverrides(raw) {
    const value = {};
    const warnings = [];
    const isKeyedObject = Boolean(raw) && typeof raw === 'object' && !Array.isArray(raw);
    if (!isKeyedObject) {
        return { value, warnings };
    }
    Object.entries(raw).forEach(([key, candidate]) => {
        const prefix = `ignoring contextWindowOverrides[${JSON.stringify(key)}]: `;
        const normalizedKey = key.trim().toLowerCase();
        if (!CONTEXT_WINDOW_OVERRIDE_KEY_REGEX.test(normalizedKey)) {
            warnings.push(`${prefix}key must be "provider/model"`);
            return;
        }
        const parsed = contextWindowOverrideSchema.safeParse(candidate);
        if (parsed.success) {
            value[normalizedKey] = parsed.data;
            return;
        }
        const reasons = parsed.error.issues.map(issue => issue.message).join('; ');
        warnings.push(prefix + reasons);
    });
    return { value, warnings };
}
|
|
384
|
+
/**
 * Picks the effective token budget and names where it came from.
 *
 * Order of precedence:
 *   1. a truthy `tokenBudget`, returned unchanged;
 *   2. an override for the normalized model key (contextTokens preferred
 *      over contextWindow), scaled by `1 - contextWindowReserve`;
 *   3. `contextWindowSize`, scaled the same way.
 *
 * @param {object} args - tokenBudget, model, contextWindowSize,
 *   contextWindowReserve and optional contextWindowOverrides.
 * @returns {{ budget: number, source: string }}
 */
export function resolveEffectiveBudget(args) {
    const { tokenBudget, model, contextWindowSize, contextWindowReserve } = args;
    if (tokenBudget) {
        return { budget: tokenBudget, source: 'runtime tokenBudget' };
    }
    const key = normalizeModelKey(model);
    const override = key ? args.contextWindowOverrides?.[key] : undefined;
    const configuredWindow = override?.contextTokens ?? override?.contextWindow;
    let window = contextWindowSize;
    let source = 'fallback contextWindowSize';
    if (configuredWindow) {
        window = configuredWindow;
        source = `contextWindowOverrides[${key}]`;
    }
    return {
        budget: Math.floor(window * (1 - contextWindowReserve)),
        source,
    };
}
|
|
403
|
+
/**
 * Normalizes a model identifier for override lookups.
 *
 * @param {string | null | undefined} model - raw model identifier.
 * @returns {string | null} the trimmed, lower-cased identifier, or null when
 *   the input is falsy or blank.
 */
function normalizeModelKey(model) {
    const key = model ? model.trim().toLowerCase() : '';
    return key.length > 0 ? key : null;
}
|
|
409
|
+
/**
 * Writes `message` to console.log, but only while verbose logging is on.
 * @param {string} message - text to log.
 */
function verboseLog(message) {
    if (!_verboseLogging) {
        return;
    }
    console.log(message);
}
|
|
413
|
+
/**
 * Looks up the configured window for a model in the module-level overrides.
 *
 * @param {string | null | undefined} model - raw model identifier.
 * @returns {number | null} the override's contextTokens, else its
 *   contextWindow, else null (also null when the model has no usable key or
 *   no override entry).
 */
function resolveConfiguredWindow(model) {
    const key = normalizeModelKey(model);
    const override = key ? _contextWindowOverrides[key] : undefined;
    if (!override) {
        return null;
    }
    return override.contextTokens ?? override.contextWindow ?? null;
}
|
|
68
422
|
// Subagent warming mode: 'full' | 'light' | 'off'. Default: 'light'.
|
|
69
423
|
// Controls how much HyperMem context is injected into subagent sessions.
|
|
70
424
|
let _subagentWarming = 'light';
|
|
@@ -106,11 +460,34 @@ function getOverheadFallback(tier) {
|
|
|
106
460
|
* total context (history + system) exceeds the model window before trim
|
|
107
461
|
* completes, causing result stripping.
|
|
108
462
|
*/
|
|
109
|
-
function computeEffectiveBudget(tokenBudget) {
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
463
|
+
/**
 * Resolves the effective budget from module configuration and reports which
 * source was used.
 *
 * The number itself always comes from resolveEffectiveBudget(). This
 * function adds verbose logging for each source and, on the fallback
 * source, a console warning that is emitted once per normalized model key.
 *
 * @param {number | null | undefined} tokenBudget - runtime-supplied budget.
 * @param {string | null | undefined} model - model identifier, if known.
 * @returns {number} the effective budget.
 */
function computeEffectiveBudget(tokenBudget, model) {
    const { budget, source } = resolveEffectiveBudget({
        tokenBudget,
        model,
        contextWindowSize: _contextWindowSize,
        contextWindowReserve: _contextWindowReserve,
        contextWindowOverrides: _contextWindowOverrides,
    });
    const modelSuffix = model ? ` model=${model}` : '';
    if (source === 'runtime tokenBudget') {
        verboseLog(`[hypermem-plugin] budget source: runtime tokenBudget=${tokenBudget}${modelSuffix}`);
        return budget;
    }
    const configuredWindow = resolveConfiguredWindow(model);
    if (configuredWindow) {
        verboseLog(`[hypermem-plugin] budget source: contextWindowOverrides[${normalizeModelKey(model)}]=${configuredWindow}, reserve=${_contextWindowReserve}, effective=${budget}`);
        return budget;
    }
    verboseLog(`[hypermem-plugin] budget source: fallback contextWindowSize=${_contextWindowSize}, reserve=${_contextWindowReserve}, effective=${budget}${modelSuffix}`);
    // Warn only the first time each model key hits the fallback.
    const warningKey = normalizeModelKey(model) ?? '(unknown-model)';
    const alreadyWarned = _budgetFallbackWarnings.has(warningKey);
    if (!alreadyWarned) {
        _budgetFallbackWarnings.add(warningKey);
        const forModel = model ? ` for model ${model}` : '';
        console.warn(`[hypermem-plugin] No runtime tokenBudget${forModel}; falling back to contextWindowSize=${_contextWindowSize}. Add contextWindowOverrides["provider/model"] to config.json or openclaw.json if detection is wrong.`);
    }
    return budget;
}
|
|
115
492
|
// ─── Plugin config cache ───────────────────────────────────────
|
|
116
493
|
// Populated from openclaw.json plugins.entries.hypercompositor.config
|
|
@@ -146,6 +523,12 @@ async function loadUserConfig() {
|
|
|
146
523
|
merged.contextWindowReserve = _pluginConfig.contextWindowReserve;
|
|
147
524
|
if (_pluginConfig.deferToolPruning != null)
|
|
148
525
|
merged.deferToolPruning = _pluginConfig.deferToolPruning;
|
|
526
|
+
if (_pluginConfig.verboseLogging != null)
|
|
527
|
+
merged.verboseLogging = _pluginConfig.verboseLogging;
|
|
528
|
+
if (_pluginConfig.contextWindowOverrides != null)
|
|
529
|
+
merged.contextWindowOverrides = { ...merged.contextWindowOverrides, ..._pluginConfig.contextWindowOverrides };
|
|
530
|
+
if (_pluginConfig.warmCacheReplayThresholdMs != null)
|
|
531
|
+
merged.warmCacheReplayThresholdMs = _pluginConfig.warmCacheReplayThresholdMs;
|
|
149
532
|
if (_pluginConfig.subagentWarming != null)
|
|
150
533
|
merged.subagentWarming = _pluginConfig.subagentWarming;
|
|
151
534
|
if (_pluginConfig.compositor)
|
|
@@ -214,10 +597,16 @@ async function getHyperMem() {
|
|
|
214
597
|
userConfig.contextWindowReserve >= 0 && userConfig.contextWindowReserve <= 0.5) {
|
|
215
598
|
_contextWindowReserve = userConfig.contextWindowReserve;
|
|
216
599
|
}
|
|
217
|
-
|
|
218
|
-
|
|
600
|
+
_deferToolPruning = userConfig.deferToolPruning === true;
|
|
601
|
+
if (_deferToolPruning) {
|
|
219
602
|
console.log('[hypermem-plugin] deferToolPruning: true — tool gradient deferred to host contextPruning');
|
|
220
603
|
}
|
|
604
|
+
_verboseLogging = userConfig.verboseLogging === true;
|
|
605
|
+
const sanitizedOverrides = sanitizeContextWindowOverrides(userConfig.contextWindowOverrides);
|
|
606
|
+
_contextWindowOverrides = sanitizedOverrides.value;
|
|
607
|
+
for (const warning of sanitizedOverrides.warnings) {
|
|
608
|
+
console.warn(`[hypermem-plugin] ${warning}`);
|
|
609
|
+
}
|
|
221
610
|
const warmingVal = userConfig.subagentWarming;
|
|
222
611
|
if (warmingVal === 'full' || warmingVal === 'light' || warmingVal === 'off') {
|
|
223
612
|
_subagentWarming = warmingVal;
|
|
@@ -230,6 +619,8 @@ async function getHyperMem() {
|
|
|
230
619
|
console.log(`[hypermem-plugin] context window: ${_contextWindowSize} tokens, ` +
|
|
231
620
|
`${Math.round(_contextWindowReserve * 100)}% reserved (${reservedTokens} tokens), ` +
|
|
232
621
|
`effective history budget: ${_contextWindowSize - reservedTokens} tokens`);
|
|
622
|
+
verboseLog(`[hypermem-plugin] warmCacheReplayThresholdMs=${_cacheReplayThresholdMs}`);
|
|
623
|
+
verboseLog(`[hypermem-plugin] contextWindowOverrides keys=${Object.keys(_contextWindowOverrides).join(', ') || '(none)'}`);
|
|
233
624
|
const instance = await HyperMem.create({
|
|
234
625
|
dataDir: _pluginConfig.dataDir ?? path.join(os.homedir(), '.openclaw/hypermem'),
|
|
235
626
|
cache: {
|
|
@@ -260,16 +651,31 @@ async function getHyperMem() {
|
|
|
260
651
|
catch {
|
|
261
652
|
return [];
|
|
262
653
|
}
|
|
263
|
-
}, {
|
|
264
|
-
|
|
654
|
+
}, {
|
|
655
|
+
enabled: true,
|
|
656
|
+
periodicInterval: userConfig?.maintenance?.periodicInterval ?? 300000,
|
|
657
|
+
maxActiveConversations: userConfig?.maintenance?.maxActiveConversations ?? 5,
|
|
658
|
+
recentConversationCooldownMs: userConfig?.maintenance?.recentConversationCooldownMs ?? 30000,
|
|
659
|
+
maxCandidatesPerPass: userConfig?.maintenance?.maxCandidatesPerPass ?? 200,
|
|
660
|
+
},
|
|
661
|
+
// Cursor fetcher: reads the SQLite-backed session cursor
|
|
265
662
|
async (agentId, sessionKey) => {
|
|
266
663
|
return instance.getSessionCursor(agentId, sessionKey);
|
|
267
664
|
},
|
|
268
665
|
// Pass vector store so new facts/episodes are embedded at index time
|
|
269
666
|
instance.getVectorStore() ?? undefined,
|
|
270
667
|
// Dreaming config — passed from hypermem user config if set
|
|
271
|
-
userConfig?.dreaming ?? {}
|
|
668
|
+
userConfig?.dreaming ?? {},
|
|
669
|
+
// KL-01: global write policy — passed from hypermem user config
|
|
670
|
+
userConfig?.globalWritePolicy ?? 'deny');
|
|
272
671
|
_indexer.start();
|
|
672
|
+
if (_verboseLogging) {
|
|
673
|
+
const mc = userConfig?.maintenance ?? {};
|
|
674
|
+
console.log(`[hypermem-plugin] maintenance settings: periodicInterval=${mc.periodicInterval ?? 300000}ms ` +
|
|
675
|
+
`maxActiveConversations=${mc.maxActiveConversations ?? 5} ` +
|
|
676
|
+
`cooldown=${mc.recentConversationCooldownMs ?? 30000}ms ` +
|
|
677
|
+
`maxCandidatesPerPass=${mc.maxCandidatesPerPass ?? 200}`);
|
|
678
|
+
}
|
|
273
679
|
}
|
|
274
680
|
catch {
|
|
275
681
|
// Non-fatal — indexer wiring can fail without breaking context assembly
|
|
@@ -510,6 +916,84 @@ function estimateTokens(text) {
|
|
|
510
916
|
return 0;
|
|
511
917
|
return Math.ceil(text.length / 4);
|
|
512
918
|
}
|
|
919
|
+
/**
 * Estimates the token cost of one structured content part.
 *
 *   - image / image_url parts: length of the inline data string (or, failing
 *     that, the URL string) divided by 3, rounded up;
 *   - toolCall / tool_use parts: length of the part's JSON divided by 2,
 *     rounded up;
 *   - anything else: estimateTokens() of `text`, else of a string `content`,
 *     else of the JSON of a non-null `content`, else of null.
 *
 * @param {object} part - one entry of a message's content array.
 * @returns {number} estimated token count.
 */
function estimateMessagePartTokens(part) {
    const kind = part.type;
    if (kind === 'image' || kind === 'image_url') {
        const inlineData = part.source?.data;
        const remoteUrl = part.image_url?.url;
        let payload = '';
        if (typeof inlineData === 'string') {
            payload = inlineData;
        }
        else if (typeof remoteUrl === 'string') {
            payload = remoteUrl;
        }
        return Math.ceil(payload.length / 3);
    }
    if (kind === 'toolCall' || kind === 'tool_use') {
        const serialized = JSON.stringify(part);
        return Math.ceil(serialized.length / 2);
    }
    let text = null;
    if (typeof part.text === 'string') {
        text = part.text;
    }
    else if (typeof part.content === 'string') {
        text = part.content;
    }
    else if (part.content != null) {
        text = JSON.stringify(part.content);
    }
    return estimateTokens(text);
}
|
|
934
|
+
/**
 * Estimates the token cost of one message.
 *
 * Adds together: estimateTokens() of a string `textContent` (or of null);
 * estimateTokens() of a string `content` when `textContent` is not a
 * string; half the JSON length (rounded up) of `toolCalls` and of
 * `toolResults` when truthy; and estimateMessagePartTokens() of every entry
 * when `content` is an array.
 *
 * @param {object} msg - message to measure.
 * @returns {number} estimated token count.
 */
function estimateMessageTokens(msg) {
    const { textContent, content, toolCalls, toolResults } = msg;
    const hasText = typeof textContent === 'string';
    let total = estimateTokens(hasText ? textContent : null);
    if (!hasText && typeof content === 'string') {
        total += estimateTokens(content);
    }
    [toolCalls, toolResults].forEach((payload) => {
        if (payload) {
            total += Math.ceil(JSON.stringify(payload).length / 2);
        }
    });
    if (Array.isArray(content)) {
        content.forEach((part) => {
            total += estimateMessagePartTokens(part);
        });
    }
    return total;
}
|
|
948
|
+
/**
 * Sums estimateMessageTokens() over a list of messages.
 *
 * @param {object[]} messages - messages to measure.
 * @returns {number} combined estimate; 0 for an empty list.
 */
function estimateMessageArrayTokens(messages) {
    let total = 0;
    messages.forEach((msg) => {
        total += estimateMessageTokens(msg);
    });
    return total;
}
|
|
951
|
+
/**
 * Compares the runtime, cache and composed token counts for one trim path.
 *
 * The tolerance is 5% of the budget (rounded down), with a floor of 500.
 *   - runtime vs composed within tolerance: not an anomaly. If either
 *     cache comparison reaches the tolerance, a console.debug cache-drift
 *     line is written; otherwise nothing is logged.
 *   - otherwise: a console.warn anomaly line is written and a
 *     'pressure-accounting-anomaly' guard-telemetry record is emitted.
 *
 * @param {object} fields - path, agentId, sessionKey, budget, runtimeTokens,
 *   redisTokens (the cache count) and composedTokens.
 * @returns {void}
 */
function maybeLogPressureAccountingAnomaly(fields) {
    const { path: trimPath, budget, runtimeTokens, redisTokens, composedTokens } = fields;
    const threshold = Math.max(500, Math.floor(budget * 0.05));
    const runtimeVsComposed = Math.abs(runtimeTokens - composedTokens);
    const redisVsComposed = Math.abs(redisTokens - composedTokens);
    const runtimeVsRedis = Math.abs(runtimeTokens - redisTokens);
    const counts = `runtime=${runtimeTokens} cache=${redisTokens} composed=${composedTokens}`;
    const withinTolerance = runtimeVsComposed < threshold;
    if (!withinTolerance) {
        // Runtime and composed counts disagree: report it.
        console.warn(`[hypermem-plugin] pressure-accounting anomaly: path=${trimPath} ${counts} budget=${budget} threshold=${threshold}`);
        guardTelemetry({
            path: trimPath,
            agentId: fields.agentId,
            sessionKey: fields.sessionKey,
            reason: 'pressure-accounting-anomaly',
        });
        return;
    }
    // Cache-only drift is logged at debug level, not as a warning.
    const cacheDrifted = redisVsComposed >= threshold || runtimeVsRedis >= threshold;
    if (cacheDrifted) {
        console.debug(`[hypermem-plugin] cache-drift (non-anomalous): path=${trimPath} ${counts} budget=${budget}`);
    }
}
|
|
982
|
+
/**
 * Coerces a stored replay-recovery value to a usable state.
 *
 * @param {unknown} value - raw stored value.
 * @returns {string | null} '' for the empty string, the value itself when
 *   isReplayState() accepts it, and null for null/undefined or anything
 *   else.
 */
function normalizeReplayRecoveryState(value) {
    if (value === '') {
        return '';
    }
    const recognized = value != null && isReplayState(value);
    return recognized ? value : null;
}
|
|
989
|
+
/**
 * Stores the replay-recovery state in the session's 'replayRecoveryState'
 * cache slot. A null/undefined state is stored as the empty string.
 * Failures are swallowed: persistence is best-effort.
 *
 * @param {object} hm - object exposing `cache.setSlot(agentId, sessionKey, slot, value)`.
 * @param {string} agentId - agent owning the session.
 * @param {string} sessionKey - session whose slot is written.
 * @param {string | null | undefined} nextState - state to persist.
 * @returns {Promise<void>}
 */
async function persistReplayRecoveryState(hm, agentId, sessionKey, nextState) {
    const slotValue = nextState ?? '';
    try {
        await hm.cache.setSlot(agentId, sessionKey, 'replayRecoveryState', slotValue);
    }
    catch {
        // Non-fatal
    }
}
|
|
513
997
|
function hasStructuredToolCallMessage(msg) {
|
|
514
998
|
if (Array.isArray(msg.toolCalls) && msg.toolCalls.length > 0)
|
|
515
999
|
return true;
|
|
@@ -604,15 +1088,7 @@ async function estimateWindowTokens(hm, agentId, sessionKey) {
|
|
|
604
1088
|
?? await hm.cache.getHistory(agentId, sessionKey);
|
|
605
1089
|
if (!window || window.length === 0)
|
|
606
1090
|
return 0;
|
|
607
|
-
return window
|
|
608
|
-
let t = estimateTokens(msg.textContent);
|
|
609
|
-
// Tool payloads are dense JSON — use /2 not /4 to avoid systematic undercount
|
|
610
|
-
if (msg.toolCalls)
|
|
611
|
-
t += Math.ceil(JSON.stringify(msg.toolCalls).length / 2);
|
|
612
|
-
if (msg.toolResults)
|
|
613
|
-
t += Math.ceil(JSON.stringify(msg.toolResults).length / 2);
|
|
614
|
-
return sum + t;
|
|
615
|
-
}, 0);
|
|
1091
|
+
return estimateMessageArrayTokens(window);
|
|
616
1092
|
}
|
|
617
1093
|
catch {
|
|
618
1094
|
return 0;
|
|
@@ -716,7 +1192,7 @@ async function truncateJsonlIfNeeded(sessionFile, targetDepth, force = false, to
|
|
|
716
1192
|
function createHyperMemEngine() {
|
|
717
1193
|
return {
|
|
718
1194
|
info: {
|
|
719
|
-
id: '
|
|
1195
|
+
id: 'hypercompositor',
|
|
720
1196
|
name: 'hypermem context engine',
|
|
721
1197
|
version: '0.6.3',
|
|
722
1198
|
// We own compaction — assemble() trims to budget via the compositor safety
|
|
@@ -743,6 +1219,55 @@ function createHyperMemEngine() {
|
|
|
743
1219
|
const sk = resolveSessionKey(sessionId, sessionKey);
|
|
744
1220
|
const agentId = extractAgentId(sk);
|
|
745
1221
|
// EC1 JSONL truncation moved to maintain() — bootstrap stays fast.
|
|
1222
|
+
// B2: Session-restart detection — rotateSessionContext hook.
|
|
1223
|
+
// When the runtime starts a new session (new sessionId) for an existing
|
|
1224
|
+
// sessionKey, archive the old context head and create a fresh active
|
|
1225
|
+
// context so the new conversation starts clean. This prevents the new
|
|
1226
|
+
// session from inheriting a stale context head pointer from the prior run.
|
|
1227
|
+
//
|
|
1228
|
+
// Detection: if a conversation row exists for this sessionKey AND the
|
|
1229
|
+
// stored session_id differs from the incoming sessionId (runtime-assigned),
|
|
1230
|
+
// treat this as a session restart.
|
|
1231
|
+
//
|
|
1232
|
+
// Non-fatal: context rotation is best-effort and never blocks bootstrap.
|
|
1233
|
+
if (sessionId) {
|
|
1234
|
+
try {
|
|
1235
|
+
const _msgDb = hm.dbManager.getMessageDb(agentId);
|
|
1236
|
+
if (_msgDb) {
|
|
1237
|
+
const _existingConv = _msgDb.prepare('SELECT id, session_id FROM conversations WHERE session_key = ? LIMIT 1').get(sk);
|
|
1238
|
+
if (_existingConv &&
|
|
1239
|
+
_existingConv.session_id !== null &&
|
|
1240
|
+
_existingConv.session_id !== sessionId) {
|
|
1241
|
+
// Distinct sessionId — this is a session restart for an existing sessionKey.
|
|
1242
|
+
rotateSessionContext(_msgDb, agentId, sk, _existingConv.id);
|
|
1243
|
+
// Update the stored session_id to the new one.
|
|
1244
|
+
try {
|
|
1245
|
+
_msgDb.prepare('UPDATE conversations SET session_id = ? WHERE id = ?')
|
|
1246
|
+
.run(sessionId, _existingConv.id);
|
|
1247
|
+
}
|
|
1248
|
+
catch {
|
|
1249
|
+
// Best-effort — column may not exist in older schemas
|
|
1250
|
+
}
|
|
1251
|
+
console.log(`[hypermem-plugin] bootstrap: session restart detected for ${agentId}/${sk} ` +
|
|
1252
|
+
`(prev session_id=${_existingConv.session_id}, new=${sessionId}) — context rotated`);
|
|
1253
|
+
}
|
|
1254
|
+
else if (_existingConv && _existingConv.session_id === null && sessionId) {
|
|
1255
|
+
// Conversation exists but session_id was never recorded — stamp it now.
|
|
1256
|
+
try {
|
|
1257
|
+
_msgDb.prepare('UPDATE conversations SET session_id = ? WHERE id = ?')
|
|
1258
|
+
.run(sessionId, _existingConv.id);
|
|
1259
|
+
}
|
|
1260
|
+
catch {
|
|
1261
|
+
// Best-effort
|
|
1262
|
+
}
|
|
1263
|
+
}
|
|
1264
|
+
}
|
|
1265
|
+
}
|
|
1266
|
+
catch (rotateErr) {
|
|
1267
|
+
// Non-fatal — never block bootstrap on context rotation
|
|
1268
|
+
console.warn('[hypermem-plugin] bootstrap: rotateSessionContext failed (non-fatal):', rotateErr.message);
|
|
1269
|
+
}
|
|
1270
|
+
}
|
|
746
1271
|
// Fast path: if session already has history in Redis, skip warm entirely.
|
|
747
1272
|
// sessionExists() is a single EXISTS call — sub-millisecond cost.
|
|
748
1273
|
const alreadyWarm = await hm.cache.sessionExists(agentId, sk);
|
|
@@ -846,15 +1371,20 @@ function createHyperMemEngine() {
|
|
|
846
1371
|
const warmBudget = 90_000;
|
|
847
1372
|
const warmPressure = postWarmTokens / warmBudget;
|
|
848
1373
|
if (warmPressure > 0.80) {
|
|
849
|
-
|
|
850
|
-
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
|
|
1374
|
+
// Sprint 2.2a: demote warmstart to guard telemetry.
|
|
1375
|
+
//
|
|
1376
|
+
// Previously this path performed a real trim + invalidateWindow
|
|
1377
|
+
// and emitted `event:'trim'` with path='warmstart'. Assemble
|
|
1378
|
+
// (tool-loop + normal/subagent) is the steady-state owner now,
|
|
1379
|
+
// so the first turn's assemble.* trim absorbs any remaining
|
|
1380
|
+
// post-warm pressure. Keeping the pressure check + threshold
|
|
1381
|
+
// branch here preserves observability via `event:'trim-guard'`
|
|
1382
|
+
// without mutating Redis history or the window cache.
|
|
1383
|
+
guardTelemetry({
|
|
1384
|
+
path: 'warmstart',
|
|
1385
|
+
agentId, sessionKey: sk,
|
|
1386
|
+
reason: 'warmstart-pressure-demoted',
|
|
1387
|
+
});
|
|
858
1388
|
}
|
|
859
1389
|
}
|
|
860
1390
|
catch {
|
|
@@ -957,55 +1487,74 @@ function createHyperMemEngine() {
|
|
|
957
1487
|
// ── Pre-ingestion wave guard ──────────────────────────────────────────
|
|
958
1488
|
// Tool result payloads can be 10k-50k tokens each. When a parallel tool
|
|
959
1489
|
// batch (4-6 results) lands while the session is already at 70%+, storing
|
|
960
|
-
// full payloads pushes
|
|
961
|
-
// next assemble() can trim. Use
|
|
962
|
-
// we're deciding what to write TO
|
|
963
|
-
//
|
|
964
|
-
//
|
|
1490
|
+
// full payloads pushes the hot window past the nuclear path threshold
|
|
1491
|
+
// before the next assemble() can trim. Use current hot-window state as
|
|
1492
|
+
// the pressure signal (appropriate here, we're deciding what to write TO
|
|
1493
|
+
// the window).
|
|
1494
|
+
//
|
|
1495
|
+
// Above 70%: truncate toolResult content in transcript, but keep the
|
|
1496
|
+
// full payload durable in tool_artifacts (schema v9). Stub carries
|
|
1497
|
+
// artifactId so the compositor can hydrate on demand.
|
|
1498
|
+
// Above 85%: full stub replacement in transcript, still with artifactId.
|
|
1499
|
+
// At all levels: the full payload is persisted durably. No data loss.
|
|
965
1500
|
const isInboundToolResult = msg.role === 'tool' || msg.role === 'tool_result' || msg.role === 'toolResult';
|
|
966
1501
|
if (isInboundToolResult && neutral.toolResults && neutral.toolResults.length > 0) {
|
|
967
|
-
const
|
|
1502
|
+
const windowTokens = await estimateWindowTokens(hm, agentId, sk);
|
|
968
1503
|
const effectiveBudget = computeEffectiveBudget(undefined);
|
|
969
|
-
const
|
|
970
|
-
// Error tool results are always preserved intact
|
|
1504
|
+
const windowPressure = windowTokens / effectiveBudget;
|
|
1505
|
+
// Error tool results are always preserved intact: they're small and
|
|
971
1506
|
// the model needs the error signal to understand what went wrong.
|
|
972
1507
|
const hasErrorResult = neutral.toolResults.some(tr => tr.isError);
|
|
973
|
-
|
|
974
|
-
|
|
975
|
-
|
|
976
|
-
|
|
977
|
-
|
|
978
|
-
|
|
1508
|
+
// Only apply degradation / artifact capture above elevated pressure.
|
|
1509
|
+
if (windowPressure > 0.70) {
|
|
1510
|
+
const MAX_TOOL_RESULT_CHARS = 500;
|
|
1511
|
+
const highPressure = windowPressure > 0.85;
|
|
1512
|
+
const reason = highPressure ? 'wave_guard_pressure_high' : 'wave_guard_pressure_elevated';
|
|
1513
|
+
// For each non-error tool result, persist the full payload as a
|
|
1514
|
+
// durable artifact first, then rewrite the transcript entry to
|
|
1515
|
+
// either a full stub (high pressure) or a truncated stub with an
|
|
1516
|
+
// artifact pointer (elevated pressure).
|
|
1517
|
+
const rewrittenResults = await Promise.all(neutral.toolResults.map(async (tr) => {
|
|
979
1518
|
if (tr.isError)
|
|
980
|
-
return tr;
|
|
1519
|
+
return tr;
|
|
1520
|
+
const content = typeof tr.content === 'string'
|
|
1521
|
+
? tr.content
|
|
1522
|
+
: JSON.stringify(tr.content);
|
|
1523
|
+
// At elevated pressure, small payloads pass through unchanged.
|
|
1524
|
+
if (!highPressure && content.length <= MAX_TOOL_RESULT_CHARS) {
|
|
1525
|
+
return tr;
|
|
1526
|
+
}
|
|
1527
|
+
let artifactId;
|
|
1528
|
+
try {
|
|
1529
|
+
const record = await hm.recordToolArtifact(agentId, sk, {
|
|
1530
|
+
toolName: tr.name || 'tool_result',
|
|
1531
|
+
toolCallId: tr.callId || undefined,
|
|
1532
|
+
isError: false,
|
|
1533
|
+
payload: content,
|
|
1534
|
+
summary: content.slice(0, 160),
|
|
1535
|
+
});
|
|
1536
|
+
artifactId = record.id;
|
|
1537
|
+
}
|
|
1538
|
+
catch (artErr) {
|
|
1539
|
+
console.warn('[hypermem-plugin] tool artifact capture failed (non-fatal):', artErr.message);
|
|
1540
|
+
}
|
|
1541
|
+
const summary = highPressure
|
|
1542
|
+
? `omitted at ${(windowPressure * 100).toFixed(0)}% window pressure`
|
|
1543
|
+
: `truncated at ${(windowPressure * 100).toFixed(0)}% pressure: ${Math.ceil(content.length / 4)} tokens`;
|
|
981
1544
|
return {
|
|
982
1545
|
...tr,
|
|
983
|
-
content:
|
|
1546
|
+
content: formatToolChainStub({
|
|
1547
|
+
name: tr.name || 'tool_result',
|
|
1548
|
+
id: tr.callId || 'unknown',
|
|
1549
|
+
status: 'ejected',
|
|
1550
|
+
reason,
|
|
1551
|
+
summary,
|
|
1552
|
+
artifactId,
|
|
1553
|
+
}),
|
|
984
1554
|
};
|
|
985
|
-
});
|
|
986
|
-
|
|
987
|
-
console.log(`[hypermem] ingest wave-guard:
|
|
988
|
-
await hm.recordAssistantMessage(agentId, sk, stubNeutral);
|
|
989
|
-
return { ingested: true };
|
|
990
|
-
}
|
|
991
|
-
else if (redisPressure > 0.70) {
|
|
992
|
-
// Elevated: store truncated stub to preserve tool call pairing in history
|
|
993
|
-
const MAX_TOOL_RESULT_CHARS = 500;
|
|
994
|
-
neutral = {
|
|
995
|
-
...neutral,
|
|
996
|
-
toolResults: neutral.toolResults.map(tr => {
|
|
997
|
-
if (tr.isError)
|
|
998
|
-
return tr; // preserve error results intact
|
|
999
|
-
const content = typeof tr.content === 'string' ? tr.content : JSON.stringify(tr.content);
|
|
1000
|
-
if (content.length <= MAX_TOOL_RESULT_CHARS)
|
|
1001
|
-
return tr;
|
|
1002
|
-
return {
|
|
1003
|
-
...tr,
|
|
1004
|
-
content: `[truncated by wave-guard at ${(redisPressure * 100).toFixed(0)}% pressure: ${Math.ceil(content.length / 4)} tokens]`,
|
|
1005
|
-
};
|
|
1006
|
-
}),
|
|
1007
|
-
};
|
|
1008
|
-
console.log(`[hypermem] ingest wave-guard: truncated toolResult (Redis pressure ${(redisPressure * 100).toFixed(0)}% > 70%)${hasErrorResult ? ' — error results preserved' : ''}`);
|
|
1555
|
+
}));
|
|
1556
|
+
neutral = { ...neutral, toolResults: rewrittenResults };
|
|
1557
|
+
console.log(`[hypermem] ingest wave-guard: ${highPressure ? 'stubbed' : 'truncated'} toolResult (window pressure ${(windowPressure * 100).toFixed(0)}% > ${highPressure ? 85 : 70}%)${hasErrorResult ? ' + error results preserved' : ''} - full payload persisted to tool_artifacts`);
|
|
1009
1558
|
}
|
|
1010
1559
|
}
|
|
1011
1560
|
await hm.recordAssistantMessage(agentId, sk, neutral);
|
|
@@ -1076,514 +1625,656 @@ function createHyperMemEngine() {
|
|
|
1076
1625
|
// pass-through that never re-injects context on tool-loop calls.
|
|
1077
1626
|
const lastMsg = messages[messages.length - 1];
|
|
1078
1627
|
const isToolLoop = lastMsg?.role === 'toolResult' || lastMsg?.role === 'tool';
|
|
1079
|
-
|
|
1080
|
-
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
|
|
1087
|
-
|
|
1088
|
-
|
|
1089
|
-
|
|
1090
|
-
|
|
1091
|
-
|
|
1092
|
-
|
|
1093
|
-
|
|
1094
|
-
|
|
1095
|
-
|
|
1096
|
-
|
|
1097
|
-
|
|
1098
|
-
|
|
1099
|
-
|
|
1100
|
-
|
|
1101
|
-
|
|
1102
|
-
|
|
1103
|
-
|
|
1104
|
-
|
|
1105
|
-
|
|
1106
|
-
|
|
1107
|
-
|
|
1108
|
-
|
|
1109
|
-
|
|
1628
|
+
// Telemetry: emit one assembleTrace at entry. Path taxonomy:
|
|
1629
|
+
// 'subagent' - session key matches the subagent pattern
|
|
1630
|
+
// 'cold' - normal full-assembly or tool-loop entry (a separate
|
|
1631
|
+
// 'replay' trace is emitted if the cache replay fast
|
|
1632
|
+
// path is taken below)
|
|
1633
|
+
// Zero-cost when HYPERMEM_TELEMETRY !== '1'.
|
|
1634
|
+
//
|
|
1635
|
+
// Trim-ownership turn context (Sprint 2): the turnId is also used to
|
|
1636
|
+
// scope the shared trim-owner claim helper so duplicate steady-state
|
|
1637
|
+
// trims in a single assemble() turn can be detected and (under
|
|
1638
|
+
// NODE_ENV='development') throw loudly. We always allocate the turnId
|
|
1639
|
+
// and open the scope — the map write is cheap and keeps enforcement
|
|
1640
|
+
// active even when telemetry is off. The scope is closed in the
|
|
1641
|
+
// finally block wrapping the full assemble body below.
|
|
1642
|
+
const _asmSk = resolveSessionKey(sessionId, sessionKey);
|
|
1643
|
+
const _asmTurnId = nextTurnId();
|
|
1644
|
+
beginTrimOwnerTurn(_asmSk, _asmTurnId);
|
|
1645
|
+
if (telemetryEnabled()) {
|
|
1646
|
+
const _agentId = extractAgentId(_asmSk);
|
|
1647
|
+
const _entryPath = _asmSk.includes('subagent:')
|
|
1648
|
+
? 'subagent'
|
|
1649
|
+
: 'cold';
|
|
1650
|
+
assembleTrace({
|
|
1651
|
+
agentId: _agentId,
|
|
1652
|
+
sessionKey: _asmSk,
|
|
1653
|
+
turnId: _asmTurnId,
|
|
1654
|
+
path: _entryPath,
|
|
1655
|
+
toolLoop: isToolLoop,
|
|
1656
|
+
msgCount: messages.length,
|
|
1657
|
+
});
|
|
1658
|
+
}
|
|
1659
|
+
try {
|
|
1660
|
+
if (isToolLoop) {
|
|
1661
|
+
// Tool-loop turns: pass messages through unchanged but still:
|
|
1662
|
+
// 1. Run the trim guardrail — tool loops accumulate history as fast
|
|
1663
|
+
// as regular turns, and the old path skipped trim entirely, leaving
|
|
1664
|
+
// the compaction guard blind (received estimatedTokens=0).
|
|
1665
|
+
// 2. Return a real estimatedTokens = windowTokens + cached overhead,
|
|
1666
|
+
// so the guard has accurate signal and can fire when needed.
|
|
1667
|
+
//
|
|
1668
|
+
// Fix (ingestion-wave): use pressure-tiered trim instead of fixed 80%.
|
|
1669
|
+
// At 91% with 5 parallel web_search calls incoming (~20-30% of budget),
|
|
1670
|
+
// a fixed 80% trim only frees 11% headroom — the wave overflows anyway
|
|
1671
|
+
// and results strip silently. Tier the trim target based on pre-trim
|
|
1672
|
+
// pressure so high-pressure sessions get real headroom before results land.
|
|
1673
|
+
const effectiveBudget = computeEffectiveBudget(tokenBudget, model);
|
|
1674
|
+
try {
|
|
1675
|
+
const hm = await getHyperMem();
|
|
1676
|
+
const sk = resolveSessionKey(sessionId, sessionKey);
|
|
1677
|
+
const agentId = extractAgentId(sk);
|
|
1678
|
+
// ── Image / heavy-content eviction pre-pass ──────────────────────
|
|
1679
|
+
// Evict stale image payloads and large tool results before measuring
|
|
1680
|
+
// pressure. This frees tokens without compaction — images alone can
|
|
1681
|
+
// account for 30%+ of context from a single screenshot 2 turns ago.
|
|
1682
|
+
const evictionCfg = _evictionConfig;
|
|
1683
|
+
const evictionEnabled = evictionCfg?.enabled !== false;
|
|
1684
|
+
let workingMessages = messages;
|
|
1685
|
+
if (evictionEnabled) {
|
|
1686
|
+
const { messages: evicted, stats: evStats } = evictStaleContent(messages, {
|
|
1687
|
+
imageAgeTurns: evictionCfg?.imageAgeTurns,
|
|
1688
|
+
toolResultAgeTurns: evictionCfg?.toolResultAgeTurns,
|
|
1689
|
+
minTokensToEvict: evictionCfg?.minTokensToEvict,
|
|
1690
|
+
keepPreviewChars: evictionCfg?.keepPreviewChars,
|
|
1691
|
+
});
|
|
1692
|
+
workingMessages = evicted;
|
|
1693
|
+
if (evStats.tokensFreed > 0) {
|
|
1694
|
+
console.log(`[hypermem] eviction: ${evStats.imagesEvicted} images, ` +
|
|
1695
|
+
`${evStats.toolResultsEvicted} tool results, ` +
|
|
1696
|
+
`~${evStats.tokensFreed.toLocaleString()} tokens freed`);
|
|
1697
|
+
}
|
|
1698
|
+
}
|
|
1699
|
+
// Measure pressure from the in-memory message array we are actually about
|
|
1700
|
+
// to shape and return. Redis remains a cross-check only.
|
|
1701
|
+
const runtimeTokens = estimateMessageArrayTokens(workingMessages);
|
|
1702
|
+
const redisTokens = await estimateWindowTokens(hm, agentId, sk);
|
|
1703
|
+
const replayRecovery = decideReplayRecovery({
|
|
1704
|
+
currentState: normalizeReplayRecoveryState(await hm.cache.getSlot(agentId, sk, 'replayRecoveryState').catch(() => '')),
|
|
1705
|
+
runtimeTokens,
|
|
1706
|
+
redisTokens,
|
|
1707
|
+
effectiveBudget,
|
|
1110
1708
|
});
|
|
1111
|
-
|
|
1112
|
-
|
|
1113
|
-
|
|
1114
|
-
|
|
1115
|
-
|
|
1709
|
+
const replayMarkerText = replayRecovery.emittedText;
|
|
1710
|
+
const preTrimTokens = runtimeTokens;
|
|
1711
|
+
const pressure = preTrimTokens / effectiveBudget;
|
|
1712
|
+
// Pressure-tiered trim targets use a single authority: the working
|
|
1713
|
+
// message array. Redis drift is logged as an anomaly, never used as
|
|
1714
|
+
// a trim trigger. Replay recovery gets its own explicit bounded mode
|
|
1715
|
+
// instead of sharing the steady-state pressure heuristics.
|
|
1716
|
+
let trimTarget;
|
|
1717
|
+
if (typeof replayRecovery.trimTargetOverride === 'number') {
|
|
1718
|
+
trimTarget = replayRecovery.trimTargetOverride;
|
|
1116
1719
|
}
|
|
1117
|
-
|
|
1118
|
-
|
|
1119
|
-
|
|
1120
|
-
|
|
1121
|
-
|
|
1122
|
-
|
|
1123
|
-
|
|
1124
|
-
|
|
1125
|
-
|
|
1126
|
-
|
|
1127
|
-
|
|
1128
|
-
|
|
1129
|
-
|
|
1130
|
-
//
|
|
1131
|
-
//
|
|
1132
|
-
|
|
1133
|
-
|
|
1134
|
-
|
|
1135
|
-
|
|
1136
|
-
|
|
1137
|
-
|
|
1138
|
-
|
|
1139
|
-
|
|
1140
|
-
|
|
1141
|
-
|
|
1142
|
-
|
|
1143
|
-
|
|
1144
|
-
|
|
1145
|
-
if (part.type === 'image' || part.type === 'image_url') {
|
|
1146
|
-
const src = part.source?.data;
|
|
1147
|
-
const url = part.image_url?.url;
|
|
1148
|
-
const dataStr = typeof src === 'string' ? src : (typeof url === 'string' ? url : '');
|
|
1149
|
-
return s + Math.ceil(dataStr.length / 3); // base64 ~1.33x bytes, ~1 token/4 bytes
|
|
1150
|
-
}
|
|
1151
|
-
return s;
|
|
1152
|
-
}, 0)
|
|
1153
|
-
: 0;
|
|
1154
|
-
return sum + textCost + toolCallCost + toolResultCost + contentCost + imageCost;
|
|
1155
|
-
}, 0);
|
|
1156
|
-
// Redis window is a useful cross-check; use whichever is higher so we never
|
|
1157
|
-
// underestimate when Redis is ahead of the runtime snapshot.
|
|
1158
|
-
const redisTokens = await estimateWindowTokens(hm, agentId, sk);
|
|
1159
|
-
const preTrimTokens = Math.max(runtimeTokens, redisTokens);
|
|
1160
|
-
const pressure = preTrimTokens / effectiveBudget;
|
|
1161
|
-
// Pressure-tiered trim targets:
|
|
1162
|
-
// JSONL-replay (EC1): runtimeTokens >> redisTokens means session
|
|
1163
|
-
// loaded from a large JSONL but Redis is cold (post-restart). Trim
|
|
1164
|
-
// aggressively to 30% so system prompt + this turn's tool results fit.
|
|
1165
|
-
// >85% (critical) → trim to 50%: blast headroom for incoming wave
|
|
1166
|
-
// >80% (high) → trim to 60%: 40% headroom
|
|
1167
|
-
// >75% (elevated) → trim to 65%: 35% headroom
|
|
1168
|
-
// ≤75% (normal) → trim to 80%: existing behaviour
|
|
1169
|
-
const isJsonlReplay = runtimeTokens > effectiveBudget * 0.80 && redisTokens < runtimeTokens * 0.20;
|
|
1170
|
-
let trimTarget;
|
|
1171
|
-
if (isJsonlReplay) {
|
|
1172
|
-
trimTarget = 0.20; // EC1: cold Redis + hot JSONL = post-restart replay, need max headroom
|
|
1173
|
-
}
|
|
1174
|
-
else if (pressure > 0.85) {
|
|
1175
|
-
trimTarget = 0.40; // critical: 60% headroom for incoming wave
|
|
1176
|
-
}
|
|
1177
|
-
else if (pressure > 0.80) {
|
|
1178
|
-
trimTarget = 0.50; // high: 50% headroom
|
|
1179
|
-
}
|
|
1180
|
-
else if (pressure > 0.75) {
|
|
1181
|
-
trimTarget = 0.55; // elevated: 45% headroom
|
|
1182
|
-
}
|
|
1183
|
-
else {
|
|
1184
|
-
trimTarget = 0.65; // normal: 35% headroom (was 0.80 — too tight)
|
|
1185
|
-
}
|
|
1186
|
-
const trimBudget = Math.floor(effectiveBudget * trimTarget);
|
|
1187
|
-
const trimmed = await hm.cache.trimHistoryToTokenBudget(agentId, sk, trimBudget);
|
|
1188
|
-
if (trimmed > 0) {
|
|
1189
|
-
await hm.cache.invalidateWindow(agentId, sk);
|
|
1190
|
-
}
|
|
1191
|
-
// Also trim the messages array itself to match the budget.
|
|
1192
|
-
// Redis trim clears the *next* turn's window. This turn's messages are
|
|
1193
|
-
// still the full runtime array — if we return them unchanged at 94%,
|
|
1194
|
-
// OpenClaw strips tool results before sending to the model regardless
|
|
1195
|
-
// of what estimatedTokens says. We need to return a slimmer array now.
|
|
1196
|
-
//
|
|
1197
|
-
// Strategy: keep system/identity messages at the front, then fill from
|
|
1198
|
-
// the back (most recent) until we hit trimBudget. Drop the middle.
|
|
1199
|
-
let trimmedMessages = workingMessages;
|
|
1200
|
-
if (pressure > trimTarget) {
|
|
1201
|
-
const msgArray = workingMessages;
|
|
1202
|
-
// Separate system messages (always keep) from conversation turns
|
|
1203
|
-
const systemMsgs = msgArray.filter(m => m.role === 'system');
|
|
1204
|
-
const convMsgs = msgArray.filter(m => m.role !== 'system');
|
|
1205
|
-
// Pre-process: inline-truncate large tool results before budget-fill drop.
|
|
1206
|
-
// A message with a 40k-token tool result that barely misses budget gets dropped
|
|
1207
|
-
// entirely. Replacing with a placeholder keeps the turn's metadata in context
|
|
1208
|
-
// while freeing the bulk of the tokens.
|
|
1209
|
-
const MAX_INLINE_TOOL_CHARS = 2000; // ~500 tokens
|
|
1210
|
-
// FIX (Bug 3): handle both NeutralMessage format (m.toolResults) and
|
|
1211
|
-
// OpenClaw native format (m.content array with type='tool_result' blocks).
|
|
1212
|
-
// Old guard `if (!m.toolResults)` skipped every native-format message.
|
|
1213
|
-
// Also fixed: replacement must be valid NeutralToolResult { callId, name, content },
|
|
1214
|
-
// not { type, text } which breaks pair-integrity downstream.
|
|
1215
|
-
const processedConvMsgs = convMsgs.map(m => {
|
|
1216
|
-
// NeutralMessage format
|
|
1217
|
-
if (m.toolResults) {
|
|
1218
|
-
const resultStr = JSON.stringify(m.toolResults);
|
|
1219
|
-
if (resultStr.length <= MAX_INLINE_TOOL_CHARS)
|
|
1220
|
-
return m;
|
|
1221
|
-
const firstResult = m.toolResults[0];
|
|
1222
|
-
return {
|
|
1223
|
-
...m,
|
|
1224
|
-
toolResults: [{
|
|
1225
|
-
callId: firstResult?.callId ?? 'unknown',
|
|
1226
|
-
name: firstResult?.name ?? 'tool',
|
|
1227
|
-
content: `[tool result truncated: ${Math.ceil(resultStr.length / 4)} tokens]`,
|
|
1228
|
-
}],
|
|
1229
|
-
};
|
|
1720
|
+
else if (pressure > 0.85) {
|
|
1721
|
+
trimTarget = 0.40; // critical: 60% headroom for incoming wave
|
|
1722
|
+
}
|
|
1723
|
+
else if (pressure > 0.80) {
|
|
1724
|
+
trimTarget = 0.50; // high: 50% headroom
|
|
1725
|
+
}
|
|
1726
|
+
else if (pressure > 0.75) {
|
|
1727
|
+
trimTarget = 0.55; // elevated: 45% headroom
|
|
1728
|
+
}
|
|
1729
|
+
else {
|
|
1730
|
+
trimTarget = 0.65; // normal: 35% headroom
|
|
1731
|
+
}
|
|
1732
|
+
const trimBudget = Math.floor(effectiveBudget * trimTarget);
|
|
1733
|
+
// Steady-state trim owner claim (Sprint 2.2a): route through the
|
|
1734
|
+
// shared helper keyed by (sessionKey, turnId). In development a
|
|
1735
|
+
// duplicate steady-state trim in the same assemble() turn throws.
|
|
1736
|
+
// In non-development a duplicate returns false; the real trim +
|
|
1737
|
+
// its `event:'trim'` emission are gated on the successful claim so
|
|
1738
|
+
// a duplicate claim is actually suppressed, not just warned.
|
|
1739
|
+
// Compact.* paths are exempt; this path is assemble-owned.
|
|
1740
|
+
const toolLoopClaimed = claimTrimOwner(sk, _asmTurnId, 'assemble.toolLoop');
|
|
1741
|
+
let trimmed = 0;
|
|
1742
|
+
let toolLoopCacheInvalidated = false;
|
|
1743
|
+
if (toolLoopClaimed) {
|
|
1744
|
+
trimmed = await hm.cache.trimHistoryToTokenBudget(agentId, sk, trimBudget);
|
|
1745
|
+
if (trimmed > 0) {
|
|
1746
|
+
await hm.cache.invalidateWindow(agentId, sk);
|
|
1747
|
+
toolLoopCacheInvalidated = true;
|
|
1230
1748
|
}
|
|
1231
|
-
|
|
1232
|
-
|
|
1233
|
-
|
|
1234
|
-
|
|
1235
|
-
|
|
1236
|
-
|
|
1237
|
-
|
|
1238
|
-
|
|
1749
|
+
if (telemetryEnabled()) {
|
|
1750
|
+
const postTrimTokens = await estimateWindowTokens(hm, agentId, sk).catch(() => 0);
|
|
1751
|
+
trimTelemetry({
|
|
1752
|
+
path: 'assemble.toolLoop',
|
|
1753
|
+
agentId, sessionKey: sk,
|
|
1754
|
+
preTokens: preTrimTokens,
|
|
1755
|
+
postTokens: postTrimTokens,
|
|
1756
|
+
removed: trimmed,
|
|
1757
|
+
cacheInvalidated: toolLoopCacheInvalidated,
|
|
1758
|
+
reason: `pressure=${(pressure * 100).toFixed(1)}%`,
|
|
1239
1759
|
});
|
|
1240
|
-
|
|
1241
|
-
|
|
1242
|
-
|
|
1243
|
-
|
|
1244
|
-
|
|
1760
|
+
}
|
|
1761
|
+
}
|
|
1762
|
+
else if (telemetryEnabled()) {
|
|
1763
|
+
// Surface the suppressed-duplicate as a bounded guard record so
|
|
1764
|
+
// downstream reporting can see how often the gate fires. No
|
|
1765
|
+
// history or window mutation here.
|
|
1766
|
+
guardTelemetry({
|
|
1767
|
+
path: 'assemble.toolLoop',
|
|
1768
|
+
agentId, sessionKey: sk,
|
|
1769
|
+
reason: 'duplicate-claim-suppressed',
|
|
1770
|
+
});
|
|
1771
|
+
}
|
|
1772
|
+
// Also trim the messages array itself to match the budget.
|
|
1773
|
+
// Redis trim clears the *next* turn's window. This turn's messages are
|
|
1774
|
+
// still the full runtime array — if we return them unchanged at 94%,
|
|
1775
|
+
// OpenClaw strips tool results before sending to the model regardless
|
|
1776
|
+
// of what estimatedTokens says. We need to return a slimmer array now.
|
|
1777
|
+
//
|
|
1778
|
+
// Strategy: keep system/identity messages at the front, then fill from
|
|
1779
|
+
// the back (most recent) until we hit trimBudget. Drop the middle.
|
|
1780
|
+
let trimmedMessages = workingMessages;
|
|
1781
|
+
if (pressure > trimTarget) {
|
|
1782
|
+
const msgArray = workingMessages;
|
|
1783
|
+
// Separate system messages (always keep) from conversation turns
|
|
1784
|
+
const systemMsgs = msgArray.filter(m => m.role === 'system');
|
|
1785
|
+
const convMsgs = msgArray.filter(m => m.role !== 'system');
|
|
1786
|
+
// Pre-process: inline-truncate large tool results before budget-fill drop.
|
|
1787
|
+
// A message with a 40k-token tool result that barely misses budget gets dropped
|
|
1788
|
+
// entirely. Replacing with a placeholder keeps the turn's metadata in context
|
|
1789
|
+
// while freeing the bulk of the tokens.
|
|
1790
|
+
const MAX_INLINE_TOOL_CHARS = 2000; // ~500 tokens
|
|
1791
|
+
// FIX (Bug 3): handle both NeutralMessage format (m.toolResults) and
|
|
1792
|
+
// OpenClaw native format (m.content array with type='tool_result' blocks).
|
|
1793
|
+
// Old guard `if (!m.toolResults)` skipped every native-format message.
|
|
1794
|
+
// Also fixed: replacement must be valid NeutralToolResult { callId, name, content },
|
|
1795
|
+
// not { type, text } which breaks pair-integrity downstream.
|
|
1796
|
+
const processedConvMsgs = convMsgs.map(m => {
|
|
1797
|
+
// NeutralMessage format
|
|
1798
|
+
if (m.toolResults) {
|
|
1799
|
+
const resultStr = JSON.stringify(m.toolResults);
|
|
1800
|
+
if (resultStr.length <= MAX_INLINE_TOOL_CHARS)
|
|
1801
|
+
return m;
|
|
1802
|
+
const firstResult = m.toolResults[0];
|
|
1803
|
+
return {
|
|
1804
|
+
...m,
|
|
1805
|
+
toolResults: [{
|
|
1806
|
+
callId: firstResult?.callId ?? 'unknown',
|
|
1807
|
+
name: firstResult?.name ?? 'tool',
|
|
1808
|
+
content: `[tool result truncated: ${Math.ceil(resultStr.length / 4)} tokens]`,
|
|
1809
|
+
}],
|
|
1810
|
+
};
|
|
1811
|
+
}
|
|
1812
|
+
// OpenClaw native format
|
|
1813
|
+
if (Array.isArray(m.content)) {
|
|
1814
|
+
const content = m.content;
|
|
1815
|
+
const hasLarge = content.some(c => {
|
|
1245
1816
|
if (c.type !== 'tool_result')
|
|
1246
|
-
return
|
|
1817
|
+
return false;
|
|
1247
1818
|
const val = typeof c.content === 'string' ? c.content : JSON.stringify(c.content ?? '');
|
|
1248
|
-
|
|
1249
|
-
|
|
1250
|
-
|
|
1251
|
-
|
|
1252
|
-
|
|
1819
|
+
return val.length > MAX_INLINE_TOOL_CHARS;
|
|
1820
|
+
});
|
|
1821
|
+
if (!hasLarge)
|
|
1822
|
+
return m;
|
|
1823
|
+
return {
|
|
1824
|
+
...m,
|
|
1825
|
+
content: content.map(c => {
|
|
1826
|
+
if (c.type !== 'tool_result')
|
|
1827
|
+
return c;
|
|
1828
|
+
const val = typeof c.content === 'string' ? c.content : JSON.stringify(c.content ?? '');
|
|
1829
|
+
if (val.length <= MAX_INLINE_TOOL_CHARS)
|
|
1830
|
+
return c;
|
|
1831
|
+
return { ...c, content: `[tool result truncated: ${Math.ceil(val.length / 4)} tokens]` };
|
|
1832
|
+
}),
|
|
1833
|
+
};
|
|
1834
|
+
}
|
|
1835
|
+
return m;
|
|
1836
|
+
});
|
|
1837
|
+
// Fill from the back within budget
|
|
1838
|
+
let budget = trimBudget;
|
|
1839
|
+
// Reserve tokens for system messages using the same accounting
|
|
1840
|
+
// function as the final composed-array estimate.
|
|
1841
|
+
for (const sm of systemMsgs) {
|
|
1842
|
+
budget -= estimateMessageTokens(sm);
|
|
1253
1843
|
}
|
|
1254
|
-
|
|
1255
|
-
|
|
1256
|
-
|
|
1257
|
-
|
|
1258
|
-
|
|
1259
|
-
|
|
1260
|
-
|
|
1261
|
-
|
|
1262
|
-
|
|
1263
|
-
|
|
1264
|
-
|
|
1265
|
-
|
|
1266
|
-
|
|
1267
|
-
|
|
1268
|
-
const msgCost = (m) => estimateTokens(typeof m.textContent === 'string' ? m.textContent : null)
|
|
1269
|
-
+ (m.toolCalls ? Math.ceil(JSON.stringify(m.toolCalls).length / 2) : 0)
|
|
1270
|
-
+ (m.toolResults ? Math.ceil(JSON.stringify(m.toolResults).length / 2) : 0)
|
|
1271
|
-
+ (Array.isArray(m.content) ? m.content.reduce((s, c) => {
|
|
1272
|
-
if (c.type === 'toolCall' || c.type === 'tool_use') {
|
|
1273
|
-
return s + Math.ceil(JSON.stringify(c).length / 2);
|
|
1844
|
+
const msgCost = (m) => estimateMessageTokens(m);
|
|
1845
|
+
const clusters = clusterTranscriptMessages(processedConvMsgs);
|
|
1846
|
+
const keptClusters = [];
|
|
1847
|
+
const tailCluster = clusters.length > 0 ? clusters[clusters.length - 1] : [];
|
|
1848
|
+
if (tailCluster.length > 0) {
|
|
1849
|
+
budget -= tailCluster.reduce((sum, msg) => sum + msgCost(msg), 0);
|
|
1850
|
+
keptClusters.unshift(tailCluster);
|
|
1851
|
+
}
|
|
1852
|
+
for (let i = clusters.length - 2; i >= 0 && budget > 0; i--) {
|
|
1853
|
+
const cluster = clusters[i];
|
|
1854
|
+
const clusterCost = cluster.reduce((sum, msg) => sum + msgCost(msg), 0);
|
|
1855
|
+
if (budget - clusterCost >= 0) {
|
|
1856
|
+
keptClusters.unshift(cluster);
|
|
1857
|
+
budget -= clusterCost;
|
|
1274
1858
|
}
|
|
1275
|
-
const textVal = typeof c.text === 'string' ? c.text
|
|
1276
|
-
: typeof c.content === 'string' ? c.content
|
|
1277
|
-
: c.content != null ? JSON.stringify(c.content) : null;
|
|
1278
|
-
return s + estimateTokens(textVal);
|
|
1279
|
-
}, 0) : 0);
|
|
1280
|
-
const clusters = clusterTranscriptMessages(processedConvMsgs);
|
|
1281
|
-
const keptClusters = [];
|
|
1282
|
-
const tailCluster = clusters.length > 0 ? clusters[clusters.length - 1] : [];
|
|
1283
|
-
if (tailCluster.length > 0) {
|
|
1284
|
-
budget -= tailCluster.reduce((sum, msg) => sum + msgCost(msg), 0);
|
|
1285
|
-
keptClusters.unshift(tailCluster);
|
|
1286
|
-
}
|
|
1287
|
-
for (let i = clusters.length - 2; i >= 0 && budget > 0; i--) {
|
|
1288
|
-
const cluster = clusters[i];
|
|
1289
|
-
const clusterCost = cluster.reduce((sum, msg) => sum + msgCost(msg), 0);
|
|
1290
|
-
if (budget - clusterCost >= 0) {
|
|
1291
|
-
keptClusters.unshift(cluster);
|
|
1292
|
-
budget -= clusterCost;
|
|
1293
1859
|
}
|
|
1294
|
-
|
|
1295
|
-
|
|
1296
|
-
|
|
1297
|
-
|
|
1298
|
-
|
|
1299
|
-
|
|
1300
|
-
|
|
1860
|
+
const kept = keptClusters.flat();
|
|
1861
|
+
const keptCount = processedConvMsgs.length - kept.length;
|
|
1862
|
+
if (keptCount > 0) {
|
|
1863
|
+
console.log(`[hypermem-plugin] tool-loop trim: pressure=${(pressure * 100).toFixed(1)}% → ` +
|
|
1864
|
+
`target=${(trimTarget * 100).toFixed(0)}% (redis=${trimmed} msgs, messages=${keptCount} dropped)`);
|
|
1865
|
+
trimmedMessages = [...systemMsgs, ...kept];
|
|
1866
|
+
}
|
|
1867
|
+
else if (trimmed > 0) {
|
|
1868
|
+
console.log(`[hypermem-plugin] tool-loop trim: pressure=${(pressure * 100).toFixed(1)}% → ` +
|
|
1869
|
+
`target=${(trimTarget * 100).toFixed(0)}% (redis=${trimmed} msgs)`);
|
|
1870
|
+
}
|
|
1301
1871
|
}
|
|
1302
1872
|
else if (trimmed > 0) {
|
|
1303
1873
|
console.log(`[hypermem-plugin] tool-loop trim: pressure=${(pressure * 100).toFixed(1)}% → ` +
|
|
1304
1874
|
`target=${(trimTarget * 100).toFixed(0)}% (redis=${trimmed} msgs)`);
|
|
1305
1875
|
}
|
|
1876
|
+
// Apply tool gradient to compress large tool results before returning.
|
|
1877
|
+
// Skip if deferToolPruning is enabled — OpenClaw's contextPruning handles it.
|
|
1878
|
+
if (!_deferToolPruning) {
|
|
1879
|
+
// The full compose path runs applyToolGradientToWindow during reshaping;
|
|
1880
|
+
// the tool-loop path was previously skipping this, leaving a 40k-token
|
|
1881
|
+
// web_search result uncompressed every turn.
|
|
1882
|
+
try {
|
|
1883
|
+
const gradientApplied = applyToolGradientToWindow(trimmedMessages, trimBudget);
|
|
1884
|
+
trimmedMessages = gradientApplied;
|
|
1885
|
+
}
|
|
1886
|
+
catch {
|
|
1887
|
+
// Non-fatal: if gradient fails, continue with untouched trimmedMessages
|
|
1888
|
+
}
|
|
1889
|
+
} // end deferToolPruning gate
|
|
1890
|
+
// Repair orphaned tool pairs in the trimmed message list.
|
|
1891
|
+
// In-memory trim (cluster drop) can strand tool_result messages whose
|
|
1892
|
+
// paired tool_use was in a dropped cluster.
|
|
1893
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
1894
|
+
trimmedMessages = repairToolPairs(trimmedMessages);
|
|
1895
|
+
const composedTokens = estimateMessageArrayTokens(trimmedMessages);
|
|
1896
|
+
maybeLogPressureAccountingAnomaly({
|
|
1897
|
+
path: 'assemble.toolLoop',
|
|
1898
|
+
agentId,
|
|
1899
|
+
sessionKey: sk,
|
|
1900
|
+
runtimeTokens: preTrimTokens,
|
|
1901
|
+
redisTokens,
|
|
1902
|
+
composedTokens,
|
|
1903
|
+
budget: effectiveBudget,
|
|
1904
|
+
});
|
|
1905
|
+
await persistReplayRecoveryState(hm, agentId, sk, replayRecovery.nextState);
|
|
1906
|
+
degradationTelemetry({
|
|
1907
|
+
agentId,
|
|
1908
|
+
sessionKey: sk,
|
|
1909
|
+
turnId: _asmTurnId,
|
|
1910
|
+
path: 'toolLoop',
|
|
1911
|
+
toolChainCoEjections: 0,
|
|
1912
|
+
toolChainStubReplacements: 0,
|
|
1913
|
+
artifactDegradations: 0,
|
|
1914
|
+
replayState: replayRecovery.emittedMarker?.state,
|
|
1915
|
+
replayReason: replayRecovery.emittedMarker?.reason,
|
|
1916
|
+
});
|
|
1917
|
+
const overhead = _overheadCache.get(sk) ?? getOverheadFallback();
|
|
1918
|
+
return {
|
|
1919
|
+
messages: trimmedMessages,
|
|
1920
|
+
estimatedTokens: composedTokens + overhead,
|
|
1921
|
+
systemPromptAddition: replayMarkerText || undefined,
|
|
1922
|
+
};
|
|
1306
1923
|
}
|
|
1307
|
-
|
|
1308
|
-
|
|
1309
|
-
|
|
1924
|
+
catch {
|
|
1925
|
+
// Non-fatal: return conservative estimate so guard doesn't go blind
|
|
1926
|
+
return {
|
|
1927
|
+
messages: messages,
|
|
1928
|
+
estimatedTokens: Math.floor(effectiveBudget * 0.8),
|
|
1929
|
+
};
|
|
1310
1930
|
}
|
|
1311
|
-
// Apply tool gradient to compress large tool results before returning.
|
|
1312
|
-
// Skip if deferToolPruning is enabled — OpenClaw's contextPruning handles it.
|
|
1313
|
-
if (!_deferToolPruning) {
|
|
1314
|
-
// The full compose path runs applyToolGradientToWindow during reshaping;
|
|
1315
|
-
// the tool-loop path was previously skipping this, leaving a 40k-token
|
|
1316
|
-
// web_search result uncompressed every turn.
|
|
1317
|
-
try {
|
|
1318
|
-
const gradientApplied = applyToolGradientToWindow(trimmedMessages, trimBudget);
|
|
1319
|
-
trimmedMessages = gradientApplied;
|
|
1320
|
-
}
|
|
1321
|
-
catch {
|
|
1322
|
-
// Non-fatal: if gradient fails, continue with untouched trimmedMessages
|
|
1323
|
-
}
|
|
1324
|
-
} // end deferToolPruning gate
|
|
1325
|
-
// Repair orphaned tool pairs in the trimmed message list.
|
|
1326
|
-
// In-memory trim (cluster drop) can strand tool_result messages whose
|
|
1327
|
-
// paired tool_use was in a dropped cluster.
|
|
1328
|
-
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
1329
|
-
trimmedMessages = repairToolPairs(trimmedMessages);
|
|
1330
|
-
const windowTokens = await estimateWindowTokens(hm, agentId, sk);
|
|
1331
|
-
const overhead = _overheadCache.get(sk) ?? getOverheadFallback();
|
|
1332
|
-
return {
|
|
1333
|
-
messages: trimmedMessages,
|
|
1334
|
-
estimatedTokens: windowTokens + overhead,
|
|
1335
|
-
};
|
|
1336
|
-
}
|
|
1337
|
-
catch {
|
|
1338
|
-
// Non-fatal: return conservative estimate so guard doesn't go blind
|
|
1339
|
-
return {
|
|
1340
|
-
messages: messages,
|
|
1341
|
-
estimatedTokens: Math.floor(effectiveBudget * 0.8),
|
|
1342
|
-
};
|
|
1343
|
-
}
|
|
1344
|
-
}
|
|
1345
|
-
try {
|
|
1346
|
-
const hm = await getHyperMem();
|
|
1347
|
-
const sk = resolveSessionKey(sessionId, sessionKey);
|
|
1348
|
-
const agentId = extractAgentId(sk);
|
|
1349
|
-
// ── Subagent warming control ─────────────────────────────────────────
|
|
1350
|
-
// Detect subagent sessions by key pattern and apply warming mode.
|
|
1351
|
-
// 'off' = passthrough (no HyperMem context at all)
|
|
1352
|
-
// 'light' = facts + history only (skip library/wiki/semantic/keystones/doc chunks)
|
|
1353
|
-
// 'full' = standard compositor pipeline
|
|
1354
|
-
const isSubagent = sk.includes('subagent:');
|
|
1355
|
-
if (isSubagent && _subagentWarming === 'off') {
|
|
1356
|
-
console.log(`[hypermem-plugin] assemble: subagent warming=off, passthrough (sk: ${sk})`);
|
|
1357
|
-
return {
|
|
1358
|
-
messages: messages,
|
|
1359
|
-
estimatedTokens: messages.reduce((sum, m) => {
|
|
1360
|
-
const msg = m;
|
|
1361
|
-
return sum + Math.ceil((typeof msg.textContent === 'string' ? msg.textContent.length : 0) / 4);
|
|
1362
|
-
}, 0),
|
|
1363
|
-
};
|
|
1364
1931
|
}
|
|
1365
|
-
if (isSubagent) {
|
|
1366
|
-
console.log(`[hypermem-plugin] assemble: subagent warming=${_subagentWarming} (sk: ${sk})`);
|
|
1367
|
-
}
|
|
1368
|
-
// Resolve agent tier from fleet store (for doc chunk tier filtering)
|
|
1369
|
-
let tier;
|
|
1370
|
-
try {
|
|
1371
|
-
const agent = _fleetStore?.getAgent(agentId);
|
|
1372
|
-
tier = agent?.tier;
|
|
1373
|
-
}
|
|
1374
|
-
catch {
|
|
1375
|
-
// Non-fatal — tier filtering just won't apply
|
|
1376
|
-
}
|
|
1377
|
-
// historyDepth: derive a safe message count from the token budget.
|
|
1378
|
-
// Uses 50% of the budget for history (down from 60% — more budget goes to
|
|
1379
|
-
// L3/L4 context slots now). Floor at 50, ceiling at 200.
|
|
1380
|
-
// This is a preventive guard — the compositor's safety valve still trims
|
|
1381
|
-
// by token count post-assembly, but limiting depth up front avoids
|
|
1382
|
-
// feeding the compactor a window it can't reduce.
|
|
1383
|
-
const effectiveBudget = computeEffectiveBudget(tokenBudget);
|
|
1384
|
-
const historyDepth = Math.min(250, Math.max(50, Math.floor((effectiveBudget * 0.65) / 500)));
|
|
1385
|
-
// ── Redis guardrail: trim history to token budget ────────────────────
|
|
1386
|
-
// Prevents model-switch bloat: if an agent previously ran on a larger
|
|
1387
|
-
// context window, Redis history may exceed the current model's budget.
|
|
1388
|
-
// Trimming here (before compose) ensures the compositor never sees a
|
|
1389
|
-
// history window it can't fit. Uses 80% of budget as the trim ceiling
|
|
1390
|
-
// to leave room for system prompt, facts, and identity slots.
|
|
1391
1932
|
try {
|
|
1392
|
-
const
|
|
1393
|
-
const
|
|
1394
|
-
|
|
1395
|
-
|
|
1396
|
-
|
|
1933
|
+
const hm = await getHyperMem();
|
|
1934
|
+
const sk = resolveSessionKey(sessionId, sessionKey);
|
|
1935
|
+
const agentId = extractAgentId(sk);
|
|
1936
|
+
// ── Subagent warming control ─────────────────────────────────────────
|
|
1937
|
+
// Detect subagent sessions by key pattern and apply warming mode.
|
|
1938
|
+
// 'off' = passthrough (no HyperMem context at all)
|
|
1939
|
+
// 'light' = facts + history only (skip library/wiki/semantic/keystones/doc chunks)
|
|
1940
|
+
// 'full' = standard compositor pipeline
|
|
1941
|
+
const isSubagent = sk.includes('subagent:');
|
|
1942
|
+
if (isSubagent && _subagentWarming === 'off') {
|
|
1943
|
+
console.log(`[hypermem-plugin] assemble: subagent warming=off, passthrough (sk: ${sk})`);
|
|
1944
|
+
return {
|
|
1945
|
+
messages: messages,
|
|
1946
|
+
estimatedTokens: estimateMessageArrayTokens(messages),
|
|
1947
|
+
};
|
|
1397
1948
|
}
|
|
1398
|
-
|
|
1399
|
-
|
|
1400
|
-
|
|
1401
|
-
|
|
1402
|
-
|
|
1403
|
-
|
|
1404
|
-
|
|
1405
|
-
|
|
1406
|
-
|
|
1407
|
-
|
|
1408
|
-
|
|
1409
|
-
|
|
1410
|
-
|
|
1411
|
-
|
|
1412
|
-
|
|
1413
|
-
|
|
1414
|
-
|
|
1415
|
-
|
|
1416
|
-
const
|
|
1417
|
-
const
|
|
1418
|
-
const
|
|
1419
|
-
|
|
1420
|
-
|
|
1421
|
-
|
|
1422
|
-
|
|
1423
|
-
|
|
1424
|
-
|
|
1425
|
-
|
|
1426
|
-
|
|
1427
|
-
|
|
1428
|
-
|
|
1429
|
-
|
|
1430
|
-
|
|
1431
|
-
|
|
1949
|
+
if (isSubagent) {
|
|
1950
|
+
console.log(`[hypermem-plugin] assemble: subagent warming=${_subagentWarming} (sk: ${sk})`);
|
|
1951
|
+
}
|
|
1952
|
+
// Resolve agent tier from fleet store (for doc chunk tier filtering)
|
|
1953
|
+
let tier;
|
|
1954
|
+
try {
|
|
1955
|
+
const agent = _fleetStore?.getAgent(agentId);
|
|
1956
|
+
tier = agent?.tier;
|
|
1957
|
+
}
|
|
1958
|
+
catch {
|
|
1959
|
+
// Non-fatal — tier filtering just won't apply
|
|
1960
|
+
}
|
|
1961
|
+
// historyDepth: derive a safe message count from the token budget.
|
|
1962
|
+
// Budgets 65% of the effective budget for history at ~500 tokens per message (the rest goes to
|
|
1963
|
+
// L3/L4 context slots now). Floor at 50, ceiling at 250.
|
|
1964
|
+
// This is a preventive guard — the compositor's safety valve still trims
|
|
1965
|
+
// by token count post-assembly, but limiting depth up front avoids
|
|
1966
|
+
// feeding the compactor a window it can't reduce.
|
|
1967
|
+
const effectiveBudget = computeEffectiveBudget(tokenBudget, model);
|
|
1968
|
+
const historyDepth = Math.min(250, Math.max(50, Math.floor((effectiveBudget * 0.65) / 500)));
|
|
1969
|
+
const runtimeEntryTokens = estimateMessageArrayTokens(messages);
|
|
1970
|
+
const redisEntryTokens = await estimateWindowTokens(hm, agentId, sk);
|
|
1971
|
+
const replayRecovery = decideReplayRecovery({
|
|
1972
|
+
currentState: normalizeReplayRecoveryState(await hm.cache.getSlot(agentId, sk, 'replayRecoveryState').catch(() => '')),
|
|
1973
|
+
runtimeTokens: runtimeEntryTokens,
|
|
1974
|
+
redisTokens: redisEntryTokens,
|
|
1975
|
+
effectiveBudget,
|
|
1976
|
+
});
|
|
1977
|
+
const replayHistoryDepth = replayRecovery.active && replayRecovery.historyDepthCap
|
|
1978
|
+
? Math.min(historyDepth, replayRecovery.historyDepthCap)
|
|
1979
|
+
: historyDepth;
|
|
1980
|
+
// ── Redis guardrail: trim history to token budget ────────────────────
|
|
1981
|
+
// Prevents model-switch bloat: if an agent previously ran on a larger
|
|
1982
|
+
// context window, Redis history may exceed the current model's budget.
|
|
1983
|
+
// Trimming here (before compose) ensures the compositor never sees a
|
|
1984
|
+
// history window it can't fit.
|
|
1985
|
+
//
|
|
1986
|
+
// Sprint 3 (AfterTurn Rebuild/Trim Loop Fix): the assemble.normal trim now
|
|
1987
|
+
// first checks whether the window is already within trimBudget. When
|
|
1988
|
+
// afterTurn's refreshRedisGradient caps the rebuilt window at the same
|
|
1989
|
+
// 0.65 fraction (Sprint 3 compositor fix), the steady-state path will
|
|
1990
|
+
// find preTokens <= trimBudget and skip the trim entirely. The trim only
|
|
1991
|
+
// fires when real excess exists (pressure spikes, model switch, cold start),
|
|
1992
|
+
// breaking the unconditional afterTurn→assemble trim churn loop.
|
|
1993
|
+
//
|
|
1994
|
+
// B3: Batch trim with growth allowance.
|
|
1995
|
+
// Trim only fires when the window has grown past the soft target by more
|
|
1996
|
+
// than TRIM_GROWTH_THRESHOLD (5%). When it does fire, trim to
|
|
1997
|
+
// softTarget * (1 - TRIM_HEADROOM_FRACTION) so the window has room to
|
|
1998
|
+
// grow for several turns before the next trim fires. This eliminates
|
|
1999
|
+
// per-turn trim churn from minor natural growth (short assistant replies,
|
|
2000
|
+
// small tool outputs) while still catching genuine pressure spikes.
|
|
2001
|
+
try {
|
|
2002
|
+
const { softBudget: trimSoftBudget, triggerBudget: trimTriggerBudget, targetBudget: trimTargetBudget, } = resolveTrimBudgets(effectiveBudget);
|
|
2003
|
+
// Always read preTokens so we can make the skip decision and emit telemetry.
|
|
2004
|
+
const preTokensNormal = await estimateWindowTokens(hm, agentId, sk).catch(() => 0);
|
|
2005
|
+
const normalPath = isSubagent ? 'assemble.subagent' : 'assemble.normal';
|
|
2006
|
+
// B3: Skip trim when window is within the growth-allowance envelope.
|
|
2007
|
+
// This replaces the Sprint 3 `windowAlreadyFits` check (which only
|
|
2008
|
+
// skipped at exactly ≤ softTarget). The growth allowance lets the
|
|
2009
|
+
// window float up to +5% before triggering, avoiding trim on every
|
|
2010
|
+
// turn that ends a few tokens above 65%.
|
|
2011
|
+
const withinGrowthEnvelope = preTokensNormal > 0 && preTokensNormal <= trimTriggerBudget;
|
|
2012
|
+
if (withinGrowthEnvelope) {
|
|
2013
|
+
if (telemetryEnabled()) {
|
|
2014
|
+
guardTelemetry({
|
|
2015
|
+
path: normalPath,
|
|
2016
|
+
agentId, sessionKey: sk,
|
|
2017
|
+
reason: 'window-within-budget-skip',
|
|
2018
|
+
});
|
|
2019
|
+
}
|
|
2020
|
+
}
|
|
2021
|
+
else {
|
|
2022
|
+
// Steady-state trim owner claim (Sprint 2.2a): route assemble.normal
|
|
2023
|
+
// and assemble.subagent through the shared helper keyed by
|
|
2024
|
+
// (sessionKey, _asmTurnId). The real trim + its `event:'trim'`
|
|
2025
|
+
// emission are gated on the claim so a duplicate steady-state claim
|
|
2026
|
+
// in the same turn is actually suppressed in production, not just
|
|
2027
|
+
// warned. In development the duplicate throws.
|
|
2028
|
+
const normalClaimed = claimTrimOwner(sk, _asmTurnId, normalPath);
|
|
2029
|
+
if (normalClaimed) {
|
|
2030
|
+
// B3: trim to the headroom target (below soft target) so the
|
|
2031
|
+
// window has room to grow before the next trim fires.
|
|
2032
|
+
const trimmed = await hm.cache.trimHistoryToTokenBudget(agentId, sk, trimTargetBudget);
|
|
2033
|
+
let normalCacheInvalidated = false;
|
|
2034
|
+
if (trimmed > 0) {
|
|
2035
|
+
// Invalidate window cache since history changed
|
|
1432
2036
|
await hm.cache.invalidateWindow(agentId, sk);
|
|
1433
|
-
|
|
1434
|
-
`${lastState.tokenBudget}→${effectiveBudget} tokens, ` +
|
|
1435
|
-
`reshaped ${currentHistory.length}→${reshaped.length} messages`);
|
|
2037
|
+
normalCacheInvalidated = true;
|
|
1436
2038
|
}
|
|
1437
|
-
|
|
1438
|
-
|
|
1439
|
-
|
|
1440
|
-
|
|
1441
|
-
|
|
1442
|
-
|
|
2039
|
+
if (telemetryEnabled()) {
|
|
2040
|
+
const postTokensNormal = await estimateWindowTokens(hm, agentId, sk).catch(() => 0);
|
|
2041
|
+
trimTelemetry({
|
|
2042
|
+
path: normalPath,
|
|
2043
|
+
agentId, sessionKey: sk,
|
|
2044
|
+
preTokens: preTokensNormal,
|
|
2045
|
+
postTokens: postTokensNormal,
|
|
2046
|
+
removed: trimmed,
|
|
2047
|
+
cacheInvalidated: normalCacheInvalidated,
|
|
2048
|
+
reason: `b3:trigger=${trimTriggerBudget},target=${trimTargetBudget}`,
|
|
2049
|
+
});
|
|
1443
2050
|
}
|
|
1444
|
-
|
|
1445
|
-
|
|
1446
|
-
|
|
1447
|
-
|
|
1448
|
-
|
|
1449
|
-
|
|
2051
|
+
}
|
|
2052
|
+
else if (telemetryEnabled()) {
|
|
2053
|
+
guardTelemetry({
|
|
2054
|
+
path: normalPath,
|
|
2055
|
+
agentId, sessionKey: sk,
|
|
2056
|
+
reason: 'duplicate-claim-suppressed',
|
|
1450
2057
|
});
|
|
1451
2058
|
}
|
|
1452
2059
|
}
|
|
1453
2060
|
}
|
|
1454
|
-
|
|
1455
|
-
|
|
1456
|
-
|
|
1457
|
-
|
|
1458
|
-
|
|
1459
|
-
|
|
1460
|
-
|
|
1461
|
-
|
|
1462
|
-
|
|
1463
|
-
|
|
1464
|
-
|
|
1465
|
-
|
|
1466
|
-
|
|
1467
|
-
|
|
2061
|
+
catch (trimErr) {
|
|
2062
|
+
// Non-fatal — compositor's budget-fit walk is the second line of defense
|
|
2063
|
+
console.warn('[hypermem-plugin] assemble: Redis trim failed (non-fatal):', trimErr.message);
|
|
2064
|
+
}
|
|
2065
|
+
// ── Budget downshift: proactive reshape pass ───────────────────────────────────────
|
|
2066
|
+
// If this session previously composed at a higher token budget (e.g. gpt-5.4
|
|
2067
|
+
// → claude-sonnet model switch), the Redis window is still sized for the old
|
|
2068
|
+
// budget. trimHistoryToTokenBudget above trims by count but skips tool
|
|
2069
|
+
// gradient logic. A downshift >10% formerly triggered a full reshape (now telemetry-only): apply tool
|
|
2070
|
+
// gradient at the new budget + trim, then write back before compose runs.
|
|
2071
|
+
// This prevents several turns of compaction churn after a model switch.
|
|
2072
|
+
//
|
|
2073
|
+
// Bug fix: previously read from getWindow() which is always null here
|
|
2074
|
+
// (afterTurn invalidates it every turn). Also fixed: was doing setWindow()
|
|
2075
|
+
// then invalidateWindow() which is a write-then-delete no-op. Now reads
|
|
2076
|
+
// from history list and writes back via replaceHistory().
|
|
2077
|
+
let lastState = null;
|
|
1468
2078
|
try {
|
|
1469
|
-
|
|
1470
|
-
|
|
1471
|
-
|
|
1472
|
-
|
|
1473
|
-
|
|
1474
|
-
|
|
2079
|
+
lastState = await hm.cache.getModelState(agentId, sk);
|
|
2080
|
+
const DOWNSHIFT_THRESHOLD = 0.10;
|
|
2081
|
+
const isDownshift = lastState &&
|
|
2082
|
+
(lastState.tokenBudget - effectiveBudget) / lastState.tokenBudget > DOWNSHIFT_THRESHOLD;
|
|
2083
|
+
if (isDownshift && !_deferToolPruning) {
|
|
2084
|
+
// Sprint 2.2a: demote reshape to guard telemetry.
|
|
2085
|
+
//
|
|
2086
|
+
// Previously this branch re-ran applyToolGradientToWindow, wrote
|
|
2087
|
+
// back via replaceHistory, invalidated the window cache, and
|
|
2088
|
+
// stamped `reshapedAt` on model state. Assemble.* is the
|
|
2089
|
+
// steady-state owner, so the subsequent assemble.normal /
|
|
2090
|
+
// assemble.subagent trim (gated by claimTrimOwner) handles any
|
|
2091
|
+
// real downshift pressure. Keeping the detection branch preserves
|
|
2092
|
+
// observability; guardTelemetry records the would-be-reshape
|
|
2093
|
+
// without mutating history, the window, or model state.
|
|
2094
|
+
//
|
|
2095
|
+
// CRITICAL: do NOT call setModelState({ reshapedAt, … }) here.
|
|
2096
|
+
// compact() skips when reshapedAt is recent, which would cause it
|
|
2097
|
+
// to skip on the strength of a reshape that never ran.
|
|
2098
|
+
guardTelemetry({
|
|
2099
|
+
path: 'reshape',
|
|
2100
|
+
agentId, sessionKey: sk,
|
|
2101
|
+
reason: 'reshape-downshift-demoted',
|
|
2102
|
+
});
|
|
1475
2103
|
}
|
|
1476
2104
|
}
|
|
1477
|
-
catch {
|
|
1478
|
-
// Non-fatal —
|
|
2105
|
+
catch (reshapeErr) {
|
|
2106
|
+
// Non-fatal — compositor safety valve is still the last defense
|
|
2107
|
+
console.warn('[hypermem-plugin] assemble: reshape pass failed (non-fatal):', reshapeErr.message);
|
|
1479
2108
|
}
|
|
1480
|
-
|
|
1481
|
-
|
|
1482
|
-
|
|
1483
|
-
|
|
1484
|
-
|
|
1485
|
-
|
|
1486
|
-
|
|
1487
|
-
|
|
1488
|
-
|
|
1489
|
-
|
|
1490
|
-
|
|
1491
|
-
|
|
1492
|
-
|
|
1493
|
-
|
|
1494
|
-
|
|
1495
|
-
|
|
1496
|
-
|
|
1497
|
-
|
|
1498
|
-
|
|
1499
|
-
|
|
1500
|
-
|
|
1501
|
-
|
|
1502
|
-
|
|
1503
|
-
|
|
1504
|
-
|
|
1505
|
-
|
|
1506
|
-
|
|
1507
|
-
|
|
1508
|
-
|
|
1509
|
-
|
|
1510
|
-
|
|
1511
|
-
hm.cache.setSlot(agentId, sk, 'assemblyContextAt', nowStr),
|
|
1512
|
-
]).then(() => {
|
|
1513
|
-
// Extend TTL on the cached keys to 2× the threshold
|
|
1514
|
-
// setSlot uses the sessionTTL from RedisLayer config — acceptable fallback
|
|
1515
|
-
}).catch(() => { });
|
|
1516
|
-
}
|
|
1517
|
-
// Convert NeutralMessage[] → AgentMessage[] for the OpenClaw runtime.
|
|
1518
|
-
// neutralToAgentMessage can return a single message or an array (tool results
|
|
1519
|
-
// expand to individual ToolResultMessage objects), so we flatMap.
|
|
1520
|
-
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
1521
|
-
let outputMessages = result.messages
|
|
1522
|
-
.filter(m => m.role != null)
|
|
1523
|
-
.flatMap(m => neutralToAgentMessage(m));
|
|
1524
|
-
const neutralPairStats = collectNeutralToolPairStats(result.messages);
|
|
1525
|
-
const agentPairStats = collectAgentToolPairStats(outputMessages);
|
|
1526
|
-
const toolPairAnomaly = neutralPairStats.missingToolResultCount > 0 ||
|
|
1527
|
-
neutralPairStats.orphanToolResultCount > 0 ||
|
|
1528
|
-
agentPairStats.missingToolResultCount > 0 ||
|
|
1529
|
-
agentPairStats.orphanToolResultCount > 0 ||
|
|
1530
|
-
agentPairStats.syntheticNoResultCount > 0
|
|
1531
|
-
? {
|
|
1532
|
-
stage: 'assemble',
|
|
1533
|
-
neutralMissingToolResultIds: neutralPairStats.missingToolResultIds.slice(0, 10),
|
|
1534
|
-
neutralOrphanToolResultIds: neutralPairStats.orphanToolResultIds.slice(0, 10),
|
|
1535
|
-
agentMissingToolResultIds: agentPairStats.missingToolResultIds.slice(0, 10),
|
|
1536
|
-
agentOrphanToolResultIds: agentPairStats.orphanToolResultIds.slice(0, 10),
|
|
1537
|
-
syntheticNoResultCount: agentPairStats.syntheticNoResultCount,
|
|
2109
|
+
// ── Cache replay fast path ─────────────────────────────────────────────
|
|
2110
|
+
// If the session was active recently, return the cached contextBlock
|
|
2111
|
+
// (systemPromptAddition) to produce a byte-identical system prompt and
|
|
2112
|
+
// hit the provider prefix cache (Anthropic / OpenAI).
|
|
2113
|
+
// The message window is always rebuilt fresh — only the compositor output
|
|
2114
|
+
// (contextBlock) is cached, since that's what determines prefix identity.
|
|
2115
|
+
const cacheReplayThresholdMs = _cacheReplayThresholdMs;
|
|
2116
|
+
let cachedContextBlock = null;
|
|
2117
|
+
if (cacheReplayThresholdMs > 0 && !replayRecovery.shouldSkipCacheReplay) {
|
|
2118
|
+
try {
|
|
2119
|
+
const cachedAt = await hm.cache.getSlot(agentId, sk, 'assemblyContextAt');
|
|
2120
|
+
if (cachedAt && Date.now() - parseInt(cachedAt) < cacheReplayThresholdMs) {
|
|
2121
|
+
cachedContextBlock = await hm.cache.getSlot(agentId, sk, 'assemblyContextBlock');
|
|
2122
|
+
if (cachedContextBlock) {
|
|
2123
|
+
console.log(`[hypermem-plugin] assemble: cache replay hit for ${agentId} (${Math.round((Date.now() - parseInt(cachedAt)) / 1000)}s old)`);
|
|
2124
|
+
if (telemetryEnabled()) {
|
|
2125
|
+
assembleTrace({
|
|
2126
|
+
agentId,
|
|
2127
|
+
sessionKey: sk,
|
|
2128
|
+
turnId: _asmTurnId,
|
|
2129
|
+
path: 'replay',
|
|
2130
|
+
toolLoop: isToolLoop,
|
|
2131
|
+
msgCount: messages.length,
|
|
2132
|
+
});
|
|
2133
|
+
}
|
|
2134
|
+
}
|
|
2135
|
+
}
|
|
2136
|
+
}
|
|
2137
|
+
catch {
|
|
2138
|
+
// Non-fatal — fall through to full assembly
|
|
2139
|
+
}
|
|
1538
2140
|
}
|
|
1539
|
-
:
|
|
1540
|
-
|
|
1541
|
-
|
|
1542
|
-
|
|
1543
|
-
|
|
1544
|
-
|
|
1545
|
-
postBridgeOrphanToolResults: agentPairStats.orphanToolResultCount,
|
|
1546
|
-
}, toolPairAnomaly);
|
|
1547
|
-
if (toolPairAnomaly) {
|
|
1548
|
-
console.warn(`[hypermem-plugin] tool-pair-integrity: ${agentId}/${sk} ` +
|
|
1549
|
-
`neutralMissing=${neutralPairStats.missingToolResultCount} neutralOrphan=${neutralPairStats.orphanToolResultCount} ` +
|
|
1550
|
-
`agentMissing=${agentPairStats.missingToolResultCount} agentOrphan=${agentPairStats.orphanToolResultCount} ` +
|
|
1551
|
-
`synthetic=${agentPairStats.syntheticNoResultCount}`);
|
|
1552
|
-
}
|
|
1553
|
-
// Repair orphaned tool pairs before returning to provider.
|
|
1554
|
-
// compaction/trim passes can remove tool_use blocks without removing their
|
|
1555
|
-
// paired tool_result messages — Anthropic and Gemini reject these with 400.
|
|
1556
|
-
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
1557
|
-
outputMessages = repairToolPairs(outputMessages);
|
|
1558
|
-
// Cache overhead for tool-loop turns: contextBlock tokens (chars/4) +
|
|
1559
|
-
// tier-aware estimate for runtime system prompt (SOUL.md, identity,
|
|
1560
|
-
// workspace files — not visible from inside the plugin).
|
|
1561
|
-
const contextBlockTokens = Math.ceil((result.contextBlock?.length ?? 0) / 4);
|
|
1562
|
-
const runtimeSystemTokens = getOverheadFallback(tier);
|
|
1563
|
-
_overheadCache.set(sk, contextBlockTokens + runtimeSystemTokens);
|
|
1564
|
-
// Update model state for downshift detection on next turn
|
|
1565
|
-
try {
|
|
1566
|
-
await hm.cache.setModelState(agentId, sk, {
|
|
1567
|
-
model: model ?? 'unknown',
|
|
2141
|
+
// Subagent light mode: skip library/wiki/semantic/keystones/doc chunks.
|
|
2142
|
+
// Keeps: system, identity, history, active facts, output profile, tool gradient.
|
|
2143
|
+
const subagentLight = isSubagent && _subagentWarming === 'light';
|
|
2144
|
+
const request = {
|
|
2145
|
+
agentId,
|
|
2146
|
+
sessionKey: sk,
|
|
1568
2147
|
tokenBudget: effectiveBudget,
|
|
1569
|
-
|
|
1570
|
-
|
|
2148
|
+
historyDepth: lastState?.historyDepth && lastState.historyDepth < replayHistoryDepth
|
|
2149
|
+
? lastState.historyDepth
|
|
2150
|
+
: replayHistoryDepth,
|
|
2151
|
+
tier,
|
|
2152
|
+
model, // pass model for provider detection
|
|
2153
|
+
includeDocChunks: subagentLight ? false : !cachedContextBlock, // skip doc retrieval on cache hit or subagent light
|
|
2154
|
+
includeLibrary: subagentLight ? false : undefined, // skip wiki/knowledge/preferences
|
|
2155
|
+
includeSemanticRecall: subagentLight ? false : undefined, // skip vector/FTS recall
|
|
2156
|
+
includeKeystones: subagentLight ? false : undefined, // skip keystone history injection
|
|
2157
|
+
prompt,
|
|
2158
|
+
skipProviderTranslation: true, // runtime handles provider translation
|
|
2159
|
+
};
|
|
2160
|
+
const result = await hm.compose(request);
|
|
2161
|
+
degradationTelemetry({
|
|
2162
|
+
agentId,
|
|
2163
|
+
sessionKey: sk,
|
|
2164
|
+
turnId: _asmTurnId,
|
|
2165
|
+
path: 'compose',
|
|
2166
|
+
toolChainCoEjections: result.diagnostics?.toolChainCoEjections ?? 0,
|
|
2167
|
+
toolChainStubReplacements: result.diagnostics?.toolChainStubReplacements ?? 0,
|
|
2168
|
+
artifactDegradations: result.diagnostics?.artifactDegradations ?? 0,
|
|
2169
|
+
artifactOversizeThresholdTokens: result.diagnostics?.artifactOversizeThresholdTokens,
|
|
2170
|
+
replayState: replayRecovery.emittedMarker?.state,
|
|
2171
|
+
replayReason: replayRecovery.emittedMarker?.reason,
|
|
1571
2172
|
});
|
|
2173
|
+
// Use cached contextBlock if available (cache replay), otherwise use fresh result.
|
|
2174
|
+
// After a full compose, write the new contextBlock to cache for the next turn.
|
|
2175
|
+
if (cachedContextBlock) {
|
|
2176
|
+
result.contextBlock = cachedContextBlock;
|
|
2177
|
+
}
|
|
2178
|
+
else if (result.contextBlock && cacheReplayThresholdMs > 0 && !replayRecovery.shouldSkipCacheReplay && !replayRecovery.emittedText) {
|
|
2179
|
+
// Write cache async — never block the assemble() return on this
|
|
2180
|
+
const blockToCache = result.contextBlock;
|
|
2181
|
+
const nowStr = Date.now().toString();
|
|
2182
|
+
const ttlSec = Math.ceil((cacheReplayThresholdMs * 2) / 1000);
|
|
2183
|
+
Promise.all([
|
|
2184
|
+
hm.cache.setSlot(agentId, sk, 'assemblyContextBlock', blockToCache),
|
|
2185
|
+
hm.cache.setSlot(agentId, sk, 'assemblyContextAt', nowStr),
|
|
2186
|
+
]).then(() => {
|
|
2187
|
+
// NOTE: TTL is not actually extended here (ttlSec is computed but unused); intended TTL is 2× the threshold.
|
|
2188
|
+
// setSlot uses the sessionTTL from RedisLayer config — acceptable fallback
|
|
2189
|
+
}).catch(() => { });
|
|
2190
|
+
}
|
|
2191
|
+
if (replayRecovery.emittedText) {
|
|
2192
|
+
result.contextBlock = result.contextBlock
|
|
2193
|
+
? `${result.contextBlock}
|
|
2194
|
+
${replayRecovery.emittedText}`
|
|
2195
|
+
: replayRecovery.emittedText;
|
|
2196
|
+
}
|
|
2197
|
+
// Convert NeutralMessage[] → AgentMessage[] for the OpenClaw runtime.
|
|
2198
|
+
// neutralToAgentMessage can return a single message or an array (tool results
|
|
2199
|
+
// expand to individual ToolResultMessage objects), so we flatMap.
|
|
2200
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
2201
|
+
let outputMessages = result.messages
|
|
2202
|
+
.filter(m => m.role != null)
|
|
2203
|
+
.flatMap(m => neutralToAgentMessage(m));
|
|
2204
|
+
const neutralPairStats = collectNeutralToolPairStats(result.messages);
|
|
2205
|
+
const agentPairStats = collectAgentToolPairStats(outputMessages);
|
|
2206
|
+
const toolPairAnomaly = neutralPairStats.missingToolResultCount > 0 ||
|
|
2207
|
+
neutralPairStats.orphanToolResultCount > 0 ||
|
|
2208
|
+
agentPairStats.missingToolResultCount > 0 ||
|
|
2209
|
+
agentPairStats.orphanToolResultCount > 0 ||
|
|
2210
|
+
agentPairStats.syntheticNoResultCount > 0
|
|
2211
|
+
? {
|
|
2212
|
+
stage: 'assemble',
|
|
2213
|
+
neutralMissingToolResultIds: neutralPairStats.missingToolResultIds.slice(0, 10),
|
|
2214
|
+
neutralOrphanToolResultIds: neutralPairStats.orphanToolResultIds.slice(0, 10),
|
|
2215
|
+
agentMissingToolResultIds: agentPairStats.missingToolResultIds.slice(0, 10),
|
|
2216
|
+
agentOrphanToolResultIds: agentPairStats.orphanToolResultIds.slice(0, 10),
|
|
2217
|
+
syntheticNoResultCount: agentPairStats.syntheticNoResultCount,
|
|
2218
|
+
}
|
|
2219
|
+
: undefined;
|
|
2220
|
+
await bumpToolPairMetrics(hm, agentId, sk, {
|
|
2221
|
+
composeCount: 1,
|
|
2222
|
+
preBridgeMissingToolResults: neutralPairStats.missingToolResultCount,
|
|
2223
|
+
preBridgeOrphanToolResults: neutralPairStats.orphanToolResultCount,
|
|
2224
|
+
postBridgeMissingToolResults: agentPairStats.missingToolResultCount,
|
|
2225
|
+
postBridgeOrphanToolResults: agentPairStats.orphanToolResultCount,
|
|
2226
|
+
}, toolPairAnomaly);
|
|
2227
|
+
if (toolPairAnomaly) {
|
|
2228
|
+
console.warn(`[hypermem-plugin] tool-pair-integrity: ${agentId}/${sk} ` +
|
|
2229
|
+
`neutralMissing=${neutralPairStats.missingToolResultCount} neutralOrphan=${neutralPairStats.orphanToolResultCount} ` +
|
|
2230
|
+
`agentMissing=${agentPairStats.missingToolResultCount} agentOrphan=${agentPairStats.orphanToolResultCount} ` +
|
|
2231
|
+
`synthetic=${agentPairStats.syntheticNoResultCount}`);
|
|
2232
|
+
}
|
|
2233
|
+
// Repair orphaned tool pairs before returning to provider.
|
|
2234
|
+
// compaction/trim passes can remove tool_use blocks without removing their
|
|
2235
|
+
// paired tool_result messages — Anthropic and Gemini reject these with 400.
|
|
2236
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
2237
|
+
outputMessages = repairToolPairs(outputMessages);
|
|
2238
|
+
// Cache overhead for tool-loop turns: contextBlock tokens (chars/4) +
|
|
2239
|
+
// tier-aware estimate for runtime system prompt (SOUL.md, identity,
|
|
2240
|
+
// workspace files — not visible from inside the plugin).
|
|
2241
|
+
const contextBlockTokens = Math.ceil((result.contextBlock?.length ?? 0) / 4);
|
|
2242
|
+
const runtimeSystemTokens = getOverheadFallback(tier);
|
|
2243
|
+
_overheadCache.set(sk, contextBlockTokens + runtimeSystemTokens);
|
|
2244
|
+
await persistReplayRecoveryState(hm, agentId, sk, replayRecovery.nextState);
|
|
2245
|
+
// Update model state for downshift detection on next turn
|
|
2246
|
+
try {
|
|
2247
|
+
await hm.cache.setModelState(agentId, sk, {
|
|
2248
|
+
model: model ?? 'unknown',
|
|
2249
|
+
tokenBudget: effectiveBudget,
|
|
2250
|
+
composedAt: new Date().toISOString(),
|
|
2251
|
+
historyDepth,
|
|
2252
|
+
});
|
|
2253
|
+
}
|
|
2254
|
+
catch {
|
|
2255
|
+
// Non-fatal
|
|
2256
|
+
}
|
|
2257
|
+
return {
|
|
2258
|
+
messages: outputMessages,
|
|
2259
|
+
estimatedTokens: result.tokenCount ?? 0,
|
|
2260
|
+
// systemPromptAddition injects hypermem context before the runtime system prompt.
|
|
2261
|
+
// This is the facts/recall/episodes block assembled by the compositor.
|
|
2262
|
+
systemPromptAddition: result.contextBlock || undefined,
|
|
2263
|
+
};
|
|
1572
2264
|
}
|
|
1573
|
-
catch {
|
|
1574
|
-
|
|
2265
|
+
catch (err) {
|
|
2266
|
+
console.error('[hypermem-plugin] assemble error (stack):', err.stack ?? err);
|
|
2267
|
+
throw err; // Re-throw so the runtime falls back to legacy pipeline
|
|
1575
2268
|
}
|
|
1576
|
-
return {
|
|
1577
|
-
messages: outputMessages,
|
|
1578
|
-
estimatedTokens: result.tokenCount ?? 0,
|
|
1579
|
-
// systemPromptAddition injects hypermem context before the runtime system prompt.
|
|
1580
|
-
// This is the facts/recall/episodes block assembled by the compositor.
|
|
1581
|
-
systemPromptAddition: result.contextBlock || undefined,
|
|
1582
|
-
};
|
|
1583
2269
|
}
|
|
1584
|
-
|
|
1585
|
-
|
|
1586
|
-
|
|
2270
|
+
finally {
|
|
2271
|
+
// End the trim-owner turn scope opened at assemble entry. Paired
|
|
2272
|
+
// with beginTrimOwnerTurn(_asmSk, _asmTurnId) above; runs on every
|
|
2273
|
+
// exit path (normal return, tool-loop return, replay return, error
|
|
2274
|
+
// re-throw). Turn-scoped keying (Sprint 2.2a) means this only
|
|
2275
|
+
// removes THIS turn's slot, so concurrent same-session turns remain
|
|
2276
|
+
// isolated instead of clobbering each other.
|
|
2277
|
+
endTrimOwnerTurn(_asmSk, _asmTurnId);
|
|
1587
2278
|
}
|
|
1588
2279
|
},
|
|
1589
2280
|
/**
|
|
@@ -1609,14 +2300,16 @@ function createHyperMemEngine() {
|
|
|
1609
2300
|
// Skip if a reshape pass just ran (within last 30s) — avoid double-processing
|
|
1610
2301
|
// Cache modelState here for reuse in density-aware JSONL truncation below.
|
|
1611
2302
|
let cachedModelState = null;
|
|
2303
|
+
let model;
|
|
1612
2304
|
try {
|
|
1613
2305
|
cachedModelState = await hm.cache.getModelState(agentId, sk);
|
|
2306
|
+
model = cachedModelState?.model;
|
|
1614
2307
|
if (cachedModelState?.reshapedAt) {
|
|
1615
2308
|
const reshapeAge = Date.now() - new Date(cachedModelState.reshapedAt).getTime();
|
|
1616
2309
|
// Only skip if session is NOT critically full — nuclear path must bypass this guard.
|
|
1617
2310
|
// If currentTokenCount > 85% budget, fall through to nuclear compaction below.
|
|
1618
2311
|
const isCriticallyFull = currentTokenCount != null &&
|
|
1619
|
-
currentTokenCount > (computeEffectiveBudget(tokenBudget) * 0.85);
|
|
2312
|
+
currentTokenCount > (computeEffectiveBudget(tokenBudget, model) * 0.85);
|
|
1620
2313
|
if (reshapeAge < 30_000 && !isCriticallyFull) {
|
|
1621
2314
|
console.log(`[hypermem-plugin] compact: skipping — reshape pass ran ${reshapeAge}ms ago`);
|
|
1622
2315
|
return { ok: true, compacted: false, reason: 'reshape-recently-ran' };
|
|
@@ -1631,7 +2324,7 @@ function createHyperMemEngine() {
|
|
|
1631
2324
|
// and system prompt — our estimate only covers the history window. When they
|
|
1632
2325
|
// diverge significantly upward, the difference is "inbound overhead" consuming
|
|
1633
2326
|
// budget the history is competing for. We trim history to make room.
|
|
1634
|
-
const effectiveBudget = computeEffectiveBudget(tokenBudget);
|
|
2327
|
+
const effectiveBudget = computeEffectiveBudget(tokenBudget, model);
|
|
1635
2328
|
const tokensBefore = await estimateWindowTokens(hm, agentId, sk);
|
|
1636
2329
|
// Target depth for both Redis trimming and JSONL truncation.
|
|
1637
2330
|
// Target 50% of budget capacity, assume ~500 tokens/message average.
|
|
@@ -1652,10 +2345,21 @@ function createHyperMemEngine() {
|
|
|
1652
2345
|
// Keeps very recent context, clears the long tool-heavy tail.
|
|
1653
2346
|
const nuclearDepth = Math.max(10, Math.floor(targetDepth * 0.20));
|
|
1654
2347
|
const nuclearBudget = Math.floor(effectiveBudget * 0.25);
|
|
1655
|
-
await hm.cache.trimHistoryToTokenBudget(agentId, sk, nuclearBudget);
|
|
2348
|
+
const nuclearRemoved = await hm.cache.trimHistoryToTokenBudget(agentId, sk, nuclearBudget);
|
|
1656
2349
|
await hm.cache.invalidateWindow(agentId, sk).catch(() => { });
|
|
1657
2350
|
await truncateJsonlIfNeeded(sessionFile, nuclearDepth, true);
|
|
1658
2351
|
const tokensAfter = await estimateWindowTokens(hm, agentId, sk);
|
|
2352
|
+
if (telemetryEnabled()) {
|
|
2353
|
+
trimTelemetry({
|
|
2354
|
+
path: 'compact.nuclear',
|
|
2355
|
+
agentId, sessionKey: sk,
|
|
2356
|
+
preTokens: tokensBefore,
|
|
2357
|
+
postTokens: tokensAfter,
|
|
2358
|
+
removed: nuclearRemoved,
|
|
2359
|
+
cacheInvalidated: true,
|
|
2360
|
+
reason: `currentTokenCount=${currentTokenCount}/${effectiveBudget}`,
|
|
2361
|
+
});
|
|
2362
|
+
}
|
|
1659
2363
|
console.log(`[hypermem-plugin] compact: NUCLEAR — session at ${currentTokenCount}/${effectiveBudget} tokens ` +
|
|
1660
2364
|
`(${Math.round((currentTokenCount / effectiveBudget) * 100)}% full), ` +
|
|
1661
2365
|
`deep-trimmed JSONL to ${nuclearDepth} messages, Redis ${tokensBefore}→${tokensAfter} tokens`);
|
|
@@ -1676,6 +2380,17 @@ function createHyperMemEngine() {
|
|
|
1676
2380
|
await hm.cache.invalidateWindow(agentId, sk).catch(() => { });
|
|
1677
2381
|
const tokensAfter = await estimateWindowTokens(hm, agentId, sk);
|
|
1678
2382
|
await truncateJsonlIfNeeded(sessionFile, targetDepth);
|
|
2383
|
+
if (telemetryEnabled()) {
|
|
2384
|
+
trimTelemetry({
|
|
2385
|
+
path: 'compact.history',
|
|
2386
|
+
agentId, sessionKey: sk,
|
|
2387
|
+
preTokens: tokensBefore,
|
|
2388
|
+
postTokens: tokensAfter,
|
|
2389
|
+
removed: historyTrimmed,
|
|
2390
|
+
cacheInvalidated: true,
|
|
2391
|
+
reason: `inbound-overhead=${inboundOverhead}`,
|
|
2392
|
+
});
|
|
2393
|
+
}
|
|
1679
2394
|
console.log(`[hypermem-plugin] compact: large-inbound-content (gap=${inboundOverhead} tokens), ` +
|
|
1680
2395
|
`trimmed history ${tokensBefore}→${tokensAfter} (budget-for-history=${budgetForHistory}, trimmed=${historyTrimmed} messages)`);
|
|
1681
2396
|
return { ok: true, compacted: true, result: { tokensBefore, tokensAfter } };
|
|
@@ -1725,6 +2440,17 @@ function createHyperMemEngine() {
|
|
|
1725
2440
|
// Invalidate the compose cache so next assemble() re-builds from trimmed data
|
|
1726
2441
|
await hm.cache.invalidateWindow(agentId, sk).catch(() => { });
|
|
1727
2442
|
const tokensAfter = await estimateWindowTokens(hm, agentId, sk);
|
|
2443
|
+
if (telemetryEnabled()) {
|
|
2444
|
+
trimTelemetry({
|
|
2445
|
+
path: 'compact.history2',
|
|
2446
|
+
agentId, sessionKey: sk,
|
|
2447
|
+
preTokens: tokensBefore,
|
|
2448
|
+
postTokens: tokensAfter,
|
|
2449
|
+
removed: historyTrimmed,
|
|
2450
|
+
cacheInvalidated: true,
|
|
2451
|
+
reason: `over-budget tokensBefore=${tokensBefore}/${effectiveBudget}`,
|
|
2452
|
+
});
|
|
2453
|
+
}
|
|
1728
2454
|
console.log(`[hypermem-plugin] compact: trimmed ${tokensBefore} → ${tokensAfter} tokens (budget: ${effectiveBudget})`);
|
|
1729
2455
|
// Density-aware JSONL truncation: derive target depth from actual avg tokens/message
|
|
1730
2456
|
// rather than assuming a fixed 500 tokens/message. This prevents a large-message
|
|
@@ -1862,7 +2588,8 @@ function createHyperMemEngine() {
|
|
|
1862
2588
|
try {
|
|
1863
2589
|
const modelState = await hm.cache.getModelState(agentId, sk);
|
|
1864
2590
|
const gradientBudget = modelState?.tokenBudget;
|
|
1865
|
-
|
|
2591
|
+
const gradientDepth = modelState?.historyDepth;
|
|
2592
|
+
await hm.refreshRedisGradient(agentId, sk, gradientBudget, gradientDepth);
|
|
1866
2593
|
}
|
|
1867
2594
|
catch (refreshErr) {
|
|
1868
2595
|
console.warn('[hypermem-plugin] afterTurn: refreshRedisGradient failed (non-fatal):', refreshErr.message);
|
|
@@ -1887,41 +2614,43 @@ function createHyperMemEngine() {
|
|
|
1887
2614
|
try {
|
|
1888
2615
|
const modelState = await hm.cache.getModelState(agentId, sk);
|
|
1889
2616
|
if (modelState?.tokenBudget) {
|
|
1890
|
-
// Use the
|
|
1891
|
-
//
|
|
1892
|
-
|
|
1893
|
-
const runtimePostTokens = messages.reduce((sum, m) => {
|
|
1894
|
-
const msg = m;
|
|
1895
|
-
const textCost = estimateTokens(typeof msg.textContent === 'string' ? msg.textContent : null);
|
|
1896
|
-
const toolCallCost = msg.toolCalls ? Math.ceil(JSON.stringify(msg.toolCalls).length / 2) : 0;
|
|
1897
|
-
const toolResultCost = msg.toolResults ? Math.ceil(JSON.stringify(msg.toolResults).length / 2) : 0;
|
|
1898
|
-
const contentCost = Array.isArray(msg.content)
|
|
1899
|
-
? msg.content.reduce((s, c) => {
|
|
1900
|
-
const part = c;
|
|
1901
|
-
// FIX (Bug 2 — afterTurn estimator): read c.content for native format
|
|
1902
|
-
const textVal = typeof part.text === 'string' ? part.text
|
|
1903
|
-
: typeof part.content === 'string' ? part.content
|
|
1904
|
-
: part.content != null ? JSON.stringify(part.content) : null;
|
|
1905
|
-
return s + estimateTokens(textVal);
|
|
1906
|
-
}, 0)
|
|
1907
|
-
: 0;
|
|
1908
|
-
return sum + textCost + toolCallCost + toolResultCost + contentCost;
|
|
1909
|
-
}, 0);
|
|
2617
|
+
// Use the runtime message array as the only trim-pressure source.
|
|
2618
|
+
// Redis remains a drift signal for anomaly logging.
|
|
2619
|
+
const runtimePostTokens = estimateMessageArrayTokens(messages);
|
|
1910
2620
|
const redisPostTokens = await estimateWindowTokens(hm, agentId, sk);
|
|
1911
|
-
const postTurnTokens =
|
|
2621
|
+
const postTurnTokens = runtimePostTokens;
|
|
2622
|
+
maybeLogPressureAccountingAnomaly({
|
|
2623
|
+
path: 'afterTurn.secondary',
|
|
2624
|
+
agentId,
|
|
2625
|
+
sessionKey: sk,
|
|
2626
|
+
runtimeTokens: runtimePostTokens,
|
|
2627
|
+
redisTokens: redisPostTokens,
|
|
2628
|
+
composedTokens: postTurnTokens,
|
|
2629
|
+
budget: modelState.tokenBudget,
|
|
2630
|
+
});
|
|
1912
2631
|
const postTurnPressure = postTurnTokens / modelState.tokenBudget;
|
|
1913
|
-
//
|
|
1914
|
-
//
|
|
1915
|
-
//
|
|
1916
|
-
//
|
|
1917
|
-
|
|
2632
|
+
// Sprint 2.2b: demote afterTurn.secondary to guard-only no-op.
|
|
2633
|
+
//
|
|
2634
|
+
// Previously this path was a two-tier real trim that fired after
|
|
2635
|
+
// every turn ending at >80% pressure, calling
|
|
2636
|
+
// trimHistoryToTokenBudget() and emitting `event:'trim'` with
|
|
2637
|
+
// path='afterTurn.secondary'. Sprint 2 consolidates steady-state
|
|
2638
|
+
// trim ownership in assemble.* (tool-loop + normal/subagent),
|
|
2639
|
+
// with compact.* as the only exception family. The afterTurn
|
|
2640
|
+
// post-turn pressure path is now redundant: the next turn's
|
|
2641
|
+
// assemble.* trim absorbs any residual pressure.
|
|
2642
|
+
//
|
|
2643
|
+
// Pattern matches the warmstart/reshape demotion from 2.2a:
|
|
2644
|
+
// keep the pressure predicate + threshold branch so observability
|
|
2645
|
+
// via `event:'trim-guard'` is preserved, but emit NO real trim,
|
|
2646
|
+
// NO invalidateWindow, NO mutation. The compact skip-gate stays
|
|
2647
|
+
// correct because this path never stamped any model state.
|
|
1918
2648
|
if (postTurnPressure > 0.80) {
|
|
1919
|
-
|
|
1920
|
-
|
|
1921
|
-
|
|
1922
|
-
|
|
1923
|
-
|
|
1924
|
-
}
|
|
2649
|
+
guardTelemetry({
|
|
2650
|
+
path: 'afterTurn.secondary',
|
|
2651
|
+
agentId, sessionKey: sk,
|
|
2652
|
+
reason: 'afterturn-secondary-demoted',
|
|
2653
|
+
});
|
|
1925
2654
|
}
|
|
1926
2655
|
}
|
|
1927
2656
|
}
|
|
@@ -2237,20 +2966,44 @@ const hypercompositorConfigSchema = z.object({
|
|
|
2237
2966
|
contextWindowReserve: z.number().min(0).max(0.5).optional(),
|
|
2238
2967
|
/** Defer tool pruning to OpenClaw's contextPruning. Default: false */
|
|
2239
2968
|
deferToolPruning: z.boolean().optional(),
|
|
2969
|
+
/** Emit detailed budget-source and trim-decision logs. Default: false */
|
|
2970
|
+
verboseLogging: z.boolean().optional(),
|
|
2971
|
+
/** Manual per-model context window fallback table used when runtime tokenBudget is missing. */
|
|
2972
|
+
contextWindowOverrides: z.record(z.string().regex(CONTEXT_WINDOW_OVERRIDE_KEY_REGEX, 'key must be "provider/model"'), contextWindowOverrideSchema).optional(),
|
|
2973
|
+
/** Treat cache replay snapshots older than this as stale. Default: 120000ms */
|
|
2974
|
+
warmCacheReplayThresholdMs: z.number().int().positive().optional(),
|
|
2240
2975
|
/** Subagent context injection: 'full' | 'light' | 'off'. Default: 'light' */
|
|
2241
2976
|
subagentWarming: z.enum(['full', 'light', 'off']).optional(),
|
|
2242
2977
|
/** Compositor tuning overrides */
|
|
2243
2978
|
compositor: z.object({
|
|
2979
|
+
budgetFraction: z.number().min(0).max(1).optional(),
|
|
2980
|
+
reserveFraction: z.number().min(0).max(1).optional(),
|
|
2981
|
+
historyFraction: z.number().min(0).max(1).optional(),
|
|
2982
|
+
memoryFraction: z.number().min(0).max(1).optional(),
|
|
2244
2983
|
defaultTokenBudget: z.number().int().positive().optional(),
|
|
2245
2984
|
maxHistoryMessages: z.number().int().positive().optional(),
|
|
2246
2985
|
maxFacts: z.number().int().positive().optional(),
|
|
2986
|
+
maxExpertisePatterns: z.number().int().positive().optional(),
|
|
2247
2987
|
maxCrossSessionContext: z.number().int().nonnegative().optional(),
|
|
2988
|
+
maxTotalTriggerTokens: z.number().int().nonnegative().optional(),
|
|
2248
2989
|
maxRecentToolPairs: z.number().int().nonnegative().optional(),
|
|
2249
2990
|
maxProseToolPairs: z.number().int().nonnegative().optional(),
|
|
2250
2991
|
warmHistoryBudgetFraction: z.number().min(0).max(1).optional(),
|
|
2992
|
+
contextWindowReserve: z.number().min(0).max(1).optional(),
|
|
2993
|
+
dynamicReserveTurnHorizon: z.number().int().positive().optional(),
|
|
2994
|
+
dynamicReserveMax: z.number().min(0).max(1).optional(),
|
|
2995
|
+
dynamicReserveEnabled: z.boolean().optional(),
|
|
2251
2996
|
keystoneHistoryFraction: z.number().min(0).max(1).optional(),
|
|
2252
2997
|
keystoneMaxMessages: z.number().int().nonnegative().optional(),
|
|
2253
2998
|
keystoneMinSignificance: z.number().min(0).max(1).optional(),
|
|
2999
|
+
targetBudgetFraction: z.number().min(0).max(1).optional(),
|
|
3000
|
+
enableFOS: z.boolean().optional(),
|
|
3001
|
+
enableMOD: z.boolean().optional(),
|
|
3002
|
+
hyperformProfile: z.enum(['light', 'standard', 'full', 'starter', 'fleet']).optional(),
|
|
3003
|
+
outputProfile: z.enum(['light', 'standard', 'full', 'starter', 'fleet']).optional(),
|
|
3004
|
+
outputStandard: z.enum(['light', 'standard', 'full', 'starter', 'fleet']).optional(),
|
|
3005
|
+
wikiTokenCap: z.number().int().positive().optional(),
|
|
3006
|
+
zigzagOrdering: z.boolean().optional(),
|
|
2254
3007
|
}).optional(),
|
|
2255
3008
|
/** Image/tool eviction settings */
|
|
2256
3009
|
eviction: z.object({
|
|
@@ -2280,22 +3033,22 @@ const engine = createHyperMemEngine();
|
|
|
2280
3033
|
export default definePluginEntry({
|
|
2281
3034
|
id: 'hypercompositor',
|
|
2282
3035
|
name: 'HyperCompositor — context engine',
|
|
2283
|
-
description: 'Four-layer memory architecture for OpenClaw agents:
|
|
3036
|
+
description: 'Four-layer memory architecture for OpenClaw agents: SQLite hot cache, message history, vector search, and structured library.',
|
|
2284
3037
|
kind: 'context-engine',
|
|
2285
3038
|
configSchema: buildPluginConfigSchema(hypercompositorConfigSchema),
|
|
2286
3039
|
register(api) {
|
|
2287
3040
|
// ── Resolve plugin config from openclaw.json ──
|
|
2288
3041
|
const pluginCfg = (api.pluginConfig ?? {});
|
|
2289
3042
|
_pluginConfig = pluginCfg;
|
|
2290
|
-
// ── Resolve HYPERMEM_PATH: pluginConfig >
|
|
3043
|
+
// ── Resolve HYPERMEM_PATH: pluginConfig > ESM package resolve > dev fallback ──
|
|
2291
3044
|
if (pluginCfg.hyperMemPath) {
|
|
2292
3045
|
HYPERMEM_PATH = pluginCfg.hyperMemPath;
|
|
2293
3046
|
console.log(`[hypermem-plugin] Using configured hyperMemPath: ${HYPERMEM_PATH}`);
|
|
2294
3047
|
}
|
|
2295
3048
|
else {
|
|
2296
3049
|
try {
|
|
2297
|
-
|
|
2298
|
-
|
|
3050
|
+
const resolvedUrl = import.meta.resolve('@psiclawops/hypermem');
|
|
3051
|
+
HYPERMEM_PATH = resolvedUrl.startsWith('file:') ? fileURLToPath(resolvedUrl) : resolvedUrl;
|
|
2299
3052
|
}
|
|
2300
3053
|
catch {
|
|
2301
3054
|
// Dev fallback: resolve relative to plugin directory
|
|
@@ -2305,6 +3058,55 @@ export default definePluginEntry({
|
|
|
2305
3058
|
}
|
|
2306
3059
|
}
|
|
2307
3060
|
api.registerContextEngine('hypercompositor', () => engine);
|
|
3061
|
+
// ── HyperForm config dir init ──
|
|
3062
|
+
// Copy defaults and guide to ~/.openclaw/hypermem/config/ on every load.
|
|
3063
|
+
// Defaults are overwritten on plugin update. Active config files are never touched.
|
|
3064
|
+
void (async () => {
    // HyperForm config bootstrap. Fire-and-forget: register() does not await it.
    //
    //   1. Refresh the shipped defaults and the guide in <dataDir>/config on
    //      every load, overwriting the previous copies.
    //   2. Seed each active config file from its defaults file, but only when
    //      the active file is not already present, so existing active files
    //      are left alone.
    //
    // Every failure is non-fatal. A missing file (ENOENT) is expected — the
    // defaults may not exist in dev builds — and stays silent. Any other
    // failure (permissions, read-only or full disk, ...) is logged so that a
    // missing HyperForm config can be diagnosed instead of vanishing silently.
    const warnUnlessMissing = (step, err) => {
        if (err?.code === 'ENOENT') {
            return;
        }
        console.warn(`[hypermem-plugin] HyperForm config init: ${step} failed (non-fatal):`, err?.message ?? err);
    };
    try {
        const dataDir = _pluginConfig.dataDir ?? path.join(os.homedir(), '.openclaw/hypermem');
        const configDir = path.join(dataDir, 'config');
        await fs.mkdir(configDir, { recursive: true });
        const __pluginDir = path.dirname(fileURLToPath(import.meta.url));
        const defaultsSrc = path.resolve(__pluginDir, '../../../config-defaults');
        const defaultFiles = [
            'hyperform-fos-defaults.json',
            'hyperform-mod-defaults.json',
            'HYPERFORM-GUIDE.md',
        ];
        // Step 1: always overwrite the defaults/guide copies with the shipped versions.
        for (const fname of defaultFiles) {
            try {
                await fs.copyFile(path.join(defaultsSrc, fname), path.join(configDir, fname));
            }
            catch (err) {
                warnUnlessMissing(`refreshing ${fname}`, err);
            }
        }
        // Step 2: on first install, copy defaults as active config if active files don't exist.
        const seedPairs = [
            ['hyperform-fos-defaults.json', 'hyperform-fos.json'],
            ['hyperform-mod-defaults.json', 'hyperform-mod.json'],
        ];
        for (const [src, dest] of seedPairs) {
            const destPath = path.join(configDir, dest);
            let activeExists = true;
            try {
                await fs.access(destPath);
            }
            catch {
                // Not accessible — treat as absent and try to seed it below.
                activeExists = false;
            }
            if (activeExists) {
                continue;
            }
            try {
                // Source is the defaults copy that step 1 placed in configDir.
                await fs.copyFile(path.join(configDir, src), destPath);
            }
            catch (err) {
                warnUnlessMissing(`seeding ${dest}`, err);
            }
        }
    }
    catch (err) {
        // Best-effort: never let config init break plugin registration.
        console.warn('[hypermem-plugin] HyperForm config init failed (non-fatal):', err?.message ?? err);
    }
})();
|
|
2308
3110
|
// P1.7: Bind TaskFlow runtime for task visibility — best-effort.
|
|
2309
3111
|
// Guard: api.runtime.taskFlow may not exist on older OpenClaw versions.
|
|
2310
3112
|
try {
|