@psiclawops/hypercompositor 0.7.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -4,7 +4,7 @@
4
4
  * Implements OpenClaw's ContextEngine interface backed by hypermem's
5
5
  * four-layer memory architecture:
6
6
  *
7
- * L1 Redis — hot session working memory
7
+ * L1 CacheSQLite `:memory:` hot session working memory
8
8
  * L2 Messages — per-agent conversation history (SQLite)
9
9
  * L3 Vectors — semantic + keyword search (KNN + FTS5)
10
10
  * L4 Library — facts, knowledge, episodes, preferences
@@ -14,7 +14,7 @@
14
14
  * assemble() → compositor builds context from all four layers
15
15
  * compact() → delegate to runtime (ownsCompaction: false)
16
16
  * afterTurn() → trigger background indexer (fire-and-forget)
17
- * bootstrap() → warm Redis session, register agent in fleet
17
+ * bootstrap() → warm hot-cache session, register agent in fleet
18
18
  * dispose() → close hypermem connections
19
19
  *
20
20
  * Session key format expected: "agent:<agentId>:<channel>:<name>"
@@ -22,22 +22,293 @@
22
22
  import { definePluginEntry } from 'openclaw/plugin-sdk/plugin-entry';
23
23
  import { buildPluginConfigSchema } from 'openclaw/plugin-sdk/core';
24
24
  import { z } from 'zod';
25
- import { detectTopicShift, stripMessageMetadata, SessionTopicMap, applyToolGradientToWindow, canPersistReshapedHistory, OPENCLAW_BOOTSTRAP_FILES } from '@psiclawops/hypermem';
25
+ import { detectTopicShift, stripMessageMetadata, SessionTopicMap, applyToolGradientToWindow, OPENCLAW_BOOTSTRAP_FILES, rotateSessionContext, TRIM_SOFT_TARGET, TRIM_GROWTH_THRESHOLD, TRIM_HEADROOM_FRACTION, resolveTrimBudgets, formatToolChainStub, decideReplayRecovery, isReplayState, } from '@psiclawops/hypermem';
26
26
  import { evictStaleContent } from '@psiclawops/hypermem/image-eviction';
27
27
  import { repairToolPairs } from '@psiclawops/hypermem';
28
28
  import os from 'os';
29
29
  import path from 'path';
30
30
  import fs from 'fs/promises';
31
- import { createRequire } from 'module';
32
31
  import { fileURLToPath } from 'url';
32
+ import fsSync from 'fs';
33
+ let _telemetryStream = null;
34
+ let _telemetryStreamFailed = false;
35
+ let _telemetryTurnCounter = 0;
36
+ function telemetryEnabled() {
37
+ return process.env.HYPERMEM_TELEMETRY === '1';
38
+ }
39
+ function getTelemetryStream() {
40
+ if (_telemetryStream || _telemetryStreamFailed)
41
+ return _telemetryStream;
42
+ try {
43
+ const p = process.env.HYPERMEM_TELEMETRY_PATH || './hypermem-telemetry.jsonl';
44
+ _telemetryStream = fsSync.createWriteStream(p, { flags: 'a' });
45
+ _telemetryStream.on('error', () => {
46
+ _telemetryStreamFailed = true;
47
+ _telemetryStream = null;
48
+ });
49
+ }
50
+ catch {
51
+ _telemetryStreamFailed = true;
52
+ _telemetryStream = null;
53
+ }
54
+ return _telemetryStream;
55
+ }
56
+ function trimTelemetry(fields) {
57
+ if (!telemetryEnabled())
58
+ return;
59
+ const stream = getTelemetryStream();
60
+ if (!stream)
61
+ return;
62
+ try {
63
+ const record = {
64
+ event: 'trim',
65
+ ts: new Date().toISOString(),
66
+ ...fields,
67
+ };
68
+ stream.write(JSON.stringify(record) + '\n');
69
+ }
70
+ catch {
71
+ // Telemetry must never throw
72
+ }
73
+ }
74
+ function assembleTrace(fields) {
75
+ if (!telemetryEnabled())
76
+ return;
77
+ const stream = getTelemetryStream();
78
+ if (!stream)
79
+ return;
80
+ try {
81
+ const record = {
82
+ event: 'assemble',
83
+ ts: new Date().toISOString(),
84
+ ...fields,
85
+ };
86
+ stream.write(JSON.stringify(record) + '\n');
87
+ }
88
+ catch {
89
+ // Telemetry must never throw
90
+ }
91
+ }
92
+ function degradationTelemetry(fields) {
93
+ if (!telemetryEnabled())
94
+ return;
95
+ const stream = getTelemetryStream();
96
+ if (!stream)
97
+ return;
98
+ try {
99
+ const record = {
100
+ event: 'degradation',
101
+ ts: new Date().toISOString(),
102
+ ...fields,
103
+ };
104
+ stream.write(JSON.stringify(record) + '\n');
105
+ }
106
+ catch {
107
+ // Telemetry must never throw
108
+ }
109
+ }
110
+ function nextTurnId() {
111
+ _telemetryTurnCounter = (_telemetryTurnCounter + 1) >>> 0;
112
+ return `${Date.now().toString(36)}-${_telemetryTurnCounter.toString(36)}`;
113
+ }
114
+ // ─── Trim Ownership (Phase A Sprint 2) ───────────────────────────
115
+ //
116
+ // Sprint 2 consolidates trim ownership: the assemble-owned family
117
+ // (assemble.normal, assemble.subagent, assemble.toolLoop) is the single
118
+ // steady-state trim owner. Compact paths (compact.nuclear, compact.history,
119
+ // compact.history2) are exempted — they're exception-only. warmstart,
120
+ // reshape, and afterTurn.secondary are demoted in sub-tasks 2.2 and 2.3.
121
+ //
122
+ // This block adds:
123
+ // 1. A per-session turn context (beginTrimOwnerTurn/endTrimOwnerTurn) scoped
124
+ // by the main assemble() flow.
125
+ // 2. A single shared trimOwner claim helper that lets exactly one **real**
126
+ // steady-state trim claim ownership per turn and throws loudly in
127
+ // development (NODE_ENV='development') when a second real steady-state
128
+ // trim path attempts to claim the same turn.
129
+ // 3. A non-counting guard/noop telemetry helper (same JSONL channel) that
130
+ // demoted paths can emit to preserve visibility of warm-start/reshape
131
+ // without consuming a steady-state owner slot.
132
+ //
133
+ // Sub-task 2.1 only adds the scaffolding + invariant; no existing trim call
134
+ // is removed here. Demotions of warm-start/reshape/afterTurn.secondary land
135
+ // in 2.2 and 2.3.
136
+ const STEADY_STATE_TRIM_PATHS = new Set([
137
+ 'assemble.normal',
138
+ 'assemble.subagent',
139
+ 'assemble.toolLoop',
140
+ ]);
141
+ const COMPACT_TRIM_PATHS = new Set([
142
+ 'compact.nuclear',
143
+ 'compact.history',
144
+ 'compact.history2',
145
+ ]);
146
+ // ─── Guard-telemetry reason enum (Phase A Sprint 2.2a) ──────────────────
147
+ // Plugin-local, constant-backed union of allowed `reason` values on
148
+ // `event: 'trim-guard'` records. Keeping this bounded prevents ad-hoc
149
+ // numeric/user strings from leaking into the telemetry JSONL channel and
150
+ // makes downstream reporting stable. Do NOT widen this to arbitrary
151
+ // strings — add a new member here first, then reference it at call sites.
152
+ //
153
+ // Scope note: this union is plugin-local (per planner 2.2 §C). It is not
154
+ // re-exported via `src/types.ts` because the shared public types surface
155
+ // must not gain a telemetry-reason enum as part of this sprint.
156
+ const GUARD_TELEMETRY_REASONS = [
157
+ 'warmstart-pressure-demoted',
158
+ 'reshape-downshift-demoted',
159
+ 'duplicate-claim-suppressed',
160
+ 'afterturn-secondary-demoted',
161
+ 'window-within-budget-skip',
162
+ 'pressure-accounting-anomaly',
163
+ ];
164
+ // Turn-scoped ownership map (Phase A Sprint 2.2a).
165
+ //
166
+ // Previously keyed by `sessionKey` alone, which clobbered overlapping same-
167
+ // session assemble() flows (Sprint 2.1 security eval, medium finding #1).
168
+ // Now keyed by the composite `sessionKey|turnId` so two concurrent turns on
169
+ // the same session key remain isolated: each `beginTrimOwnerTurn` gets its
170
+ // own slot, `claimTrimOwner` checks the exact turn's slot, and
171
+ // `endTrimOwnerTurn` removes only that turn's slot.
172
+ const _trimOwnerTurns = new Map();
173
+ function _trimOwnerKey(sessionKey, turnId) {
174
+ return `${sessionKey}|${turnId}`;
175
+ }
176
+ function beginTrimOwnerTurn(sessionKey, turnId) {
177
+ _trimOwnerTurns.set(_trimOwnerKey(sessionKey, turnId), { turnId });
178
+ }
179
+ function endTrimOwnerTurn(sessionKey, turnId) {
180
+ _trimOwnerTurns.delete(_trimOwnerKey(sessionKey, turnId));
181
+ }
182
+ /**
183
+ * Claim the steady-state trim owner slot for the current turn.
184
+ *
185
+ * Behavior:
186
+ * - compact.* paths are exception-only and pass through without claiming.
187
+ * - Non-steady paths (warmstart, reshape, afterTurn.secondary) also pass
188
+ * through without claiming. Demoted/no-op sites should normally emit
189
+ * via guardTelemetry() instead so they stay visible without contending
190
+ * for ownership (sub-tasks 2.2 and 2.3 wire this in).
191
+ * - Steady-state paths (assemble.normal, assemble.subagent,
192
+ * assemble.toolLoop) claim the single owner slot for the current turn.
193
+ * The first such claim succeeds. A second steady-state claim against the
194
+ * same turn is a duplicate-turn violation: it throws loudly under
195
+ * NODE_ENV='development' and warns in other environments (returning
196
+ * false so non-dev runtimes keep working).
197
+ *
198
+ * Callers should invoke this immediately before the real
199
+ * trimHistoryToTokenBudget() call. Guard telemetry does NOT route through
200
+ * this helper — it is explicitly excluded from the steady-state invariant.
201
+ *
202
+ * Returns true when the claim succeeds (or is exempt); false on a swallowed
203
+ * duplicate claim in non-development. In development the duplicate throws
204
+ * before returning.
205
+ */
206
+ function claimTrimOwner(sessionKey, turnId, path) {
207
+ // Compact paths: exempt — they represent an exceptional pressure path and
208
+ // never contend for the steady-state slot.
209
+ if (COMPACT_TRIM_PATHS.has(path))
210
+ return true;
211
+ // Non-steady paths: pass through (warmstart/reshape/afterTurn.secondary).
212
+ // Warmstart + reshape are demoted to guardTelemetry in 2.2a.
213
+ if (!STEADY_STATE_TRIM_PATHS.has(path))
214
+ return true;
215
+ const ctx = _trimOwnerTurns.get(_trimOwnerKey(sessionKey, turnId));
216
+ if (!ctx)
217
+ return true; // No active assemble-turn scope — nothing to enforce here.
218
+ if (ctx.claimedPath) {
219
+ const msg = `[hypermem-plugin] trimOwner: duplicate steady-state trim claim in turn ` +
220
+ `${ctx.turnId} (sessionKey=${sessionKey}): first=${ctx.claimedPath} second=${path}`;
221
+ if (process.env.NODE_ENV === 'development') {
222
+ throw new Error(msg);
223
+ }
224
+ // Non-development: do not throw, but leave a loud trail so telemetry
225
+ // surfaces the violation. Callers MUST honor the false return and skip
226
+ // the second real trim (Sprint 2.2a enforcement).
227
+ console.warn(msg);
228
+ return false;
229
+ }
230
+ ctx.claimedPath = path;
231
+ return true;
232
+ }
233
+ /**
234
+ * Non-counting guard / noop telemetry.
235
+ *
236
+ * Emits a `trim-guard` record on the same JSONL channel as trimTelemetry()
237
+ * but with a distinct event name so per-turn reporting (scripts/trim-report.mjs,
238
+ * future ownership dashboards) can keep it out of `trimCount`. Used by
239
+ * demoted/no-op call sites in 2.2 and 2.3 so their path labels stay visible
240
+ * in telemetry without consuming a steady-state owner slot.
241
+ *
242
+ * Zero-cost when telemetry is off. Never throws.
243
+ */
244
+ function guardTelemetry(fields) {
245
+ if (!telemetryEnabled())
246
+ return;
247
+ const stream = getTelemetryStream();
248
+ if (!stream)
249
+ return;
250
+ try {
251
+ const record = {
252
+ event: 'trim-guard',
253
+ ts: new Date().toISOString(),
254
+ ...fields,
255
+ };
256
+ stream.write(JSON.stringify(record) + '\n');
257
+ }
258
+ catch {
259
+ // Telemetry must never throw
260
+ }
261
+ }
262
+ // ─── B3: Batch trim with growth allowance ────────────────────────────────
263
+ // Trim fires only when window usage exceeds the soft target by this fraction.
264
+ // Small natural growth (e.g. a short assistant reply) never triggers a trim;
265
+ // only genuine spikes (model switch, cold-start, multi-tool overrun) do.
266
+ // When trim fires, the target is (softTarget * (1 - headroomFraction)) so the
267
+ // window has room to grow for several turns before the next trim fires.
268
+ //
269
+ // softTarget (0.65): matches refreshRedisGradient → steady state never trims
270
+ // growthThreshold (0.05): 5% overage buffer before trim fires
271
+ // headroomFraction (0.10): trim target = softTarget * 0.90 → ~58.5% of budget
272
+ // Canonical values live in the core package so plugin trim guards and compose
273
+ // paths cannot drift.
274
+ // Test-only: expose emitters so the unit test can exercise them directly
275
+ // without standing up a real session. Wrapped in a getter object so the flag
276
+ // guard still runs (zero-cost when off).
277
+ export const __telemetryForTests = {
278
+ trimTelemetry,
279
+ assembleTrace,
280
+ degradationTelemetry,
281
+ guardTelemetry,
282
+ nextTurnId,
283
+ beginTrimOwnerTurn,
284
+ endTrimOwnerTurn,
285
+ claimTrimOwner,
286
+ // B3/C0.1: Expose the canonical policy surface so tests can assert against
287
+ // the shared source of truth instead of embedding formulas locally.
288
+ TRIM_SOFT_TARGET,
289
+ TRIM_GROWTH_THRESHOLD,
290
+ TRIM_HEADROOM_FRACTION,
291
+ resolveTrimBudgets,
292
+ reset() {
293
+ if (_telemetryStream) {
294
+ try {
295
+ _telemetryStream.end();
296
+ }
297
+ catch { /* ignore */ }
298
+ }
299
+ _telemetryStream = null;
300
+ _telemetryStreamFailed = false;
301
+ _telemetryTurnCounter = 0;
302
+ _trimOwnerTurns.clear();
303
+ },
304
+ };
33
305
  // ─── hypermem singleton ────────────────────────────────────────
34
306
  // Runtime load is dynamic (hypermem is a sibling package loaded from repo dist,
35
307
  // not installed via npm). Types come from the core package devDependency.
36
308
  // This pattern keeps the runtime path stable while TypeScript resolves types
37
309
  // from the canonical source — no more local shim drift.
38
- // Resolved at init time: pluginConfig.hyperMemPath > require.resolve('@psiclawops/hypermem') > dev fallback
310
+ // Resolved at init time: pluginConfig.hyperMemPath > import.meta.resolve('@psiclawops/hypermem') > dev fallback
39
311
  let HYPERMEM_PATH = '';
40
- const require = createRequire(import.meta.url);
41
312
  let _hm = null;
42
313
  let _hmInitPromise = null;
43
314
  let _indexer = null;
@@ -65,6 +336,89 @@ let _evictionConfig;
65
336
  let _contextWindowSize = 128_000;
66
337
  let _contextWindowReserve = 0.25;
67
338
  let _deferToolPruning = false;
339
+ let _verboseLogging = false;
340
+ let _contextWindowOverrides = {};
341
+ const _budgetFallbackWarnings = new Set();
342
+ export const CONTEXT_WINDOW_OVERRIDE_KEY_REGEX = /^[^/\s]+\/[^/\s]+$/;
343
+ const contextWindowOverrideSchema = z.object({
344
+ contextTokens: z.number().int().positive().optional(),
345
+ contextWindow: z.number().int().positive().optional(),
346
+ }).superRefine((value, ctx) => {
347
+ if (value.contextTokens == null && value.contextWindow == null) {
348
+ ctx.addIssue({
349
+ code: z.ZodIssueCode.custom,
350
+ message: 'override must declare contextTokens, contextWindow, or both',
351
+ });
352
+ }
353
+ if (value.contextTokens != null &&
354
+ value.contextWindow != null &&
355
+ value.contextTokens > value.contextWindow) {
356
+ ctx.addIssue({
357
+ code: z.ZodIssueCode.custom,
358
+ message: 'contextTokens must be less than or equal to contextWindow',
359
+ });
360
+ }
361
+ });
362
+ export function sanitizeContextWindowOverrides(raw) {
363
+ if (!raw || typeof raw !== 'object' || Array.isArray(raw)) {
364
+ return { value: {}, warnings: [] };
365
+ }
366
+ const value = {};
367
+ const warnings = [];
368
+ for (const [key, candidate] of Object.entries(raw)) {
369
+ const normalizedKey = key.trim().toLowerCase();
370
+ if (!CONTEXT_WINDOW_OVERRIDE_KEY_REGEX.test(normalizedKey)) {
371
+ warnings.push(`ignoring contextWindowOverrides[${JSON.stringify(key)}]: key must be "provider/model"`);
372
+ continue;
373
+ }
374
+ const parsed = contextWindowOverrideSchema.safeParse(candidate);
375
+ if (!parsed.success) {
376
+ warnings.push(`ignoring contextWindowOverrides[${JSON.stringify(key)}]: ` +
377
+ parsed.error.issues.map(issue => issue.message).join('; '));
378
+ continue;
379
+ }
380
+ value[normalizedKey] = parsed.data;
381
+ }
382
+ return { value, warnings };
383
+ }
384
+ export function resolveEffectiveBudget(args) {
385
+ const { tokenBudget, model, contextWindowSize, contextWindowReserve } = args;
386
+ if (tokenBudget) {
387
+ return { budget: tokenBudget, source: 'runtime tokenBudget' };
388
+ }
389
+ const key = normalizeModelKey(model);
390
+ const override = key ? args.contextWindowOverrides?.[key] : undefined;
391
+ const configuredWindow = override?.contextTokens ?? override?.contextWindow;
392
+ if (configuredWindow) {
393
+ return {
394
+ budget: Math.floor(configuredWindow * (1 - contextWindowReserve)),
395
+ source: `contextWindowOverrides[${key}]`,
396
+ };
397
+ }
398
+ return {
399
+ budget: Math.floor(contextWindowSize * (1 - contextWindowReserve)),
400
+ source: 'fallback contextWindowSize',
401
+ };
402
+ }
403
+ function normalizeModelKey(model) {
404
+ if (!model)
405
+ return null;
406
+ const key = model.trim().toLowerCase();
407
+ return key.length > 0 ? key : null;
408
+ }
409
+ function verboseLog(message) {
410
+ if (_verboseLogging)
411
+ console.log(message);
412
+ }
413
+ function resolveConfiguredWindow(model) {
414
+ const key = normalizeModelKey(model);
415
+ if (!key)
416
+ return null;
417
+ const override = _contextWindowOverrides[key];
418
+ if (!override)
419
+ return null;
420
+ return override.contextTokens ?? override.contextWindow ?? null;
421
+ }
68
422
  // Subagent warming mode: 'full' | 'light' | 'off'. Default: 'light'.
69
423
  // Controls how much HyperMem context is injected into subagent sessions.
70
424
  let _subagentWarming = 'light';
@@ -106,11 +460,34 @@ function getOverheadFallback(tier) {
106
460
  * total context (history + system) exceeds the model window before trim
107
461
  * completes, causing result stripping.
108
462
  */
109
- function computeEffectiveBudget(tokenBudget) {
110
- if (tokenBudget)
111
- return tokenBudget;
112
- // Derived from window config: floor to avoid fractional tokens
113
- return Math.floor(_contextWindowSize * (1 - _contextWindowReserve));
463
+ function computeEffectiveBudget(tokenBudget, model) {
464
+ const resolved = resolveEffectiveBudget({
465
+ tokenBudget,
466
+ model,
467
+ contextWindowSize: _contextWindowSize,
468
+ contextWindowReserve: _contextWindowReserve,
469
+ contextWindowOverrides: _contextWindowOverrides,
470
+ });
471
+ if (resolved.source === 'runtime tokenBudget') {
472
+ verboseLog(`[hypermem-plugin] budget source: runtime tokenBudget=${tokenBudget}${model ? ` model=${model}` : ''}`);
473
+ return resolved.budget;
474
+ }
475
+ const configuredWindow = resolveConfiguredWindow(model);
476
+ if (configuredWindow) {
477
+ verboseLog(`[hypermem-plugin] budget source: contextWindowOverrides[${normalizeModelKey(model)}]=${configuredWindow}, ` +
478
+ `reserve=${_contextWindowReserve}, effective=${resolved.budget}`);
479
+ return resolved.budget;
480
+ }
481
+ verboseLog(`[hypermem-plugin] budget source: fallback contextWindowSize=${_contextWindowSize}, ` +
482
+ `reserve=${_contextWindowReserve}, effective=${resolved.budget}${model ? ` model=${model}` : ''}`);
483
+ const warningKey = normalizeModelKey(model) ?? '(unknown-model)';
484
+ if (!_budgetFallbackWarnings.has(warningKey)) {
485
+ _budgetFallbackWarnings.add(warningKey);
486
+ console.warn(`[hypermem-plugin] No runtime tokenBudget${model ? ` for model ${model}` : ''}; ` +
487
+ `falling back to contextWindowSize=${_contextWindowSize}. ` +
488
+ `Add contextWindowOverrides["provider/model"] to config.json or openclaw.json if detection is wrong.`);
489
+ }
490
+ return resolved.budget;
114
491
  }
115
492
  // ─── Plugin config cache ───────────────────────────────────────
116
493
  // Populated from openclaw.json plugins.entries.hypercompositor.config
@@ -146,6 +523,12 @@ async function loadUserConfig() {
146
523
  merged.contextWindowReserve = _pluginConfig.contextWindowReserve;
147
524
  if (_pluginConfig.deferToolPruning != null)
148
525
  merged.deferToolPruning = _pluginConfig.deferToolPruning;
526
+ if (_pluginConfig.verboseLogging != null)
527
+ merged.verboseLogging = _pluginConfig.verboseLogging;
528
+ if (_pluginConfig.contextWindowOverrides != null)
529
+ merged.contextWindowOverrides = { ...merged.contextWindowOverrides, ..._pluginConfig.contextWindowOverrides };
530
+ if (_pluginConfig.warmCacheReplayThresholdMs != null)
531
+ merged.warmCacheReplayThresholdMs = _pluginConfig.warmCacheReplayThresholdMs;
149
532
  if (_pluginConfig.subagentWarming != null)
150
533
  merged.subagentWarming = _pluginConfig.subagentWarming;
151
534
  if (_pluginConfig.compositor)
@@ -214,10 +597,16 @@ async function getHyperMem() {
214
597
  userConfig.contextWindowReserve >= 0 && userConfig.contextWindowReserve <= 0.5) {
215
598
  _contextWindowReserve = userConfig.contextWindowReserve;
216
599
  }
217
- if (userConfig.deferToolPruning === true) {
218
- _deferToolPruning = true;
600
+ _deferToolPruning = userConfig.deferToolPruning === true;
601
+ if (_deferToolPruning) {
219
602
  console.log('[hypermem-plugin] deferToolPruning: true — tool gradient deferred to host contextPruning');
220
603
  }
604
+ _verboseLogging = userConfig.verboseLogging === true;
605
+ const sanitizedOverrides = sanitizeContextWindowOverrides(userConfig.contextWindowOverrides);
606
+ _contextWindowOverrides = sanitizedOverrides.value;
607
+ for (const warning of sanitizedOverrides.warnings) {
608
+ console.warn(`[hypermem-plugin] ${warning}`);
609
+ }
221
610
  const warmingVal = userConfig.subagentWarming;
222
611
  if (warmingVal === 'full' || warmingVal === 'light' || warmingVal === 'off') {
223
612
  _subagentWarming = warmingVal;
@@ -230,6 +619,8 @@ async function getHyperMem() {
230
619
  console.log(`[hypermem-plugin] context window: ${_contextWindowSize} tokens, ` +
231
620
  `${Math.round(_contextWindowReserve * 100)}% reserved (${reservedTokens} tokens), ` +
232
621
  `effective history budget: ${_contextWindowSize - reservedTokens} tokens`);
622
+ verboseLog(`[hypermem-plugin] warmCacheReplayThresholdMs=${_cacheReplayThresholdMs}`);
623
+ verboseLog(`[hypermem-plugin] contextWindowOverrides keys=${Object.keys(_contextWindowOverrides).join(', ') || '(none)'}`);
233
624
  const instance = await HyperMem.create({
234
625
  dataDir: _pluginConfig.dataDir ?? path.join(os.homedir(), '.openclaw/hypermem'),
235
626
  cache: {
@@ -260,16 +651,31 @@ async function getHyperMem() {
260
651
  catch {
261
652
  return [];
262
653
  }
263
- }, { enabled: true, periodicInterval: 300000 }, // 5-minute interval
264
- // Cursor fetcher: reads from Redis → SQLite fallback
654
+ }, {
655
+ enabled: true,
656
+ periodicInterval: userConfig?.maintenance?.periodicInterval ?? 300000,
657
+ maxActiveConversations: userConfig?.maintenance?.maxActiveConversations ?? 5,
658
+ recentConversationCooldownMs: userConfig?.maintenance?.recentConversationCooldownMs ?? 30000,
659
+ maxCandidatesPerPass: userConfig?.maintenance?.maxCandidatesPerPass ?? 200,
660
+ },
661
+ // Cursor fetcher: reads the SQLite-backed session cursor
265
662
  async (agentId, sessionKey) => {
266
663
  return instance.getSessionCursor(agentId, sessionKey);
267
664
  },
268
665
  // Pass vector store so new facts/episodes are embedded at index time
269
666
  instance.getVectorStore() ?? undefined,
270
667
  // Dreaming config — passed from hypermem user config if set
271
- userConfig?.dreaming ?? {});
668
+ userConfig?.dreaming ?? {},
669
+ // KL-01: global write policy — passed from hypermem user config
670
+ userConfig?.globalWritePolicy ?? 'deny');
272
671
  _indexer.start();
672
+ if (_verboseLogging) {
673
+ const mc = userConfig?.maintenance ?? {};
674
+ console.log(`[hypermem-plugin] maintenance settings: periodicInterval=${mc.periodicInterval ?? 300000}ms ` +
675
+ `maxActiveConversations=${mc.maxActiveConversations ?? 5} ` +
676
+ `cooldown=${mc.recentConversationCooldownMs ?? 30000}ms ` +
677
+ `maxCandidatesPerPass=${mc.maxCandidatesPerPass ?? 200}`);
678
+ }
273
679
  }
274
680
  catch {
275
681
  // Non-fatal — indexer wiring can fail without breaking context assembly
@@ -510,6 +916,75 @@ function estimateTokens(text) {
510
916
  return 0;
511
917
  return Math.ceil(text.length / 4);
512
918
  }
919
+ function estimateMessagePartTokens(part) {
920
+ if (part.type === 'image' || part.type === 'image_url') {
921
+ const src = part.source?.data;
922
+ const url = part.image_url?.url;
923
+ const dataStr = typeof src === 'string' ? src : (typeof url === 'string' ? url : '');
924
+ return Math.ceil(dataStr.length / 3);
925
+ }
926
+ if (part.type === 'toolCall' || part.type === 'tool_use') {
927
+ return Math.ceil(JSON.stringify(part).length / 2);
928
+ }
929
+ const textVal = typeof part.text === 'string' ? part.text
930
+ : typeof part.content === 'string' ? part.content
931
+ : part.content != null ? JSON.stringify(part.content) : null;
932
+ return estimateTokens(textVal);
933
+ }
934
+ function estimateMessageTokens(msg) {
935
+ let total = estimateTokens(typeof msg.textContent === 'string' ? msg.textContent : null);
936
+ if (typeof msg.content === 'string' && typeof msg.textContent !== 'string') {
937
+ total += estimateTokens(msg.content);
938
+ }
939
+ if (msg.toolCalls)
940
+ total += Math.ceil(JSON.stringify(msg.toolCalls).length / 2);
941
+ if (msg.toolResults)
942
+ total += Math.ceil(JSON.stringify(msg.toolResults).length / 2);
943
+ if (Array.isArray(msg.content)) {
944
+ total += msg.content.reduce((sum, part) => sum + estimateMessagePartTokens(part), 0);
945
+ }
946
+ return total;
947
+ }
948
+ function estimateMessageArrayTokens(messages) {
949
+ return messages.reduce((sum, msg) => sum + estimateMessageTokens(msg), 0);
950
+ }
951
+ function maybeLogPressureAccountingAnomaly(fields) {
952
+ const threshold = Math.max(500, Math.floor(fields.budget * 0.05));
953
+ const deltas = {
954
+ runtimeVsComposed: Math.abs(fields.runtimeTokens - fields.composedTokens),
955
+ redisVsComposed: Math.abs(fields.redisTokens - fields.composedTokens),
956
+ runtimeVsRedis: Math.abs(fields.runtimeTokens - fields.redisTokens),
957
+ };
958
+ if (deltas.runtimeVsComposed < threshold &&
959
+ deltas.redisVsComposed < threshold &&
960
+ deltas.runtimeVsRedis < threshold) {
961
+ return;
962
+ }
963
+ console.warn(`[hypermem-plugin] pressure-accounting anomaly: path=${fields.path} ` +
964
+ `runtime=${fields.runtimeTokens} redis=${fields.redisTokens} composed=${fields.composedTokens} ` +
965
+ `budget=${fields.budget} threshold=${threshold}`);
966
+ guardTelemetry({
967
+ path: fields.path,
968
+ agentId: fields.agentId,
969
+ sessionKey: fields.sessionKey,
970
+ reason: 'pressure-accounting-anomaly',
971
+ });
972
+ }
973
+ function normalizeReplayRecoveryState(value) {
974
+ if (value == null)
975
+ return null;
976
+ if (value === '')
977
+ return '';
978
+ return isReplayState(value) ? value : null;
979
+ }
980
+ async function persistReplayRecoveryState(hm, agentId, sessionKey, nextState) {
981
+ try {
982
+ await hm.cache.setSlot(agentId, sessionKey, 'replayRecoveryState', nextState ?? '');
983
+ }
984
+ catch {
985
+ // Non-fatal
986
+ }
987
+ }
513
988
  function hasStructuredToolCallMessage(msg) {
514
989
  if (Array.isArray(msg.toolCalls) && msg.toolCalls.length > 0)
515
990
  return true;
@@ -604,15 +1079,7 @@ async function estimateWindowTokens(hm, agentId, sessionKey) {
604
1079
  ?? await hm.cache.getHistory(agentId, sessionKey);
605
1080
  if (!window || window.length === 0)
606
1081
  return 0;
607
- return window.reduce((sum, msg) => {
608
- let t = estimateTokens(msg.textContent);
609
- // Tool payloads are dense JSON — use /2 not /4 to avoid systematic undercount
610
- if (msg.toolCalls)
611
- t += Math.ceil(JSON.stringify(msg.toolCalls).length / 2);
612
- if (msg.toolResults)
613
- t += Math.ceil(JSON.stringify(msg.toolResults).length / 2);
614
- return sum + t;
615
- }, 0);
1082
+ return estimateMessageArrayTokens(window);
616
1083
  }
617
1084
  catch {
618
1085
  return 0;
@@ -716,7 +1183,7 @@ async function truncateJsonlIfNeeded(sessionFile, targetDepth, force = false, to
716
1183
  function createHyperMemEngine() {
717
1184
  return {
718
1185
  info: {
719
- id: 'hypermem',
1186
+ id: 'hypercompositor',
720
1187
  name: 'hypermem context engine',
721
1188
  version: '0.6.3',
722
1189
  // We own compaction — assemble() trims to budget via the compositor safety
@@ -743,6 +1210,55 @@ function createHyperMemEngine() {
743
1210
  const sk = resolveSessionKey(sessionId, sessionKey);
744
1211
  const agentId = extractAgentId(sk);
745
1212
  // EC1 JSONL truncation moved to maintain() — bootstrap stays fast.
1213
+ // B2: Session-restart detection — rotateSessionContext hook.
1214
+ // When the runtime starts a new session (new sessionId) for an existing
1215
+ // sessionKey, archive the old context head and create a fresh active
1216
+ // context so the new conversation starts clean. This prevents the new
1217
+ // session from inheriting a stale context head pointer from the prior run.
1218
+ //
1219
+ // Detection: if a conversation row exists for this sessionKey AND the
1220
+ // stored session_id differs from the incoming sessionId (runtime-assigned),
1221
+ // treat this as a session restart.
1222
+ //
1223
+ // Non-fatal: context rotation is best-effort and never blocks bootstrap.
1224
+ if (sessionId) {
1225
+ try {
1226
+ const _msgDb = hm.dbManager.getMessageDb(agentId);
1227
+ if (_msgDb) {
1228
+ const _existingConv = _msgDb.prepare('SELECT id, session_id FROM conversations WHERE session_key = ? LIMIT 1').get(sk);
1229
+ if (_existingConv &&
1230
+ _existingConv.session_id !== null &&
1231
+ _existingConv.session_id !== sessionId) {
1232
+ // Distinct sessionId — this is a session restart for an existing sessionKey.
1233
+ rotateSessionContext(_msgDb, agentId, sk, _existingConv.id);
1234
+ // Update the stored session_id to the new one.
1235
+ try {
1236
+ _msgDb.prepare('UPDATE conversations SET session_id = ? WHERE id = ?')
1237
+ .run(sessionId, _existingConv.id);
1238
+ }
1239
+ catch {
1240
+ // Best-effort — column may not exist in older schemas
1241
+ }
1242
+ console.log(`[hypermem-plugin] bootstrap: session restart detected for ${agentId}/${sk} ` +
1243
+ `(prev session_id=${_existingConv.session_id}, new=${sessionId}) — context rotated`);
1244
+ }
1245
+ else if (_existingConv && _existingConv.session_id === null && sessionId) {
1246
+ // Conversation exists but session_id was never recorded — stamp it now.
1247
+ try {
1248
+ _msgDb.prepare('UPDATE conversations SET session_id = ? WHERE id = ?')
1249
+ .run(sessionId, _existingConv.id);
1250
+ }
1251
+ catch {
1252
+ // Best-effort
1253
+ }
1254
+ }
1255
+ }
1256
+ }
1257
+ catch (rotateErr) {
1258
+ // Non-fatal — never block bootstrap on context rotation
1259
+ console.warn('[hypermem-plugin] bootstrap: rotateSessionContext failed (non-fatal):', rotateErr.message);
1260
+ }
1261
+ }
746
1262
  // Fast path: if session already has history in Redis, skip warm entirely.
747
1263
  // sessionExists() is a single EXISTS call — sub-millisecond cost.
748
1264
  const alreadyWarm = await hm.cache.sessionExists(agentId, sk);
@@ -766,10 +1282,10 @@ function createHyperMemEngine() {
766
1282
  // Non-fatal: missing files are silently skipped.
767
1283
  let identityBlock;
768
1284
  try {
769
- // Council agents live at workspace-council/<agentId>/
1285
+ // Council agents live at workspace/<agentId>/
770
1286
  // Other agents at workspace/<agentId>/ — try council path first
771
1287
  const homedir = os.homedir();
772
- const councilPath = path.join(homedir, '.openclaw', 'workspace-council', agentId);
1288
+ const councilPath = path.join(homedir, '.openclaw', 'workspace', agentId);
773
1289
  const workspacePath = path.join(homedir, '.openclaw', 'workspace', agentId);
774
1290
  let wsPath = councilPath;
775
1291
  try {
@@ -801,7 +1317,7 @@ function createHyperMemEngine() {
801
1317
  let _wsPathForSeed;
802
1318
  try {
803
1319
  const homedir2 = os.homedir();
804
- const councilPath2 = path.join(homedir2, '.openclaw', 'workspace-council', agentId);
1320
+ const councilPath2 = path.join(homedir2, '.openclaw', 'workspace', agentId);
805
1321
  const workspacePath2 = path.join(homedir2, '.openclaw', 'workspace', agentId);
806
1322
  try {
807
1323
  await fs.access(councilPath2);
@@ -836,7 +1352,7 @@ function createHyperMemEngine() {
836
1352
  // Post-warm pressure check: if messages.db had accumulated history,
837
1353
  // warm() may have loaded the session straight to 80%+. Pre-trim now
838
1354
  // so the first turn has headroom instead of starting saturated.
839
- // This is the "restart at 98%" failure mode reported by Helm 2026-04-05:
1355
+ // This is the "restart at 98%" failure mode reported by Eve 2026-04-05:
840
1356
  // JSONL truncation + Redis flush isn't enough if messages.db is still full
841
1357
  // and warm() reloads it. Trim here closes the loop.
842
1358
  try {
@@ -846,15 +1362,20 @@ function createHyperMemEngine() {
846
1362
  const warmBudget = 90_000;
847
1363
  const warmPressure = postWarmTokens / warmBudget;
848
1364
  if (warmPressure > 0.80) {
849
- const warmTrimTarget = warmPressure > 0.90 ? 0.40 : 0.55;
850
- const warmTrimBudget = Math.floor(warmBudget * warmTrimTarget);
851
- const warmTrimmed = await hm.cache.trimHistoryToTokenBudget(agentId, sk, warmTrimBudget);
852
- if (warmTrimmed > 0) {
853
- await hm.cache.invalidateWindow(agentId, sk);
854
- console.log(`[hypermem-plugin] bootstrap: high-pressure startup ` +
855
- `(${(warmPressure * 100).toFixed(1)}%), pre-trimmed Redis to ` +
856
- `~${warmTrimTarget * 100}% (${warmTrimmed} msgs dropped)`);
857
- }
1365
+ // Sprint 2.2a: demote warmstart to guard telemetry.
1366
+ //
1367
+ // Previously this path performed a real trim + invalidateWindow
1368
+ // and emitted `event:'trim'` with path='warmstart'. Assemble
1369
+ // (tool-loop + normal/subagent) is the steady-state owner now,
1370
+ // so the first turn's assemble.* trim absorbs any remaining
1371
+ // post-warm pressure. Keeping the pressure check + threshold
1372
+ // branch here preserves observability via `event:'trim-guard'`
1373
+ // without mutating Redis history or the window cache.
1374
+ guardTelemetry({
1375
+ path: 'warmstart',
1376
+ agentId, sessionKey: sk,
1377
+ reason: 'warmstart-pressure-demoted',
1378
+ });
858
1379
  }
859
1380
  }
860
1381
  catch {
@@ -957,55 +1478,74 @@ function createHyperMemEngine() {
957
1478
  // ── Pre-ingestion wave guard ──────────────────────────────────────────
958
1479
  // Tool result payloads can be 10k-50k tokens each. When a parallel tool
959
1480
  // batch (4-6 results) lands while the session is already at 70%+, storing
960
- // full payloads pushes Redis past the nuclear path threshold before the
961
- // next assemble() can trim. Use Redis current state (appropriate here —
962
- // we're deciding what to write TO Redis) as the pressure signal.
963
- // Above 70%: truncate toolResult content to a compact stub.
964
- // Above 85%: skip recording entirely — assemble() trim is the safety net.
1481
+ // full payloads pushes the hot window past the nuclear path threshold
1482
+ // before the next assemble() can trim. Use current hot-window state as
1483
+ // the pressure signal (appropriate here, we're deciding what to write TO
1484
+ // the window).
1485
+ //
1486
+ // Above 70%: truncate toolResult content in transcript, but keep the
1487
+ // full payload durable in tool_artifacts (schema v9). Stub carries
1488
+ // artifactId so the compositor can hydrate on demand.
1489
+ // Above 85%: full stub replacement in transcript, still with artifactId.
1490
+ // At all levels: the full payload is persisted durably. No data loss.
965
1491
  const isInboundToolResult = msg.role === 'tool' || msg.role === 'tool_result' || msg.role === 'toolResult';
966
1492
  if (isInboundToolResult && neutral.toolResults && neutral.toolResults.length > 0) {
967
- const redisTokens = await estimateWindowTokens(hm, agentId, sk);
1493
+ const windowTokens = await estimateWindowTokens(hm, agentId, sk);
968
1494
  const effectiveBudget = computeEffectiveBudget(undefined);
969
- const redisPressure = redisTokens / effectiveBudget;
970
- // Error tool results are always preserved intact they're small and
1495
+ const windowPressure = windowTokens / effectiveBudget;
1496
+ // Error tool results are always preserved intact: they're small and
971
1497
  // the model needs the error signal to understand what went wrong.
972
1498
  const hasErrorResult = neutral.toolResults.some(tr => tr.isError);
973
- if (redisPressure > 0.85) {
974
- // FIX (Bug 4): Never skip a tool result entirely — that leaves an orphaned
975
- // tool_call in Redis history (the assistant message was already recorded).
976
- // Anthropic rejects assistant messages with tool_calls that have no matching result.
977
- // Instead, record a compact stub that preserves pair integrity in history.
978
- const stubbedResults = neutral.toolResults.map(tr => {
1499
+ // Only apply degradation / artifact capture above elevated pressure.
1500
+ if (windowPressure > 0.70) {
1501
+ const MAX_TOOL_RESULT_CHARS = 500;
1502
+ const highPressure = windowPressure > 0.85;
1503
+ const reason = highPressure ? 'wave_guard_pressure_high' : 'wave_guard_pressure_elevated';
1504
+ // For each non-error tool result, persist the full payload as a
1505
+ // durable artifact first, then rewrite the transcript entry to
1506
+ // either a full stub (high pressure) or a truncated stub with an
1507
+ // artifact pointer (elevated pressure).
1508
+ const rewrittenResults = await Promise.all(neutral.toolResults.map(async (tr) => {
979
1509
  if (tr.isError)
980
- return tr; // preserve error results intact
1510
+ return tr;
1511
+ const content = typeof tr.content === 'string'
1512
+ ? tr.content
1513
+ : JSON.stringify(tr.content);
1514
+ // At elevated pressure, small payloads pass through unchanged.
1515
+ if (!highPressure && content.length <= MAX_TOOL_RESULT_CHARS) {
1516
+ return tr;
1517
+ }
1518
+ let artifactId;
1519
+ try {
1520
+ const record = await hm.recordToolArtifact(agentId, sk, {
1521
+ toolName: tr.name || 'tool_result',
1522
+ toolCallId: tr.callId || undefined,
1523
+ isError: false,
1524
+ payload: content,
1525
+ summary: content.slice(0, 160),
1526
+ });
1527
+ artifactId = record.id;
1528
+ }
1529
+ catch (artErr) {
1530
+ console.warn('[hypermem-plugin] tool artifact capture failed (non-fatal):', artErr.message);
1531
+ }
1532
+ const summary = highPressure
1533
+ ? `omitted at ${(windowPressure * 100).toFixed(0)}% window pressure`
1534
+ : `truncated at ${(windowPressure * 100).toFixed(0)}% pressure: ${Math.ceil(content.length / 4)} tokens`;
981
1535
  return {
982
1536
  ...tr,
983
- content: `[tool result omitted by wave-guard at ${(redisPressure * 100).toFixed(0)}% Redis pressure]`,
1537
+ content: formatToolChainStub({
1538
+ name: tr.name || 'tool_result',
1539
+ id: tr.callId || 'unknown',
1540
+ status: 'ejected',
1541
+ reason,
1542
+ summary,
1543
+ artifactId,
1544
+ }),
984
1545
  };
985
- });
986
- const stubNeutral = { ...neutral, toolResults: stubbedResults };
987
- console.log(`[hypermem] ingest wave-guard: stubbing toolResult (Redis pressure ${(redisPressure * 100).toFixed(0)}% > 85%)${hasErrorResult ? ' error results preserved' : ''} preserving pair integrity`);
988
- await hm.recordAssistantMessage(agentId, sk, stubNeutral);
989
- return { ingested: true };
990
- }
991
- else if (redisPressure > 0.70) {
992
- // Elevated: store truncated stub to preserve tool call pairing in history
993
- const MAX_TOOL_RESULT_CHARS = 500;
994
- neutral = {
995
- ...neutral,
996
- toolResults: neutral.toolResults.map(tr => {
997
- if (tr.isError)
998
- return tr; // preserve error results intact
999
- const content = typeof tr.content === 'string' ? tr.content : JSON.stringify(tr.content);
1000
- if (content.length <= MAX_TOOL_RESULT_CHARS)
1001
- return tr;
1002
- return {
1003
- ...tr,
1004
- content: `[truncated by wave-guard at ${(redisPressure * 100).toFixed(0)}% pressure: ${Math.ceil(content.length / 4)} tokens]`,
1005
- };
1006
- }),
1007
- };
1008
- console.log(`[hypermem] ingest wave-guard: truncated toolResult (Redis pressure ${(redisPressure * 100).toFixed(0)}% > 70%)${hasErrorResult ? ' — error results preserved' : ''}`);
1546
+ }));
1547
+ neutral = { ...neutral, toolResults: rewrittenResults };
1548
+ console.log(`[hypermem] ingest wave-guard: ${highPressure ? 'stubbed' : 'truncated'} toolResult (window pressure ${(windowPressure * 100).toFixed(0)}% > ${highPressure ? 85 : 70}%)${hasErrorResult ? ' + error results preserved' : ''} - full payload persisted to tool_artifacts`);
1009
1549
  }
1010
1550
  }
1011
1551
  await hm.recordAssistantMessage(agentId, sk, neutral);
@@ -1076,514 +1616,656 @@ function createHyperMemEngine() {
1076
1616
  // pass-through that never re-injects context on tool-loop calls.
1077
1617
  const lastMsg = messages[messages.length - 1];
1078
1618
  const isToolLoop = lastMsg?.role === 'toolResult' || lastMsg?.role === 'tool';
1079
- if (isToolLoop) {
1080
- // Tool-loop turns: pass messages through unchanged but still:
1081
- // 1. Run the trim guardrail — tool loops accumulate history as fast
1082
- // as regular turns, and the old path skipped trim entirely, leaving
1083
- // the compaction guard blind (received estimatedTokens=0).
1084
- // 2. Return a real estimatedTokens = windowTokens + cached overhead,
1085
- // so the guard has accurate signal and can fire when needed.
1086
- //
1087
- // Fix (ingestion-wave): use pressure-tiered trim instead of fixed 80%.
1088
- // At 91% with 5 parallel web_search calls incoming (~20-30% of budget),
1089
- // a fixed 80% trim only frees 11% headroom — the wave overflows anyway
1090
- // and results strip silently. Tier the trim target based on pre-trim
1091
- // pressure so high-pressure sessions get real headroom before results land.
1092
- const effectiveBudget = computeEffectiveBudget(tokenBudget);
1093
- try {
1094
- const hm = await getHyperMem();
1095
- const sk = resolveSessionKey(sessionId, sessionKey);
1096
- const agentId = extractAgentId(sk);
1097
- // ── Image / heavy-content eviction pre-pass ──────────────────────
1098
- // Evict stale image payloads and large tool results before measuring
1099
- // pressure. This frees tokens without compaction — images alone can
1100
- // account for 30%+ of context from a single screenshot 2 turns ago.
1101
- const evictionCfg = _evictionConfig;
1102
- const evictionEnabled = evictionCfg?.enabled !== false;
1103
- let workingMessages = messages;
1104
- if (evictionEnabled) {
1105
- const { messages: evicted, stats: evStats } = evictStaleContent(messages, {
1106
- imageAgeTurns: evictionCfg?.imageAgeTurns,
1107
- toolResultAgeTurns: evictionCfg?.toolResultAgeTurns,
1108
- minTokensToEvict: evictionCfg?.minTokensToEvict,
1109
- keepPreviewChars: evictionCfg?.keepPreviewChars,
1619
+ // Telemetry: emit one assembleTrace at entry. Path taxonomy:
1620
+ // 'subagent' - session key matches the subagent pattern
1621
+ // 'cold' - normal full-assembly or tool-loop entry (a separate
1622
+ // 'replay' trace is emitted if the cache replay fast
1623
+ // path is taken below)
1624
+ // Zero-cost when HYPERMEM_TELEMETRY !== '1'.
1625
+ //
1626
+ // Trim-ownership turn context (Sprint 2): the turnId is also used to
1627
+ // scope the shared trim-owner claim helper so duplicate steady-state
1628
+ // trims in a single assemble() turn can be detected and (under
1629
+ // NODE_ENV='development') throw loudly. We always allocate the turnId
1630
+ // and open the scope — the map write is cheap and keeps enforcement
1631
+ // active even when telemetry is off. The scope is closed in the
1632
+ // finally block wrapping the full assemble body below.
1633
+ const _asmSk = resolveSessionKey(sessionId, sessionKey);
1634
+ const _asmTurnId = nextTurnId();
1635
+ beginTrimOwnerTurn(_asmSk, _asmTurnId);
1636
+ if (telemetryEnabled()) {
1637
+ const _agentId = extractAgentId(_asmSk);
1638
+ const _entryPath = _asmSk.includes('subagent:')
1639
+ ? 'subagent'
1640
+ : 'cold';
1641
+ assembleTrace({
1642
+ agentId: _agentId,
1643
+ sessionKey: _asmSk,
1644
+ turnId: _asmTurnId,
1645
+ path: _entryPath,
1646
+ toolLoop: isToolLoop,
1647
+ msgCount: messages.length,
1648
+ });
1649
+ }
1650
+ try {
1651
+ if (isToolLoop) {
1652
+ // Tool-loop turns: pass messages through unchanged but still:
1653
+ // 1. Run the trim guardrail — tool loops accumulate history as fast
1654
+ // as regular turns, and the old path skipped trim entirely, leaving
1655
+ // the compaction guard blind (received estimatedTokens=0).
1656
+ // 2. Return a real estimatedTokens = windowTokens + cached overhead,
1657
+ // so the guard has accurate signal and can fire when needed.
1658
+ //
1659
+ // Fix (ingestion-wave): use pressure-tiered trim instead of fixed 80%.
1660
+ // At 91% with 5 parallel web_search calls incoming (~20-30% of budget),
1661
+ // a fixed 80% trim only frees 11% headroom — the wave overflows anyway
1662
+ // and results strip silently. Tier the trim target based on pre-trim
1663
+ // pressure so high-pressure sessions get real headroom before results land.
1664
+ const effectiveBudget = computeEffectiveBudget(tokenBudget, model);
1665
+ try {
1666
+ const hm = await getHyperMem();
1667
+ const sk = resolveSessionKey(sessionId, sessionKey);
1668
+ const agentId = extractAgentId(sk);
1669
+ // ── Image / heavy-content eviction pre-pass ──────────────────────
1670
+ // Evict stale image payloads and large tool results before measuring
1671
+ // pressure. This frees tokens without compaction — images alone can
1672
+ // account for 30%+ of context from a single screenshot 2 turns ago.
1673
+ const evictionCfg = _evictionConfig;
1674
+ const evictionEnabled = evictionCfg?.enabled !== false;
1675
+ let workingMessages = messages;
1676
+ if (evictionEnabled) {
1677
+ const { messages: evicted, stats: evStats } = evictStaleContent(messages, {
1678
+ imageAgeTurns: evictionCfg?.imageAgeTurns,
1679
+ toolResultAgeTurns: evictionCfg?.toolResultAgeTurns,
1680
+ minTokensToEvict: evictionCfg?.minTokensToEvict,
1681
+ keepPreviewChars: evictionCfg?.keepPreviewChars,
1682
+ });
1683
+ workingMessages = evicted;
1684
+ if (evStats.tokensFreed > 0) {
1685
+ console.log(`[hypermem] eviction: ${evStats.imagesEvicted} images, ` +
1686
+ `${evStats.toolResultsEvicted} tool results, ` +
1687
+ `~${evStats.tokensFreed.toLocaleString()} tokens freed`);
1688
+ }
1689
+ }
1690
+ // Measure pressure from the in-memory message array we are actually about
1691
+ // to shape and return. Redis remains a cross-check only.
1692
+ const runtimeTokens = estimateMessageArrayTokens(workingMessages);
1693
+ const redisTokens = await estimateWindowTokens(hm, agentId, sk);
1694
+ const replayRecovery = decideReplayRecovery({
1695
+ currentState: normalizeReplayRecoveryState(await hm.cache.getSlot(agentId, sk, 'replayRecoveryState').catch(() => '')),
1696
+ runtimeTokens,
1697
+ redisTokens,
1698
+ effectiveBudget,
1110
1699
  });
1111
- workingMessages = evicted;
1112
- if (evStats.tokensFreed > 0) {
1113
- console.log(`[hypermem] eviction: ${evStats.imagesEvicted} images, ` +
1114
- `${evStats.toolResultsEvicted} tool results, ` +
1115
- `~${evStats.tokensFreed.toLocaleString()} tokens freed`);
1700
+ const replayMarkerText = replayRecovery.emittedText;
1701
+ const preTrimTokens = runtimeTokens;
1702
+ const pressure = preTrimTokens / effectiveBudget;
1703
+ // Pressure-tiered trim targets use a single authority: the working
1704
+ // message array. Redis drift is logged as an anomaly, never used as
1705
+ // a trim trigger. Replay recovery gets its own explicit bounded mode
1706
+ // instead of sharing the steady-state pressure heuristics.
1707
+ let trimTarget;
1708
+ if (typeof replayRecovery.trimTargetOverride === 'number') {
1709
+ trimTarget = replayRecovery.trimTargetOverride;
1116
1710
  }
1117
- }
1118
- // Measure pressure BEFORE trim to pick the right tier.
1119
- // Critical: use the runtime-provided messages array, NOT estimateWindowTokens()
1120
- // which reads Redis. After a gateway restart Redis is empty — estimateWindowTokens
1121
- // returns ~0, pressure reads as 0%, and the trim tiers never fire even though
1122
- // the session is at 98% from JSONL loaded at runtime. The messages param is
1123
- // always authoritative it's what the runtime actually sent to the model.
1124
- const runtimeTokens = messages.reduce((sum, m) => {
1125
- const msg = m;
1126
- const textCost = estimateTokens(typeof msg.textContent === 'string' ? msg.textContent : null);
1127
- const toolCallCost = msg.toolCalls ? Math.ceil(JSON.stringify(msg.toolCalls).length / 2) : 0;
1128
- const toolResultCost = msg.toolResults ? Math.ceil(JSON.stringify(msg.toolResults).length / 2) : 0;
1129
- // FIX (Bug 2): count content arrays in OpenClaw native format.
1130
- // Native tool result messages store content as c.content (not c.text).
1131
- // Old code always read c.text, returning 0 for native format — severe undercount.
1132
- const contentCost = Array.isArray(msg.content)
1133
- ? msg.content.reduce((s, c) => {
1134
- const part = c;
1135
- const textVal = typeof part.text === 'string' ? part.text
1136
- : typeof part.content === 'string' ? part.content
1137
- : part.content != null ? JSON.stringify(part.content) : null;
1138
- return s + estimateTokens(textVal);
1139
- }, 0)
1140
- : 0;
1141
- // Count image parts base64 images are large and invisible to the text estimator
1142
- const imageCost = Array.isArray(msg.content)
1143
- ? msg.content.reduce((s, c) => {
1144
- const part = c;
1145
- if (part.type === 'image' || part.type === 'image_url') {
1146
- const src = part.source?.data;
1147
- const url = part.image_url?.url;
1148
- const dataStr = typeof src === 'string' ? src : (typeof url === 'string' ? url : '');
1149
- return s + Math.ceil(dataStr.length / 3); // base64 ~1.33x bytes, ~1 token/4 bytes
1150
- }
1151
- return s;
1152
- }, 0)
1153
- : 0;
1154
- return sum + textCost + toolCallCost + toolResultCost + contentCost + imageCost;
1155
- }, 0);
1156
- // Redis window is a useful cross-check; use whichever is higher so we never
1157
- // underestimate when Redis is ahead of the runtime snapshot.
1158
- const redisTokens = await estimateWindowTokens(hm, agentId, sk);
1159
- const preTrimTokens = Math.max(runtimeTokens, redisTokens);
1160
- const pressure = preTrimTokens / effectiveBudget;
1161
- // Pressure-tiered trim targets:
1162
- // JSONL-replay (EC1): runtimeTokens >> redisTokens means session
1163
- // loaded from a large JSONL but Redis is cold (post-restart). Trim
1164
- // aggressively to 30% so system prompt + this turn's tool results fit.
1165
- // >85% (critical) → trim to 50%: blast headroom for incoming wave
1166
- // >80% (high) → trim to 60%: 40% headroom
1167
- // >75% (elevated) → trim to 65%: 35% headroom
1168
- // ≤75% (normal) → trim to 80%: existing behaviour
1169
- const isJsonlReplay = runtimeTokens > effectiveBudget * 0.80 && redisTokens < runtimeTokens * 0.20;
1170
- let trimTarget;
1171
- if (isJsonlReplay) {
1172
- trimTarget = 0.20; // EC1: cold Redis + hot JSONL = post-restart replay, need max headroom
1173
- }
1174
- else if (pressure > 0.85) {
1175
- trimTarget = 0.40; // critical: 60% headroom for incoming wave
1176
- }
1177
- else if (pressure > 0.80) {
1178
- trimTarget = 0.50; // high: 50% headroom
1179
- }
1180
- else if (pressure > 0.75) {
1181
- trimTarget = 0.55; // elevated: 45% headroom
1182
- }
1183
- else {
1184
- trimTarget = 0.65; // normal: 35% headroom (was 0.80 — too tight)
1185
- }
1186
- const trimBudget = Math.floor(effectiveBudget * trimTarget);
1187
- const trimmed = await hm.cache.trimHistoryToTokenBudget(agentId, sk, trimBudget);
1188
- if (trimmed > 0) {
1189
- await hm.cache.invalidateWindow(agentId, sk);
1190
- }
1191
- // Also trim the messages array itself to match the budget.
1192
- // Redis trim clears the *next* turn's window. This turn's messages are
1193
- // still the full runtime array — if we return them unchanged at 94%,
1194
- // OpenClaw strips tool results before sending to the model regardless
1195
- // of what estimatedTokens says. We need to return a slimmer array now.
1196
- //
1197
- // Strategy: keep system/identity messages at the front, then fill from
1198
- // the back (most recent) until we hit trimBudget. Drop the middle.
1199
- let trimmedMessages = workingMessages;
1200
- if (pressure > trimTarget) {
1201
- const msgArray = workingMessages;
1202
- // Separate system messages (always keep) from conversation turns
1203
- const systemMsgs = msgArray.filter(m => m.role === 'system');
1204
- const convMsgs = msgArray.filter(m => m.role !== 'system');
1205
- // Pre-process: inline-truncate large tool results before budget-fill drop.
1206
- // A message with a 40k-token tool result that barely misses budget gets dropped
1207
- // entirely. Replacing with a placeholder keeps the turn's metadata in context
1208
- // while freeing the bulk of the tokens.
1209
- const MAX_INLINE_TOOL_CHARS = 2000; // ~500 tokens
1210
- // FIX (Bug 3): handle both NeutralMessage format (m.toolResults) and
1211
- // OpenClaw native format (m.content array with type='tool_result' blocks).
1212
- // Old guard `if (!m.toolResults)` skipped every native-format message.
1213
- // Also fixed: replacement must be valid NeutralToolResult { callId, name, content },
1214
- // not { type, text } which breaks pair-integrity downstream.
1215
- const processedConvMsgs = convMsgs.map(m => {
1216
- // NeutralMessage format
1217
- if (m.toolResults) {
1218
- const resultStr = JSON.stringify(m.toolResults);
1219
- if (resultStr.length <= MAX_INLINE_TOOL_CHARS)
1220
- return m;
1221
- const firstResult = m.toolResults[0];
1222
- return {
1223
- ...m,
1224
- toolResults: [{
1225
- callId: firstResult?.callId ?? 'unknown',
1226
- name: firstResult?.name ?? 'tool',
1227
- content: `[tool result truncated: ${Math.ceil(resultStr.length / 4)} tokens]`,
1228
- }],
1229
- };
1711
+ else if (pressure > 0.85) {
1712
+ trimTarget = 0.40; // critical: 60% headroom for incoming wave
1713
+ }
1714
+ else if (pressure > 0.80) {
1715
+ trimTarget = 0.50; // high: 50% headroom
1716
+ }
1717
+ else if (pressure > 0.75) {
1718
+ trimTarget = 0.55; // elevated: 45% headroom
1719
+ }
1720
+ else {
1721
+ trimTarget = 0.65; // normal: 35% headroom
1722
+ }
1723
+ const trimBudget = Math.floor(effectiveBudget * trimTarget);
1724
+ // Steady-state trim owner claim (Sprint 2.2a): route through the
1725
+ // shared helper keyed by (sessionKey, turnId). In development a
1726
+ // duplicate steady-state trim in the same assemble() turn throws.
1727
+ // In non-development a duplicate returns false; the real trim +
1728
+ // its `event:'trim'` emission are gated on the successful claim so
1729
+ // a duplicate claim is actually suppressed, not just warned.
1730
+ // Compact.* paths are exempt; this path is assemble-owned.
1731
+ const toolLoopClaimed = claimTrimOwner(sk, _asmTurnId, 'assemble.toolLoop');
1732
+ let trimmed = 0;
1733
+ let toolLoopCacheInvalidated = false;
1734
+ if (toolLoopClaimed) {
1735
+ trimmed = await hm.cache.trimHistoryToTokenBudget(agentId, sk, trimBudget);
1736
+ if (trimmed > 0) {
1737
+ await hm.cache.invalidateWindow(agentId, sk);
1738
+ toolLoopCacheInvalidated = true;
1230
1739
  }
1231
- // OpenClaw native format
1232
- if (Array.isArray(m.content)) {
1233
- const content = m.content;
1234
- const hasLarge = content.some(c => {
1235
- if (c.type !== 'tool_result')
1236
- return false;
1237
- const val = typeof c.content === 'string' ? c.content : JSON.stringify(c.content ?? '');
1238
- return val.length > MAX_INLINE_TOOL_CHARS;
1740
+ if (telemetryEnabled()) {
1741
+ const postTrimTokens = await estimateWindowTokens(hm, agentId, sk).catch(() => 0);
1742
+ trimTelemetry({
1743
+ path: 'assemble.toolLoop',
1744
+ agentId, sessionKey: sk,
1745
+ preTokens: preTrimTokens,
1746
+ postTokens: postTrimTokens,
1747
+ removed: trimmed,
1748
+ cacheInvalidated: toolLoopCacheInvalidated,
1749
+ reason: `pressure=${(pressure * 100).toFixed(1)}%`,
1239
1750
  });
1240
- if (!hasLarge)
1241
- return m;
1242
- return {
1243
- ...m,
1244
- content: content.map(c => {
1751
+ }
1752
+ }
1753
+ else if (telemetryEnabled()) {
1754
+ // Surface the suppressed-duplicate as a bounded guard record so
1755
+ // downstream reporting can see how often the gate fires. No
1756
+ // history or window mutation here.
1757
+ guardTelemetry({
1758
+ path: 'assemble.toolLoop',
1759
+ agentId, sessionKey: sk,
1760
+ reason: 'duplicate-claim-suppressed',
1761
+ });
1762
+ }
1763
+ // Also trim the messages array itself to match the budget.
1764
+ // Redis trim clears the *next* turn's window. This turn's messages are
1765
+ // still the full runtime array — if we return them unchanged at 94%,
1766
+ // OpenClaw strips tool results before sending to the model regardless
1767
+ // of what estimatedTokens says. We need to return a slimmer array now.
1768
+ //
1769
+ // Strategy: keep system/identity messages at the front, then fill from
1770
+ // the back (most recent) until we hit trimBudget. Drop the middle.
1771
+ let trimmedMessages = workingMessages;
1772
+ if (pressure > trimTarget) {
1773
+ const msgArray = workingMessages;
1774
+ // Separate system messages (always keep) from conversation turns
1775
+ const systemMsgs = msgArray.filter(m => m.role === 'system');
1776
+ const convMsgs = msgArray.filter(m => m.role !== 'system');
1777
+ // Pre-process: inline-truncate large tool results before budget-fill drop.
1778
+ // A message with a 40k-token tool result that barely misses budget gets dropped
1779
+ // entirely. Replacing with a placeholder keeps the turn's metadata in context
1780
+ // while freeing the bulk of the tokens.
1781
+ const MAX_INLINE_TOOL_CHARS = 2000; // ~500 tokens
1782
+ // FIX (Bug 3): handle both NeutralMessage format (m.toolResults) and
1783
+ // OpenClaw native format (m.content array with type='tool_result' blocks).
1784
+ // Old guard `if (!m.toolResults)` skipped every native-format message.
1785
+ // Also fixed: replacement must be valid NeutralToolResult { callId, name, content },
1786
+ // not { type, text } which breaks pair-integrity downstream.
1787
+ const processedConvMsgs = convMsgs.map(m => {
1788
+ // NeutralMessage format
1789
+ if (m.toolResults) {
1790
+ const resultStr = JSON.stringify(m.toolResults);
1791
+ if (resultStr.length <= MAX_INLINE_TOOL_CHARS)
1792
+ return m;
1793
+ const firstResult = m.toolResults[0];
1794
+ return {
1795
+ ...m,
1796
+ toolResults: [{
1797
+ callId: firstResult?.callId ?? 'unknown',
1798
+ name: firstResult?.name ?? 'tool',
1799
+ content: `[tool result truncated: ${Math.ceil(resultStr.length / 4)} tokens]`,
1800
+ }],
1801
+ };
1802
+ }
1803
+ // OpenClaw native format
1804
+ if (Array.isArray(m.content)) {
1805
+ const content = m.content;
1806
+ const hasLarge = content.some(c => {
1245
1807
  if (c.type !== 'tool_result')
1246
- return c;
1808
+ return false;
1247
1809
  const val = typeof c.content === 'string' ? c.content : JSON.stringify(c.content ?? '');
1248
- if (val.length <= MAX_INLINE_TOOL_CHARS)
1249
- return c;
1250
- return { ...c, content: `[tool result truncated: ${Math.ceil(val.length / 4)} tokens]` };
1251
- }),
1252
- };
1810
+ return val.length > MAX_INLINE_TOOL_CHARS;
1811
+ });
1812
+ if (!hasLarge)
1813
+ return m;
1814
+ return {
1815
+ ...m,
1816
+ content: content.map(c => {
1817
+ if (c.type !== 'tool_result')
1818
+ return c;
1819
+ const val = typeof c.content === 'string' ? c.content : JSON.stringify(c.content ?? '');
1820
+ if (val.length <= MAX_INLINE_TOOL_CHARS)
1821
+ return c;
1822
+ return { ...c, content: `[tool result truncated: ${Math.ceil(val.length / 4)} tokens]` };
1823
+ }),
1824
+ };
1825
+ }
1826
+ return m;
1827
+ });
1828
+ // Fill from the back within budget
1829
+ let budget = trimBudget;
1830
+ // Reserve tokens for system messages using the same accounting
1831
+ // function as the final composed-array estimate.
1832
+ for (const sm of systemMsgs) {
1833
+ budget -= estimateMessageTokens(sm);
1253
1834
  }
1254
- return m;
1255
- });
1256
- // Fill from the back within budget
1257
- let budget = trimBudget;
1258
- // Reserve tokens for system messages
1259
- for (const sm of systemMsgs) {
1260
- const t = estimateTokens(typeof sm.textContent === 'string' ? sm.textContent : null)
1261
- + (Array.isArray(sm.content) ? sm.content.reduce((s, c) => {
1262
- const textVal = typeof c.text === 'string' ? c.text
1263
- : typeof c.content === 'string' ? c.content : null;
1264
- return s + estimateTokens(textVal);
1265
- }, 0) : 0);
1266
- budget -= t;
1267
- }
1268
- const msgCost = (m) => estimateTokens(typeof m.textContent === 'string' ? m.textContent : null)
1269
- + (m.toolCalls ? Math.ceil(JSON.stringify(m.toolCalls).length / 2) : 0)
1270
- + (m.toolResults ? Math.ceil(JSON.stringify(m.toolResults).length / 2) : 0)
1271
- + (Array.isArray(m.content) ? m.content.reduce((s, c) => {
1272
- if (c.type === 'toolCall' || c.type === 'tool_use') {
1273
- return s + Math.ceil(JSON.stringify(c).length / 2);
1835
+ const msgCost = (m) => estimateMessageTokens(m);
1836
+ const clusters = clusterTranscriptMessages(processedConvMsgs);
1837
+ const keptClusters = [];
1838
+ const tailCluster = clusters.length > 0 ? clusters[clusters.length - 1] : [];
1839
+ if (tailCluster.length > 0) {
1840
+ budget -= tailCluster.reduce((sum, msg) => sum + msgCost(msg), 0);
1841
+ keptClusters.unshift(tailCluster);
1842
+ }
1843
+ for (let i = clusters.length - 2; i >= 0 && budget > 0; i--) {
1844
+ const cluster = clusters[i];
1845
+ const clusterCost = cluster.reduce((sum, msg) => sum + msgCost(msg), 0);
1846
+ if (budget - clusterCost >= 0) {
1847
+ keptClusters.unshift(cluster);
1848
+ budget -= clusterCost;
1274
1849
  }
1275
- const textVal = typeof c.text === 'string' ? c.text
1276
- : typeof c.content === 'string' ? c.content
1277
- : c.content != null ? JSON.stringify(c.content) : null;
1278
- return s + estimateTokens(textVal);
1279
- }, 0) : 0);
1280
- const clusters = clusterTranscriptMessages(processedConvMsgs);
1281
- const keptClusters = [];
1282
- const tailCluster = clusters.length > 0 ? clusters[clusters.length - 1] : [];
1283
- if (tailCluster.length > 0) {
1284
- budget -= tailCluster.reduce((sum, msg) => sum + msgCost(msg), 0);
1285
- keptClusters.unshift(tailCluster);
1286
- }
1287
- for (let i = clusters.length - 2; i >= 0 && budget > 0; i--) {
1288
- const cluster = clusters[i];
1289
- const clusterCost = cluster.reduce((sum, msg) => sum + msgCost(msg), 0);
1290
- if (budget - clusterCost >= 0) {
1291
- keptClusters.unshift(cluster);
1292
- budget -= clusterCost;
1293
1850
  }
1294
- }
1295
- const kept = keptClusters.flat();
1296
- const keptCount = processedConvMsgs.length - kept.length;
1297
- if (keptCount > 0) {
1298
- console.log(`[hypermem-plugin] tool-loop trim: pressure=${(pressure * 100).toFixed(1)}%${isJsonlReplay ? ' [jsonl-replay]' : ''} ` +
1299
- `target=${(trimTarget * 100).toFixed(0)}% (redis=${trimmed} msgs, messages=${keptCount} dropped)`);
1300
- trimmedMessages = [...systemMsgs, ...kept];
1851
+ const kept = keptClusters.flat();
1852
+ const keptCount = processedConvMsgs.length - kept.length;
1853
+ if (keptCount > 0) {
1854
+ console.log(`[hypermem-plugin] tool-loop trim: pressure=${(pressure * 100).toFixed(1)}% → ` +
1855
+ `target=${(trimTarget * 100).toFixed(0)}% (redis=${trimmed} msgs, messages=${keptCount} dropped)`);
1856
+ trimmedMessages = [...systemMsgs, ...kept];
1857
+ }
1858
+ else if (trimmed > 0) {
1859
+ console.log(`[hypermem-plugin] tool-loop trim: pressure=${(pressure * 100).toFixed(1)}% → ` +
1860
+ `target=${(trimTarget * 100).toFixed(0)}% (redis=${trimmed} msgs)`);
1861
+ }
1301
1862
  }
1302
1863
  else if (trimmed > 0) {
1303
1864
  console.log(`[hypermem-plugin] tool-loop trim: pressure=${(pressure * 100).toFixed(1)}% → ` +
1304
1865
  `target=${(trimTarget * 100).toFixed(0)}% (redis=${trimmed} msgs)`);
1305
1866
  }
1867
+ // Apply tool gradient to compress large tool results before returning.
1868
+ // Skip if deferToolPruning is enabled — OpenClaw's contextPruning handles it.
1869
+ if (!_deferToolPruning) {
1870
+ // The full compose path runs applyToolGradientToWindow during reshaping;
1871
+ // the tool-loop path was previously skipping this, leaving a 40k-token
1872
+ // web_search result uncompressed every turn.
1873
+ try {
1874
+ const gradientApplied = applyToolGradientToWindow(trimmedMessages, trimBudget);
1875
+ trimmedMessages = gradientApplied;
1876
+ }
1877
+ catch {
1878
+ // Non-fatal: if gradient fails, continue with untouched trimmedMessages
1879
+ }
1880
+ } // end deferToolPruning gate
1881
+ // Repair orphaned tool pairs in the trimmed message list.
1882
+ // In-memory trim (cluster drop) can strand tool_result messages whose
1883
+ // paired tool_use was in a dropped cluster.
1884
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
1885
+ trimmedMessages = repairToolPairs(trimmedMessages);
1886
+ const composedTokens = estimateMessageArrayTokens(trimmedMessages);
1887
+ maybeLogPressureAccountingAnomaly({
1888
+ path: 'assemble.toolLoop',
1889
+ agentId,
1890
+ sessionKey: sk,
1891
+ runtimeTokens: preTrimTokens,
1892
+ redisTokens,
1893
+ composedTokens,
1894
+ budget: effectiveBudget,
1895
+ });
1896
+ await persistReplayRecoveryState(hm, agentId, sk, replayRecovery.nextState);
1897
+ degradationTelemetry({
1898
+ agentId,
1899
+ sessionKey: sk,
1900
+ turnId: _asmTurnId,
1901
+ path: 'toolLoop',
1902
+ toolChainCoEjections: 0,
1903
+ toolChainStubReplacements: 0,
1904
+ artifactDegradations: 0,
1905
+ replayState: replayRecovery.emittedMarker?.state,
1906
+ replayReason: replayRecovery.emittedMarker?.reason,
1907
+ });
1908
+ const overhead = _overheadCache.get(sk) ?? getOverheadFallback();
1909
+ return {
1910
+ messages: trimmedMessages,
1911
+ estimatedTokens: composedTokens + overhead,
1912
+ systemPromptAddition: replayMarkerText || undefined,
1913
+ };
1306
1914
  }
1307
- else if (trimmed > 0) {
1308
- console.log(`[hypermem-plugin] tool-loop trim: pressure=${(pressure * 100).toFixed(1)}% ` +
1309
- `target=${(trimTarget * 100).toFixed(0)}% (redis=${trimmed} msgs)`);
1915
+ catch {
1916
+ // Non-fatal: return conservative estimate so guard doesn't go blind
1917
+ return {
1918
+ messages: messages,
1919
+ estimatedTokens: Math.floor(effectiveBudget * 0.8),
1920
+ };
1310
1921
  }
1311
- // Apply tool gradient to compress large tool results before returning.
1312
- // Skip if deferToolPruning is enabled — OpenClaw's contextPruning handles it.
1313
- if (!_deferToolPruning) {
1314
- // The full compose path runs applyToolGradientToWindow during reshaping;
1315
- // the tool-loop path was previously skipping this, leaving a 40k-token
1316
- // web_search result uncompressed every turn.
1317
- try {
1318
- const gradientApplied = applyToolGradientToWindow(trimmedMessages, trimBudget);
1319
- trimmedMessages = gradientApplied;
1320
- }
1321
- catch {
1322
- // Non-fatal: if gradient fails, continue with untouched trimmedMessages
1323
- }
1324
- } // end deferToolPruning gate
1325
- // Repair orphaned tool pairs in the trimmed message list.
1326
- // In-memory trim (cluster drop) can strand tool_result messages whose
1327
- // paired tool_use was in a dropped cluster.
1328
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
1329
- trimmedMessages = repairToolPairs(trimmedMessages);
1330
- const windowTokens = await estimateWindowTokens(hm, agentId, sk);
1331
- const overhead = _overheadCache.get(sk) ?? getOverheadFallback();
1332
- return {
1333
- messages: trimmedMessages,
1334
- estimatedTokens: windowTokens + overhead,
1335
- };
1336
- }
1337
- catch {
1338
- // Non-fatal: return conservative estimate so guard doesn't go blind
1339
- return {
1340
- messages: messages,
1341
- estimatedTokens: Math.floor(effectiveBudget * 0.8),
1342
- };
1343
- }
1344
- }
1345
- try {
1346
- const hm = await getHyperMem();
1347
- const sk = resolveSessionKey(sessionId, sessionKey);
1348
- const agentId = extractAgentId(sk);
1349
- // ── Subagent warming control ─────────────────────────────────────────
1350
- // Detect subagent sessions by key pattern and apply warming mode.
1351
- // 'off' = passthrough (no HyperMem context at all)
1352
- // 'light' = facts + history only (skip library/wiki/semantic/keystones/doc chunks)
1353
- // 'full' = standard compositor pipeline
1354
- const isSubagent = sk.includes('subagent:');
1355
- if (isSubagent && _subagentWarming === 'off') {
1356
- console.log(`[hypermem-plugin] assemble: subagent warming=off, passthrough (sk: ${sk})`);
1357
- return {
1358
- messages: messages,
1359
- estimatedTokens: messages.reduce((sum, m) => {
1360
- const msg = m;
1361
- return sum + Math.ceil((typeof msg.textContent === 'string' ? msg.textContent.length : 0) / 4);
1362
- }, 0),
1363
- };
1364
1922
  }
1365
- if (isSubagent) {
1366
- console.log(`[hypermem-plugin] assemble: subagent warming=${_subagentWarming} (sk: ${sk})`);
1367
- }
1368
- // Resolve agent tier from fleet store (for doc chunk tier filtering)
1369
- let tier;
1370
- try {
1371
- const agent = _fleetStore?.getAgent(agentId);
1372
- tier = agent?.tier;
1373
- }
1374
- catch {
1375
- // Non-fatal — tier filtering just won't apply
1376
- }
1377
- // historyDepth: derive a safe message count from the token budget.
1378
- // Uses 50% of the budget for history (down from 60% — more budget goes to
1379
- // L3/L4 context slots now). Floor at 50, ceiling at 200.
1380
- // This is a preventive guard — the compositor's safety valve still trims
1381
- // by token count post-assembly, but limiting depth up front avoids
1382
- // feeding the compactor a window it can't reduce.
1383
- const effectiveBudget = computeEffectiveBudget(tokenBudget);
1384
- const historyDepth = Math.min(250, Math.max(50, Math.floor((effectiveBudget * 0.65) / 500)));
1385
- // ── Redis guardrail: trim history to token budget ────────────────────
1386
- // Prevents model-switch bloat: if an agent previously ran on a larger
1387
- // context window, Redis history may exceed the current model's budget.
1388
- // Trimming here (before compose) ensures the compositor never sees a
1389
- // history window it can't fit. Uses 80% of budget as the trim ceiling
1390
- // to leave room for system prompt, facts, and identity slots.
1391
1923
  try {
1392
- const trimBudget = Math.floor(effectiveBudget * 0.65);
1393
- const trimmed = await hm.cache.trimHistoryToTokenBudget(agentId, sk, trimBudget);
1394
- if (trimmed > 0) {
1395
- // Invalidate window cache since history changed
1396
- await hm.cache.invalidateWindow(agentId, sk);
1924
+ const hm = await getHyperMem();
1925
+ const sk = resolveSessionKey(sessionId, sessionKey);
1926
+ const agentId = extractAgentId(sk);
1927
+ // ── Subagent warming control ─────────────────────────────────────────
1928
+ // Detect subagent sessions by key pattern and apply warming mode.
1929
+ // 'off' = passthrough (no HyperMem context at all)
1930
+ // 'light' = facts + history only (skip library/wiki/semantic/keystones/doc chunks)
1931
+ // 'full' = standard compositor pipeline
1932
+ const isSubagent = sk.includes('subagent:');
1933
+ if (isSubagent && _subagentWarming === 'off') {
1934
+ console.log(`[hypermem-plugin] assemble: subagent warming=off, passthrough (sk: ${sk})`);
1935
+ return {
1936
+ messages: messages,
1937
+ estimatedTokens: estimateMessageArrayTokens(messages),
1938
+ };
1397
1939
  }
1398
- }
1399
- catch (trimErr) {
1400
- // Non-fatal — compositor's budget-fit walk is the second line of defense
1401
- console.warn('[hypermem-plugin] assemble: Redis trim failed (non-fatal):', trimErr.message);
1402
- }
1403
- // ── Budget downshift: proactive reshape pass ───────────────────────────────────────
1404
- // If this session previously composed at a higher token budget (e.g. gpt-5.4
1405
- // claude-sonnet model switch), the Redis window is still sized for the old
1406
- // budget. trimHistoryToTokenBudget above trims by count but skips tool
1407
- // gradient logic. A downshift >10% triggers a full reshape: apply tool
1408
- // gradient at the new budget + trim, then write back before compose runs.
1409
- // This prevents several turns of compaction churn after a model switch.
1410
- //
1411
- // Bug fix: previously read from getWindow() which is always null here
1412
- // (afterTurn invalidates it every turn). Also fixed: was doing setWindow()
1413
- // then invalidateWindow() which is a write-then-delete no-op. Now reads
1414
- // from history list and writes back via replaceHistory().
1415
- try {
1416
- const lastState = await hm.cache.getModelState(agentId, sk);
1417
- const DOWNSHIFT_THRESHOLD = 0.10;
1418
- const isDownshift = lastState &&
1419
- (lastState.tokenBudget - effectiveBudget) / lastState.tokenBudget > DOWNSHIFT_THRESHOLD;
1420
- if (isDownshift && !_deferToolPruning) {
1421
- // Read from history list — window cache is always null here because
1422
- // afterTurn() calls invalidateWindow() on every turn.
1423
- const currentHistory = await hm.cache.getHistory(agentId, sk);
1424
- if (currentHistory && currentHistory.length > 0) {
1425
- const reshaped = applyToolGradientToWindow(currentHistory, effectiveBudget);
1426
- if (reshaped.length < currentHistory.length) {
1427
- const reshapedAt = new Date().toISOString();
1428
- if (canPersistReshapedHistory(currentHistory)) {
1429
- // No structured tool turns in canonical history, safe to persist
1430
- // the reshaped window back to cache/history.
1431
- await hm.cache.replaceHistory(agentId, sk, reshaped);
1940
+ if (isSubagent) {
1941
+ console.log(`[hypermem-plugin] assemble: subagent warming=${_subagentWarming} (sk: ${sk})`);
1942
+ }
1943
+ // Resolve agent tier from fleet store (for doc chunk tier filtering)
1944
+ let tier;
1945
+ try {
1946
+ const agent = _fleetStore?.getAgent(agentId);
1947
+ tier = agent?.tier;
1948
+ }
1949
+ catch {
1950
+ // Non-fatal tier filtering just won't apply
1951
+ }
1952
+ // historyDepth: derive a safe message count from the token budget.
1953
+ // Uses 50% of the budget for history (down from 60% more budget goes to
1954
+ // L3/L4 context slots now). Floor at 50, ceiling at 200.
1955
+ // This is a preventive guard the compositor's safety valve still trims
1956
+ // by token count post-assembly, but limiting depth up front avoids
1957
+ // feeding the compactor a window it can't reduce.
1958
+ const effectiveBudget = computeEffectiveBudget(tokenBudget, model);
1959
+ const historyDepth = Math.min(250, Math.max(50, Math.floor((effectiveBudget * 0.65) / 500)));
1960
+ const runtimeEntryTokens = estimateMessageArrayTokens(messages);
1961
+ const redisEntryTokens = await estimateWindowTokens(hm, agentId, sk);
1962
+ const replayRecovery = decideReplayRecovery({
1963
+ currentState: normalizeReplayRecoveryState(await hm.cache.getSlot(agentId, sk, 'replayRecoveryState').catch(() => '')),
1964
+ runtimeTokens: runtimeEntryTokens,
1965
+ redisTokens: redisEntryTokens,
1966
+ effectiveBudget,
1967
+ });
1968
+ const replayHistoryDepth = replayRecovery.active && replayRecovery.historyDepthCap
1969
+ ? Math.min(historyDepth, replayRecovery.historyDepthCap)
1970
+ : historyDepth;
1971
+ // ── Redis guardrail: trim history to token budget ────────────────────
1972
+ // Prevents model-switch bloat: if an agent previously ran on a larger
1973
+ // context window, Redis history may exceed the current model's budget.
1974
+ // Trimming here (before compose) ensures the compositor never sees a
1975
+ // history window it can't fit.
1976
+ //
1977
+ // Sprint 3 (AfterTurn Rebuild/Trim Loop Fix): the assemble.normal trim now
1978
+ // first checks whether the window is already within trimBudget. When
1979
+ // afterTurn's refreshRedisGradient caps the rebuilt window at the same
1980
+ // 0.65 fraction (Sprint 3 compositor fix), the steady-state path will
1981
+ // find preTokens <= trimBudget and skip the trim entirely. The trim only
1982
+ // fires when real excess exists (pressure spikes, model switch, cold start),
1983
+ // breaking the unconditional afterTurn→assemble trim churn loop.
1984
+ //
1985
+ // B3: Batch trim with growth allowance.
1986
+ // Trim only fires when the window has grown past the soft target by more
1987
+ // than TRIM_GROWTH_THRESHOLD (5%). When it does fire, trim to
1988
+ // softTarget * (1 - TRIM_HEADROOM_FRACTION) so the window has room to
1989
+ // grow for several turns before the next trim fires. This eliminates
1990
+ // per-turn trim churn from minor natural growth (short assistant replies,
1991
+ // small tool outputs) while still catching genuine pressure spikes.
1992
+ try {
1993
+ const { softBudget: trimSoftBudget, triggerBudget: trimTriggerBudget, targetBudget: trimTargetBudget, } = resolveTrimBudgets(effectiveBudget);
1994
+ // Always read preTokens so we can make the skip decision and emit telemetry.
1995
+ const preTokensNormal = await estimateWindowTokens(hm, agentId, sk).catch(() => 0);
1996
+ const normalPath = isSubagent ? 'assemble.subagent' : 'assemble.normal';
1997
+ // B3: Skip trim when window is within the growth-allowance envelope.
1998
+ // This replaces the Sprint 3 `windowAlreadyFits` check (which only
1999
+ // skipped at exactly ≤ softTarget). The growth allowance lets the
2000
+ // window float up to +5% before triggering, avoiding trim on every
2001
+ // turn that ends a few tokens above 65%.
2002
+ const withinGrowthEnvelope = preTokensNormal > 0 && preTokensNormal <= trimTriggerBudget;
2003
+ if (withinGrowthEnvelope) {
2004
+ if (telemetryEnabled()) {
2005
+ guardTelemetry({
2006
+ path: normalPath,
2007
+ agentId, sessionKey: sk,
2008
+ reason: 'window-within-budget-skip',
2009
+ });
2010
+ }
2011
+ }
2012
+ else {
2013
+ // Steady-state trim owner claim (Sprint 2.2a): route assemble.normal
2014
+ // and assemble.subagent through the shared helper keyed by
2015
+ // (sessionKey, _asmTurnId). The real trim + its `event:'trim'`
2016
+ // emission are gated on the claim so a duplicate steady-state claim
2017
+ // in the same turn is actually suppressed in production, not just
2018
+ // warned. In development the duplicate throws.
2019
+ const normalClaimed = claimTrimOwner(sk, _asmTurnId, normalPath);
2020
+ if (normalClaimed) {
2021
+ // B3: trim to the headroom target (below soft target) so the
2022
+ // window has room to grow before the next trim fires.
2023
+ const trimmed = await hm.cache.trimHistoryToTokenBudget(agentId, sk, trimTargetBudget);
2024
+ let normalCacheInvalidated = false;
2025
+ if (trimmed > 0) {
2026
+ // Invalidate window cache since history changed
1432
2027
  await hm.cache.invalidateWindow(agentId, sk);
1433
- console.log(`[hypermem-plugin] budget-downshift: ${agentId}/${sk} ` +
1434
- `${lastState.tokenBudget}→${effectiveBudget} tokens, ` +
1435
- `reshaped ${currentHistory.length}→${reshaped.length} messages`);
2028
+ normalCacheInvalidated = true;
1436
2029
  }
1437
- else {
1438
- // Tool-bearing history must remain canonical. Use the reshaped
1439
- // window only as a compose-time view and leave hot history lossless.
1440
- console.log(`[hypermem-plugin] budget-downshift: ${agentId}/${sk} ` +
1441
- `${lastState.tokenBudget}→${effectiveBudget} tokens, ` +
1442
- `view-only reshape ${currentHistory.length}→${reshaped.length} messages (structured tool history preserved)`);
2030
+ if (telemetryEnabled()) {
2031
+ const postTokensNormal = await estimateWindowTokens(hm, agentId, sk).catch(() => 0);
2032
+ trimTelemetry({
2033
+ path: normalPath,
2034
+ agentId, sessionKey: sk,
2035
+ preTokens: preTokensNormal,
2036
+ postTokens: postTokensNormal,
2037
+ removed: trimmed,
2038
+ cacheInvalidated: normalCacheInvalidated,
2039
+ reason: `b3:trigger=${trimTriggerBudget},target=${trimTargetBudget}`,
2040
+ });
1443
2041
  }
1444
- await hm.cache.setModelState(agentId, sk, {
1445
- model: model ?? 'unknown',
1446
- tokenBudget: effectiveBudget,
1447
- composedAt: new Date().toISOString(),
1448
- historyDepth,
1449
- reshapedAt,
2042
+ }
2043
+ else if (telemetryEnabled()) {
2044
+ guardTelemetry({
2045
+ path: normalPath,
2046
+ agentId, sessionKey: sk,
2047
+ reason: 'duplicate-claim-suppressed',
1450
2048
  });
1451
2049
  }
1452
2050
  }
1453
2051
  }
1454
- }
1455
- catch (reshapeErr) {
1456
- // Non-fatal compositor safety valve is still the last defense
1457
- console.warn('[hypermem-plugin] assemble: reshape pass failed (non-fatal):', reshapeErr.message);
1458
- }
1459
- // ── Cache replay fast path ─────────────────────────────────────────────
1460
- // If the session was active recently, return the cached contextBlock
1461
- // (systemPromptAddition) to produce a byte-identical system prompt and
1462
- // hit the provider prefix cache (Anthropic / OpenAI).
1463
- // The message window is always rebuilt fresh only the compositor output
1464
- // (contextBlock) is cached, since that's what determines prefix identity.
1465
- const cacheReplayThresholdMs = _cacheReplayThresholdMs;
1466
- let cachedContextBlock = null;
1467
- if (cacheReplayThresholdMs > 0) {
2052
+ catch (trimErr) {
2053
+ // Non-fatal — compositor's budget-fit walk is the second line of defense
2054
+ console.warn('[hypermem-plugin] assemble: Redis trim failed (non-fatal):', trimErr.message);
2055
+ }
2056
+ // ── Budget downshift: proactive reshape pass ───────────────────────────────────────
2057
+ // If this session previously composed at a higher token budget (e.g. gpt-5.4
2058
+ // claude-sonnet model switch), the Redis window is still sized for the old
2059
+ // budget. trimHistoryToTokenBudget above trims by count but skips tool
2060
+ // gradient logic. A downshift >10% triggers a full reshape: apply tool
2061
+ // gradient at the new budget + trim, then write back before compose runs.
2062
+ // This prevents several turns of compaction churn after a model switch.
2063
+ //
2064
+ // Bug fix: previously read from getWindow() which is always null here
2065
+ // (afterTurn invalidates it every turn). Also fixed: was doing setWindow()
2066
+ // then invalidateWindow() which is a write-then-delete no-op. Now reads
2067
+ // from history list and writes back via replaceHistory().
2068
+ let lastState = null;
1468
2069
  try {
1469
- const cachedAt = await hm.cache.getSlot(agentId, sk, 'assemblyContextAt');
1470
- if (cachedAt && Date.now() - parseInt(cachedAt) < cacheReplayThresholdMs) {
1471
- cachedContextBlock = await hm.cache.getSlot(agentId, sk, 'assemblyContextBlock');
1472
- if (cachedContextBlock) {
1473
- console.log(`[hypermem-plugin] assemble: cache replay hit for ${agentId} (${Math.round((Date.now() - parseInt(cachedAt)) / 1000)}s old)`);
1474
- }
2070
+ lastState = await hm.cache.getModelState(agentId, sk);
2071
+ const DOWNSHIFT_THRESHOLD = 0.10;
2072
+ const isDownshift = lastState &&
2073
+ (lastState.tokenBudget - effectiveBudget) / lastState.tokenBudget > DOWNSHIFT_THRESHOLD;
2074
+ if (isDownshift && !_deferToolPruning) {
2075
+ // Sprint 2.2a: demote reshape to guard telemetry.
2076
+ //
2077
+ // Previously this branch re-ran applyToolGradientToWindow, wrote
2078
+ // back via replaceHistory, invalidated the window cache, and
2079
+ // stamped `reshapedAt` on model state. Assemble.* is the
2080
+ // steady-state owner, so the subsequent assemble.normal /
2081
+ // assemble.subagent trim (gated by claimTrimOwner) handles any
2082
+ // real downshift pressure. Keeping the detection branch preserves
2083
+ // observability; guardTelemetry records the would-be-reshape
2084
+ // without mutating history, the window, or model state.
2085
+ //
2086
+ // CRITICAL: do NOT call setModelState({ reshapedAt, … }) here.
2087
+ // compact() skips when reshapedAt is recent, which would cause it
2088
+ // to skip on the strength of a reshape that never ran.
2089
+ guardTelemetry({
2090
+ path: 'reshape',
2091
+ agentId, sessionKey: sk,
2092
+ reason: 'reshape-downshift-demoted',
2093
+ });
1475
2094
  }
1476
2095
  }
1477
- catch {
1478
- // Non-fatal — fall through to full assembly
2096
+ catch (reshapeErr) {
2097
+ // Non-fatal — compositor safety valve is still the last defense
2098
+ console.warn('[hypermem-plugin] assemble: reshape pass failed (non-fatal):', reshapeErr.message);
1479
2099
  }
1480
- }
1481
- // Subagent light mode: skip library/wiki/semantic/keystones/doc chunks.
1482
- // Keeps: system, identity, history, active facts, output profile, tool gradient.
1483
- const subagentLight = isSubagent && _subagentWarming === 'light';
1484
- const request = {
1485
- agentId,
1486
- sessionKey: sk,
1487
- tokenBudget: effectiveBudget,
1488
- historyDepth,
1489
- tier,
1490
- model, // pass model for provider detection
1491
- includeDocChunks: subagentLight ? false : !cachedContextBlock, // skip doc retrieval on cache hit or subagent light
1492
- includeLibrary: subagentLight ? false : undefined, // skip wiki/knowledge/preferences
1493
- includeSemanticRecall: subagentLight ? false : undefined, // skip vector/FTS recall
1494
- includeKeystones: subagentLight ? false : undefined, // skip keystone history injection
1495
- prompt,
1496
- skipProviderTranslation: true, // runtime handles provider translation
1497
- };
1498
- const result = await hm.compose(request);
1499
- // Use cached contextBlock if available (cache replay), otherwise use fresh result.
1500
- // After a full compose, write the new contextBlock to cache for the next turn.
1501
- if (cachedContextBlock) {
1502
- result.contextBlock = cachedContextBlock;
1503
- }
1504
- else if (result.contextBlock && cacheReplayThresholdMs > 0) {
1505
- // Write cache async — never block the assemble() return on this
1506
- const blockToCache = result.contextBlock;
1507
- const nowStr = Date.now().toString();
1508
- const ttlSec = Math.ceil((cacheReplayThresholdMs * 2) / 1000);
1509
- Promise.all([
1510
- hm.cache.setSlot(agentId, sk, 'assemblyContextBlock', blockToCache),
1511
- hm.cache.setSlot(agentId, sk, 'assemblyContextAt', nowStr),
1512
- ]).then(() => {
1513
- // Extend TTL on the cached keys to 2× the threshold
1514
- // setSlot uses the sessionTTL from RedisLayer config — acceptable fallback
1515
- }).catch(() => { });
1516
- }
1517
- // Convert NeutralMessage[] → AgentMessage[] for the OpenClaw runtime.
1518
- // neutralToAgentMessage can return a single message or an array (tool results
1519
- // expand to individual ToolResultMessage objects), so we flatMap.
1520
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
1521
- let outputMessages = result.messages
1522
- .filter(m => m.role != null)
1523
- .flatMap(m => neutralToAgentMessage(m));
1524
- const neutralPairStats = collectNeutralToolPairStats(result.messages);
1525
- const agentPairStats = collectAgentToolPairStats(outputMessages);
1526
- const toolPairAnomaly = neutralPairStats.missingToolResultCount > 0 ||
1527
- neutralPairStats.orphanToolResultCount > 0 ||
1528
- agentPairStats.missingToolResultCount > 0 ||
1529
- agentPairStats.orphanToolResultCount > 0 ||
1530
- agentPairStats.syntheticNoResultCount > 0
1531
- ? {
1532
- stage: 'assemble',
1533
- neutralMissingToolResultIds: neutralPairStats.missingToolResultIds.slice(0, 10),
1534
- neutralOrphanToolResultIds: neutralPairStats.orphanToolResultIds.slice(0, 10),
1535
- agentMissingToolResultIds: agentPairStats.missingToolResultIds.slice(0, 10),
1536
- agentOrphanToolResultIds: agentPairStats.orphanToolResultIds.slice(0, 10),
1537
- syntheticNoResultCount: agentPairStats.syntheticNoResultCount,
2100
+ // ── Cache replay fast path ─────────────────────────────────────────────
2101
+ // If the session was active recently, return the cached contextBlock
2102
+ // (systemPromptAddition) to produce a byte-identical system prompt and
2103
+ // hit the provider prefix cache (Anthropic / OpenAI).
2104
+ // The message window is always rebuilt fresh — only the compositor output
2105
+ // (contextBlock) is cached, since that's what determines prefix identity.
2106
+ const cacheReplayThresholdMs = _cacheReplayThresholdMs;
2107
+ let cachedContextBlock = null;
2108
+ if (cacheReplayThresholdMs > 0 && !replayRecovery.shouldSkipCacheReplay) {
2109
+ try {
2110
+ const cachedAt = await hm.cache.getSlot(agentId, sk, 'assemblyContextAt');
2111
+ if (cachedAt && Date.now() - parseInt(cachedAt) < cacheReplayThresholdMs) {
2112
+ cachedContextBlock = await hm.cache.getSlot(agentId, sk, 'assemblyContextBlock');
2113
+ if (cachedContextBlock) {
2114
+ console.log(`[hypermem-plugin] assemble: cache replay hit for ${agentId} (${Math.round((Date.now() - parseInt(cachedAt)) / 1000)}s old)`);
2115
+ if (telemetryEnabled()) {
2116
+ assembleTrace({
2117
+ agentId,
2118
+ sessionKey: sk,
2119
+ turnId: _asmTurnId,
2120
+ path: 'replay',
2121
+ toolLoop: isToolLoop,
2122
+ msgCount: messages.length,
2123
+ });
2124
+ }
2125
+ }
2126
+ }
2127
+ }
2128
+ catch {
2129
+ // Non-fatal — fall through to full assembly
2130
+ }
1538
2131
  }
1539
- : undefined;
1540
- await bumpToolPairMetrics(hm, agentId, sk, {
1541
- composeCount: 1,
1542
- preBridgeMissingToolResults: neutralPairStats.missingToolResultCount,
1543
- preBridgeOrphanToolResults: neutralPairStats.orphanToolResultCount,
1544
- postBridgeMissingToolResults: agentPairStats.missingToolResultCount,
1545
- postBridgeOrphanToolResults: agentPairStats.orphanToolResultCount,
1546
- }, toolPairAnomaly);
1547
- if (toolPairAnomaly) {
1548
- console.warn(`[hypermem-plugin] tool-pair-integrity: ${agentId}/${sk} ` +
1549
- `neutralMissing=${neutralPairStats.missingToolResultCount} neutralOrphan=${neutralPairStats.orphanToolResultCount} ` +
1550
- `agentMissing=${agentPairStats.missingToolResultCount} agentOrphan=${agentPairStats.orphanToolResultCount} ` +
1551
- `synthetic=${agentPairStats.syntheticNoResultCount}`);
1552
- }
1553
- // Repair orphaned tool pairs before returning to provider.
1554
- // compaction/trim passes can remove tool_use blocks without removing their
1555
- // paired tool_result messages — Anthropic and Gemini reject these with 400.
1556
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
1557
- outputMessages = repairToolPairs(outputMessages);
1558
- // Cache overhead for tool-loop turns: contextBlock tokens (chars/4) +
1559
- // tier-aware estimate for runtime system prompt (SOUL.md, identity,
1560
- // workspace files — not visible from inside the plugin).
1561
- const contextBlockTokens = Math.ceil((result.contextBlock?.length ?? 0) / 4);
1562
- const runtimeSystemTokens = getOverheadFallback(tier);
1563
- _overheadCache.set(sk, contextBlockTokens + runtimeSystemTokens);
1564
- // Update model state for downshift detection on next turn
1565
- try {
1566
- await hm.cache.setModelState(agentId, sk, {
1567
- model: model ?? 'unknown',
2132
+ // Subagent light mode: skip library/wiki/semantic/keystones/doc chunks.
2133
+ // Keeps: system, identity, history, active facts, output profile, tool gradient.
2134
+ const subagentLight = isSubagent && _subagentWarming === 'light';
2135
+ const request = {
2136
+ agentId,
2137
+ sessionKey: sk,
1568
2138
  tokenBudget: effectiveBudget,
1569
- composedAt: new Date().toISOString(),
1570
- historyDepth,
2139
+ historyDepth: lastState?.historyDepth && lastState.historyDepth < replayHistoryDepth
2140
+ ? lastState.historyDepth
2141
+ : replayHistoryDepth,
2142
+ tier,
2143
+ model, // pass model for provider detection
2144
+ includeDocChunks: subagentLight ? false : !cachedContextBlock, // skip doc retrieval on cache hit or subagent light
2145
+ includeLibrary: subagentLight ? false : undefined, // skip wiki/knowledge/preferences
2146
+ includeSemanticRecall: subagentLight ? false : undefined, // skip vector/FTS recall
2147
+ includeKeystones: subagentLight ? false : undefined, // skip keystone history injection
2148
+ prompt,
2149
+ skipProviderTranslation: true, // runtime handles provider translation
2150
+ };
2151
+ const result = await hm.compose(request);
2152
+ degradationTelemetry({
2153
+ agentId,
2154
+ sessionKey: sk,
2155
+ turnId: _asmTurnId,
2156
+ path: 'compose',
2157
+ toolChainCoEjections: result.diagnostics?.toolChainCoEjections ?? 0,
2158
+ toolChainStubReplacements: result.diagnostics?.toolChainStubReplacements ?? 0,
2159
+ artifactDegradations: result.diagnostics?.artifactDegradations ?? 0,
2160
+ artifactOversizeThresholdTokens: result.diagnostics?.artifactOversizeThresholdTokens,
2161
+ replayState: replayRecovery.emittedMarker?.state,
2162
+ replayReason: replayRecovery.emittedMarker?.reason,
1571
2163
  });
2164
+ // Use cached contextBlock if available (cache replay), otherwise use fresh result.
2165
+ // After a full compose, write the new contextBlock to cache for the next turn.
2166
+ if (cachedContextBlock) {
2167
+ result.contextBlock = cachedContextBlock;
2168
+ }
2169
+ else if (result.contextBlock && cacheReplayThresholdMs > 0 && !replayRecovery.shouldSkipCacheReplay && !replayRecovery.emittedText) {
2170
+ // Write cache async — never block the assemble() return on this
2171
+ const blockToCache = result.contextBlock;
2172
+ const nowStr = Date.now().toString();
2173
+ const ttlSec = Math.ceil((cacheReplayThresholdMs * 2) / 1000);
2174
+ Promise.all([
2175
+ hm.cache.setSlot(agentId, sk, 'assemblyContextBlock', blockToCache),
2176
+ hm.cache.setSlot(agentId, sk, 'assemblyContextAt', nowStr),
2177
+ ]).then(() => {
2178
+ // Extend TTL on the cached keys to 2× the threshold
2179
+ // setSlot uses the sessionTTL from RedisLayer config — acceptable fallback
2180
+ }).catch(() => { });
2181
+ }
2182
+ if (replayRecovery.emittedText) {
2183
+ result.contextBlock = result.contextBlock
2184
+ ? `${result.contextBlock}
2185
+ ${replayRecovery.emittedText}`
2186
+ : replayRecovery.emittedText;
2187
+ }
2188
+ // Convert NeutralMessage[] → AgentMessage[] for the OpenClaw runtime.
2189
+ // neutralToAgentMessage can return a single message or an array (tool results
2190
+ // expand to individual ToolResultMessage objects), so we flatMap.
2191
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
2192
+ let outputMessages = result.messages
2193
+ .filter(m => m.role != null)
2194
+ .flatMap(m => neutralToAgentMessage(m));
2195
+ const neutralPairStats = collectNeutralToolPairStats(result.messages);
2196
+ const agentPairStats = collectAgentToolPairStats(outputMessages);
2197
+ const toolPairAnomaly = neutralPairStats.missingToolResultCount > 0 ||
2198
+ neutralPairStats.orphanToolResultCount > 0 ||
2199
+ agentPairStats.missingToolResultCount > 0 ||
2200
+ agentPairStats.orphanToolResultCount > 0 ||
2201
+ agentPairStats.syntheticNoResultCount > 0
2202
+ ? {
2203
+ stage: 'assemble',
2204
+ neutralMissingToolResultIds: neutralPairStats.missingToolResultIds.slice(0, 10),
2205
+ neutralOrphanToolResultIds: neutralPairStats.orphanToolResultIds.slice(0, 10),
2206
+ agentMissingToolResultIds: agentPairStats.missingToolResultIds.slice(0, 10),
2207
+ agentOrphanToolResultIds: agentPairStats.orphanToolResultIds.slice(0, 10),
2208
+ syntheticNoResultCount: agentPairStats.syntheticNoResultCount,
2209
+ }
2210
+ : undefined;
2211
+ await bumpToolPairMetrics(hm, agentId, sk, {
2212
+ composeCount: 1,
2213
+ preBridgeMissingToolResults: neutralPairStats.missingToolResultCount,
2214
+ preBridgeOrphanToolResults: neutralPairStats.orphanToolResultCount,
2215
+ postBridgeMissingToolResults: agentPairStats.missingToolResultCount,
2216
+ postBridgeOrphanToolResults: agentPairStats.orphanToolResultCount,
2217
+ }, toolPairAnomaly);
2218
+ if (toolPairAnomaly) {
2219
+ console.warn(`[hypermem-plugin] tool-pair-integrity: ${agentId}/${sk} ` +
2220
+ `neutralMissing=${neutralPairStats.missingToolResultCount} neutralOrphan=${neutralPairStats.orphanToolResultCount} ` +
2221
+ `agentMissing=${agentPairStats.missingToolResultCount} agentOrphan=${agentPairStats.orphanToolResultCount} ` +
2222
+ `synthetic=${agentPairStats.syntheticNoResultCount}`);
2223
+ }
2224
+ // Repair orphaned tool pairs before returning to provider.
2225
+ // compaction/trim passes can remove tool_use blocks without removing their
2226
+ // paired tool_result messages — Anthropic and Gemini reject these with 400.
2227
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
2228
+ outputMessages = repairToolPairs(outputMessages);
2229
+ // Cache overhead for tool-loop turns: contextBlock tokens (chars/4) +
2230
+ // tier-aware estimate for runtime system prompt (SOUL.md, identity,
2231
+ // workspace files — not visible from inside the plugin).
2232
+ const contextBlockTokens = Math.ceil((result.contextBlock?.length ?? 0) / 4);
2233
+ const runtimeSystemTokens = getOverheadFallback(tier);
2234
+ _overheadCache.set(sk, contextBlockTokens + runtimeSystemTokens);
2235
+ await persistReplayRecoveryState(hm, agentId, sk, replayRecovery.nextState);
2236
+ // Update model state for downshift detection on next turn
2237
+ try {
2238
+ await hm.cache.setModelState(agentId, sk, {
2239
+ model: model ?? 'unknown',
2240
+ tokenBudget: effectiveBudget,
2241
+ composedAt: new Date().toISOString(),
2242
+ historyDepth,
2243
+ });
2244
+ }
2245
+ catch {
2246
+ // Non-fatal
2247
+ }
2248
+ return {
2249
+ messages: outputMessages,
2250
+ estimatedTokens: result.tokenCount ?? 0,
2251
+ // systemPromptAddition injects hypermem context before the runtime system prompt.
2252
+ // This is the facts/recall/episodes block assembled by the compositor.
2253
+ systemPromptAddition: result.contextBlock || undefined,
2254
+ };
1572
2255
  }
1573
- catch {
1574
- // Non-fatal
2256
+ catch (err) {
2257
+ console.error('[hypermem-plugin] assemble error (stack):', err.stack ?? err);
2258
+ throw err; // Re-throw so the runtime falls back to legacy pipeline
1575
2259
  }
1576
- return {
1577
- messages: outputMessages,
1578
- estimatedTokens: result.tokenCount ?? 0,
1579
- // systemPromptAddition injects hypermem context before the runtime system prompt.
1580
- // This is the facts/recall/episodes block assembled by the compositor.
1581
- systemPromptAddition: result.contextBlock || undefined,
1582
- };
1583
2260
  }
1584
- catch (err) {
1585
- console.error('[hypermem-plugin] assemble error (stack):', err.stack ?? err);
1586
- throw err; // Re-throw so the runtime falls back to legacy pipeline
2261
+ finally {
2262
+ // End the trim-owner turn scope opened at assemble entry. Paired
2263
+ // with beginTrimOwnerTurn(_asmSk, _asmTurnId) above; runs on every
2264
+ // exit path (normal return, tool-loop return, replay return, error
2265
+ // re-throw). Turn-scoped keying (Sprint 2.2a) means this only
2266
+ // removes THIS turn's slot, so concurrent same-session turns remain
2267
+ // isolated instead of clobbering each other.
2268
+ endTrimOwnerTurn(_asmSk, _asmTurnId);
1587
2269
  }
1588
2270
  },
1589
2271
  /**
@@ -1609,14 +2291,16 @@ function createHyperMemEngine() {
1609
2291
  // Skip if a reshape pass just ran (within last 30s) — avoid double-processing
1610
2292
  // Cache modelState here for reuse in density-aware JSONL truncation below.
1611
2293
  let cachedModelState = null;
2294
+ let model;
1612
2295
  try {
1613
2296
  cachedModelState = await hm.cache.getModelState(agentId, sk);
2297
+ model = cachedModelState?.model;
1614
2298
  if (cachedModelState?.reshapedAt) {
1615
2299
  const reshapeAge = Date.now() - new Date(cachedModelState.reshapedAt).getTime();
1616
2300
  // Only skip if session is NOT critically full — nuclear path must bypass this guard.
1617
2301
  // If currentTokenCount > 85% budget, fall through to nuclear compaction below.
1618
2302
  const isCriticallyFull = currentTokenCount != null &&
1619
- currentTokenCount > (computeEffectiveBudget(tokenBudget) * 0.85);
2303
+ currentTokenCount > (computeEffectiveBudget(tokenBudget, model) * 0.85);
1620
2304
  if (reshapeAge < 30_000 && !isCriticallyFull) {
1621
2305
  console.log(`[hypermem-plugin] compact: skipping — reshape pass ran ${reshapeAge}ms ago`);
1622
2306
  return { ok: true, compacted: false, reason: 'reshape-recently-ran' };
@@ -1631,7 +2315,7 @@ function createHyperMemEngine() {
1631
2315
  // and system prompt — our estimate only covers the history window. When they
1632
2316
  // diverge significantly upward, the difference is "inbound overhead" consuming
1633
2317
  // budget the history is competing for. We trim history to make room.
1634
- const effectiveBudget = computeEffectiveBudget(tokenBudget);
2318
+ const effectiveBudget = computeEffectiveBudget(tokenBudget, model);
1635
2319
  const tokensBefore = await estimateWindowTokens(hm, agentId, sk);
1636
2320
  // Target depth for both Redis trimming and JSONL truncation.
1637
2321
  // Target 50% of budget capacity, assume ~500 tokens/message average.
@@ -1652,10 +2336,21 @@ function createHyperMemEngine() {
1652
2336
  // Keeps very recent context, clears the long tool-heavy tail.
1653
2337
  const nuclearDepth = Math.max(10, Math.floor(targetDepth * 0.20));
1654
2338
  const nuclearBudget = Math.floor(effectiveBudget * 0.25);
1655
- await hm.cache.trimHistoryToTokenBudget(agentId, sk, nuclearBudget);
2339
+ const nuclearRemoved = await hm.cache.trimHistoryToTokenBudget(agentId, sk, nuclearBudget);
1656
2340
  await hm.cache.invalidateWindow(agentId, sk).catch(() => { });
1657
2341
  await truncateJsonlIfNeeded(sessionFile, nuclearDepth, true);
1658
2342
  const tokensAfter = await estimateWindowTokens(hm, agentId, sk);
2343
+ if (telemetryEnabled()) {
2344
+ trimTelemetry({
2345
+ path: 'compact.nuclear',
2346
+ agentId, sessionKey: sk,
2347
+ preTokens: tokensBefore,
2348
+ postTokens: tokensAfter,
2349
+ removed: nuclearRemoved,
2350
+ cacheInvalidated: true,
2351
+ reason: `currentTokenCount=${currentTokenCount}/${effectiveBudget}`,
2352
+ });
2353
+ }
1659
2354
  console.log(`[hypermem-plugin] compact: NUCLEAR — session at ${currentTokenCount}/${effectiveBudget} tokens ` +
1660
2355
  `(${Math.round((currentTokenCount / effectiveBudget) * 100)}% full), ` +
1661
2356
  `deep-trimmed JSONL to ${nuclearDepth} messages, Redis ${tokensBefore}→${tokensAfter} tokens`);
@@ -1676,6 +2371,17 @@ function createHyperMemEngine() {
1676
2371
  await hm.cache.invalidateWindow(agentId, sk).catch(() => { });
1677
2372
  const tokensAfter = await estimateWindowTokens(hm, agentId, sk);
1678
2373
  await truncateJsonlIfNeeded(sessionFile, targetDepth);
2374
+ if (telemetryEnabled()) {
2375
+ trimTelemetry({
2376
+ path: 'compact.history',
2377
+ agentId, sessionKey: sk,
2378
+ preTokens: tokensBefore,
2379
+ postTokens: tokensAfter,
2380
+ removed: historyTrimmed,
2381
+ cacheInvalidated: true,
2382
+ reason: `inbound-overhead=${inboundOverhead}`,
2383
+ });
2384
+ }
1679
2385
  console.log(`[hypermem-plugin] compact: large-inbound-content (gap=${inboundOverhead} tokens), ` +
1680
2386
  `trimmed history ${tokensBefore}→${tokensAfter} (budget-for-history=${budgetForHistory}, trimmed=${historyTrimmed} messages)`);
1681
2387
  return { ok: true, compacted: true, result: { tokensBefore, tokensAfter } };
@@ -1725,6 +2431,17 @@ function createHyperMemEngine() {
1725
2431
  // Invalidate the compose cache so next assemble() re-builds from trimmed data
1726
2432
  await hm.cache.invalidateWindow(agentId, sk).catch(() => { });
1727
2433
  const tokensAfter = await estimateWindowTokens(hm, agentId, sk);
2434
+ if (telemetryEnabled()) {
2435
+ trimTelemetry({
2436
+ path: 'compact.history2',
2437
+ agentId, sessionKey: sk,
2438
+ preTokens: tokensBefore,
2439
+ postTokens: tokensAfter,
2440
+ removed: historyTrimmed,
2441
+ cacheInvalidated: true,
2442
+ reason: `over-budget tokensBefore=${tokensBefore}/${effectiveBudget}`,
2443
+ });
2444
+ }
1728
2445
  console.log(`[hypermem-plugin] compact: trimmed ${tokensBefore} → ${tokensAfter} tokens (budget: ${effectiveBudget})`);
1729
2446
  // Density-aware JSONL truncation: derive target depth from actual avg tokens/message
1730
2447
  // rather than assuming a fixed 500 tokens/message. This prevents a large-message
@@ -1857,12 +2574,13 @@ function createHyperMemEngine() {
1857
2574
  // gradient-compressed window to budget before writing to Redis. Without
1858
2575
  // this, afterTurn writes up to 250 messages regardless of budget, causing
1859
2576
  // trimHistoryToTokenBudget to fire and trim ~200 messages on every
1860
- // subsequent assemble() — the churn loop seen in Helm's logs.
2577
+ // subsequent assemble() — the churn loop seen in Eve's logs.
1861
2578
  if (hm.cache.isConnected) {
1862
2579
  try {
1863
2580
  const modelState = await hm.cache.getModelState(agentId, sk);
1864
2581
  const gradientBudget = modelState?.tokenBudget;
1865
- await hm.refreshRedisGradient(agentId, sk, gradientBudget);
2582
+ const gradientDepth = modelState?.historyDepth;
2583
+ await hm.refreshRedisGradient(agentId, sk, gradientBudget, gradientDepth);
1866
2584
  }
1867
2585
  catch (refreshErr) {
1868
2586
  console.warn('[hypermem-plugin] afterTurn: refreshRedisGradient failed (non-fatal):', refreshErr.message);
@@ -1880,48 +2598,50 @@ function createHyperMemEngine() {
1880
2598
  // If a session just finished a turn at >80% pressure, the NEXT turn's
1881
2599
  // incoming tool results (parallel web searches, large exec output, etc.)
1882
2600
  // will hit a window with no headroom — the ingestion wave failure mode
1883
- // (reported by Helm, 2026-04-05). Pre-trim here so the tool-loop
2601
+ // (reported by Eve, 2026-04-05). Pre-trim here so the tool-loop
1884
2602
  // assemble() path starts the next turn with meaningful space.
1885
2603
  //
1886
2604
  // Uses modelState.tokenBudget if cached; skips if unavailable (non-fatal).
1887
2605
  try {
1888
2606
  const modelState = await hm.cache.getModelState(agentId, sk);
1889
2607
  if (modelState?.tokenBudget) {
1890
- // Use the same dual-source pressure estimate as the tool-loop trim:
1891
- // max(runtime messages, Redis) so a post-restart empty-Redis session
1892
- // still fires correctly.
1893
- const runtimePostTokens = messages.reduce((sum, m) => {
1894
- const msg = m;
1895
- const textCost = estimateTokens(typeof msg.textContent === 'string' ? msg.textContent : null);
1896
- const toolCallCost = msg.toolCalls ? Math.ceil(JSON.stringify(msg.toolCalls).length / 2) : 0;
1897
- const toolResultCost = msg.toolResults ? Math.ceil(JSON.stringify(msg.toolResults).length / 2) : 0;
1898
- const contentCost = Array.isArray(msg.content)
1899
- ? msg.content.reduce((s, c) => {
1900
- const part = c;
1901
- // FIX (Bug 2 — afterTurn estimator): read c.content for native format
1902
- const textVal = typeof part.text === 'string' ? part.text
1903
- : typeof part.content === 'string' ? part.content
1904
- : part.content != null ? JSON.stringify(part.content) : null;
1905
- return s + estimateTokens(textVal);
1906
- }, 0)
1907
- : 0;
1908
- return sum + textCost + toolCallCost + toolResultCost + contentCost;
1909
- }, 0);
2608
+ // Use the runtime message array as the only trim-pressure source.
2609
+ // Redis remains a drift signal for anomaly logging.
2610
+ const runtimePostTokens = estimateMessageArrayTokens(messages);
1910
2611
  const redisPostTokens = await estimateWindowTokens(hm, agentId, sk);
1911
- const postTurnTokens = Math.max(runtimePostTokens, redisPostTokens);
2612
+ const postTurnTokens = runtimePostTokens;
2613
+ maybeLogPressureAccountingAnomaly({
2614
+ path: 'afterTurn.secondary',
2615
+ agentId,
2616
+ sessionKey: sk,
2617
+ runtimeTokens: runtimePostTokens,
2618
+ redisTokens: redisPostTokens,
2619
+ composedTokens: postTurnTokens,
2620
+ budget: modelState.tokenBudget,
2621
+ });
1912
2622
  const postTurnPressure = postTurnTokens / modelState.tokenBudget;
1913
- // Two-tier afterTurn trim (EC3 fix, 2026-04-05):
1914
- // >90% → trim to 45%: deep saturation recovery — 70% target leaves only ~8k
1915
- // after system prompt (20-30k), which is not enough for any real tool work.
1916
- // >80% trim to 70%: mild pressure, preserve more history.
1917
- const afterTurnTrimTarget = postTurnPressure > 0.90 ? 0.45 : 0.70;
2623
+ // Sprint 2.2b: demote afterTurn.secondary to guard-only no-op.
2624
+ //
2625
+ // Previously this path was a two-tier real trim that fired after
2626
+ // every turn ending at >80% pressure, calling
2627
+ // trimHistoryToTokenBudget() and emitting `event:'trim'` with
2628
+ // path='afterTurn.secondary'. Sprint 2 consolidates steady-state
2629
+ // trim ownership in assemble.* (tool-loop + normal/subagent),
2630
+ // with compact.* as the only exception family. The afterTurn
2631
+ // post-turn pressure path is now redundant: the next turn's
2632
+ // assemble.* trim absorbs any residual pressure.
2633
+ //
2634
+ // Pattern matches the warmstart/reshape demotion from 2.2a:
2635
+ // keep the pressure predicate + threshold branch so observability
2636
+ // via `event:'trim-guard'` is preserved, but emit NO real trim,
2637
+ // NO invalidateWindow, NO mutation. The compact skip-gate stays
2638
+ // correct because this path never stamped any model state.
1918
2639
  if (postTurnPressure > 0.80) {
1919
- const headroomBudget = Math.floor(modelState.tokenBudget * afterTurnTrimTarget);
1920
- const secondaryTrimmed = await hm.cache.trimHistoryToTokenBudget(agentId, sk, headroomBudget);
1921
- if (secondaryTrimmed > 0) {
1922
- console.log(`[hypermem-plugin] afterTurn: pre-emptive trim — session exiting at ` +
1923
- `${(postTurnPressure * 100).toFixed(1)}%, trimmed ${secondaryTrimmed} msgs to create headroom`);
1924
- }
2640
+ guardTelemetry({
2641
+ path: 'afterTurn.secondary',
2642
+ agentId, sessionKey: sk,
2643
+ reason: 'afterturn-secondary-demoted',
2644
+ });
1925
2645
  }
1926
2646
  }
1927
2647
  }
@@ -2237,20 +2957,44 @@ const hypercompositorConfigSchema = z.object({
2237
2957
  contextWindowReserve: z.number().min(0).max(0.5).optional(),
2238
2958
  /** Defer tool pruning to OpenClaw's contextPruning. Default: false */
2239
2959
  deferToolPruning: z.boolean().optional(),
2960
+ /** Emit detailed budget-source and trim-decision logs. Default: false */
2961
+ verboseLogging: z.boolean().optional(),
2962
+ /** Manual per-model context window fallback table used when runtime tokenBudget is missing. */
2963
+ contextWindowOverrides: z.record(z.string().regex(CONTEXT_WINDOW_OVERRIDE_KEY_REGEX, 'key must be "provider/model"'), contextWindowOverrideSchema).optional(),
2964
+ /** Treat cache replay snapshots older than this as stale. Default: 120000ms */
2965
+ warmCacheReplayThresholdMs: z.number().int().positive().optional(),
2240
2966
  /** Subagent context injection: 'full' | 'light' | 'off'. Default: 'light' */
2241
2967
  subagentWarming: z.enum(['full', 'light', 'off']).optional(),
2242
2968
  /** Compositor tuning overrides */
2243
2969
  compositor: z.object({
2970
+ budgetFraction: z.number().min(0).max(1).optional(),
2971
+ reserveFraction: z.number().min(0).max(1).optional(),
2972
+ historyFraction: z.number().min(0).max(1).optional(),
2973
+ memoryFraction: z.number().min(0).max(1).optional(),
2244
2974
  defaultTokenBudget: z.number().int().positive().optional(),
2245
2975
  maxHistoryMessages: z.number().int().positive().optional(),
2246
2976
  maxFacts: z.number().int().positive().optional(),
2977
+ maxExpertisePatterns: z.number().int().positive().optional(),
2247
2978
  maxCrossSessionContext: z.number().int().nonnegative().optional(),
2979
+ maxTotalTriggerTokens: z.number().int().nonnegative().optional(),
2248
2980
  maxRecentToolPairs: z.number().int().nonnegative().optional(),
2249
2981
  maxProseToolPairs: z.number().int().nonnegative().optional(),
2250
2982
  warmHistoryBudgetFraction: z.number().min(0).max(1).optional(),
2983
+ contextWindowReserve: z.number().min(0).max(1).optional(),
2984
+ dynamicReserveTurnHorizon: z.number().int().positive().optional(),
2985
+ dynamicReserveMax: z.number().min(0).max(1).optional(),
2986
+ dynamicReserveEnabled: z.boolean().optional(),
2251
2987
  keystoneHistoryFraction: z.number().min(0).max(1).optional(),
2252
2988
  keystoneMaxMessages: z.number().int().nonnegative().optional(),
2253
2989
  keystoneMinSignificance: z.number().min(0).max(1).optional(),
2990
+ targetBudgetFraction: z.number().min(0).max(1).optional(),
2991
+ enableFOS: z.boolean().optional(),
2992
+ enableMOD: z.boolean().optional(),
2993
+ hyperformProfile: z.enum(['light', 'standard', 'full', 'starter', 'fleet']).optional(),
2994
+ outputProfile: z.enum(['light', 'standard', 'full', 'starter', 'fleet']).optional(),
2995
+ outputStandard: z.enum(['light', 'standard', 'full', 'starter', 'fleet']).optional(),
2996
+ wikiTokenCap: z.number().int().positive().optional(),
2997
+ zigzagOrdering: z.boolean().optional(),
2254
2998
  }).optional(),
2255
2999
  /** Image/tool eviction settings */
2256
3000
  eviction: z.object({
@@ -2280,22 +3024,22 @@ const engine = createHyperMemEngine();
2280
3024
  export default definePluginEntry({
2281
3025
  id: 'hypercompositor',
2282
3026
  name: 'HyperCompositor — context engine',
2283
- description: 'Four-layer memory architecture for OpenClaw agents: Redis hot cache, message history, vector search, and structured library.',
3027
+ description: 'Four-layer memory architecture for OpenClaw agents: SQLite hot cache, message history, vector search, and structured library.',
2284
3028
  kind: 'context-engine',
2285
3029
  configSchema: buildPluginConfigSchema(hypercompositorConfigSchema),
2286
3030
  register(api) {
2287
3031
  // ── Resolve plugin config from openclaw.json ──
2288
3032
  const pluginCfg = (api.pluginConfig ?? {});
2289
3033
  _pluginConfig = pluginCfg;
2290
- // ── Resolve HYPERMEM_PATH: pluginConfig > npm resolve > dev fallback ──
3034
+ // ── Resolve HYPERMEM_PATH: pluginConfig > ESM package resolve > dev fallback ──
2291
3035
  if (pluginCfg.hyperMemPath) {
2292
3036
  HYPERMEM_PATH = pluginCfg.hyperMemPath;
2293
3037
  console.log(`[hypermem-plugin] Using configured hyperMemPath: ${HYPERMEM_PATH}`);
2294
3038
  }
2295
3039
  else {
2296
3040
  try {
2297
- HYPERMEM_PATH = require.resolve('@psiclawops/hypermem');
2298
- console.log(`[hypermem-plugin] Resolved @psiclawops/hypermem from node_modules: ${HYPERMEM_PATH}`);
3041
+ const resolvedUrl = import.meta.resolve('@psiclawops/hypermem');
3042
+ HYPERMEM_PATH = resolvedUrl.startsWith('file:') ? fileURLToPath(resolvedUrl) : resolvedUrl;
2299
3043
  }
2300
3044
  catch {
2301
3045
  // Dev fallback: resolve relative to plugin directory
@@ -2305,6 +3049,55 @@ export default definePluginEntry({
2305
3049
  }
2306
3050
  }
2307
3051
  api.registerContextEngine('hypercompositor', () => engine);
3052
+ // ── HyperForm config dir init ──
3053
+ // Copy defaults and guide to ~/.openclaw/hypermem/config/ on every load.
3054
+ // Defaults are overwritten on plugin update. Active config files are never touched.
3055
+ void (async () => {
3056
+ try {
3057
+ const dataDir = _pluginConfig.dataDir ?? path.join(os.homedir(), '.openclaw/hypermem');
3058
+ const configDir = path.join(dataDir, 'config');
3059
+ await fs.mkdir(configDir, { recursive: true });
3060
+ const __pluginDir = path.dirname(fileURLToPath(import.meta.url));
3061
+ const defaultsSrc = path.resolve(__pluginDir, '../../../config-defaults');
3062
+ const defaultFiles = [
3063
+ 'hyperform-fos-defaults.json',
3064
+ 'hyperform-mod-defaults.json',
3065
+ 'HYPERFORM-GUIDE.md',
3066
+ ];
3067
+ for (const fname of defaultFiles) {
3068
+ const src = path.join(defaultsSrc, fname);
3069
+ const dest = path.join(configDir, fname);
3070
+ try {
3071
+ await fs.copyFile(src, dest);
3072
+ }
3073
+ catch {
3074
+ // defaults may not exist in dev builds — non-fatal
3075
+ }
3076
+ }
3077
+ // On first install, copy defaults as active config if active files don't exist
3078
+ for (const [src, dest] of [
3079
+ ['hyperform-fos-defaults.json', 'hyperform-fos.json'],
3080
+ ['hyperform-mod-defaults.json', 'hyperform-mod.json'],
3081
+ ]) {
3082
+ const destPath = path.join(configDir, dest);
3083
+ try {
3084
+ await fs.access(destPath);
3085
+ }
3086
+ catch {
3087
+ // Active config doesn't exist — copy defaults as starting point
3088
+ try {
3089
+ await fs.copyFile(path.join(configDir, src), destPath);
3090
+ }
3091
+ catch {
3092
+ // non-fatal
3093
+ }
3094
+ }
3095
+ }
3096
+ }
3097
+ catch {
3098
+ // non-fatal — HyperForm config init is best-effort
3099
+ }
3100
+ })();
2308
3101
  // P1.7: Bind TaskFlow runtime for task visibility — best-effort.
2309
3102
  // Guard: api.runtime.taskFlow may not exist on older OpenClaw versions.
2310
3103
  try {