@psiclawops/hypercompositor 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js ADDED
@@ -0,0 +1,2025 @@
1
+ /**
2
+ * hypermem Context Engine Plugin
3
+ *
4
+ * Implements OpenClaw's ContextEngine interface backed by hypermem's
5
+ * four-layer memory architecture:
6
+ *
7
+ * L1 Redis — hot session working memory
8
+ * L2 Messages — per-agent conversation history (SQLite)
9
+ * L3 Vectors — semantic + keyword search (KNN + FTS5)
10
+ * L4 Library — facts, knowledge, episodes, preferences
11
+ *
12
+ * Lifecycle mapping:
13
+ * ingest() → record each message into messages.db
14
+ * assemble() → compositor builds context from all four layers
15
+ * compact() → delegate to runtime (ownsCompaction: false)
16
+ * afterTurn() → trigger background indexer (fire-and-forget)
17
+ * bootstrap() → warm Redis session, register agent in fleet
18
+ * dispose() → close hypermem connections
19
+ *
20
+ * Session key format expected: "agent:<agentId>:<channel>:<name>"
21
+ */
22
+ import { definePluginEntry, emptyPluginConfigSchema } from 'openclaw/plugin-sdk/plugin-entry';
23
+ import { detectTopicShift, stripMessageMetadata, SessionTopicMap, applyToolGradientToWindow, canPersistReshapedHistory } from '@psiclawops/hypermem';
24
+ import { evictStaleContent } from '@psiclawops/hypermem/image-eviction';
25
+ import { repairToolPairs } from '@psiclawops/hypermem';
26
+ import os from 'os';
27
+ import path from 'path';
28
+ import fs from 'fs/promises';
29
+ import { createRequire } from 'module';
30
// ─── hypermem singleton ────────────────────────────────────────
// Runtime load is dynamic (hypermem is a sibling package loaded from repo dist,
// not installed via npm). Types come from the core package devDependency.
// This pattern keeps the runtime path stable while TypeScript resolves types
// from the canonical source — no more local shim drift.
const HYPERMEM_PATH = path.join(os.homedir(), '.openclaw/workspace/repo/hypermem/dist/index.js');
// CJS require bridge for the ESM context.
// NOTE(review): `require` is unused in this chunk — presumably used further
// down the file; confirm before removing.
const require = createRequire(import.meta.url);
// Lazily-populated singletons — all assigned inside getHyperMem() on first use.
let _hm = null; // resolved HyperMem core instance (null until init completes)
let _hmInitPromise = null; // in-flight init promise, shared by concurrent callers
let _indexer = null; // background indexer handle (started during init, best-effort)
let _fleetStore = null; // FleetStore bound to the library DB
let _generateEmbeddings = null; // (texts) => embeddings, bound to _embeddingConfig
let _embeddingConfig = null; // embedding provider settings built from user config
// P1.7: TaskFlow runtime reference — bound at registration time, best-effort.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
let _taskFlowRuntime = null;
// ─── Eviction config cache ────────────────────────────────────
// Populated from user config during hypermem init. Stored here so
// assemble() (which can't await loadUserConfig) can read it without
// re-reading disk on every turn.
let _evictionConfig;
// ─── Context window reserve cache ────────────────────────────
// Populated from user config during hypermem init. Ensures hypermem leaves
// a guaranteed headroom fraction for system prompts, tool results, and
// incoming data — preventing the trim tiers from firing too close to the edge.
//
// contextWindowSize: full model context window in tokens (default: 128_000)
// contextWindowReserve: fraction [0.0–0.5] to keep free (default: 0.25)
//
// Effective history budget = (windowSize * (1 - reserve)) - overheadFallback
// e.g. 128k * 0.75 - 28k = 68k for council agents at 25% reserve
let _contextWindowSize = 128_000;
let _contextWindowReserve = 0.25;
// When true, the tool gradient is left to the host's contextPruning instead
// of being applied here (set from userConfig.deferToolPruning).
let _deferToolPruning = false;
// Cache replay threshold: 15min default. Set to 0 in user config to disable.
let _cacheReplayThresholdMs = 900_000;
// ─── System overhead cache ────────────────────────────────────
// Caches the non-history token cost (contextBlock + runtime system prompt)
// from the last full compose per session key. Used in tool-loop turns to
// return an honest estimatedTokens without re-running the full compose
// pipeline. Map key = resolved session key.
const _overheadCache = new Map();
72
// Tier-aware conservative fallback used when no cached overhead value exists
// (cold session, first turn after restart). Over-estimating is the safe
// direction: a false-positive compact is cheaper than letting context blow
// past budget.
const OVERHEAD_FALLBACK = {
    council: 28_000,
    director: 28_000,
    specialist: 18_000,
};
const OVERHEAD_FALLBACK_DEFAULT = 15_000;
/**
 * Resolve the conservative system-overhead estimate (in tokens) for an
 * agent tier. A missing tier or an unknown tier name both fall back to
 * OVERHEAD_FALLBACK_DEFAULT.
 */
function getOverheadFallback(tier) {
    const tierEstimate = tier ? OVERHEAD_FALLBACK[tier] : undefined;
    return tierEstimate ?? OVERHEAD_FALLBACK_DEFAULT;
}
86
/**
 * Compute the effective history budget for trim and compact operations.
 *
 * Priority:
 *   1. tokenBudget passed by the runtime (most precise)
 *   2. Derived from context window config: windowSize * (1 - reserve)
 *
 * The reserve fraction (default 0.25 = 25%) guarantees headroom for:
 *   - System prompt + identity blocks (~28k for council agents)
 *   - Incoming tool results (can be 10–30k in parallel web_search bursts)
 *   - Response generation buffer (~4k)
 *
 * Without the reserve, trim tiers fire at 75–85% of tokenBudget but total
 * context (history + system) exceeds the model window before trim
 * completes, causing result stripping.
 */
function computeEffectiveBudget(tokenBudget) {
    // Runtime-supplied budget wins; otherwise derive from the cached window
    // config, flooring to avoid fractional tokens.
    return tokenBudget
        ? tokenBudget
        : Math.floor(_contextWindowSize * (1 - _contextWindowReserve));
}
108
/**
 * Load optional user config from ~/.openclaw/hypermem/config.json.
 *
 * Lets operators override compositor tuning knobs without editing plugin
 * source. Unknown keys are ignored. A missing file is skipped silently;
 * any other read/parse failure logs a warning. Both failure modes return
 * an empty object so callers can destructure safely.
 */
async function loadUserConfig() {
    const configPath = path.join(os.homedir(), '.openclaw/hypermem/config.json');
    try {
        const parsed = JSON.parse(await fs.readFile(configPath, 'utf-8'));
        console.log(`[hypermem-plugin] Loaded user config from ${configPath}`);
        return parsed;
    }
    catch (err) {
        // ENOENT = no config file, which is the normal case; anything else
        // (permissions, malformed JSON — SyntaxError has no .code) is logged.
        if (err.code !== 'ENOENT') {
            console.warn(`[hypermem-plugin] Failed to parse config.json (using defaults):`, err.message);
        }
        return {};
    }
}
128
/**
 * Lazily create and memoize the HyperMem singleton.
 *
 * Concurrency: the first caller kicks off init and caches the in-flight
 * promise; concurrent callers await that same promise. Once resolved,
 * subsequent calls return the instance directly.
 *
 * Fix: a FAILED init no longer poisons the cache. Previously the rejected
 * promise stayed in _hmInitPromise forever, so every later call replayed
 * the same rejection (e.g. after a transient failure at boot). The
 * rejection still propagates to the current caller; the cached promise is
 * cleared so the next call retries from scratch.
 *
 * Side effects on success: sets _hm, _fleetStore, _generateEmbeddings,
 * _embeddingConfig, _evictionConfig and the context-window tuning lets,
 * and starts the background indexer (best-effort).
 */
async function getHyperMem() {
    if (_hm)
        return _hm;
    if (_hmInitPromise)
        return _hmInitPromise;
    _hmInitPromise = (async () => {
        // Dynamic import — hypermem is loaded from repo dist
        const mod = await import(HYPERMEM_PATH);
        const HyperMem = mod.HyperMem;
        // Capture generateEmbeddings from the dynamic module for use in afterTurn().
        // Bind it with the user's embedding config so the pre-compute path uses the
        // same provider as the indexer (Ollama vs OpenAI).
        if (typeof mod.generateEmbeddings === 'function') {
            const rawGenerate = mod.generateEmbeddings;
            _generateEmbeddings = (texts) => rawGenerate(texts, _embeddingConfig ?? undefined);
        }
        // Load optional user config — compositor tuning overrides
        const userConfig = await loadUserConfig();
        // Build embedding config from user config. Applied to both HyperMem core
        // (VectorStore init) and the _generateEmbeddings closure above.
        if (userConfig.embedding) {
            const ue = userConfig.embedding;
            _embeddingConfig = {
                provider: ue.provider ?? 'ollama',
                ollamaUrl: ue.ollamaUrl ?? 'http://localhost:11434',
                openaiBaseUrl: ue.openaiBaseUrl ?? 'https://api.openai.com/v1',
                openaiApiKey: ue.openaiApiKey,
                // Apply provider-specific model + dimension defaults when not explicitly set
                model: ue.model ?? (ue.provider === 'openai' ? 'text-embedding-3-small' : 'nomic-embed-text'),
                dimensions: ue.dimensions ?? (ue.provider === 'openai' ? 1536 : 768),
                timeout: ue.timeout ?? 10000,
                batchSize: ue.batchSize ?? (ue.provider === 'openai' ? 128 : 32),
            };
            console.log(`[hypermem-plugin] Embedding provider: ${_embeddingConfig.provider} ` +
                `(model: ${_embeddingConfig.model}, ${_embeddingConfig.dimensions}d, batch: ${_embeddingConfig.batchSize})`);
        }
        // Cache eviction config at module scope so assemble() can read it
        // synchronously without re-reading disk on every turn.
        _evictionConfig = userConfig.eviction ?? {};
        // Cache context window config so all three trim hotpaths use the same values.
        if (typeof userConfig.contextWindowSize === 'number' && userConfig.contextWindowSize > 0) {
            _contextWindowSize = userConfig.contextWindowSize;
        }
        if (typeof userConfig.contextWindowReserve === 'number' &&
            userConfig.contextWindowReserve >= 0 && userConfig.contextWindowReserve <= 0.5) {
            _contextWindowReserve = userConfig.contextWindowReserve;
        }
        if (userConfig.deferToolPruning === true) {
            _deferToolPruning = true;
            console.log('[hypermem-plugin] deferToolPruning: true — tool gradient deferred to host contextPruning');
        }
        if (typeof userConfig.warmCacheReplayThresholdMs === 'number') {
            _cacheReplayThresholdMs = userConfig.warmCacheReplayThresholdMs;
        }
        const reservedTokens = Math.floor(_contextWindowSize * _contextWindowReserve);
        console.log(`[hypermem-plugin] context window: ${_contextWindowSize} tokens, ` +
            `${Math.round(_contextWindowReserve * 100)}% reserved (${reservedTokens} tokens), ` +
            `effective history budget: ${_contextWindowSize - reservedTokens} tokens`);
        const instance = await HyperMem.create({
            dataDir: path.join(os.homedir(), '.openclaw/hypermem'),
            cache: {
                keyPrefix: 'hm:',
                sessionTTL: 14400, // 4h for system/identity/meta slots
                historyTTL: 86400, // 24h for history — ages out, not count-trimmed
            },
            ...(userConfig.compositor ? { compositor: userConfig.compositor } : {}),
            ...(_embeddingConfig ? { embedding: _embeddingConfig } : {}),
        });
        _hm = instance;
        // Wire up fleet store and background indexer from dynamic module
        const { FleetStore: FleetStoreClass, createIndexer } = mod;
        const libraryDb = instance.dbManager.getLibraryDb();
        _fleetStore = new FleetStoreClass(libraryDb);
        try {
            // T1.2: Wire indexer with proper DB accessors and cursor fetcher.
            // The cursor fetcher enables priority-based indexing: messages the model
            // hasn't seen yet (post-cursor) are processed first.
            _indexer = createIndexer((agentId) => instance.dbManager.getMessageDb(agentId), () => instance.dbManager.getLibraryDb(), () => {
                // List agents from fleet_agents table (active only)
                try {
                    const rows = instance.dbManager.getLibraryDb()
                        .prepare("SELECT id FROM fleet_agents WHERE status = 'active'")
                        .all();
                    return rows.map(r => r.id);
                }
                catch {
                    return [];
                }
            }, { enabled: true, periodicInterval: 300000 }, // 5-minute interval
            // Cursor fetcher: reads from Redis → SQLite fallback
            async (agentId, sessionKey) => {
                return instance.getSessionCursor(agentId, sessionKey);
            },
            // Pass vector store so new facts/episodes are embedded at index time
            instance.getVectorStore() ?? undefined,
            // Dreaming config — passed from hypermem user config if set
            userConfig?.dreaming ?? {});
            _indexer.start();
        }
        catch {
            // Non-fatal — indexer wiring can fail without breaking context assembly
        }
        return instance;
    })();
    // Don't cache a rejected init forever: clear the promise on failure so the
    // next getHyperMem() call retries. The current caller still observes the
    // rejection through the promise returned below.
    _hmInitPromise.catch(() => {
        _hmInitPromise = null;
    });
    return _hmInitPromise;
}
234
// ─── Session Key Helpers ────────────────────────────────────────
/**
 * Extract agentId from a session key.
 * Session keys follow: "agent:<agentId>:<channel>:<name>"
 * Falls back to "main" if the key doesn't match the expected format, or if
 * the agentId segment is empty.
 *
 * Fix: a malformed key like "agent:" used to return '' (split yields
 * ['agent', ''] and parts.length >= 2 passed), leaking an empty agentId
 * to callers. It now falls back to 'main' like other malformed keys.
 */
function extractAgentId(sessionKey) {
    if (!sessionKey)
        return 'main';
    const parts = sessionKey.split(':');
    // parts[1] truthiness guards the empty-segment case.
    if (parts[0] === 'agent' && parts.length >= 2 && parts[1]) {
        return parts[1];
    }
    return 'main';
}
249
/**
 * Normalize a session key: prefer the explicit sessionKey param; fall back
 * to a synthetic "session:<sessionId>" built from the UUID. The synthetic
 * key keeps recording working but won't resolve to a named session.
 */
function resolveSessionKey(sessionId, sessionKey) {
    return sessionKey ? sessionKey : `session:${sessionId}`;
}
261
// Placeholder text a bridge writes for a tool call that produced no result.
const SYNTHETIC_MISSING_TOOL_RESULT_TEXT = 'No result provided';
/**
 * Pull the plain-text payload out of an inbound message content field.
 * Strings pass through unchanged; arrays contribute their text parts
 * joined with newlines; anything else yields ''.
 */
function extractTextFromInboundContent(content) {
    if (typeof content === 'string')
        return content;
    if (!Array.isArray(content))
        return '';
    const texts = [];
    for (const part of content) {
        if (part && part.type === 'text' && typeof part.text === 'string') {
            texts.push(part.text);
        }
    }
    return texts.join('\n');
}
273
/**
 * Tally tool call/result pairing health over neutral-format messages.
 * Returns counts plus the ids of calls lacking results (missing), results
 * lacking calls (orphans), and how many results carry the synthetic
 * "No result provided" placeholder.
 */
function collectNeutralToolPairStats(messages) {
    const callIds = new Set();
    const resultIds = new Set();
    let toolCallCount = 0;
    let toolResultCount = 0;
    let syntheticNoResultCount = 0;
    messages.forEach((msg) => {
        (msg.toolCalls ?? []).forEach((tc) => {
            toolCallCount += 1;
            if (tc.id)
                callIds.add(tc.id);
        });
        (msg.toolResults ?? []).forEach((tr) => {
            toolResultCount += 1;
            if (tr.callId)
                resultIds.add(tr.callId);
            if ((tr.content ?? '').trim() === SYNTHETIC_MISSING_TOOL_RESULT_TEXT)
                syntheticNoResultCount += 1;
        });
    });
    const missingToolResultIds = [...callIds].filter((id) => !resultIds.has(id));
    const orphanToolResultIds = [...resultIds].filter((id) => !callIds.has(id));
    return {
        toolCallCount,
        toolResultCount,
        missingToolResultCount: missingToolResultIds.length,
        orphanToolResultCount: orphanToolResultIds.length,
        syntheticNoResultCount,
        missingToolResultIds,
        orphanToolResultIds,
    };
}
305
/**
 * Tally tool call/result pairing health over host-format (agent) messages,
 * where calls live as content blocks ('toolCall' / 'toolUse') on assistant
 * messages and results are standalone role:'toolResult' messages.
 */
function collectAgentToolPairStats(messages) {
    const callIds = new Set();
    const resultIds = new Set();
    let toolCallCount = 0;
    let toolResultCount = 0;
    let syntheticNoResultCount = 0;
    for (const msg of messages) {
        if (msg.role === 'assistant' && Array.isArray(msg.content)) {
            for (const block of msg.content) {
                const isCallBlock = block.type === 'toolCall' || block.type === 'toolUse';
                if (!isCallBlock)
                    continue;
                toolCallCount += 1;
                if (typeof block.id === 'string' && block.id.length > 0)
                    callIds.add(block.id);
            }
        }
        if (msg.role !== 'toolResult')
            continue;
        toolResultCount += 1;
        const toolCallId = typeof msg.toolCallId === 'string' ? msg.toolCallId : '';
        if (toolCallId)
            resultIds.add(toolCallId);
        if (extractTextFromInboundContent(msg.content).trim() === SYNTHETIC_MISSING_TOOL_RESULT_TEXT) {
            syntheticNoResultCount += 1;
        }
    }
    const missingToolResultIds = [...callIds].filter((id) => !resultIds.has(id));
    const orphanToolResultIds = [...resultIds].filter((id) => !callIds.has(id));
    return {
        toolCallCount,
        toolResultCount,
        missingToolResultCount: missingToolResultIds.length,
        orphanToolResultCount: orphanToolResultIds.length,
        syntheticNoResultCount,
        missingToolResultIds,
        orphanToolResultIds,
    };
}
343
/**
 * Accumulate tool-pair health counters into the session's Redis slot.
 * Stored values are merged additively with `delta`; a provided `anomaly`
 * replaces the stored lastAnomaly. Missing or corrupt slot JSON resets
 * the baseline to zeros rather than failing.
 */
async function bumpToolPairMetrics(hm, agentId, sessionKey, delta, anomaly) {
    const slot = 'toolPairMetrics';
    let stored = {};
    try {
        const raw = await hm.cache.getSlot(agentId, sessionKey, slot);
        if (raw)
            stored = JSON.parse(raw);
    }
    catch {
        stored = {}; // corrupt slot — restart counters from zero
    }
    // Additive merge of one counter field.
    const add = (key) => (stored[key] ?? 0) + (delta[key] ?? 0);
    const next = {
        composeCount: add('composeCount'),
        syntheticNoResultIngested: add('syntheticNoResultIngested'),
        preBridgeMissingToolResults: add('preBridgeMissingToolResults'),
        preBridgeOrphanToolResults: add('preBridgeOrphanToolResults'),
        postBridgeMissingToolResults: add('postBridgeMissingToolResults'),
        postBridgeOrphanToolResults: add('postBridgeOrphanToolResults'),
        lastUpdatedAt: new Date().toISOString(),
        lastAnomaly: anomaly ?? stored.lastAnomaly,
    };
    await hm.cache.setSlot(agentId, sessionKey, slot, JSON.stringify(next));
}
366
/**
 * Convert an OpenClaw AgentMessage to hypermem's NeutralMessage format.
 *
 * Handles three tool-call encodings:
 *   - content blocks: { type: 'toolCall' | 'toolUse', id, name, input }
 *   - legacy wire arrays (msg.tool_calls / msg.toolCalls) in OpenAI format:
 *     { id, type: 'function', function: { name, arguments } }
 *   - already-neutral entries carrying { id, name, arguments | input }
 * Wire-array calls take precedence over content-block calls.
 *
 * Tool-result messages (role 'tool' / 'tool_result' / 'toolResult') are
 * re-homed to role 'user' with the payload moved into toolResults —
 * persisting them as assistant rows with orphaned toolResults lets replay
 * retain a tool_result after its matching tool_use was trimmed away,
 * which Anthropic rejects with a 400.
 */
function toNeutralMessage(msg) {
    // Normalize one legacy wire-format tool call to NeutralToolCall shape.
    const normalizeWireCall = (tc) => {
        const fn = tc.function;
        if (fn) {
            // OpenAI wire format: { id, type: 'function', function: { name, arguments } }
            return {
                id: tc.id ?? 'unknown',
                name: fn.name ?? 'unknown',
                arguments: typeof fn.arguments === 'string' ? fn.arguments : JSON.stringify(fn.arguments ?? {}),
            };
        }
        // Already NeutralToolCall-ish or content block format
        let args;
        if (typeof tc.arguments === 'string')
            args = tc.arguments;
        else if (typeof tc.input === 'string')
            args = tc.input;
        else
            args = JSON.stringify(tc.arguments ?? tc.input ?? {});
        return { id: tc.id ?? 'unknown', name: tc.name ?? 'unknown', arguments: args };
    };
    // Text extraction: a string passes through; an array contributes its
    // text parts (joined with newlines) or null when it has none.
    let textContent = null;
    if (typeof msg.content === 'string') {
        textContent = msg.content;
    }
    else if (Array.isArray(msg.content)) {
        const textParts = [];
        for (const c of msg.content) {
            if (c.type === 'text' && typeof c.text === 'string')
                textParts.push(c.text);
        }
        textContent = textParts.length > 0 ? textParts.join('\n') : null;
    }
    // Tool calls embedded as content blocks.
    const contentBlockToolCalls = [];
    if (Array.isArray(msg.content)) {
        for (const c of msg.content) {
            if (c.type !== 'toolCall' && c.type !== 'toolUse')
                continue;
            contentBlockToolCalls.push({
                id: c.id ?? 'unknown',
                name: c.name ?? 'unknown',
                arguments: typeof c.input === 'string' ? c.input : JSON.stringify(c.input ?? {}),
            });
        }
    }
    // Legacy wire-format tool calls (either spelling of the field).
    const rawToolCalls = msg.tool_calls ?? msg.toolCalls ?? null;
    let toolCalls = null;
    if (rawToolCalls && rawToolCalls.length > 0) {
        toolCalls = rawToolCalls.map(normalizeWireCall);
    }
    else if (contentBlockToolCalls.length > 0) {
        toolCalls = contentBlockToolCalls;
    }
    // OpenClaw uses role 'toolResult' (camelCase). Support all three spellings.
    const isToolResultMsg = msg.role === 'tool' || msg.role === 'tool_result' || msg.role === 'toolResult';
    let toolResults = null;
    if (isToolResultMsg && textContent) {
        toolResults = [{
            callId: msg.tool_call_id ?? msg.toolCallId ?? 'unknown',
            name: msg.name ?? msg.toolName ?? 'tool',
            content: textContent,
        }];
        textContent = null; // payload now owned by toolResults, not duplicated
    }
    return {
        role: isToolResultMsg ? 'user' : msg.role,
        textContent,
        toolCalls: isToolResultMsg ? null : toolCalls,
        toolResults,
    };
}
446
// ─── Context Engine Implementation ─────────────────────────────
/**
 * In-flight warm dedup map.
 * Key: "agentId::sessionKey" — Value: the in-progress warm() Promise.
 * Prevents concurrent bootstrap() calls from firing multiple full warms
 * for the same session key before the first one sets the Redis history key.
 * Cleared on completion (success or failure) so the next cold start retries.
 */
// Map<string, Promise> — written and consulted by bootstrap() below.
const _warmInFlight = new Map();
455
// ─── Token estimation ──────────────────────────────────────────
/**
 * Estimate tokens for a string via the same ~4 chars/token heuristic the
 * hypermem compositor uses. Fast and allocation-free — good enough for a
 * budget guard; no tokenizer library needed. Null/undefined/'' yield 0.
 */
function estimateTokens(text) {
    return text ? Math.ceil(text.length / 4) : 0;
}
466
/**
 * True when a message carries a structured tool CALL — either a populated
 * toolCalls array or a content block of type 'toolCall' / 'toolUse' /
 * 'tool_use'.
 *
 * Fix: only 'toolCall' and 'tool_use' were recognized before; the
 * 'toolUse' spelling (accepted by toNeutralMessage and
 * collectAgentToolPairStats in this file) was silently missed, so
 * toolUse-block messages never anchored a transcript cluster.
 */
function hasStructuredToolCallMessage(msg) {
    if (Array.isArray(msg.toolCalls) && msg.toolCalls.length > 0)
        return true;
    if (!Array.isArray(msg.content))
        return false;
    return msg.content.some(part => part.type === 'toolCall' || part.type === 'toolUse' || part.type === 'tool_use');
}
473
/**
 * True when a message carries a structured tool RESULT: a populated
 * toolResults array, a result-flavored role ('toolResult' / 'tool' /
 * 'tool_result'), or a content block of type 'tool_result' / 'toolResult'.
 */
function hasStructuredToolResultMessage(msg) {
    if (Array.isArray(msg.toolResults) && msg.toolResults.length > 0)
        return true;
    const resultRoles = ['toolResult', 'tool', 'tool_result'];
    if (resultRoles.includes(msg.role))
        return true;
    if (!Array.isArray(msg.content))
        return false;
    return msg.content.some((part) => part.type === 'tool_result' || part.type === 'toolResult');
}
482
/**
 * Collect every non-empty tool-call id on a message, from both the
 * toolCalls array and call-typed content blocks.
 *
 * Fix: recognize the 'toolUse' block spelling alongside 'toolCall' and
 * 'tool_use', matching toNeutralMessage — previously toolUse-block ids
 * were dropped, so their results looked orphaned during clustering.
 */
function getToolCallIds(msg) {
    const ids = [];
    if (Array.isArray(msg.toolCalls)) {
        ids.push(...msg.toolCalls.map(tc => tc.id).filter((id) => typeof id === 'string' && id.length > 0));
    }
    if (Array.isArray(msg.content)) {
        for (const part of msg.content) {
            const isCallBlock = part.type === 'toolCall' || part.type === 'toolUse' || part.type === 'tool_use';
            if (isCallBlock && typeof part.id === 'string' && part.id.length > 0) {
                ids.push(part.id);
            }
        }
    }
    return ids;
}
496
/**
 * Collect every non-empty tool-result callId on a message, from the
 * toolResults array plus the toolCallId / tool_call_id scalar fields
 * (both spellings may appear, so both are checked).
 */
function getToolResultIds(msg) {
    const ids = [];
    const results = Array.isArray(msg.toolResults) ? msg.toolResults : [];
    for (const tr of results) {
        if (typeof tr.callId === 'string' && tr.callId.length > 0)
            ids.push(tr.callId);
    }
    for (const scalar of [msg.toolCallId, msg.tool_call_id]) {
        if (typeof scalar === 'string' && scalar.length > 0)
            ids.push(scalar);
    }
    return ids;
}
509
/**
 * Group a transcript into clusters that must be kept or trimmed together.
 *
 * An assistant message with structured tool calls absorbs the run of
 * tool-result messages that immediately follows it; when both sides carry
 * ids, a result is only absorbed if at least one of its ids matches a
 * call id. A message that is itself a bare tool result absorbs the run of
 * following result-only messages. Everything else forms a singleton
 * cluster. Every input message lands in exactly one cluster, in order.
 */
function clusterTranscriptMessages(messages) {
    const clusters = [];
    for (let i = 0; i < messages.length; i++) {
        const current = messages[i];
        const cluster = [current];
        if (hasStructuredToolCallMessage(current)) {
            const callIds = new Set(getToolCallIds(current));
            let j = i + 1;
            // Absorb the following result messages that answer this call.
            while (j < messages.length) {
                const candidate = messages[j];
                if (!hasStructuredToolResultMessage(candidate))
                    break;
                const resultIds = getToolResultIds(candidate);
                // Id-bearing results that match none of our call ids belong
                // to some other call — stop absorbing here.
                if (callIds.size > 0 && resultIds.length > 0 && !resultIds.some(id => callIds.has(id)))
                    break;
                cluster.push(candidate);
                j++;
            }
            // j - 1 because the outer loop's i++ advances past the absorbed run.
            i = j - 1;
        }
        else if (hasStructuredToolResultMessage(current)) {
            let j = i + 1;
            // A leading bare result absorbs the following result-only run
            // (stopping at anything that also carries a tool call).
            while (j < messages.length) {
                const candidate = messages[j];
                if (!hasStructuredToolResultMessage(candidate) || hasStructuredToolCallMessage(candidate))
                    break;
                cluster.push(candidate);
                j++;
            }
            i = j - 1;
        }
        clusters.push(cluster);
    }
    return clusters;
}
544
/**
 * Estimate the total token cost of the current Redis history window for a
 * session. Counts text content plus tool call/result JSON per message.
 * Returns 0 for an empty window or on any cache error.
 */
async function estimateWindowTokens(hm, agentId, sessionKey) {
    try {
        // Prefer the hot window cache (set after compaction trims the history),
        // but fall back to the raw history list: the window cache is only
        // populated after compact() calls setWindow(), so a fresh or
        // never-compacted session has no window entry. Without the fallback
        // this returns 0 → compact() always reports within_budget → overflow.
        const window = await hm.cache.getWindow(agentId, sessionKey)
            ?? await hm.cache.getHistory(agentId, sessionKey);
        if (!window || window.length === 0)
            return 0;
        let total = 0;
        for (const msg of window) {
            total += estimateTokens(msg.textContent);
            // Tool payloads are dense JSON — /2 instead of /4 avoids a
            // systematic undercount.
            if (msg.toolCalls)
                total += Math.ceil(JSON.stringify(msg.toolCalls).length / 2);
            if (msg.toolResults)
                total += Math.ceil(JSON.stringify(msg.toolResults).length / 2);
        }
        return total;
    }
    catch {
        return 0;
    }
}
574
/**
 * Truncate a JSONL session file to keep only the last `targetDepth` message
 * entries plus all non-message entries (header, compaction, model_change, etc).
 *
 * Needed because the runtime loads messages from the JSONL file (not Redis)
 * to build its overflow estimate. When ownsCompaction=true, OpenClaw's
 * truncateSessionAfterCompaction() is never called, so we do it ourselves.
 *
 * @param sessionFile absolute path to the session JSONL file
 * @param targetDepth maximum message entries to retain
 * @param force rewrite even when under the 1.5× slack threshold
 * @param tokenBudgetOverride optional token cap (~4 chars/token) — keeps the
 *        newest messages that fit the budget, at most targetDepth of them,
 *        always at least one
 * @returns true if the file was rewritten; false when no action was needed,
 *          the file didn't exist, or the rewrite failed (non-fatal)
 *
 * Fix: retention previously matched entries by raw line TEXT (Set of line
 * strings), so duplicate identical message lines were all kept whenever any
 * one of them fell inside the kept window. Retention now matches by entry
 * object identity, so only the selected occurrences survive.
 */
async function truncateJsonlIfNeeded(sessionFile, targetDepth, force = false, tokenBudgetOverride) {
    if (!sessionFile || typeof sessionFile !== 'string')
        return false;
    try {
        const raw = await fs.readFile(sessionFile, 'utf-8');
        const lines = raw.split('\n').filter(l => l.trim());
        if (lines.length === 0)
            return false;
        // First line is the session header and is always preserved.
        const header = lines[0];
        const entries = [];
        for (let i = 1; i < lines.length; i++) {
            try {
                entries.push({ line: lines[i], parsed: JSON.parse(lines[i]) });
            }
            catch {
                // Unparseable line — treat as metadata so it is never dropped.
                entries.push({ line: lines[i], parsed: null });
            }
            // Yield every 100 entries to avoid blocking the event loop
            if (i % 100 === 0)
                await new Promise(r => setImmediate(r));
        }
        const messageEntries = [];
        const metadataEntries = [];
        for (const e of entries) {
            if (e.parsed?.type === 'message') {
                messageEntries.push(e);
            }
            else {
                metadataEntries.push(e);
            }
        }
        // Only rewrite if meaningfully over target — unless force=true (over-budget path)
        if (!force && messageEntries.length <= targetDepth * 1.5)
            return false;
        // If a token budget is specified, keep newest messages within that budget
        let keptMessages;
        if (tokenBudgetOverride) {
            let tokenCount = 0;
            const kept = [];
            for (let i = messageEntries.length - 1; i >= 0 && kept.length < targetDepth; i--) {
                const m = messageEntries[i].parsed?.message ?? messageEntries[i].parsed;
                let t = 0;
                if (m?.content)
                    t += Math.ceil(JSON.stringify(m.content).length / 4);
                if (m?.textContent)
                    t += Math.ceil(String(m.textContent).length / 4);
                if (m?.toolResults)
                    t += Math.ceil(JSON.stringify(m.toolResults).length / 4);
                if (m?.toolCalls)
                    t += Math.ceil(JSON.stringify(m.toolCalls).length / 4);
                // Always keep at least one message, even if it alone busts the budget.
                if (tokenCount + t > tokenBudgetOverride && kept.length > 0)
                    break;
                kept.unshift(messageEntries[i]);
                tokenCount += t;
            }
            keptMessages = kept;
        }
        else {
            keptMessages = messageEntries.slice(-targetDepth);
        }
        // Retain by entry IDENTITY (not line text) so duplicate identical
        // message lines outside the kept window are actually dropped.
        const keep = new Set([...metadataEntries, ...keptMessages]);
        const rebuilt = [header];
        for (const e of entries) {
            if (keep.has(e)) {
                rebuilt.push(e.line);
            }
        }
        // Atomic-ish replace: write a temp file then rename over the original,
        // so a crash mid-write never leaves a half-truncated session file.
        const tmpPath = `${sessionFile}.hm-compact-${process.pid}-${Date.now()}.tmp`;
        await fs.writeFile(tmpPath, rebuilt.join('\n') + '\n', 'utf-8');
        await fs.rename(tmpPath, sessionFile);
        console.log(`[hypermem-plugin] truncateJsonl: ${entries.length} → ${rebuilt.length - 1} entries ` +
            `(kept ${keptMessages.length} messages + ${metadataEntries.length} metadata, file=${sessionFile.split('/').pop()})`);
        return true;
    }
    catch (err) {
        // ENOENT is expected when session file doesn't exist yet — not worth logging
        if (err.code !== 'ENOENT') {
            console.warn('[hypermem-plugin] truncateJsonl failed (non-fatal):', err.message);
        }
        return false;
    }
}
669
+ function createHyperMemEngine() {
670
+ return {
671
+ info: {
672
+ id: 'hypermem',
673
+ name: 'hypermem context engine',
674
+ version: '0.1.0',
675
+ // We own compaction — assemble() trims to budget via the compositor safety
676
+ // valve, so runtime compaction is never needed. compact() handles any
677
+ // explicit calls by trimming the Redis history window directly.
678
+ ownsCompaction: true,
679
+ },
680
+ /**
681
+ * Bootstrap: warm Redis session for this agent, register in fleet if needed.
682
+ *
683
+ * Idempotent — skips warming if the session is already hot in Redis.
684
+ * Without this guard, the OpenClaw runtime calls bootstrap() on every turn
685
+ * (not just session start), causing:
686
+ * 1. A SQLite read + Redis pipeline push on every message (lane lock)
687
+ * 2. 250 messages re-pushed to Redis per turn (dedup in pushHistory helps,
688
+ * but the read cost still runs)
689
+ * 3. Followup queue drain blocked until warm completes
690
+ *
691
+ * With this guard: cold start = full warm; hot session = single EXISTS check.
692
+ */
693
+ async bootstrap({ sessionId, sessionKey }) {
694
+ try {
695
+ const hm = await getHyperMem();
696
+ const sk = resolveSessionKey(sessionId, sessionKey);
697
+ const agentId = extractAgentId(sk);
698
+ // EC1 pre-flight: proactively truncate the JSONL on disk if it is over
699
+ // the safe replay threshold. Fires BEFORE warm() so the next restart
700
+ // (not this one) loads a clean file. Combined with the preflight script
701
+ // run before each gateway restart, this closes the EC1 loop entirely.
702
+ //
703
+ // Why this doesn't help the CURRENT session:
704
+ // OpenClaw has already replayed the JSONL into memory by the time
705
+ // bootstrap() is called. Disk truncation here is forward-looking only.
706
+ //
707
+ // Why it still matters:
708
+ // Without this, a session that saturates in operation (no preflight ran)
709
+ // would restart saturated on the next boot. This ensures at least one
710
+ // restart cycle later the session comes up clean.
711
+ try {
712
+ const sessionDir = path.join(os.homedir(), '.openclaw', 'agents', agentId, 'sessions');
713
+ const jsonlPath = path.join(sessionDir, `${sessionId}.jsonl`);
714
+ // EC1 threshold: 60 conversation messages (token-capped at 40% of 128k)
715
+ const EC1_MAX_MESSAGES = 60;
716
+ const EC1_TOKEN_BUDGET = Math.floor(128_000 * 0.40);
717
+ const truncated = await truncateJsonlIfNeeded(jsonlPath, EC1_MAX_MESSAGES, false, EC1_TOKEN_BUDGET);
718
+ if (truncated) {
719
+ console.log(`[hypermem-plugin] bootstrap: proactive JSONL trim for ${agentId} ` +
720
+ `(EC1 guard — next restart will load clean)`);
721
+ }
722
+ }
723
+ catch {
724
+ // Non-fatal — JSONL truncation is best-effort
725
+ }
726
+ // Fast path: if session already has history in Redis, skip warm entirely.
727
+ // sessionExists() is a single EXISTS call — sub-millisecond cost.
728
+ const alreadyWarm = await hm.cache.sessionExists(agentId, sk);
729
+ if (alreadyWarm) {
730
+ return { bootstrapped: true };
731
+ }
732
+ // In-flight dedup: if a warm is already running for this session key,
733
+ // reuse that promise instead of launching a second concurrent warm.
734
+ const inflightKey = `${agentId}::${sk}`;
735
+ const existing = _warmInFlight.get(inflightKey);
736
+ if (existing) {
737
+ await existing;
738
+ return { bootstrapped: true };
739
+ }
740
+ // Cold start: warm Redis with the session — pre-loads history + slots
741
+ // CRIT-002: Load identity block (SOUL.md + IDENTITY.md + MOTIVATIONS.md)
742
+ // and pass into warm() so the compositor identity slot is populated.
743
+ // Previously opts.identity was always undefined — the slot was allocated
744
+ // but always empty. Non-fatal: missing files are silently skipped.
745
+ let identityBlock;
746
+ try {
747
+ // Council agents live at workspace-council/<agentId>/
748
+ // Other agents at workspace/<agentId>/ — try council path first
749
+ const homedir = os.homedir();
750
+ const councilPath = path.join(homedir, '.openclaw', 'workspace-council', agentId);
751
+ const workspacePath = path.join(homedir, '.openclaw', 'workspace', agentId);
752
+ let wsPath = councilPath;
753
+ try {
754
+ await fs.access(councilPath);
755
+ }
756
+ catch {
757
+ wsPath = workspacePath;
758
+ }
759
+ const identityFiles = ['SOUL.md', 'IDENTITY.md', 'MOTIVATIONS.md', 'STYLE.md'];
760
+ const parts = [];
761
+ for (const fname of identityFiles) {
762
+ try {
763
+ const content = await fs.readFile(path.join(wsPath, fname), 'utf-8');
764
+ if (content.trim())
765
+ parts.push(content.trim());
766
+ }
767
+ catch {
768
+ // File absent — skip silently
769
+ }
770
+ }
771
+ if (parts.length > 0)
772
+ identityBlock = parts.join('\n\n');
773
+ }
774
+ catch {
775
+ // Identity load is best-effort — never block bootstrap on this
776
+ }
777
+ // Capture wsPath for post-warm seeding (declared in the identity block above)
778
+ let _wsPathForSeed;
779
+ try {
780
+ const homedir2 = os.homedir();
781
+ const councilPath2 = path.join(homedir2, '.openclaw', 'workspace-council', agentId);
782
+ const workspacePath2 = path.join(homedir2, '.openclaw', 'workspace', agentId);
783
+ try {
784
+ await fs.access(councilPath2);
785
+ _wsPathForSeed = councilPath2;
786
+ }
787
+ catch {
788
+ _wsPathForSeed = workspacePath2;
789
+ }
790
+ }
791
+ catch { /* non-fatal */ }
792
+ const warmPromise = hm.warm(agentId, sk, identityBlock ? { identity: identityBlock } : undefined).finally(() => {
793
+ _warmInFlight.delete(inflightKey);
794
+ });
795
+ _warmInFlight.set(inflightKey, warmPromise);
796
+ await warmPromise;
797
+ // ACA doc seeding — fire-and-forget after warm.
798
+ // Idempotent: WorkspaceSeeder skips files whose hash hasn't changed.
799
+ // Seeds SOUL.md, TOOLS.md, AGENTS.md, POLICY.md etc. into library.db
800
+ // doc_chunks so trigger-based retrieval can serve them at compose time.
801
+ if (_wsPathForSeed) {
802
+ const wsPathForSeed = _wsPathForSeed;
803
+ hm.seedWorkspace(wsPathForSeed, { agentId }).then(seedResult => {
804
+ if (seedResult.totalInserted > 0 || seedResult.reindexed > 0) {
805
+ console.log(`[hypermem-plugin] bootstrap: seeded workspace docs for ${agentId} ` +
806
+ `(+${seedResult.totalInserted} chunks, ${seedResult.reindexed} reindexed, ` +
807
+ `${seedResult.skipped} unchanged, ${seedResult.errors.length} errors)`);
808
+ }
809
+ }).catch(err => {
810
+ console.warn('[hypermem-plugin] bootstrap: workspace seeding failed (non-fatal):', err.message);
811
+ });
812
+ }
813
+ // Post-warm pressure check: if messages.db had accumulated history,
814
+ // warm() may have loaded the session straight to 80%+. Pre-trim now
815
+ // so the first turn has headroom instead of starting saturated.
816
+ // This is the "restart at 98%" failure mode reported by Helm 2026-04-05:
817
+ // JSONL truncation + Redis flush isn't enough if messages.db is still full
818
+ // and warm() reloads it. Trim here closes the loop.
819
+ try {
820
+ const postWarmTokens = await estimateWindowTokens(hm, agentId, sk);
821
+ // Use a conservative 90k default; if the session is genuinely large,
822
+ // we'll underestimate budget and trim more aggressively — that's fine.
823
+ const warmBudget = 90_000;
824
+ const warmPressure = postWarmTokens / warmBudget;
825
+ if (warmPressure > 0.80) {
826
+ const warmTrimTarget = warmPressure > 0.90 ? 0.40 : 0.55;
827
+ const warmTrimBudget = Math.floor(warmBudget * warmTrimTarget);
828
+ const warmTrimmed = await hm.cache.trimHistoryToTokenBudget(agentId, sk, warmTrimBudget);
829
+ if (warmTrimmed > 0) {
830
+ await hm.cache.invalidateWindow(agentId, sk);
831
+ console.log(`[hypermem-plugin] bootstrap: high-pressure startup ` +
832
+ `(${(warmPressure * 100).toFixed(1)}%), pre-trimmed Redis to ` +
833
+ `~${warmTrimTarget * 100}% (${warmTrimmed} msgs dropped)`);
834
+ }
835
+ }
836
+ }
837
+ catch {
838
+ // Non-fatal — first turn's tool-loop trim is the fallback
839
+ }
840
+ return { bootstrapped: true };
841
+ }
842
+ catch (err) {
843
+ // Bootstrap failure is non-fatal — log and continue
844
+ console.warn('[hypermem-plugin] bootstrap failed:', err.message);
845
+ return { bootstrapped: false, reason: err.message };
846
+ }
847
+ },
848
+ /**
849
+ * Ingest a single message into hypermem's message store.
850
+ * Skip heartbeats — they're noise in the memory store.
851
+ */
852
+ async ingest({ sessionId, sessionKey, message, isHeartbeat }) {
853
+ if (isHeartbeat) {
854
+ return { ingested: false };
855
+ }
856
+ // Skip system messages — they come from the runtime, not the conversation
857
+ const msg = message;
858
+ if (msg.role === 'system') {
859
+ return { ingested: false };
860
+ }
861
+ try {
862
+ const hm = await getHyperMem();
863
+ const sk = resolveSessionKey(sessionId, sessionKey);
864
+ const agentId = extractAgentId(sk);
865
+ let neutral = toNeutralMessage(msg);
866
+ // Route to appropriate record method based on role.
867
+ // User messages are intentionally NOT recorded here — afterTurn() handles
868
+ // user recording with proper metadata stripping (stripMessageMetadata).
869
+ // Recording here too causes dual-write: once raw (here), once clean (afterTurn).
870
+ if (neutral.role === 'user') {
871
+ return { ingested: false };
872
+ }
873
+ // ── Pre-ingestion wave guard ──────────────────────────────────────────
874
+ // Tool result payloads can be 10k-50k tokens each. When a parallel tool
875
+ // batch (4-6 results) lands while the session is already at 70%+, storing
876
+ // full payloads pushes Redis past the nuclear path threshold before the
877
+ // next assemble() can trim. Use Redis current state (appropriate here —
878
+ // we're deciding what to write TO Redis) as the pressure signal.
879
+ // Above 70%: truncate toolResult content to a compact stub.
880
+ // Above 85%: skip recording entirely — assemble() trim is the safety net.
881
+ const isInboundToolResult = msg.role === 'tool' || msg.role === 'tool_result' || msg.role === 'toolResult';
882
+ if (isInboundToolResult && neutral.toolResults && neutral.toolResults.length > 0) {
883
+ const redisTokens = await estimateWindowTokens(hm, agentId, sk);
884
+ const effectiveBudget = computeEffectiveBudget(undefined);
885
+ const redisPressure = redisTokens / effectiveBudget;
886
+ if (redisPressure > 0.85) {
887
+ // FIX (Bug 4): Never skip a tool result entirely — that leaves an orphaned
888
+ // tool_call in Redis history (the assistant message was already recorded).
889
+ // Anthropic rejects assistant messages with tool_calls that have no matching result.
890
+ // Instead, record a compact stub that preserves pair integrity in history.
891
+ const stubbedResults = neutral.toolResults.map(tr => ({
892
+ ...tr,
893
+ content: `[tool result omitted by wave-guard at ${(redisPressure * 100).toFixed(0)}% Redis pressure]`,
894
+ }));
895
+ const stubNeutral = { ...neutral, toolResults: stubbedResults };
896
+ console.log(`[hypermem] ingest wave-guard: stubbing toolResult (Redis pressure ${(redisPressure * 100).toFixed(0)}% > 85%) — preserving pair integrity`);
897
+ await hm.recordAssistantMessage(agentId, sk, stubNeutral);
898
+ return { ingested: true };
899
+ }
900
+ else if (redisPressure > 0.70) {
901
+ // Elevated: store truncated stub to preserve tool call pairing in history
902
+ const MAX_TOOL_RESULT_CHARS = 500;
903
+ neutral = {
904
+ ...neutral,
905
+ toolResults: neutral.toolResults.map(tr => {
906
+ const content = typeof tr.content === 'string' ? tr.content : JSON.stringify(tr.content);
907
+ if (content.length <= MAX_TOOL_RESULT_CHARS)
908
+ return tr;
909
+ return {
910
+ ...tr,
911
+ content: `[truncated by wave-guard at ${(redisPressure * 100).toFixed(0)}% pressure: ${Math.ceil(content.length / 4)} tokens]`,
912
+ };
913
+ }),
914
+ };
915
+ console.log(`[hypermem] ingest wave-guard: truncated toolResult (Redis pressure ${(redisPressure * 100).toFixed(0)}% > 70%)`);
916
+ }
917
+ }
918
+ await hm.recordAssistantMessage(agentId, sk, neutral);
919
+ return { ingested: true };
920
+ }
921
+ catch (err) {
922
+ // Ingest failure is non-fatal — record is best-effort
923
+ console.warn('[hypermem-plugin] ingest failed:', err.message);
924
+ return { ingested: false };
925
+ }
926
+ },
927
+ /**
928
+ * Assemble model context from all four hypermem layers.
929
+ *
930
+ * The `messages` param contains the current conversation history from the
931
+ * runtime. We pass the prompt (latest user message) as the retrieval query,
932
+ * and let the compositor build the full context.
933
+ *
934
+ * Returns:
935
+ * messages — full assembled message array for the model
936
+ * estimatedTokens — token count of assembled context
937
+ * systemPromptAddition — facts/recall/episodes injected before runtime system prompt
938
+ */
939
+ async assemble({ sessionId, sessionKey, messages, tokenBudget, prompt, model }) {
940
+ // ── Tool-loop guard ──────────────────────────────────────────────────────
941
+ // When the last message is a toolResult, the runtime is mid tool-loop:
942
+ // the model already has full context from the initial turn assembly.
943
+ // Re-running the full compose pipeline here is wasteful and, in long
944
+ // tool loops, causes cumulative context growth that triggers preemptive
945
+ // context overflow. Pass the messages through as-is.
946
+ //
947
+ // Matches OpenClaw's legacy behavior: the legacy engine's assemble() is a
948
+ // pass-through that never re-injects context on tool-loop calls.
949
+ const lastMsg = messages[messages.length - 1];
950
+ const isToolLoop = lastMsg?.role === 'toolResult' || lastMsg?.role === 'tool';
951
+ if (isToolLoop) {
952
+ // Tool-loop turns: pass messages through unchanged but still:
953
+ // 1. Run the trim guardrail — tool loops accumulate history as fast
954
+ // as regular turns, and the old path skipped trim entirely, leaving
955
+ // the compaction guard blind (received estimatedTokens=0).
956
+ // 2. Return a real estimatedTokens = windowTokens + cached overhead,
957
+ // so the guard has accurate signal and can fire when needed.
958
+ //
959
+ // Fix (ingestion-wave): use pressure-tiered trim instead of fixed 80%.
960
+ // At 91% with 5 parallel web_search calls incoming (~20-30% of budget),
961
+ // a fixed 80% trim only frees 11% headroom — the wave overflows anyway
962
+ // and results strip silently. Tier the trim target based on pre-trim
963
+ // pressure so high-pressure sessions get real headroom before results land.
964
+ const effectiveBudget = computeEffectiveBudget(tokenBudget);
965
+ try {
966
+ const hm = await getHyperMem();
967
+ const sk = resolveSessionKey(sessionId, sessionKey);
968
+ const agentId = extractAgentId(sk);
969
+ // ── Image / heavy-content eviction pre-pass ──────────────────────
970
+ // Evict stale image payloads and large tool results before measuring
971
+ // pressure. This frees tokens without compaction — images alone can
972
+ // account for 30%+ of context from a single screenshot 2 turns ago.
973
+ const evictionCfg = _evictionConfig;
974
+ const evictionEnabled = evictionCfg?.enabled !== false;
975
+ let workingMessages = messages;
976
+ if (evictionEnabled) {
977
+ const { messages: evicted, stats: evStats } = evictStaleContent(messages, {
978
+ imageAgeTurns: evictionCfg?.imageAgeTurns,
979
+ toolResultAgeTurns: evictionCfg?.toolResultAgeTurns,
980
+ minTokensToEvict: evictionCfg?.minTokensToEvict,
981
+ keepPreviewChars: evictionCfg?.keepPreviewChars,
982
+ });
983
+ workingMessages = evicted;
984
+ if (evStats.tokensFreed > 0) {
985
+ console.log(`[hypermem] eviction: ${evStats.imagesEvicted} images, ` +
986
+ `${evStats.toolResultsEvicted} tool results, ` +
987
+ `~${evStats.tokensFreed.toLocaleString()} tokens freed`);
988
+ }
989
+ }
990
+ // Measure pressure BEFORE trim to pick the right tier.
991
+ // Critical: use the runtime-provided messages array, NOT estimateWindowTokens()
992
+ // which reads Redis. After a gateway restart Redis is empty — estimateWindowTokens
993
+ // returns ~0, pressure reads as 0%, and the trim tiers never fire even though
994
+ // the session is at 98% from JSONL loaded at runtime. The messages param is
995
+ // always authoritative — it's what the runtime actually sent to the model.
996
+ const runtimeTokens = messages.reduce((sum, m) => {
997
+ const msg = m;
998
+ const textCost = estimateTokens(typeof msg.textContent === 'string' ? msg.textContent : null);
999
+ const toolCallCost = msg.toolCalls ? Math.ceil(JSON.stringify(msg.toolCalls).length / 2) : 0;
1000
+ const toolResultCost = msg.toolResults ? Math.ceil(JSON.stringify(msg.toolResults).length / 2) : 0;
1001
+ // FIX (Bug 2): count content arrays in OpenClaw native format.
1002
+ // Native tool result messages store content as c.content (not c.text).
1003
+ // Old code always read c.text, returning 0 for native format — severe undercount.
1004
+ const contentCost = Array.isArray(msg.content)
1005
+ ? msg.content.reduce((s, c) => {
1006
+ const part = c;
1007
+ const textVal = typeof part.text === 'string' ? part.text
1008
+ : typeof part.content === 'string' ? part.content
1009
+ : part.content != null ? JSON.stringify(part.content) : null;
1010
+ return s + estimateTokens(textVal);
1011
+ }, 0)
1012
+ : 0;
1013
+ // Count image parts — base64 images are large and invisible to the text estimator
1014
+ const imageCost = Array.isArray(msg.content)
1015
+ ? msg.content.reduce((s, c) => {
1016
+ const part = c;
1017
+ if (part.type === 'image' || part.type === 'image_url') {
1018
+ const src = part.source?.data;
1019
+ const url = part.image_url?.url;
1020
+ const dataStr = typeof src === 'string' ? src : (typeof url === 'string' ? url : '');
1021
+ return s + Math.ceil(dataStr.length / 3); // base64 ~1.33x bytes, ~1 token/4 bytes
1022
+ }
1023
+ return s;
1024
+ }, 0)
1025
+ : 0;
1026
+ return sum + textCost + toolCallCost + toolResultCost + contentCost + imageCost;
1027
+ }, 0);
1028
+ // Redis window is a useful cross-check; use whichever is higher so we never
1029
+ // underestimate when Redis is ahead of the runtime snapshot.
1030
+ const redisTokens = await estimateWindowTokens(hm, agentId, sk);
1031
+ const preTrimTokens = Math.max(runtimeTokens, redisTokens);
1032
+ const pressure = preTrimTokens / effectiveBudget;
1033
+ // Pressure-tiered trim targets:
1034
+ // JSONL-replay (EC1): runtimeTokens >> redisTokens means session
1035
+ // loaded from a large JSONL but Redis is cold (post-restart). Trim
1036
+ // aggressively to 30% so system prompt + this turn's tool results fit.
1037
+ // >85% (critical) → trim to 50%: blast headroom for incoming wave
1038
+ // >80% (high) → trim to 60%: 40% headroom
1039
+ // >75% (elevated) → trim to 65%: 35% headroom
1040
+ // ≤75% (normal) → trim to 80%: existing behaviour
1041
+ const isJsonlReplay = runtimeTokens > effectiveBudget * 0.80 && redisTokens < runtimeTokens * 0.20;
1042
+ let trimTarget;
1043
+ if (isJsonlReplay) {
1044
+ trimTarget = 0.20; // EC1: cold Redis + hot JSONL = post-restart replay, need max headroom
1045
+ }
1046
+ else if (pressure > 0.85) {
1047
+ trimTarget = 0.40; // critical: 60% headroom for incoming wave
1048
+ }
1049
+ else if (pressure > 0.80) {
1050
+ trimTarget = 0.50; // high: 50% headroom
1051
+ }
1052
+ else if (pressure > 0.75) {
1053
+ trimTarget = 0.55; // elevated: 45% headroom
1054
+ }
1055
+ else {
1056
+ trimTarget = 0.65; // normal: 35% headroom (was 0.80 — too tight)
1057
+ }
1058
+ const trimBudget = Math.floor(effectiveBudget * trimTarget);
1059
+ const trimmed = await hm.cache.trimHistoryToTokenBudget(agentId, sk, trimBudget);
1060
+ if (trimmed > 0) {
1061
+ await hm.cache.invalidateWindow(agentId, sk);
1062
+ }
1063
+ // Also trim the messages array itself to match the budget.
1064
+ // Redis trim clears the *next* turn's window. This turn's messages are
1065
+ // still the full runtime array — if we return them unchanged at 94%,
1066
+ // OpenClaw strips tool results before sending to the model regardless
1067
+ // of what estimatedTokens says. We need to return a slimmer array now.
1068
+ //
1069
+ // Strategy: keep system/identity messages at the front, then fill from
1070
+ // the back (most recent) until we hit trimBudget. Drop the middle.
1071
+ let trimmedMessages = workingMessages;
1072
+ if (pressure > trimTarget) {
1073
+ const msgArray = workingMessages;
1074
+ // Separate system messages (always keep) from conversation turns
1075
+ const systemMsgs = msgArray.filter(m => m.role === 'system');
1076
+ const convMsgs = msgArray.filter(m => m.role !== 'system');
1077
+ // Pre-process: inline-truncate large tool results before budget-fill drop.
1078
+ // A message with a 40k-token tool result that barely misses budget gets dropped
1079
+ // entirely. Replacing with a placeholder keeps the turn's metadata in context
1080
+ // while freeing the bulk of the tokens.
1081
+ const MAX_INLINE_TOOL_CHARS = 2000; // ~500 tokens
1082
+ // FIX (Bug 3): handle both NeutralMessage format (m.toolResults) and
1083
+ // OpenClaw native format (m.content array with type='tool_result' blocks).
1084
+ // Old guard `if (!m.toolResults)` skipped every native-format message.
1085
+ // Also fixed: replacement must be valid NeutralToolResult { callId, name, content },
1086
+ // not { type, text } which breaks pair-integrity downstream.
1087
+ const processedConvMsgs = convMsgs.map(m => {
1088
+ // NeutralMessage format
1089
+ if (m.toolResults) {
1090
+ const resultStr = JSON.stringify(m.toolResults);
1091
+ if (resultStr.length <= MAX_INLINE_TOOL_CHARS)
1092
+ return m;
1093
+ const firstResult = m.toolResults[0];
1094
+ return {
1095
+ ...m,
1096
+ toolResults: [{
1097
+ callId: firstResult?.callId ?? 'unknown',
1098
+ name: firstResult?.name ?? 'tool',
1099
+ content: `[tool result truncated: ${Math.ceil(resultStr.length / 4)} tokens]`,
1100
+ }],
1101
+ };
1102
+ }
1103
+ // OpenClaw native format
1104
+ if (Array.isArray(m.content)) {
1105
+ const content = m.content;
1106
+ const hasLarge = content.some(c => {
1107
+ if (c.type !== 'tool_result')
1108
+ return false;
1109
+ const val = typeof c.content === 'string' ? c.content : JSON.stringify(c.content ?? '');
1110
+ return val.length > MAX_INLINE_TOOL_CHARS;
1111
+ });
1112
+ if (!hasLarge)
1113
+ return m;
1114
+ return {
1115
+ ...m,
1116
+ content: content.map(c => {
1117
+ if (c.type !== 'tool_result')
1118
+ return c;
1119
+ const val = typeof c.content === 'string' ? c.content : JSON.stringify(c.content ?? '');
1120
+ if (val.length <= MAX_INLINE_TOOL_CHARS)
1121
+ return c;
1122
+ return { ...c, content: `[tool result truncated: ${Math.ceil(val.length / 4)} tokens]` };
1123
+ }),
1124
+ };
1125
+ }
1126
+ return m;
1127
+ });
1128
+ // Fill from the back within budget
1129
+ let budget = trimBudget;
1130
+ // Reserve tokens for system messages
1131
+ for (const sm of systemMsgs) {
1132
+ const t = estimateTokens(typeof sm.textContent === 'string' ? sm.textContent : null)
1133
+ + (Array.isArray(sm.content) ? sm.content.reduce((s, c) => {
1134
+ const textVal = typeof c.text === 'string' ? c.text
1135
+ : typeof c.content === 'string' ? c.content : null;
1136
+ return s + estimateTokens(textVal);
1137
+ }, 0) : 0);
1138
+ budget -= t;
1139
+ }
1140
+ const msgCost = (m) => estimateTokens(typeof m.textContent === 'string' ? m.textContent : null)
1141
+ + (m.toolCalls ? Math.ceil(JSON.stringify(m.toolCalls).length / 2) : 0)
1142
+ + (m.toolResults ? Math.ceil(JSON.stringify(m.toolResults).length / 2) : 0)
1143
+ + (Array.isArray(m.content) ? m.content.reduce((s, c) => {
1144
+ if (c.type === 'toolCall' || c.type === 'tool_use') {
1145
+ return s + Math.ceil(JSON.stringify(c).length / 2);
1146
+ }
1147
+ const textVal = typeof c.text === 'string' ? c.text
1148
+ : typeof c.content === 'string' ? c.content
1149
+ : c.content != null ? JSON.stringify(c.content) : null;
1150
+ return s + estimateTokens(textVal);
1151
+ }, 0) : 0);
1152
+ const clusters = clusterTranscriptMessages(processedConvMsgs);
1153
+ const keptClusters = [];
1154
+ const tailCluster = clusters.length > 0 ? clusters[clusters.length - 1] : [];
1155
+ if (tailCluster.length > 0) {
1156
+ budget -= tailCluster.reduce((sum, msg) => sum + msgCost(msg), 0);
1157
+ keptClusters.unshift(tailCluster);
1158
+ }
1159
+ for (let i = clusters.length - 2; i >= 0 && budget > 0; i--) {
1160
+ const cluster = clusters[i];
1161
+ const clusterCost = cluster.reduce((sum, msg) => sum + msgCost(msg), 0);
1162
+ if (budget - clusterCost >= 0) {
1163
+ keptClusters.unshift(cluster);
1164
+ budget -= clusterCost;
1165
+ }
1166
+ }
1167
+ const kept = keptClusters.flat();
1168
+ const keptCount = processedConvMsgs.length - kept.length;
1169
+ if (keptCount > 0) {
1170
+ console.log(`[hypermem-plugin] tool-loop trim: pressure=${(pressure * 100).toFixed(1)}%${isJsonlReplay ? ' [jsonl-replay]' : ''} → ` +
1171
+ `target=${(trimTarget * 100).toFixed(0)}% (redis=${trimmed} msgs, messages=${keptCount} dropped)`);
1172
+ trimmedMessages = [...systemMsgs, ...kept];
1173
+ }
1174
+ else if (trimmed > 0) {
1175
+ console.log(`[hypermem-plugin] tool-loop trim: pressure=${(pressure * 100).toFixed(1)}% → ` +
1176
+ `target=${(trimTarget * 100).toFixed(0)}% (redis=${trimmed} msgs)`);
1177
+ }
1178
+ }
1179
+ else if (trimmed > 0) {
1180
+ console.log(`[hypermem-plugin] tool-loop trim: pressure=${(pressure * 100).toFixed(1)}% → ` +
1181
+ `target=${(trimTarget * 100).toFixed(0)}% (redis=${trimmed} msgs)`);
1182
+ }
1183
+ // Apply tool gradient to compress large tool results before returning.
1184
+ // Skip if deferToolPruning is enabled — OpenClaw's contextPruning handles it.
1185
+ if (!_deferToolPruning) {
1186
+ // The full compose path runs applyToolGradientToWindow during reshaping;
1187
+ // the tool-loop path was previously skipping this, leaving a 40k-token
1188
+ // web_search result uncompressed every turn.
1189
+ try {
1190
+ const gradientApplied = applyToolGradientToWindow(trimmedMessages, trimBudget);
1191
+ trimmedMessages = gradientApplied;
1192
+ }
1193
+ catch {
1194
+ // Non-fatal: if gradient fails, continue with untouched trimmedMessages
1195
+ }
1196
+ } // end deferToolPruning gate
1197
+ // Repair orphaned tool pairs in the trimmed message list.
1198
+ // In-memory trim (cluster drop) can strand tool_result messages whose
1199
+ // paired tool_use was in a dropped cluster.
1200
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
1201
+ trimmedMessages = repairToolPairs(trimmedMessages);
1202
+ const windowTokens = await estimateWindowTokens(hm, agentId, sk);
1203
+ const overhead = _overheadCache.get(sk) ?? getOverheadFallback();
1204
+ return {
1205
+ messages: trimmedMessages,
1206
+ estimatedTokens: windowTokens + overhead,
1207
+ };
1208
+ }
1209
+ catch {
1210
+ // Non-fatal: return conservative estimate so guard doesn't go blind
1211
+ return {
1212
+ messages: messages,
1213
+ estimatedTokens: Math.floor(effectiveBudget * 0.8),
1214
+ };
1215
+ }
1216
+ }
1217
+ try {
1218
+ const hm = await getHyperMem();
1219
+ const sk = resolveSessionKey(sessionId, sessionKey);
1220
+ const agentId = extractAgentId(sk);
1221
+ // Resolve agent tier from fleet store (for doc chunk tier filtering)
1222
+ let tier;
1223
+ try {
1224
+ const agent = _fleetStore?.getAgent(agentId);
1225
+ tier = agent?.tier;
1226
+ }
1227
+ catch {
1228
+ // Non-fatal — tier filtering just won't apply
1229
+ }
1230
+ // historyDepth: derive a safe message count from the token budget.
1231
+ // Uses 50% of the budget for history (down from 60% — more budget goes to
1232
+ // L3/L4 context slots now). Floor at 50, ceiling at 200.
1233
+ // This is a preventive guard — the compositor's safety valve still trims
1234
+ // by token count post-assembly, but limiting depth up front avoids
1235
+ // feeding the compactor a window it can't reduce.
1236
+ const effectiveBudget = computeEffectiveBudget(tokenBudget);
1237
+ const historyDepth = Math.min(250, Math.max(50, Math.floor((effectiveBudget * 0.65) / 500)));
1238
+ // ── Redis guardrail: trim history to token budget ────────────────────
1239
+ // Prevents model-switch bloat: if an agent previously ran on a larger
1240
+ // context window, Redis history may exceed the current model's budget.
1241
+ // Trimming here (before compose) ensures the compositor never sees a
1242
+ // history window it can't fit. Uses 80% of budget as the trim ceiling
1243
+ // to leave room for system prompt, facts, and identity slots.
1244
+ try {
1245
+ const trimBudget = Math.floor(effectiveBudget * 0.65);
1246
+ const trimmed = await hm.cache.trimHistoryToTokenBudget(agentId, sk, trimBudget);
1247
+ if (trimmed > 0) {
1248
+ // Invalidate window cache since history changed
1249
+ await hm.cache.invalidateWindow(agentId, sk);
1250
+ }
1251
+ }
1252
+ catch (trimErr) {
1253
+ // Non-fatal — compositor's budget-fit walk is the second line of defense
1254
+ console.warn('[hypermem-plugin] assemble: Redis trim failed (non-fatal):', trimErr.message);
1255
+ }
1256
+ // ── Budget downshift: proactive reshape pass ───────────────────────────────────────
1257
+ // If this session previously composed at a higher token budget (e.g. gpt-5.4
1258
+ // → claude-sonnet model switch), the Redis window is still sized for the old
1259
+ // budget. trimHistoryToTokenBudget above trims by count but skips tool
1260
+ // gradient logic. A downshift >10% triggers a full reshape: apply tool
1261
+ // gradient at the new budget + trim, then write back before compose runs.
1262
+ // This prevents several turns of compaction churn after a model switch.
1263
+ //
1264
+ // Bug fix: previously read from getWindow() which is always null here
1265
+ // (afterTurn invalidates it every turn). Also fixed: was doing setWindow()
1266
+ // then invalidateWindow() which is a write-then-delete no-op. Now reads
1267
+ // from history list and writes back via replaceHistory().
1268
+ try {
1269
+ const lastState = await hm.cache.getModelState(agentId, sk);
1270
+ const DOWNSHIFT_THRESHOLD = 0.10;
1271
+ const isDownshift = lastState &&
1272
+ (lastState.tokenBudget - effectiveBudget) / lastState.tokenBudget > DOWNSHIFT_THRESHOLD;
1273
+ if (isDownshift && !_deferToolPruning) {
1274
+ // Read from history list — window cache is always null here because
1275
+ // afterTurn() calls invalidateWindow() on every turn.
1276
+ const currentHistory = await hm.cache.getHistory(agentId, sk);
1277
+ if (currentHistory && currentHistory.length > 0) {
1278
+ const reshaped = applyToolGradientToWindow(currentHistory, effectiveBudget);
1279
+ if (reshaped.length < currentHistory.length) {
1280
+ const reshapedAt = new Date().toISOString();
1281
+ if (canPersistReshapedHistory(currentHistory)) {
1282
+ // No structured tool turns in canonical history, safe to persist
1283
+ // the reshaped window back to cache/history.
1284
+ await hm.cache.replaceHistory(agentId, sk, reshaped);
1285
+ await hm.cache.invalidateWindow(agentId, sk);
1286
+ console.log(`[hypermem-plugin] budget-downshift: ${agentId}/${sk} ` +
1287
+ `${lastState.tokenBudget}→${effectiveBudget} tokens, ` +
1288
+ `reshaped ${currentHistory.length}→${reshaped.length} messages`);
1289
+ }
1290
+ else {
1291
+ // Tool-bearing history must remain canonical. Use the reshaped
1292
+ // window only as a compose-time view and leave hot history lossless.
1293
+ console.log(`[hypermem-plugin] budget-downshift: ${agentId}/${sk} ` +
1294
+ `${lastState.tokenBudget}→${effectiveBudget} tokens, ` +
1295
+ `view-only reshape ${currentHistory.length}→${reshaped.length} messages (structured tool history preserved)`);
1296
+ }
1297
+ await hm.cache.setModelState(agentId, sk, {
1298
+ model: model ?? 'unknown',
1299
+ tokenBudget: effectiveBudget,
1300
+ composedAt: new Date().toISOString(),
1301
+ historyDepth,
1302
+ reshapedAt,
1303
+ });
1304
+ }
1305
+ }
1306
+ }
1307
+ }
1308
+ catch (reshapeErr) {
1309
+ // Non-fatal — compositor safety valve is still the last defense
1310
+ console.warn('[hypermem-plugin] assemble: reshape pass failed (non-fatal):', reshapeErr.message);
1311
+ }
1312
+ // ── Cache replay fast path ─────────────────────────────────────────────
1313
+ // If the session was active recently, return the cached contextBlock
1314
+ // (systemPromptAddition) to produce a byte-identical system prompt and
1315
+ // hit the provider prefix cache (Anthropic / OpenAI).
1316
+ // The message window is always rebuilt fresh — only the compositor output
1317
+ // (contextBlock) is cached, since that's what determines prefix identity.
1318
+ const cacheReplayThresholdMs = _cacheReplayThresholdMs;
1319
+ let cachedContextBlock = null;
1320
+ if (cacheReplayThresholdMs > 0) {
1321
+ try {
1322
+ const cachedAt = await hm.cache.getSlot(agentId, sk, 'assemblyContextAt');
1323
+ if (cachedAt && Date.now() - parseInt(cachedAt) < cacheReplayThresholdMs) {
1324
+ cachedContextBlock = await hm.cache.getSlot(agentId, sk, 'assemblyContextBlock');
1325
+ if (cachedContextBlock) {
1326
+ console.log(`[hypermem-plugin] assemble: cache replay hit for ${agentId} (${Math.round((Date.now() - parseInt(cachedAt)) / 1000)}s old)`);
1327
+ }
1328
+ }
1329
+ }
1330
+ catch {
1331
+ // Non-fatal — fall through to full assembly
1332
+ }
1333
+ }
1334
+ const request = {
1335
+ agentId,
1336
+ sessionKey: sk,
1337
+ tokenBudget: effectiveBudget,
1338
+ historyDepth,
1339
+ tier,
1340
+ model, // pass model for provider detection
1341
+ includeDocChunks: !cachedContextBlock, // skip doc retrieval on cache hit
1342
+ prompt,
1343
+ skipProviderTranslation: true, // runtime handles provider translation
1344
+ };
1345
+ const result = await hm.compose(request);
1346
+ // Use cached contextBlock if available (cache replay), otherwise use fresh result.
1347
+ // After a full compose, write the new contextBlock to cache for the next turn.
1348
+ if (cachedContextBlock) {
1349
+ result.contextBlock = cachedContextBlock;
1350
+ }
1351
+ else if (result.contextBlock && cacheReplayThresholdMs > 0) {
1352
+ // Write cache async — never block the assemble() return on this
1353
+ const blockToCache = result.contextBlock;
1354
+ const nowStr = Date.now().toString();
1355
+ const ttlSec = Math.ceil((cacheReplayThresholdMs * 2) / 1000);
1356
+ Promise.all([
1357
+ hm.cache.setSlot(agentId, sk, 'assemblyContextBlock', blockToCache),
1358
+ hm.cache.setSlot(agentId, sk, 'assemblyContextAt', nowStr),
1359
+ ]).then(() => {
1360
+ // Extend TTL on the cached keys to 2× the threshold
1361
+ // setSlot uses the sessionTTL from RedisLayer config — acceptable fallback
1362
+ }).catch(() => { });
1363
+ }
1364
+ // Convert NeutralMessage[] → AgentMessage[] for the OpenClaw runtime.
1365
+ // neutralToAgentMessage can return a single message or an array (tool results
1366
+ // expand to individual ToolResultMessage objects), so we flatMap.
1367
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
1368
+ let outputMessages = result.messages
1369
+ .filter(m => m.role != null)
1370
+ .flatMap(m => neutralToAgentMessage(m));
1371
+ const neutralPairStats = collectNeutralToolPairStats(result.messages);
1372
+ const agentPairStats = collectAgentToolPairStats(outputMessages);
1373
+ const toolPairAnomaly = neutralPairStats.missingToolResultCount > 0 ||
1374
+ neutralPairStats.orphanToolResultCount > 0 ||
1375
+ agentPairStats.missingToolResultCount > 0 ||
1376
+ agentPairStats.orphanToolResultCount > 0 ||
1377
+ agentPairStats.syntheticNoResultCount > 0
1378
+ ? {
1379
+ stage: 'assemble',
1380
+ neutralMissingToolResultIds: neutralPairStats.missingToolResultIds.slice(0, 10),
1381
+ neutralOrphanToolResultIds: neutralPairStats.orphanToolResultIds.slice(0, 10),
1382
+ agentMissingToolResultIds: agentPairStats.missingToolResultIds.slice(0, 10),
1383
+ agentOrphanToolResultIds: agentPairStats.orphanToolResultIds.slice(0, 10),
1384
+ syntheticNoResultCount: agentPairStats.syntheticNoResultCount,
1385
+ }
1386
+ : undefined;
1387
+ await bumpToolPairMetrics(hm, agentId, sk, {
1388
+ composeCount: 1,
1389
+ preBridgeMissingToolResults: neutralPairStats.missingToolResultCount,
1390
+ preBridgeOrphanToolResults: neutralPairStats.orphanToolResultCount,
1391
+ postBridgeMissingToolResults: agentPairStats.missingToolResultCount,
1392
+ postBridgeOrphanToolResults: agentPairStats.orphanToolResultCount,
1393
+ }, toolPairAnomaly);
1394
+ if (toolPairAnomaly) {
1395
+ console.warn(`[hypermem-plugin] tool-pair-integrity: ${agentId}/${sk} ` +
1396
+ `neutralMissing=${neutralPairStats.missingToolResultCount} neutralOrphan=${neutralPairStats.orphanToolResultCount} ` +
1397
+ `agentMissing=${agentPairStats.missingToolResultCount} agentOrphan=${agentPairStats.orphanToolResultCount} ` +
1398
+ `synthetic=${agentPairStats.syntheticNoResultCount}`);
1399
+ }
1400
+ // Repair orphaned tool pairs before returning to provider.
1401
+ // compaction/trim passes can remove tool_use blocks without removing their
1402
+ // paired tool_result messages — Anthropic and Gemini reject these with 400.
1403
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
1404
+ outputMessages = repairToolPairs(outputMessages);
1405
+ // Cache overhead for tool-loop turns: contextBlock tokens (chars/4) +
1406
+ // tier-aware estimate for runtime system prompt (SOUL.md, identity,
1407
+ // workspace files — not visible from inside the plugin).
1408
+ const contextBlockTokens = Math.ceil((result.contextBlock?.length ?? 0) / 4);
1409
+ const runtimeSystemTokens = getOverheadFallback(tier);
1410
+ _overheadCache.set(sk, contextBlockTokens + runtimeSystemTokens);
1411
+ // Update model state for downshift detection on next turn
1412
+ try {
1413
+ await hm.cache.setModelState(agentId, sk, {
1414
+ model: model ?? 'unknown',
1415
+ tokenBudget: effectiveBudget,
1416
+ composedAt: new Date().toISOString(),
1417
+ historyDepth,
1418
+ });
1419
+ }
1420
+ catch {
1421
+ // Non-fatal
1422
+ }
1423
+ return {
1424
+ messages: outputMessages,
1425
+ estimatedTokens: result.tokenCount ?? 0,
1426
+ // systemPromptAddition injects hypermem context before the runtime system prompt.
1427
+ // This is the facts/recall/episodes block assembled by the compositor.
1428
+ systemPromptAddition: result.contextBlock || undefined,
1429
+ };
1430
+ }
1431
+ catch (err) {
1432
+ console.error('[hypermem-plugin] assemble error (stack):', err.stack ?? err);
1433
+ throw err; // Re-throw so the runtime falls back to legacy pipeline
1434
+ }
1435
+ },
1436
/**
 * Compact context. hypermem owns compaction.
 *
 * Strategy: assemble() already trims the composed message list to the token
 * budget via the compositor safety valve, so the model never receives an
 * oversized context. compact() is called by the runtime when it detects
 * overflow — at that point we:
 *   1. Estimate tokens in the current Redis history window
 *   2. If already under budget (compositor already handled it), report clean
 *   3. If over budget (e.g. window was built before budget cap was applied),
 *      trim the Redis window to a safe depth and invalidate the compose cache
 *
 * This prevents the runtime from running its own LLM-summarization compaction
 * pass, which would destroy message history we're explicitly managing.
 *
 * @param sessionId          Runtime session id (fallback for key resolution)
 * @param sessionKey         "agent:<agentId>:<channel>:<name>" session key
 * @param sessionFile        Path to the runtime's JSONL transcript
 * @param tokenBudget        Raw model token budget (pre-margin)
 * @param currentTokenCount  Runtime's own estimate incl. inbound msg + system prompt
 * @returns {ok, compacted, reason?, result?: {tokensBefore, tokensAfter}}
 *          Always returns ok:true — even on error — so the runtime never falls
 *          back to its own compaction pass.
 */
async compact({ sessionId, sessionKey, sessionFile, tokenBudget, currentTokenCount }) {
    try {
        const hm = await getHyperMem();
        const sk = resolveSessionKey(sessionId, sessionKey);
        const agentId = extractAgentId(sk);
        // Skip if a reshape pass just ran (within last 30s) — avoid double-processing.
        // Cache modelState here for reuse in density-aware JSONL truncation below.
        let cachedModelState = null;
        try {
            cachedModelState = await hm.cache.getModelState(agentId, sk);
            if (cachedModelState?.reshapedAt) {
                const reshapeAge = Date.now() - new Date(cachedModelState.reshapedAt).getTime();
                // Only skip if session is NOT critically full — nuclear path must bypass this guard.
                // If currentTokenCount > 85% budget, fall through to nuclear compaction below.
                const isCriticallyFull = currentTokenCount != null &&
                    currentTokenCount > (computeEffectiveBudget(tokenBudget) * 0.85);
                if (reshapeAge < 30_000 && !isCriticallyFull) {
                    console.log(`[hypermem-plugin] compact: skipping — reshape pass ran ${reshapeAge}ms ago`);
                    return { ok: true, compacted: false, reason: 'reshape-recently-ran' };
                }
            }
        }
        catch {
            // Non-fatal — proceed with compaction
        }
        // Re-estimate from the actual Redis window.
        // The runtime's estimate (currentTokenCount) includes the full inbound message
        // and system prompt — our estimate only covers the history window. When they
        // diverge significantly upward, the difference is "inbound overhead" consuming
        // budget the history is competing for. We trim history to make room.
        const effectiveBudget = computeEffectiveBudget(tokenBudget);
        const tokensBefore = await estimateWindowTokens(hm, agentId, sk);
        // Target depth for both Redis trimming and JSONL truncation.
        // Target 50% of budget capacity, assume ~500 tokens/message average.
        const targetDepth = Math.max(20, Math.floor((effectiveBudget * 0.5) / 500));
        // ── NUCLEAR COMPACTION ────────────────────────────────────────────────
        // When the runtime reports the session is ≥85% full, trust that signal
        // over our Redis estimate. The JSONL accumulates full tool results that
        // the gradient never sees, so Redis can look fine while the transcript
        // is genuinely saturated. Normal compact() returns compacted=false in
        // this scenario ("within_budget"), which gives the runtime zero relief.
        //
        // Also triggered when reshape ran recently but the session is still
        // critically full — bypass the reshape guard in that case.
        const NUCLEAR_THRESHOLD = 0.85;
        const isNuclear = currentTokenCount != null && currentTokenCount > effectiveBudget * NUCLEAR_THRESHOLD;
        if (isNuclear) {
            // Cut deep: target 20% of normal depth = ~25 messages for a 128k session.
            // Keeps very recent context, clears the long tool-heavy tail.
            const nuclearDepth = Math.max(10, Math.floor(targetDepth * 0.20));
            const nuclearBudget = Math.floor(effectiveBudget * 0.25);
            await hm.cache.trimHistoryToTokenBudget(agentId, sk, nuclearBudget);
            await hm.cache.invalidateWindow(agentId, sk).catch(() => { });
            // force=true: always rewrite the JSONL even under the 1.5x bloat guard
            await truncateJsonlIfNeeded(sessionFile, nuclearDepth, true);
            const tokensAfter = await estimateWindowTokens(hm, agentId, sk);
            console.log(`[hypermem-plugin] compact: NUCLEAR — session at ${currentTokenCount}/${effectiveBudget} tokens ` +
                `(${Math.round((currentTokenCount / effectiveBudget) * 100)}% full), ` +
                `deep-trimmed JSONL to ${nuclearDepth} messages, Redis ${tokensBefore}→${tokensAfter} tokens`);
            return { ok: true, compacted: true, result: { tokensBefore, tokensAfter } };
        }
        // ── END NUCLEAR ───────────────────────────────────────────────────────
        // Detect large-inbound-content scenario: runtime total significantly exceeds
        // our history estimate. The gap is the inbound message + system prompt overhead.
        // Trim history to leave room for it even if history alone is within budget.
        if (currentTokenCount != null && currentTokenCount > tokensBefore) {
            const inboundOverhead = currentTokenCount - tokensBefore;
            if (inboundOverhead > effectiveBudget * 0.15) {
                // Large inbound content (document review, big tool result, etc.)
                // Trim history so history + inbound fits within 85% of budget.
                const budgetForHistory = Math.floor(effectiveBudget * 0.85) - inboundOverhead;
                if (budgetForHistory < tokensBefore && budgetForHistory > 0) {
                    const historyTrimmed = await hm.cache.trimHistoryToTokenBudget(agentId, sk, budgetForHistory);
                    await hm.cache.invalidateWindow(agentId, sk).catch(() => { });
                    const tokensAfter = await estimateWindowTokens(hm, agentId, sk);
                    await truncateJsonlIfNeeded(sessionFile, targetDepth);
                    console.log(`[hypermem-plugin] compact: large-inbound-content (gap=${inboundOverhead} tokens), ` +
                        `trimmed history ${tokensBefore}→${tokensAfter} (budget-for-history=${budgetForHistory}, trimmed=${historyTrimmed} messages)`);
                    return { ok: true, compacted: true, result: { tokensBefore, tokensAfter } };
                }
            }
        }
        // Under 70% of budget by our own Redis estimate.
        // We still need to check the JSONL — the runtime's overflow is based on JSONL
        // message count, not Redis. If the JSONL is bloated (> targetDepth * 1.5 messages)
        // we truncate it even if Redis looks fine, then return compacted=true so the
        // runtime retries with the trimmed file instead of killing the session.
        if (tokensBefore <= effectiveBudget * 0.7) {
            const jsonlTruncated = await truncateJsonlIfNeeded(sessionFile, targetDepth);
            if (jsonlTruncated) {
                console.log(`[hypermem-plugin] compact: Redis within_budget but JSONL was bloated — truncated to ${targetDepth} messages`);
                return {
                    ok: true,
                    compacted: true,
                    result: { tokensBefore, tokensAfter: tokensBefore },
                };
            }
            return {
                ok: true,
                compacted: false,
                reason: 'within_budget',
                result: { tokensBefore, tokensAfter: tokensBefore },
            };
        }
        // Over budget: trim both the window cache AND the history list.
        // Bug fix: if no window cache exists (fresh/never-compacted session),
        // compact() was only trying to trim the window (which was null) and
        // the history list was left untouched → 0 actual trimming → timeout
        // compaction death spiral.
        const window = await hm.cache.getWindow(agentId, sk);
        if (window && window.length > targetDepth) {
            const trimmed = window.slice(-targetDepth);
            await hm.cache.setWindow(agentId, sk, trimmed);
        }
        // Always trim the underlying history list — this is the source of truth
        // when no window cache exists. trimHistoryToTokenBudget walks newest→oldest
        // and LTRIMs everything beyond the budget.
        const trimBudget = Math.floor(effectiveBudget * 0.5);
        const historyTrimmed = await hm.cache.trimHistoryToTokenBudget(agentId, sk, trimBudget);
        if (historyTrimmed > 0) {
            console.log(`[hypermem-plugin] compact: trimmed ${historyTrimmed} messages from history list`);
        }
        // Invalidate the compose cache so next assemble() re-builds from trimmed data
        await hm.cache.invalidateWindow(agentId, sk).catch(() => { });
        const tokensAfter = await estimateWindowTokens(hm, agentId, sk);
        console.log(`[hypermem-plugin] compact: trimmed ${tokensBefore} → ${tokensAfter} tokens (budget: ${effectiveBudget})`);
        // Density-aware JSONL truncation: derive target depth from actual avg tokens/message
        // rather than assuming a fixed 500 tokens/message. This prevents a large-message
        // session (e.g. 145 msgs × 882 tok = 128k) from bypassing the 1.5x guard and
        // leaving the JSONL untouched while Redis is correctly trimmed.
        // force=true bypasses the 1.5x early-exit — over-budget always rewrites.
        const histDepth = cachedModelState?.historyDepth ?? targetDepth;
        const avgTokPerMsg = histDepth > 0 && tokensBefore > 0 ? tokensBefore / histDepth : 500;
        const densityTargetDepth = Math.max(10, Math.floor(trimBudget / avgTokPerMsg));
        await truncateJsonlIfNeeded(sessionFile, densityTargetDepth, true);
        console.log(`[hypermem-plugin] compact: JSONL density-trim targetDepth=${densityTargetDepth} (histDepth=${histDepth}, avg=${Math.round(avgTokPerMsg)} tok/msg)`);
        return {
            ok: true,
            compacted: true,
            result: { tokensBefore, tokensAfter },
        };
    }
    catch (err) {
        console.warn('[hypermem-plugin] compact failed:', err.message);
        // Non-fatal: return ok so the runtime doesn't retry with its own compaction
        return { ok: true, compacted: false, reason: err.message };
    }
},
1598
/**
 * After-turn hook: ingest new messages then trigger background indexer.
 *
 * IMPORTANT: When afterTurn is defined, the runtime calls ONLY afterTurn —
 * it never calls ingest() or ingestBatch(). So we must ingest the new
 * messages here, using messages.slice(prePromptMessageCount).
 *
 * Pipeline (each stage isolated so one failure never blocks the rest):
 *   1. Ingest new messages into messages.db (user vs assistant/tool paths)
 *   2. P3.1 topic-shift detection on the inbound user message (best-effort)
 *   3. Refresh the Redis gradient window from SQLite
 *   4. Invalidate the compose window cache
 *   5. Pre-emptive trim if the session exits the turn >80% full
 *   6. Pre-compute the assistant-reply embedding (fire-and-forget)
 *   7. Kick the background indexer tick (fire-and-forget)
 *
 * @param sessionId              Runtime session id (fallback for key resolution)
 * @param sessionKey             Canonical session key
 * @param messages               Full runtime message list for the session
 * @param prePromptMessageCount  Index of the first message produced this turn
 * @param isHeartbeat            Skip everything for heartbeat turns
 */
async afterTurn({ sessionId, sessionKey, messages, prePromptMessageCount, isHeartbeat }) {
    if (isHeartbeat)
        return;
    try {
        const hm = await getHyperMem();
        const sk = resolveSessionKey(sessionId, sessionKey);
        const agentId = extractAgentId(sk);
        // Ingest only the new messages produced this turn
        const newMessages = messages.slice(prePromptMessageCount);
        for (const msg of newMessages) {
            const m = msg;
            // Skip system messages — they come from the runtime, not the conversation
            if (m.role === 'system')
                continue;
            // Metrics-only branch: a synthetic "missing tool result" placeholder made
            // it into the transcript; count it and warn, then ingest it normally below.
            if (m.role === 'toolResult' && extractTextFromInboundContent(m.content).trim() === SYNTHETIC_MISSING_TOOL_RESULT_TEXT) {
                const toolCallId = typeof m.toolCallId === 'string' ? m.toolCallId : 'unknown';
                const toolName = typeof m.toolName === 'string' ? m.toolName : 'unknown';
                await bumpToolPairMetrics(hm, agentId, sk, { syntheticNoResultIngested: 1 }, {
                    stage: 'afterTurn',
                    toolCallId,
                    toolName,
                });
                console.warn(`[hypermem-plugin] tool-pair-integrity: observed synthetic missing tool result for ${agentId}/${sk} ` +
                    `tool=${toolName} callId=${toolCallId}`);
            }
            const neutral = toNeutralMessage(m);
            if (neutral.role === 'user' && !neutral.toolResults?.length) {
                // Record plain user messages here and strip transport envelope metadata
                // before storage so prompt wrappers like:
                //   Sender (untrusted metadata): { ... }
                // never enter messages.db / Redis history / downstream facts.
                //
                // recordUserMessage() also strips defensively at core level, but we do
                // it here too so the intended behavior is explicit at the plugin boundary.
                //
                // IMPORTANT: tool results arrive as role='user' carriers (toNeutralMessage
                // sets role='user' + toolResults=[...] + textContent=null). These MUST go
                // through recordAssistantMessage to persist the toolResults array.
                // recordUserMessage takes a plain string and would silently discard them.
                await hm.recordUserMessage(agentId, sk, stripMessageMetadata(neutral.textContent ?? ''));
            }
            else {
                await hm.recordAssistantMessage(agentId, sk, neutral);
            }
        }
        // P3.1: Topic detection on the inbound user message
        // Non-fatal: topic detection never blocks afterTurn
        try {
            const inboundUserMsg = newMessages
                .map(m => m)
                .find(m => m.role === 'user');
            if (inboundUserMsg) {
                const neutralUser = toNeutralMessage(inboundUserMsg);
                // Gather recent messages for context (all messages before the new ones)
                const contextMessages = messages.slice(0, prePromptMessageCount)
                    .filter(m => m.role !== 'system')
                    .slice(-10)
                    .map(m => toNeutralMessage(m));
                const db = hm.dbManager.getMessageDb(agentId);
                if (db) {
                    const topicMap = new SessionTopicMap(db);
                    const activeTopic = topicMap.getActiveTopic(sk);
                    const signal = detectTopicShift(neutralUser, contextMessages, activeTopic?.id ?? null);
                    if (signal.isNewTopic && signal.topicName) {
                        const newTopicId = topicMap.createTopic(sk, signal.topicName);
                        // New topic starts with count 1 (the message that triggered the shift)
                        topicMap.incrementMessageCount(newTopicId);
                        // Write topic_id onto the stored user message (best-effort)
                        try {
                            const stored = db.prepare(`
            SELECT m.id FROM messages m
            JOIN conversations c ON c.id = m.conversation_id
            WHERE c.session_key = ? AND m.role = 'user'
            ORDER BY m.message_index DESC LIMIT 1
          `).get(sk);
                            if (stored) {
                                db.prepare('UPDATE messages SET topic_id = ? WHERE id = ?')
                                    .run(newTopicId, stored.id);
                            }
                        }
                        catch {
                            // Best-effort
                        }
                    }
                    else if (activeTopic) {
                        topicMap.activateTopic(sk, activeTopic.id);
                        topicMap.incrementMessageCount(activeTopic.id);
                    }
                }
            }
        }
        catch {
            // Topic detection is entirely non-fatal
        }
        // Recompute the Redis hot history from SQLite so turn-age gradient is
        // materialized after every turn. This prevents warm-compressed history
        // from drifting back to raw payloads during live sessions.
        //
        // Pass the cached model tokenBudget so refreshRedisGradient can cap the
        // gradient-compressed window to budget before writing to Redis. Without
        // this, afterTurn writes up to 250 messages regardless of budget, causing
        // trimHistoryToTokenBudget to fire and trim ~200 messages on every
        // subsequent assemble() — the churn loop seen in Helm's logs.
        if (hm.cache.isConnected) {
            try {
                const modelState = await hm.cache.getModelState(agentId, sk);
                const gradientBudget = modelState?.tokenBudget;
                await hm.refreshRedisGradient(agentId, sk, gradientBudget);
            }
            catch (refreshErr) {
                console.warn('[hypermem-plugin] afterTurn: refreshRedisGradient failed (non-fatal):', refreshErr.message);
            }
        }
        // Invalidate the window cache after ingesting new messages.
        // The next assemble() call will re-compose with the new data.
        try {
            await hm.cache.invalidateWindow(agentId, sk);
        }
        catch {
            // Window invalidation is best-effort
        }
        // Pre-emptive secondary trim when session exits a turn hot.
        // If a session just finished a turn at >80% pressure, the NEXT turn's
        // incoming tool results (parallel web searches, large exec output, etc.)
        // will hit a window with no headroom — the ingestion wave failure mode
        // (reported by Helm, 2026-04-05). Pre-trim here so the tool-loop
        // assemble() path starts the next turn with meaningful space.
        //
        // Uses modelState.tokenBudget if cached; skips if unavailable (non-fatal).
        try {
            const modelState = await hm.cache.getModelState(agentId, sk);
            if (modelState?.tokenBudget) {
                // Use the same dual-source pressure estimate as the tool-loop trim:
                // max(runtime messages, Redis) so a post-restart empty-Redis session
                // still fires correctly.
                const runtimePostTokens = messages.reduce((sum, m) => {
                    const msg = m;
                    const textCost = estimateTokens(typeof msg.textContent === 'string' ? msg.textContent : null);
                    // Tool payloads estimated at ~2 chars/token from their JSON size
                    const toolCallCost = msg.toolCalls ? Math.ceil(JSON.stringify(msg.toolCalls).length / 2) : 0;
                    const toolResultCost = msg.toolResults ? Math.ceil(JSON.stringify(msg.toolResults).length / 2) : 0;
                    const contentCost = Array.isArray(msg.content)
                        ? msg.content.reduce((s, c) => {
                            const part = c;
                            // FIX (Bug 2 — afterTurn estimator): read c.content for native format
                            const textVal = typeof part.text === 'string' ? part.text
                                : typeof part.content === 'string' ? part.content
                                    : part.content != null ? JSON.stringify(part.content) : null;
                            return s + estimateTokens(textVal);
                        }, 0)
                        : 0;
                    return sum + textCost + toolCallCost + toolResultCost + contentCost;
                }, 0);
                const redisPostTokens = await estimateWindowTokens(hm, agentId, sk);
                const postTurnTokens = Math.max(runtimePostTokens, redisPostTokens);
                const postTurnPressure = postTurnTokens / modelState.tokenBudget;
                // Two-tier afterTurn trim (EC3 fix, 2026-04-05):
                //   >90% → trim to 45%: deep saturation recovery — 70% target leaves only ~8k
                //     after system prompt (20-30k), which is not enough for any real tool work.
                //   >80% → trim to 70%: mild pressure, preserve more history.
                const afterTurnTrimTarget = postTurnPressure > 0.90 ? 0.45 : 0.70;
                if (postTurnPressure > 0.80) {
                    const headroomBudget = Math.floor(modelState.tokenBudget * afterTurnTrimTarget);
                    const secondaryTrimmed = await hm.cache.trimHistoryToTokenBudget(agentId, sk, headroomBudget);
                    if (secondaryTrimmed > 0) {
                        console.log(`[hypermem-plugin] afterTurn: pre-emptive trim — session exiting at ` +
                            `${(postTurnPressure * 100).toFixed(1)}%, trimmed ${secondaryTrimmed} msgs to create headroom`);
                    }
                }
            }
        }
        catch {
            // Non-fatal — next turn's tool-loop trim is the fallback
        }
        // Pre-compute embedding for the assistant's reply so the next compose()
        // can skip the Ollama round-trip entirely (fire-and-forget).
        //
        // Why the assistant reply, not the current user message:
        // The assistant's reply is the strongest semantic predictor of what the
        // user will ask next — it's the context they're responding to. By the time
        // the next user message arrives and compose() fires, this embedding is
        // already warm in Redis. Cache hit rate: near 100% on normal conversation
        // flow (one reply per turn).
        //
        // The previous approach (embedding the current user message) still missed
        // on every turn because compose() queries against the INCOMING user message,
        // not the one that was just processed.
        //
        // newMessages = messages.slice(prePromptMessageCount) — the assistant reply
        // is always in here. Walk backwards to find the last assistant text turn.
        try {
            let assistantReplyText = null;
            for (let i = newMessages.length - 1; i >= 0; i--) {
                const m = newMessages[i];
                if (m.role === 'assistant') {
                    const neutral = toNeutralMessage(m);
                    if (neutral.textContent) {
                        assistantReplyText = neutral.textContent;
                        break;
                    }
                }
            }
            if (assistantReplyText && _generateEmbeddings) {
                // Fire-and-forget: don't await, don't block afterTurn
                _generateEmbeddings([assistantReplyText]).then(async ([embedding]) => {
                    if (embedding) {
                        await hm.cache.setQueryEmbedding(agentId, sk, embedding);
                    }
                }).catch(() => {
                    // Non-fatal: embedding pre-compute failed, compose() will call Ollama
                });
            }
        }
        catch {
            // Pre-embed is entirely non-fatal
        }
        // P1.7: Direct per-agent tick after each turn — no need to wait for 5-min interval.
        if (_indexer) {
            const _agentIdForTick = agentId;
            const runTick = async () => {
                if (_taskFlowRuntime) {
                    // Preflight: only create a managed flow if we can actually tick.
                    // Creating a flow we never finish/fail leaves orphaned queued rows.
                    let flow = null;
                    try {
                        // Use createManaged + finish/fail only — do NOT call runTask().
                        // runTask() writes a task_run row to runs.sqlite with status='running'
                        // and the TaskFlow runtime has no completeTask() method, so those rows
                        // would accumulate forever and block clean restarts.
                        flow = _taskFlowRuntime.createManaged({
                            controllerId: 'hypermem/indexer',
                            goal: `Index messages for ${_agentIdForTick}`,
                        });
                        await _indexer.tick();
                        // expectedRevision is required: finishFlow uses optimistic locking.
                        // A freshly created managed flow always starts at revision 0.
                        // MUST be awaited — finish/fail return Promises. Calling without
                        // await lets the Promise get GC'd before the DB write completes,
                        // leaving the flow permanently in queued state.
                        const finishResult = await Promise.resolve(_taskFlowRuntime.finish({ flowId: flow.flowId, expectedRevision: flow.revision }));
                        if (finishResult && !finishResult.applied) {
                            console.warn('[hypermem-plugin] TaskFlow finish failed:', finishResult.code ?? finishResult.reason, 'flowId:', flow.flowId, 'revision:', flow.revision);
                        }
                    }
                    catch (tickErr) {
                        // Best-effort fail — non-fatal, but always mark the flow so it doesn't leak
                        if (flow) {
                            try {
                                await Promise.resolve(_taskFlowRuntime.fail({ flowId: flow.flowId, expectedRevision: flow.revision }));
                            }
                            catch { /* ignore */ }
                        }
                        throw tickErr;
                    }
                }
                else {
                    await _indexer.tick();
                }
            };
            runTick().catch(() => {
                // Non-fatal: indexer tick failure never blocks afterTurn
            });
        }
    }
    catch (err) {
        // afterTurn is never fatal
        console.warn('[hypermem-plugin] afterTurn failed:', err.message);
    }
},
1873
/**
 * Dispose: intentionally a no-op.
 *
 * The runtime calls dispose() at the end of every request cycle, but
 * hypermem's Redis connection and SQLite handles are gateway-lifetime
 * singletons — not request-scoped. Closing and nulling _hm here causes
 * a full reconnect + re-init on every turn (~400-800ms latency per turn).
 *
 * ioredis manages its own reconnection on connection loss. If the gateway
 * process exits, Node.js cleans up file handles automatically.
 *
 * If a true shutdown is needed (e.g. gateway restart signal), call
 * _hm.close() directly from a gateway:shutdown hook instead.
 *
 * @returns {Promise<void>} resolves immediately; nothing is torn down here.
 */
async dispose() {
    // Intentional no-op — see comment above.
},
1890
+ };
1891
+ }
1892
// ─── NeutralMessage → AgentMessage ─────────────────────────────
/**
 * Convert hypermem's NeutralMessage back into OpenClaw's AgentMessage shape.
 *
 * The runtime consumes pi-ai's Message union:
 *   UserMessage:       { role: 'user', content: string | ContentBlock[], timestamp }
 *   AssistantMessage:  { role: 'assistant', content: ContentBlock[], api, provider, model, usage, stopReason, timestamp }
 *   ToolResultMessage: { role: 'toolResult', toolCallId, toolName, content, isError, timestamp }
 *
 * hypermem persists tool results as a NeutralMessage carrier with role='user'
 * and a toolResults[] array — each entry is expanded here into its own
 * ToolResultMessage, so callers should flatMap over this function's output.
 *
 * NeutralToolCall.arguments is stored as a JSON string, while the runtime's
 * ToolCall.arguments is Record<string, any>; we parse (and fall back to {})
 * on malformed JSON.
 *
 * Assistant metadata fields (api, provider, model, usage, stopReason) are
 * filled with sentinel stubs: the runtime's convertToLlm strips them before
 * the API call and the session transcript holds the real values — they exist
 * only to satisfy the AgentMessage structure.
 *
 * @param msg  NeutralMessage from the compositor
 * @returns a single AgentMessage, or an array of ToolResultMessage objects
 */
function neutralToAgentMessage(msg) {
    const timestamp = Date.now();
    // Tool-result carrier: fan out one ToolResultMessage per stored result.
    if (msg.toolResults?.length) {
        const expanded = [];
        for (const tr of msg.toolResults) {
            expanded.push({
                role: 'toolResult',
                toolCallId: tr.callId,
                toolName: tr.name,
                content: [{ type: 'text', text: tr.content ?? '' }],
                isError: tr.isError ?? false,
                timestamp,
            });
        }
        return expanded;
    }
    // Plain user turn: content is a bare string.
    if (msg.role === 'user') {
        return { role: 'user', content: msg.textContent ?? '', timestamp };
    }
    // System turn: passed through; the runtime handles system messages separately.
    if (msg.role === 'system') {
        const systemMessage = {
            role: 'system',
            content: msg.textContent ?? '',
            timestamp,
        };
        // Preserve dynamicBoundary metadata for prompt caching.
        if (msg.metadata?.dynamicBoundary) {
            systemMessage.metadata = { dynamicBoundary: true };
        }
        return systemMessage;
    }
    // Assistant turn: build the content-block list (text first, then tool calls).
    const blocks = [];
    if (msg.textContent) {
        blocks.push({ type: 'text', text: msg.textContent });
    }
    for (const tc of msg.toolCalls ?? []) {
        // arguments: JSON string → object; malformed JSON degrades to {}.
        let parsedArgs;
        if (typeof tc.arguments === 'string') {
            try {
                parsedArgs = JSON.parse(tc.arguments);
            }
            catch {
                parsedArgs = {};
            }
        }
        else {
            parsedArgs = tc.arguments ?? {};
        }
        blocks.push({ type: 'toolCall', id: tc.id, name: tc.name, arguments: parsedArgs });
    }
    // Sentinel metadata — structurally required, stripped before the API call.
    return {
        role: 'assistant',
        content: blocks.length > 0 ? blocks : [{ type: 'text', text: '' }],
        api: 'unknown',
        provider: 'unknown',
        model: 'unknown',
        usage: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
        stopReason: 'stop',
        timestamp,
    };
}
1980
// ─── Cache Bust Utility ────────────────────────────────────────────────────
/**
 * Bust the assembly cache for a specific agent+session.
 *
 * Call after writing to identity files (SOUL.md, IDENTITY.md, TOOLS.md,
 * USER.md): clearing the cached contextBlock and zeroing its timestamp forces
 * the next assemble() to run the full compositor instead of a cache replay.
 *
 * @param {string} agentId     Agent whose cache slots are cleared
 * @param {string} sessionKey  Session whose cache slots are cleared
 * @returns {Promise<void>} never rejects — failures are swallowed as non-fatal
 */
export async function bustAssemblyCache(agentId, sessionKey) {
    try {
        const hm = await getHyperMem();
        const slotWrites = [
            hm.cache.setSlot(agentId, sessionKey, 'assemblyContextBlock', ''),
            hm.cache.setSlot(agentId, sessionKey, 'assemblyContextAt', '0'),
        ];
        await Promise.all(slotWrites);
    }
    catch {
        // Non-fatal — worst case the next assemble() replays a stale block once.
    }
}
1998
// ─── Plugin Entry ───────────────────────────────────────────────
// Single engine instance shared across all register() calls.
const engine = createHyperMemEngine();
/**
 * Plugin manifest: exposes the hypercompositor context engine to OpenClaw
 * and (best-effort) binds the TaskFlow runtime for indexer task visibility.
 */
export default definePluginEntry({
    id: 'hypercompositor',
    name: 'HyperCompositor — context engine',
    description: 'Four-layer memory architecture for OpenClaw agents: Redis hot cache, message history, vector search, and structured library.',
    kind: 'context-engine',
    configSchema: emptyPluginConfigSchema(),
    register(api) {
        api.registerContextEngine('hypercompositor', () => engine);
        // P1.7: Bind TaskFlow runtime for task visibility — best-effort.
        // Guard: api.runtime.taskFlow may not exist on older OpenClaw versions.
        try {
            // eslint-disable-next-line @typescript-eslint/no-explicit-any
            const taskFlow = api.runtime?.taskFlow;
            const canBind = taskFlow && typeof taskFlow.bindSession === 'function';
            if (canBind) {
                _taskFlowRuntime = taskFlow.bindSession({
                    sessionKey: 'hypermem-plugin',
                    requesterOrigin: 'hypermem-plugin',
                });
            }
        }
        catch {
            // TaskFlow binding is best-effort — plugin remains fully functional without it
        }
    },
});
2025
+ //# sourceMappingURL=index.js.map