openclaw-memory-hierarchical 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/worker.ts ADDED
@@ -0,0 +1,457 @@
1
+ /**
2
+ * Background worker for hierarchical memory summarization.
3
+ *
4
+ * Runs on a timer, finds eligible chunks, summarizes them,
5
+ * and merges summaries when thresholds are reached.
6
+ */
7
+
8
+ import fs from "node:fs";
9
+ import path from "node:path";
10
+ import { SessionManager } from "@mariozechner/pi-coding-agent";
11
+ import type { PluginConfig } from "./config.js";
12
+ import { resolveHierarchicalMemoryConfig } from "./config.js";
13
+ import { acquireSummaryLock } from "./lock.js";
14
+ import {
15
+ generateNextSummaryId,
16
+ loadSummaryContents,
17
+ loadSummaryIndex,
18
+ saveSummaryIndex,
19
+ writeSummary,
20
+ } from "./storage.js";
21
+ import {
22
+ type ChunkToSummarize,
23
+ estimateMessagesTokens,
24
+ getNextLevel,
25
+ getSourceLevel,
26
+ mergeSummaries,
27
+ summarizeChunk,
28
+ type SummarizationParams,
29
+ } from "./summarize.js";
30
+ import {
31
+ getAllSummariesForContext,
32
+ getUnmergedSummaries,
33
+ type HierarchicalMemoryConfig,
34
+ type SummaryEntry,
35
+ type SummaryIndex,
36
+ } from "./types.js";
37
+
38
+ const DEFAULT_AGENT_ID = "main"; // Fallback agent id used by resolveSessionStorePath when no agentId is passed.
39
+
40
/** Reasons a worker run can end early without doing any summarization. */
type WorkerSkipReason = "disabled" | "lock_held" | "no_session";

/**
 * Outcome of a single worker run.
 */
export type WorkerResult = {
  /** False when the run ended with an error; skipped runs report true. */
  success: boolean;
  /** Present when the run returned early without processing anything. */
  skipped?: WorkerSkipReason;
  /** Number of transcript chunks turned into L1 summaries during the run. */
  chunksProcessed?: number;
  /** Number of levels at which a merge was performed during the run. */
  mergesPerformed?: number;
  /** Message of the error that ended the run. */
  error?: string;
  /** Elapsed wall-clock time of the run, in milliseconds. */
  durationMs?: number;
};
48
+
49
/**
 * Entry point: run one hierarchical-memory pass for a single agent.
 *
 * The per-agent summary lock is held for the whole pass and released
 * afterwards, whether the pass succeeds or throws. When
 * `acquireSummaryLock` yields no lock, the run is reported as skipped with
 * reason "lock_held" and nothing else is done.
 *
 * @param params.agentId - Agent whose session and summaries are processed.
 * @param params.pluginConfig - Plugin configuration; the memory settings are
 *   resolved from it via `resolveHierarchicalMemoryConfig`.
 * @param params.stateDir - Directory under which the agent's session files
 *   are looked up.
 * @param params.signal - Optional abort signal forwarded to the locked run.
 * @returns The locked run's result with `durationMs` added.
 */
export async function runHierarchicalMemoryWorker(params: {
  agentId: string;
  pluginConfig: PluginConfig;
  stateDir: string;
  signal?: AbortSignal;
}): Promise<WorkerResult> {
  const { agentId, pluginConfig, stateDir, signal } = params;
  const startedAt = Date.now();
  const memoryConfig = resolveHierarchicalMemoryConfig(pluginConfig);

  // Only one run per agent at a time: bail out if no lock was handed out.
  const lock = await acquireSummaryLock(agentId);
  if (!lock) {
    return { success: true, skipped: "lock_held" };
  }

  try {
    const outcome = await runWorkerWithLock({
      agentId,
      pluginConfig,
      memoryConfig,
      stateDir,
      signal,
    });
    const durationMs = Date.now() - startedAt;
    return { ...outcome, durationMs };
  } finally {
    await lock.release();
  }
}
84
+
85
/**
 * Run one summarization pass; the caller is expected to hold the agent's
 * summary lock.
 *
 * Phase 1 turns eligible transcript chunks of the agent's main session
 * (`agent:<agentId>:main`) into L1 summaries. Phase 2 attempts one merge at
 * L1 and then one at L2.
 *
 * The index is persisted after every summarized chunk and every successful
 * merge. Without these checkpoints a failure part-way through the run would
 * leave summary files on disk that the index never records, because the
 * error path reloads the index from disk.
 *
 * Errors raised during the pass are converted into
 * `{ success: false, error }` and, best-effort, stored in the index as
 * `worker.lastError`.
 *
 * @param params.signal - When aborted, remaining chunks/merges are skipped;
 *   work already done is still saved.
 * @returns The run outcome without `durationMs` (added by the caller).
 */
async function runWorkerWithLock(params: {
  agentId: string;
  pluginConfig: PluginConfig;
  memoryConfig: HierarchicalMemoryConfig;
  stateDir: string;
  signal?: AbortSignal;
}): Promise<Omit<WorkerResult, "durationMs">> {
  const { agentId, pluginConfig, memoryConfig, stateDir, signal } = params;

  try {
    // Load current state
    const index = await loadSummaryIndex(agentId);

    // Find the current session
    const storePath = resolveSessionStorePath(stateDir, agentId);
    const sessionStore = loadSessionStoreSimple(storePath);
    const mainSessionKey = `agent:${agentId}:main`;
    const sessionEntry = sessionStore[mainSessionKey];

    if (!sessionEntry?.sessionId) {
      return { success: true, skipped: "no_session" };
    }

    const sessionFile = resolveSessionTranscriptPath(stateDir, agentId, sessionEntry.sessionId);

    // Get summarization params
    const summarization = resolveSummarizationParams({
      pluginConfig,
      memoryConfig,
      sessionEntry,
    });

    if (!summarization) {
      return {
        success: false,
        error: "Failed to resolve summarization parameters (no API key?)",
      };
    }

    // Phase 1: Find and summarize eligible chunks
    const chunks = await findEligibleChunks({
      sessionFile,
      lastSummarizedEntryId: index.lastSummarizedEntryId,
      memoryConfig,
      sessionId: sessionEntry.sessionId,
    });

    let chunksProcessed = 0;
    for (const chunk of chunks) {
      if (signal?.aborted) {
        break;
      }

      // Reload prior summaries on every iteration: the previous chunk's
      // summary has just been added to the index.
      const summaryContext = getAllSummariesForContext(index);
      const priorSummaries = [
        ...(await loadSummaryContents(summaryContext.L3, agentId)),
        ...(await loadSummaryContents(summaryContext.L2, agentId)),
        ...(await loadSummaryContents(summaryContext.L1, agentId)),
      ];

      // Summarize the chunk
      const summaryContent = await summarizeChunk({
        chunk,
        priorSummaries,
        config: memoryConfig,
        summarization,
      });

      // Create and save the summary entry
      const summaryId = generateNextSummaryId(index, "L1");
      const entry: SummaryEntry = {
        id: summaryId,
        level: "L1",
        createdAt: Date.now(),
        tokenEstimate: Math.ceil(summaryContent.length / 4), // Rough estimate
        sourceLevel: "L0",
        sourceIds: chunk.entryIds,
        sourceSessionId: chunk.sessionId,
        mergedInto: null,
      };

      await writeSummary(entry, summaryContent, agentId);
      index.levels.L1.push(entry);
      index.lastSummarizedEntryId = chunk.entryIds[chunk.entryIds.length - 1];
      index.lastSummarizedSessionId = chunk.sessionId;

      // Checkpoint: record this summary before starting the next chunk so a
      // later failure cannot orphan the file that was just written.
      await saveSummaryIndex(index, agentId);

      chunksProcessed++;
    }

    // Phase 2: Check for merges at each level
    let mergesPerformed = 0;

    for (const level of ["L1", "L2"] as const) {
      if (signal?.aborted) {
        break;
      }

      const merged = await maybeMergeLevel({
        index,
        level,
        memoryConfig,
        summarization,
        agentId,
      });

      if (merged) {
        // Checkpoint: the merge changed the index in memory only.
        await saveSummaryIndex(index, agentId);
        mergesPerformed++;
      }
    }

    // Save updated index
    index.worker.lastRunAt = Date.now();
    index.worker.lastError = null;
    await saveSummaryIndex(index, agentId);

    return {
      success: true,
      chunksProcessed,
      mergesPerformed,
    };
  } catch (err) {
    const errorMessage = err instanceof Error ? err.message : String(err);

    // Try to save error state on top of the last persisted index.
    try {
      const index = await loadSummaryIndex(agentId);
      index.worker.lastRunAt = Date.now();
      index.worker.lastError = errorMessage;
      await saveSummaryIndex(index, agentId);
    } catch {
      // Ignore save errors: the original failure is what gets reported.
    }

    return {
      success: false,
      error: errorMessage,
    };
  }
}
225
+
226
/**
 * Collect transcript chunks that are ready to be summarized.
 *
 * Only entries located before the pruning boundary are considered. The
 * boundary is found by walking message entries from the end of the
 * transcript and accumulating token estimates until they reach
 * `memoryConfig.pruningBoundaryTokens`. Scanning resumes right after
 * `lastSummarizedEntryId` when that entry is present in the transcript, and
 * from the first entry otherwise.
 *
 * A chunk is closed once its token estimate has reached
 * `memoryConfig.chunkTokens` and the message just added has role
 * "assistant". Messages remaining in an unfinished chunk are not returned.
 *
 * @returns Completed chunks in transcript order. Empty when the session file
 *   cannot be opened, has no entries, or the boundary is not past the first
 *   entry.
 */
async function findEligibleChunks(params: {
  sessionFile: string;
  lastSummarizedEntryId: string | null;
  memoryConfig: HierarchicalMemoryConfig;
  sessionId: string;
}): Promise<ChunkToSummarize[]> {
  type TranscriptMessage = { role: string; content?: unknown };

  const { sessionFile, lastSummarizedEntryId, memoryConfig, sessionId } = params;

  let manager: SessionManager;
  try {
    manager = SessionManager.open(sessionFile);
  } catch {
    // Session file doesn't exist or is invalid
    return [];
  }

  const entries = manager.getEntries();
  if (entries.length === 0) {
    return [];
  }

  // Locate the boundary by scanning backwards over the same entry stream
  // that is chunked below, so both passes agree on positions.
  let boundary = -1;
  let tokensFromTail = 0;
  for (let pos = entries.length - 1; pos >= 0; pos--) {
    const candidate = entries[pos];
    if (candidate.type === "message") {
      tokensFromTail += estimateMessagesTokens([candidate.message as TranscriptMessage]);
      if (tokensFromTail >= memoryConfig.pruningBoundaryTokens) {
        boundary = pos;
        break;
      }
    }
  }

  // Not enough history yet
  if (boundary <= 0) {
    return [];
  }

  // Resume after the last summarized entry when it can still be found.
  const resumeAfter = lastSummarizedEntryId
    ? entries.findIndex((e) => e.id === lastSummarizedEntryId)
    : -1;
  const firstIndex = resumeAfter >= 0 ? resumeAfter + 1 : 0;

  const emptyChunk = (): ChunkToSummarize => ({
    messages: [],
    entryIds: [],
    sessionId,
    tokenEstimate: 0,
  });

  const completed: ChunkToSummarize[] = [];
  let open = emptyChunk();

  for (const entry of entries.slice(firstIndex, boundary)) {
    if (entry.type !== "message") {
      continue;
    }

    const message = entry.message as TranscriptMessage;
    const messageTokens = estimateMessagesTokens([message]);

    open.messages.push(message);
    open.entryIds.push(entry.id);
    open.tokenEstimate += messageTokens;

    // Close the chunk only on an assistant message so it ends on a complete turn.
    const isFull = open.tokenEstimate >= memoryConfig.chunkTokens;
    if (isFull && message.role === "assistant") {
      completed.push(open);
      open = emptyChunk();
    }
  }

  // Whatever is left in `open` is a partial chunk; it is dropped here and
  // will be rebuilt on a later run.
  return completed;
}
329
+
330
/**
 * Merge one batch of unmerged summaries at `level` into a single summary one
 * level up.
 *
 * Nothing happens unless at least `memoryConfig.mergeThreshold` unmerged
 * summaries exist at `level` and `getNextLevel` yields a target level. On a
 * merge, the first `mergeThreshold` unmerged summaries are combined, the new
 * summary is written and appended to the index, and each source entry's
 * `mergedInto` is set to the new id. The index is only mutated in memory
 * here; this function does not save it.
 *
 * @returns true when a merge was performed, false otherwise.
 */
async function maybeMergeLevel(params: {
  index: SummaryIndex;
  level: "L1" | "L2";
  memoryConfig: HierarchicalMemoryConfig;
  summarization: SummarizationParams;
  agentId: string;
}): Promise<boolean> {
  const { index, level, memoryConfig, summarization, agentId } = params;
  const batchSize = memoryConfig.mergeThreshold;

  // Not enough to merge yet
  const candidates = getUnmergedSummaries(index, level);
  if (candidates.length < batchSize) {
    return false;
  }

  // Already at max level
  const targetLevel = getNextLevel(level);
  if (!targetLevel) {
    return false;
  }

  // Always merge exactly `batchSize` entries to keep a fixed merge cadence.
  const batch = candidates.slice(0, batchSize);
  const summaries = await loadSummaryContents(batch, agentId);

  // Older context: unmerged L3 summaries when producing an L2 summary;
  // nothing when producing an L3 summary.
  const olderContext: string[] =
    targetLevel === "L2"
      ? [...(await loadSummaryContents(getUnmergedSummaries(index, "L3"), agentId))]
      : [];

  const mergedContent = await mergeSummaries({
    summaries,
    olderContext,
    config: memoryConfig,
    summarization,
  });

  const mergedId = generateNextSummaryId(index, targetLevel);
  const mergedEntry: SummaryEntry = {
    id: mergedId,
    level: targetLevel,
    createdAt: Date.now(),
    tokenEstimate: Math.ceil(mergedContent.length / 4),
    sourceLevel: getSourceLevel(targetLevel),
    sourceIds: batch.map((source) => source.id),
    mergedInto: null,
  };

  await writeSummary(mergedEntry, mergedContent, agentId);
  index.levels[targetLevel].push(mergedEntry);

  // Mark the sources so they are no longer reported as unmerged.
  batch.forEach((source) => {
    source.mergedInto = mergedId;
  });

  return true;
}
399
+
400
/**
 * Resolve parameters needed for summarization.
 * Uses the API key from plugin config instead of the complex auth system.
 *
 * The model spec is taken from `memoryConfig.model`, then
 * `sessionEntry.model`, then a built-in default. A spec of the form
 * "provider/model" is split at the FIRST slash only, so model ids that
 * themselves contain slashes (e.g. "openrouter/meta-llama/llama-3") are kept
 * intact. A spec without a slash is treated as a model of provider
 * "anthropic".
 *
 * @returns The resolved provider, model and API key, or null when
 *   `pluginConfig.apiKey` is missing or empty.
 */
function resolveSummarizationParams(params: {
  pluginConfig: PluginConfig;
  memoryConfig: HierarchicalMemoryConfig;
  sessionEntry: { model?: string };
}): SummarizationParams | null {
  const { pluginConfig, memoryConfig, sessionEntry } = params;

  // Determine model to use
  const modelSpec =
    memoryConfig.model ?? sessionEntry.model ?? "anthropic/claude-sonnet-4-5-20250929";

  // `split("/", 2)` would silently drop everything after the second slash,
  // so locate the first separator and slice around it instead.
  const separatorAt = modelSpec.indexOf("/");
  const provider = separatorAt >= 0 ? modelSpec.slice(0, separatorAt) : "anthropic";
  const model = separatorAt >= 0 ? modelSpec.slice(separatorAt + 1) : modelSpec;

  const apiKey = pluginConfig.apiKey;
  if (!apiKey) {
    return null;
  }

  return {
    model,
    provider,
    apiKey,
  };
}
429
+
430
+ // --- Inline session helpers (replacing core imports) ---
431
+
432
/**
 * Build the path of an agent's session store file:
 * `<stateDir>/agents/<agentId>/sessions/sessions.json`.
 * Uses DEFAULT_AGENT_ID when `agentId` is not provided.
 */
function resolveSessionStorePath(stateDir: string, agentId?: string): string {
  const sessionsDir = path.join(stateDir, "agents", agentId ?? DEFAULT_AGENT_ID, "sessions");
  return path.join(sessionsDir, "sessions.json");
}
437
+
438
/**
 * Build the path of a session's JSONL transcript:
 * `<stateDir>/agents/<agentId>/sessions/<sessionId>.jsonl`.
 */
function resolveSessionTranscriptPath(
  stateDir: string,
  agentId: string,
  sessionId: string,
): string {
  const transcriptName = `${sessionId}.jsonl`;
  const segments = [stateDir, "agents", agentId, "sessions", transcriptName];
  return path.join(...segments);
}
446
+
447
/**
 * Load the session store (simplified — just reads JSON) and return it as a
 * key → entry map.
 *
 * Best-effort: a file that cannot be read, malformed JSON, or JSON whose
 * top-level value is not a plain object (for example `null` or an array) all
 * yield an empty store, so callers can index the result without extra checks.
 *
 * @param storePath - Path of the session store JSON file.
 * @returns The parsed store, or `{}` when nothing usable could be loaded.
 */
function loadSessionStoreSimple(
  storePath: string,
): Record<string, { sessionId?: string; model?: string }> {
  let parsed: unknown;
  try {
    parsed = JSON.parse(fs.readFileSync(storePath, "utf-8"));
  } catch {
    return {};
  }

  // JSON.parse can legitimately return null, arrays or primitives; none of
  // those is a usable store, and indexing null would throw in the caller.
  if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
    return {};
  }

  return parsed as Record<string, { sessionId?: string; model?: string }>;
}
457
+ }