clawmem 0.9.0 → 0.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,441 @@
1
+ /**
2
+ * ClawMem OpenClaw Plugin — Transcript path resolver
3
+ *
4
+ * Why this exists: the typed `PluginHookName` events that ClawMem subscribes
5
+ * to (`before_prompt_build`, `agent_end`) do NOT carry a `sessionFile` field
6
+ * in their event payload. ClawMem's hooks (precompact-extract,
7
+ * decision-extractor, handoff-generator, feedback-loop) all require a
8
+ * `transcript_path` to read the session JSONL — they call
9
+ * `validateTranscriptPath(input.transcriptPath ?? "")` and return empty on
10
+ * failure (no session_id fallback).
11
+ *
12
+ * Codex Turn N+2 caught this: §14.11's design fix moved precompact off
13
+ * `agent_end` (fire-and-forget) onto `before_prompt_build` (awaited), but
14
+ * neither event delivers `sessionFile`, so the load-bearing precompact
15
+ * path AND the eventually-consistent extractors were both no-ops.
16
+ *
17
+ * The fix: derive the transcript path from `sessionId` + `agentId` using
18
+ * OpenClaw's canonical layout from `src/config/sessions/paths.ts`:
19
+ *
20
+ * <state-dir>/agents/<agentId>/sessions/<sessionId>.jsonl
21
+ *
22
+ * where:
23
+ * - state-dir defaults to ~/.openclaw, overridable via OPENCLAW_STATE_DIR
24
+ * or OPENCLAW_HOME env vars (mirrors `src/config/paths.ts:resolveStateDir`)
25
+ * - agentId defaults to "main" (mirrors
26
+ * `src/routing/session-key.ts:DEFAULT_AGENT_ID`)
27
+ *
28
+ * Events that DO carry `sessionFile` (`before_compaction`, `before_reset`,
29
+ * `after_compaction`, `session_end`) should pass that explicit value
30
+ * instead — the resolver is a fallback, not a replacement.
31
+ *
32
+ * The resolver is fail-open: returns undefined when the resolved path does
33
+ * not exist, the sessionId is invalid, or any underlying step throws.
34
+ */
35
+
36
+ import { existsSync, readdirSync, readFileSync } from "node:fs";
37
+ import { isAbsolute, join, resolve } from "node:path";
38
+ import { homedir } from "node:os";
39
+
40
/**
 * Agent id used whenever a caller does not supply one.
 *
 * Kept identical to OpenClaw's own `DEFAULT_AGENT_ID = "main"`
 * (`openclaw/src/routing/session-key.ts:20`) so that paths derived here
 * land in the same `agents/<agentId>/` directory OpenClaw writes to.
 */
export const DEFAULT_AGENT_ID = "main";
45
+
46
/**
 * Allow-list pattern for session ids — a copy of `SAFE_SESSION_ID_RE` from
 * `openclaw/src/config/sessions/paths.ts:61`.
 *
 * A session id must start with an ASCII letter or digit and may continue
 * with up to 127 letters, digits, `.`, `_` or `-` (case-insensitive).
 * Path separators can never match, and because the first character cannot
 * be a dot, neither can a bare `.` or `..`.
 *
 * Validating on our side means a malformed upstream sessionId is rejected
 * before it is ever interpolated into a filesystem path.
 */
const SAFE_SESSION_ID_RE = /^[a-z0-9][a-z0-9._-]{0,127}$/i;
55
+
56
/**
 * Patterns used to validate and sanitize agent ids. They mirror
 * `VALID_ID_RE`, `INVALID_CHARS_RE`, `LEADING_DASH_RE` and
 * `TRAILING_DASH_RE` from `openclaw/src/routing/session-key.ts:25`.
 *
 * Why a plain `.toLowerCase()` is not enough: OpenClaw replaces every run
 * of invalid characters with a single `-`, strips leading and trailing
 * dashes, and caps the result at 64 characters. The resolver has to
 * reproduce that exactly, otherwise it builds a different directory name
 * than OpenClaw's `resolveSessionTranscriptPathInDir` and the extractor
 * hooks read the wrong transcript (or none) for agents whose id needs
 * sanitizing.
 */

/** An id that needs no sanitizing: alphanumeric start, then up to 63 of `[a-z0-9_-]`. */
const AGENT_ID_VALID_RE = /^[a-z0-9][a-z0-9_-]{0,63}$/i;

/** One or more consecutive characters outside `[a-z0-9_-]` (global, for `replace`). */
const AGENT_ID_INVALID_CHARS_RE = /[^a-z0-9_-]+/g;

/** Dashes at the very start of the string. */
const AGENT_ID_LEADING_DASH_RE = /^-+/;

/** Dashes at the very end of the string. */
const AGENT_ID_TRAILING_DASH_RE = /-+$/;
74
+
75
/**
 * Normalize an agent id the same way OpenClaw does
 * (`openclaw/src/routing/session-key.ts:91 normalizeAgentId`).
 *
 * Rules, applied in order:
 * - `null`, `undefined`, empty or whitespace-only input → `DEFAULT_AGENT_ID`
 * - input that already matches `AGENT_ID_VALID_RE` → that input, lowercased
 * - anything else → lowercased, each run of invalid characters replaced by
 *   a single `-`, leading and trailing dashes removed, truncated to 64
 *   characters; if nothing is left, `DEFAULT_AGENT_ID`
 *
 * @param value Raw agent id as received from the caller.
 * @returns A lowercase id suitable for use as a directory name.
 */
export function normalizeAgentId(value: string | undefined | null): string {
  const candidate = value?.trim() ?? "";
  if (candidate.length === 0) {
    return DEFAULT_AGENT_ID;
  }

  // Fast path: the id is already well-formed, only the case may differ.
  if (AGENT_ID_VALID_RE.test(candidate)) {
    return candidate.toLowerCase();
  }

  const dashed = candidate.toLowerCase().replace(AGENT_ID_INVALID_CHARS_RE, "-");
  const stripped = dashed
    .replace(AGENT_ID_LEADING_DASH_RE, "")
    .replace(AGENT_ID_TRAILING_DASH_RE, "");
  // Truncation happens last, matching the order of the mirrored upstream code.
  const bounded = stripped.slice(0, 64);
  return bounded === "" ? DEFAULT_AGENT_ID : bounded;
}
98
+
99
/**
 * Pre-rebrand state-directory names, in lookup order. OpenClaw still runs
 * from one of these when the current `.openclaw` directory is absent on
 * disk (mirrors `LEGACY_STATE_DIRNAMES` at
 * `openclaw/src/config/paths.ts:21`).
 *
 * Without this list the resolver would always synthesize `~/.openclaw`,
 * which does not exist on installs that were upgraded but never migrated.
 */
const LEGACY_STATE_DIRNAMES = [".clawdbot"] as const;

/** Name of the current (post-rebrand) state directory under the home dir. */
const NEW_STATE_DIRNAME = ".openclaw";
111
+
112
/**
 * Locate OpenClaw's state directory, following the precedence of
 * `openclaw/src/config/paths.ts:resolveStateDir`:
 *
 *   1. `$OPENCLAW_STATE_DIR`, when set and non-blank — used as-is (after
 *      `~` expansion), with no existence check and no further fallback
 *   2. otherwise pick the home directory: `$OPENCLAW_HOME` when set
 *      (mirrors `openclaw/src/infra/home-dir.ts:resolveRawHomeDir`), else
 *      the result of `home()`
 *   3. `<home>/.openclaw` when that directory exists
 *   4. the first existing legacy directory (`<home>/.clawdbot`)
 *   5. `<home>/.openclaw` as the synthesized default — OpenClaw creates it
 *      on first run
 *
 * `OPENCLAW_TEST_FAST=1` skips the existence probes of steps 3-4 and
 * returns `<home>/.openclaw` directly (mirrors `paths.ts:70-72`).
 *
 * Step 4 matters for upgraded-but-not-migrated installs: OpenClaw itself
 * keeps running from `.clawdbot` there, so pointing at a synthesized
 * `.openclaw` would never find a transcript.
 *
 * @param env Environment to read overrides from; defaults to `process.env`.
 * @param home Supplier of the OS home directory; defaults to `os.homedir`.
 * @returns The state directory path. Errors thrown by `home()` propagate.
 */
export function resolveOpenClawStateDir(
  env: NodeJS.ProcessEnv = process.env,
  home: () => string = homedir,
): string {
  const explicitStateDir = env.OPENCLAW_STATE_DIR?.trim();
  if (explicitStateDir) {
    return resolveHomeRelative(explicitStateDir, home);
  }

  // OPENCLAW_HOME replaces the home directory wholesale; the state dir is
  // then `<OPENCLAW_HOME>/.openclaw` (or a legacy sibling).
  const homeOverride = env.OPENCLAW_HOME?.trim();
  const baseHome = homeOverride ? resolveHomeRelative(homeOverride, home) : home();
  const preferredDir = join(baseHome, NEW_STATE_DIRNAME);

  if (env.OPENCLAW_TEST_FAST === "1") {
    return preferredDir;
  }

  // Probe the current name first, then each legacy name; first hit wins.
  const candidates = [
    preferredDir,
    ...LEGACY_STATE_DIRNAMES.map((legacyName) => join(baseHome, legacyName)),
  ];
  for (const candidate of candidates) {
    try {
      if (existsSync(candidate)) return candidate;
    } catch {
      // A failed probe is treated as "not there"; try the next candidate.
    }
  }

  return preferredDir;
}
173
+
174
/**
 * Expand a leading `~` in a user-supplied path and make the result
 * absolute.
 *
 * - `"~"` → `home()`
 * - `"~/rest"` → `<home()>/rest`
 * - `"~\rest"` → `<home()>\rest`, on Windows only. The backslash is the
 *   native separator there; without this branch such a value would resolve
 *   to a literal `~` directory under the current working directory.
 * - anything else (including `~user/...`, which is not expanded) →
 *   `path.resolve(input)`, i.e. relative to the current working directory
 *
 * @param input Path taken from an environment variable, already trimmed.
 * @param home Supplier of the home directory; only called when needed.
 */
function resolveHomeRelative(input: string, home: () => string): string {
  if (input === "~") return home();

  const hasHomePrefix =
    input.startsWith("~/") || (process.platform === "win32" && input.startsWith("~\\"));
  if (hasHomePrefix) {
    // Drop the two-character "~<sep>" prefix and re-root the rest at home.
    return join(home(), input.slice(2));
  }

  return resolve(input);
}
179
+
180
/**
 * The slice of an OpenClaw session-store entry that the resolver reads.
 *
 * OpenClaw's full `SessionEntry` (`openclaw/src/config/sessions/types.ts:111`)
 * carries many more fields. Only `sessionId` and `sessionFile` are
 * consulted here, and both are typed `unknown` because they come straight
 * out of `JSON.parse` and must be narrowed before use. Everything else is
 * admitted through the index signature so ClawMem stays decoupled from
 * OpenClaw's internal session model.
 */
interface MinimalSessionEntry {
  /** Session id recorded for this entry; expected to be a string. */
  sessionId?: unknown;
  /** Transcript location; expected to be an absolute path or a name relative to the sessions dir. */
  sessionFile?: unknown;
  /** Any other field OpenClaw stores on the entry. */
  [key: string]: unknown;
}
192
+
193
/**
 * Look up a session's transcript file in `<sessionsDir>/sessions.json`,
 * the store OpenClaw itself treats as the source of truth
 * (`openclaw/src/config/sessions/store-read.ts:10`). The store is the only
 * reliable way to tell base and topic-scoped transcripts apart when both
 * exist for one sessionId.
 *
 * Resolution order:
 *   1. `store[sessionKey]`, when the caller knows the full store key
 *      (e.g. `agent:main:abc123`). Only the store's own keys are
 *      considered, so a key such as `__proto__` cannot match an inherited
 *      property.
 *   2. The first entry whose `sessionId` equals `params.sessionId` and
 *      that has a usable `sessionFile`.
 *
 * A `sessionFile` is usable when it is a non-blank string. Absolute paths
 * are returned unchanged. Relative values are joined onto `sessionsDir`,
 * but only if they stay inside it: a relative value that normalizes to a
 * path starting with `..` is ignored, in keeping with this module's rule
 * of never building paths that climb out of the sessions directory.
 *
 * Never throws. Returns `undefined` when the store is missing, unreadable,
 * empty, not a JSON object, or holds no matching entry with a usable
 * `sessionFile`. The returned path is NOT checked for existence.
 *
 * @param params.sessionsDir Directory that contains `sessions.json`.
 * @param params.sessionId Session id to match in step 2.
 * @param params.sessionKey Optional exact store key for step 1.
 */
function lookupSessionFileFromStore(params: {
  sessionsDir: string;
  sessionId: string;
  sessionKey?: string;
}): string | undefined {
  let parsed: unknown;
  try {
    const raw = readFileSync(join(params.sessionsDir, "sessions.json"), "utf-8");
    if (!raw.trim()) return undefined;
    parsed = JSON.parse(raw);
  } catch {
    // Missing, unreadable or malformed store: nothing to look up.
    return undefined;
  }

  // Narrow an arbitrary JSON value to a plain (non-array) object entry.
  const asEntry = (value: unknown): MinimalSessionEntry | undefined =>
    value !== null && typeof value === "object" && !Array.isArray(value)
      ? (value as MinimalSessionEntry)
      : undefined;

  const store = asEntry(parsed);
  if (!store) return undefined;

  const resolveEntryFile = (entry: MinimalSessionEntry): string | undefined => {
    const file = entry.sessionFile;
    if (typeof file !== "string") return undefined;
    const trimmed = file.trim();
    if (!trimmed) return undefined;
    if (isAbsolute(trimmed)) return trimmed;

    // `join` with a single argument normalizes the path; a result that
    // begins with a `..` segment would resolve outside `sessionsDir`.
    // Both separator styles are checked so the guard is platform-neutral.
    const normalized = join(trimmed);
    const escapes =
      normalized === ".." || normalized.startsWith("../") || normalized.startsWith("..\\");
    return escapes ? undefined : join(params.sessionsDir, trimmed);
  };

  // 1. Exact sessionKey match (own properties only).
  const { sessionKey } = params;
  if (sessionKey && Object.prototype.hasOwnProperty.call(store, sessionKey)) {
    const direct = asEntry(store[sessionKey]);
    const candidate = direct ? resolveEntryFile(direct) : undefined;
    if (candidate) return candidate;
  }

  // 2. Scan for an entry recorded under the requested sessionId.
  for (const value of Object.values(store)) {
    const entry = asEntry(value);
    if (!entry || entry.sessionId !== params.sessionId) continue;
    const candidate = resolveEntryFile(entry);
    if (candidate) return candidate;
  }

  return undefined;
}
265
+
266
/**
 * Compose the basename of a session transcript, following the filename
 * branch at `openclaw/src/config/sessions/paths.ts:248-251`.
 *
 * | topicId                         | result                                                  |
 * | ------------------------------- | ------------------------------------------------------- |
 * | `undefined`, `null` or `""`     | `<sessionId>.jsonl`                                     |
 * | string                          | `<sessionId>-topic-<encodeURIComponent(topicId)>.jsonl` |
 * | number (including `0`)          | `<sessionId>-topic-<topicId>.jsonl`                     |
 *
 * @param sessionId Session id; the caller is responsible for validating it.
 * @param topicId Optional topic discriminator.
 * @throws URIError if a string `topicId` contains a lone surrogate, because
 *   `encodeURIComponent` rejects such input.
 */
function buildTranscriptFileName(sessionId: string, topicId?: string | number): string {
  if (topicId !== undefined && topicId !== null && topicId !== "") {
    // Strings are percent-encoded so they are safe as a filename part;
    // numbers are written in their plain decimal form.
    const suffix = typeof topicId === "string" ? encodeURIComponent(topicId) : String(topicId);
    return sessionId + "-topic-" + suffix + ".jsonl";
  }
  return sessionId + ".jsonl";
}
282
+
283
/**
 * Find the on-disk transcript for an OpenClaw session identified by
 * (sessionId, agentId, sessionKey, topicId). Mirrors
 * `openclaw/src/config/sessions/paths.ts:resolveSessionTranscriptPathInDir`,
 * including its topic-id filename branch and the `entry.sessionFile`
 * lookup that `resolveSessionFilePath` performs against `sessions.json`.
 *
 * Resolution order:
 *   1. **sessions.json (authoritative).** Use the entry's `sessionFile`,
 *      found by exact `sessionKey` or by matching `sessionId`, when that
 *      file exists. The store knows whether the active transcript is the
 *      base file or a topic-scoped one.
 *   2. **Explicit `topicId`.** When the caller passes a topic id, only
 *      `<sessionId>-topic-<topic>.jsonl` is considered; there is no
 *      fallback to the base file, because the caller asked for that
 *      specific transcript.
 *   3. **Base file.** `<sessionId>.jsonl`, but only when no topic-scoped
 *      variant sits next to it. With both present and no store metadata
 *      the active transcript cannot be determined.
 *   4. **Single topic variant.** When there is no base file and exactly
 *      one `<sessionId>-topic-*.jsonl`, return it.
 *
 * Returns `undefined` when:
 *   - `sessionId` is missing, not a string, blank, or fails
 *     `SAFE_SESSION_ID_RE`
 *   - the state directory or agent id cannot be resolved
 *   - a string `topicId` cannot be encoded into a filename
 *   - no candidate exists on disk
 *   - the filesystem fallback is ambiguous (base + topic, or several
 *     topic variants)
 *
 * Fail-open: never throws. Every step that can raise — id normalization,
 * state-dir resolution, filename construction, filesystem probes — is
 * guarded and mapped to `undefined`.
 *
 * @param params.sessionId Session id from the hook event.
 * @param params.agentId Agent id; normalized, defaults to `DEFAULT_AGENT_ID`.
 * @param params.sessionKey Full session-store key, when available.
 * @param params.topicId Explicit topic discriminator (rare; mostly tests).
 * @param params.env Environment for state-dir resolution; defaults to `process.env`.
 */
export function resolveOpenClawSessionFile(params: {
  sessionId?: string;
  agentId?: string;
  sessionKey?: string;
  topicId?: string | number;
  env?: NodeJS.ProcessEnv;
}): string | undefined {
  // Hook payloads are not validated at runtime, so check the type before
  // calling string methods on it.
  const rawSessionId: unknown = params.sessionId;
  if (typeof rawSessionId !== "string") return undefined;
  const sessionId = rawSessionId.trim();
  if (!sessionId || !SAFE_SESSION_ID_RE.test(sessionId)) return undefined;

  const pathExists = (candidate: string): boolean => {
    try {
      return existsSync(candidate);
    } catch {
      return false;
    }
  };

  let sessionsDir: string;
  try {
    // Full normalizeAgentId mirror — NOT a simple .toLowerCase().
    const agentId = normalizeAgentId(params.agentId);
    sessionsDir = join(resolveOpenClawStateDir(params.env), "agents", agentId, "sessions");
  } catch {
    return undefined;
  }

  // 1. AUTHORITATIVE: sessions.json lookup.
  const fromStore = lookupSessionFileFromStore({
    sessionsDir,
    sessionId,
    sessionKey: params.sessionKey,
  });
  if (fromStore && pathExists(fromStore)) return fromStore;

  // 2. Explicit topic id from the caller: that file or nothing.
  const { topicId } = params;
  if (topicId !== undefined && topicId !== null && topicId !== "") {
    let explicitTopic: string;
    try {
      explicitTopic = join(sessionsDir, buildTranscriptFileName(sessionId, topicId));
    } catch {
      // The filename builder percent-encodes string topic ids, which
      // raises URIError for malformed UTF-16 (a lone surrogate).
      return undefined;
    }
    return pathExists(explicitTopic) ? explicitTopic : undefined;
  }

  // 3 + 4. Filesystem fallback. The directory is enumerated so that
  // base/topic coexistence can be detected before choosing a file.
  const basePath = join(sessionsDir, `${sessionId}.jsonl`);
  const baseExists = pathExists(basePath);

  let topicMatches: string[];
  try {
    const topicPrefix = `${sessionId}-topic-`;
    topicMatches = readdirSync(sessionsDir).filter(
      (name) => name.startsWith(topicPrefix) && name.endsWith(".jsonl"),
    );
  } catch {
    topicMatches = [];
  }

  if (baseExists) {
    // Base alone is unambiguous; base plus any topic variant is not.
    return topicMatches.length === 0 ? basePath : undefined;
  }

  // No base file: exactly one topic variant is unambiguous. Zero means no
  // transcript yet (new session); two or more cannot be told apart.
  const [onlyTopic, ...otherTopics] = topicMatches;
  return onlyTopic !== undefined && otherTopics.length === 0
    ? join(sessionsDir, onlyTopic)
    : undefined;
}
417
+
418
/**
 * Construct the transcript path `resolveOpenClawSessionFile` would derive
 * from the canonical layout, without checking that the transcript exists
 * and without consulting `sessions.json`:
 *
 *   <state-dir>/agents/<agentId>/sessions/<transcript file name>
 *
 * Intended for unit tests that verify path construction without building
 * a real session tree. Note that it is not entirely filesystem-free:
 * resolving the state directory may probe for `.openclaw` / legacy
 * directories unless `OPENCLAW_STATE_DIR` or `OPENCLAW_TEST_FAST=1` is set
 * in `params.env`.
 *
 * Returns `undefined` — never throws — when the sessionId is blank or
 * fails `SAFE_SESSION_ID_RE`, when the state directory cannot be resolved,
 * or when a string `topicId` cannot be encoded into a filename.
 *
 * @param params.sessionId Session id; trimmed and validated.
 * @param params.agentId Agent id; normalized, defaults to `DEFAULT_AGENT_ID`.
 * @param params.topicId Optional topic discriminator for topic-scoped transcripts.
 * @param params.env Environment for state-dir resolution; defaults to `process.env`.
 */
export function buildOpenClawSessionFilePath(params: {
  sessionId: string;
  agentId?: string;
  topicId?: string | number;
  env?: NodeJS.ProcessEnv;
}): string | undefined {
  const sessionId = params.sessionId?.trim();
  if (!sessionId || !SAFE_SESSION_ID_RE.test(sessionId)) return undefined;

  try {
    const agentId = normalizeAgentId(params.agentId);
    const stateDir = resolveOpenClawStateDir(params.env);
    // The filename builder percent-encodes string topic ids and raises
    // URIError on malformed UTF-16; treat that like any other failure.
    const fileName = buildTranscriptFileName(sessionId, params.topicId);
    return join(stateDir, "agents", agentId, "sessions", fileName);
  } catch {
    return undefined;
  }
}