clawmem 0.8.5 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +20 -4
- package/CLAUDE.md +20 -4
- package/README.md +25 -16
- package/SKILL.md +27 -6
- package/package.json +2 -2
- package/src/clawmem.ts +247 -23
- package/src/config.ts +14 -3
- package/src/hooks/context-surfacing.ts +87 -6
- package/src/openclaw/compaction-threshold.ts +166 -0
- package/src/openclaw/engine.ts +520 -241
- package/src/openclaw/index.ts +151 -140
- package/src/openclaw/openclaw.plugin.json +4 -1
- package/src/openclaw/package.json +9 -0
- package/src/openclaw/session-state.ts +55 -0
- package/src/openclaw/transcript-resolver.ts +441 -0
- package/src/session-focus.ts +227 -0
- package/src/store.ts +5 -0
- package/src/vault-facts.ts +506 -0
|
@@ -0,0 +1,441 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ClawMem OpenClaw Plugin — Transcript path resolver
|
|
3
|
+
*
|
|
4
|
+
* Why this exists: the typed `PluginHookName` events that ClawMem subscribes
|
|
5
|
+
* to (`before_prompt_build`, `agent_end`) do NOT carry a `sessionFile` field
|
|
6
|
+
* in their event payload. ClawMem's hooks (precompact-extract,
|
|
7
|
+
* decision-extractor, handoff-generator, feedback-loop) all require a
|
|
8
|
+
* `transcript_path` to read the session JSONL — they call
|
|
9
|
+
* `validateTranscriptPath(input.transcriptPath ?? "")` and return empty on
|
|
10
|
+
* failure (no session_id fallback).
|
|
11
|
+
*
|
|
12
|
+
* Codex Turn N+2 caught this: §14.11's design fix moved precompact off
|
|
13
|
+
* `agent_end` (fire-and-forget) onto `before_prompt_build` (awaited), but
|
|
14
|
+
* neither event delivers `sessionFile`, so the load-bearing precompact
|
|
15
|
+
* path AND the eventually-consistent extractors were both no-ops.
|
|
16
|
+
*
|
|
17
|
+
* The fix: derive the transcript path from `sessionId` + `agentId` using
|
|
18
|
+
* OpenClaw's canonical layout from `src/config/sessions/paths.ts`:
|
|
19
|
+
*
|
|
20
|
+
* <state-dir>/agents/<agentId>/sessions/<sessionId>.jsonl
|
|
21
|
+
*
|
|
22
|
+
* where:
|
|
23
|
+
* - state-dir defaults to ~/.openclaw, overridable via OPENCLAW_STATE_DIR
|
|
24
|
+
* or OPENCLAW_HOME env vars (mirrors `src/config/paths.ts:resolveStateDir`)
|
|
25
|
+
* - agentId defaults to "main" (mirrors
|
|
26
|
+
* `src/routing/session-key.ts:DEFAULT_AGENT_ID`)
|
|
27
|
+
*
|
|
28
|
+
* Events that DO carry `sessionFile` (`before_compaction`, `before_reset`,
|
|
29
|
+
* `after_compaction`, `session_end`) should pass that explicit value
|
|
30
|
+
* instead — the resolver is a fallback, not a replacement.
|
|
31
|
+
*
|
|
32
|
+
* The resolver is fail-open: returns undefined when the resolved path does
|
|
33
|
+
* not exist, the sessionId is invalid, or any underlying step throws.
|
|
34
|
+
*/
|
|
35
|
+
|
|
36
|
+
import { existsSync, readdirSync, readFileSync } from "node:fs";
|
|
37
|
+
import { isAbsolute, join, resolve } from "node:path";
|
|
38
|
+
import { homedir } from "node:os";
|
|
39
|
+
|
|
40
|
+
/**
 * Agent id used when the caller supplies none. Per the original notes this
 * mirrors `openclaw/src/routing/session-key.ts:20` (`DEFAULT_AGENT_ID = "main"`).
 */
export const DEFAULT_AGENT_ID = "main";

/**
 * Whitelist for session ids (documented as a mirror of OpenClaw's
 * `SAFE_SESSION_ID_RE` in `openclaw/src/config/sessions/paths.ts:61`).
 *
 * Checking ids locally means a malformed upstream sessionId can never be
 * spliced into a path with `..` or separator characters.
 */
const SAFE_SESSION_ID_RE = /^[a-z0-9][a-z0-9._-]{0,127}$/i;

/**
 * Agent-id patterns (documented as mirrors of `VALID_ID_RE`,
 * `INVALID_CHARS_RE`, `LEADING_DASH_RE` and `TRAILING_DASH_RE` from
 * `openclaw/src/routing/session-key.ts:25`).
 *
 * A plain `.toLowerCase()` is not enough: the resolver has to emit the
 * same directory name OpenClaw would, otherwise the extractor hooks read
 * the wrong transcript (or none) for agents whose id needs sanitizing.
 */
const AGENT_ID_VALID_RE = /^[a-z0-9][a-z0-9_-]{0,63}$/i;
const AGENT_ID_INVALID_CHARS_RE = /[^a-z0-9_-]+/g;
const AGENT_ID_LEADING_DASH_RE = /^-+/;
const AGENT_ID_TRAILING_DASH_RE = /-+$/;

/**
 * Normalize an agent id into the form used for the on-disk agent directory.
 *
 * @param value Raw agent id; may be undefined, null, or padded with whitespace.
 * @returns
 *   - `DEFAULT_AGENT_ID` when the input is empty or whitespace-only;
 *   - the lowercased input when it already satisfies `AGENT_ID_VALID_RE`;
 *   - otherwise the lowercased input with every run of invalid characters
 *     collapsed to `-`, leading and trailing dashes stripped, and the result
 *     cut to 64 characters (or `DEFAULT_AGENT_ID` if nothing is left).
 */
export function normalizeAgentId(value: string | undefined | null): string {
  const candidate = value?.trim() ?? "";
  if (candidate.length === 0) {
    return DEFAULT_AGENT_ID;
  }

  // Fast path: validity is checked case-insensitively, output is lowercased.
  if (AGENT_ID_VALID_RE.test(candidate)) {
    return candidate.toLowerCase();
  }

  // Slow path: the dash stripping happens BEFORE the length cut, so a
  // truncated id may still end in `-` (same order as the original chain).
  let cleaned = candidate.toLowerCase();
  cleaned = cleaned.replace(AGENT_ID_INVALID_CHARS_RE, "-");
  cleaned = cleaned.replace(AGENT_ID_LEADING_DASH_RE, "");
  cleaned = cleaned.replace(AGENT_ID_TRAILING_DASH_RE, "");
  cleaned = cleaned.slice(0, 64);

  return cleaned === "" ? DEFAULT_AGENT_ID : cleaned;
}
|
|
98
|
+
|
|
99
|
+
/**
 * Legacy state-directory names that are still honoured when the new
 * `.openclaw` directory is absent (documented as a mirror of
 * `openclaw/src/config/paths.ts:21` `LEGACY_STATE_DIRNAMES`). Keeps
 * installs that were upgraded but never migrated resolvable.
 */
const LEGACY_STATE_DIRNAMES = [".clawdbot"] as const;
const NEW_STATE_DIRNAME = ".openclaw";

/**
 * Locate OpenClaw's state directory.
 *
 * Precedence (first hit wins):
 *   1. `$OPENCLAW_STATE_DIR`, when non-blank — used as-is after `~` expansion.
 *   2. `$OPENCLAW_HOME`, when non-blank — replaces the home directory for
 *      every following step.
 *   3. `<home>/.openclaw` when it exists.
 *   4. The first existing `<home>/<legacy name>` from `LEGACY_STATE_DIRNAMES`.
 *   5. `<home>/.openclaw` as the synthesized default.
 *
 * With `OPENCLAW_TEST_FAST=1` the existence probes (steps 3-4) are skipped
 * and `<home>/.openclaw` is returned directly.
 *
 * @param env  Environment to read overrides from (defaults to `process.env`).
 * @param home Home-directory provider (defaults to `os.homedir`); injectable
 *             for tests.
 * @returns The resolved state directory path.
 */
export function resolveOpenClawStateDir(
  env: NodeJS.ProcessEnv = process.env,
  home: () => string = homedir,
): string {
  const explicitStateDir = env.OPENCLAW_STATE_DIR?.trim();
  if (explicitStateDir) {
    return resolveHomeRelative(explicitStateDir, home);
  }

  // OPENCLAW_HOME swaps out the home directory; `.openclaw` is appended below.
  const homeOverride = env.OPENCLAW_HOME?.trim();
  const baseHome = homeOverride ? resolveHomeRelative(homeOverride, home) : home();
  const preferredDir = join(baseHome, NEW_STATE_DIRNAME);

  if (env.OPENCLAW_TEST_FAST === "1") {
    return preferredDir;
  }

  // A probe that fails for any reason counts as "not there".
  const dirExists = (dir: string): boolean => {
    try {
      return existsSync(dir);
    } catch {
      return false;
    }
  };

  if (dirExists(preferredDir)) {
    return preferredDir;
  }

  const legacyHit = LEGACY_STATE_DIRNAMES
    .map((legacyName) => join(baseHome, legacyName))
    .find((legacyDir) => dirExists(legacyDir));

  // No directory on disk yet: hand back the path of the new-style directory.
  return legacyHit ?? preferredDir;
}
|
|
173
|
+
|
|
174
|
+
/**
 * Expand a leading `~` against the supplied home directory.
 *
 * @param input Path taken from an environment override.
 * @param home  Home-directory provider.
 * @returns `home()` for a bare `~`; `home()` joined with the remainder for a
 *          `~/…` path; otherwise `path.resolve(input)` (so `~user` forms are
 *          NOT expanded).
 */
function resolveHomeRelative(input: string, home: () => string): string {
  const isBareTilde = input === "~";
  if (isBareTilde) {
    return home();
  }

  const tildePrefix = "~/";
  return input.startsWith(tildePrefix)
    ? join(home(), input.slice(tildePrefix.length))
    : resolve(input);
}
|
|
179
|
+
|
|
180
|
+
/**
 * The slice of a session-store entry this resolver reads. Only `sessionId`
 * and `sessionFile` are inspected; everything else stays `unknown` so the
 * resolver is not coupled to the full upstream entry shape.
 */
type MinimalSessionEntry = {
  sessionId?: unknown;
  sessionFile?: unknown;
  [key: string]: unknown;
};

/**
 * Look up a session's transcript file in `<sessionsDir>/sessions.json`.
 *
 * Lookup order:
 *   1. `store[sessionKey]`, when a full session-store key such as
 *      `agent:main:abc123` is supplied.
 *   2. The first entry (in key order) whose `sessionId` equals the requested
 *      id and which carries a usable `sessionFile`.
 *
 * @param params.sessionsDir Directory containing `sessions.json`.
 * @param params.sessionId   Session id to scan for.
 * @param params.sessionKey  Optional exact store key, tried first.
 * @returns The entry's `sessionFile` (trimmed; relative values are joined
 *          onto `sessionsDir`), or undefined when the store is missing,
 *          blank, unparseable, not a plain object, or has no matching entry
 *          with a non-blank string `sessionFile`. The returned path is NOT
 *          checked for existence. Never throws.
 */
function lookupSessionFileFromStore(params: {
  sessionsDir: string;
  sessionId: string;
  sessionKey?: string;
}): string | undefined {
  const { sessionsDir, sessionId, sessionKey } = params;

  // Accept only non-null, non-array objects.
  const asRecord = (value: unknown): Record<string, unknown> | undefined => {
    const isRecord = Boolean(value) && typeof value === "object" && !Array.isArray(value);
    return isRecord ? (value as Record<string, unknown>) : undefined;
  };

  let store: Record<string, unknown> | undefined;
  try {
    const text = readFileSync(join(sessionsDir, "sessions.json"), "utf-8");
    store = text.trim() === "" ? undefined : asRecord(JSON.parse(text));
  } catch {
    // Unreadable file or invalid JSON: nothing to look up.
    return undefined;
  }
  if (!store) {
    return undefined;
  }

  // Turn an entry's `sessionFile` into an absolute path, if it has one.
  const toAbsolute = (entry: MinimalSessionEntry): string | undefined => {
    const stored = entry.sessionFile;
    if (typeof stored !== "string") return undefined;
    const file = stored.trim();
    if (file === "") return undefined;
    return isAbsolute(file) ? file : join(sessionsDir, file);
  };

  // 1. Exact store-key hit.
  if (sessionKey) {
    const keyed: MinimalSessionEntry | undefined = asRecord(store[sessionKey]);
    const keyedFile = keyed ? toAbsolute(keyed) : undefined;
    if (keyedFile) return keyedFile;
  }

  // 2. Scan every entry for a matching sessionId.
  for (const key of Object.keys(store)) {
    const entry: MinimalSessionEntry | undefined = asRecord(store[key]);
    if (!entry || entry.sessionId !== sessionId) continue;
    const file = toAbsolute(entry);
    if (file) return file;
  }

  return undefined;
}
|
|
265
|
+
|
|
266
|
+
/**
 * Compose the transcript basename for a session.
 *
 *   - no topic (undefined, null or ""): `<sessionId>.jsonl`
 *   - string topic: `<sessionId>-topic-<encodeURIComponent(topicId)>.jsonl`
 *   - numeric topic: `<sessionId>-topic-<topicId>.jsonl`
 *
 * @param sessionId Session id, used verbatim (callers validate it).
 * @param topicId   Optional topic discriminator.
 * @returns The file basename, without any directory component.
 */
function buildTranscriptFileName(sessionId: string, topicId?: string | number): string {
  if (topicId == null || topicId === "") {
    return [sessionId, ".jsonl"].join("");
  }

  // Only string topics are URL-encoded; numbers are stringified as-is.
  let topicSegment: string;
  if (typeof topicId === "string") {
    topicSegment = encodeURIComponent(topicId);
  } else {
    topicSegment = String(topicId);
  }

  return [sessionId, "-topic-", topicSegment, ".jsonl"].join("");
}
|
|
282
|
+
|
|
283
|
+
/**
 * Resolve the on-disk transcript for a (sessionId, agentId, sessionKey,
 * topicId) tuple, for hook events that do not deliver `sessionFile`.
 *
 * Resolution order:
 *   1. `sessions.json` — the entry's `sessionFile`, found by exact
 *      `sessionKey` or by scanning for a matching `sessionId`, is used when
 *      that file exists on disk.
 *   2. Explicit `params.topicId` — the topic-scoped filename is used if it
 *      exists; otherwise the result is undefined (no fallback to the base
 *      file: the caller asked for that specific transcript).
 *   3. Base file `<sessionId>.jsonl` — only when no
 *      `<sessionId>-topic-*.jsonl` sibling exists.
 *   4. A single topic-scoped sibling with no base file.
 *
 * @param params.sessionId  Session id; must pass `SAFE_SESSION_ID_RE`.
 * @param params.agentId    Agent id; normalized via `normalizeAgentId`.
 * @param params.sessionKey Optional exact session-store key.
 * @param params.topicId    Optional explicit topic id.
 * @param params.env        Environment used to locate the state directory.
 * @returns The transcript path, or undefined when the sessionId is
 *          missing/invalid, nothing is found on disk, or the filesystem
 *          fallback is ambiguous (base + topic files coexist, or several
 *          topic files exist). Filesystem errors are swallowed (fail-open).
 */
export function resolveOpenClawSessionFile(params: {
  sessionId?: string;
  agentId?: string;
  sessionKey?: string;
  topicId?: string | number;
  env?: NodeJS.ProcessEnv;
}): string | undefined {
  const sessionId = params.sessionId?.trim();
  if (!sessionId || !SAFE_SESSION_ID_RE.test(sessionId)) {
    return undefined;
  }

  // Full normalization, not just lowercasing: the directory name has to
  // match what the agent id is sanitized to.
  const agentId = normalizeAgentId(params.agentId);

  let stateDir: string;
  try {
    stateDir = resolveOpenClawStateDir(params.env);
  } catch {
    return undefined;
  }

  const sessionsDir = join(stateDir, "agents", agentId, "sessions");

  // Existence probe that treats any failure as "absent".
  const fileExists = (candidate: string): boolean => {
    try {
      return existsSync(candidate);
    } catch {
      return false;
    }
  };

  // 1. sessions.json is the preferred source for the active transcript.
  const storedFile = lookupSessionFileFromStore({
    sessionsDir,
    sessionId,
    sessionKey: params.sessionKey,
  });
  if (storedFile && fileExists(storedFile)) {
    return storedFile;
  }

  // 2. Explicit topic id: that exact file or nothing.
  const { topicId } = params;
  if (topicId !== undefined && topicId !== null && topicId !== "") {
    const topicFile = join(sessionsDir, buildTranscriptFileName(sessionId, topicId));
    return fileExists(topicFile) ? topicFile : undefined;
  }

  // 3 + 4. Filesystem fallback: enumerate the directory so the
  // base-plus-topic coexistence case can be detected before choosing.
  const baseFile = join(sessionsDir, `${sessionId}.jsonl`);
  const hasBase = fileExists(baseFile);

  let topicFiles: string[];
  try {
    const topicPrefix = `${sessionId}-topic-`;
    topicFiles = readdirSync(sessionsDir).filter(
      (name) => name.startsWith(topicPrefix) && name.endsWith(".jsonl"),
    );
  } catch {
    topicFiles = [];
  }

  if (hasBase) {
    // Base alone is unambiguous; base alongside topic variants is not.
    return topicFiles.length === 0 ? baseFile : undefined;
  }

  // No base file: exactly one topic variant is unambiguous. Zero means no
  // transcript yet; two or more cannot be told apart without metadata.
  const [onlyTopic, ...others] = topicFiles;
  if (onlyTopic !== undefined && others.length === 0) {
    return join(sessionsDir, onlyTopic);
  }
  return undefined;
}
|
|
417
|
+
|
|
418
|
+
/**
 * Path-construction-only counterpart of `resolveOpenClawSessionFile`: builds
 * `<state-dir>/agents/<agentId>/sessions/<file>` without consulting
 * `sessions.json` and without checking that the transcript exists. Intended
 * for unit tests of the path logic.
 *
 * Note: the state directory is still obtained from
 * `resolveOpenClawStateDir(params.env)`.
 *
 * @param params.sessionId Session id; must pass `SAFE_SESSION_ID_RE`.
 * @param params.agentId   Agent id; normalized via `normalizeAgentId`.
 * @param params.topicId   Optional topic id for the topic-scoped filename.
 * @param params.env       Environment used to locate the state directory.
 * @returns The constructed path, or undefined when the sessionId is blank or
 *          invalid, or the state directory cannot be resolved.
 */
export function buildOpenClawSessionFilePath(params: {
  sessionId: string;
  agentId?: string;
  topicId?: string | number;
  env?: NodeJS.ProcessEnv;
}): string | undefined {
  const sessionId = params.sessionId?.trim();
  if (!sessionId) return undefined;
  if (!SAFE_SESSION_ID_RE.test(sessionId)) return undefined;

  const agentDirName = normalizeAgentId(params.agentId);

  let root: string;
  try {
    root = resolveOpenClawStateDir(params.env);
  } catch {
    return undefined;
  }

  const segments = [
    root,
    "agents",
    agentDirName,
    "sessions",
    buildTranscriptFileName(sessionId, params.topicId),
  ];
  return join(...segments);
}
|
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Session-Scoped Focus (§11.4 — v0.9.0)
|
|
3
|
+
*
|
|
4
|
+
* Per-session topic primitive that biases context-surfacing ranking toward
|
|
5
|
+
* docs relevant to the declared working context — WITHOUT persisting any
|
|
6
|
+
* state to SQLite. Intra-session curation that cannot contaminate other
|
|
7
|
+
* sessions.
|
|
8
|
+
*
|
|
9
|
+
* Primary signal: per-session state file at
|
|
10
|
+
* ~/.cache/clawmem/sessions/<session_id>.focus
|
|
11
|
+
*
|
|
12
|
+
* The env var CLAWMEM_SESSION_FOCUS is a DEBUG-ONLY override: it bypasses
|
|
13
|
+
* the per-session file entirely, and because it is a single process-wide
|
|
14
|
+
* variable it does NOT provide per-session scoping in multi-session host
|
|
15
|
+
* processes (e.g. a long-lived MCP server handling multiple Claude Code
|
|
16
|
+
* sessions). Use the file path for correctness; use the env var for
|
|
17
|
+
* ad-hoc single-session debugging only.
|
|
18
|
+
*
|
|
19
|
+
* All read paths are fail-open. Unreadable, corrupt, empty, missing,
|
|
20
|
+
* invalid-UTF-8, or oversized focus files return undefined and the
|
|
21
|
+
* caller proceeds with baseline ranking (byte-identical to pre-§11.4).
|
|
22
|
+
* The stage must NEVER half-apply a malformed topic.
|
|
23
|
+
*/
|
|
24
|
+
|
|
25
|
+
import * as fs from "fs";
|
|
26
|
+
import * as path from "path";
|
|
27
|
+
import * as os from "os";
|
|
28
|
+
import type { ScoredResult } from "./memory.ts";
|
|
29
|
+
|
|
30
|
+
/** Upper bound, in UTF-16 code units, on a trimmed focus topic. */
const MAX_TOPIC_LEN = 256;

/**
 * Resolve the directory that holds per-session focus files.
 *
 * Defaults to `~/.cache/clawmem/sessions`; a non-blank `CLAWMEM_FOCUS_ROOT`
 * overrides it (primarily a test hook, also usable to relocate the files).
 * Evaluated on every call so env-var changes take effect without a module
 * reload.
 *
 * @returns The override with surrounding whitespace removed, or the default
 *          directory. (Previously the untrimmed override was returned even
 *          though the emptiness check was done on the trimmed value, so
 *          stray whitespace ended up inside the path.)
 */
export function focusRoot(): string {
  const override = process.env.CLAWMEM_FOCUS_ROOT?.trim();
  if (override) return override;
  return path.join(os.homedir(), ".cache", "clawmem", "sessions");
}

/**
 * Build the focus-file path for a session: `<focusRoot>/<sessionId>.focus`.
 *
 * @param sessionId Session id used as the file's basename.
 * @returns The focus-file path.
 * @throws Error when `sessionId` contains `/`, `\` or NUL. `path.join`
 *         normalizes `..` segments, so an id such as `../../x` would
 *         otherwise resolve outside the focus root and let the read, write
 *         and clear helpers touch arbitrary `*.focus` paths.
 */
export function focusFilePath(sessionId: string): string {
  if (/[\\/\0]/.test(sessionId)) {
    throw new Error("focusFilePath: sessionId must not contain path separators");
  }
  return path.join(focusRoot(), `${sessionId}.focus`);
}
|
|
51
|
+
|
|
52
|
+
/**
 * Read the focus topic stored for a session.
 *
 * @param sessionId Session whose focus file should be read.
 * @returns The trimmed topic, or undefined when:
 *   - `sessionId` is missing or empty,
 *   - the focus file does not exist or cannot be read,
 *   - the file is empty or whitespace-only,
 *   - the trimmed content is longer than `MAX_TOPIC_LEN`,
 *   - the file is not valid UTF-8.
 *
 * Never throws; callers treat undefined as "no topic set" and skip the
 * boost stage.
 */
export function readSessionFocus(sessionId?: string): string | undefined {
  if (!sessionId) return undefined;
  try {
    const p = focusFilePath(sessionId);
    if (!fs.existsSync(p)) return undefined;

    // Decode strictly. `readFileSync(p, "utf-8")` does not throw on
    // malformed input — it substitutes U+FFFD — which would let a corrupt
    // file through as a garbled topic. A fatal TextDecoder throws instead,
    // and the catch below turns that into the fail-open `undefined`.
    const bytes = fs.readFileSync(p);
    const raw = new TextDecoder("utf-8", { fatal: true }).decode(bytes);

    const topic = raw.trim();
    if (!topic || topic.length > MAX_TOPIC_LEN) return undefined;
    return topic;
  } catch {
    return undefined;
  }
}
|
|
78
|
+
|
|
79
|
+
/**
 * Persist a focus topic for a session, replacing any previous one. The
 * focus root directory is created (recursively) when missing.
 *
 * @param sessionId Session to attach the topic to; must be non-blank.
 * @param topic     Topic text; stored trimmed, must be non-blank and at most
 *                  `MAX_TOPIC_LEN` characters after trimming.
 * @throws Error on a blank sessionId, a blank topic or an over-long topic;
 *         filesystem errors from `mkdirSync` / `writeFileSync` propagate.
 */
export function writeSessionFocus(sessionId: string, topic: string): void {
  const hasSessionId = Boolean(sessionId) && sessionId.trim().length > 0;
  if (!hasSessionId) {
    throw new Error("writeSessionFocus: sessionId required");
  }

  const normalizedTopic = topic.trim();
  if (normalizedTopic.length === 0) {
    throw new Error("writeSessionFocus: topic required");
  }
  if (normalizedTopic.length > MAX_TOPIC_LEN) {
    throw new Error(`writeSessionFocus: topic exceeds max length ${MAX_TOPIC_LEN}`);
  }

  // Ensure the directory first, then resolve and write the file.
  const root = focusRoot();
  fs.mkdirSync(root, { recursive: true });
  const target = focusFilePath(sessionId);
  fs.writeFileSync(target, normalizedTopic, { encoding: "utf-8" });
}
|
|
98
|
+
|
|
99
|
+
/**
 * Remove a session's focus file, if there is one.
 *
 * Best-effort: an empty sessionId is ignored, a missing file is a no-op,
 * and any error while probing or unlinking is swallowed. Never throws.
 *
 * @param sessionId Session whose focus should be cleared.
 */
export function clearSessionFocus(sessionId: string): void {
  if (sessionId) {
    try {
      const target = focusFilePath(sessionId);
      const present = fs.existsSync(target);
      if (present) {
        fs.unlinkSync(target);
      }
    } catch {
      // Clearing is best-effort; failures are deliberately ignored.
    }
  }
}
|
|
112
|
+
|
|
113
|
+
/**
 * Pick the effective focus topic for a session.
 *
 * The per-session focus file is consulted first; only when it yields nothing
 * is the supplied env-var value (the `CLAWMEM_SESSION_FOCUS` debug override)
 * used, after trimming. File-before-env is deliberate: the file is the only
 * signal scoped to a single session in a multi-session host process.
 *
 * Kept as a standalone function so the precedence can be unit-tested without
 * running the whole context-surfacing hook.
 *
 * @param sessionId Session whose focus file should be read.
 * @param envVar    Raw value of the debug env var, if any.
 * @returns The topic from the file, else the trimmed env value, else
 *          undefined. Does not throw and does not log.
 */
export function resolveSessionTopic(
  sessionId: string | undefined,
  envVar: string | undefined
): string | undefined {
  // Empty strings are falsy, so a blank env value falls through to undefined.
  return readSessionFocus(sessionId) || envVar?.trim() || undefined;
}
|
|
139
|
+
|
|
140
|
+
/**
 * Case-insensitive AND-match of the topic's tokens against a result.
 *
 * The topic is lowercased and split on whitespace; tokens shorter than two
 * characters are discarded. The haystack is the result's title, displayPath
 * and the first 800 characters of its body, joined with spaces and
 * lowercased.
 *
 * @returns true only when at least one token remains and every remaining
 *          token occurs as a substring of the haystack.
 */
function matchesTopic(result: ScoredResult, topic: string): boolean {
  // split(/\s+/) never leaves whitespace inside a token, so no per-token
  // trim is needed.
  const tokens = topic
    .toLowerCase()
    .split(/\s+/)
    .filter(t => t.length >= 2);
  if (tokens.length === 0) return false;

  const haystack = [
    result.title || "",
    result.displayPath || "",
    (result.body || "").slice(0, 800),
  ]
    .join(" ")
    .toLowerCase();

  return tokens.every(t => haystack.includes(t));
}

export interface TopicBoostOptions {
  /**
   * Multiplier applied to docs whose title/path/body match all topic tokens.
   * Default 1.4. Non-finite values fall back to the default.
   */
  boostFactor?: number;
  /**
   * Multiplier applied to non-matching docs. Default 0.75.
   * Clamped to a 0.5 floor so the boost is a re-ranker, not a hide —
   * non-matching docs are demoted but never suppressed to zero.
   * Non-finite values fall back to the default.
   */
  demoteFactor?: number;
}

/**
 * Re-rank a scored result set toward the session's focus topic. Per the
 * original notes this runs after composite scoring and before threshold
 * filtering.
 *
 * Behavior:
 *   - Empty/undefined/blank topic, or an empty result set → input returned
 *     unchanged.
 *   - Topic set but no result matches → input returned unchanged. Demoting
 *     every doc uniformly would push some below the downstream threshold and
 *     shrink the result set relative to the no-topic baseline.
 *   - At least one match → each `compositeScore` is multiplied by
 *     `boostFactor` (match) or `demoteFactor` (no match), then the array is
 *     re-sorted by descending score.
 *
 * Matching is evaluated once per result up front, so the zero-match
 * short-circuit does not double-evaluate the token match.
 *
 * Touches no database state, but DOES mutate its input: `compositeScore` is
 * updated in place and `scored` itself is sorted and returned.
 *
 * @param scored  Results to re-rank (mutated and returned).
 * @param topic   Focus topic, if any.
 * @param options Boost/demote multipliers. A non-finite factor (NaN,
 *                ±Infinity) is replaced by its default: otherwise
 *                `Math.max(NaN, 0.5)` is NaN, every score becomes NaN and
 *                the sort order is undefined.
 * @returns The same `scored` array instance.
 */
export function applyTopicBoost<T extends ScoredResult>(
  scored: T[],
  topic: string | undefined,
  options: TopicBoostOptions = {}
): T[] {
  if (!topic || !topic.trim()) return scored;
  if (scored.length === 0) return scored;

  const finiteOr = (value: number | undefined, fallback: number): number =>
    typeof value === "number" && Number.isFinite(value) ? value : fallback;

  const boostFactor = finiteOr(options.boostFactor, 1.4);
  const demoteFactor = Math.max(finiteOr(options.demoteFactor, 0.75), 0.5);

  const matches = scored.map(r => matchesTopic(r, topic));
  if (!matches.includes(true)) return scored; // fail-open: baseline ordering preserved

  scored.forEach((result, i) => {
    result.compositeScore = result.compositeScore * (matches[i] ? boostFactor : demoteFactor);
  });

  scored.sort((a, b) => b.compositeScore - a.compositeScore);
  return scored;
}
|
package/src/store.ts
CHANGED
|
@@ -711,6 +711,11 @@ function initializeDatabase(db: Database): void {
|
|
|
711
711
|
db.exec(`CREATE INDEX IF NOT EXISTS idx_entity_nodes_type ON entity_nodes(entity_type)`);
|
|
712
712
|
db.exec(`CREATE INDEX IF NOT EXISTS idx_entity_nodes_vault ON entity_nodes(vault)`);
|
|
713
713
|
db.exec(`CREATE INDEX IF NOT EXISTS idx_entity_nodes_mentions ON entity_nodes(mention_count DESC)`);
|
|
714
|
+
// §11.1 (v0.9.0): expression index backing the `LOWER(name) IN (...) AND vault = ?`
|
|
715
|
+
// batch lookup used by the context-surfacing entity-detection hot path.
|
|
716
|
+
// Without this index the batch query devolves into a full scan on large vaults.
|
|
717
|
+
// Idempotent via IF NOT EXISTS — existing vaults pick it up on next open.
|
|
718
|
+
db.exec(`CREATE INDEX IF NOT EXISTS idx_entity_nodes_lower_name ON entity_nodes(LOWER(name), vault)`);
|
|
714
719
|
|
|
715
720
|
// Entity mentions: entity ↔ document junction table
|
|
716
721
|
db.exec(`
|