clawmem 0.8.5 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,441 @@
1
+ /**
2
+ * ClawMem OpenClaw Plugin — Transcript path resolver
3
+ *
4
+ * Why this exists: the typed `PluginHookName` events that ClawMem subscribes
5
+ * to (`before_prompt_build`, `agent_end`) do NOT carry a `sessionFile` field
6
+ * in their event payload. ClawMem's hooks (precompact-extract,
7
+ * decision-extractor, handoff-generator, feedback-loop) all require a
8
+ * `transcript_path` to read the session JSONL — they call
9
+ * `validateTranscriptPath(input.transcriptPath ?? "")` and return empty on
10
+ * failure (no session_id fallback).
11
+ *
12
+ * Codex Turn N+2 caught this: §14.11's design fix moved precompact off
13
+ * `agent_end` (fire-and-forget) onto `before_prompt_build` (awaited), but
14
+ * neither event delivers `sessionFile`, so the load-bearing precompact
15
+ * path AND the eventually-consistent extractors were both no-ops.
16
+ *
17
+ * The fix: derive the transcript path from `sessionId` + `agentId` using
18
+ * OpenClaw's canonical layout from `src/config/sessions/paths.ts`:
19
+ *
20
+ * <state-dir>/agents/<agentId>/sessions/<sessionId>.jsonl
21
+ *
22
+ * where:
23
+ * - state-dir defaults to ~/.openclaw, overridable via OPENCLAW_STATE_DIR
24
+ * or OPENCLAW_HOME env vars (mirrors `src/config/paths.ts:resolveStateDir`)
25
+ * - agentId defaults to "main" (mirrors
26
+ * `src/routing/session-key.ts:DEFAULT_AGENT_ID`)
27
+ *
28
+ * Events that DO carry `sessionFile` (`before_compaction`, `before_reset`,
29
+ * `after_compaction`, `session_end`) should pass that explicit value
30
+ * instead — the resolver is a fallback, not a replacement.
31
+ *
32
+ * The resolver is fail-open: returns undefined when the resolved path does
33
+ * not exist, the sessionId is invalid, or any underlying step throws.
34
+ */
35
+
36
+ import { existsSync, readdirSync, readFileSync } from "node:fs";
37
+ import { isAbsolute, join, resolve } from "node:path";
38
+ import { homedir } from "node:os";
39
+
40
/**
 * Agent id assumed when the caller supplies none. Per the upstream notes this
 * mirrors `DEFAULT_AGENT_ID = "main"` in `openclaw/src/routing/session-key.ts`.
 */
export const DEFAULT_AGENT_ID = "main";

/**
 * Whitelist for session ids (mirror of OpenClaw's `SAFE_SESSION_ID_RE` in
 * `openclaw/src/config/sessions/paths.ts`).
 *
 * Checking this locally guarantees a malformed upstream sessionId can never
 * inject `..` or a path separator into a path we build.
 */
const SAFE_SESSION_ID_RE = /^[a-z0-9][a-z0-9._-]{0,127}$/i;

/**
 * Agent-id patterns (mirror of `VALID_ID_RE`, `INVALID_CHARS_RE`,
 * `LEADING_DASH_RE` and `TRAILING_DASH_RE` in
 * `openclaw/src/routing/session-key.ts`).
 *
 * The resolver has to emit exactly the directory name OpenClaw itself would
 * use; a plain `.toLowerCase()` is not enough for ids that need sanitizing,
 * and a mismatch makes the extractor hooks read the wrong file or none.
 */
const AGENT_ID_VALID_RE = /^[a-z0-9][a-z0-9_-]{0,63}$/i;
const AGENT_ID_INVALID_CHARS_RE = /[^a-z0-9_-]+/g;
const AGENT_ID_LEADING_DASH_RE = /^-+/;
const AGENT_ID_TRAILING_DASH_RE = /-+$/;

/**
 * Normalize an agent id the way OpenClaw's `normalizeAgentId` does.
 *
 * @param value Raw agent id; may be absent.
 * @returns
 *   - `DEFAULT_AGENT_ID` for null / undefined / blank input;
 *   - the lowercased input when it already satisfies `AGENT_ID_VALID_RE`;
 *   - otherwise the lowercased input with every run of invalid characters
 *     replaced by a single `-`, leading and trailing dashes removed, then cut
 *     to 64 characters — or `DEFAULT_AGENT_ID` if nothing is left.
 */
export function normalizeAgentId(value: string | undefined | null): string {
  const candidate = (value ?? "").trim();
  if (candidate.length === 0) {
    return DEFAULT_AGENT_ID;
  }

  if (AGENT_ID_VALID_RE.test(candidate)) {
    return candidate.toLowerCase();
  }

  // Order matters: dashes are stripped BEFORE truncation, matching upstream.
  let cleaned = candidate.toLowerCase();
  cleaned = cleaned.replace(AGENT_ID_INVALID_CHARS_RE, "-");
  cleaned = cleaned.replace(AGENT_ID_LEADING_DASH_RE, "");
  cleaned = cleaned.replace(AGENT_ID_TRAILING_DASH_RE, "");
  cleaned = cleaned.slice(0, 64);

  return cleaned === "" ? DEFAULT_AGENT_ID : cleaned;
}
98
+
99
/**
 * Legacy state-directory names OpenClaw still falls back to when the new
 * `.openclaw` directory is absent (mirror of `LEGACY_STATE_DIRNAMES` in
 * `openclaw/src/config/paths.ts`). Keeps un-migrated, pre-rebrand installs
 * resolvable.
 */
const LEGACY_STATE_DIRNAMES = [".clawdbot"] as const;
const NEW_STATE_DIRNAME = ".openclaw";

/**
 * Resolve OpenClaw's state directory with the same precedence as
 * `openclaw/src/config/paths.ts:resolveStateDir`:
 *
 *   1. `$OPENCLAW_STATE_DIR` if set (used as-is, no further fallback)
 *   2. `$OPENCLAW_HOME` if set replaces the home directory for steps 3-5
 *   3. `<home>/.openclaw` if it exists
 *   4. the first existing legacy directory (`<home>/.clawdbot`)
 *   5. `<home>/.openclaw` as the synthesized default
 *
 * `OPENCLAW_TEST_FAST=1` skips the existence probes (steps 3-4) and returns
 * `<home>/.openclaw` directly.
 *
 * @param env  Environment to read overrides from (defaults to `process.env`).
 * @param home Home-directory provider (defaults to `os.homedir`); injectable
 *             so tests do not depend on the real home.
 * @returns The state directory path. The path is not guaranteed to exist.
 */
export function resolveOpenClawStateDir(
  env: NodeJS.ProcessEnv = process.env,
  home: () => string = homedir,
): string {
  const stateOverride = env.OPENCLAW_STATE_DIR?.trim();
  if (stateOverride) {
    return resolveHomeRelative(stateOverride, home);
  }

  // OPENCLAW_HOME replaces the home directory entirely; the state dir then
  // becomes `<OPENCLAW_HOME>/.openclaw`.
  const homeOverride = env.OPENCLAW_HOME?.trim();
  const effectiveHome = homeOverride ? resolveHomeRelative(homeOverride, home) : home();
  const newDir = join(effectiveHome, NEW_STATE_DIRNAME);

  if (env.OPENCLAW_TEST_FAST === "1") {
    return newDir;
  }

  // Defensive probe: a failing existence check counts as "not there".
  const dirExists = (dir: string): boolean => {
    try {
      return existsSync(dir);
    } catch {
      return false;
    }
  };

  // Prefer the new dir when present.
  if (dirExists(newDir)) return newDir;

  // Legacy fallback: first existing legacy name wins.
  const legacyDir = LEGACY_STATE_DIRNAMES
    .map((legacyName) => join(effectiveHome, legacyName))
    .find((candidate) => dirExists(candidate));

  // Synthesized default — OpenClaw bootstraps the new dir on first run.
  return legacyDir ?? newDir;
}

/**
 * Expand a leading `~` against `home()`; resolve anything else to an absolute
 * path relative to the current working directory.
 *
 * Both `~/…` and the Windows spelling `~\…` are treated as home-relative.
 * Previously only `~/` was recognized, so `~\state` was resolved as a literal
 * directory named `~` under the cwd.
 */
function resolveHomeRelative(input: string, home: () => string): string {
  if (input === "~") return home();
  if (input.startsWith("~/") || input.startsWith("~\\")) {
    return join(home(), input.slice(2));
  }
  return resolve(input);
}
179
+
180
/**
 * The slice of OpenClaw's session-store entry this module reads. Only
 * `sessionId` and `sessionFile` are inspected; every other field is left as
 * `unknown` so ClawMem stays decoupled from OpenClaw's internal model.
 */
type MinimalSessionEntry = {
  sessionId?: unknown;
  sessionFile?: unknown;
  [key: string]: unknown;
};

/**
 * Look up a session's transcript file in `<sessionsDir>/sessions.json`.
 *
 * Lookup order:
 *   1. `store[sessionKey]`, when a `sessionKey` is given and that entry has a
 *      usable `sessionFile`;
 *   2. the first entry (in object order) whose `sessionId` equals
 *      `params.sessionId` and which has a usable `sessionFile`.
 *
 * A `sessionFile` is usable when it is a non-blank string. It is trimmed and,
 * if relative, joined onto `sessionsDir`.
 *
 * @returns The resolved path, or undefined when the store is missing,
 *          unreadable, blank, not a JSON object, or has no matching entry.
 *          Read and parse failures are swallowed (fail-open). The returned
 *          path is NOT checked for existence here.
 */
function lookupSessionFileFromStore(params: {
  sessionsDir: string;
  sessionId: string;
  sessionKey?: string;
}): string | undefined {
  const { sessionsDir, sessionId, sessionKey } = params;
  const storePath = join(sessionsDir, "sessions.json");

  let store: Record<string, unknown>;
  try {
    const text = readFileSync(storePath, "utf-8");
    if (text.trim() === "") return undefined;
    const decoded: unknown = JSON.parse(text);
    if (decoded === null || typeof decoded !== "object" || Array.isArray(decoded)) {
      return undefined;
    }
    store = decoded as Record<string, unknown>;
  } catch {
    // Missing file, permission error, or malformed JSON — all mean "no answer".
    return undefined;
  }

  // Narrow an arbitrary JSON value to a plain-object entry.
  const asEntry = (value: unknown): MinimalSessionEntry | undefined => {
    if (!value || typeof value !== "object" || Array.isArray(value)) return undefined;
    return value as MinimalSessionEntry;
  };

  // Extract the entry's sessionFile as an absolute-or-sessionsDir-relative path.
  const fileOf = (entry: MinimalSessionEntry): string | undefined => {
    const rawFile = entry.sessionFile;
    if (typeof rawFile !== "string") return undefined;
    const file = rawFile.trim();
    if (file === "") return undefined;
    return isAbsolute(file) ? file : join(sessionsDir, file);
  };

  // 1. Exact session-store key.
  if (sessionKey) {
    const direct = asEntry(store[sessionKey]);
    const hit = direct ? fileOf(direct) : undefined;
    if (hit) return hit;
  }

  // 2. First entry carrying the requested sessionId.
  for (const value of Object.values(store)) {
    const entry = asEntry(value);
    if (!entry || entry.sessionId !== sessionId) continue;
    const hit = fileOf(entry);
    if (hit) return hit;
  }

  return undefined;
}
265
+
266
/**
 * Compute the transcript file's basename (mirrors the filename branch of
 * `openclaw/src/config/sessions/paths.ts`).
 *
 *   - topicId absent, null or `""` → `<sessionId>.jsonl`
 *   - string topicId               → `<sessionId>-topic-<encodeURIComponent(topicId)>.jsonl`
 *   - any other topicId (numbers)  → `<sessionId>-topic-<String(topicId)>.jsonl`
 *
 * @param sessionId Session id, used verbatim (callers validate it).
 * @param topicId   Optional topic discriminator.
 */
function buildTranscriptFileName(sessionId: string, topicId?: string | number): string {
  if (topicId === undefined || topicId === null || topicId === "") {
    return sessionId + ".jsonl";
  }

  let topicSuffix: string;
  if (typeof topicId === "string") {
    // String topics may contain characters unsafe in filenames — percent-encode.
    topicSuffix = encodeURIComponent(topicId);
  } else {
    topicSuffix = String(topicId);
  }

  return [sessionId, "topic", topicSuffix].join("-") + ".jsonl";
}
282
+
283
/**
 * Find the on-disk OpenClaw transcript for a
 * (sessionId, agentId, sessionKey, topicId) tuple.
 *
 * Resolution order:
 *   1. `sessions.json` in the agent's sessions directory: the entry's
 *      `sessionFile` (matched by `sessionKey`, else by `sessionId`) is used
 *      when that file exists.
 *   2. An explicit `params.topicId`: the topic-scoped filename is returned if
 *      it exists, otherwise undefined. There is deliberately no fallback to
 *      the base file, because the caller asked for that specific transcript.
 *   3. Filesystem fallback on `<sessionId>.jsonl` and
 *      `<sessionId>-topic-*.jsonl`:
 *        - base only            → base file
 *        - exactly one topic    → that topic file
 *        - base + any topic     → undefined (ambiguous)
 *        - two or more topics   → undefined (ambiguous)
 *        - nothing              → undefined
 *
 * @returns An existing transcript path, or undefined when the sessionId is
 *          missing or fails `SAFE_SESSION_ID_RE`, nothing is found, or the
 *          filesystem fallback is ambiguous.
 *
 * Fail-open: never throws. Every filesystem probe is guarded, and the
 * topic-scoped filename is now built under that guard too —
 * `encodeURIComponent` raises `URIError` for a topicId containing a lone
 * surrogate, which previously escaped this function.
 */
export function resolveOpenClawSessionFile(params: {
  sessionId?: string;
  agentId?: string;
  sessionKey?: string;
  topicId?: string | number;
  env?: NodeJS.ProcessEnv;
}): string | undefined {
  const sessionId = params.sessionId?.trim();
  if (!sessionId) return undefined;
  if (!SAFE_SESSION_ID_RE.test(sessionId)) return undefined;

  // Full normalization (not just lowercasing) so the directory name matches
  // what OpenClaw writes.
  const agentId = normalizeAgentId(params.agentId);

  let stateDir: string;
  try {
    stateDir = resolveOpenClawStateDir(params.env);
  } catch {
    return undefined;
  }

  const sessionsDir = join(stateDir, "agents", agentId, "sessions");

  // Guarded existence probe: any failure counts as "not there".
  const onDisk = (candidate: string): boolean => {
    try {
      return existsSync(candidate);
    } catch {
      return false;
    }
  };

  // 1. sessions.json is OpenClaw's record of the active transcript.
  const fromStore = lookupSessionFileFromStore({
    sessionsDir,
    sessionId,
    sessionKey: params.sessionKey,
  });
  if (fromStore && onDisk(fromStore)) return fromStore;

  // 2. Explicit topic id: that exact file or nothing.
  const { topicId } = params;
  if (topicId !== undefined && topicId !== null && topicId !== "") {
    try {
      const explicitTopic = join(sessionsDir, buildTranscriptFileName(sessionId, topicId));
      return onDisk(explicitTopic) ? explicitTopic : undefined;
    } catch {
      // Un-encodable topicId (URIError) — treat as "no such transcript".
      return undefined;
    }
  }

  // 3. Filesystem fallback. Both the base file and every topic variant must
  //    be inspected before choosing, so base+topic coexistence is detected.
  const basePath = join(sessionsDir, `${sessionId}.jsonl`);
  const baseExists = onDisk(basePath);

  let topicMatches: string[];
  try {
    const topicPrefix = `${sessionId}-topic-`;
    topicMatches = readdirSync(sessionsDir).filter(
      (name) => name.startsWith(topicPrefix) && name.endsWith(".jsonl"),
    );
  } catch {
    topicMatches = [];
  }

  if (baseExists) {
    // Base alone is unambiguous; base alongside topic files is not.
    return topicMatches.length === 0 ? basePath : undefined;
  }

  // No base: a single topic file is unambiguous; zero (new session) or
  // several (ambiguous) yield undefined.
  return topicMatches.length === 1 ? join(sessionsDir, topicMatches[0]!) : undefined;
}
417
+
418
/**
 * Construct the canonical transcript path
 * `<state-dir>/agents/<agentId>/sessions/<file>` without checking that the
 * transcript file exists — the path-construction half of
 * `resolveOpenClawSessionFile`, intended for unit tests that have no real
 * OpenClaw state tree.
 *
 * Note: state-dir resolution itself may still probe the disk unless
 * `OPENCLAW_STATE_DIR` or `OPENCLAW_TEST_FAST=1` is set in `env`.
 *
 * @returns The path, or undefined when `sessionId` is blank or fails
 *          `SAFE_SESSION_ID_RE`, or when state-dir resolution throws.
 */
export function buildOpenClawSessionFilePath(params: {
  sessionId: string;
  agentId?: string;
  topicId?: string | number;
  env?: NodeJS.ProcessEnv;
}): string | undefined {
  const id = params.sessionId?.trim();
  if (!id) return undefined;
  if (!SAFE_SESSION_ID_RE.test(id)) return undefined;

  const agentDirName = normalizeAgentId(params.agentId);

  let root: string | undefined;
  try {
    root = resolveOpenClawStateDir(params.env);
  } catch {
    root = undefined;
  }
  if (root === undefined) return undefined;

  const transcriptName = buildTranscriptFileName(id, params.topicId);
  return join(root, "agents", agentDirName, "sessions", transcriptName);
}
@@ -0,0 +1,227 @@
1
+ /**
2
+ * Session-Scoped Focus (§11.4 — v0.9.0)
3
+ *
4
+ * Per-session topic primitive that biases context-surfacing ranking toward
5
+ * docs relevant to the declared working context — WITHOUT persisting any
6
+ * state to SQLite. Intra-session curation that cannot contaminate other
7
+ * sessions.
8
+ *
9
+ * Primary signal: per-session state file at
10
+ * ~/.cache/clawmem/sessions/<session_id>.focus
11
+ *
12
+ * The env var CLAWMEM_SESSION_FOCUS is a DEBUG-ONLY fallback: it is consulted
13
+ * only when no valid per-session focus file exists, and because it is a single process-wide
14
+ * variable it does NOT provide per-session scoping in multi-session host
15
+ * processes (e.g. a long-lived MCP server handling multiple Claude Code
16
+ * sessions). Use the file path for correctness; use the env var for
17
+ * ad-hoc single-session debugging only.
18
+ *
19
+ * All read paths are fail-open. Unreadable, corrupt, empty, missing,
20
+ * invalid-UTF-8, or oversized focus files return undefined and the
21
+ * caller proceeds with baseline ranking (byte-identical to pre-§11.4).
22
+ * The stage must NEVER half-apply a malformed topic.
23
+ */
24
+
25
+ import * as fs from "fs";
26
+ import * as path from "path";
27
+ import * as os from "os";
28
+ import type { ScoredResult } from "./memory.ts";
29
+
30
/** Maximum accepted topic length, in UTF-16 code units, after trimming. */
const MAX_TOPIC_LEN = 256;

/** Characters that would let a session id address a file outside the focus root. */
const UNSAFE_FOCUS_SESSION_ID_RE = /[\\/\0]/;

/**
 * True when `sessionId` can be used as a single filename component, i.e. it
 * contains no path separator or NUL byte.
 */
function isSafeFocusSessionId(sessionId: string): boolean {
  return !UNSAFE_FOCUS_SESSION_ID_RE.test(sessionId);
}

/**
 * Root directory for session focus files: `~/.cache/clawmem/sessions` unless
 * `CLAWMEM_FOCUS_ROOT` is set to a non-blank value. The override is trimmed
 * before use; previously blankness was tested on the trimmed value but the
 * untrimmed value was returned.
 *
 * Evaluated on every call so tests can change the env var without reloading
 * the module.
 */
export function focusRoot(): string {
  const override = process.env.CLAWMEM_FOCUS_ROOT?.trim();
  if (override) return override;
  return path.join(os.homedir(), ".cache", "clawmem", "sessions");
}

/**
 * Path of the focus file for `sessionId`: `<focusRoot>/<sessionId>.focus`.
 * Pure path construction — performs no validation and no I/O.
 */
export function focusFilePath(sessionId: string): string {
  return path.join(focusRoot(), `${sessionId}.focus`);
}

/**
 * Read the focus topic for a session.
 *
 * @returns The trimmed topic, or undefined when:
 *   - sessionId is missing/empty or contains a path separator or NUL byte
 *   - the file does not exist or cannot be read
 *   - the file is not valid UTF-8
 *   - the content is empty or whitespace-only
 *   - the trimmed content exceeds MAX_TOPIC_LEN
 *
 * Never throws; callers treat undefined as "no topic set".
 */
export function readSessionFocus(sessionId?: string): string | undefined {
  if (!sessionId || !isSafeFocusSessionId(sessionId)) return undefined;
  try {
    const p = focusFilePath(sessionId);
    if (!fs.existsSync(p)) return undefined;

    // Node's UTF-8 decoding does not throw on malformed input — it substitutes
    // U+FFFD. Round-trip the decoded text and compare bytes so a corrupt file
    // is rejected rather than half-applied.
    const bytes = fs.readFileSync(p);
    const text = bytes.toString("utf-8");
    if (!Buffer.from(text, "utf-8").equals(bytes)) return undefined;

    const topic = text.trim();
    if (!topic) return undefined;
    if (topic.length > MAX_TOPIC_LEN) return undefined;
    return topic;
  } catch {
    return undefined;
  }
}

/**
 * Write (or overwrite) the focus topic for a session, creating the focus root
 * directory if needed. The topic is stored trimmed.
 *
 * @throws Error when sessionId is blank or contains a path separator or NUL
 *         byte, when the topic is blank, or when the trimmed topic exceeds
 *         MAX_TOPIC_LEN. I/O errors from `fs` propagate unchanged — this
 *         backs a CLI command that should fail loudly on misuse.
 */
export function writeSessionFocus(sessionId: string, topic: string): void {
  if (!sessionId || !sessionId.trim()) {
    throw new Error("writeSessionFocus: sessionId required");
  }
  if (!isSafeFocusSessionId(sessionId)) {
    // Without this, an id such as "../x" would write outside the focus root.
    throw new Error("writeSessionFocus: sessionId must not contain path separators");
  }
  const trimmed = topic.trim();
  if (!trimmed) {
    throw new Error("writeSessionFocus: topic required");
  }
  if (trimmed.length > MAX_TOPIC_LEN) {
    throw new Error(`writeSessionFocus: topic exceeds max length ${MAX_TOPIC_LEN}`);
  }
  fs.mkdirSync(focusRoot(), { recursive: true });
  fs.writeFileSync(focusFilePath(sessionId), trimmed, { encoding: "utf-8" });
}

/**
 * Remove a session's focus file. No-op when the id is empty, unsafe, or the
 * file does not exist. Never throws — clearing is best-effort.
 */
export function clearSessionFocus(sessionId: string): void {
  if (!sessionId || !isSafeFocusSessionId(sessionId)) return;
  try {
    const p = focusFilePath(sessionId);
    if (fs.existsSync(p)) fs.unlinkSync(p);
  } catch {
    /* ignore — clearing is best-effort */
  }
}
112
+
113
/**
 * Pick the effective focus topic for a session.
 *
 * The per-session focus file wins; the supplied `envVar` value (the
 * CLAWMEM_SESSION_FOCUS debug override, passed in by the caller) is used only
 * when the file yields nothing. File-first precedence exists because the file
 * is the only signal scoped to a single session in multi-session hosts.
 *
 * Kept as a standalone function so the precedence rule can be unit-tested
 * without running a full context-surfacing pass.
 *
 * @param sessionId Session whose focus file should be consulted.
 * @param envVar    Raw env-var value; trimmed before use.
 * @returns The topic, or undefined when neither source gives a non-empty one.
 *          Does not log; failures surface as undefined.
 */
export function resolveSessionTopic(
  sessionId: string | undefined,
  envVar: string | undefined
): string | undefined {
  return readSessionFocus(sessionId) || envVar?.trim() || undefined;
}
139
+
140
/**
 * Decide whether a result matches a topic.
 *
 * The topic is lowercased and split on whitespace; tokens shorter than two
 * characters are discarded. A result matches when EVERY remaining token occurs
 * as a substring of the lowercased `title + " " + displayPath + " " +
 * first 800 characters of body`. A topic with no usable tokens never matches.
 */
function matchesTopic(result: ScoredResult, topic: string): boolean {
  const needles: string[] = [];
  for (const piece of topic.toLowerCase().split(/\s+/)) {
    const token = piece.trim();
    if (token.length >= 2) needles.push(token);
  }
  if (needles.length === 0) return false;

  // Only the head of the body is searched, bounding per-result cost.
  const bodyHead = (result.body || "").slice(0, 800);
  const searchable =
    `${result.title || ""} ${result.displayPath || ""} ${bodyHead}`.toLowerCase();

  for (const needle of needles) {
    if (!searchable.includes(needle)) return false;
  }
  return true;
}
163
+
164
export interface TopicBoostOptions {
  /** Score multiplier for results that match every topic token. Default 1.4. */
  boostFactor?: number;
  /**
   * Score multiplier for results that do not match. Default 0.75.
   * Values below 0.5 are raised to 0.5, so non-matching results are demoted
   * but never suppressed outright — this is a re-ranker, not a filter.
   */
  demoteFactor?: number;
}

/**
 * Re-rank an already-scored result set toward the session topic. Intended to
 * run after composite scoring and before threshold filtering.
 *
 * Behavior:
 *   - Blank/undefined topic, or empty input → input returned untouched.
 *   - Topic set but no result matches → input returned untouched. Demoting
 *     every result uniformly would only push some below the downstream
 *     threshold and shrink the result set relative to the no-topic baseline.
 *   - At least one match → each `compositeScore` is multiplied by
 *     `boostFactor` (match) or the clamped `demoteFactor` (no match), and the
 *     array is sorted by `compositeScore`, descending.
 *
 * Each result is matched exactly once, so the zero-match decision and the
 * scoring pass share the same flags.
 *
 * Performs no I/O. NOTE: the input array and its elements are mutated in
 * place (scores rewritten, array re-sorted) and the same array is returned.
 */
export function applyTopicBoost<T extends ScoredResult>(
  scored: T[],
  topic: string | undefined,
  options: TopicBoostOptions = {}
): T[] {
  if (!topic || !topic.trim()) return scored;
  if (scored.length === 0) return scored;

  const boost = options.boostFactor ?? 1.4;
  const demote = Math.max(options.demoteFactor ?? 0.75, 0.5);

  // One matching pass; remember the per-index outcome and whether any hit.
  const hits: boolean[] = [];
  let hitCount = 0;
  for (const result of scored) {
    const hit = matchesTopic(result, topic);
    hits.push(hit);
    if (hit) hitCount += 1;
  }
  if (hitCount === 0) return scored; // fail-open: baseline ordering preserved

  scored.forEach((result, index) => {
    result.compositeScore = result.compositeScore * (hits[index] ? boost : demote);
  });

  scored.sort((left, right) => right.compositeScore - left.compositeScore);
  return scored;
}
package/src/store.ts CHANGED
@@ -711,6 +711,11 @@ function initializeDatabase(db: Database): void {
711
711
  db.exec(`CREATE INDEX IF NOT EXISTS idx_entity_nodes_type ON entity_nodes(entity_type)`);
712
712
  db.exec(`CREATE INDEX IF NOT EXISTS idx_entity_nodes_vault ON entity_nodes(vault)`);
713
713
  db.exec(`CREATE INDEX IF NOT EXISTS idx_entity_nodes_mentions ON entity_nodes(mention_count DESC)`);
714
+ // §11.1 (v0.9.0): expression index backing the `LOWER(name) IN (...) AND vault = ?`
715
+ // batch lookup used by the context-surfacing entity-detection hot path.
716
+ // Without this index the batch query devolves into a full scan on large vaults.
717
+ // Idempotent via IF NOT EXISTS — existing vaults pick it up on next open.
718
+ db.exec(`CREATE INDEX IF NOT EXISTS idx_entity_nodes_lower_name ON entity_nodes(LOWER(name), vault)`);
714
719
 
715
720
  // Entity mentions: entity ↔ document junction table
716
721
  db.exec(`