clawmem 0.8.4 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/observer.ts CHANGED
@@ -22,6 +22,13 @@ export type Observation = {
22
22
  concepts: string[];
23
23
  filesRead: string[];
24
24
  filesModified: string[];
25
+ triples?: ParsedTriple[];
26
+ };
27
+
28
+ export type ParsedTriple = {
29
+ subject: string;
30
+ predicate: string;
31
+ object: string;
25
32
  };
26
33
 
27
34
  export type SessionSummary = {
@@ -48,28 +55,54 @@ const GENERATION_TEMPERATURE = 0.3;
48
55
  // =============================================================================
49
56
 
50
57
  const OBSERVATION_SYSTEM_PROMPT = `You are an observer analyzing a coding session transcript. Extract structured observations.
51
- For each significant action, decision, or discovery, output an <observation> XML element.
58
+ For each significant action, decision, or discovery, output an <observation> XML element with the structure below.
52
59
 
60
+ Structure:
53
61
  <observation>
54
- <type>one of: decision, bugfix, feature, refactor, discovery, change, preference, milestone, problem</type>
55
- <title>Brief descriptive title (max 80 chars)</title>
62
+ <type>...</type>
63
+ <title>...</title>
56
64
  <facts>
57
- <fact>Individual atomic fact</fact>
65
+ <fact>...</fact>
58
66
  </facts>
59
- <narrative>2-3 sentences explaining context and reasoning</narrative>
67
+ <triples>
68
+ <triple>
69
+ <subject>...</subject>
70
+ <predicate>...</predicate>
71
+ <object>...</object>
72
+ </triple>
73
+ </triples>
74
+ <narrative>...</narrative>
60
75
  <concepts>
61
- <concept>one of: how-it-works, why-it-exists, what-changed, problem-solution, gotcha, pattern, trade-off</concept>
76
+ <concept>...</concept>
62
77
  </concepts>
63
- <files_read><file>path/to/file</file></files_read>
64
- <files_modified><file>path/to/file</file></files_modified>
78
+ <files_read><file>...</file></files_read>
79
+ <files_modified><file>...</file></files_modified>
65
80
  </observation>
66
81
 
67
- Rules:
82
+ Field rules:
83
+ - <type>: one of decision, bugfix, feature, refactor, discovery, change, preference, milestone, problem
84
+ - <title>: brief descriptive title, max 80 chars
85
+ - <facts>: 1-5 <fact> elements, each a standalone atomic claim about what happened or what is true (concrete, specific, no schema placeholders or template text)
86
+ - <triples>: 0-3 <triple> elements for structural relationships between named entities (see predicate vocabulary below). Omit entirely if no relational claims apply. Do NOT emit triples for descriptive facts — only for explicit S-P-O relations.
87
+ - <narrative>: 2-3 sentences explaining WHY something was done, not just WHAT
88
+ - <concepts>: 0-3 <concept> elements from: how-it-works, why-it-exists, what-changed, problem-solution, gotcha, pattern, trade-off
89
+ - <files_read>, <files_modified>: only files explicitly mentioned in the transcript
90
+
91
+ Predicate vocabulary (use EXACTLY these predicates in <predicate>, nothing else):
92
+ - adopted, migrated_to — switching to a new tool/framework/approach
93
+ - deployed_to, runs_on — where something runs
94
+ - replaced — when one thing supersedes another
95
+ - depends_on, integrates_with, uses — structural dependencies
96
+ - prefers, avoids — user preferences (use for <subject>user</subject>)
97
+ - caused_by, resolved_by — causal relationships between problems and fixes
98
+ - owned_by — responsibility / ownership
99
+
100
+ <subject> and <object> must be short canonical entity names (2-80 chars). No sentences. No placeholder text. If you cannot fit a claim into this vocabulary, keep it in <facts> instead and omit the triple.
101
+
102
+ Observation rules:
68
103
  - Output 1-5 observations, focusing on the MOST significant events
69
- - Each fact should be a standalone, atomic piece of information
70
- - The narrative should explain WHY something was done, not just WHAT
71
- - Only include files that were explicitly mentioned in the transcript
72
104
  - If no significant observations, output nothing
105
+ - Never use schema example text or template placeholders in <fact>, <subject>, or <object> — emit only real content extracted from the transcript
73
106
 
74
107
  Type guidance:
75
108
  - preference: user expresses a preference, habit, or way of working (e.g., "don't use subagents for this", "I prefer single PRs")
@@ -131,6 +164,47 @@ const VALID_CONCEPTS = new Set([
131
164
  "gotcha", "pattern", "trade-off",
132
165
  ]);
133
166
 
167
+ // Canonical SPO predicate vocabulary — parser rejects anything outside this set.
168
+ // Must stay in sync with the predicate list in OBSERVATION_SYSTEM_PROMPT.
169
+ export const VALID_PREDICATES = new Set([
170
+ "adopted", "migrated_to",
171
+ "deployed_to", "runs_on",
172
+ "replaced",
173
+ "depends_on", "integrates_with", "uses",
174
+ "prefers", "avoids",
175
+ "caused_by", "resolved_by",
176
+ "owned_by",
177
+ ]);
178
+
179
+ // Predicates whose <object> should be stored as a literal (not resolved to an entity).
180
+ export const LITERAL_PREDICATES = new Set(["prefers", "avoids"]);
181
+
182
+ // Exact placeholder strings that must never be persisted as facts or triple components.
183
+ // Defense-in-depth: even though the prompt no longer places example text inside
184
+ // <fact>/<subject>/<object> tags, a weak model could still echo these phrases.
185
+ const SCHEMA_PLACEHOLDER_STRINGS = new Set([
186
+ "individual atomic fact",
187
+ "atomic fact",
188
+ "one atomic claim per fact element",
189
+ "brief descriptive title",
190
+ "canonical entity name",
191
+ ]);
192
+
193
+ // Regex for template placeholder markers: {{...}}, <!--...-->, ${...}.
194
+ // Intentionally narrow — earlier drafts rejected any line starting with
195
+ // "example:" / "placeholder:", which false-positived legitimate facts like
196
+ // "Example: QMD switched to Bun in v0.2". Shape-only matching avoids that
197
+ // drift; the exact-string blocklist above handles known echoed placeholders.
198
+ const PLACEHOLDER_REGEX = /^(\{\{.*\}\}|<!--.*-->|\$\{.*\})/;
199
+
200
+ function isSchemaPlaceholder(text: string): boolean {
201
+ if (!text) return true;
202
+ const normalized = text.trim().toLowerCase();
203
+ if (SCHEMA_PLACEHOLDER_STRINGS.has(normalized)) return true;
204
+ if (PLACEHOLDER_REGEX.test(normalized)) return true;
205
+ return false;
206
+ }
207
+
134
208
  export function parseObservationXml(xml: string): Observation | null {
135
209
  const typeMatch = xml.match(/<type>\s*(.*?)\s*<\/type>/s);
136
210
  const titleMatch = xml.match(/<title>\s*(.*?)\s*<\/title>/s);
@@ -141,24 +215,67 @@ export function parseObservationXml(xml: string): Observation | null {
141
215
  const type = typeMatch[1].trim().toLowerCase();
142
216
  if (!VALID_OBSERVATION_TYPES.has(type)) return null;
143
217
 
144
- const facts = extractMultiple(xml, "fact");
218
+ const rawTitle = titleMatch[1].trim();
219
+ if (isSchemaPlaceholder(rawTitle)) return null;
220
+
221
+ const facts = extractMultiple(xml, "fact")
222
+ .filter(f => f.length >= 5)
223
+ .filter(f => !isSchemaPlaceholder(f));
224
+
145
225
  const concepts = extractMultiple(xml, "concept")
146
226
  .filter(c => VALID_CONCEPTS.has(c.toLowerCase()))
147
227
  .map(c => c.toLowerCase());
148
228
  const filesRead = extractMultiple(xml, "file", "files_read");
149
229
  const filesModified = extractMultiple(xml, "file", "files_modified");
150
230
 
231
+ // Parse triples (Fix A): strict validation against canonical predicate vocabulary.
232
+ // Missing/malformed triples are silently dropped — fail-closed on ambiguity.
233
+ const triples = extractTriples(xml);
234
+
151
235
  return {
152
236
  type: type as Observation["type"],
153
- title: titleMatch[1].trim().slice(0, 80),
154
- facts: facts.filter(f => f.length >= 5),
237
+ title: rawTitle.slice(0, 80),
238
+ facts,
155
239
  narrative: narrativeMatch?.[1]?.trim() || "",
156
240
  concepts,
157
241
  filesRead,
158
242
  filesModified,
243
+ triples: triples.length > 0 ? triples : undefined,
159
244
  };
160
245
  }
161
246
 
247
+ function extractTriples(xml: string): ParsedTriple[] {
248
+ const parentMatch = xml.match(/<triples>([\s\S]*?)<\/triples>/s);
249
+ if (!parentMatch?.[1]) return [];
250
+
251
+ const blockRegex = /<triple>([\s\S]*?)<\/triple>/g;
252
+ const results: ParsedTriple[] = [];
253
+ let match;
254
+ while ((match = blockRegex.exec(parentMatch[1])) !== null) {
255
+ const block = match[1] ?? "";
256
+ const subject = block.match(/<subject>\s*(.*?)\s*<\/subject>/s)?.[1]?.trim();
257
+ const rawPredicate = block.match(/<predicate>\s*(.*?)\s*<\/predicate>/s)?.[1]?.trim();
258
+ const object = block.match(/<object>\s*(.*?)\s*<\/object>/s)?.[1]?.trim();
259
+
260
+ if (!subject || !rawPredicate || !object) continue;
261
+
262
+ const predicate = rawPredicate.toLowerCase().replace(/\s+/g, "_");
263
+ if (!VALID_PREDICATES.has(predicate)) continue;
264
+
265
+ // Length bounds — guards against sentence-shaped subjects/objects that the
266
+ // regex-era tests expected. Subject and object should be short canonical names.
267
+ if (subject.length < 2 || subject.length > 80) continue;
268
+ if (object.length < 2 || object.length > 120) continue;
269
+
270
+ if (isSchemaPlaceholder(subject) || isSchemaPlaceholder(object)) continue;
271
+
272
+ results.push({ subject, predicate, object });
273
+
274
+ if (results.length >= 5) break; // cap per observation
275
+ }
276
+ return results;
277
+ }
278
+
162
279
  export function parseSummaryXml(xml: string): SessionSummary | null {
163
280
  const request = extractSingle(xml, "request");
164
281
  const investigated = extractSingle(xml, "investigated");
@@ -0,0 +1,227 @@
1
+ /**
2
+ * Session-Scoped Focus (§11.4 — v0.9.0)
3
+ *
4
+ * Per-session topic primitive that biases context-surfacing ranking toward
5
+ * docs relevant to the declared working context — WITHOUT persisting any
6
+ * state to SQLite. Intra-session curation that cannot contaminate other
7
+ * sessions.
8
+ *
9
+ * Primary signal: per-session state file at
10
+ * ~/.cache/clawmem/sessions/<session_id>.focus
11
+ *
12
+ * The env var CLAWMEM_SESSION_FOCUS is a DEBUG-ONLY fallback: it is consulted
13
+ * only when no valid per-session file is found, and because it is a single process-wide
14
+ * variable it does NOT provide per-session scoping in multi-session host
15
+ * processes (e.g. a long-lived MCP server handling multiple Claude Code
16
+ * sessions). Use the file path for correctness; use the env var for
17
+ * ad-hoc single-session debugging only.
18
+ *
19
+ * All read paths are fail-open. Unreadable, corrupt, empty, missing,
20
+ * invalid-UTF-8, or oversized focus files return undefined and the
21
+ * caller proceeds with baseline ranking (byte-identical to pre-§11.4).
22
+ * The stage must NEVER half-apply a malformed topic.
23
+ */
24
+
25
+ import * as fs from "fs";
26
+ import * as path from "path";
27
+ import * as os from "os";
28
+ import type { ScoredResult } from "./memory.ts";
29
+
30
+ const MAX_TOPIC_LEN = 256;
31
+
32
+ /**
33
+ * Resolve the root directory for session focus files. Defaults to
34
+ * `~/.cache/clawmem/sessions`, overridable via `CLAWMEM_FOCUS_ROOT`.
35
+ * The override is primarily a test hook (so `bun:test` can redirect
36
+ * writes to a tmp dir) but is also safe to use in production if an
37
+ * operator wants to relocate the focus files out of `$HOME`.
38
+ *
39
+ * Computed lazily on every call so env-var changes in tests take
40
+ * effect without module reload.
41
+ */
42
+ export function focusRoot(): string {
43
+ const override = process.env.CLAWMEM_FOCUS_ROOT;
44
+ if (override && override.trim().length > 0) return override;
45
+ return path.join(os.homedir(), ".cache", "clawmem", "sessions");
46
+ }
47
+
48
+ export function focusFilePath(sessionId: string): string {
49
+ return path.join(focusRoot(), `${sessionId}.focus`);
50
+ }
51
+
52
+ /**
53
+ * Read the session focus topic. Returns undefined on any failure:
54
+ * - sessionId missing/empty
55
+ * - file does not exist
56
+ * - file unreadable (permissions, etc.)
57
+ * - file empty or whitespace-only
58
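+ * - trimmed file contents exceed MAX_TOPIC_LEN characters
59
+ * - file contains invalid UTF-8 (NOTE(review): Node's utf-8 decoding substitutes U+FFFD for invalid bytes rather than throwing — confirm this case is actually rejected)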
60
+ *
61
+ * Never throws. Caller treats undefined as "no topic set" and skips
62
+ * the boost stage entirely.
63
+ */
64
+ export function readSessionFocus(sessionId?: string): string | undefined {
65
+ if (!sessionId) return undefined;
66
+ try {
67
+ const p = focusFilePath(sessionId);
68
+ if (!fs.existsSync(p)) return undefined;
69
+ const raw = fs.readFileSync(p, { encoding: "utf-8" });
70
+ const topic = raw.trim();
71
+ if (!topic) return undefined;
72
+ if (topic.length > MAX_TOPIC_LEN) return undefined;
73
+ return topic;
74
+ } catch {
75
+ return undefined;
76
+ }
77
+ }
78
+
79
+ /**
80
+ * Write a session focus topic. Creates the sessions directory if needed.
81
+ * Overwrites any existing file. Throws on invalid input or I/O errors
82
+ * (caller surface — CLI command that should fail loudly on misuse).
83
+ */
84
+ export function writeSessionFocus(sessionId: string, topic: string): void {
85
+ if (!sessionId || !sessionId.trim()) {
86
+ throw new Error("writeSessionFocus: sessionId required");
87
+ }
88
+ const trimmed = topic.trim();
89
+ if (!trimmed) {
90
+ throw new Error("writeSessionFocus: topic required");
91
+ }
92
+ if (trimmed.length > MAX_TOPIC_LEN) {
93
+ throw new Error(`writeSessionFocus: topic exceeds max length ${MAX_TOPIC_LEN}`);
94
+ }
95
+ fs.mkdirSync(focusRoot(), { recursive: true });
96
+ fs.writeFileSync(focusFilePath(sessionId), trimmed, { encoding: "utf-8" });
97
+ }
98
+
99
+ /**
100
+ * Clear a session focus. No-op if the file does not exist.
101
+ * Never throws (caller is typically "revert ranking to baseline").
102
+ */
103
+ export function clearSessionFocus(sessionId: string): void {
104
+ if (!sessionId) return;
105
+ try {
106
+ const p = focusFilePath(sessionId);
107
+ if (fs.existsSync(p)) fs.unlinkSync(p);
108
+ } catch {
109
+ /* ignore — clearing is best-effort */
110
+ }
111
+ }
112
+
113
+ /**
114
+ * Resolve the effective session focus topic by checking the per-session
115
+ * focus file first, then falling back to a provided env-var value (the
116
+ * CLAWMEM_SESSION_FOCUS debug override). Returns undefined when neither
117
+ * yields a valid topic.
118
+ *
119
+ * Precedence is file > env var because the file is the only signal
120
+ * that provides per-session scoping on multi-session host processes.
121
+ * Exposed here (rather than inlined at the call site) so the hook's
122
+ * precedence logic can be unit-tested directly without spinning up a
123
+ * full contextSurfacing invocation.
124
+ *
125
+ * Never throws. Never logs. Every failure path returns undefined and
126
+ * the caller treats that as "no topic set" (byte-identical to
127
+ * pre-§11.4 hook behavior).
128
+ */
129
+ export function resolveSessionTopic(
130
+ sessionId: string | undefined,
131
+ envVar: string | undefined
132
+ ): string | undefined {
133
+ const fromFile = readSessionFocus(sessionId);
134
+ if (fromFile) return fromFile;
135
+ const fromEnv = envVar?.trim();
136
+ if (fromEnv) return fromEnv;
137
+ return undefined;
138
+ }
139
+
140
+ /**
141
+ * Case-insensitive tokenized AND-match against title + displayPath + the first 800 chars of body.
142
+ * Tokens shorter than 2 chars are dropped (common stopwords and typos).
143
+ * Returns true only if every remaining token appears in the haystack.
144
+ */
145
+ function matchesTopic(result: ScoredResult, topic: string): boolean {
146
+ const tokens = topic
147
+ .toLowerCase()
148
+ .split(/\s+/)
149
+ .map(t => t.trim())
150
+ .filter(t => t.length >= 2);
151
+ if (tokens.length === 0) return false;
152
+
153
+ const haystack = [
154
+ result.title || "",
155
+ result.displayPath || "",
156
+ (result.body || "").slice(0, 800),
157
+ ]
158
+ .join(" ")
159
+ .toLowerCase();
160
+
161
+ return tokens.every(t => haystack.includes(t));
162
+ }
163
+
164
+ export interface TopicBoostOptions {
165
+ /** Multiplier applied to docs whose title/path/body match all topic tokens. Default 1.4. */
166
+ boostFactor?: number;
167
+ /**
168
+ * Multiplier applied to non-matching docs. Default 0.75.
169
+ * Clamped to a 0.5 floor so the boost is a re-ranker, not a hide —
170
+ * non-matching docs are demoted but never suppressed to zero.
171
+ */
172
+ demoteFactor?: number;
173
+ }
174
+
175
+ /**
176
+ * Apply session-topic boost/demote to a scored result set as a POST-COMPOSITE
177
+ * reranking pass. Runs AFTER applyCompositeScoring(...) and BEFORE threshold
178
+ * filtering (the specific architectural placement Codex approved in Turn 1 of
179
+ * the v0.9.0 design review).
180
+ *
181
+ * Behavior:
182
+ * - Empty/undefined topic → returns input unchanged (no-op, byte-identical).
183
+ * - Topic present but ZERO docs match → returns input unchanged (no-op).
184
+ * This is the fail-open contract from the approved §11.4 spec: "topic
185
+ * set + zero matching docs → proceed with the normal results." Without
186
+ * this short-circuit, uniformly demoting every doc would push some
187
+ * below the downstream threshold filter and silently shrink the
188
+ * result set — a regression vs the no-topic baseline.
189
+ * (Caught by Codex in §11.4 code review Turn 1, 2026-04-13.)
190
+ * - Topic present AND at least one match → each result's compositeScore
191
+ * is multiplied by either boostFactor (matching) or demoteFactor
192
+ * (non-matching), then results are re-sorted descending.
193
+ *
194
+ * Matching is computed exactly once per result in a pre-pass so the
195
+ * short-circuit can decide without double-evaluating the token match.
196
+ *
197
+ * This function performs no I/O over the scored set — it does NOT call the DB,
198
+ * does NOT write SQLite state, does NOT touch any lifecycle column.
199
+ * It is not side-effect free: it mutates compositeScore and re-sorts the input array in place (consistent with existing scoring
200
+ * helpers in this codebase; single caller, single thread).
201
+ */
202
+ export function applyTopicBoost<T extends ScoredResult>(
203
+ scored: T[],
204
+ topic: string | undefined,
205
+ options: TopicBoostOptions = {}
206
+ ): T[] {
207
+ if (!topic || !topic.trim()) return scored;
208
+ if (scored.length === 0) return scored;
209
+
210
+ const boostFactor = options.boostFactor ?? 1.4;
211
+ const demoteFactor = Math.max(options.demoteFactor ?? 0.75, 0.5);
212
+
213
+ // Pre-compute per-result match flags so we can early-return on zero
214
+ // matches without double-evaluating matchesTopic during the mutation
215
+ // pass. Caching is also a (small) perf win for any single call.
216
+ const matches = scored.map(r => matchesTopic(r, topic));
217
+ const anyMatch = matches.some(Boolean);
218
+ if (!anyMatch) return scored; // fail-open: baseline ordering preserved
219
+
220
+ for (let i = 0; i < scored.length; i++) {
221
+ const factor = matches[i] ? boostFactor : demoteFactor;
222
+ scored[i]!.compositeScore = scored[i]!.compositeScore * factor;
223
+ }
224
+
225
+ scored.sort((a, b) => b.compositeScore - a.compositeScore);
226
+ return scored;
227
+ }
package/src/store.ts CHANGED
@@ -711,6 +711,11 @@ function initializeDatabase(db: Database): void {
711
711
  db.exec(`CREATE INDEX IF NOT EXISTS idx_entity_nodes_type ON entity_nodes(entity_type)`);
712
712
  db.exec(`CREATE INDEX IF NOT EXISTS idx_entity_nodes_vault ON entity_nodes(vault)`);
713
713
  db.exec(`CREATE INDEX IF NOT EXISTS idx_entity_nodes_mentions ON entity_nodes(mention_count DESC)`);
714
+ // §11.1 (v0.9.0): expression index backing the `LOWER(name) IN (...) AND vault = ?`
715
+ // batch lookup used by the context-surfacing entity-detection hot path.
716
+ // Without this index the batch query devolves into a full scan on large vaults.
717
+ // Idempotent via IF NOT EXISTS — existing vaults pick it up on next open.
718
+ db.exec(`CREATE INDEX IF NOT EXISTS idx_entity_nodes_lower_name ON entity_nodes(LOWER(name), vault)`);
714
719
 
715
720
  // Entity mentions: entity ↔ document junction table
716
721
  db.exec(`