npm - clawmem - Versions diffs - 0.8.4 → 0.9.0 - Mend

clawmem 0.8.4 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

package/AGENTS.md +31 -20
package/CLAUDE.md +21 -9
package/README.md +20 -22
package/SKILL.md +22 -9
package/package.json +1 -1
package/src/amem.ts +8 -1
package/src/clawmem.ts +97 -0
package/src/config.ts +14 -3
package/src/entity.ts +63 -0
package/src/hooks/context-surfacing.ts +87 -6
package/src/hooks/decision-extractor.ts +145 -115
package/src/mcp.ts +19 -6
package/src/observer.ts +132 -15
package/src/session-focus.ts +227 -0
package/src/store.ts +5 -0
package/src/vault-facts.ts +506 -0

package/src/observer.ts CHANGED Viewed

@@ -22,6 +22,13 @@ export type Observation = {
   concepts: string[];
   filesRead: string[];
   filesModified: string[];
+  triples?: ParsedTriple[];
+};
+export type ParsedTriple = { // One subject–predicate–object relation parsed from a <triple> element of an observation.
+  subject: string; // entity name; extractTriples only keeps subjects of 2-80 chars
+  predicate: string; // normalized form (lowercased, whitespace runs → "_") that is a member of VALID_PREDICATES
+  object: string; // entity name or literal; extractTriples only keeps objects of 2-120 chars
 };
 export type SessionSummary = {
@@ -48,28 +55,54 @@ const GENERATION_TEMPERATURE = 0.3;
 // =============================================================================
 const OBSERVATION_SYSTEM_PROMPT = `You are an observer analyzing a coding session transcript. Extract structured observations.
-For each significant action, decision, or discovery, output an <observation> XML element.
+For each significant action, decision, or discovery, output an <observation> XML element with the structure below.
+Structure:
 <observation>
-  <type>one of: decision, bugfix, feature, refactor, discovery, change, preference, milestone, problem</type>
-  <title>Brief descriptive title (max 80 chars)</title>
+  <type>...</type>
+  <title>...</title>
   <facts>
-    <fact>Individual atomic fact</fact>
+    <fact>...</fact>
   </facts>
-  <narrative>2-3 sentences explaining context and reasoning</narrative>
+  <triples>
+    <triple>
+      <subject>...</subject>
+      <predicate>...</predicate>
+      <object>...</object>
+    </triple>
+  </triples>
+  <narrative>...</narrative>
   <concepts>
-    <concept>one of: how-it-works, why-it-exists, what-changed, problem-solution, gotcha, pattern, trade-off</concept>
+    <concept>...</concept>
   </concepts>
-  <files_read><file>path/to/file</file></files_read>
-  <files_modified><file>path/to/file</file></files_modified>
+  <files_read><file>...</file></files_read>
+  <files_modified><file>...</file></files_modified>
 </observation>
-Rules:
+Field rules:
+- <type>: one of decision, bugfix, feature, refactor, discovery, change, preference, milestone, problem
+- <title>: brief descriptive title, max 80 chars
+- <facts>: 1-5 <fact> elements, each a standalone atomic claim about what happened or what is true (concrete, specific, no schema placeholders or template text)
+- <triples>: 0-3 <triple> elements for structural relationships between named entities (see predicate vocabulary below). Omit entirely if no relational claims apply. Do NOT emit triples for descriptive facts — only for explicit S-P-O relations.
+- <narrative>: 2-3 sentences explaining WHY something was done, not just WHAT
+- <concepts>: 0-3 <concept> elements from: how-it-works, why-it-exists, what-changed, problem-solution, gotcha, pattern, trade-off
+- <files_read>, <files_modified>: only files explicitly mentioned in the transcript
+Predicate vocabulary (use EXACTLY these predicates in <predicate>, nothing else):
+- adopted, migrated_to — switching to a new tool/framework/approach
+- deployed_to, runs_on — where something runs
+- replaced — when one thing supersedes another
+- depends_on, integrates_with, uses — structural dependencies
+- prefers, avoids — user preferences (use for <subject>user</subject>)
+- caused_by, resolved_by — causal relationships between problems and fixes
+- owned_by — responsibility / ownership
+<subject> and <object> must be short canonical entity names (2-80 chars). No sentences. No placeholder text. If you cannot fit a claim into this vocabulary, keep it in <facts> instead and omit the triple.
+Observation rules:
 - Output 1-5 observations, focusing on the MOST significant events
-- Each fact should be a standalone, atomic piece of information
-- The narrative should explain WHY something was done, not just WHAT
-- Only include files that were explicitly mentioned in the transcript
 - If no significant observations, output nothing
+- Never use schema example text or template placeholders in <fact>, <subject>, or <object> — emit only real content extracted from the transcript
 Type guidance:
 - preference: user expresses a preference, habit, or way of working (e.g., "don't use subagents for this", "I prefer single PRs")
@@ -131,6 +164,47 @@ const VALID_CONCEPTS = new Set([
   "gotcha", "pattern", "trade-off",
 ]);
+// Canonical SPO predicate vocabulary — parser rejects anything outside this set.
+// Must stay in sync with the predicate list in OBSERVATION_SYSTEM_PROMPT. Entries are lowercase snake_case because extractTriples lowercases the raw predicate and turns whitespace runs into "_" before the lookup.
+export const VALID_PREDICATES = new Set([
+  "adopted", "migrated_to",
+  "deployed_to", "runs_on",
+  "replaced",
+  "depends_on", "integrates_with", "uses",
+  "prefers", "avoids",
+  "caused_by", "resolved_by",
+  "owned_by",
+]);
+// Predicates whose <object> should be stored as a literal (not resolved to an entity). NOTE(review): not referenced in the visible part of this file; presumably consumed by an importing module — confirm.
+export const LITERAL_PREDICATES = new Set(["prefers", "avoids"]);
+// Exact placeholder strings that must never be persisted as facts or triple components.
+// Defense-in-depth: even though the prompt no longer places example text inside
+// <fact>/<subject>/<object> tags, a weak model could still echo these phrases. Entries must be lowercase and trimmed: isSchemaPlaceholder compares against the trimmed, lowercased input.
+const SCHEMA_PLACEHOLDER_STRINGS = new Set([
+  "individual atomic fact",
+  "atomic fact",
+  "one atomic claim per fact element",
+  "brief descriptive title",
+  "canonical entity name",
+]);
+// Regex for template placeholder markers: {{...}}, <!--...-->, ${...}. Anchored at the START only (no trailing $), so any text that begins with a marker is rejected even when real content follows it; `.` does not cross newlines, so a marker spanning several lines is not matched.
+// Intentionally narrow — earlier drafts rejected any line starting with
+// "example:" / "placeholder:", which false-positived legitimate facts like
+// "Example: QMD switched to Bun in v0.2". Shape-only matching avoids that
+// drift; the exact-string blocklist above handles known echoed placeholders.
+const PLACEHOLDER_REGEX = /^(\{\{.*\}\}|<!--.*-->|\$\{.*\})/;
+function isSchemaPlaceholder(text: string): boolean { // True when `text` is empty, exactly a blocklisted phrase, or starts with a template marker.
+  if (!text) return true; // empty input counts as a placeholder so callers drop it
+  const normalized = text.trim().toLowerCase(); // comparison is whitespace- and case-insensitive
+  if (SCHEMA_PLACEHOLDER_STRINGS.has(normalized)) return true;
+  if (PLACEHOLDER_REGEX.test(normalized)) return true;
+  return false; // note: whitespace-only text ends up here (it is a non-empty string and "" is not blocklisted), so it is NOT flagged
+}
 export function parseObservationXml(xml: string): Observation | null {
   const typeMatch = xml.match(/<type>\s*(.*?)\s*<\/type>/s);
   const titleMatch = xml.match(/<title>\s*(.*?)\s*<\/title>/s);
@@ -141,24 +215,67 @@ export function parseObservationXml(xml: string): Observation | null {
   const type = typeMatch[1].trim().toLowerCase();
   if (!VALID_OBSERVATION_TYPES.has(type)) return null;
-  const facts = extractMultiple(xml, "fact");
+  const rawTitle = titleMatch[1].trim();
+  if (isSchemaPlaceholder(rawTitle)) return null;
+  const facts = extractMultiple(xml, "fact")
+    .filter(f => f.length >= 5)
+    .filter(f => !isSchemaPlaceholder(f));
   const concepts = extractMultiple(xml, "concept")
     .filter(c => VALID_CONCEPTS.has(c.toLowerCase()))
     .map(c => c.toLowerCase());
   const filesRead = extractMultiple(xml, "file", "files_read");
   const filesModified = extractMultiple(xml, "file", "files_modified");
+  // Parse triples (Fix A): strict validation against canonical predicate vocabulary.
+  // Missing/malformed triples are silently dropped — fail-closed on ambiguity.
+  const triples = extractTriples(xml);
   return {
     type: type as Observation["type"],
-    title: titleMatch[1].trim().slice(0, 80),
-    facts: facts.filter(f => f.length >= 5),
+    title: rawTitle.slice(0, 80),
+    facts,
     narrative: narrativeMatch?.[1]?.trim() || "",
     concepts,
     filesRead,
     filesModified,
+    triples: triples.length > 0 ? triples : undefined,
   };
 }
+function extractTriples(xml: string): ParsedTriple[] { // Collect validated <triple> entries from the first <triples> element in `xml`; returns [] when there is none. Invalid triples are skipped, never thrown on.
+  const parentMatch = xml.match(/<triples>([\s\S]*?)<\/triples>/s); // non-greedy: only the first <triples>…</triples> pair is considered
+  if (!parentMatch?.[1]) return [];
+  const blockRegex = /<triple>([\s\S]*?)<\/triple>/g;
+  const results: ParsedTriple[] = [];
+  let match;
+  while ((match = blockRegex.exec(parentMatch[1])) !== null) {
+    const block = match[1] ?? "";
+    const subject = block.match(/<subject>\s*(.*?)\s*<\/subject>/s)?.[1]?.trim();
+    const rawPredicate = block.match(/<predicate>\s*(.*?)\s*<\/predicate>/s)?.[1]?.trim();
+    const object = block.match(/<object>\s*(.*?)\s*<\/object>/s)?.[1]?.trim();
+    if (!subject || !rawPredicate || !object) continue; // a missing or empty component drops the whole triple
+    const predicate = rawPredicate.toLowerCase().replace(/\s+/g, "_"); // normalize, e.g. "Depends On" → "depends_on"
+    if (!VALID_PREDICATES.has(predicate)) continue;
+    // Length bounds — guards against sentence-shaped subjects/objects that the
+    // regex-era tests expected. Subject and object should be short canonical names. NOTE(review): the prompt tells the model 2-80 chars for both, but <object> is accepted up to 120 here — confirm which bound is intended.
+    if (subject.length < 2 || subject.length > 80) continue;
+    if (object.length < 2 || object.length > 120) continue;
+    if (isSchemaPlaceholder(subject) || isSchemaPlaceholder(object)) continue;
+    results.push({ subject, predicate, object });
+    if (results.length >= 5) break; // cap per observation — NOTE(review): the prompt asks for 0-3 triples while this cap is 5; confirm which is intended
+  }
+  return results;
+}
 export function parseSummaryXml(xml: string): SessionSummary | null {
   const request = extractSingle(xml, "request");
   const investigated = extractSingle(xml, "investigated");

package/src/session-focus.ts ADDED Viewed

@@ -0,0 +1,227 @@
+/**
+ * Session-Scoped Focus (§11.4 — v0.9.0)
+ *
+ * Per-session topic primitive that biases context-surfacing ranking toward
+ * docs relevant to the declared working context — WITHOUT persisting any
+ * state to SQLite. Intra-session curation that cannot contaminate other
+ * sessions.
+ *
+ * Primary signal: per-session state file at
+ *   ~/.cache/clawmem/sessions/<session_id>.focus
+ *
+ * The env var CLAWMEM_SESSION_FOCUS is a DEBUG-ONLY fallback: resolveSessionTopic()
+ * consults it only when the per-session file yields no valid topic (file > env
+ * var), and because it is a single process-wide
+ * variable it does NOT provide per-session scoping in multi-session host
+ * processes (e.g. a long-lived MCP server handling multiple Claude Code
+ * sessions). Use the file path for correctness; use the env var for
+ * ad-hoc single-session debugging only.
+ *
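+ * All read paths are fail-open. Unreadable, empty, missing, or oversized
+ * focus files return undefined and the
+ * caller proceeds with baseline ranking (byte-identical to pre-§11.4).
+ * Caveat: invalid UTF-8 is rejected only if the runtime's utf-8 read throws;
+ * Node substitutes U+FFFD for invalid sequences instead of throwing, so a
+ * corrupt file may be decoded rather than rejected.
+ * The stage must NEVER half-apply a malformed topic.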
+ */
+import * as fs from "fs";
+import * as path from "path";
+import * as os from "os";
+import type { ScoredResult } from "./memory.ts";
+const MAX_TOPIC_LEN = 256; // Upper bound on a trimmed topic's String.length (UTF-16 code units, not bytes); checked by both readSessionFocus and writeSessionFocus.
+/**
+ * Resolve the root directory for session focus files. Defaults to
+ * `~/.cache/clawmem/sessions`, overridable via `CLAWMEM_FOCUS_ROOT`.
+ * The override is primarily a test hook (so `bun:test` can redirect
+ * writes to a tmp dir) but is also safe to use in production if an
+ * operator wants to relocate the focus files out of `$HOME`.
+ *
+ * A whitespace-only override is ignored. A non-blank override is returned
+ * exactly as set — it is neither trimmed nor normalized.
+ *
+ * Computed lazily on every call so env-var changes in tests take
+ * effect without module reload.
+ *
+ * @returns Directory under which `<session_id>.focus` files are stored.
+ */
+export function focusRoot(): string {
+  const override = process.env.CLAWMEM_FOCUS_ROOT;
+  if (override && override.trim().length > 0) return override;
+  return path.join(os.homedir(), ".cache", "clawmem", "sessions");
+}
+export function focusFilePath(sessionId: string): string { // Path of the focus file for `sessionId`: <focusRoot()>/<sessionId>.focus
+  return path.join(focusRoot(), `${sessionId}.focus`); // NOTE(review): sessionId is interpolated unvalidated; path.join normalizes ".." and separators, so an id such as "../x" resolves outside focusRoot() — confirm callers only pass trusted ids
+}
+/**
+ * Read the session focus topic. Returns undefined on any failure:
+ * - sessionId missing/empty
+ * - file does not exist
+ * - file unreadable (permissions, etc.)
+ * - file empty or whitespace-only
+ * - trimmed content longer than MAX_TOPIC_LEN (String.length, i.e. UTF-16
+ *   code units, not bytes)
+ *
+ * NOTE(review): this list previously included "invalid UTF-8 (readFileSync
+ * throws)". Node's utf-8 decoding replaces invalid sequences with U+FFFD
+ * instead of throwing, so such a file is presumably returned as a topic
+ * containing replacement characters rather than rejected — confirm on the
+ * target runtime.
+ * NOTE(review): the whole file is read before the length check, so an
+ * oversized file is still loaded into memory before being rejected.
+ *
+ * Never throws. Caller treats undefined as "no topic set" and skips
+ * the boost stage entirely.
+ *
+ * @param sessionId Session whose focus file should be read; falsy → undefined.
+ * @returns The trimmed topic, or undefined when no valid topic is available.
+ */
+export function readSessionFocus(sessionId?: string): string | undefined {
+  if (!sessionId) return undefined;
+  try {
+    const p = focusFilePath(sessionId);
+    if (!fs.existsSync(p)) return undefined;
+    const raw = fs.readFileSync(p, { encoding: "utf-8" });
+    const topic = raw.trim();
+    if (!topic) return undefined;
+    if (topic.length > MAX_TOPIC_LEN) return undefined;
+    return topic;
+  } catch {
+    return undefined; // fail-open: any I/O error is reported as "no topic"
+  }
+}
+/**
+ * Write a session focus topic. Creates the sessions directory if needed.
+ * Overwrites any existing file. Throws on invalid input or I/O errors
+ * (caller surface — CLI command that should fail loudly on misuse).
+ *
+ * The topic is stored trimmed. The length limit is applied to the trimmed
+ * topic's String.length.
+ *
+ * NOTE(review): sessionId is validated with trim() but passed to
+ * focusFilePath() untrimmed, so surrounding whitespace becomes part of the
+ * file name — confirm that is acceptable.
+ *
+ * @param sessionId Session identifier; must contain a non-whitespace character.
+ * @param topic Topic text; must be non-blank and at most MAX_TOPIC_LEN chars after trimming.
+ * @throws Error when sessionId or topic is blank, when the topic is too long,
+ *   or when the directory/file cannot be written (errors from fs propagate).
+ */
+export function writeSessionFocus(sessionId: string, topic: string): void {
+  if (!sessionId || !sessionId.trim()) {
+    throw new Error("writeSessionFocus: sessionId required");
+  }
+  const trimmed = topic.trim();
+  if (!trimmed) {
+    throw new Error("writeSessionFocus: topic required");
+  }
+  if (trimmed.length > MAX_TOPIC_LEN) {
+    throw new Error(`writeSessionFocus: topic exceeds max length ${MAX_TOPIC_LEN}`);
+  }
+  fs.mkdirSync(focusRoot(), { recursive: true });
+  fs.writeFileSync(focusFilePath(sessionId), trimmed, { encoding: "utf-8" });
+}
+/**
+ * Clear a session focus. No-op if the file does not exist.
+ * Never throws (caller is typically "revert ranking to baseline").
+ *
+ * Errors from the existence check or the unlink are swallowed, so a failed
+ * delete is indistinguishable from a successful one to the caller.
+ *
+ * @param sessionId Session whose focus file should be removed; falsy → no-op.
+ */
+export function clearSessionFocus(sessionId: string): void {
+  if (!sessionId) return;
+  try {
+    const p = focusFilePath(sessionId);
+    if (fs.existsSync(p)) fs.unlinkSync(p);
+  } catch {
+    /* ignore — clearing is best-effort */
+  }
+}
+/**
+ * Resolve the effective session focus topic by checking the per-session
+ * focus file first, then falling back to a provided env-var value (the
+ * CLAWMEM_SESSION_FOCUS debug override). Returns undefined when neither
+ * yields a valid topic.
+ *
+ * Precedence is file > env var because the file is the only signal
+ * that provides per-session scoping on multi-session host processes.
+ * Exposed here (rather than inlined at the call site) so the hook's
+ * precedence logic can be unit-tested directly without spinning up a
+ * full contextSurfacing invocation.
+ *
+ * Never throws. Never logs. Every failure path returns undefined and
+ * the caller treats that as "no topic set" (byte-identical to
+ * pre-§11.4 hook behavior).
+ *
+ * NOTE(review): the env-var fallback is trimmed but, unlike the file
+ * path, is not checked against MAX_TOPIC_LEN — confirm that is intended.
+ *
+ * @param sessionId Session whose focus file is consulted first; may be undefined.
+ * @param envVar Raw fallback value supplied by the caller; may be undefined.
+ * @returns The file topic if valid, else the trimmed non-empty env value, else undefined.
+ */
+export function resolveSessionTopic(
+  sessionId: string | undefined,
+  envVar: string | undefined
+): string | undefined {
+  const fromFile = readSessionFocus(sessionId);
+  if (fromFile) return fromFile;
+  const fromEnv = envVar?.trim();
+  if (fromEnv) return fromEnv;
+  return undefined;
+}
+/**
+ * Case-insensitive tokenized AND-match against title + displayPath + body.
+ * Tokens shorter than 2 chars are dropped (common stopwords and typos).
+ * Returns true only if every remaining token appears in the haystack.
+ *
+ * Matching is by substring (String.includes), not by word boundary, so a
+ * token also matches inside a longer word. Only the first 800 characters
+ * of the body are searched. A topic with no usable tokens never matches.
+ *
+ * @param result Scored result whose title, displayPath and body are searched.
+ * @param topic Whitespace-separated topic string.
+ * @returns true when every token of length >= 2 occurs in the haystack.
+ */
+function matchesTopic(result: ScoredResult, topic: string): boolean {
+  const tokens = topic
+    .toLowerCase()
+    .split(/\s+/)
+    .map(t => t.trim()) // no-op after splitting on \s+; kept as-is
+    .filter(t => t.length >= 2);
+  if (tokens.length === 0) return false;
+  const haystack = [
+    result.title || "",
+    result.displayPath || "",
+    (result.body || "").slice(0, 800),
+  ]
+    .join(" ")
+    .toLowerCase();
+  return tokens.every(t => haystack.includes(t));
+}
+export interface TopicBoostOptions {
+  /** Multiplier applied to docs whose title/path/body match all topic tokens. Default 1.4. Used as given — applyTopicBoost does not clamp or validate it. */
+  boostFactor?: number;
+  /**
+   * Multiplier applied to non-matching docs. Default 0.75.
+   * Clamped to a 0.5 floor so the boost is a re-ranker, not a hide —
+   * non-matching docs are demoted but never suppressed to zero.
+   * There is no upper clamp: a value above 1 would raise non-matching docs.
+   */
+  demoteFactor?: number;
+}
+/**
+ * Apply session-topic boost/demote to a scored result set as a POST-COMPOSITE
+ * reranking pass. Runs AFTER applyCompositeScoring(...) and BEFORE threshold
+ * filtering (the specific architectural placement Codex approved in Turn 1 of
+ * the v0.9.0 design review).
+ *
+ * Behavior:
+ *   - Empty/undefined topic → returns input unchanged (no-op, byte-identical).
+ *   - Topic present but ZERO docs match → returns input unchanged (no-op).
+ *     This is the fail-open contract from the approved §11.4 spec: "topic
+ *     set + zero matching docs → proceed with the normal results." Without
+ *     this short-circuit, uniformly demoting every doc would push some
+ *     below the downstream threshold filter and silently shrink the
+ *     result set — a regression vs the no-topic baseline.
+ *     (Caught by Codex in §11.4 code review Turn 1, 2026-04-13.)
+ *   - Topic present AND at least one match → each result's compositeScore
+ *     is multiplied by either boostFactor (matching) or demoteFactor
+ *     (non-matching), then results are re-sorted descending.
+ *
+ * Matching is computed exactly once per result in a pre-pass so the
+ * short-circuit can decide without double-evaluating the token match.
+ *
+ * This is a pure function over the scored set — it does NOT call the DB,
+ * does NOT write SQLite state, does NOT touch any lifecycle column.
+ * Mutates compositeScore in place (consistent with existing scoring
+ * helpers in this codebase; single caller, single thread).
+ * The input array itself is also sorted in place, and the same array
+ * instance is returned in every branch — callers that need the original
+ * order must copy before calling.
+ *
+ * @param scored Results to re-rank; elements and array order are mutated.
+ * @param topic Session topic; undefined or blank disables the pass.
+ * @param options Optional boost/demote multipliers (defaults 1.4 / 0.75).
+ * @returns The same `scored` array, re-ranked when at least one doc matched.
+ */
+export function applyTopicBoost<T extends ScoredResult>(
+  scored: T[],
+  topic: string | undefined,
+  options: TopicBoostOptions = {}
+): T[] {
+  if (!topic || !topic.trim()) return scored;
+  if (scored.length === 0) return scored;
+  const boostFactor = options.boostFactor ?? 1.4;
+  const demoteFactor = Math.max(options.demoteFactor ?? 0.75, 0.5); // 0.5 floor; no upper bound is applied
+  // Pre-compute per-result match flags so we can early-return on zero
+  // matches without double-evaluating matchesTopic during the mutation
+  // pass. Caching is also a (small) perf win for any single call.
+  const matches = scored.map(r => matchesTopic(r, topic));
+  const anyMatch = matches.some(Boolean);
+  if (!anyMatch) return scored; // fail-open: baseline ordering preserved
+  for (let i = 0; i < scored.length; i++) {
+    const factor = matches[i] ? boostFactor : demoteFactor;
+    scored[i]!.compositeScore = scored[i]!.compositeScore * factor; // `!` is safe: i < scored.length
+  }
+  scored.sort((a, b) => b.compositeScore - a.compositeScore); // in-place, descending by boosted score
+  return scored;
+}

package/src/store.ts CHANGED Viewed

@@ -711,6 +711,11 @@ function initializeDatabase(db: Database): void {
   db.exec(`CREATE INDEX IF NOT EXISTS idx_entity_nodes_type ON entity_nodes(entity_type)`);
   db.exec(`CREATE INDEX IF NOT EXISTS idx_entity_nodes_vault ON entity_nodes(vault)`);
   db.exec(`CREATE INDEX IF NOT EXISTS idx_entity_nodes_mentions ON entity_nodes(mention_count DESC)`);
+  // §11.1 (v0.9.0): expression index backing the `LOWER(name) IN (...) AND vault = ?`
+  // batch lookup used by the context-surfacing entity-detection hot path.
+  // Without this index the batch query devolves into a full scan on large vaults.
+  // Idempotent via IF NOT EXISTS — existing vaults pick it up on next open.
+  db.exec(`CREATE INDEX IF NOT EXISTS idx_entity_nodes_lower_name ON entity_nodes(LOWER(name), vault)`);
   // Entity mentions: entity ↔ document junction table
   db.exec(`